logistics/CHECK_PROFIT.ipynb

12 KiB
Raw Blame History

In [ ]:
from login_for_cookie import Vc
import requests
import pandas as pd
import re  
# Vc() is provided by the project's login_for_cookie module; presumably it
# returns the cookie string of a logged-in session -- TODO confirm.
cookie = Vc()
# NOTE(review): none of the requests.get() calls visible in this notebook pass
# this mapping, so it currently has no effect on the HTTP requests.
header = dict(cookie=cookie)
In [ ]:
# Fixed exchange rates
def rate_to_rmb(curruency):
    """Return the fixed conversion rate from ``curruency`` to RMB (CNY).

    Parameters
    ----------
    curruency : str
        Currency code such as ``"USD"``. Surrounding whitespace and letter
        case are ignored.

    Returns
    -------
    int or float
        The number of RMB that one unit of the given currency is worth,
        according to the hard-coded table below.

    Raises
    ------
    ValueError
        If the currency is not in the table (including non-string values such
        as NaN). Previously this case failed with an ``UnboundLocalError``
        because no branch assigned ``rate``.
    """
    fixed_rates = {
        "CNY": 1,
        "USD": 7,
        "EUR": 8,
        "GBP": 9,
        "JPY": 0.05,
        "AUD": 5,
        "CAD": 5,
        "HKD": 1,
    }
    # Normalise string input so that values like " usd" still resolve.
    code = curruency.strip().upper() if isinstance(curruency, str) else curruency
    try:
        return fixed_rates[code]
    except (KeyError, TypeError):
        # TypeError covers unhashable inputs; KeyError covers unknown codes/NaN.
        raise ValueError(f"unsupported currency: {curruency!r}") from None
In [ ]:
# Sample endpoint URLs, one per `act` value. They are not referenced by any
# other cell visible in this notebook.
# NOTE(review): the API key is hard-coded in these URLs; consider loading it
# from configuration or the environment instead of committing it.
aurl = (
    "https://cp.maso.hk/index.php?main=biphp&act=package_fund"
    "&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&package=991517855"
)
burl = (
    "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q"
    "&act=expend_settle_detail&id=3277"
)
curl = (
    "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q"
    "&act=bol_settle_detail&id=2324"
)
durl = (
    "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q"
    "&act=express_settle_detail&id=7787"
)

处理尾端结算数据

In [ ]:
from utils.gtools import MySQLconnect

# Read the settlement ids to process from the `flow_check_tail` table.
with MySQLconnect("workflow") as db:
    engine = db.engine()
    sql_list= pd.read_sql("SELECT 物流结算号 FROM `flow_check_tail`", engine)
    # Drop NULLs and convert the column to a plain list of ints.
    D_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()

# Download the detail rows of every settlement. Frames are collected in a list
# and concatenated once, instead of re-copying the accumulated frame on every
# iteration.
express_frames = []
for d in D_LIST:
    url = "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=express_settle_detail&id=%s"% d
    p_get = requests.get(url)
    # Fail with a clear HTTP error rather than an opaque JSON decode error.
    p_get.raise_for_status()
    data = p_get.json()
    temp_df = pd.DataFrame(data["data"])
    # Tag every row with the settlement id it was fetched for.
    temp_df["结算号"]=d
    temp_df["track_number"] = temp_df["track_number"].astype(str)
    express_frames.append(temp_df)

# pd.concat raises on an empty list, so keep the original empty-frame result
# when there is nothing to fetch.
express_df = pd.concat(express_frames, axis=0) if express_frames else pd.DataFrame()

from utils.gtools import MySQLconnect
import json

# `bill_express_fee` is a string mixing an upper-case currency code and an
# amount: removing the letters leaves the amount, removing digits and dots
# leaves the currency code.
express_df['账单运费'] = pd.to_numeric(express_df['bill_express_fee'].str.replace(r'[A-Z]+', '', regex=True),errors='coerce')
express_df['币种'] = express_df['bill_express_fee'].str.replace(r'[0-9.]+', '', regex=True)
# rate_to_rmb returns a plain number, so it must not be indexed (the previous
# `[0]` raised "TypeError: 'int' object is not subscriptable").
express_df['rmb'] = express_df.apply(lambda row: round(row['账单运费']*rate_to_rmb(row['币种']),2), axis=1)

# Look up the package id from the express tracking number for every row whose
# `odr_express_id` is missing or is the placeholder '--'.
needs_odr = express_df['odr_express_id'].isna() | (express_df['odr_express_id']=='--')
missing_odr_df = express_df[needs_odr]
track_list = missing_odr_df['track_number'].dropna().unique().tolist()
if track_list:
    with MySQLconnect('ods') as db:
        conn = db.connect()
        # One `%s` placeholder per tracking number for the parameterised IN (...).
        format_strings = ','.join('%s' for _ in track_list)
        sql = f"SELECT 快递跟踪号 AS track_number,包裹号 AS odr_express_id FROM `order_express` WHERE 快递跟踪号 IN  ({format_strings})"
        df_mapping = pd.read_sql(sql, conn, params=track_list)
        mapping_dict = dict(zip(df_mapping['track_number'], df_mapping['odr_express_id']))
        def fill_odr(row):
            """Return the row's package id, taken from the lookup when it is missing."""
            current = row['odr_express_id']
            if not (pd.isna(current) or current == '--'):
                return current
            # Fall back to the existing value when the tracking number is unknown.
            return mapping_dict.get(row['track_number'], current)
        express_df['odr_express_id'] = express_df.apply(fill_odr, axis=1)

workflow = MySQLconnect('workflow')
def safe_json(x):
    """Serialise container values to a JSON string; return anything else unchanged.

    dicts and lists are dumped with ``ensure_ascii=False`` so non-ASCII text
    stays readable. Sets are converted to a list first, because ``json.dumps``
    cannot serialise a set and would raise ``TypeError``.
    """
    if isinstance(x, set):
        # Sort by repr to get a deterministic order even for mixed element types.
        x = sorted(x, key=repr)
    if isinstance(x, (dict, list)):
        return json.dumps(x, ensure_ascii=False)
    return x
# Flatten the container-valued column to strings before writing it out.
express_df['bill_add_express_fee'] = express_df['bill_add_express_fee'].apply(safe_json)
# Replace the target table with the processed tail-leg data.
express_df.to_sql('flow_check_express_wxx', workflow.engine(), if_exists='replace')

处理头程结算数据

In [ ]:
# Read the settlement ids to process from the `flow_check_head` table.
with MySQLconnect("workflow") as db:
    engine = db.engine()
    sql_list= pd.read_sql("SELECT 物流结算号 FROM `flow_check_head`", engine)
    # Drop NULLs and convert the column to a plain list of ints.
    C_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()

# Download the detail rows of every settlement. Frames are collected in a list
# and concatenated once, instead of re-copying the accumulated frame on every
# iteration.
bol_frames = []
for c in C_LIST:
    url = "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=bol_settle_detail&id=%s"% c
    p_get = requests.get(url)
    # Fail with a clear HTTP error rather than an opaque JSON decode error.
    p_get.raise_for_status()
    # Tabs are stripped before parsing; presumably the payload contains raw tab
    # characters, which strict JSON parsing rejects inside strings -- TODO confirm.
    p_get_new=p_get.text.replace("\t", "")
    data = json.loads(p_get_new)
    temp_df = pd.DataFrame(data["data"])
    # Tag every row with the settlement id it was fetched for.
    temp_df["结算号"]=c
    bol_frames.append(temp_df)

# pd.concat raises on an empty list, so keep the original empty-frame result
# when there is nothing to fetch.
bol_df = pd.concat(bol_frames, axis=0) if bol_frames else pd.DataFrame()
# Show a preview only instead of dumping the whole frame into the output.
bol_df.head()
In [ ]:
# Convert head-leg fees to RMB and derive the price per unit of volume.
bol_df['price'] = bol_df['price'].astype(float)
# rate_to_rmb returns a plain number, so it must not be indexed (the previous
# `[0]` raised "TypeError: 'int' object is not subscriptable").
bol_df['rmb'] = bol_df.apply(lambda row: round(row['price']*rate_to_rmb(row['currency']),2), axis=1)
bol_list = bol_df['bol_code'].dropna().unique().tolist()
if bol_list:
    with MySQLconnect('ods') as db:
        conn = db.connect()
        # One `%s` placeholder per bill-of-lading code for the parameterised IN (...).
        format_strings = ','.join(['%s'] * len(bol_list))
        # For each code keep only the row with the latest `离港时间`
        # (row_index = 1 after ordering descending).
        sql = f"""WITH t1 AS (
                    SELECT
                    `提单/柜号` AS bol_code,
                        `提单ID` AS bol_id ,
                        `体积cm3`,
                        ROW_NUMBER() OVER(PARTITION BY `提单/柜号` ORDER BY `离港时间` DESC) AS row_index
                    FROM
                        bol_list 
                    WHERE
                        `提单/柜号` IN ({format_strings})
                    )
                        SELECT `bol_code`,`bol_id`,`体积cm3` FROM t1 WHERE row_index = 1
                    """
        df_mapping = pd.read_sql(sql, conn, params=bol_list)
        # Attach bol_id and volume to every fee row.
        bol_df = pd.merge(bol_df, df_mapping, on=['bol_code'], how='left')
bol_df['体积cm3'] = bol_df['体积cm3'].astype(float)
# RMB per unit of `体积cm3` for each fee row.
bol_df['体积单价'] =  bol_df['rmb']/bol_df['体积cm3'] 
workflow = MySQLconnect('workflow')
bol_df.to_sql('flow_check_head_wxx', workflow.engine(), if_exists='replace')
In [ ]:
# Find every package under each bol_id together with its dimensions, then
# derive the head-leg price of each individual package.
bol_id_list = bol_df['bol_id'].dropna().unique().tolist()
if bol_id_list:
    with MySQLconnect('ods') as db:
        conn = db.connect()
        # One `%s` placeholder per bol_id for the parameterised IN (...).
        format_strings = ','.join(['%s'] * len(bol_id_list))
        sql = f"""SELECT
        `包裹号`,
        btp.`提单ID` AS bol_id,
        length,
        width,
        hight,
        weight,
        length * width * hight AS 体积 
    FROM
        bol_to_package btp
        LEFT JOIN package_vol_info pvi ON btp.包裹号 = pvi.package 
    WHERE btp.`提单ID` IN ({format_strings})
    """
        bol_package = pd.read_sql(sql, conn, params=bol_id_list)
else:
    # An empty `IN ()` is invalid SQL; mirror the guards used for the other
    # lookups in this notebook and continue with an empty result that has the
    # same columns as the query.
    bol_package = pd.DataFrame(columns=['包裹号', 'bol_id', 'length', 'width', 'hight', 'weight', '体积'])

# Sum the unit prices of all fee rows per bol_id and keep them as a lookup dict.
bol_id_price = bol_df.groupby('bol_id')['体积单价'].sum().to_dict()
# Per-package price = package volume * unit price of its bol_id.
bol_package['头程价格'] = bol_package['体积'] * bol_package['bol_id'].map(bol_id_price)
bol_package = bol_package.drop_duplicates()
workflow = MySQLconnect('workflow')
bol_package.to_sql('flow_check_package_head_wxx', workflow.engine(), if_exists='replace')

处理额外费用数据

In [ ]:
# Read the settlement ids to process from the `flow_check_expend` table.
with MySQLconnect("workflow") as db:
    engine = db.engine()
    sql_list= pd.read_sql("SELECT 物流结算号 FROM `flow_check_expend`", engine)
    # Drop NULLs and convert the column to a plain list of ints.
    B_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()

# Download the detail rows of every settlement. Frames are collected in a list
# and concatenated once, instead of re-copying the accumulated frame on every
# iteration.
expend_frames = []
for b in B_LIST:
    # The template previously ended in a stray space ("id=%s "), which was sent
    # as part of the id; the other settlement URLs in this notebook have none.
    url = "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=expend_settle_detail&id=%s"% b
    p_get = requests.get(url)
    # Fail with a clear HTTP error rather than an opaque JSON decode error.
    p_get.raise_for_status()
    data = p_get.json()
    temp_df = pd.DataFrame(data["data"])
    # Tag every row with the settlement id it was fetched for.
    temp_df["结算号"]=b
    expend_frames.append(temp_df)

# pd.concat raises on an empty list, so keep the original empty-frame result
# when there is nothing to fetch.
expend_df = pd.concat(expend_frames, axis=0) if expend_frames else pd.DataFrame()

# `pay_price` is a string mixing an upper-case currency code and an amount:
# removing the letters leaves the amount, removing digits and dots leaves the
# currency code.
expend_df['账单运费'] = pd.to_numeric(expend_df['pay_price'].str.replace(r'[A-Z]+', '', regex=True),errors='coerce')
expend_df['币种'] = expend_df['pay_price'].str.replace(r'[0-9.]+', '', regex=True)
# rate_to_rmb returns a plain number, so it must not be indexed (the previous
# `[0]` raised "TypeError: 'int' object is not subscriptable").
expend_df['rmb'] = expend_df.apply(lambda row: round(row['账单运费']*rate_to_rmb(row['币种']),2), axis=1)

workflow = MySQLconnect('workflow')
expend_df.to_sql('flow_check_expend_wxx', workflow.engine(), if_exists='replace')

三种费用合并,以包裹为单位保存

In [ ]:
# Combine the three fee tables written by the earlier cells into one row set
# keyed by package (`odr_express_id`) and store it as a new table.
with MySQLconnect('workflow') as db:
    conn = db.connect()
    # The query takes no parameters, so the unused placeholder string built from
    # `bol_id_list` was removed; it only made this cell depend on another cell.
    sql = """SELECT DISTINCT
            tail.odr_express_id,
            head.`头程价格`,
            tail.rmb AS 尾程费用,
            expend.rmb AS 额外费用
            FROM
            flow_check_express_wxx tail 
            LEFT JOIN flow_check_package_head_wxx head ON tail.odr_express_id = head.`包裹号`
            LEFT JOIN flow_check_expend_wxx expend ON tail.odr_express_id = expend.odr_express_id
    """
    package_fee = pd.read_sql(sql, conn)
    # Write through this block's own connection helper rather than a `workflow`
    # global left over from an earlier cell.
    package_fee.to_sql('flow_check_package_fee_wxx', db.engine(), if_exists='replace')