12 KiB
12 KiB
In [ ]:
# Third-party and standard-library modules used throughout the notebook.
import re

import pandas as pd
import requests

# Project-local helper that supplies the cookie value.
from login_for_cookie import Vc

# Vc() is called once; its result is kept both on its own and wrapped in a
# header mapping under the "cookie" key.
# NOTE(review): presumably Vc() performs a login -- confirm in login_for_cookie.
cookie = Vc()
header = dict(cookie=cookie)
In [ ]:
# Fixed exchange rates: units of RMB per one unit of the given currency.
_RATES_TO_RMB = {
    "CNY": 1,
    "USD": 7,
    "EUR": 8,
    "GBP": 9,
    "JPY": 0.05,
    "AUD": 5,
    "CAD": 5,
    "HKD": 1,
}


def rate_to_rmb(curruency):
    """Return the fixed conversion rate from *curruency* to RMB.

    Args:
        curruency: Currency code such as ``"USD"``. Surrounding whitespace
            and letter case are ignored for string input.

    Returns:
        The rate as a plain number (int or float), taken from the fixed table.

    Raises:
        ValueError: If the currency is not in the fixed table. Previously an
            unknown code surfaced as an opaque ``UnboundLocalError``.
    """
    code = curruency.strip().upper() if isinstance(curruency, str) else curruency
    try:
        return _RATES_TO_RMB[code]
    except (KeyError, TypeError):
        # TypeError covers unhashable input (e.g. a list) reaching the lookup.
        raise ValueError(f"unsupported currency: {curruency!r}") from None
In [ ]:
# Example request URLs, one per `act` value of the cp.maso.hk endpoint.
# They are not referenced by the other cells visible in this notebook.
# NOTE(review): the access key is embedded in every URL -- consider moving it
# to configuration.

# act=package_fund, looked up by package number
aurl = "https://cp.maso.hk/index.php?main=biphp&act=package_fund&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&package=991517855"

# act=expend_settle_detail, looked up by id
burl = "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=expend_settle_detail&id=3277"

# act=bol_settle_detail, looked up by id
curl = "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=bol_settle_detail&id=2324"

# act=express_settle_detail, looked up by id
durl = "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=express_settle_detail&id=7787"
处理尾程结算数据
In [ ]:
import json

from utils.gtools import MySQLconnect

# Read the settlement numbers recorded in `flow_check_tail`.
with MySQLconnect("workflow") as db:
    engine = db.engine()
    sql_list = pd.read_sql("SELECT 物流结算号 FROM `flow_check_tail`", engine)
# Convert to a plain list of ints
D_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()

# Download the express settlement detail for every settlement number.
# Frames are collected and concatenated once; concatenating inside the loop
# re-copies all previously fetched rows on every iteration.
# NOTE(review): the access key is embedded in the URL -- consider moving it to
# configuration.
express_frames = []
for d in D_LIST:
    url = "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=express_settle_detail&id=%s" % d
    p_get = requests.get(url)
    data = p_get.json()
    temp_df = pd.DataFrame(data["data"])
    temp_df["结算号"] = d
    temp_df["track_number"] = temp_df["track_number"].astype(str)
    express_frames.append(temp_df)
express_df = pd.concat(express_frames, axis=0) if express_frames else pd.DataFrame()

# `bill_express_fee` mixes an amount and an upper-case currency code in one
# string: strip the letters to get the amount, strip digits/dots to get the code.
express_df['账单运费'] = pd.to_numeric(
    express_df['bill_express_fee'].str.replace(r'[A-Z]+', '', regex=True),
    errors='coerce',
)
express_df['币种'] = express_df['bill_express_fee'].str.replace(r'[0-9.]+', '', regex=True)
# rate_to_rmb returns a scalar rate, so it is multiplied directly
# (subscripting it with [0] raises TypeError).
express_df['rmb'] = express_df.apply(
    lambda row: round(row['账单运费'] * rate_to_rmb(row['币种']), 2),
    axis=1,
)

# Look up the package number by tracking number for rows where it is missing
# (NaN or the placeholder '--').
missing_odr_df = express_df[express_df['odr_express_id'].isna() | (express_df['odr_express_id'] == '--')]
track_list = missing_odr_df['track_number'].dropna().unique().tolist()

if track_list:
    with MySQLconnect('ods') as db:
        conn = db.connect()
        format_strings = ','.join(['%s'] * len(track_list))
        sql = f"SELECT 快递跟踪号 AS track_number,包裹号 AS odr_express_id FROM `order_express` WHERE 快递跟踪号 IN ({format_strings})"
        df_mapping = pd.read_sql(sql, conn, params=track_list)
    mapping_dict = dict(zip(df_mapping['track_number'], df_mapping['odr_express_id']))

    def fill_odr(row):
        """Return the row's package number, filling it from the lookup if missing."""
        if pd.isna(row['odr_express_id']) or row['odr_express_id'] == '--':
            # Keep the original value when the tracking number is not found.
            return mapping_dict.get(row['track_number'], row['odr_express_id'])
        return row['odr_express_id']

    express_df['odr_express_id'] = express_df.apply(fill_odr, axis=1)

workflow = MySQLconnect('workflow')


def safe_json(x):
    """Serialise dict/list/set values to a JSON string; pass others through."""
    if isinstance(x, (dict, list, set)):
        return json.dumps(x, ensure_ascii=False)
    return x


# Container values are serialised to strings before the frame is written out.
express_df['bill_add_express_fee'] = express_df['bill_add_express_fee'].apply(safe_json)
express_df.to_sql('flow_check_express_wxx', workflow.engine(), if_exists='replace')
处理头程结算数据
In [ ]:
import json

# Read the settlement numbers recorded in `flow_check_head`.
with MySQLconnect("workflow") as db:
    engine = db.engine()
    sql_list = pd.read_sql("SELECT 物流结算号 FROM `flow_check_head`", engine)
# Convert to a plain list of ints
C_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()

# Download the bill-of-lading settlement detail for every settlement number.
# Frames are collected and concatenated once instead of re-copying the
# accumulated frame on every iteration.
bol_frames = []
for c in C_LIST:
    url = "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=bol_settle_detail&id=%s" % c
    p_get = requests.get(url)
    # Tab characters are removed from the response text before it is parsed
    # as JSON.
    p_get_new = p_get.text.replace("\t", "")
    data = json.loads(p_get_new)
    temp_df = pd.DataFrame(data["data"])
    temp_df["结算号"] = c
    bol_frames.append(temp_df)
bol_df = pd.concat(bol_frames, axis=0) if bol_frames else pd.DataFrame()
bol_df
In [ ]:
# Convert the head-leg fee to RMB and derive the price per unit of volume.
bol_df['price'] = bol_df['price'].astype(float)
# rate_to_rmb returns a scalar rate, so it is multiplied directly
# (subscripting it with [0] raises TypeError).
bol_df['rmb'] = bol_df.apply(
    lambda row: round(row['price'] * rate_to_rmb(row['currency']), 2),
    axis=1,
)

bol_list = bol_df['bol_code'].dropna().unique().tolist()
if bol_list:
    with MySQLconnect('ods') as db:
        conn = db.connect()
        format_strings = ','.join(['%s'] * len(bol_list))
        # For each bill-of-lading code keep only the row with the latest
        # `离港时间` (row_index = 1).
        sql = f"""WITH t1 AS (
            SELECT `提单/柜号` AS bol_code,
            `提单ID` AS bol_id ,
            `体积cm3`,
            ROW_NUMBER() OVER(PARTITION BY `提单/柜号` ORDER BY `离港时间` DESC) AS row_index
            FROM bol_list
            WHERE `提单/柜号` IN ({format_strings})
            )
            SELECT `bol_code`,`bol_id`,`体积cm3`
            FROM t1
            WHERE row_index = 1
            """
        df_mapping = pd.read_sql(sql, conn, params=bol_list)
else:
    # Nothing to look up: use an empty mapping so the merge below still adds
    # the expected columns instead of failing on an undefined name.
    df_mapping = pd.DataFrame(columns=['bol_code', 'bol_id', '体积cm3'])

# Merge the lookup result onto the settlement rows
bol_df = pd.merge(bol_df, df_mapping, on=['bol_code'], how='left')
bol_df['体积cm3'] = bol_df['体积cm3'].astype(float)
bol_df['体积单价'] = bol_df['rmb'] / bol_df['体积cm3']

workflow = MySQLconnect('workflow')
bol_df.to_sql('flow_check_head_wxx', workflow.engine(), if_exists='replace')
In [ ]:
# Find every package (with its dimensions) under each bol_id and derive the
# head-leg price of each single package.
bol_id_list = bol_df['bol_id'].dropna().unique().tolist()

if bol_id_list:
    with MySQLconnect('ods') as db:
        conn = db.connect()
        format_strings = ','.join(['%s'] * len(bol_id_list))
        sql = f"""SELECT
            `包裹号`,
            btp.`提单ID` AS bol_id,
            length,
            width,
            hight,
            weight,
            length * width * hight AS 体积
            FROM bol_to_package btp
            LEFT JOIN package_vol_info pvi ON btp.包裹号 = pvi.package
            WHERE btp.`提单ID` IN ({format_strings})
            """
        bol_package = pd.read_sql(sql, conn, params=bol_id_list)
else:
    # An empty id list would render as `IN ()`, which is invalid SQL; fall
    # back to an empty frame with the columns the query would have returned.
    bol_package = pd.DataFrame(
        columns=['包裹号', 'bol_id', 'length', 'width', 'hight', 'weight', '体积']
    )

# Unit price per bol_id (rows of the same bol_id are summed), kept as a dict.
bol_id_price = bol_df.groupby('bol_id')['体积单价'].sum().to_dict()
# New column: package volume * unit price, with the unit price looked up by
# bol_id from the dict above.
bol_package['头程价格'] = bol_package['体积'] * bol_package['bol_id'].map(bol_id_price)
bol_package = bol_package.drop_duplicates()

workflow = MySQLconnect('workflow')
bol_package.to_sql('flow_check_package_head_wxx', workflow.engine(), if_exists='replace')
处理额外费用数据
In [ ]:
# Read the settlement numbers recorded in `flow_check_expend`.
with MySQLconnect("workflow") as db:
    engine = db.engine()
    sql_list = pd.read_sql("SELECT 物流结算号 FROM `flow_check_expend`", engine)
# Convert to a plain list of ints
B_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()

# Download the expense settlement detail for every settlement number.
# Frames are collected and concatenated once instead of re-copying the
# accumulated frame on every iteration.
expend_frames = []
for b in B_LIST:
    # No trailing space after the id: it would be sent as part of the query
    # value, and the sibling settlement URLs do not have one.
    url = "https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=expend_settle_detail&id=%s" % b
    p_get = requests.get(url)
    data = p_get.json()
    temp_df = pd.DataFrame(data["data"])
    temp_df["结算号"] = b
    expend_frames.append(temp_df)
expend_df = pd.concat(expend_frames, axis=0) if expend_frames else pd.DataFrame()

# `pay_price` mixes an amount and an upper-case currency code in one string:
# strip the letters to get the amount, strip digits/dots to get the code.
expend_df['账单运费'] = pd.to_numeric(
    expend_df['pay_price'].str.replace(r'[A-Z]+', '', regex=True),
    errors='coerce',
)
expend_df['币种'] = expend_df['pay_price'].str.replace(r'[0-9.]+', '', regex=True)
# rate_to_rmb returns a scalar rate, so it is multiplied directly
# (subscripting it with [0] raises TypeError).
expend_df['rmb'] = expend_df.apply(
    lambda row: round(row['账单运费'] * rate_to_rmb(row['币种']), 2),
    axis=1,
)

workflow = MySQLconnect('workflow')
expend_df.to_sql('flow_check_expend_wxx', workflow.engine(), if_exists='replace')
三种费用合并,以包裹为单位保存
In [ ]:
# Combine the three fee tables into one row set keyed by package: the express
# table drives the result, and the head-leg price and extra fee are attached
# by package number with LEFT JOINs.
with MySQLconnect('workflow') as db:
    conn = db.connect()
    sql = """SELECT DISTINCT
        tail.odr_express_id,
        head.`头程价格`,
        tail.rmb AS 尾程费用,
        expend.rmb AS 额外费用
        FROM flow_check_express_wxx tail
        LEFT JOIN flow_check_package_head_wxx head ON tail.odr_express_id = head.`包裹号`
        LEFT JOIN flow_check_expend_wxx expend ON tail.odr_express_id = expend.odr_express_id
        """
    package_fee = pd.read_sql(sql, conn)

# Create the writer handle here, as the other cells do, so this cell does not
# depend on a `workflow` variable left over from an earlier cell.
workflow = MySQLconnect('workflow')
package_fee.to_sql('flow_check_package_fee_wxx', workflow.engine(), if_exists='replace')