322 lines
12 KiB
Plaintext
322 lines
12 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d40f14fe",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from login_for_cookie import Vc\n",
|
||
"import requests\n",
|
||
"import pandas as pd\n",
|
||
"import re \n",
|
||
"cookie = Vc()\n",
|
||
"header = {\"cookie\":cookie}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "fa7b4aa7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 固定汇率\n",
|
||
"def rate_to_rmb(curruency):\n",
|
||
" if curruency == \"CNY\":\n",
|
||
" rate = 1\n",
|
||
" if curruency == \"USD\":\n",
|
||
" rate = 7\n",
|
||
" if curruency == \"EUR\":\n",
|
||
" rate = 8 \n",
|
||
" if curruency == \"GBP\":\n",
|
||
" rate = 9\n",
|
||
" if curruency == \"JPY\":\n",
|
||
" rate = 0.05\n",
|
||
" if curruency == \"AUD\":\n",
|
||
" rate = 5\n",
|
||
" if curruency == \"CAD\":\n",
|
||
" rate = 5\n",
|
||
" if curruency == \"HKD\":\n",
|
||
" rate = 1\n",
|
||
" return rate"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "99fd242f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"aurl = \"https://cp.maso.hk/index.php?main=biphp&act=package_fund&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&package=991517855\"\n",
|
||
"burl = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=expend_settle_detail&id=3277\"\n",
|
||
"curl = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=bol_settle_detail&id=2324\"\n",
|
||
"durl = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=express_settle_detail&id=7787\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "5e6e24f4",
|
||
"metadata": {},
|
||
"source": [
|
||
"处理尾端结算数据"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "705c5336",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from utils.gtools import MySQLconnect\n",
|
||
"with MySQLconnect(\"workflow\") as db:\n",
|
||
" engine = db.engine()\n",
|
||
" sql_list= pd.read_sql(\"SELECT 物流结算号 FROM `flow_check_tail`\", engine)\n",
|
||
" # 转成列表\n",
|
||
" D_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()\n",
|
||
"express_df = pd.DataFrame()\n",
|
||
"for d in D_LIST:\n",
|
||
" url = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=express_settle_detail&id=%s\"% d\n",
|
||
" p_get = requests.get(url)\n",
|
||
" data = p_get.json()\n",
|
||
" temp_df = pd.DataFrame(data[\"data\"])\n",
|
||
" temp_df[\"结算号\"]=d\n",
|
||
" temp_df[\"track_number\"] = temp_df[\"track_number\"].astype(str) \n",
|
||
" express_df = pd.concat([express_df,temp_df],axis=0)\n",
|
||
"\n",
|
||
"from utils.gtools import MySQLconnect\n",
|
||
"import json\n",
|
||
"express_df['账单运费'] = pd.to_numeric(express_df['bill_express_fee'].str.replace(r'[A-Z]+', '', regex=True),errors='coerce')\n",
|
||
"express_df['币种'] = express_df['bill_express_fee'].str.replace(r'[0-9.]+', '', regex=True)\n",
|
||
"express_df['rmb'] = express_df.apply(lambda row: round(row['账单运费']*rate_to_rmb(row['币种'])[0],2), axis=1)\n",
|
||
"\n",
|
||
"# 根据快递跟踪号找到包裹号\n",
|
||
"missing_odr_df = express_df[express_df['odr_express_id'].isna() | (express_df['odr_express_id']=='--')]\n",
|
||
"track_list = missing_odr_df['track_number'].dropna().unique().tolist()\n",
|
||
"if track_list:\n",
|
||
" with MySQLconnect('ods') as db:\n",
|
||
" conn = db.connect()\n",
|
||
" format_strings = ','.join(['%s'] * len(track_list))\n",
|
||
" sql = f\"SELECT 快递跟踪号 AS track_number,包裹号 AS odr_express_id FROM `order_express` WHERE 快递跟踪号 IN ({format_strings})\"\n",
|
||
" df_mapping = pd.read_sql(sql, conn, params=track_list)\n",
|
||
" mapping_dict = dict(zip(df_mapping['track_number'], df_mapping['odr_express_id']))\n",
|
||
" def fill_odr(row):\n",
|
||
" if pd.isna(row['odr_express_id']) or row['odr_express_id'] == '--':\n",
|
||
" return mapping_dict.get(row['track_number'], row['odr_express_id'])\n",
|
||
" else:\n",
|
||
" return row['odr_express_id']\n",
|
||
" express_df['odr_express_id'] = express_df.apply(fill_odr, axis=1)\n",
|
||
"\n",
|
||
"workflow = MySQLconnect('workflow')\n",
|
||
"def safe_json(x):\n",
|
||
" if isinstance(x, (dict, list, set)):\n",
|
||
" return json.dumps(x, ensure_ascii=False)\n",
|
||
" return x\n",
|
||
"express_df['bill_add_express_fee'] = express_df['bill_add_express_fee'].apply(safe_json)\n",
|
||
"express_df.to_sql('flow_check_express_wxx', workflow.engine(), if_exists='replace')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "af1fa88d",
|
||
"metadata": {},
|
||
"source": [
|
||
"处理头程结算数据"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f30b25ad",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"with MySQLconnect(\"workflow\") as db:\n",
|
||
" engine = db.engine()\n",
|
||
" sql_list= pd.read_sql(\"SELECT 物流结算号 FROM `flow_check_head`\", engine)\n",
|
||
" # 转成列表\n",
|
||
" C_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()\n",
|
||
"\n",
|
||
"bol_df = pd.DataFrame()\n",
|
||
"for c in C_LIST:\n",
|
||
" url = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=bol_settle_detail&id=%s\"% c\n",
|
||
" p_get = requests.get(url)\n",
|
||
" # print(p_get.text\n",
|
||
" p_get_new=p_get.text.replace(\"\\t\", \"\")\n",
|
||
" data = json.loads(p_get_new)\n",
|
||
" temp_df = pd.DataFrame(data[\"data\"])\n",
|
||
" temp_df[\"结算号\"]=c\n",
|
||
" bol_df = pd.concat([bol_df,temp_df],axis=0)\n",
|
||
"bol_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "9bb83abf",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#处理头程费用转rmb和体积单价\n",
|
||
"bol_df['price'] = bol_df['price'].astype(float)\n",
|
||
"bol_df['rmb'] = bol_df.apply(lambda row: round(row['price']*rate_to_rmb(row['currency'])[0],2), axis=1)\n",
|
||
"bol_list = bol_df['bol_code'].dropna().unique().tolist()\n",
|
||
"if bol_list:\n",
|
||
" with MySQLconnect('ods') as db:\n",
|
||
" conn = db.connect()\n",
|
||
" format_strings = ','.join(['%s'] * len(bol_list))\n",
|
||
" sql = f\"\"\"WITH t1 AS (\n",
|
||
" SELECT\n",
|
||
" `提单/柜号` AS bol_code,\n",
|
||
" `提单ID` AS bol_id ,\n",
|
||
" `体积cm3`,\n",
|
||
" ROW_NUMBER() OVER(PARTITION BY `提单/柜号` ORDER BY `离港时间` DESC) AS row_index\n",
|
||
" FROM\n",
|
||
" bol_list \n",
|
||
" WHERE\n",
|
||
" `提单/柜号` IN ({format_strings})\n",
|
||
" )\n",
|
||
" SELECT `bol_code`,`bol_id`,`体积cm3` FROM t1 WHERE row_index = 1\n",
|
||
" \"\"\"\n",
|
||
" df_mapping = pd.read_sql(sql, conn, params=bol_list)\n",
|
||
" # 合并\n",
|
||
" bol_df = pd.merge(bol_df, df_mapping, on=['bol_code'], how='left')\n",
|
||
"bol_df['体积cm3'] = bol_df['体积cm3'].astype(float)\n",
|
||
"bol_df['体积单价'] = bol_df['rmb']/bol_df['体积cm3'] \n",
|
||
"workflow = MySQLconnect('workflow')\n",
|
||
"bol_df.to_sql('flow_check_head_wxx', workflow.engine(), if_exists='replace') "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "4a78e03f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#找bol_id下的所有包裹和体积,得到单包裹的头程价格\n",
|
||
"bol_id_list = bol_df['bol_id'].dropna().unique().tolist()\n",
|
||
"with MySQLconnect('ods') as db:\n",
|
||
" conn = db.connect()\n",
|
||
" format_strings = ','.join(['%s'] * len(bol_id_list))\n",
|
||
" sql = f\"\"\"SELECT\n",
|
||
" `包裹号`,\n",
|
||
" btp.`提单ID` AS bol_id,\n",
|
||
" length,\n",
|
||
" width,\n",
|
||
" hight,\n",
|
||
" weight,\n",
|
||
" length * width * hight AS 体积 \n",
|
||
" FROM\n",
|
||
" bol_to_package btp\n",
|
||
" LEFT JOIN package_vol_info pvi ON btp.包裹号 = pvi.package \n",
|
||
" WHERE btp.`提单ID` IN ({format_strings})\n",
|
||
" \"\"\"\n",
|
||
" bol_package = pd.read_sql(sql, conn, params=bol_id_list)\n",
|
||
"\n",
|
||
"# 根据bol_df计算每个bol_id的单价(多行求和),保存为一个map字典\n",
|
||
"bol_id_price = bol_df.groupby('bol_id')['体积单价'].sum().to_dict()\n",
|
||
"# bol_package新增一列,为体积*体积单价,体积单价根据bol_id从字典中获取\n",
|
||
"bol_package['头程价格'] = bol_package['体积'] * bol_package['bol_id'].map(bol_id_price)\n",
|
||
"bol_package = bol_package.drop_duplicates()\n",
|
||
"workflow = MySQLconnect('workflow')\n",
|
||
"bol_package.to_sql('flow_check_package_head_wxx', workflow.engine(), if_exists='replace') "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "431bcdff",
|
||
"metadata": {},
|
||
"source": [
|
||
"处理额外费用数据"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7184abdf",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"with MySQLconnect(\"workflow\") as db:\n",
|
||
" engine = db.engine()\n",
|
||
" sql_list= pd.read_sql(\"SELECT 物流结算号 FROM `flow_check_expend`\", engine)\n",
|
||
" # 转成列表\n",
|
||
" B_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()\n",
|
||
"\n",
|
||
"expend_df = pd.DataFrame()\n",
|
||
"for b in B_LIST:\n",
|
||
" url = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=expend_settle_detail&id=%s \"% b\n",
|
||
" p_get = requests.get(url)\n",
|
||
" data = p_get.json()\n",
|
||
" temp_df = pd.DataFrame(data[\"data\"])\n",
|
||
" temp_df[\"结算号\"]=b\n",
|
||
" expend_df = pd.concat([expend_df,temp_df],axis=0)\n",
|
||
"\n",
|
||
"expend_df['账单运费'] = pd.to_numeric(expend_df['pay_price'].str.replace(r'[A-Z]+', '', regex=True),errors='coerce')\n",
|
||
"expend_df['币种'] = expend_df['pay_price'].str.replace(r'[0-9.]+', '', regex=True)\n",
|
||
"expend_df['rmb'] = expend_df.apply(lambda row: round(row['账单运费']*rate_to_rmb(row['币种'])[0],2), axis=1)\n",
|
||
"\n",
|
||
"workflow = MySQLconnect('workflow')\n",
|
||
"expend_df.to_sql('flow_check_expend_wxx', workflow.engine(), if_exists='replace') "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "361b5796",
|
||
"metadata": {},
|
||
"source": [
|
||
"三种费用合并,以包裹为单位保存"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "c6d66242",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"with MySQLconnect('workflow') as db:\n",
|
||
" conn = db.connect()\n",
|
||
" format_strings = ','.join(['%s'] * len(bol_id_list))\n",
|
||
" sql = \"\"\"SELECT DISTINCT\n",
|
||
" tail.odr_express_id,\n",
|
||
" head.`头程价格`,\n",
|
||
" tail.rmb AS 尾程费用,\n",
|
||
" expend.rmb AS 额外费用\n",
|
||
" FROM\n",
|
||
" flow_check_express_wxx tail \n",
|
||
" LEFT JOIN flow_check_package_head_wxx head ON tail.odr_express_id = head.`包裹号`\n",
|
||
" LEFT JOIN flow_check_expend_wxx expend ON tail.odr_express_id = expend.odr_express_id\n",
|
||
" \"\"\"\n",
|
||
" package_fee = pd.read_sql(sql, conn)\n",
|
||
" package_fee.to_sql('flow_check_package_fee_wxx', workflow.engine(), if_exists='replace') \n"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.5"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|