logistics/CHECK_PROFIT.ipynb

322 lines
12 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d40f14fe",
"metadata": {},
"outputs": [],
"source": [
"from login_for_cookie import Vc\n",
"import requests\n",
"import pandas as pd\n",
"import re \n",
"cookie = Vc()\n",
"header = {\"cookie\":cookie}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa7b4aa7",
"metadata": {},
"outputs": [],
"source": [
"# 固定汇率\n",
"def rate_to_rmb(curruency):\n",
" if curruency == \"CNY\":\n",
" rate = 1\n",
" if curruency == \"USD\":\n",
" rate = 7\n",
" if curruency == \"EUR\":\n",
" rate = 8 \n",
" if curruency == \"GBP\":\n",
" rate = 9\n",
" if curruency == \"JPY\":\n",
" rate = 0.05\n",
" if curruency == \"AUD\":\n",
" rate = 5\n",
" if curruency == \"CAD\":\n",
" rate = 5\n",
" if curruency == \"HKD\":\n",
" rate = 1\n",
" return rate"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99fd242f",
"metadata": {},
"outputs": [],
"source": [
"aurl = \"https://cp.maso.hk/index.php?main=biphp&act=package_fund&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&package=991517855\"\n",
"burl = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=expend_settle_detail&id=3277\"\n",
"curl = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=bol_settle_detail&id=2324\"\n",
"durl = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=express_settle_detail&id=7787\""
]
},
{
"cell_type": "markdown",
"id": "5e6e24f4",
"metadata": {},
"source": [
"处理尾端结算数据"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "705c5336",
"metadata": {},
"outputs": [],
"source": [
"from utils.gtools import MySQLconnect\n",
"with MySQLconnect(\"workflow\") as db:\n",
" engine = db.engine()\n",
" sql_list= pd.read_sql(\"SELECT 物流结算号 FROM `flow_check_tail`\", engine)\n",
" # 转成列表\n",
" D_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()\n",
"express_df = pd.DataFrame()\n",
"for d in D_LIST:\n",
" url = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=express_settle_detail&id=%s\"% d\n",
" p_get = requests.get(url)\n",
" data = p_get.json()\n",
" temp_df = pd.DataFrame(data[\"data\"])\n",
" temp_df[\"结算号\"]=d\n",
" temp_df[\"track_number\"] = temp_df[\"track_number\"].astype(str) \n",
" express_df = pd.concat([express_df,temp_df],axis=0)\n",
"\n",
"from utils.gtools import MySQLconnect\n",
"import json\n",
"express_df['账单运费'] = pd.to_numeric(express_df['bill_express_fee'].str.replace(r'[A-Z]+', '', regex=True),errors='coerce')\n",
"express_df['币种'] = express_df['bill_express_fee'].str.replace(r'[0-9.]+', '', regex=True)\n",
"express_df['rmb'] = express_df.apply(lambda row: round(row['账单运费']*rate_to_rmb(row['币种'])[0],2), axis=1)\n",
"\n",
"# 根据快递跟踪号找到包裹号\n",
"missing_odr_df = express_df[express_df['odr_express_id'].isna() | (express_df['odr_express_id']=='--')]\n",
"track_list = missing_odr_df['track_number'].dropna().unique().tolist()\n",
"if track_list:\n",
" with MySQLconnect('ods') as db:\n",
" conn = db.connect()\n",
" format_strings = ','.join(['%s'] * len(track_list))\n",
" sql = f\"SELECT 快递跟踪号 AS track_number,包裹号 AS odr_express_id FROM `order_express` WHERE 快递跟踪号 IN ({format_strings})\"\n",
" df_mapping = pd.read_sql(sql, conn, params=track_list)\n",
" mapping_dict = dict(zip(df_mapping['track_number'], df_mapping['odr_express_id']))\n",
" def fill_odr(row):\n",
" if pd.isna(row['odr_express_id']) or row['odr_express_id'] == '--':\n",
" return mapping_dict.get(row['track_number'], row['odr_express_id'])\n",
" else:\n",
" return row['odr_express_id']\n",
" express_df['odr_express_id'] = express_df.apply(fill_odr, axis=1)\n",
"\n",
"workflow = MySQLconnect('workflow')\n",
"def safe_json(x):\n",
" if isinstance(x, (dict, list, set)):\n",
" return json.dumps(x, ensure_ascii=False)\n",
" return x\n",
"express_df['bill_add_express_fee'] = express_df['bill_add_express_fee'].apply(safe_json)\n",
"express_df.to_sql('flow_check_express_wxx', workflow.engine(), if_exists='replace')"
]
},
{
"cell_type": "markdown",
"id": "af1fa88d",
"metadata": {},
"source": [
"处理头程结算数据"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f30b25ad",
"metadata": {},
"outputs": [],
"source": [
"with MySQLconnect(\"workflow\") as db:\n",
" engine = db.engine()\n",
" sql_list= pd.read_sql(\"SELECT 物流结算号 FROM `flow_check_head`\", engine)\n",
" # 转成列表\n",
" C_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()\n",
"\n",
"bol_df = pd.DataFrame()\n",
"for c in C_LIST:\n",
" url = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=bol_settle_detail&id=%s\"% c\n",
" p_get = requests.get(url)\n",
" # print(p_get.text\n",
" p_get_new=p_get.text.replace(\"\\t\", \"\")\n",
" data = json.loads(p_get_new)\n",
" temp_df = pd.DataFrame(data[\"data\"])\n",
" temp_df[\"结算号\"]=c\n",
" bol_df = pd.concat([bol_df,temp_df],axis=0)\n",
"bol_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9bb83abf",
"metadata": {},
"outputs": [],
"source": [
"#处理头程费用转rmb和体积单价\n",
"bol_df['price'] = bol_df['price'].astype(float)\n",
"bol_df['rmb'] = bol_df.apply(lambda row: round(row['price']*rate_to_rmb(row['currency'])[0],2), axis=1)\n",
"bol_list = bol_df['bol_code'].dropna().unique().tolist()\n",
"if bol_list:\n",
" with MySQLconnect('ods') as db:\n",
" conn = db.connect()\n",
" format_strings = ','.join(['%s'] * len(bol_list))\n",
" sql = f\"\"\"WITH t1 AS (\n",
" SELECT\n",
" `提单/柜号` AS bol_code,\n",
" `提单ID` AS bol_id ,\n",
" `体积cm3`,\n",
" ROW_NUMBER() OVER(PARTITION BY `提单/柜号` ORDER BY `离港时间` DESC) AS row_index\n",
" FROM\n",
" bol_list \n",
" WHERE\n",
" `提单/柜号` IN ({format_strings})\n",
" )\n",
" SELECT `bol_code`,`bol_id`,`体积cm3` FROM t1 WHERE row_index = 1\n",
" \"\"\"\n",
" df_mapping = pd.read_sql(sql, conn, params=bol_list)\n",
" # 合并\n",
" bol_df = pd.merge(bol_df, df_mapping, on=['bol_code'], how='left')\n",
"bol_df['体积cm3'] = bol_df['体积cm3'].astype(float)\n",
"bol_df['体积单价'] = bol_df['rmb']/bol_df['体积cm3'] \n",
"workflow = MySQLconnect('workflow')\n",
"bol_df.to_sql('flow_check_head_wxx', workflow.engine(), if_exists='replace') "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a78e03f",
"metadata": {},
"outputs": [],
"source": [
"#找bol_id下的所有包裹和体积得到单包裹的头程价格\n",
"bol_id_list = bol_df['bol_id'].dropna().unique().tolist()\n",
"with MySQLconnect('ods') as db:\n",
" conn = db.connect()\n",
" format_strings = ','.join(['%s'] * len(bol_id_list))\n",
" sql = f\"\"\"SELECT\n",
" `包裹号`,\n",
" btp.`提单ID` AS bol_id,\n",
" length,\n",
" width,\n",
" hight,\n",
" weight,\n",
" length * width * hight AS 体积 \n",
" FROM\n",
" bol_to_package btp\n",
" LEFT JOIN package_vol_info pvi ON btp.包裹号 = pvi.package \n",
" WHERE btp.`提单ID` IN ({format_strings})\n",
" \"\"\"\n",
" bol_package = pd.read_sql(sql, conn, params=bol_id_list)\n",
"\n",
"# 根据bol_df计算每个bol_id的单价多行求和,保存为一个map字典\n",
"bol_id_price = bol_df.groupby('bol_id')['体积单价'].sum().to_dict()\n",
"# bol_package新增一列为体积*体积单价,体积单价根据bol_id从字典中获取\n",
"bol_package['头程价格'] = bol_package['体积'] * bol_package['bol_id'].map(bol_id_price)\n",
"bol_package = bol_package.drop_duplicates()\n",
"workflow = MySQLconnect('workflow')\n",
"bol_package.to_sql('flow_check_package_head_wxx', workflow.engine(), if_exists='replace') "
]
},
{
"cell_type": "markdown",
"id": "431bcdff",
"metadata": {},
"source": [
"处理额外费用数据"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7184abdf",
"metadata": {},
"outputs": [],
"source": [
"with MySQLconnect(\"workflow\") as db:\n",
" engine = db.engine()\n",
" sql_list= pd.read_sql(\"SELECT 物流结算号 FROM `flow_check_expend`\", engine)\n",
" # 转成列表\n",
" B_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()\n",
"\n",
"expend_df = pd.DataFrame()\n",
"for b in B_LIST:\n",
" url = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=expend_settle_detail&id=%s \"% b\n",
" p_get = requests.get(url)\n",
" data = p_get.json()\n",
" temp_df = pd.DataFrame(data[\"data\"])\n",
" temp_df[\"结算号\"]=b\n",
" expend_df = pd.concat([expend_df,temp_df],axis=0)\n",
"\n",
"expend_df['账单运费'] = pd.to_numeric(expend_df['pay_price'].str.replace(r'[A-Z]+', '', regex=True),errors='coerce')\n",
"expend_df['币种'] = expend_df['pay_price'].str.replace(r'[0-9.]+', '', regex=True)\n",
"expend_df['rmb'] = expend_df.apply(lambda row: round(row['账单运费']*rate_to_rmb(row['币种'])[0],2), axis=1)\n",
"\n",
"workflow = MySQLconnect('workflow')\n",
"expend_df.to_sql('flow_check_expend_wxx', workflow.engine(), if_exists='replace') "
]
},
{
"cell_type": "markdown",
"id": "361b5796",
"metadata": {},
"source": [
"三种费用合并,以包裹为单位保存"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6d66242",
"metadata": {},
"outputs": [],
"source": [
"with MySQLconnect('workflow') as db:\n",
" conn = db.connect()\n",
" format_strings = ','.join(['%s'] * len(bol_id_list))\n",
" sql = \"\"\"SELECT DISTINCT\n",
" tail.odr_express_id,\n",
" head.`头程价格`,\n",
" tail.rmb AS 尾程费用,\n",
" expend.rmb AS 额外费用\n",
" FROM\n",
" flow_check_express_wxx tail \n",
" LEFT JOIN flow_check_package_head_wxx head ON tail.odr_express_id = head.`包裹号`\n",
" LEFT JOIN flow_check_expend_wxx expend ON tail.odr_express_id = expend.odr_express_id\n",
" \"\"\"\n",
" package_fee = pd.read_sql(sql, conn)\n",
" package_fee.to_sql('flow_check_package_fee_wxx', workflow.engine(), if_exists='replace') \n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}