{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "d40f14fe", "metadata": {}, "outputs": [], "source": [ "from login_for_cookie import Vc\n", "import requests\n", "import pandas as pd\n", "import re \n", "cookie = Vc()\n", "header = {\"cookie\":cookie}" ] }, { "cell_type": "code", "execution_count": null, "id": "fa7b4aa7", "metadata": {}, "outputs": [], "source": [ "# 固定汇率\n", "def rate_to_rmb(curruency):\n", " if curruency == \"CNY\":\n", " rate = 1\n", " if curruency == \"USD\":\n", " rate = 7\n", " if curruency == \"EUR\":\n", " rate = 8 \n", " if curruency == \"GBP\":\n", " rate = 9\n", " if curruency == \"JPY\":\n", " rate = 0.05\n", " if curruency == \"AUD\":\n", " rate = 5\n", " if curruency == \"CAD\":\n", " rate = 5\n", " if curruency == \"HKD\":\n", " rate = 1\n", " return rate" ] }, { "cell_type": "code", "execution_count": null, "id": "99fd242f", "metadata": {}, "outputs": [], "source": [ "aurl = \"https://cp.maso.hk/index.php?main=biphp&act=package_fund&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&package=991517855\"\n", "burl = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=expend_settle_detail&id=3277\"\n", "curl = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=bol_settle_detail&id=2324\"\n", "durl = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=express_settle_detail&id=7787\"" ] }, { "cell_type": "markdown", "id": "5e6e24f4", "metadata": {}, "source": [ "处理尾端结算数据" ] }, { "cell_type": "code", "execution_count": null, "id": "705c5336", "metadata": {}, "outputs": [], "source": [ "from utils.gtools import MySQLconnect\n", "with MySQLconnect(\"workflow\") as db:\n", " engine = db.engine()\n", " sql_list= pd.read_sql(\"SELECT 物流结算号 FROM `flow_check_tail`\", engine)\n", " # 转成列表\n", " D_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()\n", "express_df = pd.DataFrame()\n", "for d in D_LIST:\n", " url = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=express_settle_detail&id=%s\"% d\n", " p_get = requests.get(url)\n", " data = p_get.json()\n", " temp_df = pd.DataFrame(data[\"data\"])\n", " temp_df[\"结算号\"]=d\n", " temp_df[\"track_number\"] = temp_df[\"track_number\"].astype(str) \n", " express_df = pd.concat([express_df,temp_df],axis=0)\n", "\n", "from utils.gtools import MySQLconnect\n", "import json\n", "express_df['账单运费'] = pd.to_numeric(express_df['bill_express_fee'].str.replace(r'[A-Z]+', '', regex=True),errors='coerce')\n", "express_df['币种'] = express_df['bill_express_fee'].str.replace(r'[0-9.]+', '', regex=True)\n", "express_df['rmb'] = express_df.apply(lambda row: round(row['账单运费']*rate_to_rmb(row['币种'])[0],2), axis=1)\n", "\n", "# 根据快递跟踪号找到包裹号\n", "missing_odr_df = express_df[express_df['odr_express_id'].isna() | (express_df['odr_express_id']=='--')]\n", "track_list = missing_odr_df['track_number'].dropna().unique().tolist()\n", "if track_list:\n", " with MySQLconnect('ods') as db:\n", " conn = db.connect()\n", " format_strings = ','.join(['%s'] * len(track_list))\n", " sql = f\"SELECT 快递跟踪号 AS track_number,包裹号 AS odr_express_id FROM `order_express` WHERE 快递跟踪号 IN ({format_strings})\"\n", " df_mapping = pd.read_sql(sql, conn, params=track_list)\n", " mapping_dict = dict(zip(df_mapping['track_number'], df_mapping['odr_express_id']))\n", " def fill_odr(row):\n", " if pd.isna(row['odr_express_id']) or row['odr_express_id'] == '--':\n", " return mapping_dict.get(row['track_number'], row['odr_express_id'])\n", " else:\n", " return row['odr_express_id']\n", " express_df['odr_express_id'] = express_df.apply(fill_odr, axis=1)\n", "\n", "workflow = MySQLconnect('workflow')\n", "def safe_json(x):\n", " if isinstance(x, (dict, list, set)):\n", " return json.dumps(x, ensure_ascii=False)\n", " return x\n", "express_df['bill_add_express_fee'] = express_df['bill_add_express_fee'].apply(safe_json)\n", "express_df.to_sql('flow_check_express_wxx', workflow.engine(), if_exists='replace')" ] }, { "cell_type": "markdown", "id": "af1fa88d", "metadata": {}, "source": [ "处理头程结算数据" ] }, { "cell_type": "code", "execution_count": null, "id": "f30b25ad", "metadata": {}, "outputs": [], "source": [ "with MySQLconnect(\"workflow\") as db:\n", " engine = db.engine()\n", " sql_list= pd.read_sql(\"SELECT 物流结算号 FROM `flow_check_head`\", engine)\n", " # 转成列表\n", " C_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()\n", "\n", "bol_df = pd.DataFrame()\n", "for c in C_LIST:\n", " url = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=bol_settle_detail&id=%s\"% c\n", " p_get = requests.get(url)\n", " # print(p_get.text\n", " p_get_new=p_get.text.replace(\"\\t\", \"\")\n", " data = json.loads(p_get_new)\n", " temp_df = pd.DataFrame(data[\"data\"])\n", " temp_df[\"结算号\"]=c\n", " bol_df = pd.concat([bol_df,temp_df],axis=0)\n", "bol_df" ] }, { "cell_type": "code", "execution_count": null, "id": "9bb83abf", "metadata": {}, "outputs": [], "source": [ "#处理头程费用转rmb和体积单价\n", "bol_df['price'] = bol_df['price'].astype(float)\n", "bol_df['rmb'] = bol_df.apply(lambda row: round(row['price']*rate_to_rmb(row['currency'])[0],2), axis=1)\n", "bol_list = bol_df['bol_code'].dropna().unique().tolist()\n", "if bol_list:\n", " with MySQLconnect('ods') as db:\n", " conn = db.connect()\n", " format_strings = ','.join(['%s'] * len(bol_list))\n", " sql = f\"\"\"WITH t1 AS (\n", " SELECT\n", " `提单/柜号` AS bol_code,\n", " `提单ID` AS bol_id ,\n", " `体积cm3`,\n", " ROW_NUMBER() OVER(PARTITION BY `提单/柜号` ORDER BY `离港时间` DESC) AS row_index\n", " FROM\n", " bol_list \n", " WHERE\n", " `提单/柜号` IN ({format_strings})\n", " )\n", " SELECT `bol_code`,`bol_id`,`体积cm3` FROM t1 WHERE row_index = 1\n", " \"\"\"\n", " df_mapping = pd.read_sql(sql, conn, params=bol_list)\n", " # 合并\n", " bol_df = pd.merge(bol_df, df_mapping, on=['bol_code'], how='left')\n", "bol_df['体积cm3'] = bol_df['体积cm3'].astype(float)\n", "bol_df['体积单价'] = bol_df['rmb']/bol_df['体积cm3'] \n", "workflow = MySQLconnect('workflow')\n", "bol_df.to_sql('flow_check_head_wxx', workflow.engine(), if_exists='replace') " ] }, { "cell_type": "code", "execution_count": null, "id": "4a78e03f", "metadata": {}, "outputs": [], "source": [ "#找bol_id下的所有包裹和体积,得到单包裹的头程价格\n", "bol_id_list = bol_df['bol_id'].dropna().unique().tolist()\n", "with MySQLconnect('ods') as db:\n", " conn = db.connect()\n", " format_strings = ','.join(['%s'] * len(bol_id_list))\n", " sql = f\"\"\"SELECT\n", " `包裹号`,\n", " btp.`提单ID` AS bol_id,\n", " length,\n", " width,\n", " hight,\n", " weight,\n", " length * width * hight AS 体积 \n", " FROM\n", " bol_to_package btp\n", " LEFT JOIN package_vol_info pvi ON btp.包裹号 = pvi.package \n", " WHERE btp.`提单ID` IN ({format_strings})\n", " \"\"\"\n", " bol_package = pd.read_sql(sql, conn, params=bol_id_list)\n", "\n", "# 根据bol_df计算每个bol_id的单价(多行求和),保存为一个map字典\n", "bol_id_price = bol_df.groupby('bol_id')['体积单价'].sum().to_dict()\n", "# bol_package新增一列,为体积*体积单价,体积单价根据bol_id从字典中获取\n", "bol_package['头程价格'] = bol_package['体积'] * bol_package['bol_id'].map(bol_id_price)\n", "bol_package = bol_package.drop_duplicates()\n", "workflow = MySQLconnect('workflow')\n", "bol_package.to_sql('flow_check_package_head_wxx', workflow.engine(), if_exists='replace') " ] }, { "cell_type": "markdown", "id": "431bcdff", "metadata": {}, "source": [ "处理额外费用数据" ] }, { "cell_type": "code", "execution_count": null, "id": "7184abdf", "metadata": {}, "outputs": [], "source": [ "with MySQLconnect(\"workflow\") as db:\n", " engine = db.engine()\n", " sql_list= pd.read_sql(\"SELECT 物流结算号 FROM `flow_check_expend`\", engine)\n", " # 转成列表\n", " B_LIST = sql_list['物流结算号'].dropna().astype(int).tolist()\n", "\n", "expend_df = pd.DataFrame()\n", "for b in B_LIST:\n", " url = \"https://cp.maso.hk/index.php?main=biphp&key=W6BOYJ7BH27YCGRFCA0LWBVKMU1KRU5Q&act=expend_settle_detail&id=%s \"% b\n", " p_get = requests.get(url)\n", " data = p_get.json()\n", " temp_df = pd.DataFrame(data[\"data\"])\n", " temp_df[\"结算号\"]=b\n", " expend_df = pd.concat([expend_df,temp_df],axis=0)\n", "\n", "expend_df['账单运费'] = pd.to_numeric(expend_df['pay_price'].str.replace(r'[A-Z]+', '', regex=True),errors='coerce')\n", "expend_df['币种'] = expend_df['pay_price'].str.replace(r'[0-9.]+', '', regex=True)\n", "expend_df['rmb'] = expend_df.apply(lambda row: round(row['账单运费']*rate_to_rmb(row['币种'])[0],2), axis=1)\n", "\n", "workflow = MySQLconnect('workflow')\n", "expend_df.to_sql('flow_check_expend_wxx', workflow.engine(), if_exists='replace') " ] }, { "cell_type": "markdown", "id": "361b5796", "metadata": {}, "source": [ "三种费用合并,以包裹为单位保存" ] }, { "cell_type": "code", "execution_count": null, "id": "c6d66242", "metadata": {}, "outputs": [], "source": [ "with MySQLconnect('workflow') as db:\n", " conn = db.connect()\n", " format_strings = ','.join(['%s'] * len(bol_id_list))\n", " sql = \"\"\"SELECT DISTINCT\n", " tail.odr_express_id,\n", " head.`头程价格`,\n", " tail.rmb AS 尾程费用,\n", " expend.rmb AS 额外费用\n", " FROM\n", " flow_check_express_wxx tail \n", " LEFT JOIN flow_check_package_head_wxx head ON tail.odr_express_id = head.`包裹号`\n", " LEFT JOIN flow_check_expend_wxx expend ON tail.odr_express_id = expend.odr_express_id\n", " \"\"\"\n", " package_fee = pd.read_sql(sql, conn)\n", " package_fee.to_sql('flow_check_package_fee_wxx', workflow.engine(), if_exists='replace') \n" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }