logistics/售价模型审核.ipynb

313 lines
11 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1.查询5.1-5.28测量包裹尺寸的订单数据\n",
"2.以美国的售价计算它当前应该有的售价\n",
"3.判断"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from utils.gtools import MySQLconnect\n",
"\n",
"# 读取需要计算的包裹信息\n",
"with MySQLconnect('ads') as db:\n",
" sql = r\"\"\" \n",
" # 限制范围是测量时间取得SKU种类为1且数量为1的订单且重复SKU只取最近的订单\n",
"\n",
"WITH\n",
"t1 AS (\n",
"SELECT\n",
"order_id,\n",
"SKU,\n",
"order_date,\n",
"sum(CASE WHEN opl.order_product_id LIKE '%\\_%' ESCAPE '\\\\' \n",
" AND opl.order_product_id NOT LIKE '%\\_%\\_%' ESCAPE '\\\\' THEN product_num END) AS product_num,\n",
"DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n",
"count(opl.SKU) AS 产品种类\n",
"FROM\n",
"dws.fact_order_product_list opl\n",
"WHERE\n",
" NOT EXISTS (\n",
" SELECT 1 \n",
" FROM dws.log_order_reissue_detail AS r \n",
" WHERE r.order_product_id = opl.order_product_id\n",
" )\n",
"AND order_date >= \"20250501\"\n",
"AND order_date < \"20250612\"\n",
"AND SKU <> \"\"\n",
"GROUP BY order_id\n",
")\n",
",\n",
"t2 AS (\n",
"SELECT\t\t\t\n",
" a.`包裹测量时间`,\n",
"\t\t\t\t\t\tt1.order_id,\n",
"\t\t\t\t\t\tt1.SKU,\n",
"\t\t\t\t\t\tt1.order_date,\n",
" a.包裹号,\n",
" a.快递公司,\n",
" a.运输方式,\n",
"\t\t\t\t\t\ta.`目的国`,\n",
" d.postcode,\n",
" CONCAT(\n",
" '\"', b.package, '\": {',\n",
" '\"长\": ', length, ', ',\n",
" '\"宽\": ', width, ', ',\n",
" '\"高\": ', hight, ', ',\n",
" '\"重量\": ', weight, '}'\n",
" ) AS package_json\n",
" FROM\n",
"\t\t\t\tt1\n",
" LEFT JOIN order_express a ON t1.order_id = a.单号\n",
" JOIN package_vol_info b ON a.`包裹号` = b.package\n",
" JOIN order_list d ON a.`单号` = d.order_id \n",
" WHERE\n",
" a.`包裹状态` IN ( '客户签收', '已经投递') \n",
" AND b.hight > 0 \n",
" AND b.length > 0 \n",
" AND b.width > 0 \n",
" AND b.hight > 0 \n",
" AND b.weight > 0\n",
"-- AND a.`目的国` = \"United States\"\n",
"\t\t\t\t\t\tAND t1.product_num = 1\n",
"\t\t\t\t\t\tAND t1.产品种类=1\n",
"\t\t\t\t\t\tAND a.`包裹测量时间` >= '2025-05-01'\n",
"\t\t\t\t\t\tAND a.`包裹测量时间` < '2025-06-12'\n",
"),\n",
"t3 AS (\n",
"SELECT\n",
"t2.*,\n",
"sku.成本价 AS ERP采购价,\n",
"ess.erp_package_vol AS ERP包裹数据,\n",
"CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,\n",
"ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn\n",
"FROM\n",
"t2\n",
"LEFT JOIN dwd.dim_erp_sku_package_vol_info ess ON t2.SKU=ess.erp_sku\n",
"LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU\n",
"WHERE\n",
"ess.`erp_package_vol`<>\"{}\" AND ess.`erp_package_vol`<>\"\"\n",
"GROUP BY order_id\n",
")\n",
"SELECT\n",
"包裹测量时间,\n",
"order_id,\n",
"SKU,\n",
"DATE_FORMAT(order_date,\"%Y-%M-%D\") AS 订单时间,\n",
"包裹号,\n",
"`快递公司`,\n",
"`运输方式`,\n",
"`目的国`,\n",
"postcode,\n",
"ERP采购价,\n",
"ERP包裹数据,\n",
"实际包裹数据\n",
"FROM\n",
"t3\n",
"WHERE\n",
"rn=1\n",
"\n",
"\n",
"\n",
" \"\"\"\n",
" df=pd.read_sql(\"SELECT * FROM `order_complet4` WHERE buy_amount is not null and `实际尺寸售价` IS NULL limit 1000 \",db.con)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def call_sell_price(price, package_dict,head_type=\"海运\"):\n",
" import json\n",
" from sell.sell_price import call_sell_and_order_price\n",
" try:\n",
" package_dict = json.loads(package_dict)\n",
" all_sell_price, order_price, order_type = call_sell_and_order_price(price, package_dict,head_type)\n",
" except Exception as e:\n",
" print(f\" 报错: {e}\")\n",
" return (\"\",\"\",\"\")\n",
" if all_sell_price == 0:\n",
" return (\"\",\"\",\"\")\n",
" sell_price= all_sell_price[0]\n",
" # logis_price = all_sell_price[1]\n",
" return (sell_price, order_price, order_type)\n",
"# 计算当前售价\n",
"for index,row in df.iterrows():\n",
" price = row['buy_amount']\n",
" # package_dict = json.loads(row['erp_package_vol'])\n",
" sell_price = call_sell_price(price, row['package_json'],\"海运\")\n",
" print(sell_price)\n",
" df.loc[index,'网站售价'] = sell_price[0]\n",
" df.loc[index,'订单物流费'] = sell_price[1]\n",
" df.loc[index,'尾端类型'] = sell_price[2]\n",
" print(f\"SKU: {row['sku']} 网站售价: {sell_price[0]} 订单物流费: {sell_price[1]} 尾端类型: {sell_price[2]}\")\n",
"df.to_clipboard(index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"新的|计算欧洲各国每种货型占比"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\42756626.py:31: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
" df = pd.read_sql(sql, db.con)\n"
]
}
],
"source": [
"import pandas as pd\n",
"from utils.gtools import MySQLconnect\n",
"sql=r\"\"\"SELECT\n",
"包裹号,\n",
"单号,\n",
"快递公司,\n",
"目的国,\n",
"快递分区,\n",
"投递时间,\n",
"postcode,\n",
"length,\n",
"width,\n",
"hight,\n",
"weight\n",
"FROM\n",
"order_express oe\n",
"LEFT JOIN package_vol_info pvi ON oe.`包裹号` = pvi.package\n",
"left join order_list ON oe.单号 = order_list.order_id\n",
"WHERE\n",
"`投递时间` >='2025-05-01'\n",
"AND `投递时间`<'2025-08-01'\n",
"AND `包裹状态` NOT REGEXP '已作废'\n",
"AND length >0\n",
"AND width >0\n",
"AND hight>0\n",
"AND weight>0\n",
"# AND 目的国 NOT REGEXP \"United States|Australia|United Kingdom|Japan|Canada\"\n",
"and 目的国 REGEXP \"Australia\"\n",
"# AND 目的国 <>''\"\"\"\n",
"with MySQLconnect('ods') as db:\n",
" df = pd.read_sql(sql, db.con)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(df.columns)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\4220111735.py:9: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
" type_series = df.groupby('单号').apply(order_type)\n",
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\4220111735.py:17: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
" weight_series = df.groupby('单号').apply(order_weight)\n"
]
}
],
"source": [
"def order_type(group):\n",
" # 判断组中是否有任何一个包裹满足“卡派”条件\n",
" if (group['length'] >= 200).any() or (group['weight'] >= 31500).any():\n",
" return '卡派'\n",
" else:\n",
" return '快递'\n",
"\n",
"# 计算每个单号的类型\n",
"type_series = df.groupby('单号').apply(order_type)\n",
"def order_weight(group):\n",
" # 计算单号的重量\n",
" # 计算6000系数的体积重\n",
" bill_weight= 0\n",
" for i in range(len(group)):\n",
" bill_weight += max(group.iloc[i]['length'] * group.iloc[i]['width'] * group.iloc[i]['hight'] / 4000, group.iloc[i]['weight']/1000)\n",
" return bill_weight\n",
"weight_series = df.groupby('单号').apply(order_weight)\n",
"# 将结果映射回原表\n",
"# df['类型'] = df['单号'].map(type_series)\n",
"df['计费重'] = df['单号'].map(weight_series)\n",
"df.to_clipboard(index= False)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from logisticsClass.logisticsTail_AU import *\n",
"\n",
"for i, row in df.iterrows():\n",
" if \"POST\" in row['快递公司']:\n",
" gel = PostLogistics_AU()\n",
" zone = gel.is_remote(row['postcode'])\n",
" df.loc[i,'渠道'] = \"POST\"\n",
" elif \"TOLL\" in row['快递公司']:\n",
" gel = TollLogistics_AU()\n",
" zone = gel.is_remote(row['postcode'])\n",
" df.loc[i,'渠道'] = \"TOLL\"\n",
" elif \"ALL\" in row['快递公司']:\n",
" gel = AllLogistics_AU()\n",
" zone = gel.is_remote(row['postcode'])\n",
" df.loc[i,'渠道'] = \"ALL\"\n",
" else:\n",
" zone = \"其他渠道\"\n",
" df.loc[i,'分区'] = zone\n",
"\n",
"df.to_clipboard(index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}