313 lines
11 KiB
Plaintext
313 lines
11 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"1.查询5.1-5.28测量包裹尺寸的订单数据\n",
|
||
"2.以美国的售价计算它当前应该有的售价\n",
|
||
"3.判断"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"from utils.gtools import MySQLconnect\n",
|
||
"\n",
|
||
"# 读取需要计算的包裹信息\n",
|
||
"with MySQLconnect('ads') as db:\n",
|
||
" sql = r\"\"\" \n",
|
||
" # 限制范围是测量时间,取得SKU种类为1且数量为1的订单,且重复SKU只取最近的订单\n",
|
||
"\n",
|
||
"WITH\n",
|
||
"t1 AS (\n",
|
||
"SELECT\n",
|
||
"order_id,\n",
|
||
"SKU,\n",
|
||
"order_date,\n",
|
||
"sum(CASE WHEN opl.order_product_id LIKE '%\\_%' ESCAPE '\\\\' \n",
|
||
" AND opl.order_product_id NOT LIKE '%\\_%\\_%' ESCAPE '\\\\' THEN product_num END) AS product_num,\n",
|
||
"DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n",
|
||
"count(opl.SKU) AS 产品种类\n",
|
||
"FROM\n",
|
||
"dws.fact_order_product_list opl\n",
|
||
"WHERE\n",
|
||
" NOT EXISTS (\n",
|
||
" SELECT 1 \n",
|
||
" FROM dws.log_order_reissue_detail AS r \n",
|
||
" WHERE r.order_product_id = opl.order_product_id\n",
|
||
" )\n",
|
||
"AND order_date >= \"20250501\"\n",
|
||
"AND order_date < \"20250612\"\n",
|
||
"AND SKU <> \"\"\n",
|
||
"GROUP BY order_id\n",
|
||
")\n",
|
||
",\n",
|
||
"t2 AS (\n",
|
||
"SELECT\t\t\t\n",
|
||
" a.`包裹测量时间`,\n",
|
||
"\t\t\t\t\t\tt1.order_id,\n",
|
||
"\t\t\t\t\t\tt1.SKU,\n",
|
||
"\t\t\t\t\t\tt1.order_date,\n",
|
||
" a.包裹号,\n",
|
||
" a.快递公司,\n",
|
||
" a.运输方式,\n",
|
||
"\t\t\t\t\t\ta.`目的国`,\n",
|
||
" d.postcode,\n",
|
||
" CONCAT(\n",
|
||
" '\"', b.package, '\": {',\n",
|
||
" '\"长\": ', length, ', ',\n",
|
||
" '\"宽\": ', width, ', ',\n",
|
||
" '\"高\": ', hight, ', ',\n",
|
||
" '\"重量\": ', weight, '}'\n",
|
||
" ) AS package_json\n",
|
||
" FROM\n",
|
||
"\t\t\t\tt1\n",
|
||
" LEFT JOIN order_express a ON t1.order_id = a.单号\n",
|
||
" JOIN package_vol_info b ON a.`包裹号` = b.package\n",
|
||
" JOIN order_list d ON a.`单号` = d.order_id \n",
|
||
" WHERE\n",
|
||
" a.`包裹状态` IN ( '客户签收', '已经投递') \n",
|
||
" AND b.hight > 0 \n",
|
||
" AND b.length > 0 \n",
|
||
" AND b.width > 0 \n",
|
||
" AND b.hight > 0 \n",
|
||
" AND b.weight > 0\n",
|
||
"-- AND a.`目的国` = \"United States\"\n",
|
||
"\t\t\t\t\t\tAND t1.product_num = 1\n",
|
||
"\t\t\t\t\t\tAND t1.产品种类=1\n",
|
||
"\t\t\t\t\t\tAND a.`包裹测量时间` >= '2025-05-01'\n",
|
||
"\t\t\t\t\t\tAND a.`包裹测量时间` < '2025-06-12'\n",
|
||
"),\n",
|
||
"t3 AS (\n",
|
||
"SELECT\n",
|
||
"t2.*,\n",
|
||
"sku.成本价 AS ERP采购价,\n",
|
||
"ess.erp_package_vol AS ERP包裹数据,\n",
|
||
"CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,\n",
|
||
"ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn\n",
|
||
"FROM\n",
|
||
"t2\n",
|
||
"LEFT JOIN dwd.dim_erp_sku_package_vol_info ess ON t2.SKU=ess.erp_sku\n",
|
||
"LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU\n",
|
||
"WHERE\n",
|
||
"ess.`erp_package_vol`<>\"{}\" AND ess.`erp_package_vol`<>\"\"\n",
|
||
"GROUP BY order_id\n",
|
||
")\n",
|
||
"SELECT\n",
|
||
"包裹测量时间,\n",
|
||
"order_id,\n",
|
||
"SKU,\n",
|
||
"DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n",
|
||
"包裹号,\n",
|
||
"`快递公司`,\n",
|
||
"`运输方式`,\n",
|
||
"`目的国`,\n",
|
||
"postcode,\n",
|
||
"ERP采购价,\n",
|
||
"ERP包裹数据,\n",
|
||
"实际包裹数据\n",
|
||
"FROM\n",
|
||
"t3\n",
|
||
"WHERE\n",
|
||
"rn=1\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
" \"\"\"\n",
|
||
" df=pd.read_sql(\"SELECT * FROM `order_complet4` WHERE buy_amount is not null and `实际尺寸售价` IS NULL limit 1000 \",db.con)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def call_sell_price(price, package_dict, head_type=\"海运\"):\n",
|
||
"    \"\"\"Price one item from a purchase price and a JSON-encoded package description.\n",
|
||
"\n",
|
||
"    package_dict is decoded with json.loads and passed, together with price and\n",
|
||
"    head_type, to sell.sell_price.call_sell_and_order_price.\n",
|
||
"\n",
|
||
"    Returns (sell_price, order_price, order_type), where sell_price is element 0\n",
|
||
"    of the helper's first return value. Returns (\"\", \"\", \"\") when decoding or\n",
|
||
"    the helper raises (the error is printed) or when that first value equals 0.\n",
|
||
"    \"\"\"\n",
|
||
"    import json\n",
|
||
"    from sell.sell_price import call_sell_and_order_price\n",
|
||
"\n",
|
||
"    no_result = (\"\", \"\", \"\")\n",
|
||
"    try:\n",
|
||
"        parsed = json.loads(package_dict)\n",
|
||
"        prices, order_price, order_type = call_sell_and_order_price(price, parsed, head_type)\n",
|
||
"    except Exception as err:\n",
|
||
"        print(f\" 报错: {err}\")\n",
|
||
"        return no_result\n",
|
||
"    # The helper's first value is compared against 0 before it is indexed.\n",
|
||
"    if prices == 0:\n",
|
||
"        return no_result\n",
|
||
"    return (prices[0], order_price, order_type)\n",
|
||
"# Compute the current selling price for every row of df\n",
|
||
"for index, row in df.iterrows():\n",
|
||
"    result = call_sell_price(row['buy_amount'], row['package_json'], \"海运\")\n",
|
||
"    print(result)\n",
|
||
"    site_price, logistics_fee, tail_type = result\n",
|
||
"    df.loc[index, '网站售价'] = site_price\n",
|
||
"    df.loc[index, '订单物流费'] = logistics_fee\n",
|
||
"    df.loc[index, '尾端类型'] = tail_type\n",
|
||
"    print(f\"SKU: {row['sku']} 网站售价: {site_price} 订单物流费: {logistics_fee} 尾端类型: {tail_type}\")\n",
|
||
"df.to_clipboard(index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"新的|计算欧洲各国每种货型占比"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\42756626.py:31: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
|
||
" df = pd.read_sql(sql, db.con)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"from utils.gtools import MySQLconnect\n",
|
||
"sql=r\"\"\"SELECT\n",
|
||
"包裹号,\n",
|
||
"单号,\n",
|
||
"快递公司,\n",
|
||
"目的国,\n",
|
||
"快递分区,\n",
|
||
"投递时间,\n",
|
||
"postcode,\n",
|
||
"length,\n",
|
||
"width,\n",
|
||
"hight,\n",
|
||
"weight\n",
|
||
"FROM\n",
|
||
"order_express oe\n",
|
||
"LEFT JOIN package_vol_info pvi ON oe.`包裹号` = pvi.package\n",
|
||
"left join order_list ON oe.单号 = order_list.order_id\n",
|
||
"WHERE\n",
|
||
"`投递时间` >='2025-05-01'\n",
|
||
"AND `投递时间`<'2025-08-01'\n",
|
||
"AND `包裹状态` NOT REGEXP '已作废'\n",
|
||
"AND length >0\n",
|
||
"AND width >0\n",
|
||
"AND hight>0\n",
|
||
"AND weight>0\n",
|
||
"# AND 目的国 NOT REGEXP \"United States|Australia|United Kingdom|Japan|Canada\"\n",
|
||
"and 目的国 REGEXP \"Australia\"\n",
|
||
"# AND 目的国 <>''\"\"\"\n",
|
||
"with MySQLconnect('ods') as db:\n",
|
||
" df = pd.read_sql(sql, db.con)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(df.columns)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\4220111735.py:9: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
|
||
" type_series = df.groupby('单号').apply(order_type)\n",
|
||
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\4220111735.py:17: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
|
||
" weight_series = df.groupby('单号').apply(order_weight)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"def order_type(group):\n",
|
||
"    \"\"\"Classify one order's package rows as '卡派' or '快递'.\n",
|
||
"\n",
|
||
"    Returns '卡派' when any row has length >= 200 or any row has weight >= 31500;\n",
|
||
"    otherwise returns '快递'.\n",
|
||
"    \"\"\"\n",
|
||
"    # `or` short-circuits: the weight column is only inspected when no length qualifies\n",
|
||
"    needs_truck = (group['length'] >= 200).any() or (group['weight'] >= 31500).any()\n",
|
||
"    return '卡派' if needs_truck else '快递'\n",
|
||
"\n",
|
||
"# 计算每个单号的类型\n",
|
||
"type_series = df.groupby('单号').apply(order_type)\n",
|
||
"def order_weight(group, volume_divisor=4000):\n",
|
||
"    \"\"\"Sum the billable weight of every package row in one order.\n",
|
||
"\n",
|
||
"    For each row the billable weight is the larger of\n",
|
||
"    length * width * hight / volume_divisor and weight / 1000.\n",
|
||
"\n",
|
||
"    volume_divisor defaults to 4000, the divisor this code has always applied\n",
|
||
"    (an earlier comment said 6000, which did not match the computation).\n",
|
||
"    Returns 0 for an empty group.\n",
|
||
"    \"\"\"\n",
|
||
"    volumetric = group['length'] * group['width'] * group['hight'] / volume_divisor\n",
|
||
"    actual = group['weight'] / 1000\n",
|
||
"    # Column arithmetic replaces the repeated per-row group.iloc[i] lookups;\n",
|
||
"    # the built-in max/sum keep the original row-by-row accumulation order.\n",
|
||
"    return sum(max(vol, act) for vol, act in zip(volumetric, actual))\n",
|
||
"weight_series = df.groupby('单号').apply(order_weight)\n",
|
||
"# 将结果映射回原表\n",
|
||
"# df['类型'] = df['单号'].map(type_series)\n",
|
||
"df['计费重'] = df['单号'].map(weight_series)\n",
|
||
"df.to_clipboard(index= False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from logisticsClass.logisticsTail_AU import *\n",
|
||
"\n",
|
||
"# Tag each row with its carrier channel and the value returned by is_remote\n",
|
||
"for i, row in df.iterrows():\n",
|
||
"    carrier = row['快递公司']\n",
|
||
"    if \"POST\" in carrier:\n",
|
||
"        channel, logistics = \"POST\", PostLogistics_AU()\n",
|
||
"    elif \"TOLL\" in carrier:\n",
|
||
"        channel, logistics = \"TOLL\", TollLogistics_AU()\n",
|
||
"    elif \"ALL\" in carrier:\n",
|
||
"        channel, logistics = \"ALL\", AllLogistics_AU()\n",
|
||
"    else:\n",
|
||
"        channel, logistics = None, None\n",
|
||
"    if logistics is None:\n",
|
||
"        # No carrier keyword matched; '渠道' is not written for this row\n",
|
||
"        zone = \"其他渠道\"\n",
|
||
"    else:\n",
|
||
"        zone = logistics.is_remote(row['postcode'])\n",
|
||
"        df.loc[i, '渠道'] = channel\n",
|
||
"    df.loc[i, '分区'] = zone\n",
|
||
"\n",
|
||
"df.to_clipboard(index=False)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.5"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|