2025-06-17 13:40:20 +08:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1.查询5.1-5.28测量包裹尺寸的订单数据\n",
"2.以美国的售价计算它当前应该有的售价\n",
"3.判断"
]
},
{
"cell_type": "code",
2025-11-26 14:34:04 +08:00
"execution_count": null,
2025-06-17 13:40:20 +08:00
"metadata": {},
2025-11-26 14:34:04 +08:00
"outputs": [],
2025-06-17 13:40:20 +08:00
"source": [
2025-06-26 00:50:14 +08:00
"import pandas as pd\n",
"from utils.gtools import MySQLconnect\n",
"\n",
"# 读取需要计算的包裹信息\n",
"with MySQLconnect('ads') as db:\n",
" sql = r\"\"\" \n",
" # 限制范围是测量时间, 取得SKU种类为1且数量为1的订单, 且重复SKU只取最近的订单\n",
"\n",
"WITH\n",
"t1 AS (\n",
"SELECT\n",
"order_id,\n",
"SKU,\n",
"order_date,\n",
"sum(CASE WHEN opl.order_product_id LIKE '%\\_%' ESCAPE '\\\\' \n",
" AND opl.order_product_id NOT LIKE '%\\_%\\_%' ESCAPE '\\\\' THEN product_num END) AS product_num,\n",
"DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n",
"count(opl.SKU) AS 产品种类\n",
"FROM\n",
"dws.fact_order_product_list opl\n",
"WHERE\n",
" NOT EXISTS (\n",
" SELECT 1 \n",
" FROM dws.log_order_reissue_detail AS r \n",
" WHERE r.order_product_id = opl.order_product_id\n",
" )\n",
"AND order_date >= \"20250501\"\n",
"AND order_date < \"20250612\"\n",
"AND SKU <> \"\"\n",
"GROUP BY order_id\n",
")\n",
",\n",
"t2 AS (\n",
"SELECT\t\t\t\n",
" a.`包裹测量时间`,\n",
"\t\t\t\t\t\tt1.order_id,\n",
"\t\t\t\t\t\tt1.SKU,\n",
"\t\t\t\t\t\tt1.order_date,\n",
" a.包裹号,\n",
" a.快递公司,\n",
" a.运输方式,\n",
"\t\t\t\t\t\ta.`目的国`,\n",
" d.postcode,\n",
" CONCAT(\n",
" '\"', b.package, '\": {',\n",
" '\"长\": ', length, ', ',\n",
" '\"宽\": ', width, ', ',\n",
" '\"高\": ', hight, ', ',\n",
" '\"重量\": ', weight, '}'\n",
" ) AS package_json\n",
" FROM\n",
"\t\t\t\tt1\n",
" LEFT JOIN order_express a ON t1.order_id = a.单号\n",
" JOIN package_vol_info b ON a.`包裹号` = b.package\n",
" JOIN order_list d ON a.`单号` = d.order_id \n",
" WHERE\n",
" a.`包裹状态` IN ( '客户签收', '已经投递') \n",
" AND b.hight > 0 \n",
" AND b.length > 0 \n",
" AND b.width > 0 \n",
" AND b.hight > 0 \n",
" AND b.weight > 0\n",
"-- AND a.`目的国` = \"United States\"\n",
"\t\t\t\t\t\tAND t1.product_num = 1\n",
"\t\t\t\t\t\tAND t1.产品种类=1\n",
"\t\t\t\t\t\tAND a.`包裹测量时间` >= '2025-05-01'\n",
"\t\t\t\t\t\tAND a.`包裹测量时间` < '2025-06-12'\n",
"),\n",
"t3 AS (\n",
"SELECT\n",
"t2.*,\n",
"sku.成本价 AS ERP采购价,\n",
"ess.erp_package_vol AS ERP包裹数据,\n",
"CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,\n",
"ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn\n",
"FROM\n",
"t2\n",
"LEFT JOIN dwd.dim_erp_sku_package_vol_info ess ON t2.SKU=ess.erp_sku\n",
"LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU\n",
"WHERE\n",
"ess.`erp_package_vol`<>\"{}\" AND ess.`erp_package_vol`<>\"\"\n",
"GROUP BY order_id\n",
")\n",
"SELECT\n",
"包裹测量时间,\n",
"order_id,\n",
"SKU,\n",
"DATE_FORMAT(order_date,\"%Y-%M-%D\") AS 订单时间,\n",
"包裹号,\n",
"`快递公司`,\n",
"`运输方式`,\n",
"`目的国`,\n",
"postcode,\n",
"ERP采购价,\n",
"ERP包裹数据,\n",
"实际包裹数据\n",
"FROM\n",
"t3\n",
"WHERE\n",
"rn=1\n",
"\n",
"\n",
"\n",
" \"\"\"\n",
2025-06-26 02:51:51 +08:00
" df=pd.read_sql(\"SELECT * FROM `order_complet4` WHERE buy_amount is not null and `实际尺寸售价` IS NULL limit 1000 \",db.con)\n"
2025-06-26 00:50:14 +08:00
]
},
{
"cell_type": "code",
2025-11-26 14:34:04 +08:00
"execution_count": null,
2025-06-26 00:50:14 +08:00
"metadata": {},
2025-11-26 14:34:04 +08:00
"outputs": [],
2025-06-26 00:50:14 +08:00
"source": [
"def call_sell_price(price, package_dict,head_type=\"海运\"):\n",
" import json\n",
" from sell.sell_price import call_sell_and_order_price\n",
" try:\n",
" package_dict = json.loads(package_dict)\n",
" all_sell_price, order_price, order_type = call_sell_and_order_price(price, package_dict,head_type)\n",
" except Exception as e:\n",
" print(f\" 报错: {e}\")\n",
" return (\"\",\"\",\"\")\n",
" if all_sell_price == 0:\n",
" return (\"\",\"\",\"\")\n",
" sell_price= all_sell_price[0]\n",
" # logis_price = all_sell_price[1]\n",
" return (sell_price, order_price, order_type)\n",
"# 计算当前售价\n",
"for index,row in df.iterrows():\n",
" price = row['buy_amount']\n",
" # package_dict = json.loads(row['erp_package_vol'])\n",
" sell_price = call_sell_price(price, row['package_json'],\"海运\")\n",
" print(sell_price)\n",
" df.loc[index,'网站售价'] = sell_price[0]\n",
" df.loc[index,'订单物流费'] = sell_price[1]\n",
" df.loc[index,'尾端类型'] = sell_price[2]\n",
" print(f\"SKU: {row['sku']} 网站售价: {sell_price[0]} 订单物流费: {sell_price[1]} 尾端类型: {sell_price[2]}\")\n",
"df.to_clipboard(index=False)"
2025-06-17 13:40:20 +08:00
]
2025-11-26 14:34:04 +08:00
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"新的|计算欧洲各国每种货型占比"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\42756626.py:31: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
" df = pd.read_sql(sql, db.con)\n"
]
}
],
"source": [
"import pandas as pd\n",
"from utils.gtools import MySQLconnect\n",
"sql=r\"\"\"SELECT\n",
"包裹号,\n",
"单号,\n",
"快递公司,\n",
"目的国,\n",
"快递分区,\n",
"投递时间,\n",
"postcode,\n",
"length,\n",
"width,\n",
"hight,\n",
"weight\n",
"FROM\n",
"order_express oe\n",
"LEFT JOIN package_vol_info pvi ON oe.`包裹号` = pvi.package\n",
"left join order_list ON oe.单号 = order_list.order_id\n",
"WHERE\n",
"`投递时间` >='2025-05-01'\n",
"AND `投递时间`<'2025-08-01'\n",
"AND `包裹状态` NOT REGEXP '已作废'\n",
"AND length >0\n",
"AND width >0\n",
"AND hight>0\n",
"AND weight>0\n",
"# AND 目的国 NOT REGEXP \"United States|Australia|United Kingdom|Japan|Canada\"\n",
"and 目的国 REGEXP \"Australia\"\n",
"# AND 目的国 <>''\"\"\"\n",
"with MySQLconnect('ods') as db:\n",
" df = pd.read_sql(sql, db.con)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(df.columns)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\4220111735.py:9: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
" type_series = df.groupby('单号').apply(order_type)\n",
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\4220111735.py:17: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
" weight_series = df.groupby('单号').apply(order_weight)\n"
]
}
],
"source": [
"def order_type(group):\n",
" # 判断组中是否有任何一个包裹满足“卡派”条件\n",
" if (group['length'] >= 200).any() or (group['weight'] >= 31500).any():\n",
" return '卡派'\n",
" else:\n",
" return '快递'\n",
"\n",
"# 计算每个单号的类型\n",
"type_series = df.groupby('单号').apply(order_type)\n",
"def order_weight(group):\n",
" # 计算单号的重量\n",
" # 计算6000系数的体积重\n",
" bill_weight= 0\n",
" for i in range(len(group)):\n",
" bill_weight += max(group.iloc[i]['length'] * group.iloc[i]['width'] * group.iloc[i]['hight'] / 4000, group.iloc[i]['weight']/1000)\n",
" return bill_weight\n",
"weight_series = df.groupby('单号').apply(order_weight)\n",
"# 将结果映射回原表\n",
"# df['类型'] = df['单号'].map(type_series)\n",
"df['计费重'] = df['单号'].map(weight_series)\n",
"df.to_clipboard(index= False)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from logisticsClass.logisticsTail_AU import *\n",
"\n",
"for i, row in df.iterrows():\n",
" if \"POST\" in row['快递公司']:\n",
" gel = PostLogistics_AU()\n",
" zone = gel.is_remote(row['postcode'])\n",
" df.loc[i,'渠道'] = \"POST\"\n",
" elif \"TOLL\" in row['快递公司']:\n",
" gel = TollLogistics_AU()\n",
" zone = gel.is_remote(row['postcode'])\n",
" df.loc[i,'渠道'] = \"TOLL\"\n",
" elif \"ALL\" in row['快递公司']:\n",
" gel = AllLogistics_AU()\n",
" zone = gel.is_remote(row['postcode'])\n",
" df.loc[i,'渠道'] = \"ALL\"\n",
" else:\n",
" zone = \"其他渠道\"\n",
" df.loc[i,'分区'] = zone\n",
"\n",
"df.to_clipboard(index=False)"
]
2025-06-17 13:40:20 +08:00
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
2025-06-26 00:50:14 +08:00
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
2025-06-17 13:40:20 +08:00
"name": "python",
2025-06-26 00:50:14 +08:00
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2025-11-26 14:34:04 +08:00
"version": "3.11.5"
2025-06-17 13:40:20 +08:00
}
},
"nbformat": 4,
"nbformat_minor": 2
}