2025-06-17 13:40:20 +08:00
|
|
|
|
{
|
|
|
|
|
|
"cells": [
|
|
|
|
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "import pandas as pd\n",
  "from utils.gtools import MySQLconnect\n",
  "\n",
  "# Product category to analyse; interpolated into the WHERE clause of the query below.\n",
  "categories = '94 - Office Desks'\n",
  "\n",
  "# The query text only depends on `categories`, so it is assembled before connecting.\n",
  "#   CTE a: SKUs of SPUs added between 2023-01-01 and 2024-12-31 23:59:59 in that category,\n",
  "#          LEFT JOINed to order lines, with month_diff = whole months from 添加时间 to order_date.\n",
  "#   CTE b: adds erp_package_vol, turns month_diff >= 6 into NULL and ranks each SKU's rows\n",
  "#          by order_date, newest first.\n",
  "#   Result: one row per SKU -- its top-ranked row, kept only when that row's month_diff is NULL.\n",
  "sku_query = f\"\"\"\n",
  "WITH a AS (\n",
  "\tSELECT\n",
  "\t\tt1.SPU,\n",
  "\t\tt2.SKU,\n",
  "\t\tt1.产品分类,\n",
  "\t\tt1.添加时间,\n",
  "\t\torder_date,\n",
  "\t\tt2.成本价,\n",
  "\t\topl.product_price_dollar,\n",
  "\t\topl.product_num,\n",
  "\t\tTIMESTAMPDIFF( MONTH, t1.添加时间, order_date ) AS month_diff \n",
  "\tFROM\n",
  "\t\tods.stg_bayshop_litfad_spu t1\n",
  "\t\tLEFT JOIN ods.stg_bayshop_litfad_sku t2 ON t2.产品PID = t1.产品PID\n",
  "\t\tLEFT JOIN dws.order_product_list opl ON t2.SKU = opl.SKU \n",
  "\tWHERE\n",
  "\t\tt1.添加时间 BETWEEN '2023-01-01' \n",
  "\t\tAND '2024-12-31 23:59:59' \n",
  "\t\tAND 产品分类 = '{categories}'\n",
  "\t\tAND t2.SKU IS NOT NULL \n",
  "\t),\n",
  "\tb AS (\n",
  "\tSELECT\n",
  "\t\tSPU,\n",
  "\t\tSKU,添加时间,产品分类,成本价,\n",
  "\t\tb.erp_package_vol,\n",
  "\t\torder_date,\n",
  "\tIF\n",
  "\t\t( month_diff >= 6, NULL, month_diff ) AS month_diff,\n",
  "\t\tROW_NUMBER() over ( PARTITION BY SKU ORDER BY order_date DESC ) AS ranking \n",
  "\tFROM\n",
  "\t\ta\n",
  "\t\tLEFT JOIN dwd.dim_erp_sku_package_vol_info b ON a.SKU = b.erp_sku \n",
  "\t) SELECT\n",
  "\tSPU,\n",
  "\tSKU,添加时间,产品分类,成本价,\n",
  "\tb.erp_package_vol \n",
  "FROM\n",
  "\tb \n",
  "WHERE\n",
  "\tranking = 1 \n",
  "\tAND month_diff IS NULL\n",
  "\"\"\"\n",
  "\n",
  "# Run the query against the 'ods' connection and keep the result as `df` for the cells below.\n",
  "with MySQLconnect('ods') as db:\n",
  "    engine = db.engine()\n",
  "    df = pd.read_sql(sku_query, engine)"
 ]
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"得到每个SKU的最长边,围长,总重量,6000抛重,采购体积比(采购/6000抛重)"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "import json\n",
  "import re\n",
  "\n",
  "import numpy as np\n",
  "\n",
  "from sell.sell_price import call_sell_and_order_price\n",
  "\n",
  "# Columns this cell adds to `df`, in the order they are written.\n",
  "METRIC_COLUMNS = ['网站售价', '物流分摊费', '订单物流费', '尾端类型', '最长边', '最大围长', '总重量', '总抛重']\n",
  "\n",
  "\n",
  "def extract_number(value):\n",
  "    \"\"\"Return the first number found in ``str(value)`` as a float, or 0.0 when there is none.\"\"\"\n",
  "    # NOTE(review): a leading sign is only captured for decimals (\"-1.5\" -> -1.5, \"-15\" -> 15.0).\n",
  "    match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n",
  "    return float(match.group()) if match else 0.0\n",
  "\n",
  "\n",
  "def summarize_packages(package_dict):\n",
  "    \"\"\"Reduce the parsed packages of one SKU to four summary figures.\n",
  "\n",
  "    Args:\n",
  "        package_dict: mapping of package key -> dict with the entries '长', '宽', '高'\n",
  "            and '重量'; each value is passed through ``extract_number``.\n",
  "\n",
  "    Returns:\n",
  "        Tuple ``(max_length, max_girth, all_weight, all_vol_weight)``:\n",
  "        the longest edge over all packages, the largest girth\n",
  "        (longest edge + 2 * (sum of the two shorter edges)), the summed weight\n",
  "        divided by 1000, and the summed volume divided by 6000.\n",
  "        All four are 0 when ``package_dict`` is empty.\n",
  "    \"\"\"\n",
  "    max_length = 0      # longest edge\n",
  "    max_girth = 0       # largest girth\n",
  "    all_weight = 0      # total weight; presumably grams -> kg, TODO confirm the unit\n",
  "    all_vol_weight = 0  # total volumetric weight (divisor 6000)\n",
  "    for package in package_dict.values():\n",
  "        length = extract_number(package['长'])\n",
  "        width = extract_number(package['宽'])\n",
  "        height = extract_number(package['高'])\n",
  "        weight = extract_number(package['重量'])\n",
  "        thd_size, snd_size, fst_size = sorted([length, width, height])\n",
  "        max_length = max(max_length, fst_size)\n",
  "        max_girth = max(max_girth, fst_size + (snd_size + thd_size) * 2)\n",
  "        all_weight += weight / 1000\n",
  "        all_vol_weight += length * width * height / 6000\n",
  "    return max_length, max_girth, all_weight, all_vol_weight\n",
  "\n",
  "\n",
  "# erp_package_vol comes from a LEFT JOIN, so SKUs without package data carry NULL here and\n",
  "# json.loads would raise on them. Drop those rows up front and report how many were removed.\n",
  "missing_package = df['erp_package_vol'].isna()\n",
  "if missing_package.any():\n",
  "    print(f\"Skipping {int(missing_package.sum())} SKU(s) without erp_package_vol\")\n",
  "    df = df[~missing_package].copy()\n",
  "\n",
  "# Collect one record per SKU and attach the columns once at the end instead of writing\n",
  "# single cells with df.loc inside the loop.\n",
  "metric_rows = []\n",
  "for _, row in df.iterrows():\n",
  "    max_length, max_girth, all_weight, all_vol_weight = summarize_packages(\n",
  "        json.loads(row['erp_package_vol'])\n",
  "    )\n",
  "    # The pricing helper receives the raw (unparsed) package JSON string.\n",
  "    sell_price, order_price, order_type = call_sell_and_order_price(\n",
  "        row['成本价'], row['erp_package_vol'], \"海运\"\n",
  "    )\n",
  "    metric_rows.append({\n",
  "        '网站售价': sell_price[0],\n",
  "        '物流分摊费': sell_price[1],\n",
  "        '订单物流费': order_price,\n",
  "        '尾端类型': order_type,\n",
  "        '最长边': max_length,\n",
  "        '最大围长': max_girth,\n",
  "        '总重量': all_weight,\n",
  "        '总抛重': all_vol_weight,\n",
  "    })\n",
  "\n",
  "# Same index as df, so the column assignment below aligns row for row.\n",
  "metrics = pd.DataFrame(metric_rows, index=df.index, columns=METRIC_COLUMNS)\n",
  "for column in METRIC_COLUMNS:\n",
  "    df[column] = metrics[column]"
 ]
},
|
|
|
|
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Bucket the SKUs by cost price and by total volumetric weight, then aggregate every bucket:\n",
  "# longest edge (max and min), largest girth, total weight (max, min and sum), the summed\n",
  "# site price / shared logistics fee / order logistics fee, and the number of SKUs.\n",
  "# The tail-end type column is not aggregated.\n",
  "\n",
  "# 1. Cost-price buckets: right-closed bins of width 10 up to 3990 plus one last bin ending\n",
  "#    at 28700; every bucket is labelled with its upper edge.\n",
  "cost_bins = [*range(0, 4000, 10), 28700]\n",
  "df['成本价分组'] = pd.cut(df['成本价'], bins=cost_bins, right=True, labels=cost_bins[1:])\n",
  "\n",
  "# 2. Volumetric-weight buckets: round up to the next multiple of 5.\n",
  "df['总抛重分组'] = (np.ceil(df['总抛重'] / 5) * 5).astype(int)\n",
  "\n",
  "# Rows whose cost price falls outside every bin have no bucket label; leave them out.\n",
  "df = df.dropna(subset=['成本价分组'])\n",
  "\n",
  "# 3. Aggregate per bucket pair. Named aggregation yields the final column names directly,\n",
  "#    so no positional renaming is needed afterwards.\n",
  "named_aggregations = {\n",
  "    '最长边max': ('最长边', 'max'),\n",
  "    '最长边min': ('最长边', 'min'),\n",
  "    '最大围长': ('最大围长', 'max'),\n",
  "    '总重量max': ('总重量', 'max'),\n",
  "    '总重量min': ('总重量', 'min'),\n",
  "    '总重量': ('总重量', 'sum'),\n",
  "    '网站售价': ('网站售价', 'sum'),\n",
  "    '物流分摊费': ('物流分摊费', 'sum'),\n",
  "    '订单物流费': ('订单物流费', 'sum'),\n",
  "    'SKU种类': ('SKU', 'count'),\n",
  "}\n",
  "grouped = df.groupby(['成本价分组', '总抛重分组'], observed=True)\n",
  "agg_df = grouped.agg(**named_aggregations).reset_index()\n",
  "\n",
  "# 4. Copy the summary table to the clipboard.\n",
  "agg_df.to_clipboard(index=False)"
 ]
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
|
"display_name": "base",
|
|
|
|
|
|
"language": "python",
|
|
|
|
|
|
"name": "python3"
|
|
|
|
|
|
},
|
|
|
|
|
|
"language_info": {
|
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
|
"version": 3
|
|
|
|
|
|
},
|
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
|
"name": "python",
|
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
|
"version": "3.11.5"
|
|
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
|
}
|