734 lines
28 KiB
Plaintext
734 lines
28 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"取数据,SKU多次订单的情况下取最新的订单数据"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"from utils.gtools import MySQLconnect\n",
|
||
"\n",
|
||
"# 读取需要计算的包裹信息\n",
|
||
"with MySQLconnect('ods') as db:\n",
|
||
" sql = r\"\"\" \n",
|
||
" # 限制范围是测量时间,取得SKU种类为1且数量为1的订单,且重复SKU只取最近的订单\n",
|
||
"# 测量时间D +2 天进行汇总数据\n",
|
||
"# 订单汇总产品数和取出\n",
|
||
"WITH\n",
|
||
"t1 AS (\n",
|
||
"SELECT\n",
|
||
"order_id,\n",
|
||
"SKU,\n",
|
||
"order_date,\n",
|
||
"sum(CASE WHEN opl.order_product_id REGEXP \"[0-9]{15}_[0-9]*$\"\n",
|
||
" THEN product_num END) AS product_num,\n",
|
||
"DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n",
|
||
"count(DISTINCT opl.SKU) AS 产品种类\n",
|
||
"FROM\n",
|
||
"dws.order_product_list opl\n",
|
||
"WHERE\n",
|
||
" NOT EXISTS (\n",
|
||
" SELECT 1 \n",
|
||
" FROM dws.log_order_reissue_detail AS r \n",
|
||
" WHERE left(r.order_product_id,15) = opl.order_id\n",
|
||
" \n",
|
||
" )\n",
|
||
"AND order_date >= \"2025-10-01\"\n",
|
||
"AND order_date < \"2025-11-01\"\n",
|
||
"AND SKU <> \"\"\n",
|
||
"GROUP BY order_id\n",
|
||
")\n",
|
||
",\n",
|
||
"t2 AS (\n",
|
||
"SELECT \n",
|
||
" a.`包裹测量时间`,\n",
|
||
" t1.order_id,\n",
|
||
" t1.SKU,\n",
|
||
" t1.order_date,\n",
|
||
" a.包裹号,\n",
|
||
" a.快递公司,\n",
|
||
" a.运输方式,\n",
|
||
" a.`目的国`,\n",
|
||
" d.postcode,\n",
|
||
" CONCAT(\n",
|
||
" '\"', b.package, '\": {',\n",
|
||
" '\"长\": ', length, ', ',\n",
|
||
" '\"宽\": ', width, ', ',\n",
|
||
" '\"高\": ', hight, ', ',\n",
|
||
" '\"重量\": ', weight, '}'\n",
|
||
" ) AS package_json\n",
|
||
" FROM\n",
|
||
" t1\n",
|
||
" LEFT JOIN order_express a ON t1.order_id = a.单号\n",
|
||
" JOIN package_vol_info b ON a.`包裹号` = b.package\n",
|
||
" JOIN order_list d ON a.`单号` = d.order_id \n",
|
||
" WHERE\n",
|
||
" a.`包裹状态` != '--'\n",
|
||
" AND b.hight > 0 \n",
|
||
" AND b.length > 0 \n",
|
||
" AND b.width > 0 \n",
|
||
" AND b.hight > 0 \n",
|
||
" AND b.weight > 0\n",
|
||
" AND t1.product_num = 1\n",
|
||
" AND t1.产品种类=1\n",
|
||
"-- AND a.`包裹测量时间` >= '2025-09-01'\n",
|
||
"-- AND a.`包裹测量时间` < '2025-10-01'\n",
|
||
"),\n",
|
||
"t3 AS (\n",
|
||
"SELECT\n",
|
||
"t2.*,\n",
|
||
"SPU,\n",
|
||
"sku.成本价 AS ERP采购价,\n",
|
||
"CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,\n",
|
||
"count(package_json) AS 包裹数,\n",
|
||
"ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn\n",
|
||
"FROM\n",
|
||
"t2\n",
|
||
"LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU\n",
|
||
"left JOIN stg_bayshop_litfad_spu spu ON sku.产品PID=spu.产品PID\n",
|
||
"\n",
|
||
"GROUP BY order_id\n",
|
||
")\n",
|
||
"SELECT\n",
|
||
"包裹测量时间,\n",
|
||
"order_id,\n",
|
||
"SPU,\n",
|
||
"SKU,\n",
|
||
"DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n",
|
||
"包裹号,\n",
|
||
"`快递公司`,\n",
|
||
"`运输方式`,\n",
|
||
"`目的国`,\n",
|
||
"postcode,\n",
|
||
"ERP采购价,\n",
|
||
"实际包裹数据,\n",
|
||
"包裹数,\n",
|
||
"rn AS 从新到旧\n",
|
||
"FROM\n",
|
||
"t3\n",
|
||
"\n",
|
||
" \"\"\"\n",
|
||
" df=pd.read_sql(sql,db.con)\n",
|
||
" df.to_clipboard(index=False)\n",
|
||
"\n",
|
||
"# df=df[df['实际包裹数量']==1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# 取这些SPU下的所有SKU及其现在售价"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"# from sell.sell_price import call_sell_and_order_price\n",
|
||
"import json\n",
|
||
"from utils.gtools import MySQLconnect\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"base_df = df[df['从新到旧']==1]\n",
|
||
"spu_list = (\n",
|
||
" base_df['SPU']\n",
|
||
" .apply(pd.to_numeric, errors='coerce')\n",
|
||
" .dropna()\n",
|
||
" .astype(int)\n",
|
||
" .astype(str)\n",
|
||
" .drop_duplicates() # 加这一行\n",
|
||
" .tolist()\n",
|
||
")\n",
|
||
"\n",
|
||
"def chunk_list(lst, size):\n",
|
||
" for i in range(0, len(lst), size):\n",
|
||
" yield lst[i:i+size]\n",
|
||
"\n",
|
||
"result_list = []\n",
|
||
"with MySQLconnect('ods') as db:\n",
|
||
" enginal = db.engine()\n",
|
||
" for chunk in chunk_list(spu_list, 5000):\n",
|
||
" quoted_spus = ','.join([f\"'{spu}'\" for spu in chunk]) # 加引号防止 SQL 错误\n",
|
||
" sql = f\"\"\"\n",
|
||
" SELECT\n",
|
||
" 产品品类,\n",
|
||
" 产品分类,\n",
|
||
" SPU,\n",
|
||
" SKU,\n",
|
||
" sku.成本价,\n",
|
||
" spvi.erp_package_vol AS ERP包裹数据,\n",
|
||
" 物流分摊,\n",
|
||
" 产品售价\n",
|
||
" from stg_bayshop_litfad_spu spu \n",
|
||
" LEFT JOIN stg_bayshop_litfad_sku sku ON sku.产品PID = spu.产品PID\n",
|
||
" LEFT JOIN dwd.dim_erp_sku_package_vol_info spvi ON sku.SKU = spvi.erp_sku\n",
|
||
" WHERE spu.SPU IN ({quoted_spus})\n",
|
||
" \"\"\"\n",
|
||
" df_chunk = pd.read_sql(sql, enginal)\n",
|
||
" result_list.append(df_chunk)\n",
|
||
" print(f\"已处理 {len(result_list) * 5000} 个SPU\")\n",
|
||
"\n",
|
||
"result = pd.concat(result_list, ignore_index=True)\n",
|
||
"\n",
|
||
"# 合并df\n",
|
||
"all_df = pd.merge(result,base_df, on=['SPU','SKU'], how='left')\n",
|
||
"all_df = all_df.drop_duplicates(subset=['SKU'])\n",
|
||
"all_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"len(all_df[all_df['产品品类'] == \"66 - Furniture\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"给SPU下的SKU分层级打标签,# 按 SPU 分组;在组内按 成本价升序 排序;成本价相同的 SKU 属于同一个层次;层次号就是「第几种不同的成本价」。从小到大排序\n",
|
||
" "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 取出产品品类为 66 - Furniture 的数据\n",
|
||
"\n",
|
||
"furniture_df = all_df[all_df['产品品类'] == \"66 - Furniture\"]\n",
|
||
"# 根据品类拿到对应表的队友SKU的标准/预设属性集\n",
|
||
"furniture_df['SKU'] = furniture_df['SKU'].astype(str)\n",
|
||
"with MySQLconnect('ods') as ods:\n",
|
||
" sku_list = furniture_df['SKU'].tolist()\n",
|
||
" placeholders = ','.join(['%s'] * len(sku_list))\n",
|
||
" sql = f\"\"\"\n",
|
||
" SELECT SKU,`标准/预设属性集` FROM erp_furniture_sku where SKU in ({placeholders})\"\"\"\n",
|
||
" sku_df = pd.read_sql(sql, ods.engine(), params=tuple(sku_list))\n",
|
||
"sku_df['SKU'] = sku_df['SKU'].astype(str)\n",
|
||
"# 合并数据\n",
|
||
"furniture_df = pd.merge(furniture_df, sku_df, on='SKU', how='left')\n",
|
||
"\n",
|
||
"# \n",
|
||
"furniture_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"cl_df = furniture_df.drop_duplicates(subset=['SKU'])\n",
|
||
"property_df = pd.read_excel(r'D:\\\\test\\\\logistics\\\\test_excel\\\\furniture-规格属性映射.xlsx')\n",
|
||
"# 筛选是否有效列为1的属性集,并以规格属性映射\t是否有效做成数据\n",
|
||
"property_df = property_df[property_df['是否有效'] == 1]\n",
|
||
"property_dict = {row['规格属性映射']: row['是否有效'] for index, row in property_df.iterrows()}\n",
|
||
"\n",
|
||
"# 属性统计\n",
|
||
"def parse_attr_set(attr_set_str):\n",
|
||
" \"\"\"解析属性集,返回排序后的 {属性值: 属性名} 的数据\"\"\"\n",
|
||
" mapping = {}\n",
|
||
" for item in attr_set_str.split(\";\"):\n",
|
||
" item = item.strip()\n",
|
||
" if not item:\n",
|
||
" continue\n",
|
||
" try:\n",
|
||
" left, value = item.split(\"~\", 1) # \"231012:大小~23206184:140*80*75\"\n",
|
||
" attr_name = left.split(\":\")[1]\n",
|
||
" value = value.split(\":\")[1]\n",
|
||
" if attr_name in property_dict.keys():\n",
|
||
" # 有效属性集\n",
|
||
" mapping[attr_name] = value\n",
|
||
" except Exception:\n",
|
||
" continue\n",
|
||
" \n",
|
||
" # 对数据按键进行排序,返回有序数据\n",
|
||
" return dict(sorted(mapping.items()))\n",
|
||
"\n",
|
||
"# 1. 为每个SKU解析有效属性集并创建新列\n",
|
||
"cl_df['有效属性集'] = cl_df['标准/预设属性集'].apply(\n",
|
||
" lambda x: parse_attr_set(x) if pd.notna(x) else {}\n",
|
||
")\n",
|
||
"\n",
|
||
"# 2. 将有效属性集转换为可比较的格式(排序后的元组)\n",
|
||
"cl_df['属性集签名'] = cl_df['有效属性集'].apply(\n",
|
||
" lambda x: tuple(sorted(x.items())) # 转换为排序后的元组,便于比较\n",
|
||
")\n",
|
||
"\n",
|
||
"cl_df['分组码'] = ''\n",
|
||
"\n",
|
||
"for spu in cl_df['SPU'].unique():\n",
|
||
" spu_indices = cl_df[cl_df['SPU'] == spu].index\n",
|
||
" spu_data = cl_df.loc[spu_indices]\n",
|
||
" \n",
|
||
" # 按属性集签名分组\n",
|
||
" signature_groups = {}\n",
|
||
" group_num = 1\n",
|
||
" \n",
|
||
" for idx in spu_indices:\n",
|
||
" signature = cl_df.loc[idx, '属性集签名']\n",
|
||
" \n",
|
||
" if signature not in signature_groups:\n",
|
||
" signature_groups[signature] = f\"{spu}_{group_num:03d}\"\n",
|
||
" group_num += 1\n",
|
||
" \n",
|
||
" cl_df.loc[idx, '分组码'] = signature_groups[signature]\n",
|
||
"cl_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# 哪几个层级有实际数据,估算其他没有数据的层级的数据\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 直接在原数据框添加组内编号\n",
|
||
"cl_df['组内编号'] = cl_df.groupby('SPU')['产品售价'].rank(method='dense').astype(int)\n",
|
||
"\n",
|
||
"cl_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def dict_to_json_str(data_dict):\n",
|
||
" \"\"\"将字典转换为JSON字符串\"\"\"\n",
|
||
" if not data_dict or pd.isna(data_dict):\n",
|
||
" return None\n",
|
||
" try:\n",
|
||
" return json.dumps(data_dict, ensure_ascii=False)\n",
|
||
" except (TypeError, ValueError):\n",
|
||
" return None\n",
|
||
"\n",
|
||
"def json_str_to_dict(json_str):\n",
|
||
" \"\"\"将JSON字符串转换为字典\"\"\"\n",
|
||
" if not json_str or pd.isna(json_str):\n",
|
||
" return {}\n",
|
||
" try:\n",
|
||
" return json.loads(json_str)\n",
|
||
" except (json.JSONDecodeError, TypeError):\n",
|
||
" return {}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import re\n",
|
||
"def split_packages(package_dict_str):\n",
|
||
" \"\"\"\n",
|
||
" 处理包裹数据,按体积排序并重新排列长宽高\n",
|
||
" Args:\n",
|
||
" package_dict_str: 包裹数据的JSON字符串或数据\n",
|
||
" \n",
|
||
" Returns:\n",
|
||
" dict: 排序后的包裹数据,键为1,2,3...(按体积降序)\n",
|
||
" \"\"\"\n",
|
||
" # 处理空值和非字符串/数据类型\n",
|
||
" if package_dict_str is None or pd.isna(package_dict_str):\n",
|
||
" return {}\n",
|
||
" \n",
|
||
" if isinstance(package_dict_str, (int, float)):\n",
|
||
" return {}\n",
|
||
" # 如果输入是字符串,先转换为数据\n",
|
||
" if isinstance(package_dict_str, str):\n",
|
||
" try:\n",
|
||
" package_dict = json.loads(package_dict_str)\n",
|
||
" except json.JSONDecodeError:\n",
|
||
" return {}\n",
|
||
" else:\n",
|
||
" package_dict = package_dict_str\n",
|
||
"\n",
|
||
" # 提取并处理每个包裹的数据\n",
|
||
" packages = []\n",
|
||
" for package_name, package_data in package_dict.items():\n",
|
||
" item = {}\n",
|
||
" for key, value in package_data.items():\n",
|
||
" try:\n",
|
||
" # 使用正则表达式提取数字部分\n",
|
||
" number_str = re.findall(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n",
|
||
" if number_str:\n",
|
||
" item[key] = float(number_str[0])\n",
|
||
" else:\n",
|
||
" item[key] = value\n",
|
||
" except ValueError:\n",
|
||
" item[key] = value\n",
|
||
" # 确保有长宽高数据\n",
|
||
" if all(k in item for k in ['长', '宽', '高']):\n",
|
||
" # 将长宽高按实际尺寸重新排序:最长边作为长,次长边作为宽,最短边作为高\n",
|
||
" dimensions = [item['长'], item['宽'], item['高']]\n",
|
||
" dimensions.sort(reverse=True) # 降序排列\n",
|
||
" # 重新赋值\n",
|
||
" item['长'] = dimensions[0] # 最长边\n",
|
||
" item['宽'] = dimensions[1] # 次长边\n",
|
||
" item['高'] = dimensions[2] # 最短边\n",
|
||
" # 计算体积\n",
|
||
" item['体积'] = dimensions[0] * dimensions[1] * dimensions[2]\n",
|
||
" packages.append(item)\n",
|
||
" # 按体积降序排序\n",
|
||
" packages.sort(key=lambda x: x.get('体积', 0), reverse=True)\n",
|
||
" # 构建结果数据\n",
|
||
" result = {}\n",
|
||
" for i, package in enumerate(packages, 1):\n",
|
||
" # 只保留长宽高和重量信息\n",
|
||
" result_package = {\n",
|
||
" '长': package.get('长', 0),\n",
|
||
" '宽': package.get('宽', 0),\n",
|
||
" '高': package.get('高', 0)\n",
|
||
" }\n",
|
||
" # 如果有重量信息也保留\n",
|
||
" if '重量' in package:\n",
|
||
" result_package['重量'] = package['重量']\n",
|
||
" result[i] = result_package\n",
|
||
" return dict_to_json_str(result)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 先把包裹数据拆成数据且排序\n",
|
||
"result_df = cl_df.copy()\n",
|
||
"for index, row in result_df.iterrows():\n",
|
||
" if row['ERP包裹数据'] is not None:\n",
|
||
" result_df.at[index, 'ERP包裹数据'] = split_packages(row['ERP包裹数据'])\n",
|
||
" result_df.at[index, '实际包裹数据'] = split_packages(row['实际包裹数据'])\n",
|
||
" print(result_df.at[index, '实际包裹数据'])\n",
|
||
" else:\n",
|
||
" result_df.at[index, 'ERP包裹数据'] = []\n",
|
||
" result_df.at[index, '实际包裹数据'] = []\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 确保订单时间是datetime类型\n",
|
||
"result_df['订单时间'] = pd.to_datetime(result_df['订单时间'])\n",
|
||
"\n",
|
||
"# 先找到每个SPU的最大订单时间\n",
|
||
"max_order_time = result_df.groupby('SPU')['订单时间'].transform('max')\n",
|
||
"\n",
|
||
"# 然后设置is_first\n",
|
||
"result_df['is_first'] = 0\n",
|
||
"result_df.loc[result_df['实际包裹数据'].notna() & (result_df['订单时间'] == max_order_time), 'is_first'] = 1\n",
|
||
"result_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 根据SPU基准SKU计算系数\n",
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"def cal_size_coefficients(erp_packages, actual_packages):\n",
|
||
" \"\"\"\n",
|
||
" 计算每个SPU的尺寸系数\n",
|
||
" \n",
|
||
" Args:\n",
|
||
" erp_packages: ERP包裹数据\n",
|
||
" actual_packages: 实际包裹数据\n",
|
||
" Returns:\n",
|
||
" dict: 尺寸系数数据\n",
|
||
" \"\"\"\n",
|
||
" # 检查输入是否有效\n",
|
||
" if not erp_packages or not actual_packages:\n",
|
||
" return {}\n",
|
||
" \n",
|
||
" # 获取包裹数量\n",
|
||
" erp_count = len(erp_packages)\n",
|
||
" actual_count = len(actual_packages)\n",
|
||
" \n",
|
||
" # 如果包裹数量不相等,取较小的数量\n",
|
||
" min_count = min(erp_count, actual_count)\n",
|
||
" \n",
|
||
" coefficients = {}\n",
|
||
" \n",
|
||
" for i in range(1, min_count + 1):\n",
|
||
" erp_pkg = erp_packages.get(str(i), {})\n",
|
||
" actual_pkg = actual_packages.get(str(i), {})\n",
|
||
" \n",
|
||
" # 检查必要的维度是否存在\n",
|
||
" if not all(k in erp_pkg for k in ['长', '宽', '高']) or not all(k in actual_pkg for k in ['长', '宽', '高']):\n",
|
||
" continue\n",
|
||
" \n",
|
||
" pkg_coefficients = {}\n",
|
||
" \n",
|
||
" # 计算长宽高系数\n",
|
||
" for dimension in ['长', '宽', '高']:\n",
|
||
" old_val = erp_pkg.get(dimension, 0)\n",
|
||
" new_val = actual_pkg.get(dimension, 0)\n",
|
||
" pkg_coefficients[f'{dimension}系数'] = cal_size(old_val, new_val)\n",
|
||
" \n",
|
||
" # 计算重量系数(如果存在)\n",
|
||
" if '重量' in erp_pkg and '重量' in actual_pkg:\n",
|
||
" old_weight = erp_pkg.get('重量', 0)\n",
|
||
" new_weight = actual_pkg.get('重量', 0)\n",
|
||
" pkg_coefficients['重量系数'] = cal_size(old_weight, new_weight)\n",
|
||
" \n",
|
||
" coefficients[i] = pkg_coefficients\n",
|
||
" \n",
|
||
" return dict_to_json_str(coefficients)\n",
|
||
"\n",
|
||
"def cal_size(old, new):\n",
|
||
" \"\"\"\n",
|
||
" 计算单个维度的系数\n",
|
||
" \"\"\"\n",
|
||
" try:\n",
|
||
" old = float(old)\n",
|
||
" new = float(new)\n",
|
||
" if old == 0:\n",
|
||
" return None\n",
|
||
" return (new - old) / old\n",
|
||
" except (ValueError, TypeError):\n",
|
||
" return None\n",
|
||
"\n",
|
||
"# 应用计算\n",
|
||
"# 首先筛选出is_first=1的行作为基准\n",
|
||
"first_rows = result_df[result_df['is_first'] == 1].copy()\n",
|
||
"\n",
|
||
"# 计算每个SPU的尺寸系数\n",
|
||
"for index, row in first_rows.iterrows():\n",
|
||
" erp_packages = json_str_to_dict(row['ERP包裹数据'])\n",
|
||
" actual_packages = json_str_to_dict(row['实际包裹数据'])\n",
|
||
" first_rows.at[index, '尺寸系数'] = cal_size_coefficients(erp_packages, actual_packages)\n",
|
||
" print(first_rows.at[index, '尺寸系数'])\n",
|
||
"\n",
|
||
"# 创建SPU到尺寸系数的映射\n",
|
||
"spu_coefficient_map = first_rows.set_index('SPU')['尺寸系数'].to_dict()\n",
|
||
"\n",
|
||
"# 将尺寸系数应用到所有相同SPU的行\n",
|
||
"result_df['尺寸系数'] = result_df['SPU'].map(spu_coefficient_map)\n",
|
||
"result_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\"\"\"\n",
|
||
"根据SPU尺寸系数,计算预测包裹数据,\n",
|
||
"1.如果分组码相同,筛选有实际数据的行,按分组码分组,在每个组内按订单时间降序排列,然后取每个组的第一条数据作为全组的估算包裹尺寸\n",
|
||
"2.如果分组码不同,则按照尺寸系数计算估算包裹尺寸\n",
|
||
" 2.1.如果尺寸系数为{},则跳过\n",
|
||
" 2.2.如果ERP包裹数>尺寸系数的数量,前面对应的ERP包裹根据尺寸系数计算得到新的ERP包裹尺寸((1+尺寸系数)*ERP包裹尺寸),多出来的包裹就取ERP包裹(也可以理解为尺寸系数都是0)\n",
|
||
" 2.3.如果ERP包裹数<=尺寸系数的数量,则按ERP包裹数计算估算包裹尺寸\n",
|
||
"\"\"\"\n",
|
||
"# 先筛选出order_id不为空的行,然后按分组码分组,在每个组内按订单时间降序排列,取每个组的第一条\n",
|
||
"latest_actual_data = (result_df\n",
|
||
" [result_df['order_id'].notna() & \n",
|
||
" result_df['实际包裹数据'].notna() & \n",
|
||
" (result_df['实际包裹数据'] != '')]\n",
|
||
" .sort_values(['分组码', '订单时间'], ascending=[True, False])\n",
|
||
" .groupby('分组码')\n",
|
||
" .first()\n",
|
||
" ['实际包裹数据'])\n",
|
||
"\n",
|
||
"# 映射回原DataFrame\n",
|
||
"result_df['估算包裹尺寸'] = result_df['分组码'].map(latest_actual_data)\n",
|
||
"\n",
|
||
"print(f\"order_id不为空的分组码数量: {len(latest_actual_data)}\")\n",
|
||
"print(f\"映射后估算包裹尺寸不为空的行数: {result_df['估算包裹尺寸'].notna().sum()}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"from math import ceil\n",
|
||
"import math\n",
|
||
"\n",
|
||
"\n",
|
||
"def cal_est_package(erp_packages, size_coefficient_str):\n",
|
||
" \"\"\"\n",
|
||
" 根据ERP包裹数据和尺寸系数计算估算包裹尺寸\n",
|
||
" \"\"\"\n",
|
||
" # 如果尺寸系数为空,返回None\n",
|
||
" if not size_coefficient_str or size_coefficient_str == '{}':\n",
|
||
" return None\n",
|
||
" \n",
|
||
" if not erp_packages or not size_coefficient_str:\n",
|
||
" return None\n",
|
||
" \n",
|
||
" erp_count = len(erp_packages)\n",
|
||
" coefficient_count = len(size_coefficient_str)\n",
|
||
" \n",
|
||
" estimated_packages = {}\n",
|
||
" \n",
|
||
" # 2.2 如果ERP包裹数 > 尺寸系数的数量\n",
|
||
" if erp_count > coefficient_count:\n",
|
||
" for i in range(1, erp_count + 1):\n",
|
||
" erp_pkg = erp_packages.get(str(i), {})\n",
|
||
" if not erp_pkg:\n",
|
||
" continue\n",
|
||
" \n",
|
||
" estimated_pkg = {}\n",
|
||
" \n",
|
||
" # 前面对应的包裹使用尺寸系数计算\n",
|
||
" if i <= coefficient_count:\n",
|
||
" coeff_pkg = size_coefficient_str.get(str(i), {})\n",
|
||
" for dimension in ['长', '宽', '高']:\n",
|
||
" erp_value = erp_pkg.get(dimension, 0)\n",
|
||
" coeff = coeff_pkg.get(f'{dimension}系数', 0)\n",
|
||
" if coeff is not None and erp_value is not None:\n",
|
||
" estimated_pkg[dimension] = (1 + coeff) * erp_value\n",
|
||
" else:\n",
|
||
" estimated_pkg[dimension] = erp_value\n",
|
||
" \n",
|
||
" # 处理重量\n",
|
||
" if '重量' in erp_pkg:\n",
|
||
" erp_weight = erp_pkg.get('重量', 0)\n",
|
||
" weight_coeff = coeff_pkg.get('重量系数', 0)\n",
|
||
" if weight_coeff is not None and erp_weight is not None:\n",
|
||
" estimated_pkg['重量'] = (1 + weight_coeff) * erp_weight\n",
|
||
" else:\n",
|
||
" estimated_pkg['重量'] = erp_weight\n",
|
||
" else:\n",
|
||
" # 多出来的包裹直接取ERP包裹数据(尺寸系数为0)\n",
|
||
" estimated_pkg = erp_pkg.copy()\n",
|
||
" \n",
|
||
" estimated_packages[str(i)] = estimated_pkg\n",
|
||
" \n",
|
||
" # 2.3 如果ERP包裹数 <= 尺寸系数的数量\n",
|
||
" else:\n",
|
||
" for i in range(1, erp_count + 1):\n",
|
||
" erp_pkg = erp_packages.get(str(i), {})\n",
|
||
" coeff_pkg = size_coefficient_str.get(str(i), {})\n",
|
||
" \n",
|
||
" if not erp_pkg:\n",
|
||
" continue\n",
|
||
" \n",
|
||
" estimated_pkg = {}\n",
|
||
" for dimension in ['长', '宽', '高']:\n",
|
||
" erp_value = erp_pkg.get(dimension, 0)\n",
|
||
" coeff = coeff_pkg.get(f'{dimension}系数', 0)\n",
|
||
" if coeff is not None and erp_value is not None:\n",
|
||
" estimated_pkg[dimension] = math.ceil((1 + coeff) * erp_value)\n",
|
||
" else:\n",
|
||
" estimated_pkg[dimension] = math.ceil(erp_value)\n",
|
||
" \n",
|
||
" # 处理重量\n",
|
||
" if '重量' in erp_pkg:\n",
|
||
" erp_weight = erp_pkg.get('重量', 0)\n",
|
||
" weight_coeff = coeff_pkg.get('重量系数', 0)\n",
|
||
" if weight_coeff is not None and erp_weight is not None:\n",
|
||
" estimated_pkg['重量'] = math.ceil((1 + weight_coeff) * erp_weight)\n",
|
||
" else:\n",
|
||
" estimated_pkg['重量'] = math.ceil(erp_weight)\n",
|
||
" \n",
|
||
" estimated_packages[str(i)] = estimated_pkg\n",
|
||
" \n",
|
||
" return dict_to_json_str(estimated_packages)\n",
|
||
"\n",
|
||
"\n",
|
||
"# 第二步:对于估算包裹尺寸为空字典或空列表的行,使用尺寸系数计算\n",
|
||
"different_group_rows = result_df[\n",
|
||
" result_df['估算包裹尺寸'].isna() \n",
|
||
"].copy()\n",
|
||
"print(f\"找到 {len(different_group_rows)} 行需要计算估算包裹尺寸\")\n",
|
||
"\n",
|
||
"# 对筛选出来的行进行计算\n",
|
||
"for index, row in different_group_rows.iterrows():\n",
|
||
" if not row['ERP包裹数据'] or not row['尺寸系数']:\n",
|
||
" continue\n",
|
||
" erp_packages = json_str_to_dict(row['ERP包裹数据'])\n",
|
||
" size_coefficients = json_str_to_dict(row['尺寸系数'])\n",
|
||
" estimated_packages = cal_est_package(erp_packages, size_coefficients)\n",
|
||
" different_group_rows.at[index, '估算包裹尺寸'] = estimated_packages\n",
|
||
" print(f\"处理第 {index} 行, SKU: {row['SKU']}, 估算结果: {estimated_packages}\")\n",
|
||
"\n",
|
||
"# 创建预估尺寸映射\n",
|
||
"est_map = different_group_rows.set_index('SKU')['估算包裹尺寸'].to_dict()\n",
|
||
"\n",
|
||
"# 只更新那些原来为空的行的估算包裹尺寸,保留第一步的结果\n",
|
||
"result_df.loc[result_df['估算包裹尺寸'].isna(), '估算包裹尺寸'] = result_df.loc[result_df['估算包裹尺寸'].isna(), 'SKU'].map(est_map)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"different_group_rows = result_df[\n",
|
||
" result_df['估算包裹尺寸'].isna() \n",
|
||
"].copy()\n",
|
||
"print(f\"找到 {len(different_group_rows)} 行需要计算估算包裹尺寸\")\n",
|
||
"result_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"result_df.to_excel(f\"D:\\\\test\\\\logistics\\\\test_excel\\\\估算尺寸\\\\furniture.xlsx\", index=False)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.5"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|