logistics/一票一件订单.ipynb

1094 lines
42 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from utils.gtools import MySQLconnect\n",
"\n",
"# Load delivered single-item orders together with their measured and ERP package data.\n",
"#   t1: order lines dated 2025-02-01 up to (not including) 2025-06-01, excluding reissued\n",
"#       lines and empty SKUs, aggregated per order.\n",
"#   t2: joins packages whose status is signed-for/delivered, with positive dimensions and\n",
"#       weight, measured from 2025-03-01 up to (not including) 2025-06-01, keeping orders\n",
"#       with product_num = 1 and 产品种类 = 1.\n",
"#   t3: attaches the ERP cost price and ERP package data, and ranks rows per SKU by\n",
"#       measurement time (newest first).\n",
"# The final SELECT keeps the newest row of each SKU (rn = 1).\n",
"with MySQLconnect('ods') as db:\n",
"    sql = r\"\"\" \n",
" WITH\n",
"t1 AS (\n",
"SELECT\n",
"order_id,\n",
"SKU,\n",
"order_date,\n",
"sum(CASE WHEN opl.order_product_id LIKE '%\\_%' ESCAPE '\\\\' \n",
" AND opl.order_product_id NOT LIKE '%\\_%\\_%' ESCAPE '\\\\' THEN product_num END) AS product_num,\n",
"DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n",
"count(opl.SKU) AS 产品种类\n",
"FROM\n",
"dws.fact_order_product_list opl\n",
"WHERE\n",
" NOT EXISTS (\n",
" SELECT 1 \n",
" FROM dws.log_order_reissue_detail AS r \n",
" WHERE r.order_product_id = opl.order_product_id\n",
" )\n",
"AND order_date >= \"20250201\"\n",
"AND order_date < \"20250601\"\n",
"AND SKU <> \"\"\n",
"GROUP BY order_id\n",
")\n",
",\n",
"t2 AS (\n",
"SELECT\t\t\t\n",
" a.`包裹测量时间`,\n",
"\t\t\t\t\t\tt1.order_id,\n",
"\t\t\t\t\t\tt1.SKU,\n",
"\t\t\t\t\t\tt1.order_date,\n",
" a.包裹号,\n",
" a.快递公司,\n",
" a.运输方式,\n",
"\t\t\t\t\t\ta.`目的国`,\n",
" d.postcode,\n",
" CONCAT(\n",
" '\"', b.package, '\": {',\n",
" '\"长\": ', length, ', ',\n",
" '\"宽\": ', width, ', ',\n",
" '\"高\": ', hight, ', ',\n",
" '\"重量\": ', weight, '}'\n",
" ) AS package_json\n",
" FROM\n",
"\t\t\t\tt1\n",
" LEFT JOIN order_express a ON t1.order_id = a.单号\n",
" JOIN package_vol_info b ON a.`包裹号` = b.package\n",
" JOIN order_list d ON a.`单号` = d.order_id \n",
" WHERE\n",
" a.`包裹状态` IN ( '客户签收', '已经投递') \n",
" AND b.hight > 0 \n",
" AND b.length > 0 \n",
" AND b.width > 0 \n",
" AND b.weight > 0\n",
" # AND a.`目的国` = \"United States\"\n",
"\t\t\t\t\t\tAND t1.product_num = 1\n",
"\t\t\t\t\t\tAND t1.产品种类=1\n",
"\t\t\t\t\t\tAND a.`包裹测量时间` >= '2025-03-01'\n",
"\t\t\t\t\t\tAND a.`包裹测量时间` < '2025-06-01'\n",
"),\n",
"t3 AS (\n",
"SELECT\n",
"t2.*,\n",
"sku.成本价 AS ERP采购价,\n",
"ess.包裹数据 AS ERP包裹数据,\n",
"CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,\n",
"ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn\n",
"FROM\n",
"t2\n",
"LEFT JOIN ads.new_erp_sku_size ess ON t2.SKU=ess.SKU\n",
"LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU\n",
"WHERE\n",
"ess.`包裹数据`<>''\n",
"GROUP BY order_id\n",
")\n",
"SELECT\n",
"包裹测量时间,\n",
"order_id,\n",
"SKU,\n",
"DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n",
"包裹号,\n",
"`快递公司`,\n",
"`运输方式`,\n",
"`目的国`,\n",
"postcode,\n",
"ERP采购价,\n",
"ERP包裹数据,\n",
"实际包裹数据\n",
"FROM\n",
"t3\n",
"WHERE\n",
"rn=1\n",
"\n",
"\n",
"    \"\"\"\n",
"    # The final DATE_FORMAT uses %m/%d (numeric month/day); %M/%D would render the month\n",
"    # name and the day with an English suffix.\n",
"    df = pd.read_sql(sql, db.con)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Distinct order ids of the loaded frame, plus the same ids rendered as a quoted,\n",
"# comma-separated list suitable for a SQL IN (...) clause.\n",
"list_order_id = df[\"order_id\"].drop_duplicates().tolist()\n",
"param_order_id = \",\".join([\"'{}'\".format(order_id) for order_id in list_order_id])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"先计算美国的实际利润率"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from utils.gtools import MySQLconnect\n",
"\n",
"# Look up the purchase cost of every order in df and attach it as the 采购成本 column.\n",
"ods = MySQLconnect(\"ods\")\n",
"engine = ods.engine()\n",
"\n",
"# Number of order ids placed into one IN (...) list; batching keeps the statement bounded.\n",
"batch_size = 50000\n",
"order_id_list = df[\"order_id\"].drop_duplicates().tolist()\n",
"\n",
"# One result frame per batch.\n",
"result_dfs1 = []\n",
"for i in range(0, len(order_id_list), batch_size):\n",
"    batch_order_ids = order_id_list[i:i + batch_size]\n",
"    param = \",\".join(f\"'{order_id}'\" for order_id in batch_order_ids)\n",
"\n",
"    # Cost per order: in-stock cost (outstock qty * instock price) when such rows exist,\n",
"    # otherwise the completed purchase cost (buy_num * actual_price).\n",
"    purchase_order_sql = f\"\"\"\n",
"    WITH t1 AS (\n",
"    SELECT LEFT(ol.out_detials_outlink_id, 15) AS order_id,\n",
"    SUM(out_detials_qty * price) AS instock_cost,\n",
"    NULL AS buy_cost\n",
"    FROM ods.outstock_list ol\n",
"    JOIN ods.instock_list il ON ol.store_in_id = il.id \n",
"    WHERE LEFT(ol.out_detials_outlink_id, 15) IN ({param})\n",
"    GROUP BY LEFT(ol.out_detials_outlink_id, 15)\n",
"    \n",
"    UNION ALL\n",
"    \n",
"    SELECT LEFT(order_product_id, 15) AS order_id, \n",
"    NULL AS instock_cost,\n",
"    SUM(buy_num * actual_price) AS buy_cost\n",
"    FROM warehouse_purchasing\n",
"    WHERE LEFT(order_product_id, 15) IN ({param}) \n",
"    AND buy_audit = \"采购完成\"\n",
"    GROUP BY LEFT(order_product_id, 15)\n",
"    )\n",
"    SELECT order_id,\n",
"    SUM(CASE \n",
"    WHEN instock_cost IS NULL THEN buy_cost\n",
"    ELSE instock_cost \n",
"    END) AS 采购成本\n",
"    FROM t1 \n",
"    GROUP BY order_id\n",
"    \"\"\"\n",
"\n",
"    batch_df1 = pd.read_sql(purchase_order_sql, con=engine)\n",
"    result_dfs1.append(batch_df1)\n",
"    # Report the number of ids actually processed (the last batch may be short).\n",
"    print(f\"已完成 {min(i + batch_size, len(order_id_list))} 个 order_id 的查询\")\n",
"\n",
"# pd.concat raises on an empty list, so fall back to an empty frame with the expected columns.\n",
"if result_dfs1:\n",
"    purchase_order_df1 = pd.concat(result_dfs1, ignore_index=True)\n",
"else:\n",
"    purchase_order_df1 = pd.DataFrame(columns=[\"order_id\", \"采购成本\"])\n",
"purchase_order_df1[\"order_id\"] = purchase_order_df1[\"order_id\"].astype(str)\n",
"\n",
"# Both merge keys must be strings to match.\n",
"df[\"order_id\"] = df[\"order_id\"].astype(str)\n",
"\n",
"# Drop a 采购成本 column left by a previous run so re-running does not create _x/_y duplicates.\n",
"df = pd.merge(\n",
"    df.drop(columns=[\"采购成本\"], errors=\"ignore\"),\n",
"    purchase_order_df1,\n",
"    on=\"order_id\",\n",
"    how=\"left\",\n",
")\n",
"df.to_clipboard(index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Replace the working frame with whatever table is currently on the clipboard.\n",
"# NOTE(review): read_clipboard re-infers column dtypes from text, so string-like ids such as\n",
"# postcode may come back as numbers - confirm this is acceptable for the cells below.\n",
"df = pd.read_clipboard()\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import re\n",
"\n",
"import pandas as pd\n",
"import requests\n",
"\n",
"from utils.countryOperator import OperateCountry\n",
"from utils.logisticsBill import BillFactory, Billing\n",
"from utils.Package import Package, Package_group\n",
"\n",
"\n",
"def extract_number(value):\n",
"    \"\"\"Return the first number found in ``value`` as a float, or 0.0 when there is none.\"\"\"\n",
"    match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n",
"    return float(match.group()) if match else 0.0\n",
"\n",
"\n",
"# Bill every row of df as a US shipment and store the head-leg amount in 头程CNY.\n",
"opCountry = OperateCountry('US')\n",
"for index, row in df.iterrows():\n",
"    postcode = row['postcode']\n",
"    if pd.isna(postcode) or str(postcode).lower() == \"nan\":\n",
"        continue\n",
"    try:\n",
"        package_dict = json.loads(row['实际包裹数据'])\n",
"    except Exception as e:\n",
"        print(f\"行 {index} 解析失败: {e}\")\n",
"        print(row['实际包裹数据'])\n",
"        continue\n",
"\n",
"    packages = Package_group()\n",
"    valid_package_count = 0\n",
"    for key, package in package_dict.items():\n",
"        package['长'] = extract_number(package['长'])\n",
"        package['宽'] = extract_number(package['宽'])\n",
"        package['高'] = extract_number(package['高'])\n",
"        package['重量'] = extract_number(package['重量'])\n",
"\n",
"        # A package with any zero measurement cannot be billed.\n",
"        if package['长'] == 0 or package['宽'] == 0 or package['高'] == 0 or package['重量'] == 0:\n",
"            continue\n",
"        packages.add_package(Package(key, package['长'], package['宽'], package['高'], package['重量']))\n",
"        valid_package_count += 1\n",
"    # Package_group() is never None, so emptiness is tracked with an explicit counter.\n",
"    if valid_package_count == 0:\n",
"        continue\n",
"\n",
"    # str() guards against a missing (NaN) transport mode, which is a float and not iterable.\n",
"    if \"海运\" in str(row['运输方式']):\n",
"        head_type = 1\n",
"    else:\n",
"        head_type = 0\n",
"\n",
"    # A per-carrier company_name mapping was drafted here but is disabled:\n",
"    #   FEDEX-SAIR-G -> Fedex-GROUD, FEDEX-SAIR-H -> Fedex-HOME, FEDEX02 -> Fedex-彩虹小马,\n",
"    #   大包 / 海MS-FEDEX -> Fedex-金宏亚, GIGA -> 大健-GIGA, CEVA -> 大健-CEVA,\n",
"    #   USPS -> Fedex-GROUD, anything else -> 大健-Metro.\n",
"    # Every row is currently billed as \"Fedex-GROUD\".\n",
"    bill = Billing(str(index), opCountry, packages, postcode, company_name=\"Fedex-GROUD\", head_type=head_type, beizhu='1')\n",
"    head_price = bill.head_amount[0]\n",
"    tail_price = bill.tail_amount[0]\n",
"    if \"USPS\" in row['快递公司']:\n",
"        tail_price = tail_price / 2\n",
"    # NOTE(review): tail_price is computed (and halved for USPS) but never written to df;\n",
"    # only the head-leg amount is stored - confirm whether a tail column is missing.\n",
"    df.loc[index, '头程CNY'] = head_price\n",
"    print(f\"行 {index} 处理完成\")\n",
"\n",
"df.to_clipboard(index=False)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rows whose destination country is the United States.\n",
"is_us_destination = df['目的国'] == 'United States'\n",
"us_df = df[is_us_destination]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils.countryOperator import OperateCountry\n",
"from utils.logisticsBill import BillFactory\n",
"from utils.Package import Package, Package_group\n",
"import pandas as pd\n",
"\n",
"# Load the global order sheet and keep the rows delivered through the 大健-Metro or 大健-CEVA\n",
"# tail channels.\n",
"df1 = pd.read_excel(r\"D:\\test\\logistics\\拦截数据\\1-3月利润分段.xlsx\", sheet_name=\"11-4月全球订单\")\n",
"# .copy() makes df an independent frame; the next cell assigns a new column into it, which\n",
"# would otherwise be a write into a filtered slice of df1 (SettingWithCopyWarning).\n",
"df = df1[df1['尾端渠道'].isin([\"大健-Metro\", \"大健-CEVA\"])].copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils.countryOperator import OperateCountry\n",
"from utils.logisticsBill import BillFactory, Billing\n",
"from utils.Package import Package, Package_group\n",
"\n",
"# Re-bill every row as a single-package \"大健-Metro\" shipment and store the tail amount in 美西;\n",
"# rows that cannot be billed are marked \"不可算\".\n",
"opCountry = OperateCountry('US')\n",
"billFactory = BillFactory()\n",
"for index, row in df.iterrows():\n",
"    print(row['postcode'])\n",
"    package = Package(row['包裹号'], row['长'], row['宽'], row['高'], row['重量'])\n",
"    packages = Package_group([package])\n",
"    postcode = row['postcode']\n",
"    head_type = 1 if row['运输方式'] == '海运' else 0\n",
"    try:\n",
"        bill = Billing(str(row['包裹号']), opCountry, packages, postcode=str(postcode), company_name=\"大健-Metro\", head_type=head_type, beizhu=0)\n",
"        df.loc[index, \"美西\"] = bill.tail_amount[0]\n",
"    except Exception as e:\n",
"        # Catch Exception rather than everything so Ctrl-C / kernel interrupt still stops\n",
"        # the loop, and report why the row could not be billed.\n",
"        bill = None\n",
"        df.loc[index, \"美西\"] = \"不可算\"\n",
"        print(f\"包裹 {row['包裹号']} 报错: {e}\")\n",
"\n",
"    print(index)\n",
"    print(bill)\n",
"\n",
"df.to_clipboard(index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"from data.us_zone import zone_west\n",
"\n",
"\n",
"def get_west_zone(postcode):\n",
"    \"\"\"Return the US-west zone key for a postcode.\n",
"\n",
"    ``postcode`` may be a string or a number; it must start with five digits\n",
"    (optionally followed by ``-`` and four digits). Only the first five digits\n",
"    are used for the lookup.\n",
"\n",
"    Returns the matching key of ``zone_west``, \"邮编格式不合法\" when the postcode\n",
"    does not have the expected format, or \"未查询到邮编分区\" when no zone matches.\n",
"    \"\"\"\n",
"    # Postcodes read from Excel / the clipboard may be numeric; re.match needs a string.\n",
"    postcode = str(postcode).strip()\n",
"    if not re.match(r'\\d{5}-\\d{4}|\\d{5}', postcode):\n",
"        return \"邮编格式不合法\"\n",
"    postcode = int(postcode[:5])\n",
"    for zone, postcodes in zone_west.items():\n",
"        if not postcodes:\n",
"            continue\n",
"        # Quick reject using the first and last entries of the zone's list.\n",
"        # presumably each list is sorted ascending - verify against data.us_zone.\n",
"        start = int(postcodes[0].split(\"-\")[0])\n",
"        end = int(postcodes[-1].split(\"-\")[-1])\n",
"        if start > postcode or postcode > end:\n",
"            continue\n",
"        for postcode_range in postcodes:\n",
"            if \"-\" in postcode_range:\n",
"                start, end = map(int, postcode_range.split(\"-\"))\n",
"                if start <= postcode <= end:\n",
"                    return zone\n",
"            # A single-postcode entry: compare the entry itself (not the zone key).\n",
"            elif int(postcode_range) == postcode:\n",
"                return zone\n",
"    return \"未查询到邮编分区\"\n",
"\n",
"\n",
"df['分区'] = df['postcode'].apply(get_west_zone)\n",
"df.to_clipboard(index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fetch every SKU (with its cost price and ERP package data) that belongs to the same SPU\n",
"# as one of the SKUs in order_id_df_cal.\n",
"import json\n",
"\n",
"import pandas as pd\n",
"\n",
"from sell.sell_price import call_sell_and_order_price\n",
"from utils.gtools import MySQLconnect\n",
"\n",
"# NOTE(review): order_id_df_cal is not defined in any cell of this notebook - it must\n",
"# already exist in the kernel for this cell to run.\n",
"sku_list = (\n",
"    pd.to_numeric(order_id_df_cal['SKU'], errors='coerce')  # non-numeric SKUs become NaN\n",
"    .dropna()\n",
"    .astype(int)\n",
"    .astype(str)\n",
"    .tolist()\n",
")\n",
"\n",
"if not sku_list:\n",
"    # An empty list would render as \"IN ()\", which is a SQL syntax error.\n",
"    result = pd.DataFrame(columns=['SPU', 'SKU', '成本价', '包裹数据'])\n",
"else:\n",
"    placeholders = ','.join(['%s'] * len(sku_list))\n",
"    with MySQLconnect('ods') as db:\n",
"        enginal = db.engine()\n",
"        sql = f\"\"\"SELECT\n",
"        SPU,\n",
"        sku.SKU,\n",
"        sku.`成本价`,\n",
"        spi.`包裹数据`\n",
"        FROM\n",
"        stg_bayshop_litfad_sku sku\n",
"        LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` \n",
"        LEFT JOIN ads.new_erp_sku_size spi ON sku.SKU =spi.SKU\n",
"        WHERE\n",
"        spu.SPU IN (\n",
"        SELECT\n",
"        SPU \n",
"        FROM\n",
"        stg_bayshop_litfad_sku sku\n",
"        LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` \n",
"        WHERE\n",
"        sku.SKU IN ({placeholders}) \n",
"        )\n",
"        \"\"\"\n",
"        result = pd.read_sql(sql, enginal, params=tuple(sku_list))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from sell.sell_price import call_sell_and_order_price\n",
"\n",
"# For every row, price the order from its purchase cost and measured package data and\n",
"# write the pricing components back into df.\n",
"for index, row in df.iterrows():\n",
"    price = row['采购成本']\n",
"    try:\n",
"        package_dict = json.loads(row['实际包裹数据'])\n",
"        sell_price, order_price, order_type = call_sell_and_order_price(price, package_dict)\n",
"    except Exception as e:\n",
"        print(f\"SKU: {row['SKU']} 报错: {e}\")\n",
"        continue\n",
"    # A sell price of 0 is treated as \"nothing to record\" for this row.\n",
"    if sell_price == 0:\n",
"        continue\n",
"    # The first four entries of sell_price map, in order, to these columns.\n",
"    for position, column in enumerate(('实际应有售价', '实际物流分摊费', '实际头程cny', '实际头程USD')):\n",
"        df.loc[index, column] = sell_price[position]\n",
"    df.loc[index, '订单物流费'] = order_price\n",
"    df.loc[index, '尾端类型'] = order_type\n",
"    print(f\" SKU {row['SKU']} \")\n",
"df.to_clipboard(index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"写计算物流费的海运空运函数\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"\n",
"# Price tables read from the spreadsheet: column I holds the weight bracket limits, column J\n",
"# the ocean express prices, columns K and L the air prices for small and big parcels.\n",
"express_price = pd.read_excel(r'D:\\test\\logistics\\data\\售价尾端价格.xlsx', sheet_name='Sheet1')\n",
"key_column = express_price.iloc[:, 8]\n",
"value_column = express_price.iloc[:, 9]\n",
"small_column = express_price.iloc[:, 10]\n",
"big_column = express_price.iloc[:, 11]\n",
"air_small_dict = dict(zip(key_column, small_column))\n",
"air_big_dict = dict(zip(key_column, big_column))\n",
"ocean_price_dict = dict(zip(key_column, value_column))\n",
"\n",
"\n",
"def ocean_order_price(packages):\n",
"    \"\"\"Return ``(fee, type_label)`` for the cheaper of express and LTL (卡派) delivery.\n",
"\n",
"    ``packages`` is iterated twice; each package must expose ``weight``, ``fst_size``,\n",
"    ``sed_size``, ``girth``, ``length``, ``width`` and ``height``.\n",
"    \"\"\"\n",
"    # ---- Express: bracket price per package plus surcharges ----\n",
"    express_fee = 0  # sum of bracket prices\n",
"    long_fee = 0  # over-length surcharge\n",
"    weight_fee = 0  # over-weight surcharge\n",
"    big_fee = 0  # large-package surcharge\n",
"    express_type = ''\n",
"    for package in packages:\n",
"        # First bracket (in table order) whose limit covers the package weight.\n",
"        for bracket_limit, bracket_price in ocean_price_dict.items():\n",
"            if package.weight <= bracket_limit:\n",
"                express_fee += bracket_price\n",
"                break\n",
"        if package.fst_size >= 116 or package.sed_size >= 71 or package.girth >= 251:\n",
"            long_fee += 16.3\n",
"            express_type += \"超长\"\n",
"        if package.weight >= 21000 and package.fst_size < 238 and package.girth < 315:\n",
"            weight_fee += 25.5\n",
"            express_type += \"超重\"\n",
"        if package.fst_size >= 238 or package.girth >= 315:\n",
"            big_fee += 61.6\n",
"            express_type += \"大包裹\"\n",
"    express_fee = express_fee + long_fee + weight_fee + big_fee\n",
"\n",
"    # ---- LTL: volume-based base fee plus surcharges charged per started group of 3 packages ----\n",
"    overlength_count = 0\n",
"    overweight_count = 0\n",
"    heavy_overweight_count = 0\n",
"    oversize_count = 0\n",
"    sku_total_cubic_feet = 0\n",
"    for package in packages:\n",
"        cubic_feet = package.length * package.width * package.height / 1000000 * 35.3\n",
"        sku_total_cubic_feet += cubic_feet\n",
"        if package.fst_size >= 250:\n",
"            overlength_count += 1\n",
"        if package.weight >= 111000:\n",
"            overweight_count += 1\n",
"        if package.weight >= 130000:\n",
"            heavy_overweight_count += 1\n",
"        if package.fst_size >= 310:\n",
"            oversize_count += 1\n",
"\n",
"    # Label order matches the fee components: heavy over-weight, over-length, over-weight, oversize.\n",
"    ltl_type = '卡派'\n",
"    ltl_type += '超重' if heavy_overweight_count else ''\n",
"    ltl_type += '超长' if overlength_count else ''\n",
"    ltl_type += '超重' if overweight_count else ''\n",
"    ltl_type += '大包裹' if oversize_count else ''\n",
"\n",
"    # Base fee by total volume in cubic feet (volume / 1000000 * 35.3).\n",
"    if sku_total_cubic_feet < 25:\n",
"        ltl_base = round(163 / 0.45 / 2, 2)  # 181.11\n",
"    elif sku_total_cubic_feet < 35:\n",
"        ltl_base = round(180 / 0.45 / 2, 2)  # 200\n",
"    else:\n",
"        # From 35 cubic feet upward: 5 per cubic foot with a minimum of 190.\n",
"        ltl_base = round(max(190, 5 * sku_total_cubic_feet) / 0.359 / 2)\n",
"\n",
"    ltl_fee = (\n",
"        math.ceil(overlength_count / 3) * (118 if overlength_count else 0)\n",
"        + math.ceil(overweight_count / 3) * (78 if overweight_count else 0)\n",
"        + math.ceil(heavy_overweight_count / 3) * (30 if heavy_overweight_count else 0)\n",
"        + math.ceil(oversize_count / 3) * (30 if oversize_count else 0)\n",
"        + ltl_base\n",
"    )\n",
"\n",
"    if ltl_fee < express_fee:\n",
"        return ltl_fee, ltl_type\n",
"    return express_fee, express_type\n",
"\n",
"\n",
"def air_order_price(packages):\n",
"    \"\"\"Return ``(fee, type_label)`` summed over all packages for air delivery.\n",
"\n",
"    Each package must expose ``weight``, ``fst_size``, ``sed_size``, ``trd_size``,\n",
"    ``density`` and ``get_volume_weight(divisor)``.\n",
"    \"\"\"\n",
"\n",
"    def fits_small_parcel(package):\n",
"        return package.fst_size <= 50 and package.sed_size <= 40 and package.trd_size <= 30\n",
"\n",
"    express_fee = 0\n",
"    express_type = ''\n",
"    for package in packages:\n",
"        bill_weight = max(package.weight, package.get_volume_weight(8500))\n",
"        # Choose the price table and the weight used to find the bracket.\n",
"        if package.weight <= 420 and fits_small_parcel(package):\n",
"            price_table, lookup_weight = air_small_dict, package.weight\n",
"        elif package.weight <= 2718 and fits_small_parcel(package):\n",
"            price_table, lookup_weight = air_small_dict, bill_weight\n",
"        else:\n",
"            price_table, lookup_weight = air_big_dict, bill_weight\n",
"        price = 0\n",
"        for bracket_limit, bracket_price in price_table.items():\n",
"            if lookup_weight <= bracket_limit:\n",
"                price = bracket_price\n",
"                break\n",
"\n",
"        # The fee formula and label depend only on the actual weight.\n",
"        if package.weight <= 420:\n",
"            express_fee += ((((min(max(package.density,37),337)*0.093+27.7)/6+0.65)*package.get_volume_weight(6000))*0.3+price)/0.45\n",
"            express_type += 'USPS'\n",
"        elif package.weight <= 2718:\n",
"            express_fee += (((min(max(package.density,37),337)*0.093+27.7)/6+0.65)*package.get_volume_weight(8500)*0.3+price)/0.45\n",
"            express_type += 'UandF'\n",
"        else:\n",
"            express_fee += (((min(max(package.density,37),337)*0.093+27.7-1.08)/6+0.65-1.06)*package.get_volume_weight(8500))/0.45+price\n",
"            express_type += 'FEDEX'\n",
"    return express_fee, express_type\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Compute the logistics fee and tail type of every row from its ERP package data.\n",
"import json\n",
"import re\n",
"\n",
"from utils.Package import Package, Package_group\n",
"\n",
"\n",
"def extract_number(value):\n",
"    \"\"\"Return the first number found in ``value`` as a float, or 0.0 when there is none.\"\"\"\n",
"    match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n",
"    return float(match.group()) if match else 0.0\n",
"\n",
"\n",
"for index, row in df.iterrows():\n",
"    try:\n",
"        package_dict = json.loads(row['ERP包裹数据'])\n",
"    except Exception as e:\n",
"        print(f\"行 {index} 解析失败: {e}\")\n",
"        # Show the value that actually failed to parse.\n",
"        print(row['ERP包裹数据'])\n",
"        continue\n",
"\n",
"    packages = Package_group()\n",
"    valid_package_count = 0\n",
"    for key, package in package_dict.items():\n",
"        package['长'] = extract_number(package['长'])\n",
"        package['宽'] = extract_number(package['宽'])\n",
"        package['高'] = extract_number(package['高'])\n",
"        package['重量'] = extract_number(package['重量'])\n",
"\n",
"        # A package with any zero measurement cannot be priced.\n",
"        if package['长'] == 0 or package['宽'] == 0 or package['高'] == 0 or package['重量'] == 0:\n",
"            continue\n",
"        packages.add_package(Package(key, package['长'], package['宽'], package['高'], package['重量']))\n",
"        valid_package_count += 1\n",
"    # Package_group() is never None, so emptiness is tracked with an explicit counter.\n",
"    if valid_package_count == 0:\n",
"        continue\n",
"\n",
"    if row['运输方式'] == '海运':\n",
"        order_fee, order_type = ocean_order_price(packages)\n",
"    else:\n",
"        order_fee, order_type = air_order_price(packages)\n",
"\n",
"    df.loc[index, 'ERP物流费'] = order_fee\n",
"    df.loc[index, '尾端类型'] = order_type\n",
"    print(order_fee, order_type)\n",
"df.to_clipboard(index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# For one product category, fetch the furniture SKU attributes of every SKU that shares an\n",
"# SPU with the category's SKUs, merge them onto the category rows and export to Excel.\n",
"import json\n",
"\n",
"import pandas as pd\n",
"\n",
"from utils.gtools import MySQLconnect\n",
"\n",
"# Categories are processed one at a time.\n",
"category = \"66 - Furniture\"\n",
"# NOTE(review): order_id_df_cal is not defined in any cell of this notebook - it must\n",
"# already exist in the kernel for this cell to run.\n",
"df_one = order_id_df_cal[order_id_df_cal['产品品类'] == category]\n",
"sku_list = (\n",
"    pd.to_numeric(df_one['SKU'], errors='coerce')  # non-numeric SKUs become NaN\n",
"    .dropna()\n",
"    .astype(int)\n",
"    .astype(str)\n",
"    .tolist()\n",
")\n",
"\n",
"if not sku_list:\n",
"    # An empty list would render as \"IN ()\", which is a SQL syntax error.\n",
"    result = pd.DataFrame(columns=['SKU', 'ERP采购价', '规格', 'cpmaso规格', '标准/预设属性集', '自定义属性集'])\n",
"else:\n",
"    placeholders = ','.join(['%s'] * len(sku_list))\n",
"    with MySQLconnect('ods') as db:\n",
"        enginal = db.engine()\n",
"        sql = f\"\"\"SELECT\n",
"        SKU,\n",
"        价格 AS ERP采购价,\n",
"        规格,\n",
"        cpmaso规格,\n",
"        `标准/预设属性集`,\n",
"        自定义属性集\n",
"\n",
"        FROM\n",
"        erp_furniture_sku sku\n",
"        LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`\n",
"        WHERE\n",
"        spu.SPU IN (\n",
"        SELECT\n",
"        SPU \n",
"        FROM\n",
"        stg_bayshop_litfad_sku sku\n",
"        LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` \n",
"        WHERE\n",
"        sku.SKU IN ({placeholders}) \n",
"        )\n",
"        \"\"\"\n",
"        result = pd.read_sql(sql, enginal, params=tuple(sku_list))\n",
"\n",
"df_one = pd.merge(df_one, result, on=['SKU'], how='left')\n",
"df_one.to_excel(f'{category}.xlsx')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. 找到SPU最新到仓的SKU及其体积\n",
"2. 根据算法F(x)计算这个SPU的其他SKU理论长宽高重量\n",
"3. 找到该SPU其他有过实际体积的SKU,记录这些实际体积\n",
"4. 分析这些SKU的实际体积和理论体积的差距"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Working frame for the coefficient analysis: the single-package sheet of the shipped-orders\n",
"# workbook. The cells below derive growth coefficients (cal_size) from it.\n",
"df = pd.read_excel(\n",
"    r'D:\\test\\logistics\\拦截数据\\一票一件发货订单.xlsx',\n",
"    sheet_name=\"单包裹数据\",\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import re\n",
"\n",
"# Split the ERP package JSON of every row into ERP长 / ERP宽 / ERP高 / ERP重量, with the three\n",
"# dimensions sorted so that ERP长 >= ERP宽 >= ERP高. Rows that cannot be parsed are skipped.\n",
"for index, row in df.iterrows():\n",
"    if not isinstance(row['包裹数据'], str) or not row['包裹数据']:\n",
"        print(f\"第{index}行包裹数据为空或非字符串,跳过\")\n",
"        continue\n",
"    try:\n",
"        package_dict = json.loads(row['包裹数据'])\n",
"    except json.JSONDecodeError as e:\n",
"        print(f\"解析失败:第{index}行,错误信息:{e}\")\n",
"        continue\n",
"\n",
"    # Reset per row so an empty package dict cannot reuse the previous row's values.\n",
"    item = None\n",
"    try:\n",
"        # When several packages are present, the last one wins.\n",
"        for package in package_dict.values():\n",
"            item = {}\n",
"            for key, value in package.items():\n",
"                # Keep the first number found in the value; keep the raw value if there is none.\n",
"                number_str = re.findall(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n",
"                item[key] = float(number_str[0]) if number_str else value\n",
"    except AttributeError:\n",
"        print(f\"解析失败:第{index}行,错误信息:包裹数据为空\")\n",
"        continue\n",
"    if item is None or not all(isinstance(item.get(field), float) for field in ('长', '宽', '高', '重量')):\n",
"        print(f\"解析失败:第{index}行,错误信息:包裹数据缺少长宽高或重量\")\n",
"        continue\n",
"\n",
"    height, width, length = sorted([item['长'], item['宽'], item['高']])\n",
"    weight = item['重量']\n",
"    df.loc[index, 'ERP长'] = length\n",
"    df.loc[index, 'ERP宽'] = width\n",
"    df.loc[index, 'ERP高'] = height\n",
"    df.loc[index, 'ERP重量'] = weight\n",
"    print(f\"{row['SKU']}尺寸为:{width},h:{height},d:{length},w:{weight}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"\n",
"def cal_size(old, new):\n",
"    \"\"\"Return the relative change ``(new - old) / old``.\n",
"\n",
"    Returns ``None`` when ``old`` is 0 or when either value cannot be converted to float.\n",
"    \"\"\"\n",
"    try:\n",
"        old = float(old)\n",
"        new = float(new)\n",
"        if old == 0:\n",
"            return None  # avoid dividing by zero\n",
"        return (new - old) / old\n",
"    except (ValueError, TypeError):\n",
"        return None\n",
"\n",
"\n",
"# Baseline row per SPU: the row with the largest 订单月份 (the first one on ties).\n",
"# Selecting by idxmax keeps the SPU column and the original dtypes, and does not depend on\n",
"# groupby.apply passing the grouping column to the applied function.\n",
"latest_row_labels = df.groupby('SPU')['订单月份'].idxmax().dropna()\n",
"first_df = df.loc[latest_row_labels].reset_index(drop=True)\n",
"\n",
"# Growth coefficient of each measurement: actual value relative to the ERP value.\n",
"# dtype='float64' turns cal_size's None into NaN so later arithmetic on the column works.\n",
"for dimension in ('长', '宽', '高', '重量'):\n",
"    first_df[f'{dimension}系数'] = pd.Series(\n",
"        [cal_size(old, new) for old, new in zip(first_df[f'ERP{dimension}'], first_df[dimension])],\n",
"        index=first_df.index,\n",
"        dtype='float64',\n",
"    )\n",
"\n",
"first_df[['SPU', '长系数', '宽系数', '高系数', '重量系数']].head()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Attach each SPU's coefficients to df and derive the theoretical size of every SKU as\n",
"# (1 + coefficient) * ERP value, rounded to 2 decimals.\n",
"coefficient_columns = ['长系数', '宽系数', '高系数', '重量系数']\n",
"\n",
"# Drop coefficient columns left by a previous run so re-running this cell does not produce\n",
"# _x/_y suffixed duplicates.\n",
"df = pd.merge(\n",
"    df.drop(columns=coefficient_columns, errors='ignore'),\n",
"    first_df[['SPU'] + coefficient_columns],\n",
"    on='SPU',\n",
"    how='left',\n",
")\n",
"print(\"合并完成\")\n",
"\n",
"for dimension in ('长', '宽', '高', '重量'):\n",
"    df[f'理论{dimension}'] = ((1 + df[f'{dimension}系数']) * df[f'ERP{dimension}']).round(2)\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Package numbers of the per-SPU baseline rows held in first_df.\n",
"first_packages = set(first_df['包裹号'])\n",
"\n",
"# Flag rows whose package number is one of the baseline packages: 1 if so, otherwise 0.\n",
"df['is_first'] = [1 if package_no in first_packages else 0 for package_no in df['包裹号']]\n",
"df.to_clipboard(index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Price each single-package order twice - with its measured size and with its theoretical\n",
"# (coefficient-adjusted) size - and record sell price, order price and order type for both.\n",
"from sell.sell_price import call_sell_and_order_price\n",
"\n",
"for index, row in df.iterrows():\n",
"    price = row['成本价']\n",
"    package_dict1 = {}\n",
"    package_dict2 = {}\n",
"    try:\n",
"        # package_dict1: measured size; package_dict2: theoretical size.\n",
"        package_dict1['包裹1'] = {field: row[field] for field in ('长', '宽', '高', '重量')}\n",
"        package_dict2['包裹1'] = {field: row['理论' + field] for field in ('长', '宽', '高', '重量')}\n",
"        sell_price1, order_price1, order_type1 = call_sell_and_order_price(price, package_dict1)\n",
"        sell_price2, order_price2, order_type2 = call_sell_and_order_price(price, package_dict2)\n",
"    except Exception as e:\n",
"        print(f\"SKU: {row['SKU']} 报错: {e}\")\n",
"        continue\n",
"    # NOTE(review): another cell indexes the first return value (sell_price[0..3]); here it\n",
"    # is stored whole - confirm which shape call_sell_and_order_price returns.\n",
"    outputs = (\n",
"        ('实际体积售价', sell_price1),\n",
"        ('实际体积订单价', order_price1),\n",
"        ('实际体积订单类型', order_type1),\n",
"        ('理论体积售价', sell_price2),\n",
"        ('理论体积订单价', order_price2),\n",
"        ('理论体积订单类型', order_type2),\n",
"    )\n",
"    for column, value in outputs:\n",
"        df.loc[index, column] = value\n",
"    print(f\"SPU: {row['SPU']}, SKU {row['SKU']} 实际体积售价: {sell_price1}, 理论体积售价: {sell_price2},\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"计算SPU下所有SKU的网站售价实际尺寸售价预测尺寸售价"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Reload the coefficient sheet; base_df keeps the baseline rows (is_first == 1) and df1 is a\n",
"# narrow view with the flag, the ids and the measured size columns.\n",
"df = pd.read_excel(\n",
"    r'D:\\test\\logistics\\拦截数据\\一票一件发货订单.xlsx',\n",
"    sheet_name=\"单包裹系数计算\",\n",
")\n",
"df1 = df.loc[:, ['is_first', 'SPU', 'SKU', '长', '宽', '高', '重量']]\n",
"base_df = df.loc[df['is_first'] == 1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Fetch every SKU (category, cost price, ERP package data, logistics share, sell price) of\n",
"# the baseline SPUs and merge the sheet data onto it by SKU.\n",
"import json\n",
"\n",
"import pandas as pd\n",
"\n",
"from sell.sell_price import call_sell_and_order_price\n",
"from utils.gtools import MySQLconnect\n",
"\n",
"spu_list = (\n",
"    pd.to_numeric(base_df['SPU'], errors='coerce')  # non-numeric SPUs become NaN\n",
"    .dropna()\n",
"    .astype(int)\n",
"    .astype(str)\n",
"    .tolist()\n",
")\n",
"\n",
"if not spu_list:\n",
"    # An empty list would render as \"IN ()\", which is a SQL syntax error.\n",
"    result = pd.DataFrame(columns=['产品品类', '产品分类', 'SPU', 'SKU', '成本价', '包裹数据', '物流分摊', '产品售价'])\n",
"else:\n",
"    placeholders = ','.join(['%s'] * len(spu_list))\n",
"    with MySQLconnect('ods') as db:\n",
"        enginal = db.engine()\n",
"        sql = f\"\"\"SELECT\n",
"        产品品类,\n",
"        产品分类,\n",
"        SPU,\n",
"        sku.SKU,\n",
"        sku.`成本价`,\n",
"        spi.`包裹数据`,\n",
"        物流分摊,\n",
"        产品售价\n",
"        FROM\n",
"        stg_bayshop_litfad_sku sku\n",
"        LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` \n",
"        LEFT JOIN ads.new_erp_sku_size spi ON sku.SKU =spi.SKU\n",
"        WHERE\n",
"        spu.SPU IN ({placeholders}) \n",
"        \"\"\"\n",
"        result = pd.read_sql(sql, enginal, params=tuple(spu_list))\n",
"\n",
"# Columns present on both sides (such as SPU) get _x (query) / _y (sheet) suffixes.\n",
"all_df = pd.merge(result, df, on=['SKU'], how='left')\n",
"all_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Split the ERP package JSON of every row into ERP长 / ERP宽 / ERP高 / ERP重量, with the three\n",
"# dimensions sorted so that ERP长 >= ERP宽 >= ERP高. Rows that cannot be parsed are skipped.\n",
"import json\n",
"import re\n",
"\n",
"for index, row in all_df.iterrows():\n",
"    if not isinstance(row['包裹数据'], str) or not row['包裹数据']:\n",
"        print(f\"第{index}行包裹数据为空或非字符串,跳过\")\n",
"        continue\n",
"    try:\n",
"        package_dict = json.loads(row['包裹数据'])\n",
"    except json.JSONDecodeError as e:\n",
"        print(f\"解析失败:第{index}行,错误信息:{e}\")\n",
"        continue\n",
"\n",
"    # Reset per row so an empty package dict cannot reuse the previous row's values.\n",
"    item = None\n",
"    try:\n",
"        # When several packages are present, the last one wins.\n",
"        for package in package_dict.values():\n",
"            item = {}\n",
"            for key, value in package.items():\n",
"                # Keep the first number found in the value; keep the raw value if there is none.\n",
"                number_str = re.findall(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n",
"                item[key] = float(number_str[0]) if number_str else value\n",
"    except AttributeError:\n",
"        print(f\"解析失败:第{index}行,错误信息:包裹数据为空\")\n",
"        continue\n",
"    if item is None or not all(isinstance(item.get(field), float) for field in ('长', '宽', '高', '重量')):\n",
"        print(f\"解析失败:第{index}行,错误信息:包裹数据缺少长宽高或重量\")\n",
"        continue\n",
"\n",
"    height, width, length = sorted([item['长'], item['宽'], item['高']])\n",
"    weight = item['重量']\n",
"    all_df.loc[index, 'ERP长'] = length\n",
"    all_df.loc[index, 'ERP宽'] = width\n",
"    all_df.loc[index, 'ERP高'] = height\n",
"    all_df.loc[index, 'ERP重量'] = weight\n",
"    print(f\"{row['SKU']}尺寸为:{width},h:{height},d:{length},w:{weight}\")\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Compute the length / width / height / weight coefficients of every SPU from its baseline\n",
"# row (is_first == 1) and attach them to all rows of that SPU in all_df.\n",
"import pandas as pd\n",
"\n",
"\n",
"def cal_size(old, new):\n",
"    \"\"\"Return the relative change ``(new - old) / old``.\n",
"\n",
"    Returns ``None`` when ``old`` is 0 or when either value cannot be converted to float.\n",
"    \"\"\"\n",
"    try:\n",
"        old = float(old)\n",
"        new = float(new)\n",
"        if old == 0:\n",
"            return None  # avoid dividing by zero\n",
"        return (new - old) / old\n",
"    except (ValueError, TypeError):\n",
"        return None\n",
"\n",
"\n",
"coefficient_columns = ['长系数', '宽系数', '高系数', '重量系数']\n",
"\n",
"# .copy() so the new columns are written to an independent frame, not to a slice of all_df.\n",
"test_df = all_df[all_df['is_first'] == 1].copy()\n",
"\n",
"# dtype='float64' turns cal_size's None into NaN so later arithmetic on the column works.\n",
"for dimension in ('长', '宽', '高', '重量'):\n",
"    test_df[f'{dimension}系数'] = pd.Series(\n",
"        [cal_size(old, new) for old, new in zip(test_df[f'ERP{dimension}'], test_df[dimension])],\n",
"        index=test_df.index,\n",
"        dtype='float64',\n",
"    )\n",
"\n",
"# One coefficient row per SPU, so the merge cannot multiply the rows of all_df.\n",
"spu_coefficients = test_df[['SPU_x'] + coefficient_columns].drop_duplicates(subset='SPU_x')\n",
"\n",
"# Drop coefficient columns left by a previous run so re-running does not create _x/_y duplicates.\n",
"all_df = pd.merge(\n",
"    all_df.drop(columns=coefficient_columns, errors='ignore'),\n",
"    spu_coefficients,\n",
"    on='SPU_x',\n",
"    how='left',\n",
")\n",
"spu_coefficients.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Predicted size of every SKU: (1 + SPU coefficient) * ERP value, rounded to 2 decimals,\n",
"# computed for length, width, height and weight in that order.\n",
"for dimension in ('长', '宽', '高', '重量'):\n",
"    growth = 1 + all_df[dimension + '系数']\n",
"    all_df['理论' + dimension] = (growth * all_df['ERP' + dimension]).round(2)\n",
"all_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Price every SKU under three sizes - measured, theoretical (coefficient-adjusted) and ERP -\n",
"# record the sell price and order price of each, then export the result to Excel.\n",
"from sell.sell_price import call_sell_and_order_price\n",
"\n",
"for index, row in all_df.iterrows():\n",
"    price = row['成本价']\n",
"    package_dict1 = {}\n",
"    package_dict2 = {}\n",
"    package_dict3 = {}\n",
"    try:\n",
"        # package_dict1: measured size; package_dict2: theoretical size; package_dict3: ERP size.\n",
"        package_dict1['包裹1'] = {field: row[field] for field in ('长', '宽', '高', '重量')}\n",
"        package_dict2['包裹1'] = {field: row['理论' + field] for field in ('长', '宽', '高', '重量')}\n",
"        package_dict3['包裹1'] = {field: row['ERP' + field] for field in ('长', '宽', '高', '重量')}\n",
"        sell_price1, order_price1, order_type1 = call_sell_and_order_price(price, package_dict1)\n",
"        sell_price2, order_price2, order_type2 = call_sell_and_order_price(price, package_dict2)\n",
"        sell_price3, order_price3, order_type3 = call_sell_and_order_price(price, package_dict3)\n",
"    except Exception as e:\n",
"        print(f\"SKU: {row['SKU']} 报错: {e}\")\n",
"        continue\n",
"    outputs = (\n",
"        ('ERP售价', sell_price3),\n",
"        ('实际体积售价', sell_price1),\n",
"        ('理论体积售价', sell_price2),\n",
"        ('ERP订单价', order_price3),\n",
"        ('实际体积订单价', order_price1),\n",
"        ('理论体积订单价', order_price2),\n",
"    )\n",
"    for column, value in outputs:\n",
"        all_df.loc[index, column] = value\n",
"    print(f\"SPU: {row['SPU_x']}, SKU {row['SKU']} ,ERP售价: {sell_price3}, 实际体积售价: {sell_price1}, 理论体积售价: {sell_price2},\")\n",
"all_df.to_excel('单包裹SKU售价分析.xlsx', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}