logistics/账单预测.ipynb

1330 lines
57 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_18728\\4048319598.py:101: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
" df=pd.read_sql(sql,db.con)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" 包裹测量时间 order_id SKU 订单时间 \\\n",
"0 2025-03-01 14:06:35 250225025414549 22696 2025-February-25th \n",
"1 2025-04-02 15:44:20 250330050647870 220934 2025-March-30th \n",
"2 2025-03-03 16:31:27 250225062602893 221468 2025-February-25th \n",
"3 2025-03-15 10:37:19 250311002220317 222890 2025-March-11th \n",
"4 2025-04-03 18:16:25 250331190637251 224072 2025-March-31st \n",
"... ... ... ... ... \n",
"13489 2025-05-04 16:39:49 250429213610207 2207148642 2025-April-29th \n",
"13490 2025-05-04 13:03:06 250501025810935 2207161446 2025-May-1st \n",
"13491 2025-05-05 17:41:38 250430063602142 2207176415 2025-April-30th \n",
"13492 2025-05-02 13:33:34 250430035236822 2207192083 2025-April-30th \n",
"13493 2025-05-08 20:15:50 250430104809255 2207202010 2025-April-30th \n",
"\n",
" 包裹号 快递公司 运输方式 目的国 postcode ERP采购价 \\\n",
"0 991344640 海MS-FEDEX 海运 United States 55414 50.0 \n",
"1 991399418 海MS-FEDEX 海运 United States 37014 45.0 \n",
"2 991348761 海MS-FEDEX02 海运 United States 11803 65.0 \n",
"3 991369401 海MS-FEDEX02 海运 United States 54568-9248 35.0 \n",
"4 991401728 海MS-FEDEX 海运 United States 34112 180.0 \n",
"... ... ... ... ... ... ... \n",
"13489 991447512 海MS-FEDEX-SAIR-H 海运 United States 32168 NaN \n",
"13490 991446594 海MS-FEDEX-SAIR-H 海运 United States 92647 NaN \n",
"13491 991449178 海MS-FEDEX-SAIR-H 海运 United States 90019 518.0 \n",
"13492 991444756 海MS-FEDEX-SAIR-H 海运 United States 78572 228.0 \n",
"13493 991454248 海MS-FEDEX-SAIR-H 海运 United States 89129 NaN \n",
"\n",
" ERP包裹数据 \\\n",
"0 {\"包裹1\": {\"宽\": \"21.0\", \"长\": \"21.0\", \"高\": \"21.0\"... \n",
"1 {\"包裹1\": {\"宽\": \"41.0\", \"长\": \"41.0\", \"高\": \"21.0\"... \n",
"2 {\"包裹1\": {\"宽\": \"32.0\", \"长\": \"33.0\", \"高\": \"28.0\"... \n",
"3 {\"包裹1\": {\"宽\": \"28.0\", \"长\": \"28.0\", \"高\": \"13.0\"... \n",
"4 {\"包裹1\": {\"宽\": \"25.0\", \"长\": \"75.0\", \"高\": \"28.0\"... \n",
"... ... \n",
"13489 {\"包裹1\": {\"宽\": \"70.0\", \"长\": \"190.0\", \"高\": \"20.0... \n",
"13490 {\"包裹1\": {\"宽\": \"48.0\", \"长\": \"54.0\", \"高\": \"14.0\"... \n",
"13491 {\"包裹1\": {\"宽\": \"30.0\", \"长\": \"193.0\", \"高\": \"13.0... \n",
"13492 {\"包裹1\": {\"宽\": \"51.0\", \"长\": \"53.0\", \"高\": \"33.0\"... \n",
"13493 {\"包裹1\": {\"宽\": \"70.0\", \"长\": \"130.0\", \"高\": \"45.0... \n",
"\n",
" 实际包裹数据 \n",
"0 {\"991344640\": {\"长\": 45.00, \"宽\": 45.00, \"高\": 17... \n",
"1 {\"991399418\": {\"长\": 41.00, \"宽\": 41.00, \"高\": 21... \n",
"2 {\"991348761\": {\"长\": 31.00, \"宽\": 31.00, \"高\": 24... \n",
"3 {\"991369401\": {\"长\": 28.00, \"宽\": 28.00, \"高\": 12... \n",
"4 {\"991401728\": {\"长\": 75.00, \"宽\": 25.00, \"高\": 16... \n",
"... ... \n",
"13489 {\"991447512\": {\"长\": 208.00, \"宽\": 54.00, \"高\": 1... \n",
"13490 {\"991446594\": {\"长\": 53.00, \"宽\": 48.00, \"高\": 20... \n",
"13491 {\"991449178\": {\"长\": 193.00, \"宽\": 30.00, \"高\": 1... \n",
"13492 {\"991444756\": {\"长\": 53.00, \"宽\": 51.00, \"高\": 33... \n",
"13493 {\"991454248\": {\"长\": 141.00, \"宽\": 76.00, \"高\": 2... \n",
"\n",
"[13494 rows x 12 columns]\n"
]
}
],
"source": [
"import pandas as pd\n",
"from utils.gtools import MySQLconnect\n",
"\n",
"# 读取需要计算的包裹信息\n",
"with MySQLconnect('ods') as db:\n",
" sql = r\"\"\" \n",
" WITH\n",
"t1 AS (\n",
"SELECT\n",
"order_id,\n",
"SKU,\n",
"order_date,\n",
"sum(CASE WHEN opl.order_product_id LIKE '%\\_%' ESCAPE '\\\\' \n",
" AND opl.order_product_id NOT LIKE '%\\_%\\_%' ESCAPE '\\\\' THEN product_num END) AS product_num,\n",
"DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n",
"count(opl.SKU) AS 产品种类\n",
"FROM\n",
"dws.fact_order_product_list opl\n",
"WHERE\n",
" NOT EXISTS (\n",
" SELECT 1 \n",
" FROM dws.log_order_reissue_detail AS r \n",
" WHERE r.order_product_id = opl.order_product_id\n",
" )\n",
"AND order_date >= \"20250101\"\n",
"AND order_date < \"20250601\"\n",
"AND SKU <> \"\"\n",
"GROUP BY order_id\n",
")\n",
",\n",
"t2 AS (\n",
"SELECT\t\t\t\n",
" a.`包裹测量时间`,\n",
"\t\t\t\t\t\tt1.order_id,\n",
"\t\t\t\t\t\tt1.SKU,\n",
"\t\t\t\t\t\tt1.order_date,\n",
" a.包裹号,\n",
" a.快递公司,\n",
" a.运输方式,\n",
"\t\t\t\t\t\ta.`目的国`,\n",
" d.postcode,\n",
" CONCAT(\n",
" '\"', b.package, '\": {',\n",
" '\"长\": ', length, ', ',\n",
" '\"宽\": ', width, ', ',\n",
" '\"高\": ', hight, ', ',\n",
" '\"重量\": ', weight, '}'\n",
" ) AS package_json\n",
" FROM\n",
"\t\t\t\tt1\n",
" LEFT JOIN order_express a ON t1.order_id = a.单号\n",
" JOIN package_vol_info b ON a.`包裹号` = b.package\n",
" JOIN order_list d ON a.`单号` = d.order_id \n",
" WHERE\n",
" a.`包裹状态` IN ( '客户签收', '已经投递') \n",
" AND b.hight > 0 \n",
" AND b.length > 0 \n",
" AND b.width > 0 \n",
" AND b.hight > 0 \n",
" AND b.weight > 0\n",
" AND a.`目的国` = \"United States\"\n",
"\t\t\t\t\t\tAND t1.product_num = 1\n",
"\t\t\t\t\t\tAND t1.产品种类=1\n",
"\t\t\t\t\t\tAND a.`包裹测量时间` >= '2025-03-01'\n",
"\t\t\t\t\t\tAND a.`包裹测量时间` < '2025-06-01'\n",
"),\n",
"t3 AS (\n",
"SELECT\n",
"t2.*,\n",
"sku.成本价 AS ERP采购价,\n",
"ess.包裹数据 AS ERP包裹数据,\n",
"CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,\n",
"ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn\n",
"FROM\n",
"t2\n",
"LEFT JOIN ads.new_erp_sku_size ess ON t2.SKU=ess.SKU\n",
"LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU\n",
"WHERE\n",
"ess.`包裹数据`<>''\n",
"GROUP BY order_id\n",
")\n",
"SELECT\n",
"包裹测量时间,\n",
"order_id,\n",
"SKU,\n",
"DATE_FORMAT(order_date,\"%Y-%M-%D\") AS 订单时间,\n",
"包裹号,\n",
"`快递公司`,\n",
"`运输方式`,\n",
"`目的国`,\n",
"postcode,\n",
"ERP采购价,\n",
"ERP包裹数据,\n",
"实际包裹数据\n",
"FROM\n",
"t3\n",
"WHERE\n",
"rn=1\n",
"\n",
" \"\"\"\n",
" df=pd.read_sql(sql,db.con)\n",
" print(df)\n",
" df.to_clipboard(index=False)\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils.countryOperator import OperateCountry\n",
"from utils.logisticsBill import BillFactory\n",
"from utils.Package import Package, Package_group\n",
"import pandas as pd\n",
"# 美国 \n",
"from utils.logisticsBill import Billing\n",
"import requests\n",
"def get_package(packages_id):\n",
" \"\"\"\n",
" 根据订单号的包裹ID,返回包裹类包裹明细包裹实重包裹体积重6000\n",
" \"\"\"\n",
" url = f'https://cp.maso.hk/index.php?main=biphp&act=package_fund&key=JJ57S744ZJR26ORGRMMSJ8V4D4UVF5AU&package={package_id}'\n",
" resp = requests.get(url).json()\n",
" if resp['code'] == \"0\":\n",
" weight = int(float(resp['data'][0]['weight'])*1000)\n",
" package_length = resp['data'][0]['l'].replace(\",\",\"\") if len(resp['data'][0]['l'])>0 else \"0\"\n",
" package_width = resp['data'][0]['w'].replace(\",\",\"\") if len(resp['data'][0]['w'])>0 else \"0\"\n",
" package_hight = resp['data'][0]['h'].replace(\",\",\"\") if len(resp['data'][0]['h'])>0 else \"0\"\n",
" return float(package_length),float(package_width),float(package_hight),int(weight)\n",
"\n",
"# df1=pd.read_excel(r\"F:\\DOCUMENTS\\WXWork\\1688854527635889\\Cache\\File\\2025-05\\新建Microsoft Excel 工作表 (2)(1).xlsx\",sheet_name=\"包裹成本信息\")\n",
"# df = df1[(df1['快递公司']==\"海MS-大健-METRO\")|(df1['快递公司']==\"海MS-大健-CEVA\")|(df1['快递公司']==\"空LAX-大健-CEVA\")]\n",
"# df = df1\n",
"df_grouped= df.groupby('order_id')\n",
"# for index,row in df.iterrows():\n",
"# if \"CEVA\" in row['快递公司']:\n",
"# df.loc[index,'类型'] = \"卡派ceva\"\n",
"# elif \"GIGA\" in row['快递公司']:\n",
"# df.loc[index,'类型'] = \"卡派giga\"\n",
"# elif \"METRO\" in row['快递公司']:\n",
"# df.loc[index,'类型'] = \"卡派metro\"\n",
"# elif any(x in row['快递公司'] for x in [\"FEDEX-SAIR-G\", \"FEDEX-SAIR-H\", \"FEDEX02\"]):\n",
"# df.loc[index,'类型'] = \"快递\"\n",
"# else:\n",
"# df.loc[index,'类型'] = \"其他\"\n",
"\n",
"for order_num, group in df_grouped:\n",
" if pd.isna(order_num) or str(order_num).lower() == \"nan\" or order_num == 0 or order_num == \"手工包裹\":\n",
" continue\n",
" order_num = str(int(order_num))\n",
" # opCountry = OperateCountry(group['目的国'].iloc[0])\n",
" opCountry = OperateCountry('US')\n",
" postcode = group['postcode'].iloc[0]\n",
" if pd.isna(postcode) or str(postcode).lower() == \"nan\":\n",
" continue\n",
" packages1= Package_group()\n",
" packages2= Package_group()\n",
" packages3= Package_group()\n",
"\n",
" # 记录包裹号\n",
" packages_id1=[]\n",
" packages_id2=[]\n",
" packages_id3=[]\n",
"\n",
" if \"海\" in group['快递公司'].iloc[0]:\n",
" head_type = 1\n",
" else:\n",
" head_type = 0\n",
"\n",
" for index,row in group.iterrows():\n",
" print(f\"正在处理包裹 {row['包裹号']}\")\n",
" df.at[index, 'order_id'] = str(int(row['order_id']))\n",
" # print(f\"正在处理订单 {df.loc[index,'order_id']}\")\n",
" if any(pd.isna([row['长'], row['宽'], row['高'], row['重量']])):\n",
" row['长'],row['宽'],row['高'],row['重量'] = get_package(row['包裹号'])\n",
"\n",
" if \"FEDEX-SAIR-G\" in row['快递公司']:\n",
" company_name = \"Fedex-GROUD\"\n",
" elif \"FEDEX-SAIR-H\" in row['快递公司']:\n",
" company_name = \"Fedex-HOME\"\n",
" elif \"FEDEX02\" in row['快递公司']:\n",
" company_name = \"Fedex-彩虹小马\"\n",
"\n",
" package = Package(row['包裹号'],row['长'],row['宽'],row['高'],row['重量'])\n",
" if row['类型'] == \"快递\":\n",
" packages= Package_group()\n",
" packages.add_package(package)\n",
" bill = Billing(str(index),opCountry,packages,postcode,company_name=company_name,head_type=head_type,beizhu='1')\n",
" head_price = bill.head_amount[0]\n",
" tail_price = bill.tail_amount[0]\n",
" df.loc[index,'头程'] = head_price\n",
" df.loc[index,'尾端'] = tail_price\n",
" elif row['类型'] == \"卡派ceva\":\n",
" packages1.add_package(package)\n",
" packages_id1.append(row['包裹号'])\n",
" elif row['类型'] == \"卡派giga\":\n",
" packages2.add_package(package)\n",
" packages_id2.append(row['包裹号'])\n",
" elif row['类型'] == \"卡派metro\":\n",
" packages3.add_package(package)\n",
" packages_id3.append(row['包裹号'])\n",
" else:\n",
" continue\n",
" \n",
" if len(packages1)>0:\n",
" bill1 = Billing(str(index),opCountry,packages1,postcode,company_name=\"大健-CEVA\",head_type=head_type,beizhu='1')\n",
" for package_id in packages_id1:\n",
" df.loc[df['包裹号']==package_id,'头程'] = bill1.head_amount[0]\n",
" df.loc[df['包裹号']==package_id,'尾端'] = bill1.tail_amount[0]\n",
" if len(packages2)>0:\n",
" bill2 = Billing(str(index),opCountry,packages2,postcode,company_name=\"大健-GIGA\",head_type=head_type,beizhu='1')\n",
" for package_id in packages_id2:\n",
" df.loc[df['包裹号']==package_id,'头程'] = bill2.head_amount[0]\n",
" df.loc[df['包裹号']==package_id,'尾端'] = bill2.tail_amount[0]\n",
" if len(packages3)>0:\n",
" bill3 = Billing(str(index),opCountry,packages3,postcode,company_name=\"大健-Metro\",head_type=head_type,beizhu='1')\n",
" for package_id in packages_id3:\n",
" df.loc[df['包裹号']==package_id,'头程'] = bill3.head_amount[0]\n",
" df.loc[df['包裹号']==package_id,'尾端'] = bill3.tail_amount[0]\n",
"\n",
"df.to_clipboard(index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_clipboard(index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils.countryOperator import OperateCountry\n",
"from utils.logisticsBill import BillFactory\n",
"from utils.Package import Package, Package_group\n",
"# 美国 \n",
"from utils.logisticsBill import Billing\n",
"\n",
"df_grouped= df.groupby('订单号')\n",
"calculated_results = []\n",
"for order_num, group in df_grouped:\n",
" # opCountry = OperateCountry(group['目的国'].iloc[0])\n",
" opCountry = OperateCountry('US')\n",
" postcode = '33900'\n",
" packages= Package_group()\n",
" packages_dict = {}\n",
" volume_weight = 0\n",
" weight = 0\n",
" for index,row in group.iterrows():\n",
" package = Package(row['包裹号'],row['长'],row['宽'],row['高'],row['重量'])\n",
" packages.add_package(package)\n",
" packages_dict[row['包裹号']] = {\n",
" \"长\": row['长'],\n",
" \"宽\": row['宽'],\n",
" \"高\": row['高'],\n",
" \"重量\": row['重量']\n",
" }\n",
" # weight += row['重量']/1000\n",
" # volume_weight += package.get_volume_weight(6000)\n",
" # postcode = row['postcode']\n",
" # head_type = 1 if row['运输方式'] == '海运' else 0\n",
" # try:\n",
" # bill = Billing(str(index),opCountry,packages,postcode,company_name=None,head_type=head_type,beizhu='1')\n",
" # head_price = bill.head_amount[0]\n",
" # tail_price = bill.tail_amount[0]\n",
" # tail_currency = bill.tail_amount[1]\n",
" # except:\n",
" # head_price ='出错'\n",
" # tail_price = '出错'\n",
" # tail_currency = '出错'\n",
" result = {\n",
" '订单号': order_num,\n",
" '目的国': group['目的国'].iloc[0], # Same for all rows in the group\n",
" '快递公司': group['快递公司'].iloc[0], # Same for all rows in the group\n",
" '运输方式': group['运输方式'].iloc[0], # Same for all rows in the group\n",
" 'postcode': group['postcode'].iloc[0], # Same for all rows in the group\n",
" '销售额USD':group['销售额'].iloc[0],\n",
" '真实尾端CNY': group['真实尾端CNY'].sum(),# Same for all rows in the group\n",
" '真实头程CNY': group['真实头程CNY'].sum(), # Same for all rows in the group\n",
" \n",
" # '头程预测': head_price,\n",
" # '尾端预测': tail_price,\n",
" # '尾端货币': tail_currency,\n",
" '产品品类': group['产品品类'].iloc[0], # Same for all rows in the group\n",
" '产品分类': group['产品分类'].iloc[0], # Same for all rows in the group\n",
" 'SKU': group['SKU'].iloc[0], # Same for all rows in the group\n",
" '订单日期': group['订单日期'].iloc[0], # Same for all rows in the group\n",
" 'packages_dict': packages_dict,\n",
" '包裹数据':group['包裹数据'].iloc[0],\n",
"\n",
" }\n",
" calculated_results.append(result)\n",
"\n",
" print(packages_dict)\n",
"calculated_df = pd.DataFrame(calculated_results)\n",
"# 将calculated_df的订单号改为order_id\n",
"calculated_df.rename(columns={'订单号':'order_id'},inplace=True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils.gtools import MySQLconnect\n",
"\n",
"ods = MySQLconnect(\"ods\")\n",
"engine = ods.engine()\n",
"cursor = ods.connect().cursor()\n",
"\n",
"\n",
"batch_size = 50000 # 每次查询 500 个 order_id避免 SQL 语句过长\n",
"order_id_list = df[\"order_id\"].tolist()\n",
"\n",
"# 存储分批查询的结果\n",
"result_dfs1 = []\n",
"result_dfs2 = []\n",
"for i in range(0, len(order_id_list), batch_size):\n",
" batch_order_ids = order_id_list[i:i + batch_size] # 取当前批次的 order_id\n",
" param = \",\".join(f\"'{order_id}'\" for order_id in batch_order_ids)\n",
"\n",
" purchase_order_sql = f\"\"\"\n",
" WITH t1 AS (\n",
" SELECT LEFT(ol.out_detials_outlink_id, 15) AS order_id,\n",
" SUM(out_detials_qty * price) AS instock_cost,\n",
" NULL AS buy_cost\n",
" FROM ods.outstock_list ol\n",
" JOIN ods.instock_list il ON ol.store_in_id = il.id \n",
" WHERE LEFT(ol.out_detials_outlink_id, 15) IN ({param})\n",
" GROUP BY LEFT(ol.out_detials_outlink_id, 15)\n",
" \n",
" UNION ALL\n",
" \n",
" SELECT LEFT(order_product_id, 15) AS order_id, \n",
" NULL AS instock_cost,\n",
" SUM(buy_num * actual_price) AS buy_cost\n",
" FROM warehouse_purchasing\n",
" WHERE LEFT(order_product_id, 15) IN ({param}) \n",
" AND buy_audit = \"采购完成\"\n",
" GROUP BY LEFT(order_product_id, 15)\n",
" )\n",
" SELECT order_id,\n",
" SUM(CASE \n",
" WHEN instock_cost IS NULL THEN buy_cost\n",
" ELSE instock_cost \n",
" END) AS 采购成本\n",
" FROM t1 \n",
" GROUP BY order_id\n",
" \"\"\"\n",
"\n",
"# sql_biaozhun = f\"\"\"\n",
" \n",
"# SELECT\n",
"# order_id,\n",
"# sum(s1.`成本价`*product_num) AS \"入库采购价\"\n",
"# FROM\n",
"# dws.order_product_list opl LEFT JOIN ods.stg_bayshop_litfad_sku s1 on s1.sku = opl.sku\n",
"# LEFT JOIN dwd.dim_erp_sku_package_vol_info des on des.erp_sku = opl.sku\n",
" \n",
"# WHERE\n",
"# order_id IN ({param}) \n",
"# AND order_product_id regexp '^[0-9]{{15}}_[1-9][0-9]*$'\n",
"# GROUP BY order_id \n",
" \n",
"# \"\"\"\n",
" batch_df1 = pd.read_sql(purchase_order_sql, con=engine) # 运行 SQL 查询\n",
" result_dfs1.append(batch_df1) # 存入结果列表\n",
" # batch_df2 = pd.read_sql(sql_biaozhun, con=engine) # 运行 SQL 查询\n",
" # result_dfs2.append(batch_df2) # 存入结果列表\n",
" print(f\"已完成 {i + batch_size} 个 order_id 的查询\")\n",
"\n",
"# 合并所有查询结果\n",
"purchase_order_df1 = pd.concat(result_dfs1, ignore_index=True)\n",
"# purchase_order_df2 = pd.concat(result_dfs2, ignore_index=True)\n",
"purchase_order_df1[\"order_id\"] = purchase_order_df1[\"order_id\"].astype(str)\n",
"# purchase_order_df2[\"order_id\"] = purchase_order_df2[\"order_id\"].astype(str)\n",
"# purchase_order_df = pd.merge(purchase_order_df1, purchase_order_df2, on='order_id', how='left')\n",
"\n",
"# 转换数据类型,确保匹配\n",
"df[\"order_id\"] = df[\"order_id\"].astype(str)\n",
"\n",
"# 进行合并\n",
"order_id_df_cal = pd.merge(df, purchase_order_df1, on='order_id', how='left')\n",
"# order_id_df_cal.drop_duplicates(subset=[\"order_id\"], inplace=True)\n",
"# 复制到剪贴板\n",
"order_id_df_cal.to_clipboard(index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(len(df),len(purchase_order_df1),len(order_id_df_cal))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 用系统售价模型和实际体积计算实际应该有的售价和订单价"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import pandas as pd\n",
"order_id_df_cal=pd.read_excel(r\"F:\\DOCUMENTS\\WXWork\\1688854527635889\\Cache\\File\\2025-05\\新建Microsoft Excel 工作表 (2)(1).xlsx\",sheet_name=\"包裹成本信息\")\n",
"df=order_id_df_cal.groupby('order_id')\n",
"for order_num,group in df:\n",
" packages_dict = {}\n",
" for index,row in group.iterrows():\n",
" if row[\"长\"]==0 or row[\"宽\"]==0 or row[\"高\"]==0 or row[\"重量\"]==0 or pd.isnull(row[\"长\"]) or pd.isnull(row[\"宽\"]) or pd.isnull(row[\"高\"]) or pd.isnull(row[\"重量\"]):\n",
" continue\n",
" package_dict = {\n",
" \"长\":row['长'],\n",
" \"宽\":row['宽'],\n",
" \"高\":row['高'],\n",
" \"重量\":row['重量']\n",
" }\n",
" packages_dict[row['包裹号']] = package_dict\n",
" print(packages_dict)\n",
" order_id_df_cal.loc[order_id_df_cal['order_id']==order_num,'packages_dict'] = json.dumps(packages_dict) if len(packages_dict)>0 else None\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 用系统售价模型和实际体积计算实际应该有的售价和订单价\n",
"from sell.sell_price import call_sell_and_order_price\n",
"import re\n",
"order_id_df_cal['预测售价'] = 0\n",
"for index, row in order_id_df_cal.iterrows():\n",
" # 如果没有预测售价这一列,正常执行\n",
" if row['预测售价'] > 0:\n",
" continue\n",
" print(row['packages_dict'])\n",
" if not isinstance(row['packages_dict'], str):\n",
" print(f\"跳过第 {index} 行packages_dict 不是字符串:{row['packages_dict']}\")\n",
" continue\n",
" if not isinstance(package_dict, dict) or not package_dict:\n",
" continue\n",
" package_dict = json.loads(row['packages_dict'])\n",
" # biaozhun_dict = row['包裹数据']\n",
" # if isinstance(biaozhun_dict, str):\n",
" # biaozhun_dict = eval(biaozhun_dict)\n",
" # if biaozhun_dict is None:\n",
" # continue\n",
"\n",
" # for key, value in biaozhun_dict.items():\n",
" # for k, v in value.items():\n",
" # if isinstance(v, str):\n",
" # v = v.replace(',', '') # 移除千分位逗号\n",
" # numbers = re.findall(r\"\\d+\\.?\\d*\", v) # 提取所有数字部分\n",
" # if numbers:\n",
" # biaozhun_dict[key][k] = float(numbers[0])\n",
"\n",
" \n",
"\n",
" price = row['总订单采购成本']\n",
" # price1=row['入库采购价']\n",
" sell_price,order_price,order_type = call_sell_and_order_price(price, package_dict)\n",
" # sell_price1,order_price1,order_type1 = call_sell_and_order_price(price1, biaozhun_dict)\n",
" order_id_df_cal.loc[order_id_df_cal['order_id']==row['order_id'],'预测售价'] = sell_price\n",
" order_id_df_cal.loc[order_id_df_cal['order_id']==row['order_id'],'预测订单价'] = order_price\n",
" order_id_df_cal.loc[order_id_df_cal['order_id']==row['order_id'],'预测订单类型'] = order_type\n",
" # order_id_df_cal.loc[index,'ERP售价'] = sell_price1\n",
" # order_id_df_cal.loc[index,'ERP订单价'] = order_price1\n",
" # order_id_df_cal.loc[index,'ERP订单类型'] = order_type1\n",
" print(f\"第{index+1}个订单{row['order_id']}的预测售价为{sell_price},预测订单价为{order_price},订单类型为{order_type}\")\n",
"order_id_df_cal.to_clipboard(index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"order_id_df_cal.to_clipboard(index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"for index, row in order_id_df_cal.iterrows():\n",
" package_dict = row['packages_dict']\n",
" \n",
" if not isinstance(package_dict, dict):\n",
" continue # 跳过无效行\n",
"\n",
" price = row['总订单采购成本']\n",
"\n",
" try:\n",
" sell_price, order_price, order_type = call_sell_and_order_price(price, package_dict)\n",
" except Exception as e:\n",
" print(f\"订单 {row['order_id']} 报错: {e}\")\n",
" continue\n",
"\n",
" order_id_df_cal.loc[index, '预测售价'] = sell_price\n",
" order_id_df_cal.loc[index, '预测订单价'] = order_price\n",
" order_id_df_cal.loc[index, '预测订单类型'] = order_type\n",
"\n",
" print(f\"第{index+1}个订单 {row['order_id']} 的预测售价为 {sell_price},预测订单价为 {order_price},订单类型为 {order_type}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"order_id_df_cal.to_clipboard(index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"计算各条目订单SKU的标准体积下的物流成本和采购成本\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from utils.gtools import MySQLconnect\n",
"from utils.countryOperator import OperateCountry\n",
"from utils.logisticsBill import BillFactory\n",
"from utils.Package import Package, Package_group\n",
"import json\n",
"# 美国 \n",
"from utils.logisticsBill import Billing\n",
"import re\n",
"\n",
"def get_package_info_by_sql(order_df):\n",
" packages = Package_group()\n",
" for index, row in order_df.iterrows():\n",
" order_id = row['订单号']\n",
" if row['包裹数据'] is None:\n",
" return None, \"\"\n",
" row['包裹数据'] = json.loads(row['包裹数据'])\n",
" item_list = []\n",
" for package in row['包裹数据'].values():\n",
" item = {}\n",
" for key, value in package.items():\n",
" try:\n",
" # 使用正则表达式提取数字部分\n",
" number_str = re.findall(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n",
" if number_str:\n",
" item[key] = float(number_str[0]) # 取第一个匹配到的数字并转换为 float\n",
" else:\n",
" item[key] = value # 如果没有数字部分,保留原值\n",
" except ValueError:\n",
" item[key] = value # 如果遇到无法转换的值,保留原值\n",
" item_list.append(item)\n",
" \n",
" for item in item_list:\n",
" if item['长'] == 0 or item['宽'] == 0 or item['高'] == 0 or item['重量'] == 0:\n",
" return None, \"\"\n",
" package = Package(row['SKU'], item['长'], item['宽'], item['高'], item['重量'])\n",
" packages.add_package(package)\n",
" \n",
" return order_id, packages\n",
"\n",
"\n",
"ods = MySQLconnect(\"ods\")\n",
"engine = ods.engine()\n",
"cursor = ods.connect().cursor()\n",
"with MySQLconnect(\"ods\") as db:\n",
" engine = db.engine()\n",
" cursor = db.connect().cursor()\n",
" sql = \"\"\"\n",
" WITH t1 as \n",
" (\n",
" SELECT\n",
" opl.order_id,\n",
" -- opl.cate_3,\n",
" `产品品类`,\n",
" `产品分类`,\n",
" # opl.SKU,\n",
" SUM(opl.as_value_amount) AS 销售额,\n",
" COUNT(`产品分类`) AS 条目数,\n",
" sum(sku.`成本价`*product_num) AS \"入库采购价\"\n",
" FROM\n",
" dws.fact_order_product_list opl \n",
" LEFT JOIN stg_bayshop_litfad_sku sku ON opl.SKU = sku.SKU\n",
" LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`\n",
" LEFT JOIN order_express oe ON opl.order_id = oe.单号\n",
" WHERE\n",
" opl.order_date >= '20240901'\n",
" AND opl.order_date<'20250101'\n",
" AND opl.site_name REGEXP 'Litfad'\n",
" AND opl.SKU IS NOT NULL\n",
" AND EXISTS (\n",
" SELECT 1 \n",
" FROM order_express oe_sub \n",
" WHERE oe_sub.单号 = opl.order_id \n",
" AND oe_sub.包裹状态 REGEXP '签收|投递'\n",
" )\n",
" GROUP BY opl.order_id\n",
" )\n",
" SELECT\n",
" a.目的国,\n",
" a.单号 AS `订单号`,\n",
" a.运输方式,\n",
" d.order_price_dollar AS '销售额',\n",
" d.postcode,\n",
" t1.产品品类,\n",
" t1.产品分类,\n",
" t1.入库采购价\n",
" FROM\n",
" order_express a\n",
" JOIN order_list d ON a.`单号` = d.order_id \n",
" JOIN t1 ON a.单号 = t1.order_id\n",
" WHERE\n",
" 目的国 IN ('United Kingdom','United States','Australia','Germany','Spain','France')\n",
" AND t1.条目数 = 1\n",
" AND t1.产品分类 IS NOT NULL\n",
" ORDER BY d.order_id\n",
" \"\"\"\n",
" biaozhun_df = pd.read_sql(sql, engine)\n",
" print(f\"获取订单基本信息{biaozhun_df.shape}\")\n",
" batch_size = 10000 \n",
" order_id_list = biaozhun_df[\"订单号\"].tolist()\n",
" result_dfs=[]\n",
" for i in range(0, len(order_id_list), batch_size):\n",
" batch_order_ids = order_id_list[i:i + batch_size] # 取当前批次的 order_id\n",
" param = \",\".join(f\"'{order_id}'\" for order_id in batch_order_ids)\n",
"\n",
" packages_sql =f\"\"\"\n",
" SELECT\n",
" CAST(opl.order_id AS CHAR) AS 订单号,\n",
" opl.SKU,\n",
" 包裹数据\n",
" FROM\n",
" dws.order_product_list opl\n",
" LEFT JOIN ads.new_erp_sku_size spi ON opl.SKU =spi.SKU\n",
" WHERE\n",
" order_id in ({param})\n",
" AND\n",
" opl.order_product_id REGEXP '[0-9]{{15}}_[0-9]*$'\n",
" \"\"\"\n",
" batch_df = pd.read_sql(packages_sql, engine)\n",
" result_df = batch_df.groupby(\"订单号\").apply(lambda x: pd.Series(get_package_info_by_sql(x), index=[\"订单号\", \"Packages\"])).reset_index(drop=True)\n",
" result_dfs.append(result_df)\n",
" print(f\"已完成 {i} / {len(order_id_list)}\")\n",
" packages_df = pd.concat(result_dfs, ignore_index=True)\n",
" # # 去除空值\n",
" # packages_df = packages_df[packages_df['Packages'].notnull()]\n",
" # 转换数据类型,确保匹配\n",
" packages_df[\"订单号\"] = packages_df[\"订单号\"].astype(str)\n",
" biaozhun_df[\"订单号\"] = biaozhun_df[\"订单号\"].astype(str)\n",
" # 进行合并\n",
" biaozhun_df = pd.merge(biaozhun_df, packages_df, on='订单号', how='left')\n",
" biaozhun_df.drop_duplicates(subset=[\"订单号\"], inplace=True)\n",
" print(biaozhun_df.head())\n",
"\n",
" for index, row in biaozhun_df.iterrows():\n",
" order_id = row['订单号']\n",
" packages = row['Packages'] \n",
" opCountry = OperateCountry('US')\n",
" postcode ='33900'\n",
" head_type = 1 if row['运输方式'] == '海运' else 0\n",
" if packages is None:\n",
" continue\n",
" try:\n",
" bill = Billing(str(index),opCountry,packages,postcode,company_name=None,head_type=head_type,beizhu='1')\n",
" head_price = bill.head_amount[0]\n",
" tail_price = bill.tail_amount[0]\n",
" tail_currency = bill.tail_amount[1]\n",
" except:\n",
" head_price ='出错'\n",
" tail_price = '出错'\n",
" tail_currency = '出错'\n",
" print(f\"{order_id} 头费:{head_price} {bill.head_amount[1]} 尾费:{tail_price} {tail_currency}\")\n",
" biaozhun_df.loc[index, '标准头程CNY'] = head_price\n",
" biaozhun_df.loc[index, '标准尾程'] = tail_price\n",
" biaozhun_df.loc[index, '尾程币种'] = tail_currency\n",
"biaozhun_df.to_clipboard(index=False)\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"biaozhun_df.to_clipboard(index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils.gtools import MySQLconnect\n",
"import pandas as pd\n",
"df=pd.read_excel(r\"F:\\DOCUMENTS\\WXWork\\1688854527635889\\Cache\\File\\2025-03\\导出订单维护任务数据2025-3-20.xlsx\",sheet_name=\"导出订单维护任务数据2025-3-20\")\n",
"# df['订单数量'] = df['订单数量'].astype(str)\n",
"# df = df[df['订单数量'].str.len() > 0]\n",
"# df['订单数量'] = df['订单数量'].astype(float)\n",
"# df = df[df['订单数量']>0]\n",
"df=df[['erp sku','订单号']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils.countryOperator import OperateCountry\n",
"from utils.logisticsBill import BillFactory\n",
"from utils.Package import Package, Package_group\n",
"# 美国 \n",
"from utils.logisticsBill import Billing\n",
"df = df[df['erp sku'] != 0]\n",
"# df的订单号转换字符串\n",
"df['订单号'] = df['订单号'].astype(str)\n",
"# 过滤掉订单号长度为0的行\n",
"df = df[df['订单号'].str.len() > 0]\n",
"\n",
"sku_df = pd.DataFrame()\n",
"with MySQLconnect(\"ods\") as db:\n",
" engine = db.engine()\n",
" conn = db.connect()\n",
" cursor = conn.cursor()\n",
" df_group = df.groupby('erp sku')\n",
" for sku, group in df_group:\n",
" for index1,row in group.iterrows():\n",
" order_list = group[\"订单号\"].tolist()\n",
" param = \",\".join(f\"'{order_id}'\" for order_id in order_list)\n",
" sql = f\"\"\"\n",
" SELECT\n",
" order_id\n",
" FROM\n",
" dws.fact_order_product_list\n",
" WHERE\n",
" SKU = {sku}\n",
" AND order_id IN ({param})\n",
" and site_type IN (\"独立站\")\n",
" group by order_id\n",
" \"\"\"\n",
" # 找到sku的订单号根据订单号计算该订单号的利润情况\n",
" order_id_df = pd.read_sql(sql, con=engine)\n",
" if order_id_df.empty:\n",
" continue\n",
" order_id_df['sku']=sku\n",
" sku_df = pd.concat([sku_df, order_id_df], axis=0, ignore_index=True)\n",
" print(f\"sku:{sku} 订单号数量:{len(order_id_df)}\")\n",
"print(sku_df)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ods = MySQLconnect(\"ods\")\n",
"engine = ods.engine()\n",
"cursor = ods.connect().cursor()\n",
"sql = \"\"\"\n",
" SELECT\n",
"opl.order_id,\n",
"`产品品类`,\n",
"`产品分类`,\n",
"SUM(opl.as_value_amount) AS 销售额,\n",
"COUNT(`产品分类`) AS 条目数\n",
"FROM\n",
"dws.fact_order_product_list opl \n",
"LEFT JOIN stg_bayshop_litfad_sku sku ON opl.SKU = sku.SKU\n",
"LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`\n",
"WHERE\n",
"opl.order_date >= '20240901'\n",
"AND opl.order_date<'20250101'\n",
"AND opl.site_name REGEXP 'Litfad'\n",
"GROUP BY opl.order_id\n",
"\"\"\"\n",
"sku_df = pd.read_sql(sql, con=engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 找采购价\n",
"from utils.gtools import MySQLconnect\n",
"\n",
"ods = MySQLconnect(\"ods\")\n",
"engine = ods.engine()\n",
"cursor = ods.connect().cursor()\n",
"import pandas as pd\n",
"order_id_list = sku_df[\"order_id\"].tolist()\n",
"param = \",\".join(f\"'{order_id}'\" for order_id in order_id_list)\n",
"# 取采购费\n",
"purchase_order_sql = f\"\"\"\n",
"with t1 AS (SELECT LEFT\n",
" ( ol.out_detials_outlink_id, 15 ) AS order_id,\n",
" SUM( out_detials_qty * price )/ 7 AS instock_cost,\n",
" NULL AS buy_cost\n",
"FROM\n",
" ods.outstock_list ol\n",
" JOIN ods.instock_list il ON ol.store_in_id = il.id \n",
"WHERE\n",
" LEFT ( ol.out_detials_outlink_id, 15 ) IN ({param}) \n",
"GROUP BY\n",
" LEFT ( ol.out_detials_outlink_id, 15 )\n",
"UNION ALL\n",
"SELECT\n",
" LEFT ( order_product_id, 15 ) as order_id, \n",
" NULL as instock_cost,\n",
" SUM(buy_num * actual_price)/7 AS buy_cost\n",
"FROM\n",
" `warehouse_purchasing`\n",
"WHERE\n",
" LEFT ( order_product_id, 15 ) IN ({param}) \n",
" AND buy_audit = \"采购完成\"\n",
"group by LEFT ( order_product_id, 15 )\n",
" )\n",
"\n",
"SELECT\n",
"order_id,\n",
"SUM(CASE \n",
"WHEN instock_cost is null THEN\n",
" buy_cost\n",
"ELSE\n",
" instock_cost END) AS pur_cost\n",
"FROM\n",
"t1 \n",
"GROUP BY order_id\n",
"\n",
"\"\"\"\n",
"purchase_order_df = pd.read_sql(purchase_order_sql, con=engine)\n",
"\n",
"# 转换数据类型,确保匹配\n",
"purchase_order_df[\"order_id\"] = purchase_order_df[\"order_id\"].astype(str)\n",
"sku_df[\"order_id\"] = sku_df[\"order_id\"].astype(str)\n",
"\n",
"# 进行合并\n",
"sku_df = pd.merge(sku_df, purchase_order_df, on='order_id', how='left')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sku_df=sku_df[['order_id','sku','pur_cost_y']]\n",
"sku_df.columns=['order_id','sku','采购价']\n",
"sku_df = sku_df[sku_df['采购价']>0] \n",
"print(sku_df.head())\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 取包裹数据\n",
"package_sql = f\"\"\"\n",
" SELECT\n",
" a.目的国,\n",
" a.单号 AS `订单号`,\n",
" a.包裹号,\n",
" a.快递公司,\n",
" a.运输方式,\n",
" d.order_price_dollar AS `销售额`,\n",
" d.postcode,\n",
" b.length AS `长`,\n",
" b.width AS `宽`,\n",
" b.hight AS `高`,\n",
" b.weight AS `重量`\n",
" FROM\n",
" order_express a\n",
" JOIN package_vol_info b ON a.`包裹号` = b.package\n",
" JOIN order_list d ON a.`单号` = d.order_id\n",
" WHERE\n",
" a.`包裹状态` IN ( '客户签收', '已经投递' ) \n",
" AND b.hight > 0 \n",
" AND b.length > 0 \n",
" AND b.width > 0 \n",
" AND b.hight > 0 \n",
" AND b.weight > 0 \n",
" AND a.单号 IN ({param})\n",
" ORDER BY\n",
" order_id\n",
" \"\"\"\n",
"package_df = pd.read_sql(package_sql, con=engine)\n",
"print(\"获取包裹数据成功\")\n",
"i = 0\n",
"package_grouped= package_df.groupby('订单号')\n",
"calculated_results = []\n",
"for order_id, group2 in package_grouped:\n",
" print(f\"开始计算订单号{order_id}的账单,第{i}个订单\")\n",
" i += 1\n",
" opCountry = OperateCountry(group2['目的国'].iloc[0])\n",
" packages= Package_group()\n",
" for index,row in group2.iterrows():\n",
" package = Package(row['包裹号'],row['长'],row['宽'],row['高'],row['重量'])\n",
" packages.add_package(package)\n",
" postcode = row['postcode']\n",
" head_type = 1 if row['运输方式'] == '海运' else 0\n",
" try:\n",
" bill = Billing(str(index),opCountry,packages,postcode,company_name=None,head_type=head_type,beizhu='1')\n",
" head_price = bill.head_amount[0]\n",
" tail_price = bill.tail_amount[0]\n",
" tail_currency = bill.tail_amount[1]\n",
" except:\n",
" head_price ='出错'\n",
" tail_price = '出错'\n",
" tail_currency = '出错'\n",
" print(f\"{bill}\")\n",
" result = {\n",
" '订单号': order_id,\n",
" '目的国': group2['目的国'].iloc[0], # Same for all rows in the group\n",
" '快递公司': group2['快递公司'].iloc[0], # Same for all rows in the group\n",
" '运输方式': group2['运输方式'].iloc[0], # Same for all rows in the group\n",
" 'postcode': group2['postcode'].iloc[0], # Same for all rows in the group\n",
" '销售额USD':group2['销售额'].iloc[0],\n",
" '头程预测': head_price,\n",
" '尾端预测': tail_price,\n",
" '尾端货币': tail_currency,\n",
" }\n",
" calculated_results.append(result)\n",
"calculated_df = pd.DataFrame(calculated_results)\n",
"calculated_df.rename(columns={'订单号':'order_id'},inplace=True)\n",
"sku_df[\"order_id\"] = sku_df[\"order_id\"].astype(str)\n",
"calculated_df[\"order_id\"] = calculated_df[\"order_id\"].astype(str)\n",
"# 进行合并\n",
"order_id_df_cal = pd.merge(calculated_df, sku_df, on='order_id', how='left')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# order_id_df_cal = pd.merge(calculated_df, sku_df, on='order_id', how='left')\n",
"order_id_df_cal.to_clipboard(index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from logisticsClass.logisticsTail_US import MetroLogistics_US\n",
"from utils.countryOperator import OperateCountry\n",
"from utils.logisticsBill import BillFactory\n",
"from utils.Package import Package, Package_group\n",
"import pandas as pd\n",
"from utils.gtools import MySQLconnect\n",
"# 美国 \n",
"from utils.logisticsBill import Billing\n",
"ods = MySQLconnect('ods')\n",
"cursor = ods.connect().cursor()\n",
"sql = f\"\"\"SELECT\n",
"\toe.`包裹号`,\n",
"\toe.`单号`,\n",
"\toe.`关联提单号`,\n",
"\toe.`包裹状态`,\n",
"\toe.`目的国`,\n",
"\toe.`快递公司`,\n",
"\tol.postcode,\n",
"\tpvi.length,\n",
"\tpvi.width,\n",
"\tpvi.hight,\n",
"\tpvi.weight\n",
"FROM\n",
"\t`order_express` oe left JOIN package_vol_info pvi ON oe.`包裹号` = pvi.package\n",
"\tLEFT JOIN order_list ol ON oe.`单号` =ol.order_id\n",
"WHERE\n",
"\t关联提单号 IN (\n",
"\t\t'BMOU4594999',\n",
"\t\t'CAIU9043860',\n",
"\t'GVCU5328406',\n",
"\t'TCNU7259447')\n",
"\tAND `包裹状态` NOT REGEXP '已作废'\n",
" \"\"\"\n",
"df = pd.read_sql(sql, ods.connect())\n",
"print(\"查询完成\")\n",
"calculated_results = []\n",
"# package_list = \",\".join(str(item) for item in df['ORDER#'].tolist())\n",
"# print(package_list)\n",
"# query = f\"SELECT package,length,width,hight,weight FROM `package_vol_info` WHERE `package` IN ({package_list})\"\n",
"# result = cursor.execute(query)\n",
"# packages_info = cursor.fetchall()\n",
"\n",
"# print(packages_info)\n",
"# 将df和result合并\n",
"# new_df = pd.DataFrame(packages_info, columns=['package', 'length', 'width', 'hight', 'weight'])\n",
"# df = df.merge(new_df, left_on='ORDER#', right_on='package')\n",
"# df = df.drop(columns='package')\n",
"df_grouped= df.groupby('单号')\n",
"\n",
"MetroLogistics_US.refresh()\n",
"for order_num, group in df_grouped:\n",
" # opCountry = OperateCountry(group['目的国'].iloc[0])\n",
" opCountry = OperateCountry('US')\n",
" postcode = str(group['postcode'].iloc[0])\n",
" packages= Package_group()\n",
" packages_dict = {}\n",
" # volume_weight = 0\n",
" # weight = 0\n",
" for index,row in group.iterrows():\n",
" length = float(row['length']) \n",
" width = float(row['width'])\n",
" hight = float(row['hight'])\n",
" weight = float(row['weight'])\n",
" package = Package(row['包裹号'],length,width,hight,weight)\n",
" packages.add_package(package)\n",
" # packages_dict[row['包裹号']] = {\n",
" # \"长\": row['长'],\n",
" # \"宽\": row['宽'],\n",
" # \"高\": row['高'],\n",
" # \"重量\": row['重量']\n",
" # }\n",
" # weight += row['重量']/1000\n",
" # volume_weight += package.get_volume_weight(6000)\n",
" # postcode = row['postcode']\n",
" # head_type = 1 if row['运输方式'] == '海运' else 0\n",
" try:\n",
" bill1 = Billing(str(index),opCountry,packages,postcode,company_name='大健-Metro',head_type=1,beizhu='1')\n",
" tail_price1 = bill1.tail_amount[0]\n",
" except:\n",
" tail_price1 = '出错'\n",
" try:\n",
" bill2 = Billing(str(index),opCountry,packages,postcode,company_name='大健-CEVA',head_type=1,beizhu='1')\n",
" tail_price2 = bill2.tail_amount[0]\n",
" except:\n",
" tail_price2 = '出错'\n",
" try:\n",
" bill3 = Billing(str(index),opCountry,packages,postcode,company_name='大健-GIGA',head_type=1,beizhu='1')\n",
" tail_price3 = bill3.tail_amount[0]\n",
" except:\n",
" tail_price3 = '出错'\n",
" \n",
" df.loc[df['单号']==order_num,'大健-METRO']= tail_price1\n",
" df.loc[df['单号']==order_num,'大健-CEVA']= tail_price2\n",
" df.loc[df['单号']==order_num,'大健-GIGA']= tail_price3\n",
" print(order_num)\n",
"# print(result)\n",
"# calculated_results.append(result)\n",
"# # print(packages_dict)\n",
"# calculated_df = pd.DataFrame(calculated_results)\n",
"# 将calculated_df的订单号改为order_id\n",
"# calculated_df.rename(columns={'订单号':'order_id'},inplace=True)\n",
"print(df)\n",
"df.to_clipboard(index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from utils.countryOperator import OperateCountry\n",
"from utils.logisticsBill import BillFactory\n",
"from utils.Package import Package, Package_group\n",
"import pandas as pd\n",
"from utils.gtools import MySQLconnect\n",
"# 美国 \n",
"from utils.logisticsBill import Billing\n",
"ods = MySQLconnect('ods')\n",
"cursor = ods.connect().cursor()\n",
"df = pd.read_excel(r'F:\\DOCUMENTS\\WXWork\\1688854527635889\\Cache\\File\\2025-04\\Litfad 4-10-25 with data analysis-2(1).xlsx',\n",
" sheet_name='CA- 96004885')\n",
"package_list = \",\".join(str(item) for item in df['ORDER#'].tolist())\n",
"print(package_list)\n",
"query = f\"SELECT package,length,width,hight,weight FROM `package_vol_info` WHERE `package` IN ({package_list})\"\n",
"result = cursor.execute(query)\n",
"packages_info = cursor.fetchall()\n",
"package_df = pd.DataFrame(packages_info, columns=['package','length','width','hight', 'weight'])\n",
"package_df.to_clipboard(index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from utils.gtools import MySQLconnect\n",
"ods = MySQLconnect('ods')\n",
"engine = ods.engine()\n",
"cursor = ods.connect().cursor()\n",
"df = pd.read_excel(r'D:\\test\\logistics\\售价_2024-01-01.xlsx')\n",
"batch_size = 50000 # 每次查询 500 个 order_id避免 SQL 语句过长\n",
"sku_list = df[\"SKU\"].tolist() \n",
"result_dfs1 = []\n",
"for i in range(0, len(sku_list), batch_size):\n",
" batch_order_ids = sku_list[i:i + batch_size] # 取当前批次的 order_id\n",
" param = \",\".join(f\"'{sku}'\" for sku in batch_order_ids)\n",
"\n",
" sku_sql = f\"\"\"\n",
" SELECT\n",
" SKU,\n",
" 产品售价,\n",
" `产品品类`,\n",
" `产品分类`\n",
" FROM\n",
" `stg_bayshop_litfad_sku` sku\n",
" LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` =spu.`产品PID`\n",
" WHERE\n",
" SKU IN ({param}) \n",
" \"\"\" \n",
" batch_df1 = pd.read_sql(sku_sql, con=engine) # 运行 SQL 查询\n",
" result_dfs1.append(batch_df1)\n",
" print(f\"已完成 {i} / {len(sku_list)}\")\n",
"sku_df = pd.concat(result_dfs1) # 合并结果\n",
"merge_df = pd.merge(df, sku_df, on=\"SKU\", how=\"left\") # 合并数据\n",
"merge_df.to_excel(r'D:\\test\\logistics\\售价_sku_1-9月.xlsx', index=False) # 保存结果"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"筛选取原尺寸还是理论预估尺寸\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_8364\\2603640831.py:4: FutureWarning: The provided callable <built-in function max> is currently using SeriesGroupBy.max. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"max\" instead.\n",
" all_df['SPU最大涨幅']=all_df.groupby('SPU')['售价涨跌幅'].transform(max)\n",
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_8364\\2603640831.py:5: FutureWarning: The provided callable <built-in function min> is currently using SeriesGroupBy.min. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"min\" instead.\n",
" all_df['SPU最小涨幅']=all_df.groupby('SPU')['售价涨跌幅'].transform(min)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10{\"stdout\":\"[{\\\"variableName\\\": \\\"ID_TO_MEANING\\\", \\\"type\\\": \\\"dictionary\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"builtins.dict\\\"}, {\\\"variableName\\\": \\\"NULL\\\", \\\"type\\\": \\\"unknown\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"_pydevd_bundle.pydevd_constants.Null\\\"}]\\n\",\"stderr\":\"\",\"mime\":[]}\n",
"10{\"stdout\":\"[{\\\"variableName\\\": \\\"ID_TO_MEANING\\\", \\\"type\\\": \\\"dictionary\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"builtins.dict\\\"}, {\\\"variableName\\\": \\\"NULL\\\", \\\"type\\\": \\\"unknown\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"_pydevd_bundle.pydevd_constants.Null\\\"}]\\n\",\"stderr\":\"\",\"mime\":[]}\n",
"10{\"stdout\":\"[{\\\"variableName\\\": \\\"ID_TO_MEANING\\\", \\\"type\\\": \\\"dictionary\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"builtins.dict\\\"}, {\\\"variableName\\\": \\\"NULL\\\", \\\"type\\\": \\\"unknown\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"_pydevd_bundle.pydevd_constants.Null\\\"}]\\n\",\"stderr\":\"\",\"mime\":[]}\n"
]
}
],
"source": [
"import pandas as pd\n",
"all_df = pd.read_excel('单包裹SKU售价分析1.xlsx',sheet_name=\"Sheet1\")\n",
"all_df = all_df[(all_df['是否有过修改记录']==\"否\")&(all_df['使用尺寸售价']!=\"ERP售价\")]\n",
"all_df['SPU最大涨幅']=all_df.groupby('SPU')['售价涨跌幅'].transform(max)\n",
"all_df['SPU最小涨幅']=all_df.groupby('SPU')['售价涨跌幅'].transform(min)\n",
"\n",
"filtered_df = all_df[(all_df['SPU最大涨幅'] <= 0.5) & (all_df['SPU最小涨幅'] >= -0.5)]\n",
"for index,row in filtered_df.iterrows():\n",
" if row['使用尺寸售价']==\"实际体积售价\":\n",
" length = str(row['长'])\n",
" width = str(row['宽'])\n",
" height = str(row['高'])\n",
" weight = str(row['重量'])\n",
" else:\n",
" length = str(row['理论长'])\n",
" width = str(row['理论宽'])\n",
" height = str(row['理论高'])\n",
" weight = str(row['理论重量'])\n",
" filtered_df.loc['尺寸重量']=weight+\"|\"+length+\"*\"+width+\"*\"+height+\"*1,\"\n",
" print(filtered_df.loc['尺寸重量'])\n",
"spu_list = filtered_df['SPU'].unique()\n",
"filtered_df = filtered_df[['SKU','成本价','尺寸重量']]\n",
"filtered_df "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}