{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_18728\\4048319598.py:101: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n", " df=pd.read_sql(sql,db.con)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 包裹测量时间 order_id SKU 订单时间 \\\n", "0 2025-03-01 14:06:35 250225025414549 22696 2025-February-25th \n", "1 2025-04-02 15:44:20 250330050647870 220934 2025-March-30th \n", "2 2025-03-03 16:31:27 250225062602893 221468 2025-February-25th \n", "3 2025-03-15 10:37:19 250311002220317 222890 2025-March-11th \n", "4 2025-04-03 18:16:25 250331190637251 224072 2025-March-31st \n", "... ... ... ... ... \n", "13489 2025-05-04 16:39:49 250429213610207 2207148642 2025-April-29th \n", "13490 2025-05-04 13:03:06 250501025810935 2207161446 2025-May-1st \n", "13491 2025-05-05 17:41:38 250430063602142 2207176415 2025-April-30th \n", "13492 2025-05-02 13:33:34 250430035236822 2207192083 2025-April-30th \n", "13493 2025-05-08 20:15:50 250430104809255 2207202010 2025-April-30th \n", "\n", " 包裹号 快递公司 运输方式 目的国 postcode ERP采购价 \\\n", "0 991344640 海MS-FEDEX 海运 United States 55414 50.0 \n", "1 991399418 海MS-FEDEX 海运 United States 37014 45.0 \n", "2 991348761 海MS-FEDEX02 海运 United States 11803 65.0 \n", "3 991369401 海MS-FEDEX02 海运 United States 54568-9248 35.0 \n", "4 991401728 海MS-FEDEX 海运 United States 34112 180.0 \n", "... ... ... ... ... ... ... \n", "13489 991447512 海MS-FEDEX-SAIR-H 海运 United States 32168 NaN \n", "13490 991446594 海MS-FEDEX-SAIR-H 海运 United States 92647 NaN \n", "13491 991449178 海MS-FEDEX-SAIR-H 海运 United States 90019 518.0 \n", "13492 991444756 海MS-FEDEX-SAIR-H 海运 United States 78572 228.0 \n", "13493 991454248 海MS-FEDEX-SAIR-H 海运 United States 89129 NaN \n", "\n", " ERP包裹数据 \\\n", "0 {\"包裹1\": {\"宽\": \"21.0\", \"长\": \"21.0\", \"高\": \"21.0\"... \n", "1 {\"包裹1\": {\"宽\": \"41.0\", \"长\": \"41.0\", \"高\": \"21.0\"... \n", "2 {\"包裹1\": {\"宽\": \"32.0\", \"长\": \"33.0\", \"高\": \"28.0\"... \n", "3 {\"包裹1\": {\"宽\": \"28.0\", \"长\": \"28.0\", \"高\": \"13.0\"... \n", "4 {\"包裹1\": {\"宽\": \"25.0\", \"长\": \"75.0\", \"高\": \"28.0\"... \n", "... ... \n", "13489 {\"包裹1\": {\"宽\": \"70.0\", \"长\": \"190.0\", \"高\": \"20.0... \n", "13490 {\"包裹1\": {\"宽\": \"48.0\", \"长\": \"54.0\", \"高\": \"14.0\"... \n", "13491 {\"包裹1\": {\"宽\": \"30.0\", \"长\": \"193.0\", \"高\": \"13.0... \n", "13492 {\"包裹1\": {\"宽\": \"51.0\", \"长\": \"53.0\", \"高\": \"33.0\"... \n", "13493 {\"包裹1\": {\"宽\": \"70.0\", \"长\": \"130.0\", \"高\": \"45.0... \n", "\n", " 实际包裹数据 \n", "0 {\"991344640\": {\"长\": 45.00, \"宽\": 45.00, \"高\": 17... \n", "1 {\"991399418\": {\"长\": 41.00, \"宽\": 41.00, \"高\": 21... \n", "2 {\"991348761\": {\"长\": 31.00, \"宽\": 31.00, \"高\": 24... \n", "3 {\"991369401\": {\"长\": 28.00, \"宽\": 28.00, \"高\": 12... \n", "4 {\"991401728\": {\"长\": 75.00, \"宽\": 25.00, \"高\": 16... \n", "... ... \n", "13489 {\"991447512\": {\"长\": 208.00, \"宽\": 54.00, \"高\": 1... \n", "13490 {\"991446594\": {\"长\": 53.00, \"宽\": 48.00, \"高\": 20... \n", "13491 {\"991449178\": {\"长\": 193.00, \"宽\": 30.00, \"高\": 1... \n", "13492 {\"991444756\": {\"长\": 53.00, \"宽\": 51.00, \"高\": 33... \n", "13493 {\"991454248\": {\"长\": 141.00, \"宽\": 76.00, \"高\": 2... \n", "\n", "[13494 rows x 12 columns]\n" ] } ], "source": [ "import pandas as pd\n", "from utils.gtools import MySQLconnect\n", "\n", "# 读取需要计算的包裹信息\n", "with MySQLconnect('ods') as db:\n", " sql = r\"\"\" \n", " WITH\n", "t1 AS (\n", "SELECT\n", "order_id,\n", "SKU,\n", "order_date,\n", "sum(CASE WHEN opl.order_product_id LIKE '%\\_%' ESCAPE '\\\\' \n", " AND opl.order_product_id NOT LIKE '%\\_%\\_%' ESCAPE '\\\\' THEN product_num END) AS product_num,\n", "DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n", "count(opl.SKU) AS 产品种类\n", "FROM\n", "dws.fact_order_product_list opl\n", "WHERE\n", " NOT EXISTS (\n", " SELECT 1 \n", " FROM dws.log_order_reissue_detail AS r \n", " WHERE r.order_product_id = opl.order_product_id\n", " )\n", "AND order_date >= \"20250101\"\n", "AND order_date < \"20250601\"\n", "AND SKU <> \"\"\n", "GROUP BY order_id\n", ")\n", ",\n", "t2 AS (\n", "SELECT\t\t\t\n", " a.`包裹测量时间`,\n", "\t\t\t\t\t\tt1.order_id,\n", "\t\t\t\t\t\tt1.SKU,\n", "\t\t\t\t\t\tt1.order_date,\n", " a.包裹号,\n", " a.快递公司,\n", " a.运输方式,\n", "\t\t\t\t\t\ta.`目的国`,\n", " d.postcode,\n", " CONCAT(\n", " '\"', b.package, '\": {',\n", " '\"长\": ', length, ', ',\n", " '\"宽\": ', width, ', ',\n", " '\"高\": ', hight, ', ',\n", " '\"重量\": ', weight, '}'\n", " ) AS package_json\n", " FROM\n", "\t\t\t\tt1\n", " LEFT JOIN order_express a ON t1.order_id = a.单号\n", " JOIN package_vol_info b ON a.`包裹号` = b.package\n", " JOIN order_list d ON a.`单号` = d.order_id \n", " WHERE\n", " a.`包裹状态` IN ( '客户签收', '已经投递') \n", " AND b.hight > 0 \n", " AND b.length > 0 \n", " AND b.width > 0 \n", " AND b.hight > 0 \n", " AND b.weight > 0\n", " AND a.`目的国` = \"United States\"\n", "\t\t\t\t\t\tAND t1.product_num = 1\n", "\t\t\t\t\t\tAND t1.产品种类=1\n", "\t\t\t\t\t\tAND a.`包裹测量时间` >= '2025-03-01'\n", "\t\t\t\t\t\tAND a.`包裹测量时间` < '2025-06-01'\n", "),\n", "t3 AS (\n", "SELECT\n", "t2.*,\n", "sku.成本价 AS ERP采购价,\n", "ess.包裹数据 AS ERP包裹数据,\n", "CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,\n", "ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn\n", "FROM\n", "t2\n", "LEFT JOIN ads.new_erp_sku_size ess ON t2.SKU=ess.SKU\n", "LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU\n", "WHERE\n", "ess.`包裹数据`<>''\n", "GROUP BY order_id\n", ")\n", "SELECT\n", "包裹测量时间,\n", "order_id,\n", "SKU,\n", "DATE_FORMAT(order_date,\"%Y-%M-%D\") AS 订单时间,\n", "包裹号,\n", "`快递公司`,\n", "`运输方式`,\n", "`目的国`,\n", "postcode,\n", "ERP采购价,\n", "ERP包裹数据,\n", "实际包裹数据\n", "FROM\n", "t3\n", "WHERE\n", "rn=1\n", "\n", " \"\"\"\n", " df=pd.read_sql(sql,db.con)\n", " print(df)\n", " df.to_clipboard(index=False)\n", "\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.countryOperator import OperateCountry\n", "from utils.logisticsBill import BillFactory\n", "from utils.Package import Package, Package_group\n", "import pandas as pd\n", "# 美国 \n", "from utils.logisticsBill import Billing\n", "import requests\n", "def get_package(packages_id):\n", " \"\"\"\n", " 根据订单号的包裹ID,返回包裹类,包裹明细,包裹实重,包裹体积重6000\n", " \"\"\"\n", " url = f'https://cp.maso.hk/index.php?main=biphp&act=package_fund&key=JJ57S744ZJR26ORGRMMSJ8V4D4UVF5AU&package={package_id}'\n", " resp = requests.get(url).json()\n", " if resp['code'] == \"0\":\n", " weight = int(float(resp['data'][0]['weight'])*1000)\n", " package_length = resp['data'][0]['l'].replace(\",\",\"\") if len(resp['data'][0]['l'])>0 else \"0\"\n", " package_width = resp['data'][0]['w'].replace(\",\",\"\") if len(resp['data'][0]['w'])>0 else \"0\"\n", " package_hight = resp['data'][0]['h'].replace(\",\",\"\") if len(resp['data'][0]['h'])>0 else \"0\"\n", " return float(package_length),float(package_width),float(package_hight),int(weight)\n", "\n", "# df1=pd.read_excel(r\"F:\\DOCUMENTS\\WXWork\\1688854527635889\\Cache\\File\\2025-05\\新建Microsoft Excel 工作表 (2)(1).xlsx\",sheet_name=\"包裹成本信息\")\n", "# df = df1[(df1['快递公司']==\"海MS-大健-METRO\")|(df1['快递公司']==\"海MS-大健-CEVA\")|(df1['快递公司']==\"空LAX-大健-CEVA\")]\n", "# df = df1\n", "df_grouped= df.groupby('order_id')\n", "# for index,row in df.iterrows():\n", "# if \"CEVA\" in row['快递公司']:\n", "# df.loc[index,'类型'] = \"卡派ceva\"\n", "# elif \"GIGA\" in row['快递公司']:\n", "# df.loc[index,'类型'] = \"卡派giga\"\n", "# elif \"METRO\" in row['快递公司']:\n", "# df.loc[index,'类型'] = \"卡派metro\"\n", "# elif any(x in row['快递公司'] for x in [\"FEDEX-SAIR-G\", \"FEDEX-SAIR-H\", \"FEDEX02\"]):\n", "# df.loc[index,'类型'] = \"快递\"\n", "# else:\n", "# df.loc[index,'类型'] = \"其他\"\n", "\n", "for order_num, group in df_grouped:\n", " if pd.isna(order_num) or str(order_num).lower() == \"nan\" or order_num == 0 or order_num == \"手工包裹\":\n", " continue\n", " order_num = str(int(order_num))\n", " # opCountry = OperateCountry(group['目的国'].iloc[0])\n", " opCountry = OperateCountry('US')\n", " postcode = group['postcode'].iloc[0]\n", " if pd.isna(postcode) or str(postcode).lower() == \"nan\":\n", " continue\n", " packages1= Package_group()\n", " packages2= Package_group()\n", " packages3= Package_group()\n", "\n", " # 记录包裹号\n", " packages_id1=[]\n", " packages_id2=[]\n", " packages_id3=[]\n", "\n", " if \"海\" in group['快递公司'].iloc[0]:\n", " head_type = 1\n", " else:\n", " head_type = 0\n", "\n", " for index,row in group.iterrows():\n", " print(f\"正在处理包裹 {row['包裹号']}\")\n", " df.at[index, 'order_id'] = str(int(row['order_id']))\n", " # print(f\"正在处理订单 {df.loc[index,'order_id']}\")\n", " if any(pd.isna([row['长'], row['宽'], row['高'], row['重量']])):\n", " row['长'],row['宽'],row['高'],row['重量'] = get_package(row['包裹号'])\n", "\n", " if \"FEDEX-SAIR-G\" in row['快递公司']:\n", " company_name = \"Fedex-GROUD\"\n", " elif \"FEDEX-SAIR-H\" in row['快递公司']:\n", " company_name = \"Fedex-HOME\"\n", " elif \"FEDEX02\" in row['快递公司']:\n", " company_name = \"Fedex-彩虹小马\"\n", "\n", " package = Package(row['包裹号'],row['长'],row['宽'],row['高'],row['重量'])\n", " if row['类型'] == \"快递\":\n", " packages= Package_group()\n", " packages.add_package(package)\n", " bill = Billing(str(index),opCountry,packages,postcode,company_name=company_name,head_type=head_type,beizhu='1')\n", " head_price = bill.head_amount[0]\n", " tail_price = bill.tail_amount[0]\n", " df.loc[index,'头程'] = head_price\n", " df.loc[index,'尾端'] = tail_price\n", " elif row['类型'] == \"卡派ceva\":\n", " packages1.add_package(package)\n", " packages_id1.append(row['包裹号'])\n", " elif row['类型'] == \"卡派giga\":\n", " packages2.add_package(package)\n", " packages_id2.append(row['包裹号'])\n", " elif row['类型'] == \"卡派metro\":\n", " packages3.add_package(package)\n", " packages_id3.append(row['包裹号'])\n", " else:\n", " continue\n", " \n", " if len(packages1)>0:\n", " bill1 = Billing(str(index),opCountry,packages1,postcode,company_name=\"大健-CEVA\",head_type=head_type,beizhu='1')\n", " for package_id in packages_id1:\n", " df.loc[df['包裹号']==package_id,'头程'] = bill1.head_amount[0]\n", " df.loc[df['包裹号']==package_id,'尾端'] = bill1.tail_amount[0]\n", " if len(packages2)>0:\n", " bill2 = Billing(str(index),opCountry,packages2,postcode,company_name=\"大健-GIGA\",head_type=head_type,beizhu='1')\n", " for package_id in packages_id2:\n", " df.loc[df['包裹号']==package_id,'头程'] = bill2.head_amount[0]\n", " df.loc[df['包裹号']==package_id,'尾端'] = bill2.tail_amount[0]\n", " if len(packages3)>0:\n", " bill3 = Billing(str(index),opCountry,packages3,postcode,company_name=\"大健-Metro\",head_type=head_type,beizhu='1')\n", " for package_id in packages_id3:\n", " df.loc[df['包裹号']==package_id,'头程'] = bill3.head_amount[0]\n", " df.loc[df['包裹号']==package_id,'尾端'] = bill3.tail_amount[0]\n", "\n", "df.to_clipboard(index=False)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.countryOperator import OperateCountry\n", "from utils.logisticsBill import BillFactory\n", "from utils.Package import Package, Package_group\n", "# 美国 \n", "from utils.logisticsBill import Billing\n", "\n", "df_grouped= df.groupby('订单号')\n", "calculated_results = []\n", "for order_num, group in df_grouped:\n", " # opCountry = OperateCountry(group['目的国'].iloc[0])\n", " opCountry = OperateCountry('US')\n", " postcode = '33900'\n", " packages= Package_group()\n", " packages_dict = {}\n", " volume_weight = 0\n", " weight = 0\n", " for index,row in group.iterrows():\n", " package = Package(row['包裹号'],row['长'],row['宽'],row['高'],row['重量'])\n", " packages.add_package(package)\n", " packages_dict[row['包裹号']] = {\n", " \"长\": row['长'],\n", " \"宽\": row['宽'],\n", " \"高\": row['高'],\n", " \"重量\": row['重量']\n", " }\n", " # weight += row['重量']/1000\n", " # volume_weight += package.get_volume_weight(6000)\n", " # postcode = row['postcode']\n", " # head_type = 1 if row['运输方式'] == '海运' else 0\n", " # try:\n", " # bill = Billing(str(index),opCountry,packages,postcode,company_name=None,head_type=head_type,beizhu='1')\n", " # head_price = bill.head_amount[0]\n", " # tail_price = bill.tail_amount[0]\n", " # tail_currency = bill.tail_amount[1]\n", " # except:\n", " # head_price ='出错'\n", " # tail_price = '出错'\n", " # tail_currency = '出错'\n", " result = {\n", " '订单号': order_num,\n", " '目的国': group['目的国'].iloc[0], # Same for all rows in the group\n", " '快递公司': group['快递公司'].iloc[0], # Same for all rows in the group\n", " '运输方式': group['运输方式'].iloc[0], # Same for all rows in the group\n", " 'postcode': group['postcode'].iloc[0], # Same for all rows in the group\n", " '销售额USD':group['销售额'].iloc[0],\n", " '真实尾端CNY': group['真实尾端CNY'].sum(),# Same for all rows in the group\n", " '真实头程CNY': group['真实头程CNY'].sum(), # Same for all rows in the group\n", " \n", " # '头程预测': head_price,\n", " # '尾端预测': tail_price,\n", " # '尾端货币': tail_currency,\n", " '产品品类': group['产品品类'].iloc[0], # Same for all rows in the group\n", " '产品分类': group['产品分类'].iloc[0], # Same for all rows in the group\n", " 'SKU': group['SKU'].iloc[0], # Same for all rows in the group\n", " '订单日期': group['订单日期'].iloc[0], # Same for all rows in the group\n", " 'packages_dict': packages_dict,\n", " '包裹数据':group['包裹数据'].iloc[0],\n", "\n", " }\n", " calculated_results.append(result)\n", "\n", " print(packages_dict)\n", "calculated_df = pd.DataFrame(calculated_results)\n", "# 将calculated_df的订单号改为order_id\n", "calculated_df.rename(columns={'订单号':'order_id'},inplace=True)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "len(df)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.gtools import MySQLconnect\n", "\n", "ods = MySQLconnect(\"ods\")\n", "engine = ods.engine()\n", "cursor = ods.connect().cursor()\n", "\n", "\n", "batch_size = 50000 # 每次查询 500 个 order_id,避免 SQL 语句过长\n", "order_id_list = df[\"order_id\"].tolist()\n", "\n", "# 存储分批查询的结果\n", "result_dfs1 = []\n", "result_dfs2 = []\n", "for i in range(0, len(order_id_list), batch_size):\n", " batch_order_ids = order_id_list[i:i + batch_size] # 取当前批次的 order_id\n", " param = \",\".join(f\"'{order_id}'\" for order_id in batch_order_ids)\n", "\n", " purchase_order_sql = f\"\"\"\n", " WITH t1 AS (\n", " SELECT LEFT(ol.out_detials_outlink_id, 15) AS order_id,\n", " SUM(out_detials_qty * price) AS instock_cost,\n", " NULL AS buy_cost\n", " FROM ods.outstock_list ol\n", " JOIN ods.instock_list il ON ol.store_in_id = il.id \n", " WHERE LEFT(ol.out_detials_outlink_id, 15) IN ({param})\n", " GROUP BY LEFT(ol.out_detials_outlink_id, 15)\n", " \n", " UNION ALL\n", " \n", " SELECT LEFT(order_product_id, 15) AS order_id, \n", " NULL AS instock_cost,\n", " SUM(buy_num * actual_price) AS buy_cost\n", " FROM warehouse_purchasing\n", " WHERE LEFT(order_product_id, 15) IN ({param}) \n", " AND buy_audit = \"采购完成\"\n", " GROUP BY LEFT(order_product_id, 15)\n", " )\n", " SELECT order_id,\n", " SUM(CASE \n", " WHEN instock_cost IS NULL THEN buy_cost\n", " ELSE instock_cost \n", " END) AS 采购成本\n", " FROM t1 \n", " GROUP BY order_id\n", " \"\"\"\n", "\n", "# sql_biaozhun = f\"\"\"\n", " \n", "# SELECT\n", "# order_id,\n", "# sum(s1.`成本价`*product_num) AS \"入库采购价\"\n", "# FROM\n", "# dws.order_product_list opl LEFT JOIN ods.stg_bayshop_litfad_sku s1 on s1.sku = opl.sku\n", "# LEFT JOIN dwd.dim_erp_sku_package_vol_info des on des.erp_sku = opl.sku\n", " \n", "# WHERE\n", "# order_id IN ({param}) \n", "# AND order_product_id regexp '^[0-9]{{15}}_[1-9][0-9]*$'\n", "# GROUP BY order_id \n", " \n", "# \"\"\"\n", " batch_df1 = pd.read_sql(purchase_order_sql, con=engine) # 运行 SQL 查询\n", " result_dfs1.append(batch_df1) # 存入结果列表\n", " # batch_df2 = pd.read_sql(sql_biaozhun, con=engine) # 运行 SQL 查询\n", " # result_dfs2.append(batch_df2) # 存入结果列表\n", " print(f\"已完成 {i + batch_size} 个 order_id 的查询\")\n", "\n", "# 合并所有查询结果\n", "purchase_order_df1 = pd.concat(result_dfs1, ignore_index=True)\n", "# purchase_order_df2 = pd.concat(result_dfs2, ignore_index=True)\n", "purchase_order_df1[\"order_id\"] = purchase_order_df1[\"order_id\"].astype(str)\n", "# purchase_order_df2[\"order_id\"] = purchase_order_df2[\"order_id\"].astype(str)\n", "# purchase_order_df = pd.merge(purchase_order_df1, purchase_order_df2, on='order_id', how='left')\n", "\n", "# 转换数据类型,确保匹配\n", "df[\"order_id\"] = df[\"order_id\"].astype(str)\n", "\n", "# 进行合并\n", "order_id_df_cal = pd.merge(df, purchase_order_df1, on='order_id', how='left')\n", "# order_id_df_cal.drop_duplicates(subset=[\"order_id\"], inplace=True)\n", "# 复制到剪贴板\n", "order_id_df_cal.to_clipboard(index=False)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(len(df),len(purchase_order_df1),len(order_id_df_cal))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 用系统售价模型和实际体积计算实际应该有的售价和订单价" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import json\n", "import pandas as pd\n", "order_id_df_cal=pd.read_excel(r\"F:\\DOCUMENTS\\WXWork\\1688854527635889\\Cache\\File\\2025-05\\新建Microsoft Excel 工作表 (2)(1).xlsx\",sheet_name=\"包裹成本信息\")\n", "df=order_id_df_cal.groupby('order_id')\n", "for order_num,group in df:\n", " packages_dict = {}\n", " for index,row in group.iterrows():\n", " if row[\"长\"]==0 or row[\"宽\"]==0 or row[\"高\"]==0 or row[\"重量\"]==0 or pd.isnull(row[\"长\"]) or pd.isnull(row[\"宽\"]) or pd.isnull(row[\"高\"]) or pd.isnull(row[\"重量\"]):\n", " continue\n", " package_dict = {\n", " \"长\":row['长'],\n", " \"宽\":row['宽'],\n", " \"高\":row['高'],\n", " \"重量\":row['重量']\n", " }\n", " packages_dict[row['包裹号']] = package_dict\n", " print(packages_dict)\n", " order_id_df_cal.loc[order_id_df_cal['order_id']==order_num,'packages_dict'] = json.dumps(packages_dict) if len(packages_dict)>0 else None\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 用系统售价模型和实际体积计算实际应该有的售价和订单价\n", "from sell.sell_price import call_sell_and_order_price\n", "import re\n", "order_id_df_cal['预测售价'] = 0\n", "for index, row in order_id_df_cal.iterrows():\n", " # 如果没有预测售价这一列,正常执行\n", " if row['预测售价'] > 0:\n", " continue\n", " print(row['packages_dict'])\n", " if not isinstance(row['packages_dict'], str):\n", " print(f\"跳过第 {index} 行,packages_dict 不是字符串:{row['packages_dict']}\")\n", " continue\n", " if not isinstance(package_dict, dict) or not package_dict:\n", " continue\n", " package_dict = json.loads(row['packages_dict'])\n", " # biaozhun_dict = row['包裹数据']\n", " # if isinstance(biaozhun_dict, str):\n", " # biaozhun_dict = eval(biaozhun_dict)\n", " # if biaozhun_dict is None:\n", " # continue\n", "\n", " # for key, value in biaozhun_dict.items():\n", " # for k, v in value.items():\n", " # if isinstance(v, str):\n", " # v = v.replace(',', '') # 移除千分位逗号\n", " # numbers = re.findall(r\"\\d+\\.?\\d*\", v) # 提取所有数字部分\n", " # if numbers:\n", " # biaozhun_dict[key][k] = float(numbers[0])\n", "\n", " \n", "\n", " price = row['总订单采购成本']\n", " # price1=row['入库采购价']\n", " sell_price,order_price,order_type = call_sell_and_order_price(price, package_dict)\n", " # sell_price1,order_price1,order_type1 = call_sell_and_order_price(price1, biaozhun_dict)\n", " order_id_df_cal.loc[order_id_df_cal['order_id']==row['order_id'],'预测售价'] = sell_price\n", " order_id_df_cal.loc[order_id_df_cal['order_id']==row['order_id'],'预测订单价'] = order_price\n", " order_id_df_cal.loc[order_id_df_cal['order_id']==row['order_id'],'预测订单类型'] = order_type\n", " # order_id_df_cal.loc[index,'ERP售价'] = sell_price1\n", " # order_id_df_cal.loc[index,'ERP订单价'] = order_price1\n", " # order_id_df_cal.loc[index,'ERP订单类型'] = order_type1\n", " print(f\"第{index+1}个订单{row['order_id']}的预测售价为{sell_price},预测订单价为{order_price},订单类型为{order_type}\")\n", "order_id_df_cal.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "order_id_df_cal.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import re\n", "\n", "for index, row in order_id_df_cal.iterrows():\n", " package_dict = row['packages_dict']\n", " \n", " if not isinstance(package_dict, dict):\n", " continue # 跳过无效行\n", "\n", " price = row['总订单采购成本']\n", "\n", " try:\n", " sell_price, order_price, order_type = call_sell_and_order_price(price, package_dict)\n", " except Exception as e:\n", " print(f\"订单 {row['order_id']} 报错: {e}\")\n", " continue\n", "\n", " order_id_df_cal.loc[index, '预测售价'] = sell_price\n", " order_id_df_cal.loc[index, '预测订单价'] = order_price\n", " order_id_df_cal.loc[index, '预测订单类型'] = order_type\n", "\n", " print(f\"第{index+1}个订单 {row['order_id']} 的预测售价为 {sell_price},预测订单价为 {order_price},订单类型为 {order_type}\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "order_id_df_cal.to_clipboard(index=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "计算各条目订单SKU的标准体积下的物流成本和采购成本\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from utils.gtools import MySQLconnect\n", "from utils.countryOperator import OperateCountry\n", "from utils.logisticsBill import BillFactory\n", "from utils.Package import Package, Package_group\n", "import json\n", "# 美国 \n", "from utils.logisticsBill import Billing\n", "import re\n", "\n", "def get_package_info_by_sql(order_df):\n", " packages = Package_group()\n", " for index, row in order_df.iterrows():\n", " order_id = row['订单号']\n", " if row['包裹数据'] is None:\n", " return None, \"\"\n", " row['包裹数据'] = json.loads(row['包裹数据'])\n", " item_list = []\n", " for package in row['包裹数据'].values():\n", " item = {}\n", " for key, value in package.items():\n", " try:\n", " # 使用正则表达式提取数字部分\n", " number_str = re.findall(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n", " if number_str:\n", " item[key] = float(number_str[0]) # 取第一个匹配到的数字并转换为 float\n", " else:\n", " item[key] = value # 如果没有数字部分,保留原值\n", " except ValueError:\n", " item[key] = value # 如果遇到无法转换的值,保留原值\n", " item_list.append(item)\n", " \n", " for item in item_list:\n", " if item['长'] == 0 or item['宽'] == 0 or item['高'] == 0 or item['重量'] == 0:\n", " return None, \"\"\n", " package = Package(row['SKU'], item['长'], item['宽'], item['高'], item['重量'])\n", " packages.add_package(package)\n", " \n", " return order_id, packages\n", "\n", "\n", "ods = MySQLconnect(\"ods\")\n", "engine = ods.engine()\n", "cursor = ods.connect().cursor()\n", "with MySQLconnect(\"ods\") as db:\n", " engine = db.engine()\n", " cursor = db.connect().cursor()\n", " sql = \"\"\"\n", " WITH t1 as \n", " (\n", " SELECT\n", " opl.order_id,\n", " -- opl.cate_3,\n", " `产品品类`,\n", " `产品分类`,\n", " # opl.SKU,\n", " SUM(opl.as_value_amount) AS 销售额,\n", " COUNT(`产品分类`) AS 条目数,\n", " sum(sku.`成本价`*product_num) AS \"入库采购价\"\n", " FROM\n", " dws.fact_order_product_list opl \n", " LEFT JOIN stg_bayshop_litfad_sku sku ON opl.SKU = sku.SKU\n", " LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`\n", " LEFT JOIN order_express oe ON opl.order_id = oe.单号\n", " WHERE\n", " opl.order_date >= '20240901'\n", " AND opl.order_date<'20250101'\n", " AND opl.site_name REGEXP 'Litfad'\n", " AND opl.SKU IS NOT NULL\n", " AND EXISTS (\n", " SELECT 1 \n", " FROM order_express oe_sub \n", " WHERE oe_sub.单号 = opl.order_id \n", " AND oe_sub.包裹状态 REGEXP '签收|投递'\n", " )\n", " GROUP BY opl.order_id\n", " )\n", " SELECT\n", " a.目的国,\n", " a.单号 AS `订单号`,\n", " a.运输方式,\n", " d.order_price_dollar AS '销售额',\n", " d.postcode,\n", " t1.产品品类,\n", " t1.产品分类,\n", " t1.入库采购价\n", " FROM\n", " order_express a\n", " JOIN order_list d ON a.`单号` = d.order_id \n", " JOIN t1 ON a.单号 = t1.order_id\n", " WHERE\n", " 目的国 IN ('United Kingdom','United States','Australia','Germany','Spain','France')\n", " AND t1.条目数 = 1\n", " AND t1.产品分类 IS NOT NULL\n", " ORDER BY d.order_id\n", " \"\"\"\n", " biaozhun_df = pd.read_sql(sql, engine)\n", " print(f\"获取订单基本信息{biaozhun_df.shape}\")\n", " batch_size = 10000 \n", " order_id_list = biaozhun_df[\"订单号\"].tolist()\n", " result_dfs=[]\n", " for i in range(0, len(order_id_list), batch_size):\n", " batch_order_ids = order_id_list[i:i + batch_size] # 取当前批次的 order_id\n", " param = \",\".join(f\"'{order_id}'\" for order_id in batch_order_ids)\n", "\n", " packages_sql =f\"\"\"\n", " SELECT\n", " CAST(opl.order_id AS CHAR) AS 订单号,\n", " opl.SKU,\n", " 包裹数据\n", " FROM\n", " dws.order_product_list opl\n", " LEFT JOIN ads.new_erp_sku_size spi ON opl.SKU =spi.SKU\n", " WHERE\n", " order_id in ({param})\n", " AND\n", " opl.order_product_id REGEXP '[0-9]{{15}}_[0-9]*$'\n", " \"\"\"\n", " batch_df = pd.read_sql(packages_sql, engine)\n", " result_df = batch_df.groupby(\"订单号\").apply(lambda x: pd.Series(get_package_info_by_sql(x), index=[\"订单号\", \"Packages\"])).reset_index(drop=True)\n", " result_dfs.append(result_df)\n", " print(f\"已完成 {i} / {len(order_id_list)}\")\n", " packages_df = pd.concat(result_dfs, ignore_index=True)\n", " # # 去除空值\n", " # packages_df = packages_df[packages_df['Packages'].notnull()]\n", " # 转换数据类型,确保匹配\n", " packages_df[\"订单号\"] = packages_df[\"订单号\"].astype(str)\n", " biaozhun_df[\"订单号\"] = biaozhun_df[\"订单号\"].astype(str)\n", " # 进行合并\n", " biaozhun_df = pd.merge(biaozhun_df, packages_df, on='订单号', how='left')\n", " biaozhun_df.drop_duplicates(subset=[\"订单号\"], inplace=True)\n", " print(biaozhun_df.head())\n", "\n", " for index, row in biaozhun_df.iterrows():\n", " order_id = row['订单号']\n", " packages = row['Packages'] \n", " opCountry = OperateCountry('US')\n", " postcode ='33900'\n", " head_type = 1 if row['运输方式'] == '海运' else 0\n", " if packages is None:\n", " continue\n", " try:\n", " bill = Billing(str(index),opCountry,packages,postcode,company_name=None,head_type=head_type,beizhu='1')\n", " head_price = bill.head_amount[0]\n", " tail_price = bill.tail_amount[0]\n", " tail_currency = bill.tail_amount[1]\n", " except:\n", " head_price ='出错'\n", " tail_price = '出错'\n", " tail_currency = '出错'\n", " print(f\"{order_id} 头费:{head_price} {bill.head_amount[1]} 尾费:{tail_price} {tail_currency}\")\n", " biaozhun_df.loc[index, '标准头程CNY'] = head_price\n", " biaozhun_df.loc[index, '标准尾程'] = tail_price\n", " biaozhun_df.loc[index, '尾程币种'] = tail_currency\n", "biaozhun_df.to_clipboard(index=False)\n", "\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "biaozhun_df.to_clipboard(index=False)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.gtools import MySQLconnect\n", "import pandas as pd\n", "df=pd.read_excel(r\"F:\\DOCUMENTS\\WXWork\\1688854527635889\\Cache\\File\\2025-03\\导出订单维护任务数据2025-3-20.xlsx\",sheet_name=\"导出订单维护任务数据2025-3-20\")\n", "# df['订单数量'] = df['订单数量'].astype(str)\n", "# df = df[df['订单数量'].str.len() > 0]\n", "# df['订单数量'] = df['订单数量'].astype(float)\n", "# df = df[df['订单数量']>0]\n", "df=df[['erp sku','订单号']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.countryOperator import OperateCountry\n", "from utils.logisticsBill import BillFactory\n", "from utils.Package import Package, Package_group\n", "# 美国 \n", "from utils.logisticsBill import Billing\n", "df = df[df['erp sku'] != 0]\n", "# df的订单号转换字符串\n", "df['订单号'] = df['订单号'].astype(str)\n", "# 过滤掉订单号长度为0的行\n", "df = df[df['订单号'].str.len() > 0]\n", "\n", "sku_df = pd.DataFrame()\n", "with MySQLconnect(\"ods\") as db:\n", " engine = db.engine()\n", " conn = db.connect()\n", " cursor = conn.cursor()\n", " df_group = df.groupby('erp sku')\n", " for sku, group in df_group:\n", " for index1,row in group.iterrows():\n", " order_list = group[\"订单号\"].tolist()\n", " param = \",\".join(f\"'{order_id}'\" for order_id in order_list)\n", " sql = f\"\"\"\n", " SELECT\n", " order_id\n", " FROM\n", " dws.fact_order_product_list\n", " WHERE\n", " SKU = {sku}\n", " AND order_id IN ({param})\n", " and site_type IN (\"独立站\")\n", " group by order_id\n", " \"\"\"\n", " # 找到sku的订单号,根据订单号计算该订单号的利润情况\n", " order_id_df = pd.read_sql(sql, con=engine)\n", " if order_id_df.empty:\n", " continue\n", " order_id_df['sku']=sku\n", " sku_df = pd.concat([sku_df, order_id_df], axis=0, ignore_index=True)\n", " print(f\"sku:{sku} 订单号数量:{len(order_id_df)}\")\n", "print(sku_df)\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ods = MySQLconnect(\"ods\")\n", "engine = ods.engine()\n", "cursor = ods.connect().cursor()\n", "sql = \"\"\"\n", " SELECT\n", "opl.order_id,\n", "`产品品类`,\n", "`产品分类`,\n", "SUM(opl.as_value_amount) AS 销售额,\n", "COUNT(`产品分类`) AS 条目数\n", "FROM\n", "dws.fact_order_product_list opl \n", "LEFT JOIN stg_bayshop_litfad_sku sku ON opl.SKU = sku.SKU\n", "LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`\n", "WHERE\n", "opl.order_date >= '20240901'\n", "AND opl.order_date<'20250101'\n", "AND opl.site_name REGEXP 'Litfad'\n", "GROUP BY opl.order_id\n", "\"\"\"\n", "sku_df = pd.read_sql(sql, con=engine)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 找采购价\n", "from utils.gtools import MySQLconnect\n", "\n", "ods = MySQLconnect(\"ods\")\n", "engine = ods.engine()\n", "cursor = ods.connect().cursor()\n", "import pandas as pd\n", "order_id_list = sku_df[\"order_id\"].tolist()\n", "param = \",\".join(f\"'{order_id}'\" for order_id in order_id_list)\n", "# 取采购费\n", "purchase_order_sql = f\"\"\"\n", "with t1 AS (SELECT LEFT\n", " ( ol.out_detials_outlink_id, 15 ) AS order_id,\n", " SUM( out_detials_qty * price )/ 7 AS instock_cost,\n", " NULL AS buy_cost\n", "FROM\n", " ods.outstock_list ol\n", " JOIN ods.instock_list il ON ol.store_in_id = il.id \n", "WHERE\n", " LEFT ( ol.out_detials_outlink_id, 15 ) IN ({param}) \n", "GROUP BY\n", " LEFT ( ol.out_detials_outlink_id, 15 )\n", "UNION ALL\n", "SELECT\n", " LEFT ( order_product_id, 15 ) as order_id, \n", " NULL as instock_cost,\n", " SUM(buy_num * actual_price)/7 AS buy_cost\n", "FROM\n", " `warehouse_purchasing`\n", "WHERE\n", " LEFT ( order_product_id, 15 ) IN ({param}) \n", " AND buy_audit = \"采购完成\"\n", "group by LEFT ( order_product_id, 15 )\n", " )\n", "\n", "SELECT\n", "order_id,\n", "SUM(CASE \n", "WHEN instock_cost is null THEN\n", " buy_cost\n", "ELSE\n", " instock_cost END) AS pur_cost\n", "FROM\n", "t1 \n", "GROUP BY order_id\n", "\n", "\"\"\"\n", "purchase_order_df = pd.read_sql(purchase_order_sql, con=engine)\n", "\n", "# 转换数据类型,确保匹配\n", "purchase_order_df[\"order_id\"] = purchase_order_df[\"order_id\"].astype(str)\n", "sku_df[\"order_id\"] = sku_df[\"order_id\"].astype(str)\n", "\n", "# 进行合并\n", "sku_df = pd.merge(sku_df, purchase_order_df, on='order_id', how='left')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# sku_df=sku_df[['order_id','sku','pur_cost_y']]\n", "sku_df.columns=['order_id','sku','采购价']\n", "sku_df = sku_df[sku_df['采购价']>0] \n", "print(sku_df.head())\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 取包裹数据\n", "package_sql = f\"\"\"\n", " SELECT\n", " a.目的国,\n", " a.单号 AS `订单号`,\n", " a.包裹号,\n", " a.快递公司,\n", " a.运输方式,\n", " d.order_price_dollar AS `销售额`,\n", " d.postcode,\n", " b.length AS `长`,\n", " b.width AS `宽`,\n", " b.hight AS `高`,\n", " b.weight AS `重量`\n", " FROM\n", " order_express a\n", " JOIN package_vol_info b ON a.`包裹号` = b.package\n", " JOIN order_list d ON a.`单号` = d.order_id\n", " WHERE\n", " a.`包裹状态` IN ( '客户签收', '已经投递' ) \n", " AND b.hight > 0 \n", " AND b.length > 0 \n", " AND b.width > 0 \n", " AND b.hight > 0 \n", " AND b.weight > 0 \n", " AND a.单号 IN ({param})\n", " ORDER BY\n", " order_id\n", " \"\"\"\n", "package_df = pd.read_sql(package_sql, con=engine)\n", "print(\"获取包裹数据成功\")\n", "i = 0\n", "package_grouped= package_df.groupby('订单号')\n", "calculated_results = []\n", "for order_id, group2 in package_grouped:\n", " print(f\"开始计算订单号{order_id}的账单,第{i}个订单\")\n", " i += 1\n", " opCountry = OperateCountry(group2['目的国'].iloc[0])\n", " packages= Package_group()\n", " for index,row in group2.iterrows():\n", " package = Package(row['包裹号'],row['长'],row['宽'],row['高'],row['重量'])\n", " packages.add_package(package)\n", " postcode = row['postcode']\n", " head_type = 1 if row['运输方式'] == '海运' else 0\n", " try:\n", " bill = Billing(str(index),opCountry,packages,postcode,company_name=None,head_type=head_type,beizhu='1')\n", " head_price = bill.head_amount[0]\n", " tail_price = bill.tail_amount[0]\n", " tail_currency = bill.tail_amount[1]\n", " except:\n", " head_price ='出错'\n", " tail_price = '出错'\n", " tail_currency = '出错'\n", " print(f\"{bill}\")\n", " result = {\n", " '订单号': order_id,\n", " '目的国': group2['目的国'].iloc[0], # Same for all rows in the group\n", " '快递公司': group2['快递公司'].iloc[0], # Same for all rows in the group\n", " '运输方式': group2['运输方式'].iloc[0], # Same for all rows in the group\n", " 'postcode': group2['postcode'].iloc[0], # Same for all rows in the group\n", " '销售额USD':group2['销售额'].iloc[0],\n", " '头程预测': head_price,\n", " '尾端预测': tail_price,\n", " '尾端货币': tail_currency,\n", " }\n", " calculated_results.append(result)\n", "calculated_df = pd.DataFrame(calculated_results)\n", "calculated_df.rename(columns={'订单号':'order_id'},inplace=True)\n", "sku_df[\"order_id\"] = sku_df[\"order_id\"].astype(str)\n", "calculated_df[\"order_id\"] = calculated_df[\"order_id\"].astype(str)\n", "# 进行合并\n", "order_id_df_cal = pd.merge(calculated_df, sku_df, on='order_id', how='left')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# order_id_df_cal = pd.merge(calculated_df, sku_df, on='order_id', how='left')\n", "order_id_df_cal.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from logisticsClass.logisticsTail_US import MetroLogistics_US\n", "from utils.countryOperator import OperateCountry\n", "from utils.logisticsBill import BillFactory\n", "from utils.Package import Package, Package_group\n", "import pandas as pd\n", "from utils.gtools import MySQLconnect\n", "# 美国 \n", "from utils.logisticsBill import Billing\n", "ods = MySQLconnect('ods')\n", "cursor = ods.connect().cursor()\n", "sql = f\"\"\"SELECT\n", "\toe.`包裹号`,\n", "\toe.`单号`,\n", "\toe.`关联提单号`,\n", "\toe.`包裹状态`,\n", "\toe.`目的国`,\n", "\toe.`快递公司`,\n", "\tol.postcode,\n", "\tpvi.length,\n", "\tpvi.width,\n", "\tpvi.hight,\n", "\tpvi.weight\n", "FROM\n", "\t`order_express` oe left JOIN package_vol_info pvi ON oe.`包裹号` = pvi.package\n", "\tLEFT JOIN order_list ol ON oe.`单号` =ol.order_id\n", "WHERE\n", "\t关联提单号 IN (\n", "\t\t'BMOU4594999',\n", "\t\t'CAIU9043860',\n", "\t'GVCU5328406',\n", "\t'TCNU7259447')\n", "\tAND `包裹状态` NOT REGEXP '已作废'\n", " \"\"\"\n", "df = pd.read_sql(sql, ods.connect())\n", "print(\"查询完成\")\n", "calculated_results = []\n", "# package_list = \",\".join(str(item) for item in df['ORDER#'].tolist())\n", "# print(package_list)\n", "# query = f\"SELECT package,length,width,hight,weight FROM `package_vol_info` WHERE `package` IN ({package_list})\"\n", "# result = cursor.execute(query)\n", "# packages_info = cursor.fetchall()\n", "\n", "# print(packages_info)\n", "# 将df和result合并\n", "# new_df = pd.DataFrame(packages_info, columns=['package', 'length', 'width', 'hight', 'weight'])\n", "# df = df.merge(new_df, left_on='ORDER#', right_on='package')\n", "# df = df.drop(columns='package')\n", "df_grouped= df.groupby('单号')\n", "\n", "MetroLogistics_US.refresh()\n", "for order_num, group in df_grouped:\n", " # opCountry = OperateCountry(group['目的国'].iloc[0])\n", " opCountry = OperateCountry('US')\n", " postcode = str(group['postcode'].iloc[0])\n", " packages= Package_group()\n", " packages_dict = {}\n", " # volume_weight = 0\n", " # weight = 0\n", " for index,row in group.iterrows():\n", " length = float(row['length']) \n", " width = float(row['width'])\n", " hight = float(row['hight'])\n", " weight = float(row['weight'])\n", " package = Package(row['包裹号'],length,width,hight,weight)\n", " packages.add_package(package)\n", " # packages_dict[row['包裹号']] = {\n", " # \"长\": row['长'],\n", " # \"宽\": row['宽'],\n", " # \"高\": row['高'],\n", " # \"重量\": row['重量']\n", " # }\n", " # weight += row['重量']/1000\n", " # volume_weight += package.get_volume_weight(6000)\n", " # postcode = row['postcode']\n", " # head_type = 1 if row['运输方式'] == '海运' else 0\n", " try:\n", " bill1 = Billing(str(index),opCountry,packages,postcode,company_name='大健-Metro',head_type=1,beizhu='1')\n", " tail_price1 = bill1.tail_amount[0]\n", " except:\n", " tail_price1 = '出错'\n", " try:\n", " bill2 = Billing(str(index),opCountry,packages,postcode,company_name='大健-CEVA',head_type=1,beizhu='1')\n", " tail_price2 = bill2.tail_amount[0]\n", " except:\n", " tail_price2 = '出错'\n", " try:\n", " bill3 = Billing(str(index),opCountry,packages,postcode,company_name='大健-GIGA',head_type=1,beizhu='1')\n", " tail_price3 = bill3.tail_amount[0]\n", " except:\n", " tail_price3 = '出错'\n", " \n", " df.loc[df['单号']==order_num,'大健-METRO']= tail_price1\n", " df.loc[df['单号']==order_num,'大健-CEVA']= tail_price2\n", " df.loc[df['单号']==order_num,'大健-GIGA']= tail_price3\n", " print(order_num)\n", "# print(result)\n", "# calculated_results.append(result)\n", "# # print(packages_dict)\n", "# calculated_df = pd.DataFrame(calculated_results)\n", "# 将calculated_df的订单号改为order_id\n", "# calculated_df.rename(columns={'订单号':'order_id'},inplace=True)\n", "print(df)\n", "df.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.countryOperator import OperateCountry\n", "from utils.logisticsBill import BillFactory\n", "from utils.Package import Package, Package_group\n", "import pandas as pd\n", "from utils.gtools import MySQLconnect\n", "# 美国 \n", "from utils.logisticsBill import Billing\n", "ods = MySQLconnect('ods')\n", "cursor = ods.connect().cursor()\n", "df = pd.read_excel(r'F:\\DOCUMENTS\\WXWork\\1688854527635889\\Cache\\File\\2025-04\\Litfad 4-10-25 with data analysis-2(1).xlsx',\n", " sheet_name='CA- 96004885')\n", "package_list = \",\".join(str(item) for item in df['ORDER#'].tolist())\n", "print(package_list)\n", "query = f\"SELECT package,length,width,hight,weight FROM `package_vol_info` WHERE `package` IN ({package_list})\"\n", "result = cursor.execute(query)\n", "packages_info = cursor.fetchall()\n", "package_df = pd.DataFrame(packages_info, columns=['package','length','width','hight', 'weight'])\n", "package_df.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from utils.gtools import MySQLconnect\n", "ods = MySQLconnect('ods')\n", "engine = ods.engine()\n", "cursor = ods.connect().cursor()\n", "df = pd.read_excel(r'D:\\test\\logistics\\售价_2024-01-01.xlsx')\n", "batch_size = 50000 # 每次查询 500 个 order_id,避免 SQL 语句过长\n", "sku_list = df[\"SKU\"].tolist() \n", "result_dfs1 = []\n", "for i in range(0, len(sku_list), batch_size):\n", " batch_order_ids = sku_list[i:i + batch_size] # 取当前批次的 order_id\n", " param = \",\".join(f\"'{sku}'\" for sku in batch_order_ids)\n", "\n", " sku_sql = f\"\"\"\n", " SELECT\n", " SKU,\n", " 产品售价,\n", " `产品品类`,\n", " `产品分类`\n", " FROM\n", " `stg_bayshop_litfad_sku` sku\n", " LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` =spu.`产品PID`\n", " WHERE\n", " SKU IN ({param}) \n", " \"\"\" \n", " batch_df1 = pd.read_sql(sku_sql, con=engine) # 运行 SQL 查询\n", " result_dfs1.append(batch_df1)\n", " print(f\"已完成 {i} / {len(sku_list)}\")\n", "sku_df = pd.concat(result_dfs1) # 合并结果\n", "merge_df = pd.merge(df, sku_df, on=\"SKU\", how=\"left\") # 合并数据\n", "merge_df.to_excel(r'D:\\test\\logistics\\售价_sku_1-9月.xlsx', index=False) # 保存结果" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "筛选取原尺寸还是理论预估尺寸\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_8364\\2603640831.py:4: FutureWarning: The provided callable is currently using SeriesGroupBy.max. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"max\" instead.\n", " all_df['SPU最大涨幅']=all_df.groupby('SPU')['售价涨跌幅'].transform(max)\n", "C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_8364\\2603640831.py:5: FutureWarning: The provided callable is currently using SeriesGroupBy.min. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"min\" instead.\n", " all_df['SPU最小涨幅']=all_df.groupby('SPU')['售价涨跌幅'].transform(min)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "10{\"stdout\":\"[{\\\"variableName\\\": \\\"ID_TO_MEANING\\\", \\\"type\\\": \\\"dictionary\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"builtins.dict\\\"}, {\\\"variableName\\\": \\\"NULL\\\", \\\"type\\\": \\\"unknown\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"_pydevd_bundle.pydevd_constants.Null\\\"}]\\n\",\"stderr\":\"\",\"mime\":[]}\n", "10{\"stdout\":\"[{\\\"variableName\\\": \\\"ID_TO_MEANING\\\", \\\"type\\\": \\\"dictionary\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"builtins.dict\\\"}, {\\\"variableName\\\": \\\"NULL\\\", \\\"type\\\": \\\"unknown\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"_pydevd_bundle.pydevd_constants.Null\\\"}]\\n\",\"stderr\":\"\",\"mime\":[]}\n", "10{\"stdout\":\"[{\\\"variableName\\\": \\\"ID_TO_MEANING\\\", \\\"type\\\": \\\"dictionary\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"builtins.dict\\\"}, {\\\"variableName\\\": \\\"NULL\\\", \\\"type\\\": \\\"unknown\\\", \\\"supportedEngines\\\": [\\\"pandas\\\"], \\\"isLocalVariable\\\": true, \\\"rawType\\\": \\\"_pydevd_bundle.pydevd_constants.Null\\\"}]\\n\",\"stderr\":\"\",\"mime\":[]}\n" ] } ], "source": [ "import pandas as pd\n", "all_df = pd.read_excel('单包裹SKU售价分析1.xlsx',sheet_name=\"Sheet1\")\n", "all_df = all_df[(all_df['是否有过修改记录']==\"否\")&(all_df['使用尺寸售价']!=\"ERP售价\")]\n", "all_df['SPU最大涨幅']=all_df.groupby('SPU')['售价涨跌幅'].transform(max)\n", "all_df['SPU最小涨幅']=all_df.groupby('SPU')['售价涨跌幅'].transform(min)\n", "\n", "filtered_df = all_df[(all_df['SPU最大涨幅'] <= 0.5) & (all_df['SPU最小涨幅'] >= -0.5)]\n", "for index,row in filtered_df.iterrows():\n", " if row['使用尺寸售价']==\"实际体积售价\":\n", " length = str(row['长'])\n", " width = str(row['宽'])\n", " height = str(row['高'])\n", " weight = str(row['重量'])\n", " else:\n", " length = str(row['理论长'])\n", " width = str(row['理论宽'])\n", " height = str(row['理论高'])\n", " weight = str(row['理论重量'])\n", " filtered_df.loc['尺寸重量']=weight+\"|\"+length+\"*\"+width+\"*\"+height+\"*1,\"\n", " print(filtered_df.loc['尺寸重量'])\n", "spu_list = filtered_df['SPU'].unique()\n", "filtered_df = filtered_df[['SKU','成本价','尺寸重量']]\n", "filtered_df " ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 2 }