{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from utils.gtools import MySQLconnect\n", "\n", "# 读取需要计算的包裹信息\n", "with MySQLconnect('ods') as db:\n", " sql = r\"\"\" \n", " WITH\n", "t1 AS (\n", "SELECT\n", "order_id,\n", "SKU,\n", "order_date,\n", "sum(CASE WHEN opl.order_product_id LIKE '%\\_%' ESCAPE '\\\\' \n", " AND opl.order_product_id NOT LIKE '%\\_%\\_%' ESCAPE '\\\\' THEN product_num END) AS product_num,\n", "DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n", "count(opl.SKU) AS 产品种类\n", "FROM\n", "dws.fact_order_product_list opl\n", "WHERE\n", " NOT EXISTS (\n", " SELECT 1 \n", " FROM dws.log_order_reissue_detail AS r \n", " WHERE r.order_product_id = opl.order_product_id\n", " )\n", "AND order_date >= \"20250201\"\n", "AND order_date < \"20250601\"\n", "AND SKU <> \"\"\n", "GROUP BY order_id\n", ")\n", ",\n", "t2 AS (\n", "SELECT\t\t\t\n", " a.`包裹测量时间`,\n", "\t\t\t\t\t\tt1.order_id,\n", "\t\t\t\t\t\tt1.SKU,\n", "\t\t\t\t\t\tt1.order_date,\n", " a.包裹号,\n", " a.快递公司,\n", " a.运输方式,\n", "\t\t\t\t\t\ta.`目的国`,\n", " d.postcode,\n", " CONCAT(\n", " '\"', b.package, '\": {',\n", " '\"长\": ', length, ', ',\n", " '\"宽\": ', width, ', ',\n", " '\"高\": ', hight, ', ',\n", " '\"重量\": ', weight, '}'\n", " ) AS package_json\n", " FROM\n", "\t\t\t\tt1\n", " LEFT JOIN order_express a ON t1.order_id = a.单号\n", " JOIN package_vol_info b ON a.`包裹号` = b.package\n", " JOIN order_list d ON a.`单号` = d.order_id \n", " WHERE\n", " a.`包裹状态` IN ( '客户签收', '已经投递') \n", " AND b.hight > 0 \n", " AND b.length > 0 \n", " AND b.width > 0 \n", " AND b.hight > 0 \n", " AND b.weight > 0\n", " # AND a.`目的国` = \"United States\"\n", "\t\t\t\t\t\tAND t1.product_num = 1\n", "\t\t\t\t\t\tAND t1.产品种类=1\n", "\t\t\t\t\t\tAND a.`包裹测量时间` >= '2025-03-01'\n", "\t\t\t\t\t\tAND a.`包裹测量时间` < '2025-06-01'\n", "),\n", "t3 AS (\n", "SELECT\n", "t2.*,\n", "sku.成本价 AS ERP采购价,\n", "ess.包裹数据 AS ERP包裹数据,\n", "CONCAT('{', 
GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,\n", "ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn\n", "FROM\n", "t2\n", "LEFT JOIN ads.new_erp_sku_size ess ON t2.SKU=ess.SKU\n", "LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU\n", "WHERE\n", "ess.`包裹数据`<>''\n", "GROUP BY order_id\n", ")\n", "SELECT\n", "包裹测量时间,\n", "order_id,\n", "SKU,\n", "DATE_FORMAT(order_date,\"%Y-%M-%D\") AS 订单时间,\n", "包裹号,\n", "`快递公司`,\n", "`运输方式`,\n", "`目的国`,\n", "postcode,\n", "ERP采购价,\n", "ERP包裹数据,\n", "实际包裹数据\n", "FROM\n", "t3\n", "WHERE\n", "rn=1\n", "\n", "\n", " \"\"\"\n", " df=pd.read_sql(sql,db.con)\n", " print(df)\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "list_order_id = df[\"order_id\"].drop_duplicates().tolist()\n", "param_order_id = \",\".join(f\"'{order_id}'\" for order_id in list_order_id)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "先计算美国的实际利润率" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.gtools import MySQLconnect\n", "\n", "ods = MySQLconnect(\"ods\")\n", "engine = ods.engine()\n", "cursor = ods.connect().cursor()\n", "\n", "batch_size = 50000 # 每次查询 500 个 order_id,避免 SQL 语句过长\n", "order_id_list = df[\"order_id\"].drop_duplicates().tolist() # 取出所有 order_id\n", "# df['postcode'] = \"38016\"\n", "# 存储分批查询的结果\n", "result_dfs1 = []\n", "result_dfs2 = []\n", "for i in range(0, len(order_id_list), batch_size):\n", " batch_order_ids = order_id_list[i:i + batch_size] # 取当前批次的 order_id\n", " param = \",\".join(f\"'{order_id}'\" for order_id in batch_order_ids)\n", "\n", " purchase_order_sql = f\"\"\"\n", " WITH t1 AS (\n", " SELECT LEFT(ol.out_detials_outlink_id, 15) AS order_id,\n", " SUM(out_detials_qty * price) AS instock_cost,\n", " NULL AS buy_cost\n", " FROM ods.outstock_list ol\n", " JOIN ods.instock_list il ON ol.store_in_id = il.id \n", " WHERE LEFT(ol.out_detials_outlink_id, 15) IN ({param})\n", 
" GROUP BY LEFT(ol.out_detials_outlink_id, 15)\n", " \n", " UNION ALL\n", " \n", " SELECT LEFT(order_product_id, 15) AS order_id, \n", " NULL AS instock_cost,\n", " SUM(buy_num * actual_price) AS buy_cost\n", " FROM warehouse_purchasing\n", " WHERE LEFT(order_product_id, 15) IN ({param}) \n", " AND buy_audit = \"采购完成\"\n", " GROUP BY LEFT(order_product_id, 15)\n", " )\n", " SELECT order_id,\n", " SUM(CASE \n", " WHEN instock_cost IS NULL THEN buy_cost\n", " ELSE instock_cost \n", " END) AS 采购成本\n", " FROM t1 \n", " GROUP BY order_id\n", " \"\"\"\n", " \n", "\n", " batch_df1 = pd.read_sql(purchase_order_sql, con=engine) # 运行 SQL 查询\n", " result_dfs1.append(batch_df1) # 存入结果列表\n", " print(f\"已完成 {i + batch_size} 个 order_id 的查询\")\n", "\n", "# 合并所有查询结果\n", "purchase_order_df1 = pd.concat(result_dfs1, ignore_index=True)\n", "purchase_order_df1[\"order_id\"] = purchase_order_df1[\"order_id\"].astype(str)\n", "\n", "\n", "# 转换数据类型,确保匹配\n", "df[\"order_id\"] = df[\"order_id\"].astype(str)\n", "\n", "# 进行合并\n", "df = pd.merge(df, purchase_order_df1, on='order_id', how='left')\n", "# 复制到剪贴板\n", "df.to_clipboard(index=False)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "df = pd.read_clipboard()\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.countryOperator import OperateCountry\n", "from utils.logisticsBill import BillFactory\n", "from utils.Package import Package, Package_group\n", "import pandas as pd\n", "import json\n", "import re\n", "# 美国 \n", "from utils.logisticsBill import Billing\n", "import requests\n", "\n", "for index, row in df.iterrows():\n", " opCountry = OperateCountry('US')\n", " postcode = row['postcode']\n", " if pd.isna(postcode) or str(postcode).lower() == \"nan\":\n", " continue\n", " try:\n", " package_dict = json.loads(row['实际包裹数据'])\n", " except Exception as e:\n", " print(f\"行 {index} 解析失败: {e}\")\n", 
" print(row['实际包裹数据'])\n", " continue\n", " packages = Package_group()\n", " def extract_number(value):\n", " # 提取字符串中的第一个数字\n", " match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n", " return float(match.group()) if match else 0.0\n", " for key, package in package_dict.items():\n", " package['长'] = extract_number(package['长'])\n", " package['宽'] = extract_number(package['宽'])\n", " package['高'] = extract_number(package['高'])\n", " package['重量'] = extract_number(package['重量'])\n", " \n", " if package['长'] == 0 or package['宽'] == 0 or package['高'] == 0 or package['重量'] == 0:\n", " continue\n", " packages.add_package(Package(key,package['长'], package['宽'], package['高'], package['重量']))\n", " if packages is None:\n", " continue\n", " if \"海运\" in row['运输方式']:\n", " head_type = 1\n", " else:\n", " head_type = 0\n", "\n", " # if \"FEDEX-SAIR-G\" in row['快递公司']:\n", " # company_name = \"Fedex-GROUD\"\n", " # elif \"FEDEX-SAIR-H\" in row['快递公司']:\n", " # company_name = \"Fedex-HOME\"\n", " # elif \"FEDEX02\" in row['快递公司']:\n", " # company_name = \"Fedex-彩虹小马\"\n", " # elif \"大包\" in row['快递公司'] or row['快递公司'] == '海MS-FEDEX':\n", " # company_name = \"Fedex-金宏亚\"\n", " # elif \"GIGA\" in row['快递公司']:\n", " # company_name = \"大健-GIGA\"\n", " # elif \"CEVA\" in row['快递公司']:\n", " # company_name = \"大健-CEVA\"\n", " # elif \"USPS\" in row['快递公司']:\n", " # company_name = \"Fedex-GROUD\"\n", " # else:\n", " # company_name = \"大健-Metro\"\n", " \n", " bill = Billing(str(index),opCountry,packages,postcode,company_name=\"Fedex-GROUD\",head_type=head_type,beizhu='1')\n", " head_price = bill.head_amount[0]\n", " tail_price = bill.tail_amount[0]\n", " if \"USPS\" in row['快递公司']:\n", " tail_price = tail_price/2\n", " # df.loc[index,'头程CNY'] = head_price\n", " df.loc[index,'头程CNY'] = head_price\n", " # df.loc[index,'最优渠道'] = bill.company_name\n", " print(f\"行 {index} 处理完成\")\n", " \n", "df.to_clipboard(index=False)\n", " " ] }, { "cell_type": "code", "execution_count": null, 
"metadata": {}, "outputs": [], "source": [ "us_df = df[(df['目的国']=='United States')]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.countryOperator import OperateCountry\n", "from utils.logisticsBill import BillFactory\n", "from utils.Package import Package, Package_group\n", "import pandas as pd\n", "# 美国 \n", "df1=pd.read_excel(r\"D:\\test\\logistics\\拦截数据\\1-3月利润分段.xlsx\",sheet_name=\"11-4月全球订单\")\n", "df = df1[(df1['尾端渠道']==\"大健-Metro\")|(df1['尾端渠道']==\"大健-CEVA\")]\n", "# df['postcode'] = df['postcode'].astype(str)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.logisticsBill import Billing\n", "opCountry = OperateCountry('US')\n", "billFactory = BillFactory()\n", "for index,row in df.iterrows():\n", " print(row['postcode'])\n", " package = Package(row['包裹号'],row['长'],row['宽'],row['高'],row['重量'])\n", " packages= Package_group([package])\n", " postcode = row['postcode']\n", " head_type = 1 if row['运输方式'] == '海运' else 0\n", " try:\n", " bill = Billing(str(row['包裹号']),opCountry,packages,postcode=str(postcode),company_name=\"大健-Metro\",head_type=head_type,beizhu=0)\n", " df.loc[index,\"美西\"] = bill.tail_amount[0]\n", " except:\n", " bill = None\n", " df.loc[index,\"美西\"] = \"不可算\"\n", " \n", " print(index)\n", " print(bill)\n", "# bill_df = billFactory.bills_to_df()\n", "# bill_df['ID'] = bill_df['ID'].astype(int)\n", "# bill_df.to_clipboard()\n", "\n", "# merged_df = pd.merge(df, bill_df, left_on=\"包裹号\",right_on=\"ID\", how='inner')\n", "df.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import re\n", "from data.us_zone import zone_west\n", "def get_west_zone(postcode):\n", " \"\"\"获取美西邮编分区\"\"\"\n", " if not re.match(r'\\d{5}-\\d{4}|\\d{5}', postcode):\n", " return \"邮编格式不合法\"\n", " postcode = postcode[:5]\n", " postcode = int(postcode)\n", " for zone, postcodes in 
zone_west.items():\n", " start =int(postcodes[0].split(\"-\")[0])\n", " end = int(postcodes[-1].split(\"-\")[-1])\n", " if start > postcode or postcode > end:\n", " continue\n", " for postcode_range in postcodes:\n", " if \"-\" in postcode_range:\n", " start, end = map(int, postcode_range.split(\"-\"))\n", " if start <= postcode <= end:\n", " return zone\n", " else:\n", " if int(zone) == postcode:\n", " return zone\n", " return \"未查询到邮编分区\"\n", "df['分区']=df['postcode'].apply(get_west_zone)\n", "df.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 取sku所属SPU下所有sku及其现在售价\n", "from sell.sell_price import call_sell_and_order_price\n", "import json\n", "sku_list = (\n", " order_id_df_cal['SKU']\n", " .apply(pd.to_numeric, errors='coerce') # 转数字,非法的变 NaN\n", " .dropna()\n", " .astype(int)\n", " .astype(str)\n", " .tolist()\n", ")\n", "placeholders = ','.join(['%s'] * len(sku_list)) \n", "# result = []\n", "with MySQLconnect('ods') as db:\n", " enginal = db.engine()\n", " sql = f\"\"\"SELECT\n", " SPU,\n", " sku.SKU,\n", " sku.`成本价`,\n", " spi.`包裹数据`\n", " FROM\n", " stg_bayshop_litfad_sku sku\n", " LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` \n", " LEFT JOIN ads.new_erp_sku_size spi ON sku.SKU =spi.SKU\n", " WHERE\n", " spu.SPU IN (\n", " SELECT\n", " SPU \n", " FROM\n", " stg_bayshop_litfad_sku sku\n", " LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` \n", " WHERE\n", " sku.SKU IN ({placeholders}) \n", " )\n", " \"\"\"\n", " result = pd.read_sql(sql, enginal,params= tuple(sku_list))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import json\n", "from sell.sell_price import call_sell_and_order_price\n", "# 计算当前售价\n", "for index,row in df.iterrows():\n", " price = row['采购成本']\n", " try:\n", " package_dict = json.loads(row['实际包裹数据'])\n", " sell_price, order_price, order_type = call_sell_and_order_price(price, 
import math


def _first_tier_price(weight, price_table):
    """Return the price of the first tier whose upper bound is >= ``weight``.

    Tiers are scanned in the table's own iteration order. Returns ``None``
    when ``weight`` exceeds every tier.
    """
    for upper_bound, price in price_table.items():
        if weight <= upper_bound:
            return price
    return None


def ocean_order_price(packages, price_table=None):
    """Price an ocean-freight order's last mile and pick the cheaper channel.

    Two quotes are built from the same packages: parcel express (per-package
    tier price plus surcharges) and LTL trucking ("卡派", a volume-based base
    fee plus surcharges charged once per started group of three affected
    packages). The cheaper one wins; a tie goes to express.

    Args:
        packages: Iterable of package objects exposing ``weight``,
            ``fst_size``, ``sed_size``, ``girth``, ``length``, ``width`` and
            ``height``. Units are presumably grams and centimetres -- TODO
            confirm against ``utils.Package``.
        price_table: Mapping ``weight upper bound -> base express price``.
            Defaults to the notebook-level ``ocean_price_dict``.

    Returns:
        ``(fee, order_type)``. ``order_type`` is the concatenated express
        surcharge labels, or ``'卡派'`` plus the LTL surcharge labels.
    """
    if price_table is None:
        price_table = ocean_price_dict

    # Express accumulators (kept separate, then summed once).
    base_fee = 0
    long_fee = 0
    weight_fee = 0
    big_fee = 0
    express_type = ''
    express_available = True

    # LTL accumulators.
    total_cubic_feet = 0
    over_length = 0     # fst_size >= 250
    over_weight_1 = 0   # weight >= 111000
    over_weight_2 = 0   # weight >= 130000
    over_package = 0    # fst_size >= 310

    for package in packages:
        tier_price = _first_tier_price(package.weight, price_table)
        if tier_price is None:
            # Fix: a package heavier than every tier used to add a base fee
            # of 0, which made express look nearly free and win the
            # comparison. Such an order cannot be priced by express at all.
            express_available = False
        else:
            base_fee += tier_price

        if package.fst_size >= 116 or package.sed_size >= 71 or package.girth >= 251:
            long_fee += 16.3
            express_type += "超长"
        if package.weight >= 21000 and package.fst_size < 238 and package.girth < 315:
            weight_fee += 25.5
            express_type += "超重"
        if package.fst_size >= 238 or package.girth >= 315:
            big_fee += 61.6
            express_type += "大包裹"

        # Volume / 1,000,000 * 35.3 converts to cubic feet (per the original note).
        total_cubic_feet += package.length * package.width * package.height / 1000000 * 35.3
        if package.fst_size >= 250:
            over_length += 1
        if package.weight >= 111000:
            over_weight_1 += 1
        if package.weight >= 130000:
            over_weight_2 += 1
        if package.fst_size >= 310:
            over_package += 1

    if express_available:
        express_fee = base_fee + long_fee + weight_fee + big_fee
    else:
        express_fee = math.inf

    # LTL base fee by total volume.
    if total_cubic_feet < 25:
        ltl_base = round(163 / 0.45 / 2, 2)  # 181.11
    elif total_cubic_feet < 35:
        ltl_base = round(180 / 0.45 / 2, 2)  # 200
    else:
        # Above 35 cubic feet: 5 per cubic foot with a floor of 190.
        # NOTE(review): this branch divides by 0.359 and rounds to an integer,
        # unlike the two branches above -- confirm that is intended.
        ltl_base = round(max(190, 5 * total_cubic_feet) / 0.359 / 2)

    # Each LTL surcharge is billed once per started group of three packages.
    ltl_fee = (
        math.ceil(over_length / 3) * 118
        + math.ceil(over_weight_1 / 3) * 78
        + math.ceil(over_weight_2 / 3) * 30
        + math.ceil(over_package / 3) * 30
        + ltl_base
    )
    # Label order (second overweight, overlength, first overweight, oversize)
    # is kept exactly as before so downstream text matching is unaffected.
    ltl_type = (
        '卡派'
        + ('超重' if over_weight_2 else '')
        + ('超长' if over_length else '')
        + ('超重' if over_weight_1 else '')
        + ('大包裹' if over_package else '')
    )

    if ltl_fee < express_fee:
        return ltl_fee, ltl_type
    return express_fee, express_type


def air_order_price(packages, small_table=None, big_table=None):
    """Price an air-freight order's last mile, package by package.

    Args:
        packages: Iterable of package objects exposing ``weight``,
            ``fst_size``, ``sed_size``, ``trd_size``, ``density`` and
            ``get_volume_weight(divisor)``.
        small_table: Mapping ``weight upper bound -> price`` for small parcels.
            Defaults to the notebook-level ``air_small_dict``.
        big_table: Same for large parcels; defaults to ``air_big_dict``.

    Returns:
        ``(fee, order_type)`` where ``order_type`` concatenates one of
        ``'USPS'``, ``'UandF'`` or ``'FEDEX'`` per package.
    """
    if small_table is None:
        small_table = air_small_dict
    if big_table is None:
        big_table = air_big_dict

    express_fee = 0
    express_type = ''
    for package in packages:
        bill_weight = max(package.weight, package.get_volume_weight(8500))
        fits_small = (
            package.fst_size <= 50 and package.sed_size <= 40 and package.trd_size <= 30
        )

        # Small-and-light parcels are tiered on actual weight, everything else
        # on billable weight; oversized or heavy parcels use the big table.
        if package.weight <= 420 and fits_small:
            price = _first_tier_price(package.weight, small_table)
        elif package.weight <= 2718 and fits_small:
            price = _first_tier_price(bill_weight, small_table)
        else:
            price = _first_tier_price(bill_weight, big_table)
        if price is None:
            # Unchanged behaviour: a weight above every tier contributes no
            # tier price. NOTE(review): there is no alternative channel here,
            # so confirm whether this should be reported instead.
            price = 0

        density = min(max(package.density, 37), 337)
        if package.weight <= 420:
            express_fee += ((((density * 0.093 + 27.7) / 6 + 0.65) * package.get_volume_weight(6000)) * 0.3 + price) / 0.45
            express_type += 'USPS'
        elif package.weight <= 2718:
            express_fee += (((density * 0.093 + 27.7) / 6 + 0.65) * package.get_volume_weight(8500) * 0.3 + price) / 0.45
            express_type += 'UandF'
        else:
            express_fee += (((density * 0.093 + 27.7 - 1.08) / 6 + 0.65 - 1.06) * package.get_volume_weight(8500)) / 0.45 + price
            express_type += 'FEDEX'
    return express_fee, express_type
extract_number(package['高'])\n", " package['重量'] = extract_number(package['重量'])\n", " \n", " if package['长'] == 0 or package['宽'] == 0 or package['高'] == 0 or package['重量'] == 0:\n", " continue\n", " packages.add_package(Package(key,package['长'], package['宽'], package['高'], package['重量']))\n", " if packages is None:\n", " continue\n", " if row['运输方式']=='海运':\n", " order_fee, order_type = ocean_order_price(packages)\n", " else:\n", " order_fee, order_type = air_order_price(packages)\n", " \n", " # 订单信息\n", " df.loc[index, 'ERP物流费'] = order_fee\n", " df.loc[index, '尾端类型'] = order_type\n", " print(order_fee, order_type)\n", "df.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 取sku所属SPU下所有sku及其现在售价\n", "import json\n", "# 单个品类一个一个处理\n", "category = \"66 - Furniture\"\n", "df_one = order_id_df_cal[order_id_df_cal['产品品类']==category]\n", "sku_list = (\n", " df_one['SKU']\n", " .apply(pd.to_numeric, errors='coerce') # 转数字,非法的变 NaN\n", " .dropna()\n", " .astype(int)\n", " .astype(str)\n", " .tolist()\n", ")\n", "\n", "\n", "placeholders = ','.join(['%s'] * len(sku_list)) \n", "\n", "# 从哪个表查询\n", "with MySQLconnect('ods') as db:\n", " enginal = db.engine()\n", " sql = f\"\"\"SELECT\n", " SKU,\n", " 价格 AS ERP采购价,\n", " 规格,\n", " cpmaso规格,\n", " `标准/预设属性集`,\n", " 自定义属性集\n", "\n", " FROM\n", " erp_furniture_sku sku\n", " LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`\n", " WHERE\n", " spu.SPU IN (\n", " SELECT\n", " SPU \n", " FROM\n", " stg_bayshop_litfad_sku sku\n", " LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` \n", " WHERE\n", " sku.SKU IN ({placeholders}) \n", " )\n", " \"\"\"\n", " result = pd.read_sql(sql, enginal,params= tuple(sku_list))\n", "df_one = pd.merge(df_one, result, on=['SKU'], how='left')\n", "df_one.to_excel(f'{category}.xlsx')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "1.找到SPU最新到仓的SKU及其体积\n", 
"2.根据算法F(x)计算这个SPU的其他SKU理论长宽高重量\n", "3.找到该SPU其他有过实际体积的SKU,记录这些实际体积\n", "4.分析这些SKU的实际体积和理论体积的差距" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "df = pd.read_excel(r'D:\\test\\logistics\\拦截数据\\一票一件发货订单.xlsx',sheet_name=\"单包裹数据\")\n", "# 写一个cal_size算法,输入原属性和新属性,返回涨幅系数\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import re\n", "import json\n", "for index, row in df.iterrows():\n", " package_dict = json.loads(row['包裹数据'])\n", " for package in package_dict.values():\n", " item = {}\n", " for key, value in package.items():\n", " try:\n", " # 使用正则表达式提取数字部分\n", " number_str = re.findall(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n", " if number_str:\n", " item[key] = float(number_str[0]) # 取第一个匹配到的数字并转换为 float\n", " else:\n", " item[key] = value # 如果没有数字部分,保留原值\n", " except ValueError:\n", " item[key] = value # 如果遇到无法转换的值,保留原值\n", " size = []\n", " size.append(item['长'])\n", " size.append(item['宽'])\n", " size.append(item['高'])\n", " weight = item['重量']\n", " size.sort()\n", " length = size[2]\n", " width = size[1]\n", " height = size[0]\n", " df.loc[index, 'ERP长'] = length\n", " df.loc[index, 'ERP宽'] = width\n", " df.loc[index, 'ERP高'] = height\n", " df.loc[index, 'ERP重量'] = weight\n", " print(f\"{row['SKU']}尺寸为:{width},h:{height},d:{length},w:{weight}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def cal_size(old,new):\n", " try:\n", " old = float(old)\n", " new = float(new)\n", " if old == 0:\n", " return None # 或 return 0,防止除以0\n", " return (new - old) / old\n", " except (ValueError, TypeError):\n", " return None # 遇到不能转为 float 的就返回 None\n", "# 按SPU分组,first_df取每个SPU组里订单月份最大的那行数据,如果订单月份相同,取第一条\n", "first_df = df.groupby('SPU').apply(lambda x: x.loc[x['订单月份'].idxmax()]).reset_index(drop=True)\n", "# 按SPU分组,计算每个SPU的涨幅系数\n", "for index, row in first_df.iterrows():\n", " first_df.loc[index, 
'长系数'] = cal_size(row['ERP长'],row['长'])\n", " first_df.loc[index, '宽系数'] = cal_size(row['ERP宽'],row['宽'])\n", " first_df.loc[index, '高系数'] = cal_size(row['ERP高'],row['高'])\n", " first_df.loc[index, '重量系数'] = cal_size(row['ERP重量'],row['重量'])\n", " print(\n", " f\"{row['SPU']} 的系数为 \"\n", " f\"{first_df.loc[index, '长系数']}, \"\n", " f\"{first_df.loc[index, '宽系数']}, \"\n", " f\"{first_df.loc[index, '高系数']}, \"\n", " f\"{first_df.loc[index, '重量系数']}\"\n", " )\n", " \n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 将每个SPU的系数,并入df表中\n", "df = pd.merge(df, first_df[['SPU','长系数','宽系数','高系数','重量系数']], on='SPU',how='left')\n", "print(\"合并完成\")\n", "# 根据系数计算每个SKU的理论尺寸\n", "df['理论长'] = ((1 + df['长系数']) * df['ERP长']).round(2)\n", "df['理论宽'] = ((1 + df['宽系数']) * df['ERP宽']).round(2)\n", "df['理论高'] = ((1 + df['高系数']) * df['ERP高']).round(2)\n", "df['理论重量'] = ((1 + df['重量系数']) * df['ERP重量']).round(2)\n", "\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 构造一个set,存储first_df中的 (SKU, 订单月份)\n", "first_packages = set(first_df['包裹号'])\n", "\n", "# 新增列,包裹号在first_packages中标1,否则0\n", "df['is_first'] = df['包裹号'].apply(lambda x: 1 if x in first_packages else 0)\n", "df.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 计算预测后的尺寸下,一票一件订单的售价和订单价格\n", "from sell.sell_price import call_sell_and_order_price\n", "for index,row in df.iterrows():\n", " price = row['成本价']\n", " package_dict1={}\n", " package_dict2={}\n", " try:\n", " package_dict1['包裹1'] = {}\n", " package_dict2['包裹1'] = {}\n", " package_dict1['包裹1']['长'] = row['长']\n", " package_dict1['包裹1']['宽'] = row['宽']\n", " package_dict1['包裹1']['高'] = row['高']\n", " package_dict1['包裹1']['重量'] = row['重量']\n", " package_dict2['包裹1']['长'] = row['理论长']\n", " package_dict2['包裹1']['宽'] = row['理论宽'] \n", " package_dict2['包裹1']['高'] = row['理论高']\n", " 
package_dict2['包裹1']['重量'] = row['理论重量']\n", " sell_price1, order_price1, order_type1 = call_sell_and_order_price(price, package_dict1)\n", " sell_price2, order_price2, order_type2 = call_sell_and_order_price(price, package_dict2)\n", " except Exception as e:\n", " print(f\"SKU: {row['SKU']} 报错: {e}\")\n", " continue\n", " df.loc[index, '实际体积售价'] = sell_price1\n", " df.loc[index, '实际体积订单价'] = order_price1\n", " df.loc[index, '实际体积订单类型'] = order_type1\n", " df.loc[index, '理论体积售价'] = sell_price2\n", " df.loc[index, '理论体积订单价'] = order_price2\n", " df.loc[index, '理论体积订单类型'] = order_type2\n", " print(f\"SPU: {row['SPU']}, SKU {row['SKU']} 实际体积售价: {sell_price1}, 理论体积售价: {sell_price2},\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "计算SPU下所有SKU的网站售价,实际尺寸售价,预测尺寸售价" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 取表格数据is_first为1的数据,取spu,长宽高重量系数\n", "import pandas as pd\n", "df = pd.read_excel(r'D:\\test\\logistics\\拦截数据\\一票一件发货订单.xlsx',sheet_name=\"单包裹系数计算\")\n", "# 写一个cal_size算法,输入原属性和新属性,返回涨幅系数\n", "df1=df[['is_first','SPU','SKU','长','宽','高','重量']]\n", "base_df = df[df['is_first']==1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "# 取这些SPU下的所有SKU及其现在售价\n", "from sell.sell_price import call_sell_and_order_price\n", "import json\n", "from utils.gtools import MySQLconnect\n", "import pandas as pd\n", "spu_list = (\n", " base_df['SPU']\n", " .apply(pd.to_numeric, errors='coerce') # 转数字,非法的变 NaN\n", " .dropna()\n", " .astype(int)\n", " .astype(str)\n", " .tolist()\n", ")\n", "placeholders = ','.join(['%s'] * len(spu_list)) \n", "# result = []\n", "with MySQLconnect('ods') as db:\n", " enginal = db.engine()\n", " sql = f\"\"\"SELECT\n", " 产品品类,\n", " 产品分类,\n", " SPU,\n", " sku.SKU,\n", " sku.`成本价`,\n", " spi.`包裹数据`,\n", " 物流分摊,\n", " 产品售价\n", " FROM\n", " stg_bayshop_litfad_sku sku\n", " LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` 
\n", " LEFT JOIN ads.new_erp_sku_size spi ON sku.SKU =spi.SKU\n", " WHERE\n", " spu.SPU IN ({placeholders}) \n", " \"\"\"\n", " result = pd.read_sql(sql, enginal,params= tuple(spu_list))\n", "# 合并df\n", "all_df = pd.merge(result,df, on=['SKU'], how='left')\n", "all_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 先把ERP包裹数据拆出来\n", "import re\n", "import json\n", "for index, row in all_df.iterrows():\n", " if not isinstance(row['包裹数据'], str) or not row['包裹数据']:\n", " print(f\"第{index}行包裹数据为空或非字符串,跳过\")\n", " continue\n", " try:\n", " package_dict = json.loads(row['包裹数据'])\n", " except json.JSONDecodeError as e:\n", " print(f\"解析失败:第{index}行,错误信息:{e}\")\n", " continue\n", " try:\n", " for package in package_dict.values():\n", " item = {}\n", " for key, value in package.items():\n", " try:\n", " # 使用正则表达式提取数字部分\n", " number_str = re.findall(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n", " if number_str:\n", " item[key] = float(number_str[0]) # 取第一个匹配到的数字并转换为 float\n", " else:\n", " item[key] = value # 如果没有数字部分,保留原值\n", " except ValueError:\n", " item[key] = value # 如果遇到无法转换的值,保留原值\n", " except AttributeError:\n", " print(f\"解析失败:第{index}行,错误信息:包裹数据为空\")\n", " continue\n", " size = []\n", " size.append(item['长'])\n", " size.append(item['宽'])\n", " size.append(item['高'])\n", " weight = item['重量']\n", " size.sort()\n", " length = size[2]\n", " width = size[1]\n", " height = size[0]\n", " all_df.loc[index, 'ERP长'] = length\n", " all_df.loc[index, 'ERP宽'] = width\n", " all_df.loc[index, 'ERP高'] = height\n", " all_df.loc[index, 'ERP重量'] = weight\n", " print(f\"{row['SKU']}尺寸为:{width},h:{height},d:{length},w:{weight}\")\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 计算每个SPU的长宽高重量系数\n", "def cal_size(old,new):\n", " try:\n", " old = float(old)\n", " new = float(new)\n", " if old == 0:\n", " return None # 或 return 0,防止除以0\n", " return (new - old) / old\n", " except 
(ValueError, TypeError):\n", " return None # 遇到不能转为 float 的就返回 None\n", " \n", "test_df = all_df[all_df['is_first']==1]\n", "# 取基准数据SPU的系数\n", "for index, row in test_df.iterrows():\n", " test_df.loc[index, '长系数'] = cal_size(row['ERP长'],row['长'])\n", " test_df.loc[index, '宽系数'] = cal_size(row['ERP宽'],row['宽'])\n", " test_df.loc[index, '高系数'] = cal_size(row['ERP高'],row['高'])\n", " test_df.loc[index, '重量系数'] = cal_size(row['ERP重量'],row['重量'])\n", " print(\n", " f\"{row['SPU_x']} 的系数为 \"\n", " f\"{test_df.loc[index, '长系数']}, \"\n", " f\"{test_df.loc[index, '宽系数']}, \"\n", " f\"{test_df.loc[index, '高系数']}, \"\n", " f\"{test_df.loc[index, '重量系数']}\"\n", " )\n", "# 将SPU的基准系数合并至all_df\n", "all_df = pd.merge(all_df, test_df[['SPU_x', '长系数', '宽系数', '高系数', '重量系数']], on='SPU_x', how='left')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 根据系数 得到所有SKU的预测尺寸\n", "# 根据系数计算每个SKU的理论尺寸\n", "all_df['理论长'] = ((1 + all_df['长系数']) * all_df['ERP长']).round(2)\n", "all_df['理论宽'] = ((1 + all_df['宽系数']) * all_df['ERP宽']).round(2)\n", "all_df['理论高'] = ((1 + all_df['高系数']) * all_df['ERP高']).round(2)\n", "all_df['理论重量'] = ((1 + all_df['重量系数']) * all_df['ERP重量']).round(2)\n", "all_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 计算三种尺寸下的售价\n", "# 计算预测后的尺寸下,一票一件订单的售价和订单价格\n", "from sell.sell_price import call_sell_and_order_price\n", "for index,row in all_df.iterrows():\n", " price = row['成本价']\n", " package_dict1={}\n", " package_dict2={}\n", " package_dict3={}\n", " try:\n", " package_dict1['包裹1'] = {}\n", " package_dict2['包裹1'] = {}\n", " package_dict3['包裹1'] = {}\n", " package_dict1['包裹1']['长'] = row['长']\n", " package_dict1['包裹1']['宽'] = row['宽']\n", " package_dict1['包裹1']['高'] = row['高']\n", " package_dict1['包裹1']['重量'] = row['重量']\n", " package_dict2['包裹1']['长'] = row['理论长']\n", " package_dict2['包裹1']['宽'] = row['理论宽'] \n", " package_dict2['包裹1']['高'] = row['理论高']\n", " 
package_dict2['包裹1']['重量'] = row['理论重量']\n", " package_dict3['包裹1']['长'] = row['ERP长']\n", " package_dict3['包裹1']['宽'] = row['ERP宽'] \n", " package_dict3['包裹1']['高'] = row['ERP高']\n", " package_dict3['包裹1']['重量'] = row['ERP重量']\n", " sell_price1, order_price1, order_type1 = call_sell_and_order_price(price, package_dict1)\n", " sell_price2, order_price2, order_type2 = call_sell_and_order_price(price, package_dict2)\n", " sell_price3, order_price3, order_type3 = call_sell_and_order_price(price, package_dict3)\n", " except Exception as e:\n", " print(f\"SKU: {row['SKU']} 报错: {e}\")\n", " continue\n", " all_df.loc[index, 'ERP售价'] = sell_price3\n", " all_df.loc[index, '实际体积售价'] = sell_price1\n", " all_df.loc[index, '理论体积售价'] = sell_price2\n", "\n", " all_df.loc[index, 'ERP订单价'] = order_price3\n", " all_df.loc[index, '实际体积订单价'] = order_price1\n", " all_df.loc[index, '理论体积订单价'] = order_price2\n", " # all_df.loc[index, '理论体积订单类型'] = order_type2\n", " print(f\"SPU: {row['SPU_x']}, SKU {row['SKU']} ,ERP售价: {sell_price3}, 实际体积售价: {sell_price1}, 理论体积售价: {sell_price2},\")\n", "all_df.to_excel('单包裹SKU售价分析.xlsx', index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 2 }