logistics/售价模型审核.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1. 查询5.1-5.28测量包裹尺寸的订单数据\n",
    "2. 以美国的售价计算它当前应该有的售价\n",
    "3. 判断"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from utils.gtools import MySQLconnect\n",
    "\n",
    "# Load the package records that need a price calculation.\n",
    "with MySQLconnect('ads') as db:\n",
    "    # Reference query: orders measured inside the window that hold exactly one SKU with\n",
    "    # quantity 1, keeping only the most recently measured order for each SKU.\n",
    "    # NOTE(review): `sql` is not executed in this cell; the frame below is read from\n",
    "    # `order_complet4`, presumably materialized from this query -- confirm.\n",
    "    # The final SELECT formats the order date with %Y-%m-%d (numeric month/day), matching\n",
    "    # the t1 CTE; MySQL's %M / %D would emit the month name and a suffixed day instead.\n",
    "    sql = r\"\"\"  \n",
    "       # 限制范围是测量时间，取得SKU种类为1且数量为1的订单，且重复SKU只取最近的订单\n",
    "\n",
    "WITH\n",
    "t1 AS (\n",
    "SELECT\n",
    "order_id,\n",
    "SKU,\n",
    "order_date,\n",
    "sum(CASE WHEN opl.order_product_id LIKE '%\\_%' ESCAPE '\\\\' \n",
    "         AND opl.order_product_id NOT LIKE '%\\_%\\_%' ESCAPE '\\\\' THEN product_num END) AS product_num,\n",
    "DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n",
    "count(opl.SKU) AS 产品种类\n",
    "FROM\n",
    "dws.fact_order_product_list opl\n",
    "WHERE\n",
    "  NOT EXISTS (\n",
    "    SELECT 1 \n",
    "    FROM dws.log_order_reissue_detail AS r \n",
    "    WHERE r.order_product_id = opl.order_product_id\n",
    "  )\n",
    "AND order_date >= \"20250501\"\n",
    "AND order_date < \"20250612\"\n",
    "AND SKU <> \"\"\n",
    "GROUP BY order_id\n",
    ")\n",
    ",\n",
    "t2 AS (\n",
    "SELECT\t\t\t\n",
    "            a.`包裹测量时间`,\n",
    "\t\t\t\t\t\tt1.order_id,\n",
    "\t\t\t\t\t\tt1.SKU,\n",
    "\t\t\t\t\t\tt1.order_date,\n",
    "            a.包裹号,\n",
    "            a.快递公司,\n",
    "            a.运输方式,\n",
    "\t\t\t\t\t\ta.`目的国`,\n",
    "            d.postcode,\n",
    "            CONCAT(\n",
    "            '\"', b.package, '\": {',\n",
    "            '\"长\": ', length, ', ',\n",
    "            '\"宽\": ', width, ', ',\n",
    "            '\"高\": ', hight, ', ',\n",
    "            '\"重量\": ', weight, '}'\n",
    "        ) AS package_json\n",
    "        FROM\n",
    "\t\t\t\tt1\n",
    "            LEFT JOIN order_express a ON t1.order_id = a.单号\n",
    "            JOIN package_vol_info b ON a.`包裹号` = b.package\n",
    "            JOIN order_list d ON a.`单号` = d.order_id \n",
    "        WHERE\n",
    "            a.`包裹状态` IN ( '客户签收', '已经投递') \n",
    "            AND b.hight > 0 \n",
    "            AND b.length > 0 \n",
    "            AND b.width > 0 \n",
    "            AND b.hight > 0 \n",
    "            AND b.weight > 0\n",
    "--             AND a.`目的国` = \"United States\"\n",
    "\t\t\t\t\t\tAND t1.product_num = 1\n",
    "\t\t\t\t\t\tAND t1.产品种类=1\n",
    "\t\t\t\t\t\tAND a.`包裹测量时间` >= '2025-05-01'\n",
    "\t\t\t\t\t\tAND a.`包裹测量时间` < '2025-06-12'\n",
    "),\n",
    "t3 AS (\n",
    "SELECT\n",
    "t2.*,\n",
    "sku.成本价 AS ERP采购价,\n",
    "ess.erp_package_vol AS ERP包裹数据,\n",
    "CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,\n",
    "ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn\n",
    "FROM\n",
    "t2\n",
    "LEFT JOIN dwd.dim_erp_sku_package_vol_info ess ON t2.SKU=ess.erp_sku\n",
    "LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU\n",
    "WHERE\n",
    "ess.`erp_package_vol`<>\"{}\" AND ess.`erp_package_vol`<>\"\"\n",
    "GROUP BY order_id\n",
    ")\n",
    "SELECT\n",
    "包裹测量时间,\n",
    "order_id,\n",
    "SKU,\n",
    "DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n",
    "包裹号,\n",
    "`快递公司`,\n",
    "`运输方式`,\n",
    "`目的国`,\n",
    "postcode,\n",
    "ERP采购价,\n",
    "ERP包裹数据,\n",
    "实际包裹数据\n",
    "FROM\n",
    "t3\n",
    "WHERE\n",
    "rn=1\n",
    "\n",
    "\n",
    "\n",
    "    \"\"\"\n",
    "    df=pd.read_sql(\"SELECT * FROM `order_complet4` WHERE buy_amount is not null and `实际尺寸售价` IS NULL limit 1000 \",db.con)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def call_sell_price(price, package_dict,head_type=\"海运\"):\n",
    "    \"\"\"Price one package set through the project's sell-price calculator.\n",
    "\n",
    "    Args:\n",
    "        price: amount forwarded unchanged to `call_sell_and_order_price`.\n",
    "        package_dict: JSON text describing the packages; decoded with `json.loads`.\n",
    "        head_type: forwarded unchanged to the calculator (presumably the first-leg\n",
    "            shipping mode -- confirm against `sell.sell_price`).\n",
    "\n",
    "    Returns:\n",
    "        `(sell_price, order_price, order_type)`, or a tuple of three empty strings\n",
    "        when decoding/pricing raises or the calculator's first result equals 0.\n",
    "    \"\"\"\n",
    "    import json\n",
    "    from sell.sell_price import call_sell_and_order_price\n",
    "\n",
    "    no_result = (\"\", \"\", \"\")\n",
    "    try:\n",
    "        parsed_packages = json.loads(package_dict)\n",
    "        all_sell_price, order_price, order_type = call_sell_and_order_price(price, parsed_packages, head_type)\n",
    "    except Exception as exc:\n",
    "        # Best effort: report the failure and let the caller continue with blanks.\n",
    "        print(f\" 报错: {exc}\")\n",
    "        return no_result\n",
    "    if all_sell_price == 0:\n",
    "        return no_result\n",
    "    # Only the first entry of the calculator's price result is reported.\n",
    "    return (all_sell_price[0], order_price, order_type)\n",
    "\n",
    "\n",
    "# Compute the current sell price for every row and store the three results.\n",
    "RESULT_COLUMNS = ('网站售价', '订单物流费', '尾端类型')\n",
    "for row_label, record in df.iterrows():\n",
    "    quote = call_sell_price(record['buy_amount'], record['package_json'], \"海运\")\n",
    "    print(quote)\n",
    "    for column, value in zip(RESULT_COLUMNS, quote):\n",
    "        df.loc[row_label, column] = value\n",
    "    print(f\"SKU: {record['sku']} 网站售价: {quote[0]}  订单物流费: {quote[1]} 尾端类型: {quote[2]}\")\n",
    "df.to_clipboard(index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "新的|计算欧洲各国每种货型占比"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\42756626.py:31: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
      "  df = pd.read_sql(sql, db.con)\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from utils.gtools import MySQLconnect\n",
    "# Package rows from order_express joined with measured dimensions (package_vol_info)\n",
    "# and the order postcode (order_list). Filters: delivery time in\n",
    "# [2025-05-01, 2025-08-01), status not matching '已作废', all dimensions and weight\n",
    "# positive, destination matching \"Australia\".\n",
    "sql=r\"\"\"SELECT\n",
    "包裹号,\n",
    "单号,\n",
    "快递公司,\n",
    "目的国,\n",
    "快递分区,\n",
    "投递时间,\n",
    "postcode,\n",
    "length,\n",
    "width,\n",
    "hight,\n",
    "weight\n",
    "FROM\n",
    "order_express oe\n",
    "LEFT JOIN package_vol_info pvi ON oe.`包裹号` = pvi.package\n",
    "left join order_list ON oe.单号 = order_list.order_id\n",
    "WHERE\n",
    "`投递时间` >='2025-05-01'\n",
    "AND `投递时间`<'2025-08-01'\n",
    "AND `包裹状态` NOT REGEXP '已作废'\n",
    "AND length >0\n",
    "AND width >0\n",
    "AND hight>0\n",
    "AND weight>0\n",
    "# AND 目的国 NOT REGEXP \"United States|Australia|United Kingdom|Japan|Canada\"\n",
    "and 目的国 REGEXP \"Australia\"\n",
    "# AND 目的国 <>''\"\"\"\n",
    "# Run the query against the 'ods' connection; this rebinds `df` for the cells below.\n",
    "with MySQLconnect('ods') as db:\n",
    "    df = pd.read_sql(sql, db.con)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the column labels of the loaded frame.\n",
    "print(df.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\4220111735.py:9: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
      "  type_series = df.groupby('单号').apply(order_type)\n",
      "C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_35372\\4220111735.py:17: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
      "  weight_series = df.groupby('单号').apply(order_weight)\n"
     ]
    }
   ],
   "source": [
    "# A single package at or above either threshold classifies the whole order as '卡派'.\n",
    "TRUCK_MIN_LENGTH = 200\n",
    "TRUCK_MIN_WEIGHT = 31500\n",
    "# Divisor applied to length * width * hight to obtain the volumetric weight.\n",
    "# NOTE(review): the original comment mentioned a 6000 divisor while the code used 4000;\n",
    "# the code's value is kept here -- confirm which one is intended.\n",
    "VOLUMETRIC_DIVISOR = 4000\n",
    "\n",
    "\n",
    "def order_type(group):\n",
    "    \"\"\"Classify one order from its package rows.\n",
    "\n",
    "    Args:\n",
    "        group: DataFrame of the order's packages with 'length' and 'weight' columns.\n",
    "\n",
    "    Returns:\n",
    "        '卡派' if any package reaches TRUCK_MIN_LENGTH or TRUCK_MIN_WEIGHT, else '快递'.\n",
    "    \"\"\"\n",
    "    too_long = (group['length'] >= TRUCK_MIN_LENGTH).any()\n",
    "    too_heavy = (group['weight'] >= TRUCK_MIN_WEIGHT).any()\n",
    "    return '卡派' if (too_long or too_heavy) else '快递'\n",
    "\n",
    "\n",
    "def order_weight(group):\n",
    "    \"\"\"Return the billable weight of one order.\n",
    "\n",
    "    For every package the larger of its volumetric weight\n",
    "    (length * width * hight / VOLUMETRIC_DIVISOR) and weight / 1000 is taken,\n",
    "    and these per-package values are summed.\n",
    "\n",
    "    Args:\n",
    "        group: DataFrame of the order's packages with 'length', 'width', 'hight'\n",
    "            and 'weight' columns.\n",
    "    \"\"\"\n",
    "    # Column-wise arithmetic replaces the repeated per-row iloc lookups.\n",
    "    volumetric = group['length'] * group['width'] * group['hight'] / VOLUMETRIC_DIVISOR\n",
    "    actual = group['weight'] / 1000\n",
    "    return sum(max(vol, act) for vol, act in zip(volumetric, actual))\n",
    "\n",
    "\n",
    "# Selecting the needed columns after groupby keeps the grouping column out of the\n",
    "# applied frame, which avoids pandas' DataFrameGroupBy.apply DeprecationWarning.\n",
    "orders = df.groupby('单号')\n",
    "type_series = orders[['length', 'weight']].apply(order_type)\n",
    "weight_series = orders[['length', 'width', 'hight', 'weight']].apply(order_weight)\n",
    "\n",
    "# Map the per-order results back onto every package row.\n",
    "# The type mapping was disabled in the original and stays disabled:\n",
    "# df['类型'] = df['单号'].map(type_series)\n",
    "df['计费重'] = df['单号'].map(weight_series)\n",
    "df.to_clipboard(index= False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explicit imports instead of `import *` so the origin of each class is visible.\n",
    "from logisticsClass.logisticsTail_AU import AllLogistics_AU, PostLogistics_AU, TollLogistics_AU\n",
    "\n",
    "# Carrier keyword -> logistics class. Order matters: the first keyword found in\n",
    "# the 快递公司 value wins, exactly like the original if/elif chain.\n",
    "CARRIER_CLASSES = (\n",
    "    (\"POST\", PostLogistics_AU),\n",
    "    (\"TOLL\", TollLogistics_AU),\n",
    "    (\"ALL\", AllLogistics_AU),\n",
    ")\n",
    "# One logistics object per carrier, created on first use instead of once per row.\n",
    "# NOTE(review): assumes `is_remote` keeps no per-call state on the instance -- confirm.\n",
    "carrier_instances = {}\n",
    "\n",
    "for i, row in df.iterrows():\n",
    "    # Rows whose carrier matches no keyword get this label and no '渠道' value.\n",
    "    zone = \"其他渠道\"\n",
    "    for keyword, logistics_cls in CARRIER_CLASSES:\n",
    "        if keyword in row['快递公司']:\n",
    "            if keyword not in carrier_instances:\n",
    "                carrier_instances[keyword] = logistics_cls()\n",
    "            zone = carrier_instances[keyword].is_remote(row['postcode'])\n",
    "            df.loc[i,'渠道'] = keyword\n",
    "            break\n",
    "    df.loc[i,'分区'] = zone\n",
    "\n",
    "df.to_clipboard(index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}