logistics/售价模型审核.ipynb

11 KiB
Raw Blame History

1.查询5.1-5.28测量包裹尺寸的订单数据 2.以美国的售价计算它当前应该有的售价 3.判断

In [ ]:
import pandas as pd
from utils.gtools import MySQLconnect

# 读取需要计算的包裹信息
with MySQLconnect('ads') as db:
    sql = r"""  
       # 限制范围是测量时间取得SKU种类为1且数量为1的订单且重复SKU只取最近的订单

WITH
t1 AS (
SELECT
order_id,
SKU,
order_date,
sum(CASE WHEN opl.order_product_id LIKE '%\_%' ESCAPE '\\' 
         AND opl.order_product_id NOT LIKE '%\_%\_%' ESCAPE '\\' THEN product_num END) AS product_num,
DATE_FORMAT(order_date,"%Y-%m-%d") AS 订单时间,
count(opl.SKU) AS 产品种类
FROM
dws.fact_order_product_list opl
WHERE
  NOT EXISTS (
    SELECT 1 
    FROM dws.log_order_reissue_detail AS r 
    WHERE r.order_product_id = opl.order_product_id
  )
AND order_date >= "20250501"
AND order_date < "20250612"
AND SKU <> ""
GROUP BY order_id
)
,
t2 AS (
SELECT			
            a.`包裹测量时间`,
						t1.order_id,
						t1.SKU,
						t1.order_date,
            a.包裹号,
            a.快递公司,
            a.运输方式,
						a.`目的国`,
            d.postcode,
            CONCAT(
            '"', b.package, '": {',
            '"长": ', length, ', ',
            '"宽": ', width, ', ',
            '"高": ', hight, ', ',
            '"重量": ', weight, '}'
        ) AS package_json
        FROM
				t1
            LEFT JOIN order_express a ON t1.order_id = a.单号
            JOIN package_vol_info b ON a.`包裹号` = b.package
            JOIN order_list d ON a.`单号` = d.order_id 
        WHERE
            a.`包裹状态` IN ( '客户签收', '已经投递') 
            AND b.hight > 0 
            AND b.length > 0 
            AND b.width > 0 
            AND b.hight > 0 
            AND b.weight > 0
--             AND a.`目的国` = "United States"
						AND t1.product_num = 1
						AND t1.产品种类=1
						AND a.`包裹测量时间` >= '2025-05-01'
						AND a.`包裹测量时间` < '2025-06-12'
),
t3 AS (
SELECT
t2.*,
sku.成本价 AS ERP采购价,
ess.erp_package_vol AS ERP包裹数据,
CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,
ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn
FROM
t2
LEFT JOIN dwd.dim_erp_sku_package_vol_info ess ON t2.SKU=ess.erp_sku
LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU
WHERE
ess.`erp_package_vol`<>"{}" AND ess.`erp_package_vol`<>""
GROUP BY order_id
)
SELECT
包裹测量时间,
order_id,
SKU,
DATE_FORMAT(order_date,"%Y-%M-%D") AS 订单时间,
包裹号,
`快递公司`,
`运输方式`,
`目的国`,
postcode,
ERP采购价,
ERP包裹数据,
实际包裹数据
FROM
t3
WHERE
rn=1



    """
    df=pd.read_sql("SELECT * FROM `order_complet4` WHERE buy_amount is not null and `实际尺寸售价` IS NULL limit 1000 ",db.con)
In [ ]:
def call_sell_price(price, package_dict,head_type="海运"):
    import json
    from sell.sell_price import call_sell_and_order_price
    try:
        package_dict = json.loads(package_dict)
        all_sell_price, order_price, order_type = call_sell_and_order_price(price, package_dict,head_type)
    except Exception as e:
        print(f" 报错: {e}")
        return  ("","","")
    if all_sell_price == 0:
        return  ("","","")
    sell_price= all_sell_price[0]
    # logis_price = all_sell_price[1]
    return (sell_price, order_price, order_type)
# 计算当前售价
for index,row in df.iterrows():
    price = row['buy_amount']
    # package_dict = json.loads(row['erp_package_vol'])
    sell_price = call_sell_price(price, row['package_json'],"海运")
    print(sell_price)
    df.loc[index,'网站售价'] = sell_price[0]
    df.loc[index,'订单物流费'] = sell_price[1]
    df.loc[index,'尾端类型'] = sell_price[2]
    print(f"SKU: {row['sku']} 网站售价: {sell_price[0]}  订单物流费: {sell_price[1]} 尾端类型: {sell_price[2]}")
df.to_clipboard(index=False)

新的|计算欧洲各国每种货型占比

In [1]:
import pandas as pd
from utils.gtools import MySQLconnect
sql=r"""SELECT
包裹号,
单号,
快递公司,
目的国,
快递分区,
投递时间,
postcode,
length,
width,
hight,
weight
FROM
order_express oe
LEFT JOIN package_vol_info pvi ON oe.`包裹号` = pvi.package
left join order_list ON oe.单号 = order_list.order_id
WHERE
`投递时间` >='2025-05-01'
AND `投递时间`<'2025-08-01'
AND `包裹状态` NOT REGEXP '已作废'
AND length >0
AND width >0
AND hight>0
AND weight>0
# AND 目的国 NOT REGEXP "United States|Australia|United Kingdom|Japan|Canada"
and 目的国 REGEXP "Australia"
# AND 目的国 <>''"""
with MySQLconnect('ods') as db:
    df = pd.read_sql(sql, db.con)
C:\Users\Admin\AppData\Local\Temp\ipykernel_35372\42756626.py:31: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
  df = pd.read_sql(sql, db.con)
In [ ]:
print(df.columns)
In [6]:
def order_type(group):
    # 判断组中是否有任何一个包裹满足“卡派”条件
    if (group['length'] >= 200).any() or (group['weight'] >= 31500).any():
        return '卡派'
    else:
        return '快递'

# 计算每个单号的类型
type_series = df.groupby('单号').apply(order_type)
def order_weight(group):
    # 计算单号的重量
    # 计算6000系数的体积重
    bill_weight= 0
    for i in range(len(group)):
        bill_weight += max(group.iloc[i]['length'] * group.iloc[i]['width'] * group.iloc[i]['hight'] / 4000, group.iloc[i]['weight']/1000)
    return bill_weight
weight_series = df.groupby('单号').apply(order_weight)
# 将结果映射回原表
# df['类型'] = df['单号'].map(type_series)
df['计费重'] = df['单号'].map(weight_series)
df.to_clipboard(index= False)
C:\Users\Admin\AppData\Local\Temp\ipykernel_35372\4220111735.py:9: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
  type_series = df.groupby('单号').apply(order_type)
C:\Users\Admin\AppData\Local\Temp\ipykernel_35372\4220111735.py:17: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
  weight_series = df.groupby('单号').apply(order_weight)
In [7]:
from logisticsClass.logisticsTail_AU import *

for i, row in df.iterrows():
    if "POST" in row['快递公司']:
        gel = PostLogistics_AU()
        zone =  gel.is_remote(row['postcode'])
        df.loc[i,'渠道'] = "POST"
    elif "TOLL" in row['快递公司']:
        gel = TollLogistics_AU()
        zone =  gel.is_remote(row['postcode'])
        df.loc[i,'渠道'] = "TOLL"
    elif "ALL" in row['快递公司']:
        gel = AllLogistics_AU()
        zone = gel.is_remote(row['postcode'])
        df.loc[i,'渠道'] = "ALL"
    else:
        zone = "其他渠道"
    df.loc[i,'分区'] = zone

df.to_clipboard(index=False)