logistics/产品上限优化.ipynb

6.0 KiB
Raw Blame History

In [ ]:
import pandas as pd
from utils.gtools import MySQLconnect
# Product category whose SKUs are analysed; interpolated into the WHERE clause below.
categories = '94 - Office Desks'
with MySQLconnect('ods') as db:
    engine = db.engine()
    # Query outline:
    #   CTE a - SPUs added between 2023-01-01 and 2024-12-31 23:59:59 in the
    #           chosen category, LEFT JOINed to their SKUs and to order lines;
    #           month_diff is TIMESTAMPDIFF(MONTH, add time, order_date).
    #   CTE b - attaches erp_package_vol per SKU, replaces month_diff with NULL
    #           when it is >= 6, and numbers each SKU's rows by order_date
    #           descending.
    #   Final - keeps the rank-1 row of each SKU only when its month_diff is
    #           NULL, i.e. that row's order came >= 6 months after the add time
    #           or the row has no order at all (order_date NULL from the
    #           LEFT JOIN).
    # NOTE(review): the category is spliced in with an f-string; acceptable for
    # the constant above, but switch to bound parameters if it ever comes from
    # user input.
    # NOTE(review): erp_package_vol comes from a LEFT JOIN, so it can be NULL
    # for SKUs missing from dim_erp_sku_package_vol_info - confirm that the
    # cells consuming this frame handle that.
    sql = f"""
WITH a AS (
	SELECT
		t1.SPU,
		t2.SKU,
		t1.产品分类,
		t1.添加时间,
		order_date,
		t2.成本价,
		opl.product_price_dollar,
		opl.product_num,
		TIMESTAMPDIFF( MONTH, t1.添加时间, order_date ) AS month_diff 
	FROM
		ods.stg_bayshop_litfad_spu t1
		LEFT JOIN ods.stg_bayshop_litfad_sku t2 ON t2.产品PID = t1.产品PID
		LEFT JOIN dws.order_product_list opl ON t2.SKU = opl.SKU 
	WHERE
		t1.添加时间 BETWEEN '2023-01-01' 
		AND '2024-12-31 23:59:59' 
		AND 产品分类 = '{categories}'
		AND t2.SKU IS NOT NULL 
	),
	b AS (
	SELECT
		SPU,
		SKU,添加时间,产品分类,成本价,
		b.erp_package_vol,
		order_date,
	IF
		( month_diff >= 6, NULL, month_diff ) AS month_diff,
		ROW_NUMBER() over ( PARTITION BY SKU ORDER BY order_date DESC ) AS ranking 
	FROM
		a
		LEFT JOIN dwd.dim_erp_sku_package_vol_info b ON a.SKU = b.erp_sku 
	) SELECT
	SPU,
	SKU,添加时间,产品分类,成本价,
	b.erp_package_vol 
FROM
	b 
WHERE
	ranking = 1 
	AND month_diff IS NULL
"""
    # Result columns: SPU, SKU, 添加时间, 产品分类, 成本价, erp_package_vol.
    df = pd.read_sql(sql, engine)

得到每个SKU的最长边、围长、总重量、6000抛重,以及采购体积比(采购/6000抛重)

In [ ]:
import json
import re

import numpy as np

from sell.sell_price import call_sell_and_order_price
def extract_number(value):
    """Return the first number found in ``value`` as a float.

    ``value`` is converted with ``str()`` first, so numbers, strings and
    ``None`` are all accepted. An optional leading ``+``/``-`` sign is kept
    for both integers and decimals.

    Args:
        value: Any object whose string form may contain a number,
            e.g. ``"12.5cm"``, ``30`` or ``None``.

    Returns:
        The first number as a float, or ``0.0`` when no digit is present.
    """
    # The alternatives are grouped so the optional sign applies to both the
    # decimal and the integer form. Without the group, "-5" matched only "5"
    # while "-5.0" kept its sign.
    match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", str(value))
    return float(match.group()) if match else 0.0
# For every SKU row: parse its package JSON, derive size/weight metrics across
# all packages, ask the pricing helper for sea-freight prices, and write the
# results back onto the same row of df.
# NOTE(review): json.loads raises if erp_package_vol is NULL or not valid
# JSON - confirm the query never returns such rows.
for index, row in df.iterrows():
    packages = json.loads(row['erp_package_vol'])
    longest_side = 0      # longest single edge over all packages
    largest_girth = 0     # largest (longest edge + 2 * (other two edges))
    total_weight = 0      # sum of package weights divided by 1000
    total_vol_weight = 0  # sum of L*W*H / 6000 (volumetric weight)

    for package in packages.values():
        # Normalise the raw fields to floats in place.
        for field in ('长', '宽', '高', '重量'):
            package[field] = extract_number(package[field])
        length, width, height = package['长'], package['宽'], package['高']

        # Ascending sort: shortest, middle, longest edge.
        short_edge, mid_edge, long_edge = sorted([length, width, height])
        longest_side = max(longest_side, long_edge)
        largest_girth = max(largest_girth, long_edge + (mid_edge + short_edge) * 2)

        total_weight += package['重量'] / 1000
        total_vol_weight += length * width * height / 6000

    # The pricing helper receives the untouched JSON string, not the parsed dict.
    sell_price, order_price, order_type = call_sell_and_order_price(
        row['成本价'], row['erp_package_vol'], "海运"
    )

    # Written one cell at a time, in this order, so new columns are created
    # in the same sequence as before.
    row_values = {
        '网站售价': sell_price[0],
        '物流分摊费': sell_price[1],
        '订单物流费': order_price,
        '尾端类型': order_type,
        '最长边': longest_side,
        '最大围长': largest_girth,
        '总重量': total_weight,
        '总抛重': total_vol_weight,
    }
    for column, value in row_values.items():
        df.loc[index, column] = value
    print(index)
In [ ]:
# Bucket SKUs by cost price and by total volumetric weight, then summarise each
# bucket: longest side (max and min), largest girth (max), actual weight
# (max, min and sum), summed website price / logistics share / order logistics
# fee, and the number of SKUs. The tail-end type column is left out on purpose.

# 1. Cost-price buckets: edges every 10 from 0 to 3990 plus a final edge at
#    28700. Intervals are right-closed and labelled by their upper edge.
cost_bins = [*range(0, 4000, 10), 28700]
df['成本价分组'] = pd.cut(
    df['成本价'],
    bins=cost_bins,
    labels=cost_bins[1:],
    right=True,
)

# 2. Volumetric-weight buckets of width 5: round up to the next multiple of 5
#    (0-5 -> 5, 5.01-10 -> 10, ...).
df['总抛重分组'] = np.ceil(df['总抛重'].div(5)).mul(5).astype(int)

# Rows without a cost bucket (cost price missing or outside the bins) are dropped.
df = df.dropna(subset=['成本价分组'])

# 3. Aggregate per (cost bucket, volumetric-weight bucket). Named aggregation
#    yields the final column names directly, in this order.
summary_spec = {
    '最长边max': ('最长边', 'max'),
    '最长边min': ('最长边', 'min'),
    '最大围长': ('最大围长', 'max'),
    '总重量max': ('总重量', 'max'),
    '总重量min': ('总重量', 'min'),
    '总重量': ('总重量', 'sum'),
    '网站售价': ('网站售价', 'sum'),
    '物流分摊费': ('物流分摊费', 'sum'),
    '订单物流费': ('订单物流费', 'sum'),
    'SKU种类': ('SKU', 'count'),
}
agg_df = (
    df.groupby(['成本价分组', '总抛重分组'], observed=True)
    .agg(**summary_spec)
    .reset_index()
)

# 4. Put the summary on the clipboard (without the index) for pasting elsewhere.
agg_df.to_clipboard(index=False)