logistics/一票一件订单.ipynb

42 KiB
Raw Blame History

In [ ]:
import pandas as pd
from utils.gtools import MySQLconnect

# Load the parcels to be costed.
#   t1: order lines dated 2025-02-01 .. 2025-06-01 that have no entry in the
#       reissue log, aggregated per order.
#   t2: delivered parcels with positive measured dimensions/weight belonging
#       to orders with product_num = 1 and 产品种类 = 1.
#   t3: joins ERP cost and ERP package data and ranks rows per SKU by
#       measurement time; the final SELECT keeps the newest row (rn = 1).
with MySQLconnect('ods') as db:
    # Fix: the final SELECT used DATE_FORMAT(..., "%Y-%M-%D"), which in MySQL
    # renders the month name and an ordinal day (e.g. "2025-March-1st").
    # It now uses "%Y-%m-%d", the same format t1 uses for 订单时间.
    # The duplicated `b.hight > 0` predicate in t2 was also dropped.
    sql = r"""
           WITH
t1 AS (
SELECT
order_id,
SKU,
order_date,
sum(CASE WHEN opl.order_product_id LIKE '%\_%' ESCAPE '\\' 
         AND opl.order_product_id NOT LIKE '%\_%\_%' ESCAPE '\\' THEN product_num END) AS product_num,
DATE_FORMAT(order_date,"%Y-%m-%d") AS 订单时间,
count(opl.SKU) AS 产品种类
FROM
dws.fact_order_product_list opl
WHERE
  NOT EXISTS (
    SELECT 1 
    FROM dws.log_order_reissue_detail AS r 
    WHERE r.order_product_id = opl.order_product_id
  )
AND order_date >= "20250201"
AND order_date < "20250601"
AND SKU <> ""
GROUP BY order_id
)
,
t2 AS (
SELECT
            a.`包裹测量时间`,
            t1.order_id,
            t1.SKU,
            t1.order_date,
            a.包裹号,
            a.快递公司,
            a.运输方式,
            a.`目的国`,
            d.postcode,
            CONCAT(
            '"', b.package, '": {',
            '"长": ', length, ', ',
            '"宽": ', width, ', ',
            '"高": ', hight, ', ',
            '"重量": ', weight, '}'
        ) AS package_json
        FROM
            t1
            LEFT JOIN order_express a ON t1.order_id = a.单号
            JOIN package_vol_info b ON a.`包裹号` = b.package
            JOIN order_list d ON a.`单号` = d.order_id 
        WHERE
            a.`包裹状态` IN ( '客户签收', '已经投递') 
            AND b.hight > 0 
            AND b.length > 0 
            AND b.width > 0 
            AND b.weight > 0
            #  AND a.`目的国` = "United States"
            AND t1.product_num = 1
            AND t1.产品种类=1
            AND a.`包裹测量时间` >= '2025-03-01'
            AND a.`包裹测量时间` < '2025-06-01'
),
t3 AS (
SELECT
t2.*,
sku.成本价 AS ERP采购价,
ess.包裹数据 AS ERP包裹数据,
CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,
ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn
FROM
t2
LEFT JOIN ads.new_erp_sku_size ess ON t2.SKU=ess.SKU
LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU
WHERE
ess.`包裹数据`<>''
GROUP BY order_id
)
SELECT
包裹测量时间,
order_id,
SKU,
DATE_FORMAT(order_date,"%Y-%m-%d") AS 订单时间,
包裹号,
`快递公司`,
`运输方式`,
`目的国`,
postcode,
ERP采购价,
ERP包裹数据,
实际包裹数据
FROM
t3
WHERE
rn=1


    """
    df=pd.read_sql(sql,db.con)
    print(df)
df
In [ ]:
# Distinct order ids, plus the same ids rendered as a quoted,
# comma-separated list.
list_order_id = df["order_id"].drop_duplicates().tolist()
param_order_id = ",".join(["'" + format(order_id) + "'" for order_id in list_order_id])

先计算美国订单的实际利润率

In [ ]:
from utils.gtools import MySQLconnect

ods = MySQLconnect("ods")
engine = ods.engine()
# NOTE(review): this cursor is not used in this cell; it is kept in case a
# later cell relies on it — confirm and remove if not.
cursor = ods.connect().cursor()

# Maximum number of order_ids per query, so the IN (...) list stays bounded.
# (The old comment said 500; the value in effect is 50000.)
batch_size = 50000
order_id_list = df["order_id"].drop_duplicates().tolist()  # all distinct order_ids
# df['postcode'] = "38016"
# Per-batch query results.
result_dfs1 = []
result_dfs2 = []  # NOTE(review): never filled in this cell
total_order_ids = len(order_id_list)
for i in range(0, total_order_ids, batch_size):
    batch_order_ids = order_id_list[i:i + batch_size]  # order_ids of the current batch
    param = ",".join(f"'{order_id}'" for order_id in batch_order_ids)

    # Purchase cost per order: stock-out cost (out qty * in-stock price) when
    # present, otherwise the completed purchasing cost.
    purchase_order_sql = f"""
    WITH t1 AS (
        SELECT LEFT(ol.out_detials_outlink_id, 15) AS order_id,
               SUM(out_detials_qty * price) AS instock_cost,
               NULL AS buy_cost
        FROM ods.outstock_list ol
        JOIN ods.instock_list il ON ol.store_in_id = il.id 
        WHERE LEFT(ol.out_detials_outlink_id, 15) IN ({param})
        GROUP BY LEFT(ol.out_detials_outlink_id, 15)
        
        UNION ALL
        
        SELECT LEFT(order_product_id, 15) AS order_id, 
               NULL AS instock_cost,
               SUM(buy_num * actual_price) AS buy_cost
        FROM warehouse_purchasing
        WHERE LEFT(order_product_id, 15) IN ({param}) 
              AND buy_audit = "采购完成"
        GROUP BY LEFT(order_product_id, 15)
    )
    SELECT order_id,
           SUM(CASE 
               WHEN instock_cost IS NULL THEN buy_cost
               ELSE instock_cost 
           END) AS 采购成本
    FROM t1 
    GROUP BY order_id
    """

    batch_df1 = pd.read_sql(purchase_order_sql, con=engine)  # run the query
    result_dfs1.append(batch_df1)
    # Report the number of ids actually processed; the last batch may be short.
    print(f"已完成 {min(i + batch_size, total_order_ids)} 个 order_id 的查询")

# Combine all batches. pd.concat raises on an empty list, so fall back to an
# empty frame with the expected columns when there was nothing to query.
if result_dfs1:
    purchase_order_df1 = pd.concat(result_dfs1, ignore_index=True)
else:
    purchase_order_df1 = pd.DataFrame(columns=["order_id", "采购成本"])
purchase_order_df1["order_id"] = purchase_order_df1["order_id"].astype(str)


# Align the key dtype on both sides before merging.
df["order_id"] = df["order_id"].astype(str)

# Attach the purchase cost to every order row.
df = pd.merge(df, purchase_order_df1, on='order_id', how='left')
# Copy the result to the clipboard.
df.to_clipboard(index=False)
In [ ]:
import pandas as pd
# Rebuild df from whatever table is currently on the system clipboard
# (presumably the one an earlier cell copied there — verify before running).
df = pd.read_clipboard()
df
In [ ]:
from utils.countryOperator import OperateCountry
from utils.logisticsBill import BillFactory
from utils.Package import Package, Package_group
import pandas as pd
import json
import re
# 美国 
from utils.logisticsBill import Billing
import requests

def extract_number(value):
    """Return the first number found in ``str(value)`` as a float, or 0.0 if there is none."""
    match = re.search(r"[-+]?\d*\.\d+|\d+", str(value))
    return float(match.group()) if match else 0.0


# Built once instead of once per row: it does not depend on the row.
opCountry = OperateCountry('US')

# For every row, rebuild the measured packages from the JSON column, bill them
# and store the head-leg price in the '头程CNY' column.
for index, row in df.iterrows():
    postcode = row['postcode']
    if pd.isna(postcode) or str(postcode).lower() == "nan":
        continue
    try:
        package_dict = json.loads(row['实际包裹数据'])
    except Exception as e:
        print(f"行 {index} 解析失败: {e}")
        print(row['实际包裹数据'])
        continue
    packages = Package_group()
    valid_packages = 0
    for key, package in package_dict.items():
        for field in ('长', '宽', '高', '重量'):
            package[field] = extract_number(package[field])

        # A package with any zero measurement cannot be billed.
        if package['长'] == 0 or package['宽'] == 0 or package['高'] == 0 or package['重量'] == 0:
            continue
        packages.add_package(Package(key,package['长'], package['宽'], package['高'], package['重量']))
        valid_packages += 1
    # Fix: the old guard was `packages is None`, which is never true for a
    # freshly built Package_group, so rows without any usable package were
    # still sent to Billing. Skip them explicitly.
    if valid_packages == 0:
        continue
    if "海运" in row['运输方式']:
        head_type = 1
    else:
        head_type = 0

    # Carrier-specific channel mapping, currently disabled: every row is
    # billed with company_name="Fedex-GROUD" below.
    # if "FEDEX-SAIR-G" in row['快递公司']:
    #     company_name = "Fedex-GROUD"
    # elif "FEDEX-SAIR-H" in row['快递公司']:
    #     company_name = "Fedex-HOME"
    # elif "FEDEX02" in row['快递公司']:
    #     company_name = "Fedex-彩虹小马"
    # elif "大包" in row['快递公司'] or row['快递公司'] == '海MS-FEDEX':
    #     company_name = "Fedex-金宏亚"
    # elif "GIGA" in row['快递公司']:
    #     company_name = "大健-GIGA"
    # elif "CEVA" in row['快递公司']:
    #     company_name = "大健-CEVA"
    # elif "USPS" in row['快递公司']:
    #     company_name = "Fedex-GROUD"
    # else:
    #     company_name = "大健-Metro"

    bill = Billing(str(index),opCountry,packages,postcode,company_name="Fedex-GROUD",head_type=head_type,beizhu='1')
    head_price = bill.head_amount[0]
    # NOTE(review): tail_price is computed (and halved for USPS) but is not
    # written back to df in this cell.
    tail_price = bill.tail_amount[0]
    if "USPS" in row['快递公司']:
        tail_price = tail_price/2
    df.loc[index,'头程CNY'] = head_price
    # df.loc[index,'最优渠道'] = bill.company_name
    print(f"行 {index} 处理完成")

df.to_clipboard(index=False)
In [ ]:
# Rows whose destination country is the United States.
us_df = df.loc[df['目的国'] == 'United States']
In [ ]:
from utils.countryOperator import OperateCountry
from utils.logisticsBill import BillFactory
from utils.Package import Package, Package_group
import pandas as pd
# 美国 
# Load the order sheet and keep only rows whose tail-end channel is
# 大健-Metro or 大健-CEVA.
df1=pd.read_excel(r"D:\test\logistics\拦截数据\1-3月利润分段.xlsx",sheet_name="11-4月全球订单")
# Explicit copy: a later cell writes new columns into df with .loc, which on a
# filtered slice of df1 triggers pandas' SettingWithCopyWarning.
df = df1[df1['尾端渠道'].isin(["大健-Metro", "大健-CEVA"])].copy()
# df['postcode'] = df['postcode'].astype(str)
In [ ]:
from utils.logisticsBill import Billing
# Bill every row as a single-package shipment through 大健-Metro and store the
# tail-leg amount in the "美西" column ("不可算" when billing fails).
opCountry = OperateCountry('US')
billFactory = BillFactory()
for index,row in df.iterrows():
    print(row['postcode'])
    package = Package(row['包裹号'],row['长'],row['宽'],row['高'],row['重量'])
    packages= Package_group([package])
    postcode = row['postcode']
    head_type = 1 if row['运输方式'] == '海运' else 0
    try:
        bill = Billing(str(row['包裹号']),opCountry,packages,postcode=str(postcode),company_name="大健-Metro",head_type=head_type,beizhu=0)
        df.loc[index,"美西"] = bill.tail_amount[0]
    except Exception as e:
        # Fix: this was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit (the loop could not be stopped) and
        # hid the reason a row could not be billed.
        bill = None
        df.loc[index,"美西"] = "不可算"
        print(f"行 {index} 计算失败: {e}")

    print(index)
    print(bill)
# bill_df = billFactory.bills_to_df()
# bill_df['ID'] = bill_df['ID'].astype(int)
# bill_df.to_clipboard()

# merged_df = pd.merge(df, bill_df, left_on="包裹号",right_on="ID", how='inner')
df.to_clipboard(index=False)
In [ ]:
import re
from data.us_zone import zone_west
def get_west_zone(postcode, zones=None):
    """Return the US-West shipping zone that contains ``postcode``.

    Args:
        postcode: A 5-digit ZIP or ZIP+4 ("12345-6789"). Non-string values
            (e.g. integers read from Excel) are converted with ``str``.
        zones: Optional mapping ``zone -> list of entries``, where an entry is
            either a range ``"start-end"`` or a single postcode. Defaults to
            the module-level ``zone_west`` table.

    Returns:
        The matching zone key, ``"邮编格式不合法"`` when the postcode does not
        start with five digits, or ``"未查询到邮编分区"`` when no zone matches.
    """
    if zones is None:
        zones = zone_west
    # Fix: re.match raises TypeError on non-string input such as int or NaN.
    postcode = str(postcode).strip()
    if not re.match(r'\d{5}-\d{4}|\d{5}', postcode):
        return "邮编格式不合法"
    postcode = int(postcode[:5])
    for zone, postcodes in zones.items():
        if not postcodes:
            # An empty zone would raise IndexError in the pre-check below.
            continue
        # Cheap pre-check on the outer bounds of the zone; like the original
        # code, it relies on the entries being listed in ascending order.
        start = int(postcodes[0].split("-")[0])
        end = int(postcodes[-1].split("-")[-1])
        if start > postcode or postcode > end:
            continue
        for postcode_range in postcodes:
            if "-" in postcode_range:
                start, end = map(int, postcode_range.split("-"))
                if start <= postcode <= end:
                    return zone
            elif int(postcode_range) == postcode:
                # Fix: this branch compared int(zone) with the postcode, so a
                # single-postcode entry could never match.
                return zone
    return "未查询到邮编分区"
# Look up the zone of every postcode, then copy the table to the clipboard.
df['分区'] = [get_west_zone(code) for code in df['postcode']]
df.to_clipboard(index=False)
In [ ]:
# 取sku所属SPU下所有sku及其现在售价
from sell.sell_price import call_sell_and_order_price
import json
# Numeric SKUs only: non-numeric values become NaN and are dropped, the rest
# are normalised to integer strings.
numeric_sku = order_id_df_cal['SKU'].apply(pd.to_numeric, errors='coerce')
sku_list = numeric_sku.dropna().astype(int).astype(str).tolist()
# One "%s" placeholder per SKU for the parameterised IN (...) clause.
placeholders = ','.join('%s' for _ in sku_list)
# result = []
# Fetch every SKU (with cost price and ERP package data) of each SPU that
# owns at least one of the SKUs above.
with MySQLconnect('ods') as db:
    enginal = db.engine()
    sql = f"""SELECT
        SPU,
        sku.SKU,
        sku.`成本价`,
        spi.`包裹数据`
    FROM
        stg_bayshop_litfad_sku sku
        LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` 
        LEFT JOIN ads.new_erp_sku_size spi ON sku.SKU =spi.SKU
    WHERE
        spu.SPU IN (
        SELECT
            SPU 
        FROM
            stg_bayshop_litfad_sku sku
            LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` 
        WHERE
            sku.SKU IN ({placeholders}) 
    )
        """
    result = pd.read_sql(sql, enginal, params=tuple(sku_list))
In [ ]:
import json
from sell.sell_price import call_sell_and_order_price
# 计算当前售价
# For every row, price the order from its purchase cost and measured packages
# and store the pricing components in df.
for index, row in df.iterrows():
    price = row['采购成本']
    try:
        package_dict = json.loads(row['实际包裹数据'])
        sell_price, order_price, order_type = call_sell_and_order_price(price, package_dict)
    except Exception as e:
        print(f"SKU: {row['SKU']} 报错: {e}")
        continue
    if sell_price == 0:
        continue
    # sell_price is indexed positionally: one output column per element.
    for position, column in enumerate(('实际应有售价', '实际物流分摊费', '实际头程cny', '实际头程USD')):
        df.loc[index, column] = sell_price[position]
    df.loc[index, '订单物流费'] = order_price
    df.loc[index, '尾端类型'] = order_type
    print(f" SKU {row['SKU']} ")
df.to_clipboard(index=False)

编写计算物流费的海运、空运函数

In [ ]:
import math
# Tail-end price sheet; the lookup tables below are keyed by the values in
# spreadsheet column I.
express_price = pd.read_excel(r'D:\test\logistics\data\售价尾端价格.xlsx', sheet_name='Sheet1')
# Spreadsheet columns I, J, K and L (0-based positions 8..11).
key_column, value_column, small_column, big_column = (
    express_price.iloc[:, position] for position in range(8, 12)
)
# Column I -> column J, used by the ocean pricing function.
ocean_price_dict = dict(zip(key_column, value_column))
# Column I -> column K / column L, used by the air pricing function.
air_small_dict = dict(zip(key_column, small_column))
air_big_dict = dict(zip(key_column, big_column))
def ocean_order_price(packages):
    """Price an ocean-shipped order and return the cheaper delivery option.

    Two totals are computed over ``packages``: an express total (per-package
    base fee looked up in ``ocean_price_dict`` plus over-length, over-weight
    and big-package surcharges) and an LTL ("卡派") total (a base fee chosen
    from the summed package volume plus surcharges charged per group of three
    affected packages). The smaller total is returned.

    Args:
        packages: Iterable of package objects exposing ``weight``,
            ``fst_size``, ``sed_size``, ``girth``, ``length``, ``width`` and
            ``height``. Units are not visible here — presumably grams and
            centimetres; TODO confirm.

    Returns:
        Tuple ``(fee, type_label)``; the label is the LTL label when LTL is
        strictly cheaper, otherwise the express surcharge label.
    """
    express_fee = 0 # base express fee
    long_fee = 0 # over-length surcharge
    weight_fee = 0 # over-weight surcharge
    big_fee = 0 # big-package surcharge
    express_type = ''
    for package in packages:
        # First table entry whose key is >= the package weight; if none
        # matches, no base fee is added for this package.
        for key, value in ocean_price_dict.items():
            if package.weight <=key:
                express_fee+=value
                break
        if package.fst_size>=116 or package.sed_size>=71 or package.girth>=251:
            long_fee += 16.3
            express_type +="超长"
        if package.weight>=21000 and package.fst_size<238 and package.girth<315:
            weight_fee+=25.5
            express_type +="超重"
        if package.fst_size>=238 or package.girth>=315:
            big_fee+=61.6
            express_type +="大包裹"
    express_fee = express_fee + long_fee + weight_fee + big_fee

    # LTL surcharges are charged once per group of 3 affected packages.
    ltl_base = 0
    ltl_fee = 0
    count1 = 0
    count2 = 0
    count3 = 0
    count4 = 0
    order_type2 = '卡派'
    order_other_type1 = ''
    order_other_type2 = ''
    order_other_type3 = ''
    order_other_type4 = ''
    order_ltl_oversize = 0
    order_ltl_overweight1 = 0
    order_ltl_overweight2 = 0
    order_ltl_overpackage = 0
    sku_total_cubic_feet = 0
    for package in packages:
        cubic_feet= package.length * package.width * package.height / 1000000 * 35.3
        sku_total_cubic_feet += cubic_feet
        # LTL extra charges
        if package.fst_size>= 250:
            count1 += 1
            order_ltl_oversize = 118
            order_other_type1 = '超长'
        if package.weight >= 111000:
            count2 += 1
            order_ltl_overweight1 = 78
            order_other_type2 = '超重'
        if package.weight >= 130000:
            count3 += 1
            order_ltl_overweight2 = 30
            order_other_type3 = '超重'
        if package.fst_size >= 310:
            count4 += 1
            order_ltl_overpackage = 30
            order_other_type4 = '大包裹'
        # NOTE(review): this runs once per package and the flag strings are
        # never reset, so labels repeat for multi-package orders — confirm
        # whether it was meant to run once after the loop.
        order_type2 += order_other_type3 + order_other_type1 + order_other_type2 + order_other_type4

    # LTL base fee, chosen by total volume (volume / 1000000 * 35.3)
    if sku_total_cubic_feet < 25:
        ltl_base = round(163 / 0.45 / 2, 2)  # 181.11

    elif sku_total_cubic_feet < 35:
        ltl_base = round(180 / 0.45 / 2, 2)  # 200
    else:
        # 35 cubic feet or more: 5 per cubic foot,
        # with a minimum of 190
        # NOTE(review): this branch divides by 0.359 and rounds to an integer,
        # unlike the two branches above (0.45, two decimals) — confirm.
        ltl_base = round(max(190, 5 * sku_total_cubic_feet) / 0.359 / 2)


    ltl_fee = math.ceil(count1 / 3) * order_ltl_oversize + math.ceil(count2 / 3) * order_ltl_overweight1 + math.ceil(
        count3 / 3) * order_ltl_overweight2 + math.ceil(count4 / 3) * order_ltl_overpackage + ltl_base

    # LTL wins only when strictly cheaper than express.
    if ltl_fee < express_fee:
        ocean_fee = ltl_fee
        order_type = order_type2
    else:
       ocean_fee = express_fee
       order_type = express_type
    return ocean_fee, order_type

def air_order_price(packages):
    """Price an air-shipped order package by package.

    Each package gets a table price (small-parcel table when it weighs at
    most 2718 and measures at most 50 x 40 x 30, big-parcel table otherwise)
    and a fee formula chosen by its weight band (<= 420, <= 2718, heavier).

    Args:
        packages: Iterable of package objects exposing ``weight``,
            ``fst_size``, ``sed_size``, ``trd_size``, ``density`` and
            ``get_volume_weight(divisor)``.

    Returns:
        Tuple ``(total_fee, type_label)``; the label concatenates one of
        'USPS', 'UandF' or 'FEDEX' per package.
    """
    def _table_price(table, weight):
        # Price of the first table entry whose key covers `weight`; 0 if none.
        for limit, amount in table.items():
            if weight <= limit:
                return amount
        return 0

    express_fee = 0
    express_type = ''
    for package in packages:
        bill_weight = max(package.weight, package.get_volume_weight(8500))
        if (package.weight <= 2718 and package.fst_size <= 50
                and package.sed_size <= 40 and package.trd_size <= 30):
            # Small parcel: the lightest band is looked up by actual weight,
            # the next band by billable weight.
            lookup_weight = package.weight if package.weight <= 420 else bill_weight
            price = _table_price(air_small_dict, lookup_weight)
        else:
            price = _table_price(air_big_dict, bill_weight)

        if package.weight <= 420:
            rate = (min(max(package.density, 37), 337) * 0.093 + 27.7) / 6 + 0.65
            express_fee += (rate * package.get_volume_weight(6000) * 0.3 + price) / 0.45
            express_type += 'USPS'
        elif package.weight <= 2718:
            rate = (min(max(package.density, 37), 337) * 0.093 + 27.7) / 6 + 0.65
            express_fee += (rate * package.get_volume_weight(8500) * 0.3 + price) / 0.45
            express_type += 'UandF'
        else:
            rate = (min(max(package.density, 37), 337) * 0.093 + 27.7 - 1.08) / 6 + 0.65 - 1.06
            express_fee += rate * package.get_volume_weight(8500) / 0.45 + price
            express_type += 'FEDEX'
    return express_fee, express_type
In [ ]:
# 记录包裹的最大重量和

from utils.Package import Package, Package_group
import re
def extract_number(value):
    """Return the first number found in ``str(value)`` as a float, or 0.0 if there is none."""
    match = re.search(r"[-+]?\d*\.\d+|\d+", str(value))
    return float(match.group()) if match else 0.0


# For every row, rebuild the ERP packages from the JSON column and compute the
# logistics fee with the ocean or air pricing function.
for index, row in df.iterrows():
    try:
        package_dict = json.loads(row['ERP包裹数据'])
    except Exception as e:
        print(f"行 {index} 解析失败: {e}")
        # Fix: this printed row['实际包裹数据'] although the column that failed
        # to parse is 'ERP包裹数据'.
        print(row['ERP包裹数据'])
        continue
    packages = Package_group()
    valid_packages = 0
    for key, package in package_dict.items():
        for field in ('长', '宽', '高', '重量'):
            package[field] = extract_number(package[field])

        # A package with any zero measurement cannot be priced.
        if package['长'] == 0 or package['宽'] == 0 or package['高'] == 0 or package['重量'] == 0:
            continue
        packages.add_package(Package(key,package['长'], package['宽'], package['高'], package['重量']))
        valid_packages += 1
    # Fix: the old guard was `packages is None`, which is never true for a
    # freshly built Package_group, so rows without any usable package were
    # still priced. Skip them explicitly.
    if valid_packages == 0:
        continue
    if row['运输方式']=='海运':
        order_fee, order_type = ocean_order_price(packages)
    else:
        order_fee, order_type = air_order_price(packages)

    # Store the fee and the delivery type on the row.
    df.loc[index, 'ERP物流费'] = order_fee
    df.loc[index, '尾端类型'] = order_type
    print(order_fee, order_type)
df.to_clipboard(index=False)
In [ ]:
# 取sku所属SPU下所有sku及其现在售价
import json
# 单个品类一个一个处理
# Categories are processed one at a time; this run handles furniture.
category = "66 - Furniture"
df_one = order_id_df_cal[order_id_df_cal['产品品类']==category]
# Numeric SKUs only: non-numeric values become NaN and are dropped, the rest
# are normalised to integer strings.
numeric_sku = df_one['SKU'].apply(pd.to_numeric, errors='coerce')
sku_list = numeric_sku.dropna().astype(int).astype(str).tolist()

# One "%s" placeholder per SKU for the parameterised IN (...) clause.
placeholders = ','.join('%s' for _ in sku_list)

# Fetch the furniture SKU attributes of every SPU that owns one of the SKUs.
with MySQLconnect('ods') as db:
    enginal = db.engine()
    sql = f"""SELECT
        SKU,
        价格 AS ERP采购价,
        规格,
        cpmaso规格,
        `标准/预设属性集`,
        自定义属性集

    FROM
        erp_furniture_sku sku
        LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`
    WHERE
        spu.SPU IN (
        SELECT
            SPU 
        FROM
            stg_bayshop_litfad_sku sku
            LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` 
        WHERE
            sku.SKU IN ({placeholders}) 
    )
        """
    result = pd.read_sql(sql, enginal, params=tuple(sku_list))
# Attach the attributes to the category rows and export them.
df_one = pd.merge(df_one, result, on=['SKU'], how='left')
df_one.to_excel(f'{category}.xlsx')

1. 找到SPU最新到仓的SKU及其体积;2. 根据算法F(x)计算这个SPU的其他SKU理论长宽高重量;3. 找到该SPU其他有过实际体积的SKU,记录这些实际体积;4. 分析这些SKU的实际体积和理论体积的差距。

In [ ]:
import pandas as pd
# Load the "单包裹数据" sheet of the single-package shipment workbook.
df = pd.read_excel(r'D:\test\logistics\拦截数据\一票一件发货订单.xlsx',sheet_name="单包裹数据")
# TODO: write a cal_size routine that takes the original and the new attribute and returns the growth coefficient
In [ ]:
import re
import json
# Compiled once instead of on every value.
number_pattern = re.compile(r"[-+]?\d*\.\d+|\d+")

# Parse the package JSON of every row and store its dimensions (sorted so that
# length >= width >= height) and weight in the ERP* columns. When a row holds
# several packages, the last one wins.
for index, row in df.iterrows():
    package_dict = json.loads(row['包裹数据'])
    has_package = False
    for package in package_dict.values():
        item = {}
        for key, value in package.items():
            # Keep the first number found in the value; values without any
            # digits are kept unchanged.
            number_str = number_pattern.findall(str(value))
            item[key] = float(number_str[0]) if number_str else value
        size = sorted([item['长'], item['宽'], item['高']])
        weight = item['重量']
        length = size[2]
        width = size[1]
        height = size[0]
        has_package = True
    # Fix: with an empty package dict the loop body never ran, so the previous
    # row's dimensions were written to this row (NameError on the first row).
    if not has_package:
        print(f"第{index}行包裹数据为空,跳过")
        continue
    df.loc[index, 'ERP长'] = length
    df.loc[index, 'ERP宽'] = width
    df.loc[index, 'ERP高'] = height
    df.loc[index, 'ERP重量'] = weight
    print(f"{row['SKU']}尺寸为:{width},h:{height},d:{length},w:{weight}")
In [ ]:
def cal_size(old,new):
    """Return the relative change from ``old`` to ``new``, i.e. (new - old) / old.

    Both arguments are converted with ``float``. Returns None when either
    value cannot be converted or when ``old`` is zero.
    """
    try:
        base, target = float(old), float(new)
    except (ValueError, TypeError):
        return None  # not convertible to float
    # A zero baseline would divide by zero.
    return (target - base) / base if base != 0 else None
# Per SPU, keep the row with the largest 订单月份 (idxmax returns the first
# such row on ties).
first_df = df.groupby('SPU').apply(lambda x: x.loc[x['订单月份'].idxmax()]).reset_index(drop=True)
# Growth coefficient of each measurement (ERP value -> measured value).
for index, row in first_df.iterrows():
    for label in ('长', '宽', '高', '重量'):
        first_df.loc[index, f'{label}系数'] = cal_size(row[f'ERP{label}'], row[label])
    coefficients = ", ".join(
        f"{first_df.loc[index, column]}" for column in ('长系数', '宽系数', '高系数', '重量系数')
    )
    print(f"{row['SPU']} 的系数为 " + coefficients)
In [ ]:
# Attach the per-SPU coefficients to every row of df.
coefficient_columns = ['SPU','长系数','宽系数','高系数','重量系数']
df = pd.merge(df, first_df[coefficient_columns], on='SPU',how='left')
print("合并完成")
# Theoretical size of each SKU: ERP value scaled by (1 + coefficient).
for label in ('长', '宽', '高', '重量'):
    df[f'理论{label}'] = ((1 + df[f'{label}系数']) * df[f'ERP{label}']).round(2)

df
In [ ]:
# Package numbers of the baseline rows held in first_df.
first_packages = set(first_df['包裹号'])

# is_first is 1 for rows whose package number is a baseline package, else 0.
df['is_first'] = df['包裹号'].apply(lambda x: int(x in first_packages))
df.to_clipboard(index=False)
In [ ]:
# 计算预测后的尺寸下,一票一件订单的售价和订单价格
from sell.sell_price import call_sell_and_order_price
# Price every row twice: with the measured size (dict1) and with the
# theoretical size (dict2).
for index, row in df.iterrows():
    price = row['成本价']
    package_dict1 = {'包裹1': {}}
    package_dict2 = {'包裹1': {}}
    try:
        for field in ('长', '宽', '高', '重量'):
            package_dict1['包裹1'][field] = row[field]
        for field in ('长', '宽', '高', '重量'):
            package_dict2['包裹1'][field] = row['理论' + field]
        sell_price1, order_price1, order_type1 = call_sell_and_order_price(price, package_dict1)
        sell_price2, order_price2, order_type2 = call_sell_and_order_price(price, package_dict2)
    except Exception as e:
        print(f"SKU: {row['SKU']} 报错: {e}")
        continue
    outputs = {
        '实际体积售价': sell_price1,
        '实际体积订单价': order_price1,
        '实际体积订单类型': order_type1,
        '理论体积售价': sell_price2,
        '理论体积订单价': order_price2,
        '理论体积订单类型': order_type2,
    }
    for column, value in outputs.items():
        df.loc[index, column] = value
    print(f"SPU: {row['SPU']}, SKU {row['SKU']} 实际体积售价: {sell_price1}, 理论体积售价: {sell_price2},")

计算SPU下所有SKU的网站售价、实际尺寸售价、预测尺寸售价

In [ ]:
# 取表格数据is_first为1的数据取spu,长宽高重量系数
import pandas as pd
# Load the "单包裹系数计算" sheet of the single-package shipment workbook.
df = pd.read_excel(r'D:\test\logistics\拦截数据\一票一件发货订单.xlsx',sheet_name="单包裹系数计算")
# TODO: write a cal_size routine that takes the original and the new attribute and returns the growth coefficient
measure_columns = ['is_first','SPU','SKU','长','宽','高','重量']
df1 = df[measure_columns]
# Baseline rows: those flagged is_first == 1.
base_df = df.loc[df['is_first'] == 1]
In [ ]:
# 取这些SPU下的所有SKU及其现在售价
from sell.sell_price import call_sell_and_order_price
import json
from utils.gtools import MySQLconnect
import pandas as pd
# Numeric SPUs only: non-numeric values become NaN and are dropped, the rest
# are normalised to integer strings.
numeric_spu = base_df['SPU'].apply(pd.to_numeric, errors='coerce')
spu_list = numeric_spu.dropna().astype(int).astype(str).tolist()
# One "%s" placeholder per SPU for the parameterised IN (...) clause.
placeholders = ','.join('%s' for _ in spu_list)
# result = []
# Fetch every SKU of the baseline SPUs with cost, ERP package data, logistics
# share and selling price.
with MySQLconnect('ods') as db:
    enginal = db.engine()
    sql = f"""SELECT
    产品品类,
    产品分类,
        SPU,
        sku.SKU,
        sku.`成本价`,
        spi.`包裹数据`,
        物流分摊,
        产品售价
    FROM
        stg_bayshop_litfad_sku sku
        LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` 
        LEFT JOIN ads.new_erp_sku_size spi ON sku.SKU =spi.SKU
    WHERE
        spu.SPU IN ({placeholders}) 
        """
    result = pd.read_sql(sql, enginal, params=tuple(spu_list))
# Left-join the workbook rows onto the queried SKUs.
all_df = pd.merge(result, df, on=['SKU'], how='left')
all_df
In [ ]:
# 先把ERP包裹数据拆出来
import re
import json
# Compiled once instead of on every value.
number_pattern = re.compile(r"[-+]?\d*\.\d+|\d+")

# Split the ERP package JSON of every row into ERP长/宽/高/重量 (dimensions
# sorted so that length >= width >= height). When a row holds several
# packages, the last one wins.
for index, row in all_df.iterrows():
    if not isinstance(row['包裹数据'], str) or not row['包裹数据']:
        print(f"第{index}行包裹数据为空或非字符串,跳过")
        continue
    try:
        package_dict = json.loads(row['包裹数据'])
    except json.JSONDecodeError as e:
        print(f"解析失败:第{index}行,错误信息:{e}")
        continue
    item = None
    try:
        for package in package_dict.values():
            item = {}
            for key, value in package.items():
                # Keep the first number found in the value; values without
                # any digits are kept unchanged.
                number_str = number_pattern.findall(str(value))
                item[key] = float(number_str[0]) if number_str else value
    except AttributeError:
        # The JSON value (or one of its entries) is not a dict.
        print(f"解析失败:第{index}行,错误信息:包裹数据为空")
        continue
    # Fix: with an empty package dict, `item` still held the previous row's
    # package, whose dimensions were then written to this row.
    if item is None:
        print(f"解析失败:第{index}行,错误信息:包裹数据为空")
        continue
    size = sorted([item['长'], item['宽'], item['高']])
    weight = item['重量']
    length = size[2]
    width = size[1]
    height = size[0]
    all_df.loc[index, 'ERP长'] = length
    all_df.loc[index, 'ERP宽'] = width
    all_df.loc[index, 'ERP高'] = height
    all_df.loc[index, 'ERP重量'] = weight
    print(f"{row['SKU']}尺寸为:{width},h:{height},d:{length},w:{weight}")
In [ ]:
# 计算每个SPU的长宽高重量系数
def cal_size(old,new):
    """Return the relative change from ``old`` to ``new``, i.e. (new - old) / old.

    Both arguments are converted with ``float``. Returns None when either
    value cannot be converted or when ``old`` is zero.
    """
    try:
        base, target = float(old), float(new)
    except (ValueError, TypeError):
        return None  # not convertible to float
    # A zero baseline would divide by zero.
    return (target - base) / base if base != 0 else None
    
# Baseline rows (is_first == 1). Fix: take an explicit copy, because the loop
# below writes new columns into test_df with .loc; on a filtered slice of
# all_df that triggers pandas' SettingWithCopyWarning.
test_df = all_df[all_df['is_first']==1].copy()
# Growth coefficient of each measurement (ERP value -> measured value) for
# every baseline row.
for index, row in test_df.iterrows():
    test_df.loc[index, '长系数'] = cal_size(row['ERP长'],row['长'])
    test_df.loc[index, '宽系数'] = cal_size(row['ERP宽'],row['宽'])
    test_df.loc[index, '高系数'] = cal_size(row['ERP高'],row['高'])
    test_df.loc[index, '重量系数'] = cal_size(row['ERP重量'],row['重量'])
    print(
        f"{row['SPU_x']} 的系数为 "
        f"{test_df.loc[index, '长系数']}, "
        f"{test_df.loc[index, '宽系数']}, "
        f"{test_df.loc[index, '高系数']}, "
        f"{test_df.loc[index, '重量系数']}"
    )
# Attach the baseline coefficients of each SPU to every row of all_df.
# NOTE(review): if an SPU has more than one baseline row, this left merge
# duplicates that SPU's rows — confirm is_first is unique per SPU.
all_df = pd.merge(all_df, test_df[['SPU_x', '长系数', '宽系数', '高系数', '重量系数']], on='SPU_x', how='left')
In [ ]:
# Predicted size of every SKU: ERP value scaled by (1 + coefficient), rounded
# to two decimals.
for label in ('长', '宽', '高', '重量'):
    all_df[f'理论{label}'] = ((1 + all_df[f'{label}系数']) * all_df[f'ERP{label}']).round(2)
all_df
In [ ]:
# 计算三种尺寸下的售价
# 计算预测后的尺寸下,一票一件订单的售价和订单价格
from sell.sell_price import call_sell_and_order_price
# Price every row three times: with the measured size (dict1), the
# theoretical size (dict2) and the ERP size (dict3); then export the table.
for index, row in all_df.iterrows():
    price = row['成本价']
    package_dict1 = {'包裹1': {}}
    package_dict2 = {'包裹1': {}}
    package_dict3 = {'包裹1': {}}
    try:
        for field in ('长', '宽', '高', '重量'):
            package_dict1['包裹1'][field] = row[field]
        for field in ('长', '宽', '高', '重量'):
            package_dict2['包裹1'][field] = row['理论' + field]
        for field in ('长', '宽', '高', '重量'):
            package_dict3['包裹1'][field] = row['ERP' + field]
        sell_price1, order_price1, order_type1 = call_sell_and_order_price(price, package_dict1)
        sell_price2, order_price2, order_type2 = call_sell_and_order_price(price, package_dict2)
        sell_price3, order_price3, order_type3 = call_sell_and_order_price(price, package_dict3)
    except Exception as e:
        print(f"SKU: {row['SKU']} 报错: {e}")
        continue
    outputs = {
        'ERP售价': sell_price3,
        '实际体积售价': sell_price1,
        '理论体积售价': sell_price2,
        'ERP订单价': order_price3,
        '实际体积订单价': order_price1,
        '理论体积订单价': order_price2,
    }
    for column, value in outputs.items():
        all_df.loc[index, column] = value
    # all_df.loc[index, '理论体积订单类型'] = order_type2
    print(f"SPU: {row['SPU_x']}, SKU {row['SKU']} ,ERP售价: {sell_price3}, 实际体积售价: {sell_price1}, 理论体积售价: {sell_price2},")
all_df.to_excel('单包裹SKU售价分析.xlsx', index=False)