42 KiB
42 KiB
In [ ]:
import pandas as pd
from utils.gtools import MySQLconnect

# Load the parcels to be evaluated: delivered orders together with their
# measured parcel data, keeping the most recently measured order per SKU
# (rn = 1 in the final SELECT).
#
# Fix: the final SELECT formatted the order date with "%Y-%M-%D", which in
# MySQL yields the month *name* and the day with an English suffix
# (e.g. "2025-March-1st"). It now uses "%Y-%m-%d", matching CTE t1.
# The duplicated `b.hight > 0` predicate was also removed (no effect on rows).
#
# NOTE(review): the cell text was collapsed onto one line; the in-SQL `#`
# comment is assumed to cover only the destination-country filter. Confirm
# against the original notebook.
with MySQLconnect('ods') as db:
    sql = r"""
    WITH t1 AS (
        SELECT
            order_id,
            SKU,
            order_date,
            sum(CASE
                    WHEN opl.order_product_id LIKE '%\_%' ESCAPE '\\'
                     AND opl.order_product_id NOT LIKE '%\_%\_%' ESCAPE '\\'
                    THEN product_num
                END) AS product_num,
            DATE_FORMAT(order_date,"%Y-%m-%d") AS 订单时间,
            count(opl.SKU) AS 产品种类
        FROM dws.fact_order_product_list opl
        WHERE NOT EXISTS (
                SELECT 1
                FROM dws.log_order_reissue_detail AS r
                WHERE r.order_product_id = opl.order_product_id
            )
            AND order_date >= "20250201"
            AND order_date < "20250601"
            AND SKU <> ""
        GROUP BY order_id
    ),
    t2 AS (
        SELECT
            a.`包裹测量时间`,
            t1.order_id,
            t1.SKU,
            t1.order_date,
            a.包裹号,
            a.快递公司,
            a.运输方式,
            a.`目的国`,
            d.postcode,
            CONCAT(
                '"', b.package, '": {',
                '"长": ', length, ', ',
                '"宽": ', width, ', ',
                '"高": ', hight, ', ',
                '"重量": ', weight, '}'
            ) AS package_json
        FROM t1
        LEFT JOIN order_express a ON t1.order_id = a.单号
        JOIN package_vol_info b ON a.`包裹号` = b.package
        JOIN order_list d ON a.`单号` = d.order_id
        WHERE a.`包裹状态` IN ( '客户签收', '已经投递')
            AND b.hight > 0
            AND b.length > 0
            AND b.width > 0
            AND b.weight > 0
            # AND a.`目的国` = "United States"
            AND t1.product_num = 1
            AND t1.产品种类=1
            AND a.`包裹测量时间` >= '2025-03-01'
            AND a.`包裹测量时间` < '2025-06-01'
    ),
    t3 AS (
        SELECT
            t2.*,
            sku.成本价 AS ERP采购价,
            ess.包裹数据 AS ERP包裹数据,
            CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据,
            ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn
        FROM t2
        LEFT JOIN ads.new_erp_sku_size ess ON t2.SKU=ess.SKU
        LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU
        WHERE ess.`包裹数据`<>''
        GROUP BY order_id
    )
    SELECT
        包裹测量时间,
        order_id,
        SKU,
        DATE_FORMAT(order_date,"%Y-%m-%d") AS 订单时间,
        包裹号,
        `快递公司`,
        `运输方式`,
        `目的国`,
        postcode,
        ERP采购价,
        ERP包裹数据,
        实际包裹数据
    FROM t3
    WHERE rn=1
    """
    df = pd.read_sql(sql, db.con)
    print(df)
df
In [ ]:
# Distinct order ids, in first-seen order.
unique_order_ids = df["order_id"].drop_duplicates()
list_order_id = unique_order_ids.tolist()

# Single-quote every id and join with commas so the text can be dropped into
# a SQL ``IN (...)`` list.
quoted_order_ids = [f"'{order_id}'" for order_id in list_order_id]
param_order_id = ",".join(quoted_order_ids)
先计算美国的实际利润率
In [ ]:
from utils.gtools import MySQLconnect

ods = MySQLconnect("ods")
engine = ods.engine()
# NOTE(review): this cursor is never used in this cell; it is kept only
# because other cells may rely on the name. Consider removing it so the
# extra connection is not left open.
cursor = ods.connect().cursor()

# Number of order ids per query, so a single SQL statement does not grow
# without bound. (The old comment said 500; the value in use is 50000.)
batch_size = 50000
order_id_list = df["order_id"].drop_duplicates().tolist()  # all distinct order ids
# df['postcode'] = "38016"

# Per-batch query results.
result_dfs1 = []
result_dfs2 = []
for i in range(0, len(order_id_list), batch_size):
    batch_order_ids = order_id_list[i:i + batch_size]  # ids of the current batch
    param = ",".join(f"'{order_id}'" for order_id in batch_order_ids)
    # Purchase cost per order: stock-out cost (outstock x instock price) and
    # completed purchasing cost, keyed by the first 15 characters of the id.
    purchase_order_sql = f"""
    WITH t1 AS (
        SELECT
            LEFT(ol.out_detials_outlink_id, 15) AS order_id,
            SUM(out_detials_qty * price) AS instock_cost,
            NULL AS buy_cost
        FROM ods.outstock_list ol
        JOIN ods.instock_list il ON ol.store_in_id = il.id
        WHERE LEFT(ol.out_detials_outlink_id, 15) IN ({param})
        GROUP BY LEFT(ol.out_detials_outlink_id, 15)
        UNION ALL
        SELECT
            LEFT(order_product_id, 15) AS order_id,
            NULL AS instock_cost,
            SUM(buy_num * actual_price) AS buy_cost
        FROM warehouse_purchasing
        WHERE LEFT(order_product_id, 15) IN ({param})
            AND buy_audit = "采购完成"
        GROUP BY LEFT(order_product_id, 15)
    )
    SELECT
        order_id,
        SUM(CASE WHEN instock_cost IS NULL THEN buy_cost ELSE instock_cost END) AS 采购成本
    FROM t1
    GROUP BY order_id
    """
    batch_df1 = pd.read_sql(purchase_order_sql, con=engine)
    result_dfs1.append(batch_df1)
    # Report the real number of ids processed; the last batch is usually
    # smaller than batch_size.
    print(f"已完成 {min(i + batch_size, len(order_id_list))} 个 order_id 的查询")

# Combine all batches. pd.concat raises on an empty list, so fall back to an
# empty frame with the expected columns when there were no order ids.
if result_dfs1:
    purchase_order_df1 = pd.concat(result_dfs1, ignore_index=True)
else:
    purchase_order_df1 = pd.DataFrame(columns=["order_id", "采购成本"])

# Align the key dtype on both sides before merging.
purchase_order_df1["order_id"] = purchase_order_df1["order_id"].astype(str)
df["order_id"] = df["order_id"].astype(str)

df = pd.merge(df, purchase_order_df1, on='order_id', how='left')
# Copy the merged table to the clipboard.
df.to_clipboard(index=False)
In [ ]:
import pandas as pd

# Rebind ``df`` to whatever tabular text is currently on the system
# clipboard (earlier cells put their result there with ``to_clipboard``).
df = pd.read_clipboard()
# Bare expression: in a notebook this displays the frame.
df
In [ ]:
from utils.countryOperator import OperateCountry
from utils.logisticsBill import BillFactory
from utils.Package import Package, Package_group
import pandas as pd
import json
import re
# United States
from utils.logisticsBill import Billing
import requests


def extract_number(value):
    """Return the first number found in ``value`` as a float, or 0.0 if none."""
    match = re.search(r"[-+]?\d*\.\d+|\d+", str(value))
    return float(match.group()) if match else 0.0


# Built once and shared by every row, as the Metro billing cell does.
opCountry = OperateCountry('US')

# Bill every row's measured parcels and store the head-leg price.
for index, row in df.iterrows():
    postcode = row['postcode']
    if pd.isna(postcode) or str(postcode).lower() == "nan":
        continue
    try:
        package_dict = json.loads(row['实际包裹数据'])
    except Exception as e:
        print(f"行 {index} 解析失败: {e}")
        print(row['实际包裹数据'])
        continue

    packages = Package_group()
    valid_package_count = 0
    for key, package in package_dict.items():
        package['长'] = extract_number(package['长'])
        package['宽'] = extract_number(package['宽'])
        package['高'] = extract_number(package['高'])
        package['重量'] = extract_number(package['重量'])
        # Skip parcels with any missing/zero measurement.
        if package['长'] == 0 or package['宽'] == 0 or package['高'] == 0 or package['重量'] == 0:
            continue
        packages.add_package(Package(key, package['长'], package['宽'], package['高'], package['重量']))
        valid_package_count += 1
    # The original tested ``packages is None``, which can never be true for a
    # freshly built group; count the parcels actually added instead.
    if valid_package_count == 0:
        continue

    # str() keeps the membership tests from raising on NaN cells.
    if "海运" in str(row['运输方式']):
        head_type = 1
    else:
        head_type = 0

    # Disabled carrier -> billing-company mapping, kept for reference:
    # if "FEDEX-SAIR-G" in row['快递公司']:
    #     company_name = "Fedex-GROUD"
    # elif "FEDEX-SAIR-H" in row['快递公司']:
    #     company_name = "Fedex-HOME"
    # elif "FEDEX02" in row['快递公司']:
    #     company_name = "Fedex-彩虹小马"
    # elif "大包" in row['快递公司'] or row['快递公司'] == '海MS-FEDEX':
    #     company_name = "Fedex-金宏亚"
    # elif "GIGA" in row['快递公司']:
    #     company_name = "大健-GIGA"
    # elif "CEVA" in row['快递公司']:
    #     company_name = "大健-CEVA"
    # elif "USPS" in row['快递公司']:
    #     company_name = "Fedex-GROUD"
    # else:
    #     company_name = "大健-Metro"
    bill = Billing(str(index), opCountry, packages, postcode,
                   company_name="Fedex-GROUD", head_type=head_type, beizhu='1')
    head_price = bill.head_amount[0]
    tail_price = bill.tail_amount[0]
    if "USPS" in str(row['快递公司']):
        tail_price = tail_price / 2
    # NOTE(review): tail_price is computed but never written back; only the
    # head-leg price is stored. Confirm whether a tail column is wanted.
    df.loc[index, '头程CNY'] = head_price
    # NOTE(review): assumed to be commented out in the original notebook
    # (the export split the comment line) - confirm.
    # df.loc[index,'最优渠道'] = bill.company_name
    print(f"行 {index} 处理完成")

df.to_clipboard(index=False)
In [ ]:
# Keep only the rows whose destination country is the United States.
is_us_order = df['目的国'].eq('United States')
us_df = df[is_us_order]
In [ ]:
from utils.countryOperator import OperateCountry
from utils.logisticsBill import BillFactory
from utils.Package import Package, Package_group
import pandas as pd

# United States: load the order sheet and keep the rows shipped through the
# two tail channels of interest.
df1 = pd.read_excel(r"D:\test\logistics\拦截数据\1-3月利润分段.xlsx", sheet_name="11-4月全球订单")
# .copy() makes ``df`` an independent frame: a later cell assigns new columns
# into it with ``df.loc[...] = ...``, which on a filtered slice of ``df1``
# triggers SettingWithCopyWarning and may not write reliably.
df = df1[df1['尾端渠道'].isin(["大健-Metro", "大健-CEVA"])].copy()
# df['postcode'] = df['postcode'].astype(str)
In [ ]:
from utils.logisticsBill import Billing

opCountry = OperateCountry('US')
billFactory = BillFactory()

# Price every single-parcel row with the "大健-Metro" channel and store the
# tail amount in column "美西"; rows that cannot be billed get "不可算".
for index, row in df.iterrows():
    print(row['postcode'])
    package = Package(row['包裹号'], row['长'], row['宽'], row['高'], row['重量'])
    packages = Package_group([package])
    postcode = row['postcode']
    head_type = 1 if row['运输方式'] == '海运' else 0
    try:
        bill = Billing(str(row['包裹号']), opCountry, packages, postcode=str(postcode),
                       company_name="大健-Metro", head_type=head_type, beizhu=0)
        df.loc[index, "美西"] = bill.tail_amount[0]
    except Exception as e:
        # ``except Exception`` instead of a bare ``except`` so Ctrl-C /
        # kernel interrupt still stops the loop, and the reason is shown.
        bill = None
        df.loc[index, "美西"] = "不可算"
        print(f"行 {index} 计费失败: {e}")
    print(index)
    print(bill)

# Disabled follow-up that merged the factory's bill table back in:
# bill_df = billFactory.bills_to_df()
# bill_df['ID'] = bill_df['ID'].astype(int)
# bill_df.to_clipboard()
# merged_df = pd.merge(df, bill_df, left_on="包裹号",right_on="ID", how='inner')
df.to_clipboard(index=False)
In [ ]:
import re
from data.us_zone import zone_west


def get_west_zone(postcode):
    """Return the US-West shipping zone for a postcode.

    Args:
        postcode: ZIP code as text or number, e.g. ``"90001"``,
            ``"90001-1234"`` or ``90001``. Only the first five digits are
            used.

    Returns:
        The matching key of ``zone_west``; ``"邮编格式不合法"`` when the value
        does not start with five digits; ``"未查询到邮编分区"`` when no zone
        lists the postcode.
    """
    # Spreadsheet cells may arrive as int/float/NaN; re.match needs text.
    postcode = str(postcode).strip()
    if not re.match(r'\d{5}-\d{4}|\d{5}', postcode):
        return "邮编格式不合法"
    postcode = int(postcode[:5])
    for zone, postcodes in zone_west.items():
        # Cheap pre-filter on the span between the first and the last entry.
        # Assumes each zone's entries are listed in ascending order - TODO
        # confirm against data.us_zone.
        start = int(postcodes[0].split("-")[0])
        end = int(postcodes[-1].split("-")[-1])
        if start > postcode or postcode > end:
            continue
        for postcode_range in postcodes:
            if "-" in postcode_range:
                low, high = map(int, postcode_range.split("-"))
                if low <= postcode <= high:
                    return zone
            elif int(postcode_range) == postcode:
                # Fix: the original compared int(zone) with the postcode, so
                # single-postcode entries could never match.
                return zone
    return "未查询到邮编分区"


df['分区'] = df['postcode'].apply(get_west_zone)
df.to_clipboard(index=False)
In [ ]:
# Fetch every SKU that shares an SPU with the SKUs in ``order_id_df_cal``,
# together with its cost price and ERP package data.
from sell.sell_price import call_sell_and_order_price
import json

sku_list = (
    order_id_df_cal['SKU']
    .apply(pd.to_numeric, errors='coerce')  # non-numeric values become NaN
    .dropna()
    .astype(int)
    .astype(str)
    .tolist()
)
placeholders = ','.join(['%s'] * len(sku_list))

if sku_list:
    with MySQLconnect('ods') as db:
        enginal = db.engine()
        sql = f"""
        SELECT
            SPU,
            sku.SKU,
            sku.`成本价`,
            spi.`包裹数据`
        FROM stg_bayshop_litfad_sku sku
        LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`
        LEFT JOIN ads.new_erp_sku_size spi ON sku.SKU =spi.SKU
        WHERE spu.SPU IN (
            SELECT SPU
            FROM stg_bayshop_litfad_sku sku
            LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`
            WHERE sku.SKU IN ({placeholders})
        )
        """
        result = pd.read_sql(sql, enginal, params=tuple(sku_list))
else:
    # With no usable SKU the statement would contain ``IN ()``, which is a
    # SQL syntax error; return an empty table with the same columns instead.
    result = pd.DataFrame(columns=['SPU', 'SKU', '成本价', '包裹数据'])
In [ ]:
import json
from sell.sell_price import call_sell_and_order_price

# Output columns filled, in order, from positions 0..3 of the sell-price result.
sell_price_columns = ('实际应有售价', '实际物流分摊费', '实际头程cny', '实际头程USD')

# Price each row from its purchase cost and its measured parcel data.
for index, row in df.iterrows():
    price = row['采购成本']
    try:
        package_dict = json.loads(row['实际包裹数据'])
        pricing = call_sell_and_order_price(price, package_dict)
        sell_price, order_price, order_type = pricing
    except Exception as e:
        print(f"SKU: {row['SKU']} 报错: {e}")
        continue

    # A sell price of 0 means nothing to record for this row.
    if sell_price == 0:
        continue

    for position, column in enumerate(sell_price_columns):
        df.loc[index, column] = sell_price[position]
    df.loc[index, '订单物流费'] = order_price
    df.loc[index, '尾端类型'] = order_type
    print(f" SKU {row['SKU']} ")

df.to_clipboard(index=False)
写计算物流费的海运空运函数
In [ ]:
import math

# Tail-end price table. The four columns below are picked by position
# (spreadsheet columns I..L) and turned into lookup dictionaries keyed by the
# values of column I.
express_price = pd.read_excel(r'D:\test\logistics\data\售价尾端价格.xlsx', sheet_name='Sheet1')
key_column = express_price.iloc[:, 8]  # column I
value_column = express_price.iloc[:, 9]  # column J
small_column = express_price.iloc[:, 10]  # column K
big_column = express_price.iloc[:, 11]  # column L
air_small_dict = dict(zip(key_column, small_column))
air_big_dict = dict(zip(key_column, big_column))
# Convert to a dictionary
ocean_price_dict = dict(zip(key_column, value_column))


def ocean_order_price(packages):
    """Return ``(fee, type_label)`` for an ocean order, picking the cheaper of
    parcel-express and LTL ("卡派") pricing.

    ``packages`` is iterated twice and each item must expose ``weight``,
    ``fst_size``, ``sed_size``, ``girth``, ``length``, ``width`` and
    ``height``. Units are presumably grams and centimetres - TODO confirm.
    """
    express_fee = 0  # base express fee
    long_fee = 0  # over-length surcharge
    weight_fee = 0  # over-weight surcharge
    big_fee = 0  # large-parcel surcharge
    express_type = ''
    for package in packages:
        # Base fee: value of the first table key that is >= the weight. This
        # relies on the dict keeping the spreadsheet row order; presumably the
        # rows are sorted ascending - TODO confirm. No key matching adds 0.
        for key, value in ocean_price_dict.items():
            if package.weight <=key:
                express_fee+=value
                break
        if package.fst_size>=116 or package.sed_size>=71 or package.girth>=251:
            long_fee += 16.3
            express_type +="超长"
        if package.weight>=21000 and package.fst_size<238 and package.girth<315:
            weight_fee+=25.5
            express_type +="超重"
        if package.fst_size>=238 or package.girth>=315:
            big_fee+=61.6
            express_type +="大包裹"
    express_fee = express_fee + long_fee + weight_fee + big_fee
    # LTL (surcharges counted in steps of 3)
    ltl_base = 0
    ltl_fee = 0
    count1 = 0
    count2 = 0
    count3 = 0
    count4 = 0
    order_type2 = '卡派'
    order_other_type1 = ''
    order_other_type2 = ''
    order_other_type3 = ''
    order_other_type4 = ''
    order_ltl_oversize = 0
    order_ltl_overweight1 = 0
    order_ltl_overweight2 = 0
    order_ltl_overpackage = 0
    sku_total_cubic_feet = 0
    for package in packages:
        cubic_feet= package.length * package.width * package.height / 1000000 * 35.3
        sku_total_cubic_feet += cubic_feet
        # LTL extra fees
        if package.fst_size>= 250:
            count1 += 1
            order_ltl_oversize = 118
            order_other_type1 = '超长'
        if package.weight >= 111000:
            count2 += 1
            order_ltl_overweight1 = 78
            order_other_type2 = '超重'
        if package.weight >= 130000:
            count3 += 1
            order_ltl_overweight2 = 30
            order_other_type3 = '超重'
        if package.fst_size >= 310:
            count4 += 1
            order_ltl_overpackage = 30
            order_other_type4 = '大包裹'
    order_type2 += order_other_type3 + order_other_type1 + order_other_type2 + order_other_type4
    # LTL base fee, from total volume / 1000000 * 35.3
    if sku_total_cubic_feet < 25:
        ltl_base = round(163 / 0.45 / 2, 2)  # 181.11
    elif sku_total_cubic_feet < 35:
        ltl_base = round(180 / 0.45 / 2, 2)  # 200
    else:
        # More than one cubic metre (35 cubic feet): 5 USD per cubic foot
        # Minimum 190 USD
        # NOTE(review): this branch divides by 0.359 and rounds to an integer,
        # while the two branches above divide by 0.45 and keep 2 decimals -
        # confirm this is intentional.
        ltl_base = round(max(190, 5 * sku_total_cubic_feet) / 0.359 / 2)
    # Each surcharge is charged once per started group of three flagged packages.
    ltl_fee = math.ceil(count1 / 3) * order_ltl_oversize + math.ceil(count2 / 3) * order_ltl_overweight1 + math.ceil(
        count3 / 3) * order_ltl_overweight2 + math.ceil(count4 / 3) * order_ltl_overpackage + ltl_base
    # Express wins ties because the comparison is strict.
    if ltl_fee < express_fee:
        ocean_fee = ltl_fee
        order_type = order_type2
    else:
        ocean_fee = express_fee
        order_type = express_type
    return ocean_fee, order_type


def air_order_price(packages):
    """Return ``(fee, type_label)`` for an air order, summed over all packages.

    Each package must expose ``weight``, ``fst_size``, ``sed_size``,
    ``trd_size``, ``density`` and ``get_volume_weight(divisor)``; the latter is
    presumably the volumetric weight for that divisor - TODO confirm.
    """
    express_fee = 0
    express_type = ''
    for package in packages:
        price=0
        bill_weight = max(package.weight, package.get_volume_weight(8500))
        # Table price: small parcels (<=50/40/30 on the three sides) use the
        # "small" table - by actual weight up to 420, by billed weight up to
        # 2718; everything else uses the "big" table by billed weight.
        if package.weight<=420 and package.fst_size<=50 and package.sed_size<=40 and package.trd_size<=30:
            for key, value in air_small_dict.items():
                if package.weight <=key:
                    price =value
                    break
        elif package.weight<=2718 and package.fst_size<=50 and package.sed_size<=40 and package.trd_size<=30:
            for key, value in air_small_dict.items():
                if bill_weight <=key:
                    price =value
                    break
        else:
            for key, value in air_big_dict.items():
                if bill_weight <=key:
                    price =value
                    break
        # Fee formula and label depend on actual weight only; density is
        # clamped to the range 37..337 in every branch.
        if package.weight<=420:
            express_fee+=((((min(max(package.density,37),337)*0.093+27.7)/6+0.65)*package.get_volume_weight(6000))*0.3+price)/0.45
            express_type+='USPS'
        elif package.weight<=2718:
            express_fee+=(((min(max(package.density,37),337)*0.093+27.7)/6+0.65)*package.get_volume_weight(8500)*0.3+price)/0.45
            express_type+='UandF'
        else:
            # NOTE(review): here ``price`` is added after the division by 0.45,
            # unlike the two branches above - confirm.
            express_fee+=(((min(max(package.density,37),337)*0.093+27.7-1.08)/6+0.65-1.06)*package.get_volume_weight(8500))/0.45+price
            express_type+='FEDEX'
    return express_fee, express_type
In [ ]:
# Compute the logistics fee of every row from its ERP-declared package data.
from utils.Package import Package, Package_group
import json
import re


def extract_number(value):
    """Return the first number found in ``value`` as a float, or 0.0 if none."""
    match = re.search(r"[-+]?\d*\.\d+|\d+", str(value))
    return float(match.group()) if match else 0.0


for index, row in df.iterrows():
    try:
        package_dict = json.loads(row['ERP包裹数据'])
    except Exception as e:
        print(f"行 {index} 解析失败: {e}")
        # Fix: show the column that failed to parse (the original printed
        # the measured-package column here).
        print(row['ERP包裹数据'])
        continue

    packages = Package_group()
    valid_package_count = 0
    for key, package in package_dict.items():
        package['长'] = extract_number(package['长'])
        package['宽'] = extract_number(package['宽'])
        package['高'] = extract_number(package['高'])
        package['重量'] = extract_number(package['重量'])
        # Skip parcels with any missing/zero measurement.
        if package['长'] == 0 or package['宽'] == 0 or package['高'] == 0 or package['重量'] == 0:
            continue
        packages.add_package(Package(key, package['长'], package['宽'], package['高'], package['重量']))
        valid_package_count += 1
    # The original tested ``packages is None``, which is never true, so rows
    # without any valid parcel were priced as an empty order. Skip them.
    if valid_package_count == 0:
        continue

    if row['运输方式'] == '海运':
        order_fee, order_type = ocean_order_price(packages)
    else:
        order_fee, order_type = air_order_price(packages)
    # Order-level result
    df.loc[index, 'ERP物流费'] = order_fee
    df.loc[index, '尾端类型'] = order_type
    print(order_fee, order_type)

df.to_clipboard(index=False)
In [ ]:
# Fetch, for one product category, every SKU that shares an SPU with the
# category's SKUs, then export the merged table to "<category>.xlsx".
import json

# Categories are processed one at a time.
category = "66 - Furniture"
df_one = order_id_df_cal[order_id_df_cal['产品品类'] == category]
sku_list = (
    df_one['SKU']
    .apply(pd.to_numeric, errors='coerce')  # non-numeric values become NaN
    .dropna()
    .astype(int)
    .astype(str)
    .tolist()
)
placeholders = ','.join(['%s'] * len(sku_list))

if sku_list:
    # Source table for this category
    with MySQLconnect('ods') as db:
        enginal = db.engine()
        sql = f"""
        SELECT
            SKU,
            价格 AS ERP采购价,
            规格,
            cpmaso规格,
            `标准/预设属性集`,
            自定义属性集
        FROM erp_furniture_sku sku
        LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`
        WHERE spu.SPU IN (
            SELECT SPU
            FROM stg_bayshop_litfad_sku sku
            LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`
            WHERE sku.SKU IN ({placeholders})
        )
        """
        result = pd.read_sql(sql, enginal, params=tuple(sku_list))
else:
    # With no usable SKU the statement would contain ``IN ()``, which is a
    # SQL syntax error; use an empty table with the same columns instead.
    result = pd.DataFrame(
        columns=['SKU', 'ERP采购价', '规格', 'cpmaso规格', '标准/预设属性集', '自定义属性集']
    )

df_one = pd.merge(df_one, result, on=['SKU'], how='left')
df_one.to_excel(f'{category}.xlsx')
1.找到SPU最新到仓的SKU及其体积 2.根据算法F(x)计算这个SPU的其他SKU理论长宽高重量 3.找到该SPU其他有过实际体积的SKU,记录这些实际体积 4.分析这些SKU的实际体积和理论体积的差距
In [ ]:
import pandas as pd

# Workbook of one-order-one-item shipments; the sheet read here holds the
# single-package rows.
single_package_workbook = r'D:\test\logistics\拦截数据\一票一件发货订单.xlsx'
df = pd.read_excel(single_package_workbook, sheet_name="单包裹数据")
# Next step: a cal_size routine that takes the old and the new attribute and
# returns the growth coefficient.
In [ ]:
import re
import json

_NUMBER_RE = re.compile(r"[-+]?\d*\.\d+|\d+")
_DIM_KEYS = ('长', '宽', '高', '重量')


def _last_package_dims(package_dict):
    """Return ``(length, width, height, weight)`` of the last package.

    The three sizes are sorted so that length >= width >= height. Returns
    ``None`` when the data is not a non-empty dict of dicts or when any of
    the four values contains no number.
    """
    if not isinstance(package_dict, dict) or not package_dict:
        return None
    packages = list(package_dict.values())
    if not all(isinstance(package, dict) for package in packages):
        return None
    # As before, only the last package's values end up in the output columns.
    item = {}
    for key in _DIM_KEYS:
        match = _NUMBER_RE.search(str(packages[-1].get(key, '')))
        if match is None:
            return None
        item[key] = float(match.group())
    height, width, length = sorted(item[key] for key in _DIM_KEYS[:3])
    return length, width, height, item['重量']


# Split each row's ERP package JSON into sorted dimension columns.
for index, row in df.iterrows():
    try:
        package_dict = json.loads(row['包裹数据'])
    except (TypeError, ValueError) as e:
        # Empty cells (NaN) and malformed JSON are skipped instead of
        # aborting the whole loop.
        print(f"解析失败:第{index}行,错误信息:{e}")
        continue
    dims = _last_package_dims(package_dict)
    if dims is None:
        # Previously an empty dict silently reused the values parsed for the
        # previous row (or raised NameError on the first row).
        print(f"解析失败:第{index}行,错误信息:包裹数据为空")
        continue
    length, width, height, weight = dims
    df.loc[index, 'ERP长'] = length
    df.loc[index, 'ERP宽'] = width
    df.loc[index, 'ERP高'] = height
    df.loc[index, 'ERP重量'] = weight
    print(f"{row['SKU']}尺寸为:{width},h:{height},d:{length},w:{weight}")
In [ ]:
def cal_size(old, new):
    """Return the relative change ``(new - old) / old``.

    Returns ``None`` when either value cannot be converted to float or when
    ``old`` is 0 (to avoid dividing by zero).
    """
    try:
        old = float(old)
        new = float(new)
    except (ValueError, TypeError):
        return None
    if old == 0:
        return None
    return (new - old) / old


# Per SPU, take the row with the largest order month (the first such row on
# ties). Selecting by idxmax labels keeps the original column dtypes and
# avoids ``groupby.apply`` operating on the grouping column, which recent
# pandas versions deprecate. SPUs whose months are all missing are dropped.
latest_row_labels = df.groupby('SPU')['订单月份'].idxmax().dropna()
first_df = df.loc[latest_row_labels].reset_index(drop=True)

# Growth coefficient of each measured dimension against its ERP value.
for index, row in first_df.iterrows():
    first_df.loc[index, '长系数'] = cal_size(row['ERP长'], row['长'])
    first_df.loc[index, '宽系数'] = cal_size(row['ERP宽'], row['宽'])
    first_df.loc[index, '高系数'] = cal_size(row['ERP高'], row['高'])
    first_df.loc[index, '重量系数'] = cal_size(row['ERP重量'], row['重量'])
    print(
        f"{row['SPU']} 的系数为 "
        f"{first_df.loc[index, '长系数']}, "
        f"{first_df.loc[index, '宽系数']}, "
        f"{first_df.loc[index, '高系数']}, "
        f"{first_df.loc[index, '重量系数']}"
    )
In [ ]:
# Attach each SPU's coefficients to every row of df.
coefficient_columns = ['SPU', '长系数', '宽系数', '高系数', '重量系数']
df = pd.merge(df, first_df[coefficient_columns], on='SPU', how='left')
print("合并完成")

# Theoretical size of every SKU: ERP value scaled by (1 + coefficient),
# rounded to two decimals.
for dim in ('长', '宽', '高', '重量'):
    scale = 1 + df[f'{dim}系数']
    df[f'理论{dim}'] = (scale * df[f'ERP{dim}']).round(2)
df
In [ ]:
# Package numbers of the baseline rows collected in first_df.
first_packages = set(first_df['包裹号'])

# New column: 1 when the row's package number is one of the baseline
# packages, otherwise 0.
df['is_first'] = df['包裹号'].map(lambda package_no: int(package_no in first_packages))
df.to_clipboard(index=False)
In [ ]:
# Price every single-item order twice: with its measured size and with its
# predicted (theoretical) size.
from sell.sell_price import call_sell_and_order_price

for index, row in df.iterrows():
    price = row['成本价']
    package_dict1 = {}
    package_dict2 = {}
    try:
        # One package per order, keyed '包裹1'.
        package_dict1 = {'包裹1': {
            '长': row['长'],
            '宽': row['宽'],
            '高': row['高'],
            '重量': row['重量'],
        }}
        package_dict2 = {'包裹1': {
            '长': row['理论长'],
            '宽': row['理论宽'],
            '高': row['理论高'],
            '重量': row['理论重量'],
        }}
        sell_price1, order_price1, order_type1 = call_sell_and_order_price(price, package_dict1)
        sell_price2, order_price2, order_type2 = call_sell_and_order_price(price, package_dict2)
    except Exception as e:
        print(f"SKU: {row['SKU']} 报错: {e}")
        continue

    results = (
        ('实际体积售价', sell_price1),
        ('实际体积订单价', order_price1),
        ('实际体积订单类型', order_type1),
        ('理论体积售价', sell_price2),
        ('理论体积订单价', order_price2),
        ('理论体积订单类型', order_type2),
    )
    for column, value in results:
        df.loc[index, column] = value
    print(f"SPU: {row['SPU']}, SKU {row['SKU']} 实际体积售价: {sell_price1}, 理论体积售价: {sell_price2},")
计算SPU下所有SKU的网站售价,实际尺寸售价,预测尺寸售价
In [ ]:
# Load the coefficient sheet and keep the baseline rows (is_first == 1),
# which carry the SPU and the measured length/width/height/weight.
import pandas as pd

coefficient_workbook = r'D:\test\logistics\拦截数据\一票一件发货订单.xlsx'
df = pd.read_excel(coefficient_workbook, sheet_name="单包裹系数计算")
# Next step: a cal_size routine that takes the old and the new attribute and
# returns the growth coefficient.
df1 = df.loc[:, ['is_first', 'SPU', 'SKU', '长', '宽', '高', '重量']]
base_df = df[df['is_first'].eq(1)]
In [ ]:
# Fetch every SKU under the baseline SPUs with its current selling price,
# then attach the measured data from df.
from sell.sell_price import call_sell_and_order_price
import json
from utils.gtools import MySQLconnect
import pandas as pd

spu_list = (
    base_df['SPU']
    .apply(pd.to_numeric, errors='coerce')  # non-numeric values become NaN
    .dropna()
    .astype(int)
    .astype(str)
    .tolist()
)
placeholders = ','.join(['%s'] * len(spu_list))

if spu_list:
    with MySQLconnect('ods') as db:
        enginal = db.engine()
        sql = f"""
        SELECT
            产品品类,
            产品分类,
            SPU,
            sku.SKU,
            sku.`成本价`,
            spi.`包裹数据`,
            物流分摊,
            产品售价
        FROM stg_bayshop_litfad_sku sku
        LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID`
        LEFT JOIN ads.new_erp_sku_size spi ON sku.SKU =spi.SKU
        WHERE spu.SPU IN ({placeholders})
        """
        result = pd.read_sql(sql, enginal, params=tuple(spu_list))
else:
    # With no usable SPU the statement would contain ``IN ()``, which is a
    # SQL syntax error; use an empty table with the same columns instead.
    result = pd.DataFrame(
        columns=['产品品类', '产品分类', 'SPU', 'SKU', '成本价', '包裹数据', '物流分摊', '产品售价']
    )

# Merge with df; both sides have an SPU column, so the result carries
# SPU_x (from the query) and SPU_y (from df).
all_df = pd.merge(result, df, on=['SKU'], how='left')
all_df
In [ ]:
# Split the ERP package data of all_df into sorted dimension columns.
import re
import json

_NUMBER_RE = re.compile(r"[-+]?\d*\.\d+|\d+")
_DIM_KEYS = ('长', '宽', '高', '重量')


def _last_package_dims(package_dict):
    """Return ``(length, width, height, weight)`` of the last package.

    The three sizes are sorted so that length >= width >= height. Returns
    ``None`` when the data is not a non-empty dict of dicts or when any of
    the four values contains no number.
    """
    if not isinstance(package_dict, dict) or not package_dict:
        return None
    packages = list(package_dict.values())
    if not all(isinstance(package, dict) for package in packages):
        return None
    # As before, only the last package's values end up in the output columns.
    item = {}
    for key in _DIM_KEYS:
        match = _NUMBER_RE.search(str(packages[-1].get(key, '')))
        if match is None:
            return None
        item[key] = float(match.group())
    height, width, length = sorted(item[key] for key in _DIM_KEYS[:3])
    return length, width, height, item['重量']


for index, row in all_df.iterrows():
    if not isinstance(row['包裹数据'], str) or not row['包裹数据']:
        print(f"第{index}行包裹数据为空或非字符串,跳过")
        continue
    try:
        package_dict = json.loads(row['包裹数据'])
    except json.JSONDecodeError as e:
        print(f"解析失败:第{index}行,错误信息:{e}")
        continue
    dims = _last_package_dims(package_dict)
    if dims is None:
        # Covers empty dicts (which previously reused the previous row's
        # values), non-dict payloads and missing/non-numeric dimensions
        # (which previously raised).
        print(f"解析失败:第{index}行,错误信息:包裹数据为空")
        continue
    length, width, height, weight = dims
    all_df.loc[index, 'ERP长'] = length
    all_df.loc[index, 'ERP宽'] = width
    all_df.loc[index, 'ERP高'] = height
    all_df.loc[index, 'ERP重量'] = weight
    print(f"{row['SKU']}尺寸为:{width},h:{height},d:{length},w:{weight}")
In [ ]:
# Compute the length/width/height/weight coefficients of every SPU.
def cal_size(old, new):
    """Return the relative change ``(new - old) / old``.

    Returns ``None`` when either value cannot be converted to float or when
    ``old`` is 0 (to avoid dividing by zero).
    """
    try:
        old = float(old)
        new = float(new)
    except (ValueError, TypeError):
        return None
    if old == 0:
        return None
    return (new - old) / old


# Baseline rows. .copy() gives an independent frame: the loop below adds
# columns with ``.loc``, which on a filtered slice of all_df raises
# SettingWithCopyWarning and may not write reliably.
test_df = all_df[all_df['is_first'] == 1].copy()

# Coefficients of the baseline row of each SPU.
for index, row in test_df.iterrows():
    test_df.loc[index, '长系数'] = cal_size(row['ERP长'], row['长'])
    test_df.loc[index, '宽系数'] = cal_size(row['ERP宽'], row['宽'])
    test_df.loc[index, '高系数'] = cal_size(row['ERP高'], row['高'])
    test_df.loc[index, '重量系数'] = cal_size(row['ERP重量'], row['重量'])
    print(
        f"{row['SPU_x']} 的系数为 "
        f"{test_df.loc[index, '长系数']}, "
        f"{test_df.loc[index, '宽系数']}, "
        f"{test_df.loc[index, '高系数']}, "
        f"{test_df.loc[index, '重量系数']}"
    )

# Attach the baseline coefficients to every row of all_df.
# NOTE(review): if one SPU has several is_first rows, this merge repeats
# every SKU of that SPU once per baseline row - confirm baselines are unique.
all_df = pd.merge(all_df, test_df[['SPU_x', '长系数', '宽系数', '高系数', '重量系数']], on='SPU_x', how='left')
In [ ]:
# Predicted size of every SKU: ERP value scaled by (1 + coefficient),
# rounded to two decimals.
for dim in ('长', '宽', '高', '重量'):
    scale = 1 + all_df[f'{dim}系数']
    all_df[f'理论{dim}'] = (scale * all_df[f'ERP{dim}']).round(2)
all_df
In [ ]:
# Price every SKU under three sizes - measured, predicted and ERP - and
# export the comparison.
from sell.sell_price import call_sell_and_order_price

for index, row in all_df.iterrows():
    price = row['成本价']
    package_dict1 = {}
    package_dict2 = {}
    package_dict3 = {}
    try:
        # One package per order, keyed '包裹1'.
        package_dict1 = {'包裹1': {
            '长': row['长'],
            '宽': row['宽'],
            '高': row['高'],
            '重量': row['重量'],
        }}
        package_dict2 = {'包裹1': {
            '长': row['理论长'],
            '宽': row['理论宽'],
            '高': row['理论高'],
            '重量': row['理论重量'],
        }}
        package_dict3 = {'包裹1': {
            '长': row['ERP长'],
            '宽': row['ERP宽'],
            '高': row['ERP高'],
            '重量': row['ERP重量'],
        }}
        sell_price1, order_price1, order_type1 = call_sell_and_order_price(price, package_dict1)
        sell_price2, order_price2, order_type2 = call_sell_and_order_price(price, package_dict2)
        sell_price3, order_price3, order_type3 = call_sell_and_order_price(price, package_dict3)
    except Exception as e:
        print(f"SKU: {row['SKU']} 报错: {e}")
        continue

    results = (
        ('ERP售价', sell_price3),
        ('实际体积售价', sell_price1),
        ('理论体积售价', sell_price2),
        ('ERP订单价', order_price3),
        ('实际体积订单价', order_price1),
        ('理论体积订单价', order_price2),
    )
    for column, value in results:
        all_df.loc[index, column] = value
    # all_df.loc[index, '理论体积订单类型'] = order_type2
    print(f"SPU: {row['SPU_x']}, SKU {row['SKU']} ,ERP售价: {sell_price3}, 实际体积售价: {sell_price1}, 理论体积售价: {sell_price2},")

all_df.to_excel('单包裹SKU售价分析.xlsx', index=False)