4.7 KiB
		
	
	
	
	
	
			
		
		
	
	
			4.7 KiB
		
	
	
	
	
	
In [9]:
import pandas as pd from utils.gtools import MySQLconnect categories = '94 - Office Desks' with MySQLconnect('ods') as db: engine = db.engine() sql = f""" WITH a AS ( SELECT t1.SPU, t2.SKU, t1.产品分类, t1.添加时间, order_date, t2.成本价, opl.product_price_dollar, opl.product_num, TIMESTAMPDIFF( MONTH, t1.添加时间, order_date ) AS month_diff FROM ods.stg_bayshop_litfad_spu t1 LEFT JOIN ods.stg_bayshop_litfad_sku t2 ON t2.产品PID = t1.产品PID LEFT JOIN dws.order_product_list opl ON t2.SKU = opl.SKU WHERE t1.添加时间 BETWEEN '2023-01-01' AND '2025-06-16 23:59:59' AND 产品分类 = '{categories}' AND t2.SKU IS NOT NULL ), b AS ( SELECT SPU, SKU,添加时间,产品分类,成本价, b.erp_package_vol, order_date, IF ( month_diff >= 6, NULL, month_diff ) AS month_diff, ROW_NUMBER() over ( PARTITION BY SKU ORDER BY order_date DESC ) AS ranking FROM a LEFT JOIN dwd.dim_erp_sku_package_vol_info b ON a.SKU = b.erp_sku ) SELECT SPU, SKU,添加时间,产品分类,成本价, b.erp_package_vol FROM b WHERE ranking = 1 AND month_diff IS NULL """ df = pd.read_sql(sql, engine)
得到每个SKU的最长边,围长,总重量,6000抛重,采购体积比(采购/6000抛重)
In [ ]:
import json import re from utils import Package,Package_group from sell.sell_price import call_sell_and_order_price def extract_number(value): # 提取字符串中的第一个数字 match = re.search(r"[-+]?\d*\.\d+|\d+", str(value)) return float(match.group()) if match else 0.0 for index, row in df.iterrows(): package_dict = json.loads(row['erp_package_vol']) max_length = 0 # 最长边 max_girth = 0 # 最大围长 all_weight = 0 # 总重量 all_vol_weight = 0 # 总抛重 for key, package in package_dict.items(): package['长'] = extract_number(package['长']) package['宽'] = extract_number(package['宽']) package['高'] = extract_number(package['高']) package['重量'] = extract_number(package['重量']) size =sorted([package['长'],package['宽'],package['高']]) fst_size = size[0] snd_size = size[1] thd_size = size[2] max_length=max(max_length,fst_size) max_girth=max(max_girth,fst_size+(snd_size+thd_size)*2) all_weight+=package['重量']/1000 all_vol_weight+=package['长']*package['宽']*package['高']/6000 sell_price,order_price,order_type = call_sell_and_order_price(row['成本价'], row['erp_package_vol'],"海运") df.loc[index,'网站售价'] = sell_price[0] df.loc[index,'物流分摊费'] = sell_price[1] df.loc[index,'订单物流费'] = order_price df.loc[index,'尾端类型'] = order_type df.loc[index,'最长边'] = max_length df.loc[index,'最大围长'] = max_girth df.loc[index,'总重量'] = all_weight df.loc[index,'总抛重'] = all_vol_weight # 按照那个分组,按照总抛重,每5总抛重为一组,最长边取大,最短边取小,最大实重取大,最小实重取小,网站售价求和,物流分摊费求和,订单物流费求和,尾端类型不要,