13 KiB
13 KiB
一丶stock_date表示备货日期
In [ ]:
# 取备货时间date符合方案的SKU """ 1.订单上线时间三个月以上 2.前一个月订单数>=4或前两个月订单数>=5 数据提取方案: 1.设置变量stock_date为备货时间 2.计算出stock_date的两个月前的order_date2和一个月前的order_date1 3.查询order_date1至stock_date的订单数和order_date2至stock_date的订单数 4.筛选出order_date1至stock_date的订单数>=4或order_date2至stock_date的订单数>=5的SKU 5.计算上线时间和date的差值,筛选>3个月天的产品 """ import pandas as pd import numpy as np from datetime import datetime from dateutil.relativedelta import relativedelta stock_date = '2025-11-18' stock_datetime = datetime.strptime(stock_date, '%Y-%m-%d') # 计算一个月前和两个月前 order_date1 = (stock_datetime - relativedelta(months=1)).strftime('%Y-%m-%d') # 一个月前订单时间 order_date2 = (stock_datetime - relativedelta(months=2)).strftime('%Y-%m-%d') # 两个月前订单时间 online_date = (stock_datetime - relativedelta(months=3)).strftime('%Y-%m-%d') # 上线时间 after_date1 = (stock_datetime + relativedelta(months=1)).strftime('%Y-%m-%d') # 一个月后订单时间 after_date2 = (stock_datetime + relativedelta(months=2)).strftime('%Y-%m-%d') # 两个月后订单时间 after_date3 = (stock_datetime + relativedelta(months=3)).strftime('%Y-%m-%d') # 三个月后订单时间 after_date4 = (stock_datetime + relativedelta(months=4)).strftime('%Y-%m-%d') # 四个月后订单时间 print(after_date1, after_date2, after_date3, after_date4, order_date1,order_date2,online_date)
当前板块直接跑是计算美国区的备货情况,如果需要看全球的,请搜索delivery_country并把这行注释掉(在sql里)
In [ ]:
# 读取订单数据 from utils.gtools import MySQLconnect with MySQLconnect('ods') as db: enginal = db.engine() sql = f""" # 取采购价 with pur_cost AS ( SELECT SUBSTRING_INDEX(opl.order_product_id, '_', 2) AS order_product_id, SUM(actual_price) AS 采购成本, SKU, ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY order_date DESC) as rn FROM dws.order_product_list opl LEFT JOIN ods.warehouse_purchasing wp ON opl.order_product_id = wp.order_product_id WHERE NOT EXISTS ( SELECT 1 FROM dws.log_order_reissue_detail AS r WHERE left(r.order_product_id,15) = opl.order_id) AND order_date >= '{order_date1}' AND order_date < '{stock_datetime}' AND opl.fun_audit NOT REGEXP "等待" AND opl.site_name REGEXP "Litfad" AND opl.product_audit REGEXP "采购完成" AND opl.SKU IS NOT NULL and opl.SKU <>0 GROUP BY SUBSTRING_INDEX(opl.order_product_id, '_', 2) ), t1 AS ( SELECT spu.`产品品类`, spu.`产品分类`, pc.采购成本, opl.SKU, COUNT(DISTINCT CASE WHEN DATE_FORMAT(opl.order_date, '%%Y-%%m-%%d') >= '{order_date2}' and DATE_FORMAT(opl.order_date, '%%Y-%%m-%%d') < '{order_date1}' THEN opl.order_id END) AS 前第二月订单数, COUNT(DISTINCT CASE WHEN DATE_FORMAT(opl.order_date, '%%Y-%%m-%%d') >= '{order_date1}' and DATE_FORMAT(opl.order_date, '%%Y-%%m-%%d') < '{stock_datetime}' THEN opl.order_id END) AS 前一月订单数, SUM(CASE WHEN (DATE_FORMAT(opl.order_date, '%%Y-%%m-%%d') >= '{stock_datetime}' AND DATE_FORMAT(opl.order_date, '%%Y-%%m') < '{after_date1}') THEN opl.product_num ELSE 0 END) AS 后第一月产品数, SUM(CASE WHEN (DATE_FORMAT(opl.order_date, '%%Y-%%m') >= '{after_date1}' AND DATE_FORMAT(opl.order_date, '%%Y-%%m-%%d') <'{after_date2}') THEN opl.product_num ELSE 0 END) AS 后第两月产品数, SUM(CASE WHEN (DATE_FORMAT(opl.order_date, '%%Y-%%m') >= '{after_date2}' AND DATE_FORMAT(opl.order_date, '%%Y-%%m') < '{after_date3}') THEN opl.product_num ELSE 0 END) AS 后第三月产品数, SUM(CASE WHEN DATE_FORMAT(opl.order_date, '%%Y-%%m') >= '{after_date3}' THEN opl.product_num ELSE 0 END) AS 后第四月产品数, DATE_FORMAT(sku.添加时间,'%%Y-%%m-%%d') AS 上线时间, DATE_FORMAT(sku.更新时间,'%%Y-%%m-%%d') AS 更新时间 FROM dws.order_product_list opl LEFT JOIN pur_cost pc ON opl.SKU = pc.SKU LEFT JOIN ods.order_list ol ON opl.order_id =ol.order_id LEFT JOIN ods.stg_bayshop_litfad_sku sku ON opl.SKU = sku.SKU LEFT JOIN ods.stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` WHERE opl.order_date >= '{order_date2}' AND DATE_FORMAT(opl.order_date, '%%Y-%%m') < '{after_date4}' AND DATE_FORMAT(sku.添加时间,'%%Y-%%m-%%d') < '{online_date}' AND opl.SKU IS NOT NULL and opl.SKU <>0 AND opl.order_product_id NOT REGEXP '^[^_]*_[^_]*_[^_]*$' AND site_type REGEXP "独立站" AND ol.fund_status NOT REGEXP "等待" and sku.状态 REGEXP "启用" AND spu.状态 REGEXP "正常" AND 采购成本>0 AND rn = 1 and 产品品类 <> "126 - Outdoor" and 产品品类 <>"57 - Rugs" and 产品分类 <> "151 - Peel & Stick Backsplash Tile" and 产品分类 <> "138 - Bathroom Sinks" and 产品分类 <> "184 - Toddler & Kids Chairs" and opl.delivery_country regexp "United States" GROUP BY opl.SKU ) SELECT 产品品类, 产品分类, SKU, 采购成本, 前第二月订单数+前一月订单数 AS 前两月订单数, 前一月订单数 AS 备货数, 后第一月产品数, 后第两月产品数, 后第三月产品数, 后第四月产品数, (后第一月产品数+后第两月产品数+后第三月产品数+后第四月产品数) AS 后四月总产品数 FROM t1 WHERE 前一月订单数 >=4 OR (前一月订单数>=2 AND (前第二月订单数+前一月订单数)>=5) ORDER BY 前一月订单数 DESC """ df = pd.read_sql(sql, enginal)
In [ ]:
# 取上诉SKU的体积 import pandas as pd from utils.gtools import MySQLconnect sku_list = ( df['SKU'] .apply(pd.to_numeric, errors='coerce') .dropna() .astype(int) .astype(str) .drop_duplicates() # 加这一行 .tolist() ) # 读取需要计算的包裹信息 with MySQLconnect('ods') as db: enginal = db.engine() quoted_spus = ','.join([f"'{sku}'" for sku in sku_list]) sql = f""" WITH t1 AS ( SELECT opl.order_id, SKU, ol.order_price_dollar AS 订单售价, sum(CASE WHEN opl.order_product_id REGEXP "[0-9]{{15}}_[0-9]*$" THEN product_num END) AS product_num, count(DISTINCT opl.SKU) AS 产品种类 FROM dws.order_product_list opl left join ods.order_list ol ON opl.order_id = ol.order_id WHERE NOT EXISTS ( SELECT 1 FROM dws.log_order_reissue_detail AS r WHERE left(r.order_product_id,15) = opl.order_id ) and opl.delivery_country regexp "United States" AND SKU in ({quoted_spus}) AND SKU <> "" AND SKU IS NOT NULL AND SKU <> 0 GROUP BY order_id ), t2 AS ( SELECT t1.SKU, 订单售价, round(SUM(b.weight/1000 ),0)AS 重量, ROUND(SUM(b.length*b.width*b.hight/1000000),2) AS 体积, COUNT(package) AS 包裹数 FROM t1 LEFT JOIN order_express a ON t1.order_id = a.单号 JOIN package_vol_info b ON a.`包裹号` = b.package WHERE a.`包裹状态` = '客户签收' AND b.hight > 0 AND b.length > 0 AND b.width > 0 AND b.hight > 0 AND b.weight > 0 AND t1.product_num = 1 AND t1.产品种类=1 GROUP BY order_id ) SELECT SKU, AVG(订单售价) AS 订单平均售价, AVG(重量) AS 平均重量, AVG(体积) AS 平均体积 FROM t2 GROUP BY SKU """ package_df = pd.read_sql(sql, enginal)
In [ ]:
all_df = pd.merge(df, package_df, on='SKU', how='left') all_df
In [ ]:
# 处理每个条目的不同备货要求 # 66 - Furniture fir_df = all_df[ ((all_df['产品品类'] == '66 - Furniture') & (all_df['前两月订单数'] >5))| ((all_df['产品品类'] == '1 - Lighting' )& (all_df['前两月订单数'] >5))] fir_df
In [ ]:
len(fir_df)
In [ ]:
fir_df.to_clipboard(index = False)