logistics/算账方案.ipynb

7.0 KiB

In [ ]:
BASE_SQL = """
WITH 
odr_list AS ( 
  SELECT 
    dwd.order_list.order_id,
  dwd.order_list.site_name,
  dwd.order_list.order_date,
  dwd.order_list.delivery_country,
  dwd.order_list.postcode,
  dwd.order_list.currency,
  dwd.order_list.order_price,
  dwd.order_list.order_freight_price,
  dwd.order_list.order_price_dollar,
  dwd.order_list.order_freight_price_dollar,
  dwd.order_list.insurance,
  dwd.order_list.discount_code,
  dwd.order_list.order_cate,
  dwd.order_list.fund_status,
  dwd.order_list.convey 
  FROM 
    dwd.order_list 
  WHERE 
    site_name REGEXP "^litfad|beau|kwowking|lakiq" 
    AND fund_status NOT REGEXP "等待" 
    AND DATE_FORMAT(order_date,"%Y-%m-01") >= "2025-06-01"
),

refund AS (
  SELECT
    ol.order_id,
    SUM(`当次退款USD金额`) AS "总退款金额",
                sum(`当次退款申请比例`) AS "当次退款申请比例",
                `原订单退款比例`,
                MAX(`申请时间`) AS "最后一次申请时间"
  FROM
    dwd.`cpmaso_order_refund` cof
    LEFT JOIN dwd.order_list ol ON ol.order_id = cof.order_id 
  WHERE
    状态 = "退款成功" 
    AND 网站 REGEXP "litfad|lakiq|kwoking|beau" 
    AND ol.order_date >= "2025-01-01" 
    AND EXISTS (
        SELECT 1 FROM odr_list WHERE odr_list.order_id = cof.order_id
    )
  GROUP BY
    ol.order_id
),
-- WITH
sub_order AS (
SELECT DISTINCT
    opl.order_id,

    CASE 
        WHEN (LENGTH(opl.order_product_id) - LENGTH(REPLACE(opl.order_product_id, '_', ''))) = 1 THEN opl.order_product_id
        ELSE SUBSTRING_INDEX(opl.order_product_id, '_', 2)
    END AS 子订单号,

    CASE 
        WHEN (LENGTH(opl.order_product_id) - LENGTH(REPLACE(opl.order_product_id, '_', ''))) = 2 THEN opl.order_product_id
        ELSE NULL
    END AS 子子订单号,

    lor.redo_type AS 补发类型
--     wp.actual_price * wp.buy_num AS 采购金额,
--     buy_audit

FROM  dwd.order_product_list opl 
LEFT JOIN dws.log_order_reissue_detail lord 
    ON lord.order_product_id = opl.order_product_id
LEFT JOIN dwd.log_order_reissue lor 
    ON lord.order_product_id = lor.order_product_id
WHERE DATE_FORMAT(opl.order_date, "%Y-%m-01") >= "2025-06-01"

),
sub_order2 AS (
SELECT
order_id,
COUNT(CASE WHEN 子子订单号 IS NULL THEN 1 ELSE NULL END) AS 子订单数,
COUNT(CASE WHEN 子子订单号 IS NOT NULL THEN 1 ELSE NULL END) AS 子子订单数,
COUNT(CASE WHEN 子子订单号 IS NOT NULL AND 补发类型 REGEXP "配件补发|整件补发" THEN 1 ELSE NULL END) AS 补发订单数,
COUNT(CASE WHEN 子子订单号 IS NOT NULL AND 补发类型 REGEXP "换款"  THEN 1 ELSE NULL END) AS 换款订单数,
COUNT(CASE WHEN 子子订单号 IS NOT NULL AND 补发类型 NOT REGEXP "换款|配件补发|整件补发"  AND 补发类型 IS NOT NULL THEN 1 ELSE NULL END) AS 其他补发数,
COUNT(CASE WHEN 子子订单号 IS NOT NULL AND 补发类型 IS NULL THEN 1 ELSE NULL END) AS 拆分订单数
FROM
sub_order
GROUP BY order_id
),


is_shipping AS (
SELECT
        opl.order_id,
        count(distinct CASE WHEN oe.`关联提单号`<>"--" AND oe.`关联提单号`<>"未关联" AND oe.`包裹状态` not in ( "已作废","已删除")  THEN oe.`包裹号` ELSE null END) AS 已发货包裹数,
                COUNT(DISTINCT CASE WHEN oe.`包裹状态` not in ( "已作废","已删除","--")  THEN oe.`包裹号` ELSE null END)AS 所有包裹数,
                                                                MIN(oe.`客户签收时间`) AS 客户最早签收时间
                FROM order_express oe 
                left join dwd.order_product_list opl on oe.`单号` = opl.order_id 
                WHERE DATE_FORMAT(opl.order_date,"%Y-%m-%d") >'2025-01-01'
                GROUP BY  opl.order_id
                
)

SELECT
  ol.*,
        rf.总退款金额,
        当次退款申请比例,
        原订单退款比例,
        最后一次申请时间,
        so.子订单数,
        so.子子订单数,
        so.补发订单数,
        so.换款订单数,
        so.其他补发数,
        so.拆分订单数,
        CASE when sp.已发货包裹数 =sp.所有包裹数 then "订单已发货"
        WHEN sp.已发货包裹数 < sp.所有包裹数 THEN "部分发货"
        WHEN sp.已发货包裹数 = 0 then"未发货" ELSE "无包裹" END AS "订单发货状态" ,
                                sp.客户最早签收时间
FROM
  odr_list ol
        LEFT JOIN refund rf ON ol.order_id = rf.order_id
        LEFT JOIN sub_order2 so ON ol.order_id = so.order_id
        LEFT JOIN is_shipping sp ON ol.order_id = sp.order_id"""
In [ ]:
#然后提取这些包裹号的提单ID
#分析这些提单ID的总价和平均价格
from utils.gtools import MySQLconnect
import pandas as pd
with MySQLconnect('ods') as db:
    engine = db.engine()
    order_df = pd.read_sql(BASE_SQL,engine)
In [ ]:
#提取这些订单号的包裹号
order_id = order_df['order_id'].tolist()
order_ids = ','.join(f"'{i}'" for i in order_id)
with MySQLconnect('ods') as db:
    conn = db.connect()
    cursor = conn.cursor()
    sql = f"SELECT * FROM order_info WHERE order_id IN ({order_ids})"