11 KiB
11 KiB
1.查询5.1-5.28测量包裹尺寸的订单数据 2.以美国的售价计算它当前应该有的售价 3.判断
In [ ]:
import pandas as pd from utils.gtools import MySQLconnect # 读取需要计算的包裹信息 with MySQLconnect('ads') as db: sql = r""" # 限制范围是测量时间,取得SKU种类为1且数量为1的订单,且重复SKU只取最近的订单 WITH t1 AS ( SELECT order_id, SKU, order_date, sum(CASE WHEN opl.order_product_id LIKE '%\_%' ESCAPE '\\' AND opl.order_product_id NOT LIKE '%\_%\_%' ESCAPE '\\' THEN product_num END) AS product_num, DATE_FORMAT(order_date,"%Y-%m-%d") AS 订单时间, count(opl.SKU) AS 产品种类 FROM dws.fact_order_product_list opl WHERE NOT EXISTS ( SELECT 1 FROM dws.log_order_reissue_detail AS r WHERE r.order_product_id = opl.order_product_id ) AND order_date >= "20250501" AND order_date < "20250612" AND SKU <> "" GROUP BY order_id ) , t2 AS ( SELECT a.`包裹测量时间`, t1.order_id, t1.SKU, t1.order_date, a.包裹号, a.快递公司, a.运输方式, a.`目的国`, d.postcode, CONCAT( '"', b.package, '": {', '"长": ', length, ', ', '"宽": ', width, ', ', '"高": ', hight, ', ', '"重量": ', weight, '}' ) AS package_json FROM t1 LEFT JOIN order_express a ON t1.order_id = a.单号 JOIN package_vol_info b ON a.`包裹号` = b.package JOIN order_list d ON a.`单号` = d.order_id WHERE a.`包裹状态` IN ( '客户签收', '已经投递') AND b.hight > 0 AND b.length > 0 AND b.width > 0 AND b.hight > 0 AND b.weight > 0 -- AND a.`目的国` = "United States" AND t1.product_num = 1 AND t1.产品种类=1 AND a.`包裹测量时间` >= '2025-05-01' AND a.`包裹测量时间` < '2025-06-12' ), t3 AS ( SELECT t2.*, sku.成本价 AS ERP采购价, ess.erp_package_vol AS ERP包裹数据, CONCAT('{', GROUP_CONCAT(package_json SEPARATOR ','), '}') AS 实际包裹数据, ROW_NUMBER() OVER (PARTITION BY SKU ORDER BY 包裹测量时间 DESC) as rn FROM t2 LEFT JOIN dwd.dim_erp_sku_package_vol_info ess ON t2.SKU=ess.erp_sku LEFT JOIN stg_bayshop_litfad_sku sku ON t2.SKU=sku.SKU WHERE ess.`erp_package_vol`<>"{}" AND ess.`erp_package_vol`<>"" GROUP BY order_id ) SELECT 包裹测量时间, order_id, SKU, DATE_FORMAT(order_date,"%Y-%M-%D") AS 订单时间, 包裹号, `快递公司`, `运输方式`, `目的国`, postcode, ERP采购价, ERP包裹数据, 实际包裹数据 FROM t3 WHERE rn=1 """ df=pd.read_sql("SELECT * FROM `order_complet4` WHERE buy_amount is not null and `实际尺寸售价` IS NULL limit 1000 ",db.con)
In [ ]:
def call_sell_price(price, package_dict,head_type="海运"): import json from sell.sell_price import call_sell_and_order_price try: package_dict = json.loads(package_dict) all_sell_price, order_price, order_type = call_sell_and_order_price(price, package_dict,head_type) except Exception as e: print(f" 报错: {e}") return ("","","") if all_sell_price == 0: return ("","","") sell_price= all_sell_price[0] # logis_price = all_sell_price[1] return (sell_price, order_price, order_type) # 计算当前售价 for index,row in df.iterrows(): price = row['buy_amount'] # package_dict = json.loads(row['erp_package_vol']) sell_price = call_sell_price(price, row['package_json'],"海运") print(sell_price) df.loc[index,'网站售价'] = sell_price[0] df.loc[index,'订单物流费'] = sell_price[1] df.loc[index,'尾端类型'] = sell_price[2] print(f"SKU: {row['sku']} 网站售价: {sell_price[0]} 订单物流费: {sell_price[1]} 尾端类型: {sell_price[2]}") df.to_clipboard(index=False)
新的|计算欧洲各国每种货型占比
In [1]:
import pandas as pd from utils.gtools import MySQLconnect sql=r"""SELECT 包裹号, 单号, 快递公司, 目的国, 快递分区, 投递时间, postcode, length, width, hight, weight FROM order_express oe LEFT JOIN package_vol_info pvi ON oe.`包裹号` = pvi.package left join order_list ON oe.单号 = order_list.order_id WHERE `投递时间` >='2025-05-01' AND `投递时间`<'2025-08-01' AND `包裹状态` NOT REGEXP '已作废' AND length >0 AND width >0 AND hight>0 AND weight>0 # AND 目的国 NOT REGEXP "United States|Australia|United Kingdom|Japan|Canada" and 目的国 REGEXP "Australia" # AND 目的国 <>''""" with MySQLconnect('ods') as db: df = pd.read_sql(sql, db.con)
C:\Users\Admin\AppData\Local\Temp\ipykernel_35372\42756626.py:31: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy. df = pd.read_sql(sql, db.con)
In [ ]:
print(df.columns)
In [6]:
def order_type(group): # 判断组中是否有任何一个包裹满足“卡派”条件 if (group['length'] >= 200).any() or (group['weight'] >= 31500).any(): return '卡派' else: return '快递' # 计算每个单号的类型 type_series = df.groupby('单号').apply(order_type) def order_weight(group): # 计算单号的重量 # 计算6000系数的体积重 bill_weight= 0 for i in range(len(group)): bill_weight += max(group.iloc[i]['length'] * group.iloc[i]['width'] * group.iloc[i]['hight'] / 4000, group.iloc[i]['weight']/1000) return bill_weight weight_series = df.groupby('单号').apply(order_weight) # 将结果映射回原表 # df['类型'] = df['单号'].map(type_series) df['计费重'] = df['单号'].map(weight_series) df.to_clipboard(index= False)
C:\Users\Admin\AppData\Local\Temp\ipykernel_35372\4220111735.py:9: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
type_series = df.groupby('单号').apply(order_type)
C:\Users\Admin\AppData\Local\Temp\ipykernel_35372\4220111735.py:17: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
weight_series = df.groupby('单号').apply(order_weight)
In [7]:
from logisticsClass.logisticsTail_AU import * for i, row in df.iterrows(): if "POST" in row['快递公司']: gel = PostLogistics_AU() zone = gel.is_remote(row['postcode']) df.loc[i,'渠道'] = "POST" elif "TOLL" in row['快递公司']: gel = TollLogistics_AU() zone = gel.is_remote(row['postcode']) df.loc[i,'渠道'] = "TOLL" elif "ALL" in row['快递公司']: gel = AllLogistics_AU() zone = gel.is_remote(row['postcode']) df.loc[i,'渠道'] = "ALL" else: zone = "其他渠道" df.loc[i,'分区'] = zone df.to_clipboard(index=False)