logistics/scripts/import_data.py

470 lines
20 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
数据导入脚本
将XLSX文件中的数据导入到数据库
"""
from pathlib import Path
from typing import Optional

import numpy as np
import pandas as pd

from utils.config_manager import config
from utils.gtools import MySQLconnect
class DataImporter:
    """Import logistics rate and zone tables from XLSX workbooks into MySQL.

    Every public ``import_*`` method reads one or more worksheets from
    ``self.data_dir`` with pandas, converts each row into a tuple and
    bulk-inserts the tuples through :meth:`execute_many`.

    NOTE(review): the importers use plain ``INSERT`` / ``INSERT IGNORE`` and
    never call :meth:`truncate_table`, so re-running the script looks like it
    would append duplicate rows -- confirm whether tables are cleared
    elsewhere before an import.
    """

    # The FedEx rate sheets are read as one price column per zone 2..8.
    _FEDEX_ZONES = range(2, 9)

    def __init__(self):
        # Workbooks are read from ``<directory of this file>/../data``.
        self.data_dir = Path(__file__).parent.parent / "data"

    # ==================== Database helpers ====================
    def get_connection(self, dbname: Optional[str] = None):
        """Return a ``MySQLconnect`` object created for *dbname*."""
        return MySQLconnect(dbname)

    def execute_sql(self, sql: str, dbname: Optional[str] = None):
        """Execute a single SQL statement and commit it."""
        with self.get_connection(dbname) as conn:
            conn.cur.execute(sql)
            conn.con.commit()

    def execute_many(self, sql: str, data: list, dbname: Optional[str] = None):
        """Run *sql* once per parameter tuple in *data* and commit."""
        with self.get_connection(dbname) as conn:
            conn.cur.executemany(sql, data)
            conn.con.commit()

    def truncate_table(self, table_name: str, dbname: Optional[str] = None):
        """Empty *table_name* with ``TRUNCATE TABLE``.

        The table name is interpolated into the statement as-is, so it must
        come from trusted code, never from user input.
        """
        self.execute_sql(f"TRUNCATE TABLE {table_name}", dbname)

    # ==================== Cell converters ====================
    @staticmethod
    def _to_float(value):
        """Return *value* as ``float``; a missing cell (NaN/None) becomes ``0``."""
        return float(value) if pd.notna(value) else 0

    @staticmethod
    def _to_text(value, default=""):
        """Return ``str(value)``; a missing cell (NaN/None) becomes *default*."""
        return str(value) if pd.notna(value) else default

    @staticmethod
    def _to_code(value):
        """Return a numeric postal code cell as a digit string ("" if missing).

        NOTE(review): ``int()`` drops leading zeros, so a code stored as the
        number 02134 is written as "2134" -- confirm that lookups expect the
        unpadded form.
        """
        return str(int(value)) if pd.notna(value) else ""

    # ==================== Row builders (no I/O) ====================
    @staticmethod
    def _uk_zone_rows(df):
        """Build ``(postcode_prefix, zone, is_remote)`` tuples.

        Expects columns ``postcode_prefix`` and ``zone``. Rows missing either
        value are skipped so blank spreadsheet rows are not stored as the
        literal string "nan". ``is_remote`` is always written as ``0``.
        """
        usable = df.dropna(subset=["postcode_prefix", "zone"])
        return [
            (str(row["postcode_prefix"]).strip(), str(row["zone"]).strip(), 0)
            for _, row in usable.iterrows()
        ]

    @staticmethod
    def _uk_kp_rows(df):
        """Build ``(zone, tuopan, fee)`` tuples, skipping incomplete rows.

        Expects columns ``zone``, ``tuopan`` and ``fee``. A blank ``tuopan``
        cell would otherwise make ``int()`` raise and abort the whole import.
        """
        usable = df.dropna(subset=["zone", "tuopan", "fee"])
        return [
            (str(row["zone"]).strip(), int(row["tuopan"]), float(row["fee"]))
            for _, row in usable.iterrows()
        ]

    @staticmethod
    def _fedex_rows(df):
        """Build ``(lbs, zone_2 .. zone_8)`` tuples from a FedEx rate sheet.

        The first column is treated as the weight; the remaining headers are
        stringified and looked up as "2".."8". A zone column that is absent,
        or a blank price cell, yields ``0``. Rows with a blank weight are
        skipped. *df* itself is not modified.
        """
        frame = df.copy()
        frame.columns = ["lbs"] + [str(label) for label in frame.columns[1:]]
        rows = []
        for _, row in frame.dropna(subset=["lbs"]).iterrows():
            prices = [
                DataImporter._to_float(row.get(str(zone), 0))
                for zone in DataImporter._FEDEX_ZONES
            ]
            rows.append((int(row["lbs"]), *prices))
        return rows

    @staticmethod
    def _xmiles_rows(df):
        """Build ``(postcode, area)`` tuples from the first two columns of *df*.

        Extra columns are ignored; renaming the whole frame to two names
        would raise on any sheet wider than two columns.

        Raises:
            ValueError: if *df* has fewer than two columns.
        """
        if df.shape[1] < 2:
            raise ValueError("postcode_table needs at least two columns")
        frame = df.iloc[:, :2].copy()
        frame.columns = ["postcode", "area"]
        return [
            (DataImporter._to_code(row["postcode"]),
             DataImporter._to_text(row["area"]))
            for _, row in frame.iterrows()
        ]

    # ==================== Shared I/O helpers ====================
    def _read_sheet(self, workbook, sheet_name, columns=None):
        """Read one worksheet from ``data_dir``; optionally rename all columns."""
        frame = pd.read_excel(self.data_dir / workbook, sheet_name=sheet_name)
        if columns is not None:
            frame.columns = columns
        return frame

    def _insert_rows(self, sql, rows, label=None):
        """Bulk-insert *rows* and print the imported count (tagged with *label*)."""
        self.execute_many(sql, rows)
        suffix = f" ({label})" if label else ""
        print(f" 已导入 {len(rows)} 条记录{suffix}")

    def _insert_fedex_rows(self, df, table, label=None):
        """Insert the weight/zone rows of a FedEx-style sheet into *table*."""
        sql = (
            f"INSERT INTO {table} "
            "(lbs, zone_2, zone_3, zone_4, zone_5, zone_6, zone_7, zone_8) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
        )
        self._insert_rows(sql, self._fedex_rows(df), label)

    def _import_au_zone_sheet(self, sheet_name, table):
        """Import a five-column (post + four zones) AU sheet into *table*."""
        zone_columns = ["zone_1", "zone_2", "zone_3", "zone_4"]
        df = self._read_sheet("澳洲三大渠道.xlsx", sheet_name, ["post"] + zone_columns)
        rows = [
            (self._to_text(row["post"]),
             *(self._to_float(row[col]) for col in zone_columns))
            for _, row in df.iterrows()
        ]
        sql = (
            f"INSERT INTO {table} (post, zone_1, zone_2, zone_3, zone_4) "
            "VALUES (%s, %s, %s, %s, %s)"
        )
        self._insert_rows(sql, rows, sheet_name)

    # ==================== UK imports ====================
    def import_uk_postcode_zone(self):
        """Import the UK postcode-prefix to zone mapping."""
        print("导入英国邮编分区...")
        df = self._read_sheet("英国卡派.xlsx", "分区", ["postcode_prefix", "zone"])
        sql = (
            "INSERT IGNORE INTO uk_postcode_zone "
            "(postcode_prefix, zone, is_remote) VALUES (%s, %s, %s)"
        )
        self._insert_rows(sql, self._uk_zone_rows(df))

    def import_uk_kp_nv_price(self):
        """Import the UK NV freight fees (zone, tuopan, fee)."""
        print("导入英国卡派NV运费...")
        df = self._read_sheet("英国卡派.xlsx", "运费", ["zone", "tuopan", "fee"])
        sql = "INSERT INTO uk_kp_nv_price (zone, tuopan, fee) VALUES (%s, %s, %s)"
        self._insert_rows(sql, self._uk_kp_rows(df))

    # ==================== US imports ====================
    def import_us_fedex_pp_price(self):
        """Import the FedEx price sheet "邮差小马" into ``us_fedex_pp_price``."""
        print("导入美国Fedex邮差小马价格...")
        df = self._read_sheet("美国快递.xlsx", "邮差小马")
        self._insert_fedex_rows(df, "us_fedex_pp_price")

    def import_us_fedex_kh_price(self):
        """Import the FedEx price sheet "金宏亚" into ``us_fedex_kh_price``."""
        print("导入美国Fedex金宏亚价格...")
        df = self._read_sheet("美国快递.xlsx", "金宏亚")
        self._insert_fedex_rows(df, "us_fedex_kh_price")

    def import_us_fedex_price(self):
        """Import the "FEDEX" and "FEDEX国内" sheets into ``us_fedex_price``.

        Each sheet is best-effort: any failure is printed and the sheet is
        skipped.

        NOTE(review): both sheets are written to the same table with nothing
        to tell them apart -- confirm that is intended.
        """
        print("导入美国Fedex价格...")
        for sheet_name in ["FEDEX", "FEDEX国内"]:
            try:
                df = self._read_sheet("美国快递.xlsx", sheet_name)
                if "lbs." not in df.columns:
                    raise ValueError("missing 'lbs.' header column")
                # Only the weight column plus seven zone columns are used;
                # slicing first keeps wider sheets from failing the rename.
                self._insert_fedex_rows(df.iloc[:, :8], "us_fedex_price", sheet_name)
            except Exception as e:
                print(f" 跳过 {sheet_name}: {e}")

    def import_us_giga_price(self):
        """Import the GIGA "Local Fee Data" sheet into ``us_giga_price``."""
        print("导入美国GIGA价格...")
        df = self._read_sheet("GIGA base_fee_20240607223514.xlsx", "Local Fee Data")
        text_columns = ("Delivery Warehouse", "General Area", "Fee Type", "Zone")
        fee_columns = (
            "Local Pickup Fee",
            "Warehouse Handling Fee",
            "Delivery Fee Rate",
            "Additional Delivery Fee",
            "Assembly Fee",
        )
        rows = [
            (self._to_code(row["Zip Code"]),
             *(self._to_text(row[col]) for col in text_columns),
             *(self._to_float(row[col]) for col in fee_columns))
            for _, row in df.iterrows()
        ]
        sql = (
            "INSERT INTO us_giga_price "
            "(zip_code, delivery_warehouse, general_area, fee_type, zone, "
            "local_pickup_fee, warehouse_handling_fee, delivery_fee_rate, "
            "additional_delivery_fee, assembly_fee) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        )
        self._insert_rows(sql, rows)

    def import_us_ceva_price(self):
        """Import the CEVA base rates and the CEVA remote-zone table."""
        print("导入美国CEVA价格...")

        # Base rates: first column is the weight, the rest are taken
        # positionally, so the sheet must supply exactly the nine value
        # columns the INSERT lists.
        df = self._read_sheet("CEVA.xlsx", "ceva_base_rate")
        df.columns = ["ceva_weight"] + list(df.columns[1:])
        value_columns = list(df.columns[1:])
        rows = [
            (row["ceva_weight"],
             *(self._to_float(row[col]) for col in value_columns))
            for _, row in df.iterrows()
            if not pd.isna(row["ceva_weight"])  # skip rows without a weight
        ]
        sql = (
            "INSERT INTO us_ceva_price "
            "(ceva_weight, zone_ca, zone_wa, zone_or, zone_nv, zone_az, "
            "zone_co, zone_ut, zone_nm, remote_area_surcharge) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        )
        self._insert_rows(sql, rows, "ceva_base_rate")

        # Remote zones: a blank remote_type falls back to "standard".
        df = self._read_sheet(
            "CEVA.xlsx", "remote_zone",
            ["postal_code", "state", "beyond_zone", "remote_type"],
        )
        rows = [
            (self._to_code(row["postal_code"]),
             self._to_text(row["state"]),
             self._to_text(row["beyond_zone"]),
             self._to_text(row["remote_type"], "standard"))
            for _, row in df.iterrows()
        ]
        sql = (
            "INSERT IGNORE INTO us_ceva_zone "
            "(postal_code, state, beyond_zone, remote_type) "
            "VALUES (%s, %s, %s, %s)"
        )
        self._insert_rows(sql, rows, "remote_zone")

    def import_us_metro_price(self):
        """Import the Metro price sheets and the Metro zip-to-zone table.

        Every sheet is best-effort: failures are printed and skipped.

        NOTE(review): all four price sheets are inserted into
        ``us_metro_price`` without a column identifying the source sheet --
        confirm the table is meant to hold them mixed together.
        """
        print("导入美国Metro价格...")
        price_sql = (
            "INSERT INTO us_metro_price "
            "(origins, zone_1l, zone_2l, zone_3l, zone_4l, zone_5l, "
            "zone_6l, zone_7l, zone_8l, zone_9l) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        )
        for sheet_name in ["cuft_25", "cuft_35", "over35_per_cuft", "over35_min"]:
            try:
                df = self._read_sheet("Metro.xlsx", sheet_name)
                if "Origins" not in df.columns:
                    continue
                # Columns after the first are taken positionally as prices.
                value_columns = list(df.columns[1:])
                rows = [
                    (self._to_text(row["Origins"]),
                     *(self._to_float(row[col]) for col in value_columns))
                    for _, row in df.iterrows()
                ]
                self._insert_rows(price_sql, rows, sheet_name)
            except Exception as e:
                print(f" 跳过 {sheet_name}: {e}")

        # Metro zone table
        try:
            df = self._read_sheet("Metro.xlsx", "zone_table", ["zip_code", "new_zone_name"])
            rows = [
                (self._to_code(row["zip_code"]), self._to_text(row["new_zone_name"]))
                for _, row in df.iterrows()
            ]
            sql = "INSERT IGNORE INTO us_metro_zone (zip_code, new_zone_name) VALUES (%s, %s)"
            self._insert_rows(sql, rows, "zone_table")
        except Exception as e:
            print(f" zone_table: {e}")

    def import_us_xmiles_zone(self):
        """Import the XMILES postcode-to-area table."""
        print("导入美国XMILES邮编...")
        df = self._read_sheet("XMILES.xlsx", "postcode_table")
        sql = "INSERT IGNORE INTO us_xmiles_zone (postcode, area) VALUES (%s, %s)"
        self._insert_rows(sql, self._xmiles_rows(df))

    def import_us_am_price(self):
        """Import the AM price table and the AM zip-to-zone table."""
        print("导入美国AM卡派价格...")

        # Price sheet: ten columns are named, but internalid/externalid are
        # not part of the INSERT.
        df = self._read_sheet(
            "美国卡派-AM.xlsx", "price",
            ["pu_zone", "dl_zone", "zone_combo", "minimum", "maximum",
             "fee_without_sc", "shipping_cost", "internalid", "externalid",
             "surcharge"],
        )
        text_columns = ("pu_zone", "dl_zone", "zone_combo")
        number_columns = ("minimum", "maximum", "fee_without_sc",
                          "shipping_cost", "surcharge")
        rows = [
            (*(self._to_text(row[col]) for col in text_columns),
             *(self._to_float(row[col]) for col in number_columns))
            for _, row in df.iterrows()
        ]
        sql = (
            "INSERT INTO us_am_price "
            "(pu_zone, dl_zone, zone_combo, minimum_weight, maximum_weight, "
            "fee_without_sc, shipping_cost, surcharge) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
        )
        self._insert_rows(sql, rows, "price")

        # Postcode sheet
        df = self._read_sheet("美国卡派-AM.xlsx", "postcode_table", ["zip_code", "zone"])
        rows = [
            (self._to_code(row["zip_code"]), self._to_text(row["zone"]))
            for _, row in df.iterrows()
        ]
        sql = "INSERT IGNORE INTO us_am_postcode (zip_code, zone) VALUES (%s, %s)"
        self._insert_rows(sql, rows, "postcode_table")

    # ==================== Australia imports ====================
    def import_au_eparcel_price(self):
        """Import the eparcel price sheet into ``au_eparcel_price``.

        Columns after the first are taken positionally, so the sheet must
        supply exactly the nine weight columns the INSERT lists.
        """
        print("导入澳洲eparcel价格...")
        df = self._read_sheet("澳洲三大渠道.xlsx", "eparcel")
        df.columns = ["post"] + [str(label) for label in df.columns[1:]]
        value_columns = list(df.columns[1:])
        rows = [
            (self._to_text(row["post"]),
             *(self._to_float(row[col]) for col in value_columns))
            for _, row in df.iterrows()
        ]
        sql = (
            "INSERT INTO au_eparcel_price "
            "(post, weight_0_5, weight_1, weight_2, weight_3, weight_4, "
            "weight_5, weight_7, weight_10, weight_15) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        )
        self._insert_rows(sql, rows)

    def import_au_all(self):
        """Import the toll and allied price sheets (each one best-effort)."""
        print("导入澳洲toll和allied数据...")
        for sheet_name, table in (("toll", "au_toll_price"),
                                  ("allied", "au_allied_price")):
            try:
                self._import_au_zone_sheet(sheet_name, table)
            except Exception as e:
                print(f" {sheet_name}: {e}")

    # ==================== Europe imports ====================
    def import_eur_dhl_price(self):
        """Import the DHL IP quotation sheet (best-effort).

        The sheet's ``country_postalcode`` column is named but not inserted.
        """
        print("导入欧洲DHL价格...")
        try:
            df = self._read_sheet(
                "欧洲卡派.xlsx", "DHL卡派IP报价",
                ["type", "country", "postalcode", "country_postalcode",
                 "ip_1", "ip_2", "ip_3", "ip_4", "ip_5", "ip_6"],
            )
            text_columns = ("type", "country", "postalcode")
            price_columns = ("ip_1", "ip_2", "ip_3", "ip_4", "ip_5", "ip_6")
            rows = [
                (*(self._to_text(row[col]) for col in text_columns),
                 *(self._to_float(row[col]) for col in price_columns))
                for _, row in df.iterrows()
            ]
            sql = (
                "INSERT INTO eur_dhl_price "
                "(price_type, country, postalcode, "
                "ip_1, ip_2, ip_3, ip_4, ip_5, ip_6) "
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
            )
            self._insert_rows(sql, rows)
        except Exception as e:
            print(f" 欧洲DHL: {e}")

    # ==================== Entry point ====================
    def import_all(self):
        """Run every importer in order: UK, US, Australia, Europe.

        Importers without their own ``try``/``except`` propagate errors, so
        a failure there aborts the remaining imports.
        """
        print("开始导入数据...")
        # UK
        self.import_uk_postcode_zone()
        self.import_uk_kp_nv_price()
        # US
        self.import_us_fedex_pp_price()
        self.import_us_fedex_kh_price()
        self.import_us_fedex_price()
        self.import_us_giga_price()
        self.import_us_ceva_price()
        self.import_us_metro_price()
        self.import_us_xmiles_zone()
        self.import_us_am_price()
        # Australia
        self.import_au_eparcel_price()
        self.import_au_all()
        # Europe
        self.import_eur_dhl_price()
        print("\n数据导入完成!")
if __name__ == "__main__":
    # Run the complete import when executed as a script.
    DataImporter().import_all()