#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Data import script.

Reads the XLSX workbooks found in the ``data`` directory (two levels above
this file) and inserts their rows into the corresponding database tables.
"""

from pathlib import Path

import numpy as np
import pandas as pd

from utils.gtools import MySQLconnect
from utils.config_manager import config


def _to_str(value, default: str = "") -> str:
    """Return ``str(value)``, or *default* when the cell is missing (NaN/None)."""
    return str(value) if pd.notna(value) else default


def _to_float(value):
    """Return ``float(value)``, or ``0`` when the cell is missing (NaN/None)."""
    return float(value) if pd.notna(value) else 0


def _to_code(value) -> str:
    """Return an integer-like cell as a digit string, or ``""`` when missing.

    NOTE(review): the round trip through ``int`` drops leading zeros
    (e.g. ``"01234"`` becomes ``"1234"``). Kept as-is because lookups elsewhere
    may depend on the stored format -- confirm before padding.
    """
    return str(int(value)) if pd.notna(value) else ""


def _fedex_rows(df) -> list:
    """Build ``(lbs, zone_2 .. zone_8)`` tuples from a FedEx rate sheet.

    Expects a ``"lbs"`` column and zone columns labelled ``"2"`` .. ``"8"``;
    a zone column that is absent or empty contributes ``0``.
    """
    rows = []
    for _, row in df.iterrows():
        record = [int(row["lbs"])]
        record.extend(_to_float(row.get(str(zone), 0)) for zone in range(2, 9))
        rows.append(tuple(record))
    return rows


class DataImporter:
    """Loads the XLSX price / zone sheets into database tables."""

    def __init__(self):
        # Workbooks live in "<parent of this file's directory>/data".
        self.data_dir = Path(__file__).parent.parent / "data"

    def get_connection(self, dbname: str = None):
        """Return a ``MySQLconnect`` for *dbname*.

        The returned object is used as a context manager that exposes the
        cursor as ``.cur`` and the connection as ``.con``.
        """
        return MySQLconnect(dbname)

    def execute_sql(self, sql: str, dbname: str = None):
        """Execute a single SQL statement and commit."""
        with self.get_connection(dbname) as conn:
            conn.cur.execute(sql)
            conn.con.commit()

    def execute_many(self, sql: str, data: list, dbname: str = None):
        """Execute *sql* once per parameter tuple in *data* and commit."""
        with self.get_connection(dbname) as conn:
            conn.cur.executemany(sql, data)
            conn.con.commit()

    def truncate_table(self, table_name: str, dbname: str = None):
        """Empty *table_name*.

        The name is interpolated into the statement verbatim, so it must come
        from trusted code, never from external input.
        """
        self.execute_sql(f"TRUNCATE TABLE {table_name}", dbname)

    # ==================== Private helpers ====================

    def _read_sheet(self, filename: str, sheet_name: str):
        """Read one worksheet of a workbook in ``self.data_dir`` as a DataFrame."""
        return pd.read_excel(self.data_dir / filename, sheet_name=sheet_name)

    def _insert_rows(self, sql: str, rows: list, label: str = None):
        """Bulk-insert *rows* with *sql* and print the imported-row count."""
        self.execute_many(sql, rows)
        suffix = f" ({label})" if label else ""
        print(f" 已导入 {len(rows)} 条记录{suffix}")

    def _import_fedex_sheet(self, sheet_name: str, table: str):
        """Import a FedEx rate sheet whose first column is the weight in lbs."""
        df = self._read_sheet("美国快递.xlsx", sheet_name)
        # First column becomes "lbs"; the rest are stringified so the zone
        # columns can be looked up as "2" .. "8".
        df.columns = ["lbs"] + [str(c) for c in df.columns[1:]]
        sql = f"""INSERT INTO {table}
                 (lbs, zone_2, zone_3, zone_4, zone_5, zone_6, zone_7, zone_8)
                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"""
        self._insert_rows(sql, _fedex_rows(df))

    def _import_au_zone_sheet(self, sheet_name: str, table: str):
        """Import a five-column (post + 4 zones) Australian rate sheet.

        Any failure is reported on stdout and swallowed so that the remaining
        imports still run.
        """
        try:
            df = self._read_sheet("澳洲三大渠道.xlsx", sheet_name)
            df.columns = ["post", "zone_1", "zone_2", "zone_3", "zone_4"]
            rows = [
                (
                    _to_str(row["post"]),
                    _to_float(row["zone_1"]),
                    _to_float(row["zone_2"]),
                    _to_float(row["zone_3"]),
                    _to_float(row["zone_4"]),
                )
                for _, row in df.iterrows()
            ]
            sql = (
                f"INSERT INTO {table} (post, zone_1, zone_2, zone_3, zone_4) "
                "VALUES (%s, %s, %s, %s, %s)"
            )
            self._insert_rows(sql, rows, sheet_name)
        except Exception as e:
            print(f" {sheet_name}: {e}")

    # ==================== UK imports ====================

    def import_uk_postcode_zone(self):
        """Import the UK postcode-prefix -> zone mapping (``is_remote`` is always 0)."""
        print("导入英国邮编分区...")
        df = self._read_sheet("英国卡派.xlsx", "分区")
        df.columns = ["postcode_prefix", "zone"]
        rows = [
            (str(row["postcode_prefix"]).strip(), str(row["zone"]).strip(), 0)
            for _, row in df.iterrows()
        ]
        sql = (
            "INSERT IGNORE INTO uk_postcode_zone (postcode_prefix, zone, is_remote) "
            "VALUES (%s, %s, %s)"
        )
        self._insert_rows(sql, rows)

    def import_uk_kp_nv_price(self):
        """Import the UK NV freight table (zone, tuopan, fee)."""
        print("导入英国卡派NV运费...")
        df = self._read_sheet("英国卡派.xlsx", "运费")
        df.columns = ["zone", "tuopan", "fee"]
        rows = [
            (str(row["zone"]).strip(), int(row["tuopan"]), float(row["fee"]))
            for _, row in df.iterrows()
        ]
        sql = "INSERT INTO uk_kp_nv_price (zone, tuopan, fee) VALUES (%s, %s, %s)"
        self._insert_rows(sql, rows)

    # ==================== US imports ====================

    def import_us_fedex_pp_price(self):
        """Import the US FedEx rates from the "邮差小马" sheet."""
        print("导入美国Fedex邮差小马价格...")
        self._import_fedex_sheet("邮差小马", "us_fedex_pp_price")

    def import_us_fedex_kh_price(self):
        """Import the US FedEx rates from the "金宏亚" sheet."""
        print("导入美国Fedex金宏亚价格...")
        self._import_fedex_sheet("金宏亚", "us_fedex_kh_price")

    def import_us_fedex_price(self):
        """Import the US FedEx rates from the "FEDEX" and "FEDEX国内" sheets.

        A sheet that fails to load or convert is reported and skipped.
        """
        print("导入美国Fedex价格...")
        # NOTE(review): the original selected the table with a conditional
        # whose two branches were identical, so both sheets land in
        # us_fedex_price. Confirm whether "FEDEX国内" should go elsewhere.
        table = "us_fedex_price"
        sql = f"""INSERT INTO {table}
                 (lbs, zone_2, zone_3, zone_4, zone_5, zone_6, zone_7, zone_8)
                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"""
        for sheet_name in ["FEDEX", "FEDEX国内"]:
            try:
                df = self._read_sheet("美国快递.xlsx", sheet_name)
                if "lbs." in df.columns:
                    # Raises (and the sheet is skipped) when the sheet has
                    # more than 8 columns, because only 8 names are supplied.
                    df.columns = ["lbs"] + [str(c) for c in df.columns[1:8]]
                self._insert_rows(sql, _fedex_rows(df), sheet_name)
            except Exception as e:
                print(f" 跳过 {sheet_name}: {e}")

    def import_us_giga_price(self):
        """Import the US GIGA local fee data."""
        print("导入美国GIGA价格...")
        df = self._read_sheet("GIGA base_fee_20240607223514.xlsx", "Local Fee Data")
        text_cols = ["Delivery Warehouse", "General Area", "Fee Type", "Zone"]
        fee_cols = [
            "Local Pickup Fee",
            "Warehouse Handling Fee",
            "Delivery Fee Rate",
            "Additional Delivery Fee",
            "Assembly Fee",
        ]
        rows = [
            (
                _to_code(row["Zip Code"]),
                *(_to_str(row[c]) for c in text_cols),
                *(_to_float(row[c]) for c in fee_cols),
            )
            for _, row in df.iterrows()
        ]
        sql = """INSERT INTO us_giga_price
                 (zip_code, delivery_warehouse, general_area, fee_type, zone,
                  local_pickup_fee, warehouse_handling_fee, delivery_fee_rate,
                  additional_delivery_fee, assembly_fee)
                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
        self._insert_rows(sql, rows)

    def import_us_ceva_price(self):
        """Import the US CEVA base rates and the CEVA remote-zone postal codes."""
        print("导入美国CEVA价格...")

        # CEVA base rate: rows without a weight are dropped; the weight itself
        # is passed through unconverted.
        df = self._read_sheet("CEVA.xlsx", "ceva_base_rate")
        rate_cols = list(df.columns[1:])
        df.columns = ["ceva_weight"] + rate_cols
        rows = []
        for _, row in df.iterrows():
            weight = row["ceva_weight"]
            if pd.isna(weight):
                continue
            rows.append((weight, *(_to_float(row[c]) for c in rate_cols)))
        sql = """INSERT INTO us_ceva_price
                 (ceva_weight, zone_ca, zone_wa, zone_or, zone_nv, zone_az,
                  zone_co, zone_ut, zone_nm, remote_area_surcharge)
                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
        self._insert_rows(sql, rows, "ceva_base_rate")

        # CEVA remote zone: a missing remote_type defaults to "standard".
        df = self._read_sheet("CEVA.xlsx", "remote_zone")
        df.columns = ["postal_code", "state", "beyond_zone", "remote_type"]
        rows = [
            (
                _to_code(row["postal_code"]),
                _to_str(row["state"]),
                _to_str(row["beyond_zone"]),
                _to_str(row["remote_type"], "standard"),
            )
            for _, row in df.iterrows()
        ]
        sql = (
            "INSERT IGNORE INTO us_ceva_zone (postal_code, state, beyond_zone, remote_type) "
            "VALUES (%s, %s, %s, %s)"
        )
        self._insert_rows(sql, rows, "remote_zone")

    def import_us_metro_price(self):
        """Import the US Metro rate sheets and the Metro zip -> zone table.

        Sheets without an ``Origins`` column are silently skipped; any other
        failure is reported on stdout and the import moves on.
        """
        print("导入美国Metro价格...")
        # NOTE(review): all four sheets are written to us_metro_price with no
        # column identifying the source sheet -- confirm this is intended.
        price_sql = (
            "INSERT INTO us_metro_price (origins, zone_1l, zone_2l, zone_3l, zone_4l, "
            "zone_5l, zone_6l, zone_7l, zone_8l, zone_9l) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        )
        for sheet_name in ["cuft_25", "cuft_35", "over35_per_cuft", "over35_min"]:
            try:
                df = self._read_sheet("Metro.xlsx", sheet_name)
                if "Origins" not in df.columns:
                    continue
                # Every column after the first is treated as a zone price.
                zone_cols = list(df.columns[1:])
                rows = [
                    (_to_str(row["Origins"]), *(_to_float(row[c]) for c in zone_cols))
                    for _, row in df.iterrows()
                ]
                self._insert_rows(price_sql, rows, sheet_name)
            except Exception as e:
                print(f" 跳过 {sheet_name}: {e}")

        # Metro zone table
        try:
            df = self._read_sheet("Metro.xlsx", "zone_table")
            df.columns = ["zip_code", "new_zone_name"]
            rows = [
                (_to_code(row["zip_code"]), _to_str(row["new_zone_name"]))
                for _, row in df.iterrows()
            ]
            sql = "INSERT IGNORE INTO us_metro_zone (zip_code, new_zone_name) VALUES (%s, %s)"
            self._insert_rows(sql, rows, "zone_table")
        except Exception as e:
            print(f" zone_table: {e}")

    def import_us_xmiles_zone(self):
        """Import the US XMILES postcode -> area table.

        Only the first two columns of the sheet are used.

        Raises:
            ValueError: if the sheet has fewer than two columns.
        """
        print("导入美国XMILES邮编...")
        df = self._read_sheet("XMILES.xlsx", "postcode_table")
        if df.shape[1] < 2:
            raise ValueError(
                "XMILES.xlsx sheet 'postcode_table' needs at least 2 columns, "
                f"found {df.shape[1]}"
            )
        # Keep just the first two columns so that extra columns in the sheet
        # do not make the rename below fail with a length mismatch.
        df = df.iloc[:, :2]
        df.columns = ["postcode", "area"]
        rows = [
            (_to_code(row["postcode"]), _to_str(row["area"]))
            for _, row in df.iterrows()
        ]
        sql = "INSERT IGNORE INTO us_xmiles_zone (postcode, area) VALUES (%s, %s)"
        self._insert_rows(sql, rows)

    def import_us_am_price(self):
        """Import the US AM price table and the AM zip -> zone table."""
        print("导入美国AM卡派价格...")

        # price sheet: internalid / externalid are read but not stored.
        df = self._read_sheet("美国卡派-AM.xlsx", "price")
        df.columns = [
            "pu_zone",
            "dl_zone",
            "zone_combo",
            "minimum",
            "maximum",
            "fee_without_sc",
            "shipping_cost",
            "internalid",
            "externalid",
            "surcharge",
        ]
        text_cols = ["pu_zone", "dl_zone", "zone_combo"]
        number_cols = ["minimum", "maximum", "fee_without_sc", "shipping_cost", "surcharge"]
        rows = [
            (
                *(_to_str(row[c]) for c in text_cols),
                *(_to_float(row[c]) for c in number_cols),
            )
            for _, row in df.iterrows()
        ]
        sql = """INSERT INTO us_am_price
                 (pu_zone, dl_zone, zone_combo, minimum_weight, maximum_weight,
                  fee_without_sc, shipping_cost, surcharge)
                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"""
        self._insert_rows(sql, rows, "price")

        # postcode sheet
        df = self._read_sheet("美国卡派-AM.xlsx", "postcode_table")
        df.columns = ["zip_code", "zone"]
        rows = [
            (_to_code(row["zip_code"]), _to_str(row["zone"]))
            for _, row in df.iterrows()
        ]
        sql = "INSERT IGNORE INTO us_am_postcode (zip_code, zone) VALUES (%s, %s)"
        self._insert_rows(sql, rows, "postcode_table")

    # ==================== Australia imports ====================

    def import_au_eparcel_price(self):
        """Import the Australian eparcel price table.

        Every column after the first is inserted as a weight-band price, so
        the sheet must supply exactly the nine bands named in the statement.
        """
        print("导入澳洲eparcel价格...")
        df = self._read_sheet("澳洲三大渠道.xlsx", "eparcel")
        cols = ["post"] + [str(c) for c in df.columns[1:]]
        df.columns = cols
        rows = [
            (_to_str(row["post"]), *(_to_float(row.get(c, 0)) for c in cols[1:]))
            for _, row in df.iterrows()
        ]
        sql = """INSERT INTO au_eparcel_price
                 (post, weight_0_5, weight_1, weight_2, weight_3, weight_4,
                  weight_5, weight_7, weight_10, weight_15)
                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
        self._insert_rows(sql, rows)

    def import_au_all(self):
        """Import the Australian toll and allied price tables."""
        print("导入澳洲toll和allied数据...")
        self._import_au_zone_sheet("toll", "au_toll_price")
        self._import_au_zone_sheet("allied", "au_allied_price")

    # ==================== Europe imports ====================

    def import_eur_dhl_price(self):
        """Import the European DHL IP quotation table.

        The ``country_postalcode`` column is read but not stored. Any failure
        is reported on stdout and swallowed.
        """
        print("导入欧洲DHL价格...")
        try:
            df = self._read_sheet("欧洲卡派.xlsx", "DHL卡派IP报价")
            df.columns = [
                "type",
                "country",
                "postalcode",
                "country_postalcode",
                "ip_1",
                "ip_2",
                "ip_3",
                "ip_4",
                "ip_5",
                "ip_6",
            ]
            text_cols = ["type", "country", "postalcode"]
            price_cols = ["ip_1", "ip_2", "ip_3", "ip_4", "ip_5", "ip_6"]
            rows = [
                (
                    *(_to_str(row[c]) for c in text_cols),
                    *(_to_float(row[c]) for c in price_cols),
                )
                for _, row in df.iterrows()
            ]
            sql = """INSERT INTO eur_dhl_price
                     (price_type, country, postalcode, ip_1, ip_2, ip_3, ip_4, ip_5, ip_6)
                     VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"""
            self._insert_rows(sql, rows)
        except Exception as e:
            print(f" 欧洲DHL: {e}")

    # ==================== Entry point ====================

    def import_all(self):
        """Run every import in sequence: UK, US, Australia, then Europe."""
        print("开始导入数据...")

        # UK
        self.import_uk_postcode_zone()
        self.import_uk_kp_nv_price()

        # US
        self.import_us_fedex_pp_price()
        self.import_us_fedex_kh_price()
        self.import_us_fedex_price()
        self.import_us_giga_price()
        self.import_us_ceva_price()
        self.import_us_metro_price()
        self.import_us_xmiles_zone()
        self.import_us_am_price()

        # Australia
        self.import_au_eparcel_price()
        self.import_au_all()

        # Europe
        self.import_eur_dhl_price()

        print("\n数据导入完成!")


if __name__ == "__main__":
    importer = DataImporter()
    importer.import_all()