119 lines
3.5 KiB
Plaintext
119 lines
3.5 KiB
Plaintext
|
|
{
|
|||
|
|
"cells": [
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": null,
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"# 属性统计\n",
|
|||
|
|
"def parse_attr_set(attr_set_str):\n",
|
|||
|
|
" \"\"\"解析属性集,返回 {属性值: 属性名} 的字典\"\"\"\n",
|
|||
|
|
" mapping = {}\n",
|
|||
|
|
" for item in attr_set_str.split(\";\"):\n",
|
|||
|
|
" item = item.strip()\n",
|
|||
|
|
" if not item:\n",
|
|||
|
|
" continue\n",
|
|||
|
|
" try:\n",
|
|||
|
|
" left, value = item.split(\"~\", 1) # \"231012:大小~23206184:140*80*75\"\n",
|
|||
|
|
" attr_name = left.split(\":\")[1]\n",
|
|||
|
|
" value = value.split(\":\")[1]\n",
|
|||
|
|
" mapping[value] = attr_name\n",
|
|||
|
|
" except Exception:\n",
|
|||
|
|
" continue\n",
|
|||
|
|
" return mapping\n",
|
|||
|
|
"def map_spec_to_attrs(spec, attr_mapping):\n",
|
|||
|
|
" \"\"\"把规格里的值映射为 {属性名: 规格值}\"\"\"\n",
|
|||
|
|
" results = {}\n",
|
|||
|
|
" if not isinstance(attr_mapping, dict): # 如果不是字典,直接返回 None\n",
|
|||
|
|
" return None\n",
|
|||
|
|
"\n",
|
|||
|
|
" for val in str(spec).split(): # spec 也转成 str,避免 NaN\n",
|
|||
|
|
" if val in attr_mapping:\n",
|
|||
|
|
" attr_name = attr_mapping[val]\n",
|
|||
|
|
" results[attr_name] = val\n",
|
|||
|
|
" return results if results else None"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": null,
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"import pandas as pd\n",
|
|||
|
|
"from utils.gtools import MySQLconnect\n",
|
|||
|
|
"\n",
|
|||
|
|
"batch_size = 1000000\n",
|
|||
|
|
"offset = 0\n",
|
|||
|
|
"results = []\n",
|
|||
|
|
"leimu = \"furniture\"\n",
|
|||
|
|
"with MySQLconnect('ods') as ods:\n",
|
|||
|
|
" while True:\n",
|
|||
|
|
" sql = f\"\"\"\n",
|
|||
|
|
" SELECT * FROM erp_{leimu}_sku\n",
|
|||
|
|
" LIMIT {batch_size} OFFSET {offset}\n",
|
|||
|
|
" \"\"\"\n",
|
|||
|
|
" chunk = pd.read_sql(sql, ods.con)\n",
|
|||
|
|
" if chunk.empty:\n",
|
|||
|
|
" break\n",
|
|||
|
|
" chunk[\"规格属性映射\"] = chunk.apply(\n",
|
|||
|
|
" lambda row: map_spec_to_attrs(row[\"规格\"], parse_attr_set(row[\"标准/预设属性集\"])),\n",
|
|||
|
|
" axis=1\n",
|
|||
|
|
" )\n",
|
|||
|
|
" print(f\"处理了 {offset} 到 {offset+batch_size} 条数据\")\n",
|
|||
|
|
" results.append(chunk)\n",
|
|||
|
|
" offset += batch_size\n",
|
|||
|
|
"df = pd.concat(results, ignore_index=True)\n",
|
|||
|
|
"all_attrs = {}\n",
|
|||
|
|
"for mapping in df[\"规格属性映射\"].dropna():\n",
|
|||
|
|
" for attr_name, val in mapping.items():\n",
|
|||
|
|
" if attr_name not in all_attrs: # 只保留一个样本\n",
|
|||
|
|
" all_attrs[attr_name] = val\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(all_attrs)\n",
|
|||
|
|
"\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": null,
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"# 保存\n",
|
|||
|
|
"out_df = pd.DataFrame(list(all_attrs.items()), columns=[\"规格属性映射\", \"属性值\"])\n",
|
|||
|
|
"out_df.to_excel(f\"D:/test/logistics/test_excel/{leimu}-规格属性映射.xlsx\", index=False)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": null,
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": []
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"metadata": {
|
|||
|
|
"kernelspec": {
|
|||
|
|
"display_name": "base",
|
|||
|
|
"language": "python",
|
|||
|
|
"name": "python3"
|
|||
|
|
},
|
|||
|
|
"language_info": {
|
|||
|
|
"codemirror_mode": {
|
|||
|
|
"name": "ipython",
|
|||
|
|
"version": 3
|
|||
|
|
},
|
|||
|
|
"file_extension": ".py",
|
|||
|
|
"mimetype": "text/x-python",
|
|||
|
|
"name": "python",
|
|||
|
|
"nbconvert_exporter": "python",
|
|||
|
|
"pygments_lexer": "ipython3",
|
|||
|
|
"version": "3.11.5"
|
|||
|
|
}
|
|||
|
|
},
|
|||
|
|
"nbformat": 4,
|
|||
|
|
"nbformat_minor": 2
|
|||
|
|
}
|