{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "from utils.gtools import MySQLconnect\n", "import pandas as pd\n", "import numpy as np\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import tqdm\n", "from utils.gtools import MySQLconnect\n", "import pandas as pd\n", "import numpy as np\n", "import pickle\n", "import os\n", "# 读取csv文件\n", "csv_file = r\"F:\\DOCUMENTS\\WXWork\\1688854527635889\\Cache\\File\\2025-03\\Litfad产品点击数据 (26).csv\"\n", "print(\"读取csv文件...\")\n", "df1 = pd.read_csv(csv_file, skiprows=2)\n", "max_len = len(df1)\n", "batch_size = 500000\n", "df1 = df1.rename(columns={\"产品 ID\": \"psid\"})\n", "print(\"csv文件读取完毕\")\n", "for j in range(0, (max_len // batch_size) + 1):\n", " print(f\"正在处理第{j}批数据...\")\n", " start_idx = j * batch_size\n", " end_idx = start_idx + batch_size\n", " \n", " batch_df = df1.iloc[start_idx:end_idx] # 获取每一批数据\n", "\n", " with MySQLconnect(\"ods\") as db:\n", " conn = db.connect()\n", " cursor = conn.cursor()\n", " if not os.path.exists(f\"spu_data{j}.csv\"):\n", " for i,row in batch_df.iterrows():\n", " psid = row[\"psid\"].split(\"sku\")[1] if \"sku\" in row[\"psid\"] else row[\"psid\"] # 产品PSID\n", " show_count = row[\"展示次数\"]\n", " click_count = row[\"点击次数\"]\n", " sql = rf\"\"\"SELECT SPU,DATE_FORMAT(spu.添加时间, '%Y-%m') AS 年月\n", " FROM stg_bayshop_litfad_sku sku\n", " left JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` =spu.`产品PID`\n", " WHERE `产品PSID` = '{psid}' \n", " \"\"\"\n", " cursor.execute(sql)\n", " result = cursor.fetchone()\n", " if result:\n", " year_month = result[0]\n", " spu = result[1]\n", " else:\n", " continue\n", " # df1新增一列保存这个年月\n", " batch_df.loc[i,\"年月\"] = year_month\n", " batch_df.loc[i,\"SPU\"] = spu\n", " batch_df.to_csv(f\"spu_data{j}.csv\", index=False, encoding=\"utf-8-sig\")\n", " else:\n", " batch_df = pd.read_csv(f\"spu_data{j}.csv\")\n", " batch_df[\"展示次数\"] = 
pd.to_numeric(batch_df[\"展示次数\"], errors='coerce')\n", " batch_df[\"点击次数\"] = pd.to_numeric(batch_df[\"点击次数\"], errors='coerce')\n", " result1 = batch_df.groupby(\"年月\").agg(\n", " 年月 = (\"年月\", \"first\"),\n", " SPU = (\"SPU\", \"first\"),\n", " 展示总次数 = (\"展示次数\", \"sum\"),\n", " 点击总次数 = (\"点击次数\", \"sum\"))\n", "\n", "\n", " result = result1.groupby(\"SPU\").agg(\n", " 展示大于0点击为0数量=(\"年月\", lambda x: ((result1.loc[x.index, \"展示总次数\"] > 0) & (result1.loc[x.index, \"点击总次数\"] == 0)).sum()),\n", " 点击大于0数量=(\"年月\", lambda x: (result1.loc[x.index, \"点击总次数\"] > 0).sum()),\n", " 展示为0数量=(\"年月\", lambda x: (result1.loc[x.index, \"展示总次数\"] == 0).sum())\n", " ).reset_index()\n", "\n", "\n", " result.to_csv(f\"spu_statistics{j}.csv\", index=False, encoding=\"utf-8-sig\")\n", "\n", "# 最后读取所有批次的统计数据并合并(相加)\n", "all_result = pd.DataFrame()\n", "for j in range(0, (max_len // batch_size) + 1):\n", " result = pd.read_csv(f\"spu_statistics{j}.csv\")\n", " # 以年月为组,其他数据各批次相加\n", " if all_result.empty:\n", " all_result = result\n", " else:\n", " all_result = pd.merge(all_result, result, on=\"SPU\", how=\"outer\", suffixes=('', '_new'))\n", " all_result = all_result.groupby(\"SPU\").sum().reset_index()\n", "all_result.to_csv(\"all_spu_statistics.csv\", index=False, encoding=\"utf-8-sig\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.gtools import MySQLconnect\n", "import pandas as pd\n", "df=pd.read_excel(r\"F:\\DOCUMENTS\\WXWork\\1688854527635889\\Cache\\File\\2025-03\\导出订单维护任务数据2025-3-20.xlsx\",sheet_name=\"导出订单维护任务数据2025-3-20\")\n", "df['订单数量'] = df['订单数量'].astype(str)\n", "df = df[df['订单数量'].str.len() > 0]\n", "df['订单数量'] = df['订单数量'].astype(float)\n", "df = df[df['订单数量']>0]\n", "df=df[['erp sku','订单号']]\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with MySQLconnect(\"ods\") as db:\n", " engine = db.engine()\n", " conn = db.connect()\n", " cursor = 
conn.cursor()\n", " df_group = df.groupby('erp sku')\n", " for sku, group in df_group:\n", " for index,row in group.iterrows():\n", " order_id_list = group[\"订单号\"].tolist()\n", " param = \",\".join(f\"'{order_id}'\" for order_id in order_id_list)\n", " sql = f\"\"\"\n", " SELECT\n", " SKU,\n", " sum(as_value_amount) AS 销售额,\n", " sum(product_num) AS 产品数量,\n", " count(order_id) AS 订单数量,\n", " date_format(MAX(order_date), '%%Y-%%m-%%d') AS 最后一次订单日期\n", " FROM\n", " dws.fact_order_product_list\n", " WHERE\n", " SKU = {sku}\n", " AND order_id not IN ({param})\n", " group by SKU\n", " \"\"\"\n", " amount_df = pd.read_sql(sql, con=engine)\n", " if amount_df.empty:\n", " amount = 0\n", " product_num = 0\n", " order_num = 0\n", " last_order_date = None\n", " else:\n", " amount = amount_df[\"销售额\"].values[0]\n", " product_num = amount_df[\"产品数量\"].values[0]\n", " order_num = amount_df[\"订单数量\"].values[0]\n", " last_order_date = amount_df[\"最后一次订单日期\"].values[0]\n", " df.loc[df[\"erp sku\"] == sku, \"不含此次总销售额\"] = amount\n", " df.loc[df[\"erp sku\"] == sku, \"产品数量\"] = product_num\n", " df.loc[df[\"erp sku\"] == sku, \"订单数量\"] = order_num\n", " df.loc[df[\"erp sku\"] == sku, \"最后一次订单日期\"] = last_order_date\n", " print(f\"{sku}的销售额为{amount}, 产品数量为{product_num}, 订单数量为{order_num}, 最后一次订单日期为{last_order_date}\")\n", "\n", "df = df.drop_duplicates(subset=[\"erp sku\", \"订单号\"], keep=\"last\")\n", "df.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils.countryOperator import OperateCountry\n", "from utils.logisticsBill import BillFactory\n", "from utils.Package import Package, Package_group\n", "import pandas as pd\n", "from utils.gtools import MySQLconnect\n", "# 美国 \n", "from utils.logisticsBill import Billing\n", "ods = MySQLconnect('ods')\n", "cursor = ods.connect().cursor()\n", "df = pd.read_excel(r'F:\\DOCUMENTS\\WXWork\\1688854527635889\\Cache\\File\\2025-04\\Litfad 4-10-25 with data 
analysis-2(1).xlsx',\n", " sheet_name='NJ-96004848')\n", "df_grouped= df.groupby('TRACKING#')\n", "calculated_results = []\n", "package_list = \",\".join(str(item) for item in df['ORDER#'].tolist())\n", "print(package_list)\n", "query = f\"SELECT package,length,width,hight,weight FROM `package_vol_info` WHERE `package` IN ({package_list})\"\n", "result = cursor.execute(query)\n", "packages_info = cursor.fetchall()\n", "print(packages_info)\n", "# 将df和result合并\n", "new_df = pd.DataFrame(packages_info, columns=['package', 'length', 'width', 'hight', 'weight'])\n", "df = df.merge(new_df, left_on='ORDER#', right_on='package')\n", "df = df.drop(columns='package')\n", "\n", "\n", "for order_num, group in df_grouped:\n", " # opCountry = OperateCountry(group['目的国'].iloc[0])\n", " opCountry = OperateCountry('US')\n", " postcode = group['ZIPCODE'].iloc[0]\n", " packages= Package_group()\n", " packages_dict = {}\n", " volume_weight = 0\n", " weight = 0\n", " for index,row in group.iterrows():\n", " length = float(row['L*H*W(CM)'].split('*')[0])\n", " width = float(row['L*H*W(CM)'].split('*')[1])\n", " hight = float(row['L*H*W(CM)'].split('*')[2])\n", " weight = row['WEIGHT']\n", " package = Package(row['ORDER#'],length,width,hight,weight)\n", " packages.add_package(package)\n", " # packages_dict[row['包裹号']] = {\n", " # \"长\": row['长'],\n", " # \"宽\": row['宽'],\n", " # \"高\": row['高'],\n", " # \"重量\": row['重量']\n", " # }\n", " # weight += row['重量']/1000\n", " # volume_weight += package.get_volume_weight(6000)\n", " # postcode = row['postcode']\n", " # head_type = 1 if row['运输方式'] == '海运' else 0\n", " try:\n", " bill1 = Billing(str(index),opCountry,packages,postcode,company_name='大健-Metro',head_type=1,beizhu='1')\n", " tail_price1 = bill1.tail_amount[0]\n", " except:\n", " tail_price1 = '出错'\n", " try:\n", " bill2 = Billing(str(index),opCountry,packages,postcode,company_name='大健-CEVA',head_type=1,beizhu='1')\n", " tail_price2 = bill2.tail_amount[0]\n", " except:\n", " 
tail_price2 = '出错'\n", " try:\n", " bill3 = Billing(str(index),opCountry,packages,postcode,company_name='大健-GIGA',head_type=1,beizhu='1')\n", " tail_price3 = bill3.tail_amount[0]\n", " except:\n", " tail_price3 = '出错'\n", " \n", " result = {\n", " 'TRACKING#': order_num,\n", " '大健-MATRO': tail_price1,# Same for all rows in the group\n", " '大健-CEVA':tail_price2,\n", " '大健-GIGA':tail_price3,\n", " }\n", " print(result)\n", " calculated_results.append(result)\n", " # print(packages_dict)\n", "calculated_df = pd.DataFrame(calculated_results)\n", "# 将calculated_df的订单号改为order_id\n", "# calculated_df.rename(columns={'订单号':'order_id'},inplace=True)\n", "calculated_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 取表格数据is_first为1的数据,取spu,长宽高重量系数\n", "import pandas as pd\n", "df = pd.read_excel(r'D:\\test\\logistics\\拦截数据\\一票一件发货订单.xlsx',sheet_name=\"单包裹系数计算\")\n", "# 写一个cal_size算法,输入原属性和新属性,返回涨幅系数\n", "df=df[['is_first','SPU','SKU','长','宽','高','重量']]\n", "base_df = df[df['is_first']==1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "# 取这些SPU下的所有SKU及其现在售价\n", "from sell.sell_price import call_sell_and_order_price\n", "import json\n", "from utils.gtools import MySQLconnect\n", "import pandas as pd\n", "spu_list = (\n", " base_df['SPU']\n", " .apply(pd.to_numeric, errors='coerce') # 转数字,非法的变 NaN\n", " .dropna()\n", " .astype(int)\n", " .astype(str)\n", " .tolist()\n", ")\n", "placeholders = ','.join(['%s'] * len(spu_list)) \n", "# result = []\n", "with MySQLconnect('ods') as db:\n", " enginal = db.engine()\n", " sql = f\"\"\"SELECT\n", " 产品品类,\n", " 产品分类,\n", " SPU,\n", " sku.SKU,\n", " sku.`成本价`,\n", " spi.`包裹数据`,\n", " 物流分摊,\n", " 产品售价\n", " FROM\n", " stg_bayshop_litfad_sku sku\n", " LEFT JOIN stg_bayshop_litfad_spu spu ON sku.`产品PID` = spu.`产品PID` \n", " LEFT JOIN ads.new_erp_sku_size spi ON sku.SKU =spi.SKU\n", " WHERE\n", " spu.SPU IN ({placeholders}) \n", " 
\"\"\"\n", " result = pd.read_sql(sql, enginal,params= tuple(spu_list))\n", "# 合并df\n", "all_df = pd.merge(result,df, on=['SPU','SKU'], how='left')\n", "all_df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from utils.gtools import MySQLconnect\n", "\n", "# 读取需要计算的包裹信息\n", "with MySQLconnect('ods') as db:\n", " sql = r\"\"\" \n", "SELECT\n", "DATE_FORMAT(order_date,'%Y-%m') AS 月份,\n", "order_id,\n", "order_date,\n", "opl.SKU,\n", "CASE WHEN opl.order_product_id LIKE '%\\_%' ESCAPE '\\\\' \n", " AND opl.order_product_id NOT LIKE '%\\_%\\_%' ESCAPE '\\\\' THEN product_num END AS product_num,\n", "DATE_FORMAT(order_date,\"%Y-%m-%d\") AS 订单时间,\n", "\n", "sku.`产品售价`*product_num AS erp售价,\n", "sku.成本价*product_num AS erp采购价,\n", "spvi.erp_package_vol\n", "\n", "FROM\n", "dws.fact_order_product_list opl\n", "LEFT JOIN stg_bayshop_litfad_sku sku ON opl.SKU = sku.SKU\n", "LEFT JOIN dwd.dim_erp_sku_package_vol_info spvi ON opl.SKU = spvi.erp_sku\n", "WHERE\n", "order_date >= \"20240601\"\n", "AND order_date < \"20250701\"\n", "AND opl.SKU <> \"\"\n", "AND opl.order_product_id NOT LIKE '%\\_%\\_%' ESCAPE '\\\\'\n", "GROUP BY order_id,SKU\n", " \"\"\"\n", " df=pd.read_sql(sql,db.con)\n", " # print(df)\n", " # df.to_clipboard(index=False)\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import re\n", "import json\n", "import pandas as pd\n", "\n", "def extract_number(value):\n", " match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n", " return float(match.group()) if match else 0.0\n", "\n", "def calc_volume(row):\n", " try:\n", " package_json = str(row['erp_package_vol'])\n", " if not package_json.startswith('{'):\n", " return 0.0\n", " package_dict = json.loads(package_json)\n", " total_volume = 0.0\n", " # product_num是seri\n", " try:\n", " num = int(row['product_num'])\n", " except:\n", " num = 0\n", " for package in 
package_dict.values():\n", " l = extract_number(package.get('长', 0))\n", " w = extract_number(package.get('宽', 0))\n", " h = extract_number(package.get('高', 0))\n", " total_volume += l * w * h * num\n", " return total_volume\n", " except Exception as e:\n", " print(f\"错行:{row}\\n错误:{e}\")\n", " return 0.0\n", "\n", "# 计算体积\n", "df['体积'] = df.apply(calc_volume, axis=1)\n", "\n", "# order_id聚合,除了 月份,order_id,order_date,保留第一行,其他售价,采购价,体积求和\n", "df_agg = df.groupby(['order_id']).agg({\n", " '月份': 'first', \n", " 'order_id': 'first', \n", " 'order_date': 'first', \n", " '订单时间': 'first', \n", " 'erp售价': 'sum', \n", " 'erp采购价': 'sum', \n", " '体积': 'sum'\n", " })\n", "\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 先把ERP包裹数据拆出来\n", "import re\n", "import json\n", "for index, row in all_df.iterrows():\n", " if not isinstance(row['包裹数据'], str) or not row['包裹数据']:\n", " print(f\"第{index}行包裹数据为空或非字符串,跳过\")\n", " continue\n", " try:\n", " package_dict = json.loads(row['包裹数据'])\n", " except json.JSONDecodeError as e:\n", " print(f\"解析失败:第{index}行,错误信息:{e}\")\n", " continue\n", " try:\n", " for package in package_dict.values():\n", " item = {}\n", " for key, value in package.items():\n", " try:\n", " # 使用正则表达式提取数字部分\n", " number_str = re.findall(r\"[-+]?\\d*\\.\\d+|\\d+\", str(value))\n", " if number_str:\n", " item[key] = float(number_str[0]) # 取第一个匹配到的数字并转换为 float\n", " else:\n", " item[key] = value # 如果没有数字部分,保留原值\n", " except ValueError:\n", " item[key] = value # 如果遇到无法转换的值,保留原值\n", " except AttributeError:\n", " print(f\"解析失败:第{index}行,错误信息:包裹数据为空\")\n", " continue\n", " size = []\n", " size.append(item['长'])\n", " size.append(item['宽'])\n", " size.append(item['高'])\n", " weight = item['重量']\n", " 
size.sort()\n", " length = size[2]\n", " width = size[1]\n", " height = size[0]\n", " all_df.loc[index, 'ERP长'] = length\n", " all_df.loc[index, 'ERP宽'] = width\n", " all_df.loc[index, 'ERP高'] = height\n", " all_df.loc[index, 'ERP重量'] = weight\n", " print(f\"{row['SKU']}尺寸为:{width},h:{height},d:{length},w:{weight}\")\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 计算每个SPU的长宽高重量系数\n", "def cal_size(old,new):\n", " try:\n", " old = float(old)\n", " new = float(new)\n", " if old == 0:\n", " return None # 或 return 0,防止除以0\n", " return (new - old) / old\n", " except (ValueError, TypeError):\n", " return None # 遇到不能转为 float 的就返回 None\n", " \n", "test_df = all_df[all_df['is_first']==1]\n", "# 取基准数据SPU的系数\n", "for index, row in test_df.iterrows():\n", " test_df.loc[index, '长系数'] = cal_size(row['ERP长'],row['长'])\n", " test_df.loc[index, '宽系数'] = cal_size(row['ERP宽'],row['宽'])\n", " test_df.loc[index, '高系数'] = cal_size(row['ERP高'],row['高'])\n", " test_df.loc[index, '重量系数'] = cal_size(row['ERP重量'],row['重量'])\n", " print(\n", " f\"{row['SPU']} 的系数为 \"\n", " f\"{test_df.loc[index, '长系数']}, \"\n", " f\"{test_df.loc[index, '宽系数']}, \"\n", " f\"{test_df.loc[index, '高系数']}, \"\n", " f\"{test_df.loc[index, '重量系数']}\"\n", " )\n", "# 将SPU的基准系数合并至all_df\n", "all_df = pd.merge(all_df, test_df[['SPU', '长系数', '宽系数', '高系数', '重量系数']], on='SPU', how='left')\n", "all_df.to_excel('单包裹SKU售价分析.xlsx', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "all_df = pd.read_excel('单包裹SKU售价分析.xlsx')\n", "\n", "# 根据系数 得到所有SKU的预测尺寸\n", "# 根据系数计算每个SKU的理论尺寸\n", "all_df['理论长'] = ((1 + all_df['长系数']) * all_df['ERP长']).round(2)\n", "all_df['理论宽'] = ((1 + all_df['宽系数']) * all_df['ERP宽']).round(2)\n", "all_df['理论高'] = ((1 + all_df['高系数']) * all_df['ERP高']).round(2)\n", "all_df['理论重量'] = ((1 + all_df['重量系数']) * all_df['ERP重量']).round(2)\n", "\n", "# 计算三种尺寸下的售价\n", "# 
计算预测后的尺寸下,一票一件订单的售价和订单价格\n", "from sell.sell_price import call_sell_and_order_price\n", "for index,row in all_df.iterrows():\n", " price = row['成本价']\n", " package_dict1={}\n", " package_dict2={}\n", " package_dict3={}\n", " try:\n", " package_dict1['包裹1'] = {}\n", " package_dict2['包裹1'] = {}\n", " package_dict3['包裹1'] = {}\n", " package_dict1['包裹1']['长'] = row['长']\n", " package_dict1['包裹1']['宽'] = row['宽']\n", " package_dict1['包裹1']['高'] = row['高']\n", " package_dict1['包裹1']['重量'] = row['重量']\n", " package_dict2['包裹1']['长'] = row['理论长']\n", " package_dict2['包裹1']['宽'] = row['理论宽'] \n", " package_dict2['包裹1']['高'] = row['理论高']\n", " package_dict2['包裹1']['重量'] = row['理论重量']\n", " package_dict3['包裹1']['长'] = row['ERP长']\n", " package_dict3['包裹1']['宽'] = row['ERP宽'] \n", " package_dict3['包裹1']['高'] = row['ERP高']\n", " package_dict3['包裹1']['重量'] = row['ERP重量']\n", " sell_price1, order_price1, order_type1 = call_sell_and_order_price(price, package_dict1)\n", " sell_price2, order_price2, order_type2 = call_sell_and_order_price(price, package_dict2)\n", " sell_price3, order_price3, order_type3 = call_sell_and_order_price(price, package_dict3)\n", " except Exception as e:\n", " print(f\"SKU: {row['SKU']} 报错: {e}\")\n", " continue\n", " all_df.loc[index, 'ERP售价'] = sell_price3\n", " all_df.loc[index, '实际体积售价'] = sell_price1\n", " all_df.loc[index, '理论体积售价'] = sell_price2\n", "\n", " all_df.loc[index, 'ERP订单价'] = order_price3\n", " all_df.loc[index, '实际体积订单价'] = order_price1\n", " all_df.loc[index, '理论体积订单价'] = order_price2\n", " # all_df.loc[index, '理论体积订单类型'] = order_type2\n", " print(f\"SPU: {row['SPU']}, SKU {row['SKU']} ,网站售价: {row['产品售价']}, ERP售价: {sell_price3}, 实际体积售价: {sell_price1}, 理论体积售价: {sell_price2},\")\n", "all_df.to_excel('单包裹SKU售价分析.xlsx', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "all_df = pd.read_excel('单包裹SKU售价分析1.xlsx',sheet_name=\"Sheet1\")\n", "all_df" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "all_df = all_df[(all_df['是否有过修改记录']==\"否\")&(all_df['使用尺寸售价']!=\"ERP售价\")]\n", "all_df['SPU最大涨幅']=all_df.groupby('SPU')['售价涨跌幅'].transform(max)\n", "all_df['SPU最小涨幅']=all_df.groupby('SPU')['售价涨跌幅'].transform(min)\n", "\n", "filtered_df = all_df[(all_df['SPU最大涨幅'] <= 0.5) & (all_df['SPU最小涨幅'] >= -0.5)]\n", "for index,row in filtered_df.iterrows():\n", " if row['使用尺寸售价']==\"实际体积售价\":\n", " length = str(row['长'])\n", " width = str(row['宽'])\n", " height = str(row['高'])\n", " weight = str(row['重量'])\n", " else:\n", " length = str(row['理论长'])\n", " width = str(row['理论宽'])\n", " height = str(row['理论高'])\n", " weight = str(row['理论重量'])\n", " filtered_df.loc[index, '尺寸重量'] = f\"{weight}|{length}*{width}*{height}*1,\"\n", "\n", " print(index)\n", "spu_list = filtered_df['SPU'].unique()\n", "filtered_df = filtered_df[['SKU','成本价','尺寸重量']]\n", "filtered_df " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "filtered_df.to_excel(\"product_property_data.xlsx\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "df=pd.read_clipboard()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "D, E, F = [], [], []\n", "temp_sum = 0\n", "\n", "for i in range(len(df)):\n", " temp_sum += df.loc[i, '未售出']\n", " if df.loc[i, '已售出'] != 0:\n", " D.append(df.loc[i, '价格'])\n", " E.append(temp_sum)\n", " F.append(df.loc[i, '已售出'])\n", " temp_sum = 0 # 重置\n", "\n", "# 结果\n", "result = pd.DataFrame({'D': D, 'E': E, 'F': F})\n", "result.to_clipboard(index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def uk_package(packages_real):\n", " try:\n", " packages = json.loads(packages_real)\n", " p_num = len(packages)\n", " volume_weight_total = 0\n", " for key,package in 
packages.items():\n", " side_list = [float(i) for i in [package[\"长\"], package[\"宽\"], package[\"高\"]]]\n", " side_list.sort(reverse=True)\n", " volume_weight = side_list[0] * side_list[1] * side_list[2]/6000\n", " volume_weight_total += volume_weight\n", " express_type = \"快递\"\n", " girth = side_list[0] + side_list[1] * 2 + side_list[2] * 2\n", "\n", "\n", " if side_list[0] > 300 or side_list[1] > 180 or int(package[\"重量\"]) >= 150000 or volume_weight >95000 :\n", " express_type = \"卡派\"\n", " break\n", " if side_list[0] > 175 or girth > 339 or int(package[\"重量\"]) >= 40000 :\n", " express_type = \"大件\" if express_type != \"卡派\" else \"卡派\"\n", " elif side_list[0] > 100 or side_list[0] > 60 or int(package[\"重量\"]) >= 30000 :\n", " express_type = \"快递超长\" if express_type == \"快递超长\" or express_type == \"快递\" else express_type\n", " else :\n", " express_type = \"快递\" if express_type == \"快递\" else express_type\n", "\n", " return pd.Series({\"尾端物流类型\":express_type})\n", " except Exception as e:\n", " print(e)\n", " return pd.Series({\"尾端物流类型\":\"未知\"})\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def order_freight_price(packages_real,express_type):\n", " try:\n", " packages = json.loads(packages_real)\n", " p_num = len(packages)\n", " volume_weight_total = 0\n", " express_fee = 0\n", " if express_type == \"卡派\":\n", " express_fee = 55\n", " else :\n", " for key,package in packages.items():\n", " side_list = [float(i) for i in [package[\"长\"], package[\"宽\"], package[\"高\"]]]\n", " side_list.sort(reverse=True)\n", " volume_weight = side_list[0] * side_list[1] * side_list[2]/6000\n", " volume_weight_total += volume_weight\n", " girth = side_list[0] + side_list[1] * 2 + side_list[2] * 2\n", " if int(package[\"重量\"]) <=5000 :\n", " express_fee = max(express_fee,4.99)\n", " elif int(package[\"重量\"]) <=10000 :\n", " express_fee = max(express_fee,6.99)\n", " elif int(package[\"重量\"]) <=20000 :\n", " express_fee = 
max(express_fee,9.99)\n", " else :\n", " express_fee = max(express_fee,14.99)\n", "\n", " if side_list[0] > 175 or int(package[\"重量\"]) >= 30000:\n", " express_fee = max(express_fee,19.99)\n", " elif side_list[0] > 120 :\n", " express_fee = max(express_fee,14.99)\n", "\n", "\n", " return pd.Series({\"订单尾端费用\":express_fee})\n", " except:\n", " return pd.Series({\"订单尾端费用\":0})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import re\n", "df=pd.read_clipboard()\n", "def clean_data(row):\n", " #250221074014717&250222211202474&250222215014600&\t250227073002821\n", " #提取它的数字部分,使用正则提取\n", " pattern = re.compile(r'\\d+')\n", " numbers = re.findall(pattern, str(row))\n", " lenth = len(numbers)\n", " return pd.Series([lenth, numbers])\n", "\n", "df[['单数', '号码']] = df['订单号'].apply(clean_data)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "new_rows = []\n", "\n", "for index, row in df.iterrows():\n", " print(index)\n", " if row['单数'] == 0:\n", " new_rows.append({**row, 'order_id': j,'单号长': 0})\n", " else:\n", " for j in row['号码']:\n", " if len(j)==7 or left(j,3)=='25':\n", " new_rows.append({**row, 'order_id': j,'单号长': len(j)})\n", "\n", " \n", "\n", "new_df = pd.DataFrame(new_rows)\n", "new_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# new_df的order_id列中有重复的订单号,需要去重,按照创建日期排序,保留创建日期最早的那个\n", "new_df = new_df.sort_values(by=['工单号'])\n", "new_df = new_df.drop_duplicates(subset=['order_id'], keep='first')\n", "new_df.to_clipboard(index=False)\n", "new_df\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Conversation(id='conv_6926c33b9c988194acabfd4ad198556e002f691818f05aa5', created_at=1764148027, metadata={}, object='conversation')\n", "按你这三个关键词来设计: \n", "- 风格:超短 \n", "- 受众:完全小白 \n", "- 卖点:板块情绪 + 找“绝对核心龙头”\n", 
"\n", "直接给你一套可落地的抖音账号方案。\n", "\n", "---\n", "\n", "## 一、先把你的人设一句话写清楚\n", "\n", "你现在的信息翻译一下: \n", "> 做超短线,不和你讲复杂技术,专门给你看板块情绪,告诉你谁是龙头。\n", "\n", "可以这样写成一句定位(任选/微调):\n", "\n", "1. 「超短情绪交易|只讲板块和龙头,不讲废话」 \n", "2. 「A股超短情绪分析|带你看懂板块核心龙头」 \n", "3. 「专做A股情绪周期|帮你认清每一波的真龙头」\n", "\n", "这句话用在: \n", "- 抖音主页简介第一行 \n", "- 视频第一秒的口播,比如:“我是XX,一个做超短情绪的交易者。”\n", "\n", "---\n", "\n", "## 二、账号基础包装(直接给模板)\n", "\n", "1. 账号名示例(自己替换“XX”) \n", " - 「XX说情绪」 \n", " - 「XX聊龙头」 \n", " - 「XX超短情绪」 \n", " - 「A股情绪观察员XX」\n", "\n", "2. 主页简介模板(你可以直接改名就用)\n", "\n", "> A股超短实战 \n", "> 主讲:板块情绪周期 / 龙头选拔 \n", "> 不荐股不带单,只讲我自己的交易思路 \n", "> 仅作个人复盘记录,非投资建议\n", "\n", "3. 头像 \n", " - 优先:你本人正脸/侧脸 + 简单字(“情绪 / 龙头 / 超短”三选一) \n", " - 不露脸就用统一风格Logo,例如深色底 + 白字“情绪周期”\n", "\n", "---\n", "\n", "## 三、内容设计:为“完全小白”重构你的专业\n", "\n", "你擅长的是很专业的一块,但受众是小白,核心是: \n", "- 不讲深邃理论 \n", "- 讲「看得见、听得懂、记得住」的东西\n", "\n", "### 1)必备的固定栏目(建议至少两个)\n", "\n", "① 《今日A股情绪温度·30秒版》 \n", " 每个交易日收盘后 30–60 秒讲三件事: \n", " - 今天情绪是:冰点 / 修复 / 高潮 / 退潮 \n", " - 主线板块是谁?(最多说 1–2 个) \n", " - 哪个是真龙头?为什么是它?(一句话逻辑)\n", "\n", "② 《这一波行情的真龙头是谁?》系列 \n", " - 一期只讲一个板块的一只“绝对核心” \n", " - 用最近几天的K线 + 板块涨幅录屏简单解释: \n", " - 为什么它是核心(涨幅、换手、封板质量、带动性) \n", " - 什么时候已经不是核心了(开板、分歧、补涨上位等)\n", "\n", "③ 《情绪周期小白课》系列(慢慢打基础) \n", " 每条控制在 60 秒左右,一次只讲一个点,例如: \n", " - 什么叫:冰点 / 修复 / 高潮 / 退潮(用历史例子) \n", " - 龙头、补涨龙、杂毛票,各是什么?举例+图 \n", " - 超短里最容易亏钱的3个时间点: \n", " - 情绪高潮末期去追板 \n", " - 龙头走完接补涨末端 \n", " - 退潮期硬做反包\n", "\n", "你先选 2 个栏目,保证能坚持,而不是一上来啥都做。\n", "\n", "---\n", "\n", "## 四、每条视频的讲解结构(照这个模板来)\n", "\n", "超短+小白,建议用统一结构,观众容易养成习惯。\n", "\n", "### 模板 A:盘后情绪视频(30–60秒)\n", "\n", "1. 开头 3 秒: \n", " - 「今天A股情绪:XX。」(比如:冰点/修复/小高潮) \n", "\n", "2. 中间 20–40 秒: \n", " - 1 句话说指数状态(跌多涨多) \n", " - 2 句话说主线板块 \n", " - 1 句话点出“今天谁是板块里的真核心,为什么” \n", " - 如:“算力板里,真正的龙头是XX,因为它一字打穿、换手最高、带动了整个板块。”\n", "\n", "3. 尾巴 5–10 秒: \n", " - 风险提示: \n", " - 「现在已经是情绪XX阶段,小白不要瞎追。」 \n", " - 「只是我的盘后记录,不建议你照着买。」\n", "\n", "### 模板 B:单支龙头拆解视频(60–90秒)\n", "\n", "1. 开头: \n", " - 「这波XX题材,真正的龙头是它,而不是你以为的那只。」\n", "\n", "2. 
正文三点: \n", " - 它是怎么从板块里“被选出来”的: \n", " - 第一个涨停 / 连板最多 / 首个反包 / 放量换手最好 \n", " - 它如何带动: \n", " - 带起板块涨停潮 / 反复给板块做情绪修复 \n", " - 它什么时候“退位”: \n", " - 高位天地、断板放巨量、补涨上来抢风头\n", "\n", "3. 结尾: \n", " - 「现在它处在XX阶段(如高位分歧/退潮),新手别接最后一棒。」\n", "\n", "---\n", "\n", "## 五、把“板块情绪”和“绝对核心”讲小白能懂\n", "\n", "你可以准备几套固定说法,反复用:\n", "\n", "1. 情绪阶段,用生活比喻: \n", " - 冰点:大家都怕,不敢买(涨停家数少、放量大跌多) \n", " - 修复:突然有人敢出手,出现明星股带头(龙头首板/反包) \n", " - 高潮:什么都涨、连板一片、大家打板都赚钱(高位风险最大) \n", " - 退潮:龙头走弱、补涨乱飞、炸板多,新手进去基本挨打\n", "\n", "2. 什么是“绝对核心龙头”(小白版定义): \n", " - 涨得最快、最猛、最早启动的那只 \n", " - 整个板块涨停,它往往是最先封板、最难被砸开的 \n", " - 板块回调时,它率先止跌、带着板块重新冲起来\n", "\n", "3. 你可以明确告诉小白: \n", " - 「我不带你去打这些票,我只是告诉你: \n", " 什么时候是龙头赚钱的阶段,什么时候是给人接盘的阶段。」 \n", "\n", "既展示你能力,又不踩“荐股”红线。\n", "\n", "---\n", "\n", "## 六、第一批内容的具体安排(可以直接照抄题目)\n", "\n", "先准备 7–10 条视频当“启动库存”:\n", "\n", "1)《我是XX,一个做超短情绪的交易者》 \n", " - 你超短几年了、主要做什么段位(打板/接力/混沌) \n", " - 你这个号以后每天/每周会固定输出什么\n", "\n", "2)《20秒教你看懂:今天A股情绪是冷还是热》 \n", " - 用历史某天举例,解释“冰点”和“高潮”的差异(涨停数+板块情况)\n", "\n", "3)《这一波XX题材,真正的龙头是它》 \n", " - 挑一波你熟悉的经典行情案例来讲(不用讲现在的,讲历史更安全) \n", "\n", "4)《为什么90%的散户追板都死在“高潮末期”》 \n", " - 讲情绪高潮末尾的特征: \n", " - 涨停家数爆多 / 连板高度见顶 / 补涨杂毛一堆 \n", " - 明确说:这个阶段小白最容易亏钱\n", "\n", "5)《超短情绪里的3个典型角色:龙头、补涨、杂毛》 \n", " - 用简图/录屏 K 线画出 3 个区别 \n", " - 各举1个历史例子\n", "\n", "6)《我认定“龙头”的4个标准》 \n", " - 启动最早 / 板块带动性 / 换手质量 / 情绪修复能力 \n", " - 每条用1句白话解释 + 盘面截图\n", "\n", "后面再逐渐加: \n", "- 每日盘后情绪复盘 \n", "- 单个板块的龙头拆解 \n", "- 粉丝评论区问某个板块,你录屏讲5分钟变成60秒剪辑\n", "\n", "---\n", "\n", "## 七、合规和话术安全线(做超短题材更要注意)\n", "\n", "1. 永远不要说: \n", " - 「明天跟我一起上」 \n", " - 「这只明天还能干」 \n", " - 「目标价××,冲」 \n", " - 「今天尾盘上车,明天大概率核不掉」\n", "\n", "2. 统一用: \n", " - 「这是我盘后的个人复盘记录,不构成投资建议。」 \n", " - 「我自己会把它当×阶段,但不建议小白去接。」 \n", " - 「你要真想做,仓位自己控制,我不会带单。」\n", "\n", "---\n", "\n", "## 八、你现在可以马上做的两件事\n", "\n", "1. 选定并写下来: \n", " - 抖音号名(比如“XX说情绪”) \n", " - 一句话定位(比如“专讲A股超短情绪和龙头”) \n", " - 两个固定栏目名称: \n", " - 《今日A股情绪温度》 \n", " - 《这一波的真龙头是谁?》\n", "\n", "2. 
先写 3 条视频的草稿(每条几行就行): \n", " - 第1条:自我介绍 + 账号能给粉丝什么 \n", " - 第2条:用历史上一波题材行情,讲清“真龙头”是咋选出来的 \n", " - 第3条:用一个极端例子讲“高潮末期追板是怎么被埋的”\n", "\n", "如果你愿意,可以把: \n", "- 你想好的账号名 \n", "- 你的一句话定位草稿 \n", "- 打算做的两个固定栏目名字 \n", "\n", "发给我,我可以帮你把这几句文案改得更利于涨粉、又不踩红线。\n" ] } ], "source": [ "import os\n", "\n", "from openai import OpenAI\n", "\n", "text = \"超短,完全小白,讲板块情绪,擅长找出各板块的绝对核心\"\n", "# SECURITY: never hardcode API keys in a notebook. The literal key previously\n", "# committed here must be treated as leaked -- revoke/rotate it. The key is now\n", "# read from the environment instead.\n", "client = OpenAI(\n", "    api_key=os.environ[\"OPENAI_API_KEY\"]\n", "    )\n", "conversation_object = client.conversations.create(\n", "\n", ")\n", "history = []\n", "print(conversation_object) \n", "# NOTE(review): conversation_object is created above but a hardcoded\n", "# conversation id is passed below -- confirm which one is intended.\n", "response = client.responses.create(\n", "    conversation = 'conv_6926bdf9368c8195abbb9530da3abc200d904bb0cb286f5a',\n", "    model=\"gpt-5.1\",\n", "    input=text\n", ")\n", "\n", "print(response.output_text)\n", "history.append((response.output_text))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting openai\n", " Obtaining dependency information for openai from https://files.pythonhosted.org/packages/55/4f/dbc0c124c40cb390508a82770fb9f6e3ed162560181a85089191a851c59a/openai-2.8.1-py3-none-any.whl.metadata\n", " Downloading openai-2.8.1-py3-none-any.whl.metadata (29 kB)\n", "Requirement already satisfied: anyio<5,>=3.5.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from openai) (3.5.0)\n", "Collecting distro<2,>=1.7.0 (from openai)\n", " Obtaining dependency information for distro<2,>=1.7.0 from https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl.metadata\n", " Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\n", "Requirement already satisfied: httpx<1,>=0.23.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from openai) (0.28.1)\n", "Collecting jiter<1,>=0.10.0 (from openai)\n", " Obtaining dependency information for jiter<1,>=0.10.0 from 
https://files.pythonhosted.org/packages/bb/60/1032b30ae0572196b0de0e87dce3b6c26a1eff71aad5fe43dee3082d32e0/jiter-0.12.0-cp311-cp311-win_amd64.whl.metadata\n", " Downloading jiter-0.12.0-cp311-cp311-win_amd64.whl.metadata (5.3 kB)\n", "Requirement already satisfied: pydantic<3,>=1.9.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from openai) (2.11.7)\n", "Requirement already satisfied: sniffio in c:\\programdata\\anaconda3\\lib\\site-packages (from openai) (1.2.0)\n", "Requirement already satisfied: tqdm>4 in c:\\programdata\\anaconda3\\lib\\site-packages (from openai) (4.65.0)\n", "Requirement already satisfied: typing-extensions<5,>=4.11 in c:\\programdata\\anaconda3\\lib\\site-packages (from openai) (4.12.2)\n", "Requirement already satisfied: idna>=2.8 in c:\\programdata\\anaconda3\\lib\\site-packages (from anyio<5,>=3.5.0->openai) (2.10)\n", "Requirement already satisfied: certifi in c:\\programdata\\anaconda3\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (2024.6.2)\n", "Requirement already satisfied: httpcore==1.* in c:\\programdata\\anaconda3\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (1.0.9)\n", "Requirement already satisfied: h11>=0.16 in c:\\programdata\\anaconda3\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.16.0)\n", "Requirement already satisfied: annotated-types>=0.6.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.33.2 in c:\\programdata\\anaconda3\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (2.33.2)\n", "Requirement already satisfied: typing-inspection>=0.4.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (0.4.1)\n", "Requirement already satisfied: colorama in c:\\programdata\\anaconda3\\lib\\site-packages (from tqdm>4->openai) (0.4.6)\n", "Downloading openai-2.8.1-py3-none-any.whl (1.0 MB)\n", " ---------------------------------------- 0.0/1.0 MB ? 
eta -:--:--\n", " ---------------------------------------- 0.0/1.0 MB ? eta -:--:--\n", " - -------------------------------------- 0.0/1.0 MB 653.6 kB/s eta 0:00:02\n", " - -------------------------------------- 0.0/1.0 MB 653.6 kB/s eta 0:00:02\n", " -- ------------------------------------- 0.1/1.0 MB 363.1 kB/s eta 0:00:03\n", " --- ------------------------------------ 0.1/1.0 MB 476.3 kB/s eta 0:00:02\n", " ----- ---------------------------------- 0.1/1.0 MB 607.9 kB/s eta 0:00:02\n", " ------ --------------------------------- 0.2/1.0 MB 697.2 kB/s eta 0:00:02\n", " ------- -------------------------------- 0.2/1.0 MB 556.2 kB/s eta 0:00:02\n", " --------- ------------------------------ 0.2/1.0 MB 654.9 kB/s eta 0:00:02\n", " ----------- ---------------------------- 0.3/1.0 MB 707.1 kB/s eta 0:00:02\n", " --------------- ------------------------ 0.4/1.0 MB 922.1 kB/s eta 0:00:01\n", " ----------------- ---------------------- 0.5/1.0 MB 938.8 kB/s eta 0:00:01\n", " ------------------- -------------------- 0.5/1.0 MB 952.6 kB/s eta 0:00:01\n", " ---------------------- ----------------- 0.6/1.0 MB 1.0 MB/s eta 0:00:01\n", " -------------------------------- ------- 0.8/1.0 MB 1.4 MB/s eta 0:00:01\n", " -------------------------------------- - 1.0/1.0 MB 1.5 MB/s eta 0:00:01\n", " ---------------------------------------- 1.0/1.0 MB 1.5 MB/s eta 0:00:00\n", "Downloading distro-1.9.0-py3-none-any.whl (20 kB)\n", "Downloading jiter-0.12.0-cp311-cp311-win_amd64.whl (204 kB)\n", " ---------------------------------------- 0.0/204.9 kB ? 
eta -:--:--\n", " --------------------------------------- 204.9/204.9 kB 13.0 MB/s eta 0:00:00\n", "Installing collected packages: jiter, distro, openai\n", "Successfully installed distro-1.9.0 jiter-0.12.0 openai-2.8.1\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install openai" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 2 }