# Export monthly average closing prices, merged with float-share data,
# from MySQL tables to CSV and Excel files.
# # # import csv
|
||
# # # import pandas as pd
|
||
# # # from MySQLHelper import MySQLHelper
|
||
# # # import logging
|
||
# # # from typing import List, Dict, Optional
|
||
# # # from datetime import datetime
|
||
|
||
# # # def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
|
||
# # # """
|
||
# # # 从数据库读取月度均值数据
|
||
|
||
# # # Args:
|
||
# # # db_config: 数据库配置
|
||
# # # table_name: 源数据表名
|
||
|
||
# # # Returns:
|
||
# # # List[Dict]: 查询结果数据集,失败返回None
|
||
# # # """
|
||
# # # try:
|
||
# # # with MySQLHelper(**db_config) as db:
|
||
# # # # 获取表结构信息
|
||
# # # columns = db.execute_query(f"""
|
||
# # # SELECT COLUMN_NAME
|
||
# # # FROM INFORMATION_SCHEMA.COLUMNS
|
||
# # # WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
|
||
# # # ORDER BY ORDINAL_POSITION
|
||
# # # """, (db_config['database'], table_name))
|
||
|
||
# # # if not columns:
|
||
# # # logging.error(f"表 {table_name} 不存在或没有列")
|
||
# # # return None
|
||
|
||
# # # # 获取列名列表(排除id和update_time)
|
||
# # # field_names = [col['COLUMN_NAME'] for col in columns
|
||
# # # if col['COLUMN_NAME'] not in ('id', 'update_time')]
|
||
|
||
# # # # 查询数据
|
||
# # # data = db.execute_query(f"""
|
||
# # # SELECT {', '.join(field_names)}
|
||
# # # FROM {table_name}
|
||
# # # ORDER BY stock_code
|
||
# # # """)
|
||
|
||
# # # if not data:
|
||
# # # logging.error(f"表 {table_name} 中没有数据")
|
||
# # # return None
|
||
|
||
# # # return data
|
||
|
||
# # # except Exception as e:
|
||
# # # logging.error(f"从数据库读取数据失败: {str(e)}")
|
||
# # # return None
|
||
|
||
# # # def export_to_csv(data: List[Dict], output_file: str) -> bool:
|
||
# # # """
|
||
# # # 将数据导出到CSV文件
|
||
|
||
# # # Args:
|
||
# # # data: 要导出的数据集
|
||
# # # output_file: 输出的CSV文件路径
|
||
|
||
# # # Returns:
|
||
# # # bool: 是否导出成功
|
||
# # # """
|
||
# # # if not data:
|
||
# # # return False
|
||
|
||
# # # try:
|
||
# # # # 获取字段名(使用第一个数据的键)
|
||
# # # field_names = list(data[0].keys())
|
||
|
||
# # # # 字段名到中文的映射
|
||
# # # header_map = {
|
||
# # # 'stock_code': '股票代码',
|
||
# # # 'stock_name': '股票名称',
|
||
# # # 'ym_2410': '2024年10月均收盘价',
|
||
# # # 'ym_2411': '2024年11月均收盘价',
|
||
# # # 'ym_2412': '2024年12月均收盘价',
|
||
# # # 'ym_2501': '2025年1月均收盘价',
|
||
# # # 'ym_2502': '2025年2月均收盘价',
|
||
# # # 'ym_2503': '2025年3月均收盘价',
|
||
# # # 'ym_2504': '2025年4月均收盘价',
|
||
# # # 'ym_2505': '2025年5月均收盘价',
|
||
# # # 'ym_2506': '2025年6月均收盘价',
|
||
# # # 'ym_2507': '2025年7月均收盘价',
|
||
# # # 'ym_2508': '2025年8月均收盘价'
|
||
# # # }
|
||
|
||
# # # with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile:
|
||
# # # writer = csv.DictWriter(csvfile, fieldnames=field_names)
|
||
|
||
# # # # 写入中文表头
|
||
# # # writer.writerow({col: header_map.get(col, col) for col in field_names})
|
||
|
||
# # # # 写入数据
|
||
# # # writer.writerows(data)
|
||
|
||
# # # logging.info(f"成功导出 {len(data)} 条记录到CSV文件: {output_file}")
|
||
# # # return True
|
||
|
||
# # # except Exception as e:
|
||
# # # logging.error(f"导出到CSV失败: {str(e)}")
|
||
# # # return False
|
||
|
||
# # # def export_to_excel(data: List[Dict], output_file: str) -> bool:
|
||
# # # """
|
||
# # # 将数据导出为Excel文件(包含多个工作表)
|
||
|
||
# # # Args:
|
||
# # # data: 要导出的数据集
|
||
# # # output_file: 输出的Excel文件路径
|
||
|
||
# # # Returns:
|
||
# # # bool: 是否导出成功
|
||
# # # """
|
||
# # # if not data:
|
||
# # # return False
|
||
|
||
# # # try:
|
||
# # # # 转换为DataFrame
|
||
# # # df = pd.DataFrame(data)
|
||
|
||
# # # # 设置股票代码为索引
|
||
# # # if 'stock_code' in df.columns:
|
||
# # # df.set_index('stock_code', inplace=True)
|
||
|
||
# # # # 创建Excel writer对象
|
||
# # # with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
|
||
# # # # 1. 原始数据工作表
|
||
# # # df.to_excel(writer, sheet_name='原始数据')
|
||
|
||
# # # # 2. 统计信息工作表(仅当有数值列时)
|
||
# # # numeric_cols = [col for col in df.columns if col.startswith('ym_') and pd.api.types.is_numeric_dtype(df[col])]
|
||
|
||
# # # if numeric_cols:
|
||
# # # try:
|
||
# # # stats = df[numeric_cols].describe().loc[['mean', 'min', 'max', 'std']]
|
||
# # # stats.to_excel(writer, sheet_name='统计信息')
|
||
# # # except KeyError:
|
||
# # # logging.warning("无法生成完整的统计信息,数据可能不足")
|
||
# # # # 生成简化版统计信息
|
||
# # # stats = df[numeric_cols].agg(['mean', 'min', 'max', 'std'])
|
||
# # # stats.to_excel(writer, sheet_name='统计信息')
|
||
|
||
# # # # 3. 涨幅排名工作表(需要至少两个月份数据)
|
||
# # # if len(numeric_cols) >= 2:
|
||
# # # first_month = numeric_cols[0]
|
||
# # # last_month = numeric_cols[-1]
|
||
|
||
# # # try:
|
||
# # # df['涨幅(%)'] = (df[last_month] - df[first_month]) / df[first_month] * 100
|
||
# # # result_df = df[['stock_name', '涨幅(%)']].copy()
|
||
# # # result_df.dropna(subset=['涨幅(%)'], inplace=True)
|
||
# # # result_df.sort_values('涨幅(%)', ascending=False, inplace=True)
|
||
# # # result_df.to_excel(writer, sheet_name='涨幅排名')
|
||
# # # except Exception as e:
|
||
# # # logging.warning(f"无法计算涨幅: {str(e)}")
|
||
|
||
# # # # 4. 月度趋势工作表
|
||
# # # if numeric_cols:
|
||
# # # try:
|
||
# # # trend_df = df[numeric_cols].transpose()
|
||
# # # trend_df.index = [col.replace('ym_', '') for col in numeric_cols]
|
||
# # # trend_df.to_excel(writer, sheet_name='月度趋势')
|
||
# # # except Exception as e:
|
||
# # # logging.warning(f"无法生成月度趋势: {str(e)}")
|
||
|
||
# # # logging.info(f"成功导出Excel文件: {output_file}")
|
||
# # # return True
|
||
|
||
# # # except Exception as e:
|
||
# # # logging.error(f"导出Excel失败: {str(e)}")
|
||
# # # return False
|
||
|
||
# # # def export_monthly_avg_data(db_config: dict,
|
||
# # # table_name: str,
|
||
# # # csv_file: str = None,
|
||
# # # excel_file: str = None) -> bool:
|
||
# # # """
|
||
# # # 导出月度均值数据到CSV和/或Excel
|
||
|
||
# # # Args:
|
||
# # # db_config: 数据库配置
|
||
# # # table_name: 源数据表名
|
||
# # # csv_file: CSV输出路径(可选)
|
||
# # # excel_file: Excel输出路径(可选)
|
||
|
||
# # # Returns:
|
||
# # # bool: 是否至少有一种格式导出成功
|
||
# # # """
|
||
# # # if not csv_file and not excel_file:
|
||
# # # logging.error("必须指定至少一种输出格式")
|
||
# # # return False
|
||
|
||
# # # # 从数据库获取数据
|
||
# # # data = get_monthly_avg_data(db_config, table_name)
|
||
# # # if not data:
|
||
# # # return False
|
||
|
||
# # # # 导出结果
|
||
# # # csv_success = True
|
||
# # # excel_success = True
|
||
|
||
# # # if csv_file:
|
||
# # # csv_success = export_to_csv(data, csv_file)
|
||
|
||
# # # if excel_file:
|
||
# # # excel_success = export_to_excel(data, excel_file)
|
||
|
||
# # # return csv_success or excel_success
|
||
|
||
# # # # 使用示例
|
||
# # # if __name__ == "__main__":
|
||
# # # # 配置日志
|
||
# # # logging.basicConfig(
|
||
# # # level=logging.INFO,
|
||
# # # format='%(asctime)s - %(levelname)s - %(message)s',
|
||
# # # handlers=[
|
||
# # # logging.FileHandler('Debug.log', encoding='utf-8'), # 关键在这里
|
||
# # # logging.StreamHandler()
|
||
# # # ]
|
||
# # # )
|
||
|
||
# # # # 数据库配置
|
||
# # # db_config = {
|
||
# # # 'host': 'localhost',
|
||
# # # 'user': 'root',
|
||
# # # 'password': 'bzskmysql',
|
||
# # # 'database': 'klinedata_1d_hk'
|
||
# # # }
|
||
|
||
# # # # 导出数据
|
||
# # # success = export_monthly_avg_data(
|
||
# # # db_config=db_config,
|
||
# # # table_name="hk_monthly_avg_2410_2508", # 你实际使用的表名
|
||
# # # csv_file="hk_stocks_monthly_avg_202410-202508.csv", # CSV输出文件
|
||
# # # excel_file="hk_stocks_monthly_avg_202410-202508.xlsx" # Excel输出文件
|
||
# # # )
|
||
|
||
# # # if success:
|
||
# # # logging.info("数据导出成功完成")
|
||
# # # else:
|
||
# # # logging.error("数据导出过程中出现错误")
|
||
|
||
|
||
# # import csv
|
||
# # import pandas as pd
|
||
# # from MySQLHelper import MySQLHelper
|
||
# # import logging
|
||
# # from typing import List, Dict, Optional
|
||
# # from datetime import datetime
|
||
|
||
# # def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
|
||
# # """
|
||
# # 从数据库读取月度均值数据
|
||
|
||
# # Args:
|
||
# # db_config: 数据库配置
|
||
# # table_name: 源数据表名
|
||
|
||
# # Returns:
|
||
# # List[Dict]: 查询结果数据集,失败返回None
|
||
# # """
|
||
# # try:
|
||
# # with MySQLHelper(**db_config) as db:
|
||
# # # 获取表结构信息
|
||
# # columns = db.execute_query(f"""
|
||
# # SELECT COLUMN_NAME
|
||
# # FROM INFORMATION_SCHEMA.COLUMNS
|
||
# # WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
|
||
# # ORDER BY ORDINAL_POSITION
|
||
# # """, (db_config['database'], table_name))
|
||
|
||
# # if not columns:
|
||
# # logging.error(f"表 {table_name} 不存在或没有列")
|
||
# # return None
|
||
|
||
# # # 获取列名列表(排除id和update_time)
|
||
# # field_names = [col['COLUMN_NAME'] for col in columns
|
||
# # if col['COLUMN_NAME'] not in ('id', 'update_time')]
|
||
|
||
# # # 查询数据
|
||
# # data = db.execute_query(f"""
|
||
# # SELECT {', '.join(field_names)}
|
||
# # FROM {table_name}
|
||
# # ORDER BY stock_code
|
||
# # """)
|
||
|
||
# # if not data:
|
||
# # logging.error(f"表 {table_name} 中没有数据")
|
||
# # return None
|
||
|
||
# # return data
|
||
|
||
# # except Exception as e:
|
||
# # logging.error(f"从数据库读取月度均值数据失败: {str(e)}")
|
||
# # return None
|
||
|
||
# # def get_float_share_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
|
||
# # """
|
||
# # 从conditionalselection表读取流通股本数据
|
||
|
||
# # Args:
|
||
# # db_config: 数据库配置
|
||
# # table_name: 源数据表名
|
||
|
||
# # Returns:
|
||
# # List[Dict]: 查询结果数据集,失败返回None
|
||
# # """
|
||
# # try:
|
||
# # with MySQLHelper(**db_config) as db:
|
||
# # # 查询流通股本数据
|
||
# # data = db.execute_query(f"""
|
||
# # SELECT stock_code , stock_name , float_share
|
||
# # FROM {table_name}
|
||
# # ORDER BY stock_code
|
||
# # """)
|
||
|
||
# # if not data:
|
||
# # logging.error(f"表 {table_name} 中没有流通股本数据")
|
||
# # return None
|
||
|
||
# # return data
|
||
|
||
# # except Exception as e:
|
||
# # logging.error(f"从数据库读取流通股本数据失败: {str(e)}")
|
||
# # return None
|
||
|
||
# # def export_to_csv(data: List[Dict], output_file: str, data_type: str = 'monthly_avg') -> bool:
|
||
# # """
|
||
# # 将数据导出到CSV文件
|
||
|
||
# # Args:
|
||
# # data: 要导出的数据集
|
||
# # output_file: 输出的CSV文件路径
|
||
# # data_type: 数据类型('monthly_avg'或'float_share')
|
||
|
||
# # Returns:
|
||
# # bool: 是否导出成功
|
||
# # """
|
||
# # if not data:
|
||
# # return False
|
||
|
||
# # try:
|
||
# # # 获取字段名(使用第一个数据的键)
|
||
# # field_names = list(data[0].keys())
|
||
|
||
# # # 字段名到中文的映射
|
||
# # header_map = {
|
||
# # 'stock_code': '股票代码',
|
||
# # 'stock_name': '股票名称',
|
||
# # 'float_share': '流通股本(千股)',
|
||
# # 'ym_2410': '2024年10月均收盘价',
|
||
# # 'ym_2411': '2024年11月均收盘价',
|
||
# # 'ym_2412': '2024年12月均收盘价',
|
||
# # 'ym_2501': '2025年1月均收盘价',
|
||
# # 'ym_2502': '2025年2月均收盘价',
|
||
# # 'ym_2503': '2025年3月均收盘价',
|
||
# # 'ym_2504': '2025年4月均收盘价',
|
||
# # 'ym_2505': '2025年5月均收盘价',
|
||
# # 'ym_2506': '2025年6月均收盘价',
|
||
# # 'ym_2507': '2025年7月均收盘价',
|
||
# # 'ym_2508': '2025年8月均收盘价'
|
||
# # }
|
||
|
||
# # with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile:
|
||
# # writer = csv.DictWriter(csvfile, fieldnames=field_names)
|
||
|
||
# # # 写入中文表头
|
||
# # writer.writerow({col: header_map.get(col, col) for col in field_names})
|
||
|
||
# # # 写入数据
|
||
# # writer.writerows(data)
|
||
|
||
# # logging.info(f"成功导出 {len(data)} 条记录到CSV文件: {output_file}")
|
||
# # return True
|
||
|
||
# # except Exception as e:
|
||
# # logging.error(f"导出到CSV失败: {str(e)}")
|
||
# # return False
|
||
|
||
# # def export_to_excel(data: List[Dict], output_file: str, data_type: str = 'monthly_avg') -> bool:
|
||
# # """
|
||
# # 将数据导出为Excel文件(包含多个工作表)
|
||
|
||
# # Args:
|
||
# # data: 要导出的数据集
|
||
# # output_file: 输出的Excel文件路径
|
||
# # data_type: 数据类型('monthly_avg'或'float_share')
|
||
|
||
# # Returns:
|
||
# # bool: 是否导出成功
|
||
# # """
|
||
# # if not data:
|
||
# # return False
|
||
|
||
# # try:
|
||
# # # 转换为DataFrame
|
||
# # df = pd.DataFrame(data)
|
||
|
||
# # # 设置股票代码为索引
|
||
# # if 'stock_code' in df.columns:
|
||
# # df.set_index('stock_code', inplace=True)
|
||
|
||
# # # 创建Excel writer对象
|
||
# # with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
|
||
# # # 1. 原始数据工作表
|
||
# # sheet_name = '流通股本数据' if data_type == 'float_share' else '原始数据'
|
||
# # df.to_excel(writer, sheet_name=sheet_name)
|
||
|
||
# # # 对于月度均价数据,添加额外的工作表
|
||
# # if data_type == 'monthly_avg':
|
||
# # # 2. 统计信息工作表(仅当有数值列时)
|
||
# # numeric_cols = [col for col in df.columns if col.startswith('ym_') and pd.api.types.is_numeric_dtype(df[col])]
|
||
|
||
# # if numeric_cols:
|
||
# # try:
|
||
# # stats = df[numeric_cols].describe().loc[['mean', 'min', 'max', 'std']]
|
||
# # stats.to_excel(writer, sheet_name='统计信息')
|
||
# # except KeyError:
|
||
# # logging.warning("无法生成完整的统计信息,数据可能不足")
|
||
# # # 生成简化版统计信息
|
||
# # stats = df[numeric_cols].agg(['mean', 'min', 'max', 'std'])
|
||
# # stats.to_excel(writer, sheet_name='统计信息')
|
||
|
||
# # # 3. 涨幅排名工作表(需要至少两个月份数据)
|
||
# # if len(numeric_cols) >= 2:
|
||
# # first_month = numeric_cols[0]
|
||
# # last_month = numeric_cols[-1]
|
||
|
||
# # try:
|
||
# # df['涨幅(%)'] = (df[last_month] - df[first_month]) / df[first_month] * 100
|
||
# # result_df = df[['stock_name', '涨幅(%)']].copy()
|
||
# # result_df.dropna(subset=['涨幅(%)'], inplace=True)
|
||
# # result_df.sort_values('涨幅(%)', ascending=False, inplace=True)
|
||
# # result_df.to_excel(writer, sheet_name='涨幅排名')
|
||
# # except Exception as e:
|
||
# # logging.warning(f"无法计算涨幅: {str(e)}")
|
||
|
||
# # # 4. 月度趋势工作表
|
||
# # if numeric_cols:
|
||
# # try:
|
||
# # trend_df = df[numeric_cols].transpose()
|
||
# # trend_df.index = [col.replace('ym_', '') for col in numeric_cols]
|
||
# # trend_df.to_excel(writer, sheet_name='月度趋势')
|
||
# # except Exception as e:
|
||
# # logging.warning(f"无法生成月度趋势: {str(e)}")
|
||
|
||
# # logging.info(f"成功导出Excel文件: {output_file}")
|
||
# # return True
|
||
|
||
# # except Exception as e:
|
||
# # logging.error(f"导出Excel失败: {str(e)}")
|
||
# # return False
|
||
|
||
# # def export_data(db_config: dict,
|
||
# # table_name: str,
|
||
# # data_type: str = 'monthly_avg',
|
||
# # csv_file: str = None,
|
||
# # excel_file: str = None) -> bool:
|
||
# # """
|
||
# # 导出数据到CSV和/或Excel
|
||
|
||
# # Args:
|
||
# # db_config: 数据库配置
|
||
# # table_name: 源数据表名
|
||
# # data_type: 数据类型('monthly_avg'或'float_share')
|
||
# # csv_file: CSV输出路径(可选)
|
||
# # excel_file: Excel输出路径(可选)
|
||
|
||
# # Returns:
|
||
# # bool: 是否至少有一种格式导出成功
|
||
# # """
|
||
# # if not csv_file and not excel_file:
|
||
# # logging.error("必须指定至少一种输出格式")
|
||
# # return False
|
||
|
||
# # # 从数据库获取数据
|
||
# # if data_type == 'float_share':
|
||
# # data = get_float_share_data(db_config, table_name)
|
||
# # else:
|
||
# # data = get_monthly_avg_data(db_config, table_name)
|
||
|
||
# # if not data:
|
||
# # return False
|
||
|
||
# # # 导出结果
|
||
# # csv_success = True
|
||
# # excel_success = True
|
||
|
||
# # if csv_file:
|
||
# # csv_success = export_to_csv(data, csv_file, data_type)
|
||
|
||
# # if excel_file:
|
||
# # excel_success = export_to_excel(data, excel_file, data_type)
|
||
|
||
# # return csv_success or excel_success
|
||
|
||
# # # 使用示例
|
||
# # if __name__ == "__main__":
|
||
# # # 配置日志
|
||
# # logging.basicConfig(
|
||
# # level=logging.INFO,
|
||
# # format='%(asctime)s - %(levelname)s - %(message)s',
|
||
# # handlers=[
|
||
# # logging.FileHandler('export_data.log', encoding='utf-8'),
|
||
# # logging.StreamHandler()
|
||
# # ]
|
||
# # )
|
||
|
||
# # # 数据库配置
|
||
# # db_config = {
|
||
# # 'host': 'localhost',
|
||
# # 'user': 'root',
|
||
# # 'password': 'bzskmysql',
|
||
# # 'database': 'klinedata_1d_hk'
|
||
# # }
|
||
|
||
# # # 导出月度均价数据
|
||
# # monthly_success = export_data(
|
||
# # db_config=db_config,
|
||
# # table_name="hk_monthly_avg_2410_2508",
|
||
# # data_type='monthly_avg',
|
||
# # csv_file="hk_stocks_monthly_avg_202410-202508.csv",
|
||
# # excel_file="hk_stocks_monthly_avg_202410-202508.xlsx"
|
||
# # )
|
||
|
||
# # # 导出流通股本数据
|
||
# # float_share_success = export_data(
|
||
# # db_config=db_config,
|
||
# # table_name="conditionalselection",
|
||
# # data_type='float_share',
|
||
# # csv_file="hk_stocks_float_share.csv",
|
||
# # excel_file="hk_stocks_float_share.xlsx"
|
||
# # )
|
||
|
||
# # if monthly_success and float_share_success:
|
||
# # logging.info("所有数据导出成功完成")
|
||
# # else:
|
||
# # logging.error("数据导出过程中出现错误")
|
||
|
||
import csv
|
||
import pandas as pd
|
||
from MySQLHelper import MySQLHelper
|
||
import logging
|
||
from typing import List, Dict, Optional, Tuple
|
||
from datetime import datetime
|
||
|
||
def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
    """
    Read all rows of a monthly-average table, excluding bookkeeping columns.

    The column list is discovered from INFORMATION_SCHEMA.COLUMNS, then every
    column except ``id`` and ``update_time`` is selected, ordered by
    ``stock_code``.

    Args:
        db_config: Keyword arguments passed to ``MySQLHelper``. Its
            ``'database'`` entry is also used as the schema name for the
            column lookup.
        table_name: Name of the source table. It is matched verbatim against
            INFORMATION_SCHEMA.TABLE_NAME, so it must be a bare table name
            (no schema prefix, no quoting).

    Returns:
        The rows returned by ``execute_query`` (the code indexes the schema
        rows by column name, so rows are presumably dict-like), or None when
        the table has no exportable columns, has no rows, or any error
        occurs. Errors are logged, never raised.
    """
    def quote_identifier(name) -> str:
        # MySQL identifier quoting: wrap in backticks and double any embedded
        # backtick, so reserved words and special characters are safe.
        return "`" + str(name).replace("`", "``") + "`"

    try:
        with MySQLHelper(**db_config) as db:
            # Look up the table structure; values are bound as parameters.
            columns = db.execute_query(
                """
                SELECT COLUMN_NAME
                FROM INFORMATION_SCHEMA.COLUMNS
                WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
                ORDER BY ORDINAL_POSITION
                """,
                (db_config['database'], table_name),
            )

            if not columns:
                logging.error(f"表 {table_name} 不存在或没有列")
                return None

            # Keep every column except the bookkeeping ones.
            field_names = [
                col['COLUMN_NAME'] for col in columns
                if col['COLUMN_NAME'] not in ('id', 'update_time')
            ]
            if not field_names:
                # Nothing left to select; avoid sending "SELECT  FROM ...".
                logging.error(f"表 {table_name} 不存在或没有列")
                return None

            # Identifiers cannot be bound as parameters, so quote them.
            select_list = ', '.join(quote_identifier(name) for name in field_names)
            data = db.execute_query(f"""
                SELECT {select_list}
                FROM {quote_identifier(table_name)}
                ORDER BY stock_code
            """)

            if not data:
                logging.error(f"表 {table_name} 中没有数据")
                return None

            return data

    except Exception as e:
        # Top-level boundary for DB access: log and report failure as None.
        logging.error(f"从数据库读取月度均值数据失败: {str(e)}")
        return None
|
||
|
||
def get_float_share_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
    """
    Fetch stock_code, stock_name and float_share from the given table.

    Args:
        db_config: Keyword arguments passed to ``MySQLHelper``.
        table_name: Name of the source table; interpolated into the query
            text as-is.

    Returns:
        The rows returned by ``execute_query`` ordered by ``stock_code``, or
        None when the query yields nothing or any error occurs (logged).
    """
    try:
        with MySQLHelper(**db_config) as connection:
            sql = f"""
                SELECT stock_code, stock_name, float_share
                FROM {table_name}
                ORDER BY stock_code
            """
            rows = connection.execute_query(sql)

            if rows:
                return rows

            # Empty result: report it and signal failure to the caller.
            logging.error(f"表 {table_name} 中没有流通股本数据")
            return None

    except Exception as err:
        logging.error(f"从数据库读取流通股本数据失败: {str(err)}")
        return None
|
||
|
||
def merge_data(monthly_data: List[Dict], float_share_data: List[Dict],
               missing_value: object = 'N/A') -> List[Dict]:
    """
    Attach each stock's float share to its monthly-average row.

    Rows are matched on ``'stock_code'``. The input rows are not modified;
    each output row is a shallow copy with a ``'float_share'`` entry added
    (or overwritten if already present).

    Args:
        monthly_data: Monthly-average rows; each must have ``'stock_code'``.
        float_share_data: Rows with ``'stock_code'`` and ``'float_share'``.
            May be empty or None, in which case every row gets
            ``missing_value``. If a code appears more than once, the last
            occurrence wins.
        missing_value: Value stored when a stock has no float-share row.
            Defaults to the string ``'N/A'``; pass ``None`` to keep the
            column free of placeholder strings for numeric analysis.

    Returns:
        A new list with one merged row per entry of ``monthly_data``, in the
        same order.

    Raises:
        KeyError: If a row lacks ``'stock_code'`` (or a float-share row lacks
            ``'float_share'``).
    """
    float_share_by_code = {
        item['stock_code']: item['float_share']
        for item in (float_share_data or ())
    }

    merged_data = []
    for item in monthly_data:
        merged_item = item.copy()
        merged_item['float_share'] = float_share_by_code.get(
            item['stock_code'], missing_value
        )
        merged_data.append(merged_item)

    return merged_data
|
||
|
||
def _csv_header_label(column):
    """Return the Chinese CSV header for a column name, or the name itself."""
    fixed_labels = {
        'stock_code': '股票代码',
        'stock_name': '股票名称',
        'float_share': '流通股本(千股)',
    }
    if column in fixed_labels:
        return fixed_labels[column]

    # Monthly columns are named ym_YYMM, e.g. ym_2410 -> "2024年10月均收盘价".
    if isinstance(column, str) and column.startswith('ym_'):
        suffix = column[3:]
        if len(suffix) == 4 and all(ch in '0123456789' for ch in suffix):
            month = int(suffix[2:])
            if 1 <= month <= 12:
                return f"20{suffix[:2]}年{month}月均收盘价"

    # Unknown columns keep their raw name as the header.
    return column


def export_to_csv(data: List[Dict], output_file: str) -> bool:
    """
    Write the merged rows to a CSV file with a Chinese header row.

    The file is written as UTF-8 with a BOM (``utf-8-sig``). Columns are the
    union of the keys of all rows, in first-seen order, so a row carrying an
    extra key no longer aborts the export; keys missing from a row are
    written as empty cells.

    Args:
        data: Rows to export.
        output_file: Path of the CSV file to create or overwrite.

    Returns:
        True on success; False when ``data`` is empty or writing fails
        (the error is logged).
    """
    if not data:
        return False

    try:
        # dict.fromkeys de-duplicates while preserving first-seen order.
        field_names = list(dict.fromkeys(key for row in data for key in row))

        with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=field_names)

            # Header row: translated labels instead of DictWriter.writeheader().
            writer.writerow({col: _csv_header_label(col) for col in field_names})

            writer.writerows(data)

        logging.info(f"成功导出 {len(data)} 条记录到CSV文件: {output_file}")
        return True

    except Exception as e:
        logging.error(f"导出到CSV失败: {str(e)}")
        return False
|
||
|
||
def export_to_excel(data: List[Dict], output_file: str) -> bool:
    """
    Write the merged rows to an Excel workbook with several sheets.

    Sheets written (via the ``openpyxl`` engine):
      1. ``合并数据``     - the rows as given, indexed by ``stock_code`` if present.
      2. ``统计信息``     - mean/min/max/std of every numeric ``ym_*`` column.
      3. ``涨幅排名``     - percentage change from the first to the last numeric
                           ``ym_*`` column, sorted descending (needs >= 2 such
                           columns). Rows whose change is undefined (missing
                           values or a zero starting price) are left out.
      4. ``月度趋势``     - the numeric ``ym_*`` columns transposed.
      5. ``流通股本分析`` - ``describe()`` of ``float_share``. Placeholder
                           strings such as ``'N/A'`` are treated as missing
                           for this sheet only.

    Sheets 3-5 are best-effort: a failure is logged as a warning and the
    export continues.

    Args:
        data: Rows to export.
        output_file: Path of the workbook to create or overwrite.

    Returns:
        True on success; False when ``data`` is empty or the export fails
        (the error is logged).
    """
    if not data:
        return False

    growth_col = '涨幅(%)'

    try:
        df = pd.DataFrame(data)

        # Use the stock code as the row index when available.
        if 'stock_code' in df.columns:
            df.set_index('stock_code', inplace=True)

        with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
            # 1. Raw merged data.
            df.to_excel(writer, sheet_name='合并数据')

            # Monthly columns usable for arithmetic; non-string column
            # labels are skipped instead of raising on .startswith().
            numeric_cols = [
                col for col in df.columns
                if isinstance(col, str) and col.startswith('ym_')
                and pd.api.types.is_numeric_dtype(df[col])
            ]

            # 2. Summary statistics.
            if numeric_cols:
                try:
                    stats = df[numeric_cols].describe().loc[['mean', 'min', 'max', 'std']]
                except KeyError:
                    logging.warning("无法生成完整的统计信息,数据可能不足")
                    # Fall back to computing the four statistics directly.
                    stats = df[numeric_cols].agg(['mean', 'min', 'max', 'std'])
                stats.to_excel(writer, sheet_name='统计信息')

            # 3. Growth ranking between the first and last monthly column.
            if len(numeric_cols) >= 2:
                first_month = numeric_cols[0]
                last_month = numeric_cols[-1]

                try:
                    base = df[first_month]
                    growth = (df[last_month] - base) / base * 100
                    # A zero base gives +/-inf, which dropna() would keep and
                    # sort to the top; treat it as undefined instead.
                    growth = growth.replace(
                        [float('inf'), float('-inf')], float('nan')
                    )

                    # Build the sheet from whichever descriptive columns
                    # exist, without adding the growth column to df itself.
                    result_df = pd.DataFrame(index=df.index)
                    if 'stock_name' in df.columns:
                        result_df['stock_name'] = df['stock_name']
                    result_df[growth_col] = growth
                    if 'float_share' in df.columns:
                        result_df['float_share'] = df['float_share']

                    result_df = result_df.dropna(subset=[growth_col])
                    result_df = result_df.sort_values(growth_col, ascending=False)
                    result_df.to_excel(writer, sheet_name='涨幅排名')
                except Exception as e:
                    logging.warning(f"无法计算涨幅: {str(e)}")

            # 4. Monthly trend: one row per month, one column per stock.
            if numeric_cols:
                try:
                    trend_df = df[numeric_cols].transpose()
                    trend_df.index = [col.replace('ym_', '') for col in numeric_cols]
                    trend_df.to_excel(writer, sheet_name='月度趋势')
                except Exception as e:
                    logging.warning(f"无法生成月度趋势: {str(e)}")

            # 5. Float-share summary.
            if 'float_share' in df.columns:
                try:
                    raw_share = df['float_share']
                    # Coerce placeholders to NaN so one missing stock does
                    # not disable the whole sheet.
                    numeric_share = pd.to_numeric(raw_share, errors='coerce')
                    if (pd.api.types.is_numeric_dtype(raw_share)
                            or numeric_share.notna().any()):
                        float_stats = numeric_share.describe().to_frame().T
                        float_stats.to_excel(writer, sheet_name='流通股本分析')
                except Exception as e:
                    logging.warning(f"无法生成流通股本分析: {str(e)}")

        logging.info(f"成功导出Excel文件: {output_file}")
        return True

    except Exception as e:
        logging.error(f"导出Excel失败: {str(e)}")
        return False
|
||
|
||
def export_combined_data(db_config: dict,
                         monthly_table: str,
                         float_share_table: str,
                         csv_file: Optional[str] = None,
                         excel_file: Optional[str] = None) -> bool:
    """
    Load, merge and export monthly averages plus float shares.

    Args:
        db_config: Database configuration passed to the loader functions.
        monthly_table: Name of the monthly-average table.
        float_share_table: Name of the float-share table.
        csv_file: CSV output path (optional).
        excel_file: Excel output path (optional).

    Returns:
        True if at least one of the requested formats was exported
        successfully. False if no output path was given, either data set
        could not be loaded, or every requested export failed.
    """
    if not csv_file and not excel_file:
        logging.error("必须指定至少一种输出格式")
        return False

    # Load both data sets; either one missing aborts the export.
    monthly_data = get_monthly_avg_data(db_config, monthly_table)
    if not monthly_data:
        logging.error("无法获取月度均价数据")
        return False

    float_share_data = get_float_share_data(db_config, float_share_table)
    if not float_share_data:
        logging.error("无法获取流通股本数据")
        return False

    merged_data = merge_data(monthly_data, float_share_data)

    # Record results only for formats that were actually requested, so a
    # format that was never attempted cannot count as a success.
    results = []
    if csv_file:
        results.append(export_to_csv(merged_data, csv_file))
    if excel_file:
        results.append(export_to_excel(merged_data, excel_file))

    return any(results)
|
||
|
||
# Usage example
if __name__ == "__main__":
    import os  # only the script entry point needs environment access

    # Log to both a UTF-8 file and the console.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler('export_combined_data.log', encoding='utf-8'),
            logging.StreamHandler()
        ]
    )

    # Database configuration. Each value can be overridden through an
    # environment variable; the defaults are the previously hard-coded ones.
    # NOTE(review): the fallback password should be removed from source
    # control once the environment variable is set where this runs.
    db_config = {
        'host': os.environ.get('MYSQL_HOST', 'localhost'),
        'user': os.environ.get('MYSQL_USER', 'root'),
        'password': os.environ.get('MYSQL_PASSWORD', 'bzskmysql'),
        'database': os.environ.get('MYSQL_DATABASE', 'klinedata_1d_hk')
    }

    # Export the merged data to both formats.
    success = export_combined_data(
        db_config=db_config,
        monthly_table="hk_monthly_avg_2410_2508",
        float_share_table="conditionalselection",
        csv_file="hk_stocks_combined_data.csv",
        excel_file="hk_stocks_combined_data.xlsx"
    )

    if success:
        logging.info("数据合并导出成功完成")
    else:
        logging.error("数据合并导出过程中出现错误")
|
||
|
||
# import csv
|
||
# import pandas as pd
|
||
# from MySQLHelper import MySQLHelper
|
||
# import logging
|
||
# from typing import List, Dict, Optional
|
||
|
||
# # 配置日志
|
||
# logging.basicConfig(
|
||
# level=logging.INFO,
|
||
# format='%(asctime)s - %(levelname)s - %(message)s',
|
||
# handlers=[
|
||
# logging.FileHandler('stock_data_export.log', encoding='utf-8'),
|
||
# logging.StreamHandler()
|
||
# ]
|
||
# )
|
||
|
||
# def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
|
||
# """
|
||
# 从数据库读取月度均价数据
|
||
# Args:
|
||
# db_config: 数据库配置
|
||
# table_name: 源数据表名
|
||
# Returns:
|
||
# List[Dict]: 查询结果数据集,失败返回None
|
||
# """
|
||
# try:
|
||
# with MySQLHelper(**db_config) as db:
|
||
# # 获取表结构信息
|
||
# columns = db.execute_query(f"""
|
||
# SELECT COLUMN_NAME
|
||
# FROM INFORMATION_SCHEMA.COLUMNS
|
||
# WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
|
||
# ORDER BY ORDINAL_POSITION
|
||
# """, (db_config['database'], table_name))
|
||
|
||
# if not columns:
|
||
# logging.error(f"表 {table_name} 不存在或没有列")
|
||
# return None
|
||
|
||
# # 获取列名列表(排除id和update_time)
|
||
# field_names = [col['COLUMN_NAME'] for col in columns
|
||
# if col['COLUMN_NAME'] not in ('id', 'update_time')]
|
||
|
||
# # 查询数据
|
||
# data = db.execute_query(f"""
|
||
# SELECT {', '.join(field_names)}
|
||
# FROM {table_name}
|
||
# ORDER BY stock_code
|
||
# """)
|
||
|
||
# if not data:
|
||
# logging.error(f"表 {table_name} 中没有数据")
|
||
# return None
|
||
|
||
# return data
|
||
|
||
# except Exception as e:
|
||
# logging.error(f"从数据库读取月度均价数据失败: {str(e)}")
|
||
# return None
|
||
|
||
# def get_float_share_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
|
||
# """
|
||
# 从conditionalselection表读取流通股本数据
|
||
# Args:
|
||
# db_config: 数据库配置
|
||
# table_name: 源数据表名
|
||
# Returns:
|
||
# List[Dict]: 查询结果数据集,失败返回None
|
||
# """
|
||
# try:
|
||
# with MySQLHelper(**db_config) as db:
|
||
# # 查询流通股本数据
|
||
# data = db.execute_query(f"""
|
||
# SELECT stock_code, stock_name, float_share
|
||
# FROM {table_name}
|
||
# ORDER BY stock_code
|
||
# """)
|
||
|
||
# if not data:
|
||
# logging.error(f"表 {table_name} 中没有流通股本数据")
|
||
# return None
|
||
|
||
# return data
|
||
|
||
# except Exception as e:
|
||
# logging.error(f"从数据库读取流通股本数据失败: {str(e)}")
|
||
# return None
|
||
|
||
# def calculate_adjusted_prices(monthly_data: List[Dict], float_share_data: List[Dict]) -> List[Dict]:
|
||
# """
|
||
# 计算股本调整后价格(股价×流通股本)
|
||
# Args:
|
||
# monthly_data: 月度均价数据
|
||
# float_share_data: 流通股本数据
|
||
# Returns:
|
||
# List[Dict]: 包含调整后价格的数据集
|
||
# """
|
||
# adjusted_data = []
|
||
# float_shares = {item['stock_code']: item['float_share'] for item in float_share_data}
|
||
|
||
# for stock in monthly_data:
|
||
# adjusted_stock = stock.copy()
|
||
# stock_code = stock['stock_code']
|
||
# float_share = float_shares.get(stock_code)
|
||
|
||
# if float_share is None or not isinstance(float_share, (int, float)):
|
||
# logging.warning(f"股票 {stock_code} 缺少流通股本数据,跳过调整")
|
||
# adjusted_data.append(adjusted_stock)
|
||
# continue
|
||
|
||
# # 对每个月的价格乘以流通股本
|
||
# nIndex = 0
|
||
# for key in stock.keys():
|
||
# nIndex = nIndex + 1
|
||
# # if key.startswith('ym_') and isinstance(stock[key], (int, float)):
|
||
# if nIndex > 2:
|
||
# adjusted_stock[key] = stock[key] * float_share
|
||
|
||
# adjusted_data.append(adjusted_stock)
|
||
|
||
# return adjusted_data
|
||
|
||
# def export_to_csv(data: List[Dict], output_file: str) -> bool:
|
||
# """
|
||
# 将数据导出到CSV文件
|
||
# Args:
|
||
# data: 要导出的数据集
|
||
# output_file: 输出的CSV文件路径
|
||
# Returns:
|
||
# bool: 是否导出成功
|
||
# """
|
||
# if not data:
|
||
# logging.error("没有数据可导出")
|
||
# return False
|
||
|
||
# try:
|
||
# field_names = list(data[0].keys())
|
||
|
||
# # 字段名到中文的映射
|
||
# header_map = {
|
||
# 'stock_code': '股票代码',
|
||
# 'stock_name': '股票名称',
|
||
# 'ym_2410': '2024年10月',
|
||
# 'ym_2411': '2024年11月',
|
||
# 'ym_2412': '2024年12月',
|
||
# 'ym_2501': '2025年1月',
|
||
# 'ym_2502': '2025年2月',
|
||
# 'ym_2503': '2025年3月',
|
||
# 'ym_2504': '2025年4月',
|
||
# 'ym_2505': '2025年5月',
|
||
# 'ym_2506': '2025年6月',
|
||
# 'ym_2507': '2025年7月',
|
||
# 'ym_2508': '2025年8月'
|
||
# }
|
||
|
||
# with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile:
|
||
# writer = csv.DictWriter(csvfile, fieldnames=field_names)
|
||
|
||
# # 写入中文表头
|
||
# writer.writerow({col: header_map.get(col, col) for col in field_names})
|
||
|
||
# # 写入数据
|
||
# writer.writerows(data)
|
||
|
||
# logging.info(f"成功导出 {len(data)} 条记录到CSV文件: {output_file}")
|
||
# return True
|
||
|
||
# except Exception as e:
|
||
# logging.error(f"导出到CSV失败: {str(e)}")
|
||
# return False
|
||
|
||
# def export_to_excel(data: List[Dict], output_file: str) -> bool:
|
||
# """
|
||
# 将数据导出为Excel文件
|
||
# Args:
|
||
# data: 要导出的数据集
|
||
# output_file: 输出的Excel文件路径
|
||
# Returns:
|
||
# bool: 是否导出成功
|
||
# """
|
||
# if not data:
|
||
# logging.error("没有数据可导出")
|
||
# return False
|
||
|
||
# try:
|
||
# df = pd.DataFrame(data)
|
||
|
||
# # 设置股票代码为索引
|
||
# if 'stock_code' in df.columns:
|
||
# df.set_index('stock_code', inplace=True)
|
||
|
||
# # 创建Excel writer对象
|
||
# with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
|
||
# # 原始数据工作表
|
||
# df.to_excel(writer, sheet_name='调整后价格')
|
||
|
||
# # 统计信息工作表
|
||
# numeric_cols = [col for col in df.columns if col.startswith('ym_') and pd.api.types.is_numeric_dtype(df[col])]
|
||
|
||
# if numeric_cols:
|
||
# stats = df[numeric_cols].describe().loc[['mean', 'min', 'max', 'std']]
|
||
# stats.to_excel(writer, sheet_name='统计信息')
|
||
|
||
# # 涨幅排名工作表
|
||
# if len(numeric_cols) >= 2:
|
||
# first_month = numeric_cols[0]
|
||
# last_month = numeric_cols[-1]
|
||
|
||
# df['涨幅(%)'] = (df[last_month] - df[first_month]) / df[first_month] * 100
|
||
# result_df = df[['stock_name', '涨幅(%)']].copy()
|
||
# result_df.dropna(subset=['涨幅(%)'], inplace=True)
|
||
# result_df.sort_values('涨幅(%)', ascending=False, inplace=True)
|
||
# result_df.to_excel(writer, sheet_name='涨幅排名')
|
||
|
||
# # 月度趋势工作表
|
||
# trend_df = df[numeric_cols].transpose()
|
||
# trend_df.index = [col.replace('ym_', '') for col in numeric_cols]
|
||
# trend_df.to_excel(writer, sheet_name='月度趋势')
|
||
|
||
# logging.info(f"成功导出Excel文件: {output_file}")
|
||
# return True
|
||
|
||
# except Exception as e:
|
||
# logging.error(f"导出Excel失败: {str(e)}")
|
||
# return False
|
||
|
||
# def export_adjusted_prices(db_config: dict,
|
||
# monthly_table: str,
|
||
# float_share_table: str,
|
||
# csv_file: str = None,
|
||
# excel_file: str = None) -> bool:
|
||
# """
|
||
# 导出股本调整后的价格数据
|
||
# Args:
|
||
# db_config: 数据库配置
|
||
# monthly_table: 月度均价表名
|
||
# float_share_table: 流通股本表名
|
||
# csv_file: CSV输出路径(可选)
|
||
# excel_file: Excel输出路径(可选)
|
||
# Returns:
|
||
# bool: 是否至少有一种格式导出成功
|
||
# """
|
||
# # 获取数据
|
||
# monthly_data = get_monthly_avg_data(db_config, monthly_table)
|
||
# if not monthly_data:
|
||
# return False
|
||
|
||
# float_share_data = get_float_share_data(db_config, float_share_table)
|
||
# if not float_share_data:
|
||
# return False
|
||
|
||
# # 计算调整后价格
|
||
# adjusted_data = calculate_adjusted_prices(monthly_data, float_share_data)
|
||
|
||
# # 导出结果
|
||
# results = []
|
||
# if csv_file:
|
||
# results.append(export_to_csv(adjusted_data, csv_file))
|
||
# if excel_file:
|
||
# results.append(export_to_excel(adjusted_data, excel_file))
|
||
|
||
# return any(results)
|
||
|
||
# if __name__ == "__main__":
|
||
# # 数据库配置
|
||
# db_config = {
|
||
# 'host': 'localhost',
|
||
# 'user': 'root',
|
||
# 'password': 'bzskmysql',
|
||
# 'database': 'klinedata_1d_hk'
|
||
# }
|
||
|
||
# # 导出数据
|
||
# success = export_adjusted_prices(
|
||
# db_config=db_config,
|
||
# monthly_table="hk_monthly_avg_2410_2508",
|
||
# float_share_table="conditionalselection",
|
||
# csv_file="adjusted_stock_prices.csv",
|
||
# excel_file="adjusted_stock_prices.xlsx"
|
||
# )
|
||
|
||
# if success:
|
||
# logging.info("数据导出成功完成")
|
||
# else:
|
||
# logging.error("数据导出过程中出现错误") |