# # # import csv # # # import pandas as pd # # # from MySQLHelper import MySQLHelper # # # import logging # # # from typing import List, Dict, Optional # # # from datetime import datetime # # # def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]: # # # """ # # # 从数据库读取月度均值数据 # # # Args: # # # db_config: 数据库配置 # # # table_name: 源数据表名 # # # Returns: # # # List[Dict]: 查询结果数据集,失败返回None # # # """ # # # try: # # # with MySQLHelper(**db_config) as db: # # # # 获取表结构信息 # # # columns = db.execute_query(f""" # # # SELECT COLUMN_NAME # # # FROM INFORMATION_SCHEMA.COLUMNS # # # WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s # # # ORDER BY ORDINAL_POSITION # # # """, (db_config['database'], table_name)) # # # if not columns: # # # logging.error(f"表 {table_name} 不存在或没有列") # # # return None # # # # 获取列名列表(排除id和update_time) # # # field_names = [col['COLUMN_NAME'] for col in columns # # # if col['COLUMN_NAME'] not in ('id', 'update_time')] # # # # 查询数据 # # # data = db.execute_query(f""" # # # SELECT {', '.join(field_names)} # # # FROM {table_name} # # # ORDER BY stock_code # # # """) # # # if not data: # # # logging.error(f"表 {table_name} 中没有数据") # # # return None # # # return data # # # except Exception as e: # # # logging.error(f"从数据库读取数据失败: {str(e)}") # # # return None # # # def export_to_csv(data: List[Dict], output_file: str) -> bool: # # # """ # # # 将数据导出到CSV文件 # # # Args: # # # data: 要导出的数据集 # # # output_file: 输出的CSV文件路径 # # # Returns: # # # bool: 是否导出成功 # # # """ # # # if not data: # # # return False # # # try: # # # # 获取字段名(使用第一个数据的键) # # # field_names = list(data[0].keys()) # # # # 字段名到中文的映射 # # # header_map = { # # # 'stock_code': '股票代码', # # # 'stock_name': '股票名称', # # # 'ym_2410': '2024年10月均收盘价', # # # 'ym_2411': '2024年11月均收盘价', # # # 'ym_2412': '2024年12月均收盘价', # # # 'ym_2501': '2025年1月均收盘价', # # # 'ym_2502': '2025年2月均收盘价', # # # 'ym_2503': '2025年3月均收盘价', # # # 'ym_2504': '2025年4月均收盘价', # # # 'ym_2505': '2025年5月均收盘价', # # # 'ym_2506': 
'2025年6月均收盘价', # # # 'ym_2507': '2025年7月均收盘价', # # # 'ym_2508': '2025年8月均收盘价' # # # } # # # with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile: # # # writer = csv.DictWriter(csvfile, fieldnames=field_names) # # # # 写入中文表头 # # # writer.writerow({col: header_map.get(col, col) for col in field_names}) # # # # 写入数据 # # # writer.writerows(data) # # # logging.info(f"成功导出 {len(data)} 条记录到CSV文件: {output_file}") # # # return True # # # except Exception as e: # # # logging.error(f"导出到CSV失败: {str(e)}") # # # return False # # # def export_to_excel(data: List[Dict], output_file: str) -> bool: # # # """ # # # 将数据导出为Excel文件(包含多个工作表) # # # Args: # # # data: 要导出的数据集 # # # output_file: 输出的Excel文件路径 # # # Returns: # # # bool: 是否导出成功 # # # """ # # # if not data: # # # return False # # # try: # # # # 转换为DataFrame # # # df = pd.DataFrame(data) # # # # 设置股票代码为索引 # # # if 'stock_code' in df.columns: # # # df.set_index('stock_code', inplace=True) # # # # 创建Excel writer对象 # # # with pd.ExcelWriter(output_file, engine='openpyxl') as writer: # # # # 1. 原始数据工作表 # # # df.to_excel(writer, sheet_name='原始数据') # # # # 2. 统计信息工作表(仅当有数值列时) # # # numeric_cols = [col for col in df.columns if col.startswith('ym_') and pd.api.types.is_numeric_dtype(df[col])] # # # if numeric_cols: # # # try: # # # stats = df[numeric_cols].describe().loc[['mean', 'min', 'max', 'std']] # # # stats.to_excel(writer, sheet_name='统计信息') # # # except KeyError: # # # logging.warning("无法生成完整的统计信息,数据可能不足") # # # # 生成简化版统计信息 # # # stats = df[numeric_cols].agg(['mean', 'min', 'max', 'std']) # # # stats.to_excel(writer, sheet_name='统计信息') # # # # 3. 
涨幅排名工作表(需要至少两个月份数据) # # # if len(numeric_cols) >= 2: # # # first_month = numeric_cols[0] # # # last_month = numeric_cols[-1] # # # try: # # # df['涨幅(%)'] = (df[last_month] - df[first_month]) / df[first_month] * 100 # # # result_df = df[['stock_name', '涨幅(%)']].copy() # # # result_df.dropna(subset=['涨幅(%)'], inplace=True) # # # result_df.sort_values('涨幅(%)', ascending=False, inplace=True) # # # result_df.to_excel(writer, sheet_name='涨幅排名') # # # except Exception as e: # # # logging.warning(f"无法计算涨幅: {str(e)}") # # # # 4. 月度趋势工作表 # # # if numeric_cols: # # # try: # # # trend_df = df[numeric_cols].transpose() # # # trend_df.index = [col.replace('ym_', '') for col in numeric_cols] # # # trend_df.to_excel(writer, sheet_name='月度趋势') # # # except Exception as e: # # # logging.warning(f"无法生成月度趋势: {str(e)}") # # # logging.info(f"成功导出Excel文件: {output_file}") # # # return True # # # except Exception as e: # # # logging.error(f"导出Excel失败: {str(e)}") # # # return False # # # def export_monthly_avg_data(db_config: dict, # # # table_name: str, # # # csv_file: str = None, # # # excel_file: str = None) -> bool: # # # """ # # # 导出月度均值数据到CSV和/或Excel # # # Args: # # # db_config: 数据库配置 # # # table_name: 源数据表名 # # # csv_file: CSV输出路径(可选) # # # excel_file: Excel输出路径(可选) # # # Returns: # # # bool: 是否至少有一种格式导出成功 # # # """ # # # if not csv_file and not excel_file: # # # logging.error("必须指定至少一种输出格式") # # # return False # # # # 从数据库获取数据 # # # data = get_monthly_avg_data(db_config, table_name) # # # if not data: # # # return False # # # # 导出结果 # # # csv_success = True # # # excel_success = True # # # if csv_file: # # # csv_success = export_to_csv(data, csv_file) # # # if excel_file: # # # excel_success = export_to_excel(data, excel_file) # # # return csv_success or excel_success # # # # 使用示例 # # # if __name__ == "__main__": # # # # 配置日志 # # # logging.basicConfig( # # # level=logging.INFO, # # # format='%(asctime)s - %(levelname)s - %(message)s', # # # handlers=[ # # # 
logging.FileHandler('Debug.log', encoding='utf-8'), # 关键在这里 # # # logging.StreamHandler() # # # ] # # # ) # # # # 数据库配置 # # # db_config = { # # # 'host': 'localhost', # # # 'user': 'root', # # # 'password': 'bzskmysql', # # # 'database': 'klinedata_1d_hk' # # # } # # # # 导出数据 # # # success = export_monthly_avg_data( # # # db_config=db_config, # # # table_name="hk_monthly_avg_2410_2508", # 你实际使用的表名 # # # csv_file="hk_stocks_monthly_avg_202410-202508.csv", # CSV输出文件 # # # excel_file="hk_stocks_monthly_avg_202410-202508.xlsx" # Excel输出文件 # # # ) # # # if success: # # # logging.info("数据导出成功完成") # # # else: # # # logging.error("数据导出过程中出现错误") # # import csv # # import pandas as pd # # from MySQLHelper import MySQLHelper # # import logging # # from typing import List, Dict, Optional # # from datetime import datetime # # def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]: # # """ # # 从数据库读取月度均值数据 # # Args: # # db_config: 数据库配置 # # table_name: 源数据表名 # # Returns: # # List[Dict]: 查询结果数据集,失败返回None # # """ # # try: # # with MySQLHelper(**db_config) as db: # # # 获取表结构信息 # # columns = db.execute_query(f""" # # SELECT COLUMN_NAME # # FROM INFORMATION_SCHEMA.COLUMNS # # WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s # # ORDER BY ORDINAL_POSITION # # """, (db_config['database'], table_name)) # # if not columns: # # logging.error(f"表 {table_name} 不存在或没有列") # # return None # # # 获取列名列表(排除id和update_time) # # field_names = [col['COLUMN_NAME'] for col in columns # # if col['COLUMN_NAME'] not in ('id', 'update_time')] # # # 查询数据 # # data = db.execute_query(f""" # # SELECT {', '.join(field_names)} # # FROM {table_name} # # ORDER BY stock_code # # """) # # if not data: # # logging.error(f"表 {table_name} 中没有数据") # # return None # # return data # # except Exception as e: # # logging.error(f"从数据库读取月度均值数据失败: {str(e)}") # # return None # # def get_float_share_data(db_config: dict, table_name: str) -> Optional[List[Dict]]: # # """ # # 从conditionalselection表读取流通股本数据 # 
# Args: # # db_config: 数据库配置 # # table_name: 源数据表名 # # Returns: # # List[Dict]: 查询结果数据集,失败返回None # # """ # # try: # # with MySQLHelper(**db_config) as db: # # # 查询流通股本数据 # # data = db.execute_query(f""" # # SELECT stock_code , stock_name , float_share # # FROM {table_name} # # ORDER BY stock_code # # """) # # if not data: # # logging.error(f"表 {table_name} 中没有流通股本数据") # # return None # # return data # # except Exception as e: # # logging.error(f"从数据库读取流通股本数据失败: {str(e)}") # # return None # # def export_to_csv(data: List[Dict], output_file: str, data_type: str = 'monthly_avg') -> bool: # # """ # # 将数据导出到CSV文件 # # Args: # # data: 要导出的数据集 # # output_file: 输出的CSV文件路径 # # data_type: 数据类型('monthly_avg'或'float_share') # # Returns: # # bool: 是否导出成功 # # """ # # if not data: # # return False # # try: # # # 获取字段名(使用第一个数据的键) # # field_names = list(data[0].keys()) # # # 字段名到中文的映射 # # header_map = { # # 'stock_code': '股票代码', # # 'stock_name': '股票名称', # # 'float_share': '流通股本(千股)', # # 'ym_2410': '2024年10月均收盘价', # # 'ym_2411': '2024年11月均收盘价', # # 'ym_2412': '2024年12月均收盘价', # # 'ym_2501': '2025年1月均收盘价', # # 'ym_2502': '2025年2月均收盘价', # # 'ym_2503': '2025年3月均收盘价', # # 'ym_2504': '2025年4月均收盘价', # # 'ym_2505': '2025年5月均收盘价', # # 'ym_2506': '2025年6月均收盘价', # # 'ym_2507': '2025年7月均收盘价', # # 'ym_2508': '2025年8月均收盘价' # # } # # with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile: # # writer = csv.DictWriter(csvfile, fieldnames=field_names) # # # 写入中文表头 # # writer.writerow({col: header_map.get(col, col) for col in field_names}) # # # 写入数据 # # writer.writerows(data) # # logging.info(f"成功导出 {len(data)} 条记录到CSV文件: {output_file}") # # return True # # except Exception as e: # # logging.error(f"导出到CSV失败: {str(e)}") # # return False # # def export_to_excel(data: List[Dict], output_file: str, data_type: str = 'monthly_avg') -> bool: # # """ # # 将数据导出为Excel文件(包含多个工作表) # # Args: # # data: 要导出的数据集 # # output_file: 输出的Excel文件路径 # # data_type: 数据类型('monthly_avg'或'float_share') # 
# Returns: # # bool: 是否导出成功 # # """ # # if not data: # # return False # # try: # # # 转换为DataFrame # # df = pd.DataFrame(data) # # # 设置股票代码为索引 # # if 'stock_code' in df.columns: # # df.set_index('stock_code', inplace=True) # # # 创建Excel writer对象 # # with pd.ExcelWriter(output_file, engine='openpyxl') as writer: # # # 1. 原始数据工作表 # # sheet_name = '流通股本数据' if data_type == 'float_share' else '原始数据' # # df.to_excel(writer, sheet_name=sheet_name) # # # 对于月度均价数据,添加额外的工作表 # # if data_type == 'monthly_avg': # # # 2. 统计信息工作表(仅当有数值列时) # # numeric_cols = [col for col in df.columns if col.startswith('ym_') and pd.api.types.is_numeric_dtype(df[col])] # # if numeric_cols: # # try: # # stats = df[numeric_cols].describe().loc[['mean', 'min', 'max', 'std']] # # stats.to_excel(writer, sheet_name='统计信息') # # except KeyError: # # logging.warning("无法生成完整的统计信息,数据可能不足") # # # 生成简化版统计信息 # # stats = df[numeric_cols].agg(['mean', 'min', 'max', 'std']) # # stats.to_excel(writer, sheet_name='统计信息') # # # 3. 涨幅排名工作表(需要至少两个月份数据) # # if len(numeric_cols) >= 2: # # first_month = numeric_cols[0] # # last_month = numeric_cols[-1] # # try: # # df['涨幅(%)'] = (df[last_month] - df[first_month]) / df[first_month] * 100 # # result_df = df[['stock_name', '涨幅(%)']].copy() # # result_df.dropna(subset=['涨幅(%)'], inplace=True) # # result_df.sort_values('涨幅(%)', ascending=False, inplace=True) # # result_df.to_excel(writer, sheet_name='涨幅排名') # # except Exception as e: # # logging.warning(f"无法计算涨幅: {str(e)}") # # # 4. 
月度趋势工作表 # # if numeric_cols: # # try: # # trend_df = df[numeric_cols].transpose() # # trend_df.index = [col.replace('ym_', '') for col in numeric_cols] # # trend_df.to_excel(writer, sheet_name='月度趋势') # # except Exception as e: # # logging.warning(f"无法生成月度趋势: {str(e)}") # # logging.info(f"成功导出Excel文件: {output_file}") # # return True # # except Exception as e: # # logging.error(f"导出Excel失败: {str(e)}") # # return False # # def export_data(db_config: dict, # # table_name: str, # # data_type: str = 'monthly_avg', # # csv_file: str = None, # # excel_file: str = None) -> bool: # # """ # # 导出数据到CSV和/或Excel # # Args: # # db_config: 数据库配置 # # table_name: 源数据表名 # # data_type: 数据类型('monthly_avg'或'float_share') # # csv_file: CSV输出路径(可选) # # excel_file: Excel输出路径(可选) # # Returns: # # bool: 是否至少有一种格式导出成功 # # """ # # if not csv_file and not excel_file: # # logging.error("必须指定至少一种输出格式") # # return False # # # 从数据库获取数据 # # if data_type == 'float_share': # # data = get_float_share_data(db_config, table_name) # # else: # # data = get_monthly_avg_data(db_config, table_name) # # if not data: # # return False # # # 导出结果 # # csv_success = True # # excel_success = True # # if csv_file: # # csv_success = export_to_csv(data, csv_file, data_type) # # if excel_file: # # excel_success = export_to_excel(data, excel_file, data_type) # # return csv_success or excel_success # # # 使用示例 # # if __name__ == "__main__": # # # 配置日志 # # logging.basicConfig( # # level=logging.INFO, # # format='%(asctime)s - %(levelname)s - %(message)s', # # handlers=[ # # logging.FileHandler('export_data.log', encoding='utf-8'), # # logging.StreamHandler() # # ] # # ) # # # 数据库配置 # # db_config = { # # 'host': 'localhost', # # 'user': 'root', # # 'password': 'bzskmysql', # # 'database': 'klinedata_1d_hk' # # } # # # 导出月度均价数据 # # monthly_success = export_data( # # db_config=db_config, # # table_name="hk_monthly_avg_2410_2508", # # data_type='monthly_avg', # # csv_file="hk_stocks_monthly_avg_202410-202508.csv", # # 
# # excel_file="hk_stocks_monthly_avg_202410-202508.xlsx" # # ) # # # 导出流通股本数据 # # float_share_success = export_data( # # db_config=db_config, # # table_name="conditionalselection", # # data_type='float_share', # # csv_file="hk_stocks_float_share.csv", # # excel_file="hk_stocks_float_share.xlsx" # # ) # # if monthly_success and float_share_success: # # logging.info("所有数据导出成功完成") # # else: # # logging.error("数据导出过程中出现错误")

import csv
import logging
import os
import re
from datetime import datetime
from typing import List, Dict, Optional, Tuple

import pandas as pd

from MySQLHelper import MySQLHelper

# Month-average columns look like "ym_2410": two-digit year followed by month.
_MONTH_COLUMN_RE = re.compile(r"ym_([0-9]{2})([0-9]{2})")

# Column name -> CSV header for the non-month columns.
_STATIC_HEADERS = {
    'stock_code': '股票代码',
    'stock_name': '股票名称',
    'float_share': '流通股本(千股)',
}


def _quote_identifier(name: str) -> str:
    """Return *name* as a backtick-quoted MySQL identifier."""
    return '`' + str(name).replace('`', '``') + '`'


def _csv_header(column: str) -> str:
    """Return the CSV header for *column*, or the column name if unknown.

    ``ym_YYMM`` columns are rendered as ``20YY年M月均收盘价`` so new month
    columns get a readable header without editing a lookup table.
    """
    if column in _STATIC_HEADERS:
        return _STATIC_HEADERS[column]
    match = _MONTH_COLUMN_RE.fullmatch(column) if isinstance(column, str) else None
    if match:
        year, month = int(match.group(1)), int(match.group(2))
        if 1 <= month <= 12:
            return f"20{year:02d}年{month}月均收盘价"
    return column


def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
    """Read every monthly-average row from *table_name*.

    The column list is taken from INFORMATION_SCHEMA and the ``id`` and
    ``update_time`` columns are left out of the SELECT.

    Args:
        db_config: Keyword arguments for ``MySQLHelper``; must contain
            ``'database'``.
        table_name: Source table. It is interpolated into the SQL text, so
            it must come from trusted code, never from user input.

    Returns:
        The rows ordered by ``stock_code``, or ``None`` when the table is
        missing, has no exportable columns, is empty, or the query fails.
    """
    try:
        with MySQLHelper(**db_config) as db:
            columns = db.execute_query(
                """
                SELECT COLUMN_NAME
                FROM INFORMATION_SCHEMA.COLUMNS
                WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
                ORDER BY ORDINAL_POSITION
                """,
                (db_config['database'], table_name),
            )
            if not columns:
                logging.error("表 %s 不存在或没有列", table_name)
                return None

            field_names = [
                col['COLUMN_NAME'] for col in columns
                if col['COLUMN_NAME'] not in ('id', 'update_time')
            ]
            if not field_names:
                # Without this guard the SELECT below would be malformed SQL.
                logging.error("表 %s 没有可导出的列", table_name)
                return None

            # Backtick-quote the columns so reserved words or unusual
            # characters in a column name cannot break the statement.
            select_list = ', '.join(_quote_identifier(name) for name in field_names)
            data = db.execute_query(f"""
                SELECT {select_list}
                FROM {table_name}
                ORDER BY stock_code
            """)
            if not data:
                logging.error("表 %s 中没有数据", table_name)
                return None
            return data
    except Exception as e:
        logging.error("从数据库读取月度均值数据失败: %s", e)
        return None


def get_float_share_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
    """Read ``stock_code``, ``stock_name`` and ``float_share`` from *table_name*.

    Args:
        db_config: Keyword arguments for ``MySQLHelper``.
        table_name: Source table. It is interpolated into the SQL text, so
            it must come from trusted code, never from user input.

    Returns:
        The rows ordered by ``stock_code``, or ``None`` when the table is
        empty or the query fails.
    """
    try:
        with MySQLHelper(**db_config) as db:
            data = db.execute_query(f"""
                SELECT stock_code, stock_name, float_share
                FROM {table_name}
                ORDER BY stock_code
            """)
            if not data:
                logging.error("表 %s 中没有流通股本数据", table_name)
                return None
            return data
    except Exception as e:
        logging.error("从数据库读取流通股本数据失败: %s", e)
        return None


def merge_data(monthly_data: List[Dict], float_share_data: List[Dict]) -> List[Dict]:
    """Attach the float share to every monthly-average row.

    Args:
        monthly_data: Monthly-average rows; each needs a ``stock_code`` key.
        float_share_data: Rows with ``stock_code`` and ``float_share`` keys.

    Returns:
        New row dicts (the inputs are not modified) with a ``float_share``
        key added; stocks without a float share get ``'N/A'``.
    """
    share_by_code = {row['stock_code']: row['float_share'] for row in float_share_data}
    return [
        {**row, 'float_share': share_by_code.get(row['stock_code'], 'N/A')}
        for row in monthly_data
    ]


def export_to_csv(data: List[Dict], output_file: str) -> bool:
    """Write the merged rows to a UTF-8 (with BOM) CSV file.

    The column order is the key order of the first row; the header row uses
    the translated column titles.

    Args:
        data: Rows to export.
        output_file: Destination CSV path.

    Returns:
        ``True`` on success, ``False`` when *data* is empty or writing fails.
    """
    if not data:
        return False

    try:
        field_names = list(data[0].keys())
        with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=field_names)
            # Translated header row instead of writer.writeheader().
            writer.writerow({col: _csv_header(col) for col in field_names})
            writer.writerows(data)
        logging.info("成功导出 %d 条记录到CSV文件: %s", len(data), output_file)
        return True
    except Exception as e:
        logging.error("导出到CSV失败: %s", e)
        return False


def _month_price_frame(df: pd.DataFrame) -> pd.DataFrame:
    """Return the ``ym_*`` columns of *df* as numeric columns.

    Columns that are already numeric are kept as they are. Object columns
    are coerced with ``pd.to_numeric`` and kept only when at least one
    value converts.
    """
    month_cols = [c for c in df.columns if isinstance(c, str) and c.startswith('ym_')]
    prices = df[month_cols].copy()
    usable = []
    for col in month_cols:
        if pd.api.types.is_numeric_dtype(prices[col]):
            usable.append(col)
            continue
        try:
            coerced = pd.to_numeric(prices[col], errors='coerce')
        except (TypeError, ValueError):
            continue
        if coerced.notna().any():
            prices[col] = coerced
            usable.append(col)
    return prices[usable]


def _write_stats_sheet(writer: pd.ExcelWriter, prices: pd.DataFrame) -> None:
    """Write mean/min/max/std of every month column to the stats sheet."""
    try:
        stats = prices.describe().loc[['mean', 'min', 'max', 'std']]
        stats.to_excel(writer, sheet_name='统计信息')
    except KeyError:
        logging.warning("无法生成完整的统计信息,数据可能不足")
        stats = prices.agg(['mean', 'min', 'max', 'std'])
        stats.to_excel(writer, sheet_name='统计信息')


def _write_gain_sheet(writer: pd.ExcelWriter, df: pd.DataFrame, prices: pd.DataFrame) -> None:
    """Write the first-to-last-month percentage gain, highest gain first."""
    first_month = prices.columns[0]
    last_month = prices.columns[-1]
    try:
        # A zero base price has no meaningful gain; make it NaN so the row
        # is dropped instead of ranking first with an infinite gain.
        base = prices[first_month].where(prices[first_month] != 0)
        ranking = df[['stock_name', 'float_share']].copy()
        ranking.insert(1, '涨幅(%)', (prices[last_month] - base) / base * 100)
        ranking.dropna(subset=['涨幅(%)'], inplace=True)
        ranking.sort_values('涨幅(%)', ascending=False, inplace=True)
        ranking.to_excel(writer, sheet_name='涨幅排名')
    except Exception as e:
        logging.warning("无法计算涨幅: %s", e)


def _write_trend_sheet(writer: pd.ExcelWriter, prices: pd.DataFrame) -> None:
    """Write the month columns transposed: one row per month."""
    try:
        trend = prices.transpose()
        trend.index = [col.replace('ym_', '') for col in prices.columns]
        trend.to_excel(writer, sheet_name='月度趋势')
    except Exception as e:
        logging.warning("无法生成月度趋势: %s", e)


def _write_float_share_sheet(writer: pd.ExcelWriter, df: pd.DataFrame) -> None:
    """Write descriptive statistics of the ``float_share`` column."""
    try:
        raw = df['float_share']
        # merge_data() fills missing shares with 'N/A', which makes the
        # column object-dtype; coerce so the remaining values still count.
        shares = pd.to_numeric(raw, errors='coerce')
        if pd.api.types.is_numeric_dtype(raw) or shares.notna().any():
            shares.describe().to_frame().T.to_excel(writer, sheet_name='流通股本分析')
    except Exception as e:
        logging.warning("无法生成流通股本分析: %s", e)


def export_to_excel(data: List[Dict], output_file: str) -> bool:
    """Write the merged rows and derived analysis sheets to an Excel file.

    Sheets: the merged data as given, then (when month columns with numeric
    values exist) summary statistics, a gain ranking (needs two or more
    month columns) and a month-by-month trend, and finally float-share
    statistics when that column holds numeric values.

    Args:
        data: Rows to export.
        output_file: Destination ``.xlsx`` path.

    Returns:
        ``True`` on success, ``False`` when *data* is empty or writing fails.
    """
    if not data:
        return False

    try:
        df = pd.DataFrame(data)
        if 'stock_code' in df.columns:
            df.set_index('stock_code', inplace=True)

        prices = _month_price_frame(df)
        month_count = len(prices.columns)

        with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
            df.to_excel(writer, sheet_name='合并数据')
            if month_count:
                _write_stats_sheet(writer, prices)
            if month_count >= 2:
                _write_gain_sheet(writer, df, prices)
            if month_count:
                _write_trend_sheet(writer, prices)
            if 'float_share' in df.columns:
                _write_float_share_sheet(writer, df)

        logging.info("成功导出Excel文件: %s", output_file)
        return True
    except Exception as e:
        logging.error("导出Excel失败: %s", e)
        return False


def export_combined_data(db_config: dict,
                         monthly_table: str,
                         float_share_table: str,
                         csv_file: Optional[str] = None,
                         excel_file: Optional[str] = None) -> bool:
    """Load both tables, merge them and export to CSV and/or Excel.

    Args:
        db_config: Keyword arguments for ``MySQLHelper``.
        monthly_table: Table holding the monthly average prices.
        float_share_table: Table holding the float shares.
        csv_file: CSV output path (optional).
        excel_file: Excel output path (optional).

    Returns:
        ``True`` when at least one requested export succeeded; ``False``
        when no output was requested, data could not be loaded, or every
        requested export failed.
    """
    if not csv_file and not excel_file:
        logging.error("必须指定至少一种输出格式")
        return False

    monthly_data = get_monthly_avg_data(db_config, monthly_table)
    if not monthly_data:
        logging.error("无法获取月度均价数据")
        return False

    float_share_data = get_float_share_data(db_config, float_share_table)
    if not float_share_data:
        logging.error("无法获取流通股本数据")
        return False

    merged_data = merge_data(monthly_data, float_share_data)

    # Only exports that were actually requested count towards the result;
    # an export that was not requested must not be reported as a success.
    results = []
    if csv_file:
        results.append(export_to_csv(merged_data, csv_file))
    if excel_file:
        results.append(export_to_excel(merged_data, excel_file))
    return any(results)


def main() -> None:
    """Example run: export the merged HK data to CSV and Excel."""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler('export_combined_data.log', encoding='utf-8'),
            logging.StreamHandler(),
        ],
    )

    db_config = {
        'host': 'localhost',
        'user': 'root',
        # NOTE(security): a password in source control is a risk. Set
        # KLINE_DB_PASSWORD to override; the literal is only a fallback so
        # existing setups keep working.
        'password': os.environ.get('KLINE_DB_PASSWORD', 'bzskmysql'),
        'database': 'klinedata_1d_hk',
    }

    success = export_combined_data(
        db_config=db_config,
        monthly_table="hk_monthly_avg_2410_2508",
        float_share_table="conditionalselection",
        csv_file="hk_stocks_combined_data.csv",
        excel_file="hk_stocks_combined_data.xlsx",
    )

    if success:
        logging.info("数据合并导出成功完成")
    else:
        logging.error("数据合并导出过程中出现错误")


if __name__ == "__main__":
    main()

# import csv # import pandas as pd # from MySQLHelper import MySQLHelper # import logging # from typing import List, Dict, Optional # # 配置日志 # logging.basicConfig( # level=logging.INFO, # format='%(asctime)s - %(levelname)s - %(message)s', # handlers=[ # logging.FileHandler('stock_data_export.log', encoding='utf-8'), # logging.StreamHandler() # ] # ) # def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]: # """ # 从数据库读取月度均价数据 # Args: # db_config: 数据库配置 # table_name: 源数据表名 # Returns: # List[Dict]: 查询结果数据集,失败返回None # """ # try: # with MySQLHelper(**db_config) as db: # # 获取表结构信息 # columns = db.execute_query(f""" # SELECT COLUMN_NAME # FROM INFORMATION_SCHEMA.COLUMNS # WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s # ORDER BY ORDINAL_POSITION # """, (db_config['database'], table_name)) # if not columns: # logging.error(f"表 {table_name} 不存在或没有列") # return None # # 获取列名列表(排除id和update_time) # field_names = [col['COLUMN_NAME'] for col in columns # if col['COLUMN_NAME'] not in ('id', 'update_time')] # # 查询数据 # data = db.execute_query(f""" # SELECT {', '.join(field_names)} # FROM {table_name} # ORDER BY stock_code # """) # if not data: # logging.error(f"表 {table_name} 中没有数据") # return None # return data # except Exception as e: # logging.error(f"从数据库读取月度均价数据失败: {str(e)}") # return None # def get_float_share_data(db_config: dict, table_name: str) -> Optional[List[Dict]]: # """ # 从conditionalselection表读取流通股本数据 # Args: # db_config: 数据库配置 # table_name: 源数据表名 # Returns: # List[Dict]: 查询结果数据集,失败返回None # """ # try: # with MySQLHelper(**db_config) as db: # # 查询流通股本数据 # data = db.execute_query(f""" # SELECT stock_code, stock_name, float_share # FROM {table_name} # ORDER BY stock_code # """) # if not data: # logging.error(f"表 {table_name} 中没有流通股本数据") # return None # return data # except Exception as e: # logging.error(f"从数据库读取流通股本数据失败: {str(e)}") # return None # def calculate_adjusted_prices(monthly_data: List[Dict],
float_share_data: List[Dict]) -> List[Dict]: # """ # 计算股本调整后价格(股价×流通股本) # Args: # monthly_data: 月度均价数据 # float_share_data: 流通股本数据 # Returns: # List[Dict]: 包含调整后价格的数据集 # """ # adjusted_data = [] # float_shares = {item['stock_code']: item['float_share'] for item in float_share_data} # for stock in monthly_data: # adjusted_stock = stock.copy() # stock_code = stock['stock_code'] # float_share = float_shares.get(stock_code) # if float_share is None or not isinstance(float_share, (int, float)): # logging.warning(f"股票 {stock_code} 缺少流通股本数据,跳过调整") # adjusted_data.append(adjusted_stock) # continue # # 对每个月的价格乘以流通股本 # nIndex = 0 # for key in stock.keys(): # nIndex = nIndex + 1 # # if key.startswith('ym_') and isinstance(stock[key], (int, float)): # if nIndex > 2: # adjusted_stock[key] = stock[key] * float_share # adjusted_data.append(adjusted_stock) # return adjusted_data # def export_to_csv(data: List[Dict], output_file: str) -> bool: # """ # 将数据导出到CSV文件 # Args: # data: 要导出的数据集 # output_file: 输出的CSV文件路径 # Returns: # bool: 是否导出成功 # """ # if not data: # logging.error("没有数据可导出") # return False # try: # field_names = list(data[0].keys()) # # 字段名到中文的映射 # header_map = { # 'stock_code': '股票代码', # 'stock_name': '股票名称', # 'ym_2410': '2024年10月', # 'ym_2411': '2024年11月', # 'ym_2412': '2024年12月', # 'ym_2501': '2025年1月', # 'ym_2502': '2025年2月', # 'ym_2503': '2025年3月', # 'ym_2504': '2025年4月', # 'ym_2505': '2025年5月', # 'ym_2506': '2025年6月', # 'ym_2507': '2025年7月', # 'ym_2508': '2025年8月' # } # with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile: # writer = csv.DictWriter(csvfile, fieldnames=field_names) # # 写入中文表头 # writer.writerow({col: header_map.get(col, col) for col in field_names}) # # 写入数据 # writer.writerows(data) # logging.info(f"成功导出 {len(data)} 条记录到CSV文件: {output_file}") # return True # except Exception as e: # logging.error(f"导出到CSV失败: {str(e)}") # return False # def export_to_excel(data: List[Dict], output_file: str) -> bool: # """ # 将数据导出为Excel文件 # 
Args: # data: 要导出的数据集 # output_file: 输出的Excel文件路径 # Returns: # bool: 是否导出成功 # """ # if not data: # logging.error("没有数据可导出") # return False # try: # df = pd.DataFrame(data) # # 设置股票代码为索引 # if 'stock_code' in df.columns: # df.set_index('stock_code', inplace=True) # # 创建Excel writer对象 # with pd.ExcelWriter(output_file, engine='openpyxl') as writer: # # 原始数据工作表 # df.to_excel(writer, sheet_name='调整后价格') # # 统计信息工作表 # numeric_cols = [col for col in df.columns if col.startswith('ym_') and pd.api.types.is_numeric_dtype(df[col])] # if numeric_cols: # stats = df[numeric_cols].describe().loc[['mean', 'min', 'max', 'std']] # stats.to_excel(writer, sheet_name='统计信息') # # 涨幅排名工作表 # if len(numeric_cols) >= 2: # first_month = numeric_cols[0] # last_month = numeric_cols[-1] # df['涨幅(%)'] = (df[last_month] - df[first_month]) / df[first_month] * 100 # result_df = df[['stock_name', '涨幅(%)']].copy() # result_df.dropna(subset=['涨幅(%)'], inplace=True) # result_df.sort_values('涨幅(%)', ascending=False, inplace=True) # result_df.to_excel(writer, sheet_name='涨幅排名') # # 月度趋势工作表 # trend_df = df[numeric_cols].transpose() # trend_df.index = [col.replace('ym_', '') for col in numeric_cols] # trend_df.to_excel(writer, sheet_name='月度趋势') # logging.info(f"成功导出Excel文件: {output_file}") # return True # except Exception as e: # logging.error(f"导出Excel失败: {str(e)}") # return False # def export_adjusted_prices(db_config: dict, # monthly_table: str, # float_share_table: str, # csv_file: str = None, # excel_file: str = None) -> bool: # """ # 导出股本调整后的价格数据 # Args: # db_config: 数据库配置 # monthly_table: 月度均价表名 # float_share_table: 流通股本表名 # csv_file: CSV输出路径(可选) # excel_file: Excel输出路径(可选) # Returns: # bool: 是否至少有一种格式导出成功 # """ # # 获取数据 # monthly_data = get_monthly_avg_data(db_config, monthly_table) # if not monthly_data: # return False # float_share_data = get_float_share_data(db_config, float_share_table) # if not float_share_data: # return False # # 计算调整后价格 # adjusted_data = 
calculate_adjusted_prices(monthly_data, float_share_data) # # 导出结果 # results = [] # if csv_file: # results.append(export_to_csv(adjusted_data, csv_file)) # if excel_file: # results.append(export_to_excel(adjusted_data, excel_file)) # return any(results) # if __name__ == "__main__": # # 数据库配置 # db_config = { # 'host': 'localhost', # 'user': 'root', # 'password': 'bzskmysql', # 'database': 'klinedata_1d_hk' # } # # 导出数据 # success = export_adjusted_prices( # db_config=db_config, # monthly_table="hk_monthly_avg_2410_2508", # float_share_table="conditionalselection", # csv_file="adjusted_stock_prices.csv", # excel_file="adjusted_stock_prices.xlsx" # ) # if success: # logging.info("数据导出成功完成") # else: # logging.error("数据导出过程中出现错误")