"""Compute monthly average closing prices for Hong Kong stocks.

Stock codes are read from the ``conditionalselection`` table. Each code is
mapped to its daily K-line table (``hk_<code>``), the average closing price is
computed for every month from 2024-10 through 2025-08, and the result is
upserted into the ``hk_monthly_avg_2410_2508`` table.
"""
import pandas as pd
from datetime import datetime
import logging
from futu import *
from pymysql import Error
from MySQLHelper import MySQLHelper  # the MySQLHelper class lives in its own file
from typing import Optional, List, Dict, Union, Tuple


def create_monthly_avg_table(db_config: dict, target_table: str = "monthly_close_avg") -> bool:
    """Create the table that stores the 2024-10 .. 2025-08 monthly averages.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this
    repeatedly is harmless.

    Args:
        db_config: Keyword arguments forwarded to ``MySQLHelper``.
        target_table: Name of the table to create. It is interpolated into
            the SQL text verbatim, so it must come from a trusted source.

    Returns:
        bool: True when the statement ran without raising, False otherwise.
    """
    try:
        with MySQLHelper(**db_config) as db:
            create_sql = f"""
            CREATE TABLE IF NOT EXISTS {target_table} (
                id INT AUTO_INCREMENT PRIMARY KEY,
                stock_code VARCHAR(20) NOT NULL COMMENT '股票代码',
                stock_name VARCHAR(50) COMMENT '股票名称',
                ym_2410 DECIMAL(10, 3) COMMENT '2024年10月均收盘价',
                ym_2411 DECIMAL(10, 3) COMMENT '2024年11月均收盘价',
                ym_2412 DECIMAL(10, 3) COMMENT '2024年12月均收盘价',
                ym_2501 DECIMAL(10, 3) COMMENT '2025年1月均收盘价',
                ym_2502 DECIMAL(10, 3) COMMENT '2025年2月均收盘价',
                ym_2503 DECIMAL(10, 3) COMMENT '2025年3月均收盘价',
                ym_2504 DECIMAL(10, 3) COMMENT '2025年4月均收盘价',
                ym_2505 DECIMAL(10, 3) COMMENT '2025年5月均收盘价',
                ym_2506 DECIMAL(10, 3) COMMENT '2025年6月均收盘价',
                ym_2507 DECIMAL(10, 3) COMMENT '2025年7月均收盘价',
                ym_2508 DECIMAL(10, 3) COMMENT '2025年8月均收盘价',
                update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
                UNIQUE KEY uk_stock_code (stock_code)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='月度收盘价均值表(2024.10-2025.08)'
            """
            db.execute_update(create_sql)
            return True
    except Exception as e:
        logging.error(f"创建表失败: {str(e)}")
        return False


def calculate_and_save_monthly_avg(db_config: dict, source_table: str = "stock_quotes",
                                   target_table: str = "monthly_close_avg") -> bool:
    """Compute and store the monthly average close for 2024-10 .. 2025-08.

    For every distinct ``(stock_code, stock_name)`` pair found in
    ``source_table`` the average ``close_price`` of each month is queried and
    the resulting row is upserted into ``target_table`` (keyed on
    ``stock_code``). A month without data is stored as NULL.

    Args:
        db_config: Keyword arguments forwarded to ``MySQLHelper``.
        source_table: Table holding the daily quotes. Interpolated into the
            SQL text verbatim, so it must come from a trusted source.
        target_table: Table receiving the monthly averages. Also interpolated
            verbatim.

    Returns:
        bool: True on success; False when the target table could not be
        created, the source table yielded no stocks, or any step raised.
    """
    # Inclusive date range of every month column in the target table.
    # NOTE(review): BETWEEN with a bare end date excludes rows later than
    # 00:00:00 on the last day if trade_date is a DATETIME column - confirm
    # the column type.
    month_ranges = {
        'ym_2410': ('2024-10-01', '2024-10-31'),
        'ym_2411': ('2024-11-01', '2024-11-30'),
        'ym_2412': ('2024-12-01', '2024-12-31'),
        'ym_2501': ('2025-01-01', '2025-01-31'),
        'ym_2502': ('2025-02-01', '2025-02-28'),
        'ym_2503': ('2025-03-01', '2025-03-31'),
        'ym_2504': ('2025-04-01', '2025-04-30'),
        'ym_2505': ('2025-05-01', '2025-05-31'),
        'ym_2506': ('2025-06-01', '2025-06-30'),
        'ym_2507': ('2025-07-01', '2025-07-31'),
        'ym_2508': ('2025-08-01', '2025-08-31')
    }

    # Both statements are loop-invariant, so build them once up front.
    avg_sql = f"""
    SELECT AVG(close_price) as avg_close
    FROM {source_table}
    WHERE stock_code = %s
    AND trade_date BETWEEN %s AND %s
    """
    upsert_sql = f"""
    INSERT INTO {target_table} (
        stock_code, stock_name,
        ym_2410, ym_2411, ym_2412,
        ym_2501, ym_2502, ym_2503, ym_2504,
        ym_2505, ym_2506, ym_2507, ym_2508
    ) VALUES (
        %(stock_code)s, %(stock_name)s,
        %(ym_2410)s, %(ym_2411)s, %(ym_2412)s,
        %(ym_2501)s, %(ym_2502)s, %(ym_2503)s, %(ym_2504)s,
        %(ym_2505)s, %(ym_2506)s, %(ym_2507)s, %(ym_2508)s
    )
    ON DUPLICATE KEY UPDATE
        stock_name = VALUES(stock_name),
        ym_2410 = VALUES(ym_2410),
        ym_2411 = VALUES(ym_2411),
        ym_2412 = VALUES(ym_2412),
        ym_2501 = VALUES(ym_2501),
        ym_2502 = VALUES(ym_2502),
        ym_2503 = VALUES(ym_2503),
        ym_2504 = VALUES(ym_2504),
        ym_2505 = VALUES(ym_2505),
        ym_2506 = VALUES(ym_2506),
        ym_2507 = VALUES(ym_2507),
        ym_2508 = VALUES(ym_2508),
        update_time = CURRENT_TIMESTAMP
    """

    try:
        # Make sure the target table exists before writing to it.
        if not create_monthly_avg_table(db_config, target_table):
            return False

        with MySQLHelper(**db_config) as db:
            # Every stock (code + name) present in the source table.
            stock_info = db.execute_query(
                f"SELECT DISTINCT stock_code, stock_name FROM {source_table}"
            )
            if not stock_info:
                logging.error("没有获取到股票基本信息")
                return False

            for stock in stock_info:
                stock_code = stock['stock_code']
                monthly_data = {'stock_code': stock_code, 'stock_name': stock['stock_name']}

                for month_col, (start_date, end_date) in month_ranges.items():
                    result = db.execute_query(avg_sql, (stock_code, start_date, end_date))
                    avg_close = result[0]['avg_close'] if result else None
                    # Compare against None explicitly: AVG() yields NULL for
                    # an empty month, while a genuine average of 0 must be
                    # stored as 0 rather than NULL.
                    monthly_data[month_col] = float(avg_close) if avg_close is not None else None

                # Insert the row, or refresh it when the stock already exists.
                db.execute_update(upsert_sql, monthly_data)

        logging.info("月度均值计算和保存完成")
        return True
    except Exception as e:
        logging.error(f"计算和保存月度均值失败: {str(e)}")
        return False


# Safe conversion helpers
def safe_float(v) -> Optional[float]:
    """Convert ``v`` to float, returning None for NA values, 'N/A' or bad input."""
    try:
        return float(v) if pd.notna(v) and str(v).upper() != 'N/A' else None
    except (ValueError, TypeError):
        return None


def safe_int(v) -> Optional[int]:
    """Convert ``v`` to int, returning None for NA values, 'N/A' or bad input.

    Conversion is a plain ``int(v)``: floats are truncated and strings such
    as ``'3.0'`` are rejected (None is returned).
    """
    try:
        return int(v) if pd.notna(v) and str(v).upper() != 'N/A' else None
    except (ValueError, TypeError):
        return None


def safe_parse_date(date_str, date_format='%Y-%m-%d'):
    """Parse a date string without raising.

    Args:
        date_str: Value to parse; it is converted with ``str()`` first.
        date_format: ``datetime.strptime`` format string.

    Returns:
        The parsed ``datetime``, or None when the input is empty, NA, blank,
        or does not match ``date_format`` (the latter is logged as a warning).
    """
    if not date_str or pd.isna(date_str) or str(date_str).strip() == '':
        return None
    try:
        return datetime.strptime(str(date_str), date_format)
    except ValueError:
        logging.warning(f"无法解析日期字符串: {date_str}")
        return None


def validate_market_data(dataset: list) -> list:
    """Filter a list of market-data records down to the valid ones.

    A record is dropped when it lacks a ``code`` or ``name``, when its name
    ends with ``'R'``, or when validating it raises. A negative ``lot_size``
    does not drop the record; the value is reset to None instead. Note that
    this reset mutates the caller's dict in place.

    Args:
        dataset (list): Raw records (dict-like objects).

    Returns:
        list: The records that passed validation, in their original order.
    """
    validated_data = []
    for item in dataset:
        try:
            # Required fields.
            if not item.get('code') or not item.get('name'):
                logging.warning("跳过无效数据: 缺少必要字段 code或name")
                continue

            # Skip stocks whose name ends with 'R'.
            if item['name'].endswith('R'):
                continue

            # Range check: a negative lot size is treated as unknown.
            if item.get('lot_size') is not None and item['lot_size'] < 0:
                logging.warning(f"股票 {item['code']} 的lot_size为负值: {item['lot_size']}")
                item['lot_size'] = None

            validated_data.append(item)
        except Exception as e:
            logging.warning(f"数据验证失败,跳过记录 {item.get('code')}: {str(e)}")
            continue
    return validated_data


def get_market_data(market: Market) -> List[str]:
    """Fetch the stock codes of one market from the Futu API.

    Connects to the quote gateway at 127.0.0.1:11111 and always closes the
    connection before returning.

    Args:
        market (Market): Market enum value, e.g. ``Market.SH``, ``Market.SZ``.

    Returns:
        List[str]: The stock codes, or an empty list when the request fails
        or raises.
    """
    quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    try:
        ret, data = quote_ctx.get_stock_basicinfo(market, SecurityType.STOCK)
        if ret == RET_OK:
            # Extract the 'code' column as a plain list of strings.
            codes = data['code'].astype(str).tolist()
            logging.info(f"获取到 {market} 市场 {len(codes)} 个股票代码")
            return codes
        else:
            # On failure `data` carries the error description.
            logging.error(f"获取股票代码失败: {data}")
            return []
    except Exception as e:
        logging.error(f"获取股票代码时发生异常: {str(e)}")
        return []
    finally:
        quote_ctx.close()


def get_stock_codes(db_config: Optional[dict] = None) -> List[str]:
    """Return the stock codes listed in the ``conditionalselection`` table.

    Rows with an empty code, an empty name, or a name ending in ``'R'`` are
    excluded.

    Args:
        db_config: Keyword arguments forwarded to ``MySQLHelper``. When
            omitted, the module-level ``db_config`` (set when this file runs
            as a script) is used, which keeps older zero-argument calls
            working.

    Returns:
        List[str]: The matching stock codes, or an empty list when no
        configuration is available or the query fails.
    """
    if db_config is None:
        # Backward compatibility: fall back to the global set by the script.
        db_config = globals().get('db_config')
    if not db_config:
        logging.error("获取股票代码失败: db_config is not defined")
        return []

    try:
        with MySQLHelper(**db_config) as db:
            sql = "SELECT DISTINCT stock_code,stock_name FROM conditionalselection"
            results = db.execute_query(sql) or []
            return [
                row['stock_code']
                for row in results
                # Exclude rows without a name and names ending with 'R'.
                if row['stock_code'] and row.get('stock_name', '') and row['stock_name'][-1] != 'R'
            ]
    except Exception as e:
        logging.error(f"获取股票代码失败: {str(e)}")
        return []


if __name__ == "__main__":
    # Logging: write to both a file and the console.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            # An explicit UTF-8 encoding is essential: messages are Chinese.
            logging.FileHandler('Debug.log', encoding='utf-8'),
            logging.StreamHandler()
        ]
    )

    # Database configuration.
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or a config file kept out of version control.
    db_config = {
        'host': 'localhost',
        'user': 'root',
        'password': 'bzskmysql',
        'database': 'klinedata_1d_hk'
    }

    # Alternative source: market_data = get_market_data(Market.HK)
    # Use the table pre-filtered by price and circulating share count.
    market_data = get_stock_codes(db_config)

    for code in market_data:
        # Drops the first three characters of the code; presumably a
        # market prefix such as 'HK.' - confirm against the stored codes.
        tablename = 'hk_' + code[3:]

        # Compute and store the monthly averages for this stock's table.
        success = calculate_and_save_monthly_avg(
            db_config=db_config,
            source_table=tablename,
            target_table="hk_monthly_avg_2410_2508"
        )

        if success:
            logging.info("月度均值处理成功完成")
        else:
            logging.error("处理过程中出现错误")