Files
HKDataManagment/PyCode/CalculateCapitalization.py

298 lines
11 KiB
Python
Raw Normal View History

2025-08-15 13:22:58 +08:00
"""
计算平均市值
根据 conditionalselection 表格中的股票代码查找日K数据表格
每个月计算一次结果存放在 hk_monthly_avg_2410_2508 表格中
"""
import pandas as pd
from datetime import datetime
import logging
from futu import *
from pymysql import Error
from MySQLHelper import MySQLHelper # MySQLHelper类保存为单独文件
from typing import Optional, List, Dict, Union, Tuple
def create_monthly_avg_table(db_config: dict, target_table: str = "monthly_close_avg") -> bool:
    """
    Make sure the table for monthly close-price averages exists.

    The table has one row per stock (unique on ``stock_code``) and one
    ``ym_YYMM`` column for each month from 2024-10 through 2025-08.

    Args:
        db_config: Keyword arguments forwarded to ``MySQLHelper``.
        target_table: Name of the table to create when it is missing.

    Returns:
        bool: True when the DDL statement ran without raising, False when
        any exception occurred (the exception is logged, not re-raised).
    """
    try:
        with MySQLHelper(**db_config) as helper:
            # CREATE TABLE IF NOT EXISTS makes repeated calls harmless.
            ddl = f"""
            CREATE TABLE IF NOT EXISTS {target_table} (
            id INT AUTO_INCREMENT PRIMARY KEY,
            stock_code VARCHAR(20) NOT NULL COMMENT '股票代码',
            stock_name VARCHAR(50) COMMENT '股票名称',
            ym_2410 DECIMAL(10, 3) COMMENT '2024年10月均收盘价',
            ym_2411 DECIMAL(10, 3) COMMENT '2024年11月均收盘价',
            ym_2412 DECIMAL(10, 3) COMMENT '2024年12月均收盘价',
            ym_2501 DECIMAL(10, 3) COMMENT '2025年1月均收盘价',
            ym_2502 DECIMAL(10, 3) COMMENT '2025年2月均收盘价',
            ym_2503 DECIMAL(10, 3) COMMENT '2025年3月均收盘价',
            ym_2504 DECIMAL(10, 3) COMMENT '2025年4月均收盘价',
            ym_2505 DECIMAL(10, 3) COMMENT '2025年5月均收盘价',
            ym_2506 DECIMAL(10, 3) COMMENT '2025年6月均收盘价',
            ym_2507 DECIMAL(10, 3) COMMENT '2025年7月均收盘价',
            ym_2508 DECIMAL(10, 3) COMMENT '2025年8月均收盘价',
            update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
            UNIQUE KEY uk_stock_code (stock_code)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='月度收盘价均值表(2024.10-2025.08)'
            """
            helper.execute_update(ddl)
            return True
    except Exception as exc:
        logging.error(f"创建表失败: {str(exc)}")
        return False
def calculate_and_save_monthly_avg(db_config: dict,
                                   source_table: str = "stock_quotes",
                                   target_table: str = "monthly_close_avg") -> bool:
    """
    Compute and store monthly average close prices for 2024-10 .. 2025-08.

    For every distinct (stock_code, stock_name) pair found in ``source_table``
    the average ``close_price`` of each month is computed and upserted into
    ``target_table`` (one row per stock, one ``ym_YYMM`` column per month).

    Args:
        db_config: Keyword arguments forwarded to ``MySQLHelper``.
        source_table: Table holding the daily quotes.
        target_table: Table receiving the monthly averages.

    Returns:
        bool: True on success. False when the target table could not be
        created, when the source table yields no stock rows, or when any
        exception is raised (the exception is logged, not re-raised).
    """
    # Half-open windows [month start, next month start). Compared with
    # BETWEEN first-day AND last-day this also keeps last-day rows whose
    # trade_date carries a time component, and needs no month-length table.
    month_ranges = {
        'ym_2410': ('2024-10-01', '2024-11-01'),
        'ym_2411': ('2024-11-01', '2024-12-01'),
        'ym_2412': ('2024-12-01', '2025-01-01'),
        'ym_2501': ('2025-01-01', '2025-02-01'),
        'ym_2502': ('2025-02-01', '2025-03-01'),
        'ym_2503': ('2025-03-01', '2025-04-01'),
        'ym_2504': ('2025-04-01', '2025-05-01'),
        'ym_2505': ('2025-05-01', '2025-06-01'),
        'ym_2506': ('2025-06-01', '2025-07-01'),
        'ym_2507': ('2025-07-01', '2025-08-01'),
        'ym_2508': ('2025-08-01', '2025-09-01'),
    }

    # Both statements are loop-invariant, so build them once. The column
    # lists are derived from month_ranges so they cannot drift apart.
    month_cols = list(month_ranges)
    insert_cols = ['stock_code', 'stock_name', *month_cols]
    column_list = ", ".join(insert_cols)
    placeholders = ", ".join(f"%({col})s" for col in insert_cols)
    update_list = ",\n            ".join(
        f"{col} = VALUES({col})" for col in ['stock_name', *month_cols]
    )
    avg_sql = f"""
        SELECT AVG(close_price) as avg_close
        FROM {source_table}
        WHERE stock_code = %s
        AND trade_date >= %s AND trade_date < %s
    """
    upsert_sql = f"""
        INSERT INTO {target_table} ({column_list})
        VALUES ({placeholders})
        ON DUPLICATE KEY UPDATE
            {update_list},
            update_time = CURRENT_TIMESTAMP
    """

    try:
        # Make sure the target table exists before writing to it.
        if not create_monthly_avg_table(db_config, target_table):
            return False

        with MySQLHelper(**db_config) as db:
            # Every stock present in the source table.
            stock_info = db.execute_query(
                f"SELECT DISTINCT stock_code, stock_name FROM {source_table}"
            )
            if not stock_info:
                logging.error("没有获取到股票基本信息")
                return False

            for stock in stock_info:
                stock_code = stock['stock_code']
                monthly_data = {
                    'stock_code': stock_code,
                    'stock_name': stock['stock_name'],
                }
                for month_col, (start_date, next_start) in month_ranges.items():
                    rows = db.execute_query(avg_sql, (stock_code, start_date, next_start))
                    avg_close = rows[0]['avg_close'] if rows else None
                    # AVG() is NULL when no row matched; test for None
                    # explicitly so a legitimate average of 0 is not dropped.
                    monthly_data[month_col] = (
                        float(avg_close) if avg_close is not None else None
                    )
                # Insert a new row or refresh the existing one for this stock.
                db.execute_update(upsert_sql, monthly_data)

            logging.info("月度均值计算和保存完成")
            return True
    except Exception as e:
        logging.error(f"计算和保存月度均值失败: {str(e)}")
        return False
# Safe conversion helpers
def safe_float(v) -> Optional[float]:
    """
    Convert ``v`` to float, returning None for missing or unusable values.

    None is returned when ``v`` is NA according to ``pd.notna``, when its
    string form is ``N/A`` (case-insensitive), when ``float()`` cannot
    convert it, or when the conversion yields NaN (e.g. the string "nan").

    Args:
        v: Any scalar value.

    Returns:
        The float value, or None.
    """
    try:
        if not pd.notna(v) or str(v).upper() == 'N/A':
            return None
        number = float(v)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        return None
    # The string "nan" passes the notna() guard above (it is just a string)
    # but converts to NaN; treat it as missing like any other NA value.
    return None if pd.isna(number) else number
def safe_int(v) -> Optional[int]:
    """
    Convert ``v`` to int, returning None for missing or unusable values.

    None is returned when ``v`` is NA according to ``pd.notna``, when its
    string form is ``N/A`` (case-insensitive), or when ``int()`` cannot
    convert it. Strings with a decimal point such as "12.0" are rejected
    by ``int()`` and therefore also give None.

    Args:
        v: Any scalar value.

    Returns:
        The int value, or None.
    """
    try:
        if not pd.notna(v) or str(v).upper() == 'N/A':
            return None
        return int(v)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int(float('inf')) — infinity is not NA for pandas,
        # so it reaches int() and must be handled here.
        return None
def safe_parse_date(date_str, date_format='%Y-%m-%d') -> Optional[datetime]:
    """
    Parse a date string without raising on bad input.

    :param date_str: Date string; surrounding whitespace is ignored.
    :param date_format: ``strptime`` format used for parsing.
    :return: The parsed datetime, or None when the value is empty, NA,
        blank, or does not match ``date_format`` (a warning is logged in
        the last case).
    """
    if not date_str or pd.isna(date_str):
        return None
    # Strip once and use the stripped text for BOTH the blank check and the
    # parse, so a padded value like " 2024-10-05 " is accepted instead of
    # failing strptime on the surrounding whitespace.
    text = str(date_str).strip()
    if text == '':
        return None
    try:
        return datetime.strptime(text, date_format)
    except ValueError:
        logging.warning(f"无法解析日期字符串: {date_str}")
        return None
def validate_market_data(dataset: list) -> list:
    """
    Filter a list of market-data records down to the usable ones.

    A record is dropped when ``code`` or ``name`` is missing/empty, when
    the name ends with ``R``, or when inspecting it raises. A negative
    ``lot_size`` does not drop the record; the field is reset to None
    in place (the input record itself is modified).

    Args:
        dataset (list): Raw records; each is expected to offer ``.get()``
            and item assignment, like a dict.

    Returns:
        list: The records that passed validation, in input order.
    """
    validated_data = []
    for item in dataset:
        # Captured inside the try so the except handler never has to touch
        # ``item`` again: a record without .get() (e.g. None) used to raise
        # a second time inside the handler and abort the whole run.
        code = None
        try:
            code = item.get('code')
            name = item.get('name')
            # Required fields.
            if not code or not name:
                logging.warning("跳过无效数据: 缺少必要字段 code或name")
                continue
            # Skip instruments whose name ends with 'R'.
            if name[-1] == 'R':
                continue
            # Range check: a negative lot size is cleared, not rejected.
            lot_size = item.get('lot_size')
            if lot_size is not None and lot_size < 0:
                logging.warning(f"股票 {code} 的lot_size为负值: {lot_size}")
                item['lot_size'] = None
            validated_data.append(item)
        except Exception as e:
            logging.warning(f"数据验证失败,跳过记录 {code}: {str(e)}")
            continue
    return validated_data
def get_market_data(market: Market) -> List[str]:
    """
    Fetch the stock codes of one market through the Futu quote API.

    Opens a quote context on 127.0.0.1:11111, requests the basic info of
    all ``SecurityType.STOCK`` entries of ``market`` and always closes the
    context afterwards.

    Args:
        market (Market): Futu market enum value, e.g. ``Market.HK``.

    Returns:
        List[str]: The values of the ``code`` column as strings, or an
        empty list when the API reports an error or an exception occurs.
    """
    ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    try:
        status, payload = ctx.get_stock_basicinfo(market, SecurityType.STOCK)
        if status != RET_OK:
            # On failure the second return value is logged as the error detail.
            logging.error(f"获取股票代码失败: {payload}")
            return []
        # Take the 'code' column and turn it into a plain list of strings.
        code_list = payload['code'].astype(str).tolist()
        logging.info(f"获取到 {market} 市场 {len(code_list)} 个股票代码")
        return code_list
    except Exception as err:
        logging.error(f"获取股票代码时发生异常: {str(err)}")
        return []
    finally:
        ctx.close()
def get_stock_codes(db_config: Optional[dict] = None) -> List[str]:
    """
    Return the stock codes stored in the ``conditionalselection`` table.

    Rows with an empty stock code, an empty stock name, or a stock name
    ending in ``R`` are left out.

    Args:
        db_config: Keyword arguments forwarded to ``MySQLHelper``. When
            omitted, the module-level ``db_config`` is used; that global is
            only defined when the file runs as a script, so callers
            importing this module should pass the configuration explicitly.

    Returns:
        List[str]: Matching stock codes, or an empty list when no
        configuration is available or the query fails (the failure is
        logged, not raised).
    """
    config = db_config if db_config is not None else globals().get('db_config')
    if config is None:
        logging.error("获取股票代码失败: 未提供 db_config")
        return []
    try:
        with MySQLHelper(**config) as db:
            sql = "SELECT DISTINCT stock_code,stock_name FROM conditionalselection"
            results = db.execute_query(sql)
            codes = []
            for row in results:
                name = row.get('stock_name', '')
                # Keep rows with a code and a name not ending in 'R'.
                if row['stock_code'] and name and name[-1] != 'R':
                    codes.append(row['stock_code'])
            return codes
    except Exception as e:
        logging.error(f"获取股票代码失败: {str(e)}")
        return []
if __name__ == "__main__":
# 配置日志
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('Debug.log', encoding='utf-8'), # 关键在这里
logging.StreamHandler()
]
)
# 数据库配置
db_config = {
'host': 'localhost',
'user': 'root',
'password': 'bzskmysql',
'database': 'klinedata_1d_hk'
}
# market_data = get_market_data(Market.HK)
market_data = get_stock_codes() # 使用按照价格和流通股数量筛选的那个表格
for code in market_data:
tablename = 'hk_' + code[3:]
# 计算并保存月度均值
success = calculate_and_save_monthly_avg(
db_config=db_config,
source_table=tablename,
target_table="hk_monthly_avg_2410_2508"
)
if success:
logging.info("月度均值处理成功完成")
else:
logging.error("处理过程中出现错误")