Files
HKDataManagment/PyCode/CalculateCapitalization.py
2025-08-15 13:22:58 +08:00

298 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
计算平均市值
根据 conditionalselection 表格中的股票代码查找日K数据表格
每个月计算一次结果存放在 hk_monthly_avg_2410_2508 表格中
"""
import pandas as pd
from datetime import datetime
import logging
from futu import *
from pymysql import Error
from MySQLHelper import MySQLHelper # MySQLHelper类保存为单独文件
from typing import Optional, List, Dict, Union, Tuple
def create_monthly_avg_table(db_config: dict, target_table: str = "monthly_close_avg") -> bool:
    """
    Make sure the table holding monthly average closing prices exists.

    The table has one row per stock (unique on ``stock_code``) and one
    ``ym_YYMM`` column for every month from October 2024 through August 2025.

    Args:
        db_config: Keyword arguments forwarded to ``MySQLHelper``.
        target_table: Name of the table to create. It is interpolated into the
            statement as an identifier, so it must come from a trusted source.

    Returns:
        bool: True when the statement ran without raising, False otherwise
        (the error is logged).
    """
    try:
        # (year, month) pairs covered by the table, in column order.
        periods = [(2024, month) for month in (10, 11, 12)]
        periods += [(2025, month) for month in range(1, 9)]
        month_columns = [
            f"ym_{year % 100:02d}{month:02d} DECIMAL(10, 3) COMMENT '{year}年{month}月均收盘价',"
            for year, month in periods
        ]
        # Leading/trailing empty entries reproduce the newlines that wrap the statement.
        ddl_lines = [
            "",
            f"CREATE TABLE IF NOT EXISTS {target_table} (",
            "id INT AUTO_INCREMENT PRIMARY KEY,",
            "stock_code VARCHAR(20) NOT NULL COMMENT '股票代码',",
            "stock_name VARCHAR(50) COMMENT '股票名称',",
            *month_columns,
            "update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',",
            "UNIQUE KEY uk_stock_code (stock_code)",
            ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='月度收盘价均值表(2024.10-2025.08)'",
            "",
        ]
        ddl = "\n".join(ddl_lines)
        with MySQLHelper(**db_config) as conn:
            conn.execute_update(ddl)
            return True
    except Exception as exc:
        logging.error(f"创建表失败: {str(exc)}")
        return False
def calculate_and_save_monthly_avg(db_config: dict,
                                   source_table: str = "stock_quotes",
                                   target_table: str = "monthly_close_avg") -> bool:
    """
    Compute monthly average closing prices (Oct 2024 - Aug 2025) and upsert them.

    For every distinct ``(stock_code, stock_name)`` found in ``source_table`` the
    average ``close_price`` of each month is queried and the whole row is
    inserted into ``target_table`` (or updated when the stock already exists).

    Args:
        db_config: Keyword arguments forwarded to ``MySQLHelper``.
        source_table: Table holding the daily quotes. Interpolated into the SQL
            as an identifier, so it must come from a trusted source.
        target_table: Table receiving the monthly averages. Same trust
            requirement as ``source_table``.

    Returns:
        bool: True when every stock was processed, False when the target table
        could not be created, no stock rows were found, or any error occurred
        (the error is logged).
    """
    # Inclusive calendar bounds for every month column of the target table.
    # NOTE(review): if trade_date is a DATETIME rather than a DATE, rows after
    # midnight on the last day of a month fall outside BETWEEN - confirm schema.
    month_ranges = {
        'ym_2410': ('2024-10-01', '2024-10-31'),
        'ym_2411': ('2024-11-01', '2024-11-30'),
        'ym_2412': ('2024-12-01', '2024-12-31'),
        'ym_2501': ('2025-01-01', '2025-01-31'),
        'ym_2502': ('2025-02-01', '2025-02-28'),
        'ym_2503': ('2025-03-01', '2025-03-31'),
        'ym_2504': ('2025-04-01', '2025-04-30'),
        'ym_2505': ('2025-05-01', '2025-05-31'),
        'ym_2506': ('2025-06-01', '2025-06-30'),
        'ym_2507': ('2025-07-01', '2025-07-31'),
        'ym_2508': ('2025-08-01', '2025-08-31')
    }

    # Both statements are loop-invariant, so build them once. The upsert is
    # derived from month_ranges so the column list has a single source of truth.
    insert_columns = ['stock_code', 'stock_name', *month_ranges]
    column_list = ", ".join(insert_columns)
    placeholders = ", ".join(f"%({col})s" for col in insert_columns)
    # stock_code is the unique key, so it is not part of the UPDATE clause.
    assignments = ", ".join(f"{col} = VALUES({col})" for col in insert_columns[1:])
    avg_sql = f"""
        SELECT AVG(close_price) as avg_close
        FROM {source_table}
        WHERE stock_code = %s
        AND trade_date BETWEEN %s AND %s
    """
    upsert_sql = f"""
        INSERT INTO {target_table} ({column_list})
        VALUES ({placeholders})
        ON DUPLICATE KEY UPDATE
            {assignments},
            update_time = CURRENT_TIMESTAMP
    """

    try:
        # Make sure the target table exists before writing to it.
        if not create_monthly_avg_table(db_config, target_table):
            return False

        with MySQLHelper(**db_config) as db:
            # assumes execute_query returns a list of dict-like rows - TODO confirm
            stock_info = db.execute_query(
                f"SELECT DISTINCT stock_code, stock_name FROM {source_table}"
            )
            if not stock_info:
                logging.error("没有获取到股票基本信息")
                return False

            for stock in stock_info:
                stock_code = stock['stock_code']
                monthly_data = {'stock_code': stock_code, 'stock_name': stock['stock_name']}

                for month_col, (start_date, end_date) in month_ranges.items():
                    rows = db.execute_query(avg_sql, (stock_code, start_date, end_date))
                    avg_close = rows[0]['avg_close'] if rows else None
                    # Compare against None explicitly: AVG yields NULL for a month
                    # without rows, while a genuine average of 0 must be kept.
                    monthly_data[month_col] = float(avg_close) if avg_close is not None else None

                db.execute_update(upsert_sql, monthly_data)

            logging.info("月度均值计算和保存完成")
            return True
    except Exception as e:
        logging.error(f"计算和保存月度均值失败: {str(e)}")
        return False
# Safe conversion helpers
def safe_float(v) -> Optional[float]:
    """
    Convert a value to ``float`` without raising.

    Args:
        v: Value to convert (number, numeric string, ``None``, NaN, ...).

    Returns:
        Optional[float]: The converted value, or ``None`` when ``v`` is missing
        (``pd.notna`` is false), is the text ``N/A`` in any letter case, or
        cannot be converted.
    """
    try:
        if not pd.notna(v) or str(v).upper() == 'N/A':
            return None
        return float(v)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: float() of an int too large for a double.
        return None
def safe_int(v) -> Optional[int]:
    """
    Convert a value to ``int`` without raising.

    Floats are truncated toward zero, exactly as ``int()`` does.

    Args:
        v: Value to convert (number, integer string, ``None``, NaN, ...).

    Returns:
        Optional[int]: The converted value, or ``None`` when ``v`` is missing
        (``pd.notna`` is false), is the text ``N/A`` in any letter case, or
        cannot be converted.
    """
    try:
        if not pd.notna(v) or str(v).upper() == 'N/A':
            return None
        return int(v)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() of an infinite float.
        return None
def safe_parse_date(date_str, date_format='%Y-%m-%d'):
    """
    Parse a date string without raising on bad input.

    The text is first parsed exactly as given; if that fails and it carries
    leading or trailing whitespace, the stripped text is tried as well, so
    padded values such as ``" 2024-10-01\\n"`` are accepted.

    :param date_str: Date text to parse; falsy, NaN or blank values yield None.
    :param date_format: ``strptime`` format string.
    :return: The parsed ``datetime``, or None when the value is missing or
        does not match ``date_format`` (a warning is logged in that case).
    """
    if not date_str or pd.isna(date_str):
        return None

    raw = str(date_str)
    stripped = raw.strip()
    if not stripped:
        return None

    # Try the untouched text first so formats that themselves contain
    # surrounding whitespace keep working; dict.fromkeys removes the duplicate
    # when there is nothing to strip.
    for candidate in dict.fromkeys((raw, stripped)):
        try:
            return datetime.strptime(candidate, date_format)
        except ValueError:
            continue

    logging.warning(f"无法解析日期字符串: {date_str}")
    return None
def validate_market_data(dataset: list) -> list:
    """
    Filter a list of market records down to the usable ones.

    A record is dropped when it lacks a truthy ``code`` or ``name``, when its
    name ends with ``'R'``, or when inspecting it raises. A negative
    ``lot_size`` does not drop the record; the field is reset to ``None`` on
    the record itself (the input item is modified in place).

    Args:
        dataset (list): Raw records; each is expected to be dict-like.

    Returns:
        list: The records that passed validation, in their original order.
    """
    validated_data = []
    for item in dataset:
        try:
            # Required fields.
            name = item.get('name')
            if not item.get('code') or not name:
                logging.warning("跳过无效数据: 缺少必要字段 code或name")
                continue

            # Skip securities whose name ends with 'R'.
            if name[-1] == 'R':
                continue

            # Range check: a negative lot size is treated as unknown.
            lot_size = item.get('lot_size')
            if lot_size is not None and lot_size < 0:
                logging.warning(f"股票 {item['code']} 的lot_size为负值: {item['lot_size']}")
                item['lot_size'] = None

            validated_data.append(item)
        except Exception as e:
            # The record may not even be dict-like, so the code is looked up
            # defensively; otherwise the handler itself would raise.
            logging.warning(f"数据验证失败,跳过记录 {_record_code(item)}: {str(e)}")
            continue
    return validated_data


def _record_code(item):
    """Return ``item.get('code')`` for logging, or None when the lookup fails."""
    try:
        return item.get('code')
    except Exception:
        return None
def get_market_data(market: Market) -> List[str]:
    """
    Fetch the stock codes of one market through the Futu quote API.

    A quote context is opened against 127.0.0.1:11111 and always closed before
    returning.

    Args:
        market (Market): Market enum value, e.g. Market.SH or Market.SZ.

    Returns:
        List[str]: The ``code`` column of the basic-info result as strings, or
        an empty list when the API reports failure or an exception occurs
        (both are logged).
    """
    ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    try:
        status, payload = ctx.get_stock_basicinfo(market, SecurityType.STOCK)
        if status != RET_OK:
            # On failure the second element carries the error description.
            logging.error(f"获取股票代码失败: {payload}")
            return []

        code_column = payload['code'].astype(str)
        codes = code_column.tolist()
        logging.info(f"获取到 {market} 市场 {len(codes)} 个股票代码")
        return codes
    except Exception as exc:
        logging.error(f"获取股票代码时发生异常: {str(exc)}")
        return []
    finally:
        ctx.close()
def get_stock_codes(db_config: Optional[dict] = None) -> List[str]:
    """
    Read the stock codes to process from the ``conditionalselection`` table.

    Rows are skipped when ``stock_code`` or ``stock_name`` is empty, or when
    the name ends with ``'R'``.

    Args:
        db_config: Keyword arguments forwarded to ``MySQLHelper``. When omitted,
            the module-level ``db_config`` is used, so existing zero-argument
            calls keep working.

    Returns:
        List[str]: The selected stock codes, or an empty list when no
        configuration is available or the query fails (the error is logged).
    """
    try:
        # The module-level db_config only exists when the file runs as a script,
        # so resolve it explicitly instead of relying on an implicit global.
        config = db_config if db_config is not None else globals().get('db_config')
        if config is None:
            logging.error("获取股票代码失败: db_config is not defined")
            return []

        with MySQLHelper(**config) as db:
            sql = "SELECT DISTINCT stock_code,stock_name FROM conditionalselection"
            results = db.execute_query(sql)
            return [
                row['stock_code']
                for row in results
                # Exclude rows without a name and names ending in 'R'.
                if row['stock_code'] and (row.get('stock_name', '') and row['stock_name'][-1] != 'R')
            ]
    except Exception as e:
        logging.error(f"获取股票代码失败: {str(e)}")
        return []
if __name__ == "__main__":
    # Logging goes to Debug.log and to the console. The explicit UTF-8 encoding
    # on the file handler is the important part: the messages are not ASCII.
    log_handlers = [
        logging.FileHandler('Debug.log', encoding='utf-8'),
        logging.StreamHandler(),
    ]
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=log_handlers,
    )

    # Database connection settings. This must remain a module-level name
    # because get_stock_codes() reads it as a global.
    # NOTE(review): the password is stored in plain text in the source.
    db_config = dict(
        host='localhost',
        user='root',
        password='bzskmysql',
        database='klinedata_1d_hk',
    )

    # Codes come from the pre-filtered conditionalselection table rather than
    # from the full market list (get_market_data(Market.HK)).
    market_data = get_stock_codes()

    for code in market_data:
        # The per-stock table is 'hk_' plus the code without its first three characters.
        tablename = f"hk_{code[3:]}"

        # Compute and store the monthly averages for this stock.
        success = calculate_and_save_monthly_avg(
            db_config=db_config,
            source_table=tablename,
            target_table="hk_monthly_avg_2410_2508",
        )
        if not success:
            logging.error("处理过程中出现错误")
        else:
            logging.info("月度均值处理成功完成")