Files
HKDataManagment/PyCode/ExportData.py
2025-08-15 13:22:58 +08:00

1142 lines
41 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# # # import csv
# # # import pandas as pd
# # # from MySQLHelper import MySQLHelper
# # # import logging
# # # from typing import List, Dict, Optional
# # # from datetime import datetime
# # # def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
# # # """
# # # 从数据库读取月度均值数据
# # # Args:
# # # db_config: 数据库配置
# # # table_name: 源数据表名
# # # Returns:
# # # List[Dict]: 查询结果数据集失败返回None
# # # """
# # # try:
# # # with MySQLHelper(**db_config) as db:
# # # # 获取表结构信息
# # # columns = db.execute_query(f"""
# # # SELECT COLUMN_NAME
# # # FROM INFORMATION_SCHEMA.COLUMNS
# # # WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
# # # ORDER BY ORDINAL_POSITION
# # # """, (db_config['database'], table_name))
# # # if not columns:
# # # logging.error(f"表 {table_name} 不存在或没有列")
# # # return None
# # # # 获取列名列表(排除id和update_time)
# # # field_names = [col['COLUMN_NAME'] for col in columns
# # # if col['COLUMN_NAME'] not in ('id', 'update_time')]
# # # # 查询数据
# # # data = db.execute_query(f"""
# # # SELECT {', '.join(field_names)}
# # # FROM {table_name}
# # # ORDER BY stock_code
# # # """)
# # # if not data:
# # # logging.error(f"表 {table_name} 中没有数据")
# # # return None
# # # return data
# # # except Exception as e:
# # # logging.error(f"从数据库读取数据失败: {str(e)}")
# # # return None
# # # def export_to_csv(data: List[Dict], output_file: str) -> bool:
# # # """
# # # 将数据导出到CSV文件
# # # Args:
# # # data: 要导出的数据集
# # # output_file: 输出的CSV文件路径
# # # Returns:
# # # bool: 是否导出成功
# # # """
# # # if not data:
# # # return False
# # # try:
# # # # 获取字段名(使用第一个数据的键)
# # # field_names = list(data[0].keys())
# # # # 字段名到中文的映射
# # # header_map = {
# # # 'stock_code': '股票代码',
# # # 'stock_name': '股票名称',
# # # 'ym_2410': '2024年10月均收盘价',
# # # 'ym_2411': '2024年11月均收盘价',
# # # 'ym_2412': '2024年12月均收盘价',
# # # 'ym_2501': '2025年1月均收盘价',
# # # 'ym_2502': '2025年2月均收盘价',
# # # 'ym_2503': '2025年3月均收盘价',
# # # 'ym_2504': '2025年4月均收盘价',
# # # 'ym_2505': '2025年5月均收盘价',
# # # 'ym_2506': '2025年6月均收盘价',
# # # 'ym_2507': '2025年7月均收盘价',
# # # 'ym_2508': '2025年8月均收盘价'
# # # }
# # # with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile:
# # # writer = csv.DictWriter(csvfile, fieldnames=field_names)
# # # # 写入中文表头
# # # writer.writerow({col: header_map.get(col, col) for col in field_names})
# # # # 写入数据
# # # writer.writerows(data)
# # # logging.info(f"成功导出 {len(data)} 条记录到CSV文件: {output_file}")
# # # return True
# # # except Exception as e:
# # # logging.error(f"导出到CSV失败: {str(e)}")
# # # return False
# # # def export_to_excel(data: List[Dict], output_file: str) -> bool:
# # # """
# # # 将数据导出为Excel文件(包含多个工作表)
# # # Args:
# # # data: 要导出的数据集
# # # output_file: 输出的Excel文件路径
# # # Returns:
# # # bool: 是否导出成功
# # # """
# # # if not data:
# # # return False
# # # try:
# # # # 转换为DataFrame
# # # df = pd.DataFrame(data)
# # # # 设置股票代码为索引
# # # if 'stock_code' in df.columns:
# # # df.set_index('stock_code', inplace=True)
# # # # 创建Excel writer对象
# # # with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
# # # # 1. 原始数据工作表
# # # df.to_excel(writer, sheet_name='原始数据')
# # # # 2. 统计信息工作表(仅当有数值列时)
# # # numeric_cols = [col for col in df.columns if col.startswith('ym_') and pd.api.types.is_numeric_dtype(df[col])]
# # # if numeric_cols:
# # # try:
# # # stats = df[numeric_cols].describe().loc[['mean', 'min', 'max', 'std']]
# # # stats.to_excel(writer, sheet_name='统计信息')
# # # except KeyError:
# # # logging.warning("无法生成完整的统计信息,数据可能不足")
# # # # 生成简化版统计信息
# # # stats = df[numeric_cols].agg(['mean', 'min', 'max', 'std'])
# # # stats.to_excel(writer, sheet_name='统计信息')
# # # # 3. 涨幅排名工作表(需要至少两个月份数据)
# # # if len(numeric_cols) >= 2:
# # # first_month = numeric_cols[0]
# # # last_month = numeric_cols[-1]
# # # try:
# # # df['涨幅(%)'] = (df[last_month] - df[first_month]) / df[first_month] * 100
# # # result_df = df[['stock_name', '涨幅(%)']].copy()
# # # result_df.dropna(subset=['涨幅(%)'], inplace=True)
# # # result_df.sort_values('涨幅(%)', ascending=False, inplace=True)
# # # result_df.to_excel(writer, sheet_name='涨幅排名')
# # # except Exception as e:
# # # logging.warning(f"无法计算涨幅: {str(e)}")
# # # # 4. 月度趋势工作表
# # # if numeric_cols:
# # # try:
# # # trend_df = df[numeric_cols].transpose()
# # # trend_df.index = [col.replace('ym_', '') for col in numeric_cols]
# # # trend_df.to_excel(writer, sheet_name='月度趋势')
# # # except Exception as e:
# # # logging.warning(f"无法生成月度趋势: {str(e)}")
# # # logging.info(f"成功导出Excel文件: {output_file}")
# # # return True
# # # except Exception as e:
# # # logging.error(f"导出Excel失败: {str(e)}")
# # # return False
# # # def export_monthly_avg_data(db_config: dict,
# # # table_name: str,
# # # csv_file: str = None,
# # # excel_file: str = None) -> bool:
# # # """
# # # 导出月度均值数据到CSV和/或Excel
# # # Args:
# # # db_config: 数据库配置
# # # table_name: 源数据表名
# # # csv_file: CSV输出路径(可选)
# # # excel_file: Excel输出路径(可选)
# # # Returns:
# # # bool: 是否至少有一种格式导出成功
# # # """
# # # if not csv_file and not excel_file:
# # # logging.error("必须指定至少一种输出格式")
# # # return False
# # # # 从数据库获取数据
# # # data = get_monthly_avg_data(db_config, table_name)
# # # if not data:
# # # return False
# # # # 导出结果
# # # csv_success = True
# # # excel_success = True
# # # if csv_file:
# # # csv_success = export_to_csv(data, csv_file)
# # # if excel_file:
# # # excel_success = export_to_excel(data, excel_file)
# # # return csv_success or excel_success
# # # # 使用示例
# # # if __name__ == "__main__":
# # # # 配置日志
# # # logging.basicConfig(
# # # level=logging.INFO,
# # # format='%(asctime)s - %(levelname)s - %(message)s',
# # # handlers=[
# # # logging.FileHandler('Debug.log', encoding='utf-8'), # 关键在这里
# # # logging.StreamHandler()
# # # ]
# # # )
# # # # 数据库配置
# # # db_config = {
# # # 'host': 'localhost',
# # # 'user': 'root',
# # # 'password': 'bzskmysql',
# # # 'database': 'klinedata_1d_hk'
# # # }
# # # # 导出数据
# # # success = export_monthly_avg_data(
# # # db_config=db_config,
# # # table_name="hk_monthly_avg_2410_2508", # 你实际使用的表名
# # # csv_file="hk_stocks_monthly_avg_202410-202508.csv", # CSV输出文件
# # # excel_file="hk_stocks_monthly_avg_202410-202508.xlsx" # Excel输出文件
# # # )
# # # if success:
# # # logging.info("数据导出成功完成")
# # # else:
# # # logging.error("数据导出过程中出现错误")
# # import csv
# # import pandas as pd
# # from MySQLHelper import MySQLHelper
# # import logging
# # from typing import List, Dict, Optional
# # from datetime import datetime
# # def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
# # """
# # 从数据库读取月度均值数据
# # Args:
# # db_config: 数据库配置
# # table_name: 源数据表名
# # Returns:
# # List[Dict]: 查询结果数据集失败返回None
# # """
# # try:
# # with MySQLHelper(**db_config) as db:
# # # 获取表结构信息
# # columns = db.execute_query(f"""
# # SELECT COLUMN_NAME
# # FROM INFORMATION_SCHEMA.COLUMNS
# # WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
# # ORDER BY ORDINAL_POSITION
# # """, (db_config['database'], table_name))
# # if not columns:
# # logging.error(f"表 {table_name} 不存在或没有列")
# # return None
# # # 获取列名列表(排除id和update_time)
# # field_names = [col['COLUMN_NAME'] for col in columns
# # if col['COLUMN_NAME'] not in ('id', 'update_time')]
# # # 查询数据
# # data = db.execute_query(f"""
# # SELECT {', '.join(field_names)}
# # FROM {table_name}
# # ORDER BY stock_code
# # """)
# # if not data:
# # logging.error(f"表 {table_name} 中没有数据")
# # return None
# # return data
# # except Exception as e:
# # logging.error(f"从数据库读取月度均值数据失败: {str(e)}")
# # return None
# # def get_float_share_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
# # """
# # 从conditionalselection表读取流通股本数据
# # Args:
# # db_config: 数据库配置
# # table_name: 源数据表名
# # Returns:
# # List[Dict]: 查询结果数据集失败返回None
# # """
# # try:
# # with MySQLHelper(**db_config) as db:
# # # 查询流通股本数据
# # data = db.execute_query(f"""
# # SELECT stock_code , stock_name , float_share
# # FROM {table_name}
# # ORDER BY stock_code
# # """)
# # if not data:
# # logging.error(f"表 {table_name} 中没有流通股本数据")
# # return None
# # return data
# # except Exception as e:
# # logging.error(f"从数据库读取流通股本数据失败: {str(e)}")
# # return None
# # def export_to_csv(data: List[Dict], output_file: str, data_type: str = 'monthly_avg') -> bool:
# # """
# # 将数据导出到CSV文件
# # Args:
# # data: 要导出的数据集
# # output_file: 输出的CSV文件路径
# # data_type: 数据类型('monthly_avg'或'float_share')
# # Returns:
# # bool: 是否导出成功
# # """
# # if not data:
# # return False
# # try:
# # # 获取字段名(使用第一个数据的键)
# # field_names = list(data[0].keys())
# # # 字段名到中文的映射
# # header_map = {
# # 'stock_code': '股票代码',
# # 'stock_name': '股票名称',
# # 'float_share': '流通股本(千股)',
# # 'ym_2410': '2024年10月均收盘价',
# # 'ym_2411': '2024年11月均收盘价',
# # 'ym_2412': '2024年12月均收盘价',
# # 'ym_2501': '2025年1月均收盘价',
# # 'ym_2502': '2025年2月均收盘价',
# # 'ym_2503': '2025年3月均收盘价',
# # 'ym_2504': '2025年4月均收盘价',
# # 'ym_2505': '2025年5月均收盘价',
# # 'ym_2506': '2025年6月均收盘价',
# # 'ym_2507': '2025年7月均收盘价',
# # 'ym_2508': '2025年8月均收盘价'
# # }
# # with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile:
# # writer = csv.DictWriter(csvfile, fieldnames=field_names)
# # # 写入中文表头
# # writer.writerow({col: header_map.get(col, col) for col in field_names})
# # # 写入数据
# # writer.writerows(data)
# # logging.info(f"成功导出 {len(data)} 条记录到CSV文件: {output_file}")
# # return True
# # except Exception as e:
# # logging.error(f"导出到CSV失败: {str(e)}")
# # return False
# # def export_to_excel(data: List[Dict], output_file: str, data_type: str = 'monthly_avg') -> bool:
# # """
# # 将数据导出为Excel文件(包含多个工作表)
# # Args:
# # data: 要导出的数据集
# # output_file: 输出的Excel文件路径
# # data_type: 数据类型('monthly_avg'或'float_share')
# # Returns:
# # bool: 是否导出成功
# # """
# # if not data:
# # return False
# # try:
# # # 转换为DataFrame
# # df = pd.DataFrame(data)
# # # 设置股票代码为索引
# # if 'stock_code' in df.columns:
# # df.set_index('stock_code', inplace=True)
# # # 创建Excel writer对象
# # with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
# # # 1. 原始数据工作表
# # sheet_name = '流通股本数据' if data_type == 'float_share' else '原始数据'
# # df.to_excel(writer, sheet_name=sheet_name)
# # # 对于月度均价数据,添加额外的工作表
# # if data_type == 'monthly_avg':
# # # 2. 统计信息工作表(仅当有数值列时)
# # numeric_cols = [col for col in df.columns if col.startswith('ym_') and pd.api.types.is_numeric_dtype(df[col])]
# # if numeric_cols:
# # try:
# # stats = df[numeric_cols].describe().loc[['mean', 'min', 'max', 'std']]
# # stats.to_excel(writer, sheet_name='统计信息')
# # except KeyError:
# # logging.warning("无法生成完整的统计信息,数据可能不足")
# # # 生成简化版统计信息
# # stats = df[numeric_cols].agg(['mean', 'min', 'max', 'std'])
# # stats.to_excel(writer, sheet_name='统计信息')
# # # 3. 涨幅排名工作表(需要至少两个月份数据)
# # if len(numeric_cols) >= 2:
# # first_month = numeric_cols[0]
# # last_month = numeric_cols[-1]
# # try:
# # df['涨幅(%)'] = (df[last_month] - df[first_month]) / df[first_month] * 100
# # result_df = df[['stock_name', '涨幅(%)']].copy()
# # result_df.dropna(subset=['涨幅(%)'], inplace=True)
# # result_df.sort_values('涨幅(%)', ascending=False, inplace=True)
# # result_df.to_excel(writer, sheet_name='涨幅排名')
# # except Exception as e:
# # logging.warning(f"无法计算涨幅: {str(e)}")
# # # 4. 月度趋势工作表
# # if numeric_cols:
# # try:
# # trend_df = df[numeric_cols].transpose()
# # trend_df.index = [col.replace('ym_', '') for col in numeric_cols]
# # trend_df.to_excel(writer, sheet_name='月度趋势')
# # except Exception as e:
# # logging.warning(f"无法生成月度趋势: {str(e)}")
# # logging.info(f"成功导出Excel文件: {output_file}")
# # return True
# # except Exception as e:
# # logging.error(f"导出Excel失败: {str(e)}")
# # return False
# # def export_data(db_config: dict,
# # table_name: str,
# # data_type: str = 'monthly_avg',
# # csv_file: str = None,
# # excel_file: str = None) -> bool:
# # """
# # 导出数据到CSV和/或Excel
# # Args:
# # db_config: 数据库配置
# # table_name: 源数据表名
# # data_type: 数据类型('monthly_avg'或'float_share')
# # csv_file: CSV输出路径(可选)
# # excel_file: Excel输出路径(可选)
# # Returns:
# # bool: 是否至少有一种格式导出成功
# # """
# # if not csv_file and not excel_file:
# # logging.error("必须指定至少一种输出格式")
# # return False
# # # 从数据库获取数据
# # if data_type == 'float_share':
# # data = get_float_share_data(db_config, table_name)
# # else:
# # data = get_monthly_avg_data(db_config, table_name)
# # if not data:
# # return False
# # # 导出结果
# # csv_success = True
# # excel_success = True
# # if csv_file:
# # csv_success = export_to_csv(data, csv_file, data_type)
# # if excel_file:
# # excel_success = export_to_excel(data, excel_file, data_type)
# # return csv_success or excel_success
# # # 使用示例
# # if __name__ == "__main__":
# # # 配置日志
# # logging.basicConfig(
# # level=logging.INFO,
# # format='%(asctime)s - %(levelname)s - %(message)s',
# # handlers=[
# # logging.FileHandler('export_data.log', encoding='utf-8'),
# # logging.StreamHandler()
# # ]
# # )
# # # 数据库配置
# # db_config = {
# # 'host': 'localhost',
# # 'user': 'root',
# # 'password': 'bzskmysql',
# # 'database': 'klinedata_1d_hk'
# # }
# # # 导出月度均价数据
# # monthly_success = export_data(
# # db_config=db_config,
# # table_name="hk_monthly_avg_2410_2508",
# # data_type='monthly_avg',
# # csv_file="hk_stocks_monthly_avg_202410-202508.csv",
# # excel_file="hk_stocks_monthly_avg_202410-202508.xlsx"
# # )
# # # 导出流通股本数据
# # float_share_success = export_data(
# # db_config=db_config,
# # table_name="conditionalselection",
# # data_type='float_share',
# # csv_file="hk_stocks_float_share.csv",
# # excel_file="hk_stocks_float_share.xlsx"
# # )
# # if monthly_success and float_share_success:
# # logging.info("所有数据导出成功完成")
# # else:
# # logging.error("数据导出过程中出现错误")
import csv
import pandas as pd
from MySQLHelper import MySQLHelper
import logging
from typing import List, Dict, Optional, Tuple
from datetime import datetime
def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
    """
    Read the monthly-average rows of ``table_name``.

    The column list is discovered from INFORMATION_SCHEMA.COLUMNS (in
    ordinal order), the ``id`` and ``update_time`` columns are dropped, and
    the remaining columns are selected ordered by ``stock_code``.

    Args:
        db_config: Keyword arguments forwarded to ``MySQLHelper``; the
            ``'database'`` entry is also used as the schema name for the
            column lookup.
        table_name: Name of the source table (a bare table name, since it
            is matched against INFORMATION_SCHEMA's TABLE_NAME).

    Returns:
        The result of ``db.execute_query`` for the data query (presumably a
        list of dict rows -- confirm against MySQLHelper), or ``None`` when
        the table has no columns, no exportable columns, no rows, or any
        exception is raised.
    """

    def quote(identifier: str) -> str:
        # Backtick-quote an identifier so reserved words or unusual
        # characters in a column/table name cannot break the statement.
        return "`" + str(identifier).replace("`", "``") + "`"

    try:
        with MySQLHelper(**db_config) as db:
            # Discover the table layout.
            columns = db.execute_query(
                """
                SELECT COLUMN_NAME
                FROM INFORMATION_SCHEMA.COLUMNS
                WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
                ORDER BY ORDINAL_POSITION
                """,
                (db_config['database'], table_name),
            )
            if not columns:
                logging.error(f"{table_name} 不存在或没有列")
                return None

            # Bookkeeping columns are not part of the export.
            field_names = [
                col['COLUMN_NAME']
                for col in columns
                if col['COLUMN_NAME'] not in ('id', 'update_time')
            ]
            if not field_names:
                # Without this guard the SELECT list would be empty and the
                # failure would surface as a generic SQL syntax error.
                logging.error(f"{table_name} 除 id/update_time 外没有可导出的列")
                return None

            select_list = ', '.join(quote(name) for name in field_names)
            data = db.execute_query(
                f"SELECT {select_list} "
                f"FROM {quote(table_name)} "
                "ORDER BY `stock_code`"
            )
            if not data:
                logging.error(f"{table_name} 中没有数据")
                return None
            return data
    except Exception as e:
        # Best-effort contract: every failure is logged and reported as None.
        logging.error(f"从数据库读取月度均值数据失败: {str(e)}")
        return None
def get_float_share_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
    """
    Fetch the float-share rows stored in ``table_name``.

    Selects ``stock_code``, ``stock_name`` and ``float_share`` ordered by
    ``stock_code``.

    Args:
        db_config: Keyword arguments forwarded to ``MySQLHelper``.
        table_name: Name of the table to read from.

    Returns:
        Whatever ``db.execute_query`` returns for the query when it is
        non-empty; ``None`` when the result is empty or any exception is
        raised (both cases are logged).
    """
    try:
        with MySQLHelper(**db_config) as db:
            # The statement is assembled piecewise; the resulting text is
            # the same multi-line query the helper has always received.
            statement = (
                "\n"
                "SELECT stock_code, stock_name, float_share\n"
                f"FROM {table_name}\n"
                "ORDER BY stock_code\n"
            )
            rows = db.execute_query(statement)
            if rows:
                return rows
            logging.error(f"{table_name} 中没有流通股本数据")
            return None
    except Exception as err:
        logging.error(f"从数据库读取流通股本数据失败: {str(err)}")
        return None
def merge_data(monthly_data: List[Dict], float_share_data: List[Dict]) -> List[Dict]:
    """
    Attach a ``float_share`` value to every monthly-average row.

    Args:
        monthly_data: Rows keyed by ``stock_code`` (one dict per stock).
        float_share_data: Rows providing ``stock_code`` and ``float_share``.

    Returns:
        A new list with one shallow copy per monthly row, each carrying a
        ``float_share`` entry; rows whose ``stock_code`` has no float-share
        record get the placeholder ``'N/A'``. The input rows are not
        modified.
    """
    # Lookup table: stock code -> float share (a later duplicate wins).
    share_by_code = {}
    for share_row in float_share_data:
        share_by_code[share_row['stock_code']] = share_row['float_share']

    def with_share(record: Dict) -> Dict:
        enriched = record.copy()
        enriched['float_share'] = share_by_code.get(record['stock_code'], 'N/A')
        return enriched

    return [with_share(record) for record in monthly_data]
def export_to_csv(data: List[Dict], output_file: str) -> bool:
    """
    Write the merged rows to a CSV file with Chinese column headers.

    The column order is taken from the keys of the first row. The file is
    written as UTF-8 with a BOM (``utf-8-sig``).

    Header labels:
        * ``stock_code``, ``stock_name`` and ``float_share`` use fixed labels.
        * Any ``ym_YYMM`` column (four digits, month 01-12) is labelled
          ``20YY年M月均收盘价``, so months outside the originally listed
          2410-2508 range are labelled too.
        * Every other column keeps its raw name.

    Args:
        data: Rows to export (one dict per row).
        output_file: Path of the CSV file to create or overwrite.

    Returns:
        ``True`` on success; ``False`` when ``data`` is empty or any
        exception occurs while writing (the error is logged).
    """
    if not data:
        return False

    fixed_headers = {
        'stock_code': '股票代码',
        'stock_name': '股票名称',
        'float_share': '流通股本(千股)',
    }

    def header_for(column):
        # Translate one column name into the label written to the header row.
        label = fixed_headers.get(column)
        if label is not None:
            return label
        if isinstance(column, str) and column.startswith('ym_'):
            digits = column[3:]
            if len(digits) == 4 and all(ch in '0123456789' for ch in digits):
                month = int(digits[2:])
                if 1 <= month <= 12:
                    # Two-digit years are read as 20YY, matching the labels
                    # this export has always used for 2410..2508.
                    return f"20{digits[:2]}年{month}月均收盘价"
        return column

    try:
        # Column order follows the keys of the first row.
        field_names = list(data[0].keys())
        with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=field_names)
            # Header row with translated labels.
            writer.writerow({col: header_for(col) for col in field_names})
            writer.writerows(data)
        logging.info(f"成功导出 {len(data)} 条记录到CSV文件: {output_file}")
        return True
    except Exception as e:
        logging.error(f"导出到CSV失败: {str(e)}")
        return False
def export_to_excel(data: List[Dict], output_file: str) -> bool:
    """
    Write the merged rows to an Excel workbook with several sheets.

    Sheets (written with the ``openpyxl`` engine):
        1. ``合并数据``     - the rows as given, indexed by ``stock_code``
                              when that column exists.
        2. ``统计信息``     - mean/min/max/std of every numeric ``ym_*`` column.
        3. ``涨幅排名``     - percentage change from the first to the last
                              numeric ``ym_*`` column, sorted descending
                              (needs at least two such columns).
        4. ``月度趋势``     - the numeric ``ym_*`` columns transposed.
        5. ``流通股本分析`` - ``describe()`` of the ``float_share`` column.

    Sheets 2-5 are best-effort: a failure in one is logged as a warning and
    the remaining sheets are still written.

    Args:
        data: Rows to export (one dict per row).
        output_file: Path of the workbook to create or overwrite.

    Returns:
        ``True`` when the workbook was written; ``False`` when ``data`` is
        empty or creating/writing the workbook raised (the error is logged).
    """
    if not data:
        return False
    try:
        df = pd.DataFrame(data)
        # Use the stock code as the row index when available.
        if 'stock_code' in df.columns:
            df.set_index('stock_code', inplace=True)

        with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
            # 1. Raw merged data.
            df.to_excel(writer, sheet_name='合并数据')

            # Monthly columns usable for arithmetic.
            numeric_cols = [
                col for col in df.columns
                if col.startswith('ym_') and pd.api.types.is_numeric_dtype(df[col])
            ]

            # 2. Summary statistics.
            if numeric_cols:
                try:
                    stats = df[numeric_cols].describe().loc[['mean', 'min', 'max', 'std']]
                    stats.to_excel(writer, sheet_name='统计信息')
                except KeyError:
                    logging.warning("无法生成完整的统计信息,数据可能不足")
                    # Fall back to computing only the four aggregates.
                    stats = df[numeric_cols].agg(['mean', 'min', 'max', 'std'])
                    stats.to_excel(writer, sheet_name='统计信息')

            # 3. Ranking by percentage change between first and last month.
            if len(numeric_cols) >= 2:
                first_month = numeric_cols[0]
                last_month = numeric_cols[-1]
                try:
                    change = (df[last_month] - df[first_month]) / df[first_month] * 100
                    # A zero first-month price gives +/-inf, which dropna()
                    # keeps and which would sort to the top of the ranking;
                    # treat it as "not computable" instead.
                    change = change.replace([float('inf'), float('-inf')], float('nan'))

                    # Descriptive columns are optional so the ranking is
                    # still produced when one of them is missing.
                    present = [c for c in ('stock_name', 'float_share') if c in df.columns]
                    ranking = df[present].copy()
                    ranking['涨幅(%)'] = change
                    ordered = [
                        c for c in ('stock_name', '涨幅(%)', 'float_share')
                        if c in ranking.columns
                    ]
                    ranking = (
                        ranking[ordered]
                        .dropna(subset=['涨幅(%)'])
                        .sort_values('涨幅(%)', ascending=False)
                    )
                    ranking.to_excel(writer, sheet_name='涨幅排名')
                except Exception as e:
                    logging.warning(f"无法计算涨幅: {str(e)}")

            # 4. Monthly trend (months as rows).
            if numeric_cols:
                try:
                    trend_df = df[numeric_cols].transpose()
                    trend_df.index = [col.replace('ym_', '') for col in numeric_cols]
                    trend_df.to_excel(writer, sheet_name='月度趋势')
                except Exception as e:
                    logging.warning(f"无法生成月度趋势: {str(e)}")

            # 5. Float-share analysis.
            if 'float_share' in df.columns:
                try:
                    # Non-numeric entries (e.g. an 'N/A' placeholder for a
                    # stock without float-share data) become NaN so a single
                    # missing value no longer suppresses the whole sheet.
                    shares = pd.to_numeric(df['float_share'], errors='coerce')
                    if shares.notna().any():
                        float_stats = shares.describe().to_frame().T
                        float_stats.to_excel(writer, sheet_name='流通股本分析')
                except Exception as e:
                    logging.warning(f"无法生成流通股本分析: {str(e)}")

        logging.info(f"成功导出Excel文件: {output_file}")
        return True
    except Exception as e:
        logging.error(f"导出Excel失败: {str(e)}")
        return False
def export_combined_data(db_config: dict,
                         monthly_table: str,
                         float_share_table: str,
                         csv_file: Optional[str] = None,
                         excel_file: Optional[str] = None) -> bool:
    """
    Load, merge and export monthly-average and float-share data.

    Reads both tables, merges them with ``merge_data`` and writes the result
    to the requested output formats.

    Args:
        db_config: Database configuration passed to the two reader functions.
        monthly_table: Name of the monthly-average table.
        float_share_table: Name of the float-share table.
        csv_file: CSV output path (optional).
        excel_file: Excel output path (optional).

    Returns:
        ``True`` when at least one *requested* export succeeded; ``False``
        when no output path was given, either table could not be read, or
        every requested export failed.
    """
    if not csv_file and not excel_file:
        logging.error("必须指定至少一种输出格式")
        return False

    # Load both inputs; either one missing aborts the export.
    monthly_data = get_monthly_avg_data(db_config, monthly_table)
    if not monthly_data:
        logging.error("无法获取月度均价数据")
        return False
    float_share_data = get_float_share_data(db_config, float_share_table)
    if not float_share_data:
        logging.error("无法获取流通股本数据")
        return False

    merged_data = merge_data(monthly_data, float_share_data)

    # Only formats that were actually requested contribute to the result;
    # a format that was not requested must not count as a success.
    outcomes = []
    if csv_file:
        outcomes.append(export_to_csv(merged_data, csv_file))
    if excel_file:
        outcomes.append(export_to_excel(merged_data, excel_file))
    return any(outcomes)
# Usage example
if __name__ == "__main__":
    # Log to a UTF-8 file and to the console at the same time.
    log_handlers = [
        logging.FileHandler('export_combined_data.log', encoding='utf-8'),
        logging.StreamHandler(),
    ]
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=log_handlers,
    )

    # Database connection settings.
    # NOTE(review): the credentials are hard-coded in source; consider
    # loading them from the environment or a config file.
    db_config = dict(
        host='localhost',
        user='root',
        password='bzskmysql',
        database='klinedata_1d_hk',
    )

    # Export the merged monthly-average / float-share data in both formats.
    success = export_combined_data(
        db_config=db_config,
        monthly_table="hk_monthly_avg_2410_2508",
        float_share_table="conditionalselection",
        csv_file="hk_stocks_combined_data.csv",
        excel_file="hk_stocks_combined_data.xlsx",
    )

    if not success:
        logging.error("数据合并导出过程中出现错误")
    else:
        logging.info("数据合并导出成功完成")
# import csv
# import pandas as pd
# from MySQLHelper import MySQLHelper
# import logging
# from typing import List, Dict, Optional
# # 配置日志
# logging.basicConfig(
# level=logging.INFO,
# format='%(asctime)s - %(levelname)s - %(message)s',
# handlers=[
# logging.FileHandler('stock_data_export.log', encoding='utf-8'),
# logging.StreamHandler()
# ]
# )
# def get_monthly_avg_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
# """
# 从数据库读取月度均价数据
# Args:
# db_config: 数据库配置
# table_name: 源数据表名
# Returns:
# List[Dict]: 查询结果数据集失败返回None
# """
# try:
# with MySQLHelper(**db_config) as db:
# # 获取表结构信息
# columns = db.execute_query(f"""
# SELECT COLUMN_NAME
# FROM INFORMATION_SCHEMA.COLUMNS
# WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
# ORDER BY ORDINAL_POSITION
# """, (db_config['database'], table_name))
# if not columns:
# logging.error(f"表 {table_name} 不存在或没有列")
# return None
# # 获取列名列表(排除id和update_time)
# field_names = [col['COLUMN_NAME'] for col in columns
# if col['COLUMN_NAME'] not in ('id', 'update_time')]
# # 查询数据
# data = db.execute_query(f"""
# SELECT {', '.join(field_names)}
# FROM {table_name}
# ORDER BY stock_code
# """)
# if not data:
# logging.error(f"表 {table_name} 中没有数据")
# return None
# return data
# except Exception as e:
# logging.error(f"从数据库读取月度均价数据失败: {str(e)}")
# return None
# def get_float_share_data(db_config: dict, table_name: str) -> Optional[List[Dict]]:
# """
# 从conditionalselection表读取流通股本数据
# Args:
# db_config: 数据库配置
# table_name: 源数据表名
# Returns:
# List[Dict]: 查询结果数据集失败返回None
# """
# try:
# with MySQLHelper(**db_config) as db:
# # 查询流通股本数据
# data = db.execute_query(f"""
# SELECT stock_code, stock_name, float_share
# FROM {table_name}
# ORDER BY stock_code
# """)
# if not data:
# logging.error(f"表 {table_name} 中没有流通股本数据")
# return None
# return data
# except Exception as e:
# logging.error(f"从数据库读取流通股本数据失败: {str(e)}")
# return None
# def calculate_adjusted_prices(monthly_data: List[Dict], float_share_data: List[Dict]) -> List[Dict]:
# """
# 计算股本调整后价格(股价×流通股本)
# Args:
# monthly_data: 月度均价数据
# float_share_data: 流通股本数据
# Returns:
# List[Dict]: 包含调整后价格的数据集
# """
# adjusted_data = []
# float_shares = {item['stock_code']: item['float_share'] for item in float_share_data}
# for stock in monthly_data:
# adjusted_stock = stock.copy()
# stock_code = stock['stock_code']
# float_share = float_shares.get(stock_code)
# if float_share is None or not isinstance(float_share, (int, float)):
# logging.warning(f"股票 {stock_code} 缺少流通股本数据,跳过调整")
# adjusted_data.append(adjusted_stock)
# continue
# # 对每个月的价格乘以流通股本
# nIndex = 0
# for key in stock.keys():
# nIndex = nIndex + 1
# # if key.startswith('ym_') and isinstance(stock[key], (int, float)):
# if nIndex > 2:
# adjusted_stock[key] = stock[key] * float_share
# adjusted_data.append(adjusted_stock)
# return adjusted_data
# def export_to_csv(data: List[Dict], output_file: str) -> bool:
# """
# 将数据导出到CSV文件
# Args:
# data: 要导出的数据集
# output_file: 输出的CSV文件路径
# Returns:
# bool: 是否导出成功
# """
# if not data:
# logging.error("没有数据可导出")
# return False
# try:
# field_names = list(data[0].keys())
# # 字段名到中文的映射
# header_map = {
# 'stock_code': '股票代码',
# 'stock_name': '股票名称',
# 'ym_2410': '2024年10月',
# 'ym_2411': '2024年11月',
# 'ym_2412': '2024年12月',
# 'ym_2501': '2025年1月',
# 'ym_2502': '2025年2月',
# 'ym_2503': '2025年3月',
# 'ym_2504': '2025年4月',
# 'ym_2505': '2025年5月',
# 'ym_2506': '2025年6月',
# 'ym_2507': '2025年7月',
# 'ym_2508': '2025年8月'
# }
# with open(output_file, mode='w', newline='', encoding='utf-8-sig') as csvfile:
# writer = csv.DictWriter(csvfile, fieldnames=field_names)
# # 写入中文表头
# writer.writerow({col: header_map.get(col, col) for col in field_names})
# # 写入数据
# writer.writerows(data)
# logging.info(f"成功导出 {len(data)} 条记录到CSV文件: {output_file}")
# return True
# except Exception as e:
# logging.error(f"导出到CSV失败: {str(e)}")
# return False
# def export_to_excel(data: List[Dict], output_file: str) -> bool:
# """
# 将数据导出为Excel文件
# Args:
# data: 要导出的数据集
# output_file: 输出的Excel文件路径
# Returns:
# bool: 是否导出成功
# """
# if not data:
# logging.error("没有数据可导出")
# return False
# try:
# df = pd.DataFrame(data)
# # 设置股票代码为索引
# if 'stock_code' in df.columns:
# df.set_index('stock_code', inplace=True)
# # 创建Excel writer对象
# with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
# # 原始数据工作表
# df.to_excel(writer, sheet_name='调整后价格')
# # 统计信息工作表
# numeric_cols = [col for col in df.columns if col.startswith('ym_') and pd.api.types.is_numeric_dtype(df[col])]
# if numeric_cols:
# stats = df[numeric_cols].describe().loc[['mean', 'min', 'max', 'std']]
# stats.to_excel(writer, sheet_name='统计信息')
# # 涨幅排名工作表
# if len(numeric_cols) >= 2:
# first_month = numeric_cols[0]
# last_month = numeric_cols[-1]
# df['涨幅(%)'] = (df[last_month] - df[first_month]) / df[first_month] * 100
# result_df = df[['stock_name', '涨幅(%)']].copy()
# result_df.dropna(subset=['涨幅(%)'], inplace=True)
# result_df.sort_values('涨幅(%)', ascending=False, inplace=True)
# result_df.to_excel(writer, sheet_name='涨幅排名')
# # 月度趋势工作表
# trend_df = df[numeric_cols].transpose()
# trend_df.index = [col.replace('ym_', '') for col in numeric_cols]
# trend_df.to_excel(writer, sheet_name='月度趋势')
# logging.info(f"成功导出Excel文件: {output_file}")
# return True
# except Exception as e:
# logging.error(f"导出Excel失败: {str(e)}")
# return False
# def export_adjusted_prices(db_config: dict,
# monthly_table: str,
# float_share_table: str,
# csv_file: str = None,
# excel_file: str = None) -> bool:
# """
# 导出股本调整后的价格数据
# Args:
# db_config: 数据库配置
# monthly_table: 月度均价表名
# float_share_table: 流通股本表名
# csv_file: CSV输出路径(可选)
# excel_file: Excel输出路径(可选)
# Returns:
# bool: 是否至少有一种格式导出成功
# """
# # 获取数据
# monthly_data = get_monthly_avg_data(db_config, monthly_table)
# if not monthly_data:
# return False
# float_share_data = get_float_share_data(db_config, float_share_table)
# if not float_share_data:
# return False
# # 计算调整后价格
# adjusted_data = calculate_adjusted_prices(monthly_data, float_share_data)
# # 导出结果
# results = []
# if csv_file:
# results.append(export_to_csv(adjusted_data, csv_file))
# if excel_file:
# results.append(export_to_excel(adjusted_data, excel_file))
# return any(results)
# if __name__ == "__main__":
# # 数据库配置
# db_config = {
# 'host': 'localhost',
# 'user': 'root',
# 'password': 'bzskmysql',
# 'database': 'klinedata_1d_hk'
# }
# # 导出数据
# success = export_adjusted_prices(
# db_config=db_config,
# monthly_table="hk_monthly_avg_2410_2508",
# float_share_table="conditionalselection",
# csv_file="adjusted_stock_prices.csv",
# excel_file="adjusted_stock_prices.xlsx"
# )
# if success:
# logging.info("数据导出成功完成")
# else:
# logging.error("数据导出过程中出现错误")