#!/usr/bin/env python3
"""
源代码清理脚本 - 用于软件著作权申请
功能：移除C++源代码中的注释和空行，生成干净的源代码文档
"""

import os
import re
import shutil
from pathlib import Path

class SourceCodeCleaner:
    """Strip comments and blank lines from C/C++ sources and bundle the result.

    ``clean_directory`` mirrors ``source_dir`` into ``output_dir`` with every
    matching file cleaned; ``generate_source_document`` concatenates all
    matching files found under ``output_dir`` into one line-numbered document.
    """

    # A single left-to-right scan over the text. String, character and raw
    # string literals are matched as well (and kept untouched) so that a "//"
    # or "/*" inside a literal is never mistaken for the start of a comment.
    _TOKEN_RE = re.compile(
        # C++11 raw string literal, optionally prefixed (u8R, LR, ...).
        r'(?<![A-Za-z0-9_])(?:u8|[uUL])?R"(?P<delim>[^()\\\s"]{0,16})\(.*?\)(?P=delim)"'
        # Ordinary string literal; an escaped newline may continue it.
        r'|"(?:\\.|[^"\\\n])*"'
        # Character literal; confined to one line so that a stray apostrophe
        # (e.g. a digit separator) cannot swallow the following lines.
        r"|'(?:\\.|[^'\\\n])*'"
        r'|(?P<block>/\*.*?\*/)'
        r'|(?P<line>//[^\n]*)',
        re.DOTALL,
    )

    def __init__(self, source_dir, output_dir):
        """Remember the directories and the file extensions to process.

        Args:
            source_dir: Directory tree holding the original sources.
            output_dir: Directory tree that receives the cleaned copies.
        """
        self.source_dir = Path(source_dir)
        self.output_dir = Path(output_dir)
        self.file_extensions = ['.h', '.cpp', '.hpp', '.c', '.cc']

    @classmethod
    def _strip_comments(cls, text):
        """Return ``text`` with C/C++ comments removed and literals preserved."""
        def replace(match):
            if match.group('block') is not None:
                # A block comment separates tokens, so leave one space behind
                # ("a/**/b" must not collapse into "ab").
                return ' '
            if match.group('line') is not None:
                return ''
            # A literal: hand it back unchanged.
            return match.group(0)

        return cls._TOKEN_RE.sub(replace, text)

    def clean_directory(self):
        """Clean every matching file under ``source_dir`` into ``output_dir``.

        Any existing ``output_dir`` is deleted first and then recreated.

        Returns:
            A tuple ``(cleaned_files, total_lines)``: the paths of the cleaned
            files relative to ``source_dir`` (as strings) and the total number
            of lines kept across all of them.

        Raises:
            ValueError: If ``output_dir`` is ``source_dir`` itself or one of
                its ancestors, because wiping it would delete the sources.
        """
        source_root = self.source_dir.resolve()
        output_root = self.output_dir.resolve()
        if source_root == output_root or output_root in source_root.parents:
            raise ValueError(
                f"输出目录 {self.output_dir} 与源目录 {self.source_dir} 相同或包含源目录，"
                "清理输出目录会删除源代码"
            )

        if self.output_dir.exists():
            shutil.rmtree(self.output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        cleaned_files = []
        total_lines = 0
        extensions = tuple(self.file_extensions)

        for root, dirs, files in os.walk(self.source_dir):
            root_path = Path(root)
            # Sort for a reproducible order, and never descend into the output
            # directory when it happens to live inside the source tree.
            dirs[:] = sorted(
                d for d in dirs if (root_path / d).resolve() != output_root
            )
            for file in sorted(files):
                if not file.endswith(extensions):
                    continue

                source_file = root_path / file
                relative_path = source_file.relative_to(self.source_dir)
                output_file = self.output_dir / relative_path
                output_file.parent.mkdir(parents=True, exist_ok=True)

                total_lines += self.clean_file(source_file, output_file)
                # clean_file reports failures by printing and returning 0; the
                # output directory was just wiped, so a missing output file
                # means this file was not cleaned and must not be listed.
                if output_file.is_file():
                    cleaned_files.append(str(relative_path))

        return cleaned_files, total_lines

    def clean_file(self, input_file, output_file):
        """Write ``input_file`` to ``output_file`` without comments or blank lines.

        Args:
            input_file: Path of the source file to read.
            output_file: Path of the cleaned file to write.

        Returns:
            The number of lines written, or 0 if the file could not be read or
            written (the error is printed, not raised).
        """
        try:
            # utf-8-sig drops a leading byte-order mark, which would otherwise
            # end up as a stray character at the start of the first line.
            with open(input_file, 'r', encoding='utf-8-sig', errors='ignore') as f:
                content = f.read()

            content = self._strip_comments(content)

            # Keep only lines that still contain something besides whitespace.
            cleaned_lines = [line for line in content.split('\n') if line.strip()]

            with open(output_file, 'w', encoding='utf-8') as f:
                f.write('\n'.join(cleaned_lines))

            return len(cleaned_lines)

        except OSError as e:
            print(f"清理文件 {input_file} 时出错: {e}")
            return 0

    def generate_source_document(self, output_file):
        """Write one document listing and reproducing all files in ``output_dir``.

        The document starts with a fixed header and the list of files, followed
        by the content of each file with every line prefixed by its line number.
        Missing parent directories of ``output_file`` are created.

        Args:
            output_file: Path of the document to write.
        """
        extensions = tuple(self.file_extensions)
        # Sorted by path so that the document layout is reproducible.
        all_cleaned_files = sorted(
            Path(root) / name
            for root, _dirs, files in os.walk(self.output_dir)
            for name in files
            if name.endswith(extensions)
        )

        document_path = Path(output_file)
        document_path.parent.mkdir(parents=True, exist_ok=True)

        with open(document_path, 'w', encoding='utf-8') as f:
            f.write("# 大单检测软件系统 清理后源代码文档\n\n")
            f.write("## 说明\n")
            f.write("本文档包含软件著作权申请所需的清理后源代码，已移除所有注释和空行。\n\n")
            f.write("## 源代码文件列表\n\n")

            for file_path in all_cleaned_files:
                relative_path = file_path.relative_to(self.output_dir)
                f.write(f"- {relative_path}\n")

            f.write("\n## 清理后源代码内容\n\n")
            f.write("=" * 80 + "\n\n")

            for file_path in all_cleaned_files:
                relative_path = file_path.relative_to(self.output_dir)
                f.write(f"文件: {relative_path}\n")
                f.write("=" * 60 + "\n")

                try:
                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as src_file:
                        content = src_file.read()
                except OSError as e:
                    f.write(f"读取文件时出错: {e}\n\n")
                    continue

                # An empty file has no lines; splitting '' would otherwise
                # produce a single phantom numbered line.
                lines = content.split('\n') if content else []
                f.writelines(
                    f"{i:4d}: {line}\n" for i, line in enumerate(lines, 1)
                )
                f.write("\n" + "=" * 60 + "\n\n")

def main():
    """Clean the configured source directories and build the source document.

    Each existing directory in ``source_dirs`` is cleaned into a subdirectory
    of ``cleaned_source_code`` with the same name; missing directories only
    produce a warning. Afterwards one document covering everything under
    ``cleaned_source_code`` is written and a summary is printed.
    """
    # Directories to process, relative to the current working directory.
    # NOTE: "FTAPI" was part of an earlier version of this list and is no
    # longer processed.
    source_dirs = [
        "QMainwindow",
        "Sqbase",
        "common_structures",
        "data_processing",
        "network_communication",
        "core",
    ]

    output_base_dir = "cleaned_source_code"
    source_document_file = "软著申请材料/清理后源代码文档.md"

    print("开始清理源代码...")

    # The document below is built from everything found under the output base
    # directory, so start from an empty tree: leftovers of an earlier run
    # (e.g. a directory since removed from the list) must not end up in it.
    if os.path.isdir(output_base_dir):
        shutil.rmtree(output_base_dir)

    all_cleaned_files = []
    total_lines = 0

    for source_dir in source_dirs:
        if not os.path.exists(source_dir):
            print(f"警告: 目录 {source_dir} 不存在")
            continue

        print(f"清理目录: {source_dir}")
        output_dir = os.path.join(output_base_dir, source_dir)
        cleaner = SourceCodeCleaner(source_dir, output_dir)
        cleaned_files, lines = cleaner.clean_directory()
        all_cleaned_files.extend(cleaned_files)
        total_lines += lines
        print(f"  已清理 {len(cleaned_files)} 个文件，共 {lines} 行代码")

    print("生成源代码文档...")
    # The document's directory may not exist yet; opening the file for writing
    # would fail without it.
    Path(source_document_file).parent.mkdir(parents=True, exist_ok=True)
    # Only generate_source_document is used here, and it reads the output
    # directory, so the cleaned tree is passed for both arguments.
    cleaner = SourceCodeCleaner(output_base_dir, output_base_dir)
    cleaner.generate_source_document(source_document_file)

    print("\n清理完成!")
    print(f"总文件数: {len(all_cleaned_files)}")
    print(f"总代码行数: {total_lines}")
    print(f"源代码文档已生成: {source_document_file}")
    print(f"清理后的源代码保存在: {output_base_dir}")

# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()