#!/usr/bin/env python3
"""
Source-code cleaning script for software copyright registration.

Strips comments and blank lines from C/C++ source files and assembles the
cleaned files into a single line-numbered Markdown document.
"""

import os
import re
import shutil
from pathlib import Path

# Single-pass tokenizer: string and character literals are matched alongside
# comments so that comment markers appearing *inside* a literal (for example
# "http://host" or "/* not a comment */") are left untouched.
# Known limitation: C++11 raw string literals (R"(...)") and unusual
# combinations of C++14 digit separators are not parsed specially.
_CPP_TOKEN_RE = re.compile(
    r'/\*.*?\*/'                # block comment (may span lines)
    r'|//[^\n]*'                # line comment up to end of line
    r'|"(?:\\.|[^"\\\n])*"'     # string literal
    r"|'(?:\\.|[^'\\\n])*'",    # character literal
    re.DOTALL,
)


def _drop_comment(match):
    """Return the replacement text for one token matched by _CPP_TOKEN_RE."""
    token = match.group(0)
    if token.startswith('/*'):
        # A block comment acts as whitespace between tokens; replacing it
        # with nothing would fuse `a/**/b` into `ab`.
        return ' '
    if token.startswith('//'):
        return ''
    # String or character literal: keep verbatim.
    return token


class SourceCodeCleaner:
    """Remove comments and blank lines from the C/C++ files of a directory.

    Attributes:
        source_dir: Directory that is walked for source files.
        output_dir: Directory that receives the cleaned copies.
        file_extensions: File-name suffixes that count as source files.
    """

    def __init__(self, source_dir, output_dir):
        self.source_dir = Path(source_dir)
        self.output_dir = Path(output_dir)
        self.file_extensions = ['.h', '.cpp', '.hpp', '.c', '.cc']

    def _is_source_file(self, file_name):
        """Return True when file_name ends with one of file_extensions."""
        return file_name.endswith(tuple(self.file_extensions))

    def clean_directory(self):
        """Clean every source file under source_dir into output_dir.

        output_dir is deleted and recreated first, and the directory layout
        below source_dir is mirrored inside it.

        Returns:
            A tuple (cleaned_files, total_lines): the paths of the processed
            files relative to source_dir, and the sum of the line counts
            reported by clean_file.

        Raises:
            ValueError: If output_dir is source_dir itself or one of its
                parents, because recreating it would delete the sources.
        """
        source_root = self.source_dir.resolve()
        output_root = self.output_dir.resolve()
        if output_root == source_root or output_root in source_root.parents:
            raise ValueError(
                f"output_dir {self.output_dir} must not be or contain "
                f"source_dir {self.source_dir}"
            )

        if self.output_dir.exists():
            shutil.rmtree(self.output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        cleaned_files = []
        total_lines = 0

        for root, dirs, files in os.walk(self.source_dir):
            root_path = Path(root)
            # Prune the output directory when it lives inside the source tree
            # (otherwise freshly written files would be cleaned again), and
            # sort so the traversal order is deterministic.
            dirs[:] = sorted(
                d for d in dirs if (root_path / d).resolve() != output_root
            )
            for file_name in sorted(files):
                if not self._is_source_file(file_name):
                    continue
                source_file = root_path / file_name
                relative_path = source_file.relative_to(self.source_dir)
                output_file = self.output_dir / relative_path
                output_file.parent.mkdir(parents=True, exist_ok=True)

                total_lines += self.clean_file(source_file, output_file)
                cleaned_files.append(str(relative_path))

        return cleaned_files, total_lines

    def clean_file(self, input_file, output_file):
        """Write a copy of input_file without comments and blank lines.

        The input is decoded as UTF-8 and undecodable bytes are dropped.
        Comment markers inside string or character literals are preserved.
        Trailing whitespace left behind by removed comments is trimmed.

        Args:
            input_file: Path of the file to read.
            output_file: Path of the file to write; its directory must exist.

        Returns:
            The number of lines written, or 0 when the file could not be
            read or written (the error is printed, not raised).
        """
        try:
            with open(input_file, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()

            without_comments = _CPP_TOKEN_RE.sub(_drop_comment, content)
            cleaned_lines = [
                line.rstrip()
                for line in without_comments.split('\n')
                if line.strip()
            ]

            with open(output_file, 'w', encoding='utf-8') as f:
                f.write('\n'.join(cleaned_lines))

            return len(cleaned_lines)
        except OSError as e:
            print(f"清理文件 {input_file} 时出错: {e}")
            return 0

    def generate_source_document(self, output_file):
        """Write a Markdown document listing and quoting all cleaned files.

        Every source file found under output_dir is listed and then
        reproduced with line numbers, in sorted path order. The parent
        directory of output_file is created when it does not exist.

        Args:
            output_file: Path of the Markdown document to write.
        """
        all_cleaned_files = []
        for root, _dirs, files in os.walk(self.output_dir):
            for file_name in files:
                if self._is_source_file(file_name):
                    all_cleaned_files.append(Path(root) / file_name)
        all_cleaned_files.sort()

        output_path = Path(output_file)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write("# 大单检测软件系统 清理后源代码文档\n\n")
            f.write("## 说明\n")
            f.write("本文档包含软件著作权申请所需的清理后源代码,已移除所有注释和空行。\n\n")

            f.write("## 源代码文件列表\n\n")
            for file_path in all_cleaned_files:
                relative_path = file_path.relative_to(self.output_dir)
                f.write(f"- {relative_path}\n")

            f.write("\n## 清理后源代码内容\n\n")
            f.write("=" * 80 + "\n\n")

            for file_path in all_cleaned_files:
                relative_path = file_path.relative_to(self.output_dir)
                f.write(f"文件: {relative_path}\n")
                f.write("=" * 60 + "\n")

                try:
                    with open(file_path, 'r', encoding='utf-8',
                              errors='ignore') as src_file:
                        content = src_file.read()
                except OSError as e:
                    f.write(f"读取文件时出错: {e}\n\n")
                    continue

                # Prefix every line with its 1-based line number.
                f.writelines(
                    f"{i:4d}: {line}\n"
                    for i, line in enumerate(content.split('\n'), 1)
                )
                f.write("\n" + "=" * 60 + "\n\n")


def main():
    """Clean the configured source directories and build the document."""
    source_dirs = [
        "QMainwindow",
        "Sqbase",
        "common_structures",
        "data_processing",
        "network_communication",
        "core"
    ]

    output_base_dir = "cleaned_source_code"
    source_document_file = "软著申请材料/清理后源代码文档.md"

    print("开始清理源代码...")

    all_cleaned_files = []
    total_lines = 0

    for source_dir in source_dirs:
        if Path(source_dir).is_dir():
            print(f"清理目录: {source_dir}")
            output_dir = os.path.join(output_base_dir, source_dir)
            cleaner = SourceCodeCleaner(source_dir, output_dir)
            cleaned_files, lines = cleaner.clean_directory()
            all_cleaned_files.extend(cleaned_files)
            total_lines += lines
            print(f" 已清理 {len(cleaned_files)} 个文件,共 {lines} 行代码")
        else:
            print(f"警告: 目录 {source_dir} 不存在")

    print("生成源代码文档...")
    # Only generate_source_document is used on this instance; it reads from
    # output_dir, so the base output directory is passed for both arguments.
    # NOTE(review): sub-directories left in output_base_dir by an earlier run
    # with a different source_dirs list are also picked up here - confirm
    # whether the base directory should be cleared first.
    cleaner = SourceCodeCleaner(output_base_dir, output_base_dir)
    cleaner.generate_source_document(source_document_file)

    print("\n清理完成!")
    print(f"总文件数: {len(all_cleaned_files)}")
    print(f"总代码行数: {total_lines}")
    print(f"源代码文档已生成: {source_document_file}")
    print(f"清理后的源代码保存在: {output_base_dir}")


if __name__ == "__main__":
    main()