Files
QTradeProgram/clean_source_code.py
2026-02-25 23:01:42 +08:00

175 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
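Source code cleanup script for software copyright registration.
Purpose: strip comments and blank lines from C/C++ source files and generate a clean source-code document.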
"""
import os
import re
import shutil
from pathlib import Path
class SourceCodeCleaner:
    """Strip comments and blank lines from C/C++ sources and build a listing.

    ``clean_directory`` mirrors every matching file under ``source_dir`` into
    ``output_dir`` with comments and blank lines removed.
    ``generate_source_document`` then concatenates the files found under
    ``output_dir`` into a single line-numbered Markdown document.
    """

    # Comments and literals are matched in ONE left-to-right pass so that a
    # "//" or "/*" inside a string/char literal is consumed as part of the
    # literal and never mistaken for the start of a comment.
    # Alternatives, in order: line comment, block comment, raw string literal
    # R"delim(...)delim", ordinary string literal, char literal.
    # The look-behind on the char literal keeps C++14 digit separators
    # (1'000'000) from being read as an opening quote.
    _COMMENT_OR_LITERAL = re.compile(
        r'(?P<line>//[^\n]*)'
        r'|(?P<block>/\*.*?\*/)'
        r'|R"(?P<delim>[^()\\\s"]{0,16})\(.*?\)(?P=delim)"'
        r'|"(?:\\.|[^"\\\n])*"'
        r"|(?<![0-9A-Za-z_])'(?:\\.|[^'\\\n])*'",
        re.DOTALL,
    )

    def __init__(self, source_dir, output_dir):
        """Store the directories to read from and write to.

        Args:
            source_dir: Directory that is searched recursively for sources.
            output_dir: Directory that receives the cleaned copies.
        """
        self.source_dir = Path(source_dir)
        self.output_dir = Path(output_dir)
        self.file_extensions = ['.h', '.cpp', '.hpp', '.c', '.cc']

    def _iter_source_files(self, directory):
        """Yield every file under *directory* whose name ends with a known extension."""
        suffixes = tuple(self.file_extensions)
        for root, _dirs, files in os.walk(directory):
            for name in files:
                if name.endswith(suffixes):
                    yield Path(root) / name

    @staticmethod
    def _strip_comment(match):
        """Return the replacement text for one comment-or-literal match."""
        if match.group('block') is not None:
            # A comment separates tokens, so leave one space behind;
            # otherwise "int/* c */x" would collapse into "intx".
            return ' '
        if match.group('line') is not None:
            return ''
        # String / char literal: keep it exactly as written.
        return match.group(0)

    def clean_directory(self):
        """Clean every source file under ``source_dir`` into ``output_dir``.

        Any existing ``output_dir`` is deleted first so the result contains
        only files produced by this run.

        Returns:
            A ``(cleaned_files, total_lines)`` tuple: the cleaned files'
            paths relative to ``source_dir`` (as strings) and the total
            number of lines written.

        Raises:
            ValueError: If ``output_dir`` is ``source_dir`` or one of its
                parents; wiping it would delete the sources themselves.
        """
        source_root = self.source_dir.resolve()
        output_root = self.output_dir.resolve()
        if output_root == source_root or output_root in source_root.parents:
            raise ValueError(
                f"output_dir {self.output_dir} must not be or contain "
                f"source_dir {self.source_dir}"
            )

        if self.output_dir.exists():
            shutil.rmtree(self.output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        cleaned_files = []
        total_lines = 0
        for source_file in self._iter_source_files(self.source_dir):
            relative_path = source_file.relative_to(self.source_dir)
            output_file = self.output_dir / relative_path
            # Recreate the source's sub-directory layout in the output tree.
            output_file.parent.mkdir(parents=True, exist_ok=True)
            total_lines += self.clean_file(source_file, output_file)
            cleaned_files.append(str(relative_path))
        return cleaned_files, total_lines

    def clean_file(self, input_file, output_file):
        """Write *input_file* to *output_file* without comments or blank lines.

        String and character literals are preserved verbatim, even when they
        contain ``//`` or ``/*``. Trailing whitespace left behind by a removed
        comment is trimmed.

        Args:
            input_file: Path of the source file to read (decoded as UTF-8,
                undecodable bytes are dropped).
            output_file: Path of the cleaned file to write.

        Returns:
            The number of lines written, or 0 if the file could not be read
            or written (the error is printed, not raised).
        """
        try:
            with open(input_file, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            content = self._COMMENT_OR_LITERAL.sub(self._strip_comment, content)
            cleaned_lines = [
                line.rstrip() for line in content.split('\n') if line.strip()
            ]
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write('\n'.join(cleaned_lines))
            return len(cleaned_lines)
        except OSError as e:
            print(f"清理文件 {input_file} 时出错: {e}")
            return 0

    def generate_source_document(self, output_file):
        """Write a Markdown listing of every cleaned file under ``output_dir``.

        The document contains a header, the sorted list of files, and then
        each file's content with a line-number prefix on every line. Missing
        parent directories of *output_file* are created.

        Args:
            output_file: Path of the Markdown document to create.
        """
        all_cleaned_files = sorted(self._iter_source_files(self.output_dir))
        relative_paths = [
            path.relative_to(self.output_dir) for path in all_cleaned_files
        ]

        output_path = Path(output_file)
        # The report usually lives in its own folder; create it on demand
        # instead of failing with FileNotFoundError.
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write("# 大单检测软件系统 清理后源代码文档\n\n")
            f.write("## 说明\n")
            f.write("本文档包含软件著作权申请所需的清理后源代码,已移除所有注释和空行。\n\n")
            f.write("## 源代码文件列表\n\n")
            for relative_path in relative_paths:
                f.write(f"- {relative_path}\n")
            f.write("\n## 清理后源代码内容\n\n")
            f.write("=" * 80 + "\n\n")
            for file_path, relative_path in zip(all_cleaned_files, relative_paths):
                f.write(f"文件: {relative_path}\n")
                f.write("=" * 60 + "\n")
                try:
                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as src_file:
                        content = src_file.read()
                    # Prefix every line with its 1-based line number.
                    for i, line in enumerate(content.split('\n'), 1):
                        f.write(f"{i:4d}: {line}\n")
                    f.write("\n" + "=" * 60 + "\n\n")
                except OSError as e:
                    f.write(f"读取文件时出错: {e}\n\n")
def main():
    """Clean each configured source directory and build the combined document.

    Every directory in ``source_dirs`` that exists is cleaned into a
    same-named sub-directory of ``cleaned_source_code``; missing directories
    are reported and skipped. A single Markdown document covering the whole
    output tree is then written, and a summary is printed.
    """
    source_dirs = [
        "QMainwindow",
        "Sqbase",
        "common_structures",
        "data_processing",
        "network_communication",
        "core",
    ]
    output_base_dir = "cleaned_source_code"
    source_document_file = "软著申请材料/清理后源代码文档.md"

    print("开始清理源代码...")

    # The document generator walks the WHOLE output tree, so output left over
    # from a directory that is no longer listed above would leak into the
    # document. Start every run from an empty tree.
    if os.path.isdir(output_base_dir):
        shutil.rmtree(output_base_dir)

    all_cleaned_files = []
    total_lines = 0
    for source_dir in source_dirs:
        if not os.path.exists(source_dir):
            print(f"警告: 目录 {source_dir} 不存在")
            continue
        print(f"清理目录: {source_dir}")
        output_dir = os.path.join(output_base_dir, source_dir)
        cleaner = SourceCodeCleaner(source_dir, output_dir)
        cleaned_files, lines = cleaner.clean_directory()
        all_cleaned_files.extend(cleaned_files)
        total_lines += lines
        print(f" 已清理 {len(cleaned_files)} 个文件,共 {lines} 行代码")

    print("生成源代码文档...")
    # Make sure the report folder exists before the document is opened.
    document_dir = os.path.dirname(source_document_file)
    if document_dir:
        os.makedirs(document_dir, exist_ok=True)
    # Only generate_source_document() is used here, and it reads output_dir;
    # the source_dir argument is just a placeholder.
    cleaner = SourceCodeCleaner(output_base_dir, output_base_dir)
    cleaner.generate_source_document(source_document_file)

    print("\n清理完成!")
    print(f"总文件数: {len(all_cleaned_files)}")
    print(f"总代码行数: {total_lines}")
    print(f"源代码文档已生成: {source_document_file}")
    print(f"清理后的源代码保存在: {output_base_dir}")
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()