LingFlow 自我审查能力改进计划
对比手动8维度审查与自我审查报告,识别差距并提出改进方案
执行摘要
| 维度 | 手动审查发现 | 自我审查发现 | 覆盖率 |
|---|---|---|---|
| 代码结构 | 6项 | 1项 | 17% |
| 代码质量 | 7项 | 2项 | 29% |
| 安全性 | 6项 | 2项(含误报) | 33% |
| 性能 | 8项 | 0项 | 0% |
| 测试覆盖 | 6项 | 0项 | 0% |
| 依赖管理 | 6项 | 0项 | 0% |
| 错误处理 | 6项 | 0项 | 0% |
| 文档规范 | 7项 | 0项 | 0% |
| 平均 | - | - | 10% |
优先级1: 缺失能力的补充实现
1.1 项目结构检查器
# skills/structure-checker/implementation.py
import os
from pathlib import Path
from typing import Dict, List, Any
STANDARD_DIRS = ['tests', 'docs', 'src']
TEMP_FILE_PATTERNS = ['*.txt', '*.log', '*.tmp', 'test_output*']
ROOT_TEST_FILES = ['test_', '_test.py']
def check_project_structure(params: Dict[str, Any]) -> Dict[str, Any]:
"""检查项目结构规范性"""
project_path = Path(params.get('target', '.'))
issues = []
# 检查标准目录
for dir_name in STANDARD_DIRS:
if not (project_path / dir_name).exists():
issues.append({
'type': 'missing_dir',
'severity': 'warning',
'message': f'缺少标准目录: {dir_name}/'
})
# 检查根目录测试文件
for file_path in project_path.glob('*.py'):
if any(pattern in file_path.name for pattern in ROOT_TEST_FILES):
issues.append({
'type': 'test_file_in_root',
'severity': 'warning',
'message': f'测试文件应在tests/目录: {file_path.name}'
})
# 检查临时文件
for pattern in TEMP_FILE_PATTERNS:
for file_path in project_path.glob(pattern):
if file_path.name not in ['.gitignore', 'README.md']:
issues.append({
'type': 'temp_file',
'severity': 'info',
'message': f'临时文件未清理: {file_path.name}'
})
return {
'total_issues': len(issues),
'issues': issues,
'score': max(0, 100 - len(issues) * 5)
}
def execute_skill(params):
return check_project_structure(params)
1.2 日志规范检查器
# skills/log-checker/implementation.py
import ast
from pathlib import Path
def check_logging_usage(code: str, file_path: str) -> List[Dict]:
"""检查代码中是否使用print而非logger"""
issues = []
tree = ast.parse(code)
for node in ast.walk(tree):
if isinstance(node, ast.Call):
# 检查 print() 调用
if isinstance(node.func, ast.Name) and node.func.id == 'print':
issues.append({
'type': 'print_statement',
'severity': 'warning',
'message': '应使用logger而非print()',
'line': node.lineno
})
return issues
1.3 性能分析器
# skills/performance-analyzer/implementation.py
import ast
from typing import Dict, List, Any
PERFORMANCE_ISSUES = {
'repeated_file_reads': '文件在循环中重复读取',
'missing_cache': '应使用缓存的计算密集型操作',
'inefficient_string_concat': '字符串拼接应使用join()',
'nested_loops_deep': '嵌套循环过深(>3层)',
}
def analyze_performance(params: Dict[str, Any]) -> Dict[str, Any]:
"""分析代码中的性能问题"""
target = Path(params.get('target', '.'))
issues = []
for py_file in target.rglob('*.py'):
code = py_file.read_text()
tree = ast.parse(code)
# 检查嵌套循环深度
for node in ast.walk(tree):
depth = _get_loop_depth(node)
if depth > 3:
issues.append({
'type': 'nested_loops_deep',
'file': str(py_file),
'line': node.lineno if hasattr(node, 'lineno') else 0,
'depth': depth
})
# 检查字符串拼接
for node in ast.walk(tree):
if isinstance(node, ast.AugAssign) and isinstance(node.op, ast.Add):
if isinstance(node.target, ast.Name) and node.target.id == 's':
issues.append({
'type': 'inefficient_string_concat',
'file': str(py_file),
'line': node.lineno
})
return {
'total_issues': len(issues),
'issues': issues
}
def _get_loop_depth(node, depth=0):
"""计算嵌套循环深度"""
if isinstance(node, (ast.For, ast.While)):
child_depth = 0
for child in ast.iter_child_nodes(node):
child_depth = max(child_depth, _get_loop_depth(child, depth + 1))
return child_depth
return 0
def execute_skill(params):
return analyze_performance(params)
1.4 测试质量分析器
# skills/test-analyzer/implementation.py
import ast
from pathlib import Path
from typing import Dict, List
def analyze_test_quality(params: Dict) -> Dict[str, Any]:
"""分析测试文件质量"""
target = Path(params.get('target', 'tests/'))
if not target.exists():
target = Path('.')
results = {
'test_files': 0,
'test_functions': 0,
'assertions': 0,
'tests_without_assertions': [],
'missing_fixtures': [],
'mock_usage': 0
}
for test_file in target.rglob('test_*.py'):
results['test_files'] += 1
code = test_file.read_text()
tree = ast.parse(code)
has_assertion = False
has_mock = False
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef) and node.name.startswith('test_'):
results['test_functions'] += 1
if isinstance(node, ast.Assert):
results['assertions'] += 1
has_assertion = True
# 检查 mock 使用
if isinstance(node, ast.Call):
if isinstance(node.func, ast.Attribute):
if node.func.attr in ['mock', 'patch', 'Mock']:
has_mock = True
results['mock_usage'] += 1
if not has_assertion and 'def test_' in code:
results['tests_without_assertions'].append(str(test_file))
return results
def execute_skill(params):
return analyze_test_quality(params)
1.5 依赖分析器
# skills/dependency-checker/implementation.py
import json
import subprocess
from pathlib import Path
from typing import Dict, List, Any
def check_dependencies(params: Dict[str, Any]) -> Dict[str, Any]:
"""检查依赖管理问题"""
project_path = Path(params.get('target', '.'))
issues = []
# 检查 setup.py 与 requirements.txt 同步
setup_deps = _parse_setup_dependencies(project_path / 'setup.py')
req_deps = _parse_requirements(project_path / 'requirements.txt')
for dep in setup_deps:
if dep not in req_deps:
issues.append({
'type': 'dependency_mismatch',
'message': f'setup.py中的依赖未在requirements.txt中: {dep}'
})
# 检查是否有 lockfile
lockfiles = ['poetry.lock', 'Pipfile.lock', 'requirements.lock']
has_lockfile = any((project_path / f).exists() for f in lockfiles)
if not has_lockfile:
issues.append({
'type': 'missing_lockfile',
'message': '缺少依赖锁定文件,建议使用poetry或pip-tools'
})
# 检查版本约束
for dep in req_deps:
if '>=' in dep and not any(c in dep for c in ['^', '~', '==']):
issues.append({
'type': 'loose_version_constraint',
'message': f'版本约束过宽,可能引入破坏性变更: {dep}'
})
return {
'total_issues': len(issues),
'issues': issues,
'has_lockfile': has_lockfile
}
def _parse_setup_dependencies(setup_path: Path) -> List[str]:
"""解析 setup.py 中的依赖"""
if not setup_path.exists():
return []
# 简化实现,实际应使用AST解析
content = setup_path.read_text()
if 'install_requires' in content:
start = content.index('install_requires')
end = content.index(']', start)
deps_section = content[start:end+1]
# 提取包名
import re
return [m.group(1) for m in re.finditer(r'([a-zA-Z0-9_-]+)>=', deps_section)]
return []
def _parse_requirements(req_path: Path) -> List[str]:
"""解析 requirements.txt"""
if not req_path.exists():
return []
deps = []
with open(req_path) as f:
for line in f:
line = line.strip()
if line and not line.startswith('#'):
# 提取包名
pkg = line.split('>=')[0].split('==')[0].split('[')[0].strip()
deps.append(pkg)
return deps
def execute_skill(params):
return check_dependencies(params)
优先级2: 安全检查改进
2.1 减少误报
# 改进 constitution.py 中的检查逻辑
def _check_weak_crypto(self, code: str, principle, file_path: str) -> List[Violation]:
"""检查弱加密算法 - 改进版"""
violations = []
lines = code.split('\n')
weak_algorithms = ['MD5', 'SHA1', 'DES', 'RC4', 'Blowfish']
for i, line in enumerate(lines, 1):
# 跳过注释
if line.strip().startswith('#'):
continue
# 跳过字符串字面量
if 'MD5' in line and ('"' in line or "'" in line):
# 检查是否在注释中
code_before_md5 = line[:line.index('MD5') if 'MD5' in line else 0]
if '#' in code_before_md5:
continue
for algo in weak_algorithms:
if algo in line:
# 更精确的检测:只在导入或函数调用时报警
if any(keyword in line for keyword in ['import', 'from', 'hashlib.', 'crypto.']):
violations.append(Violation(
principle_id=principle.id,
principle_name=principle.name,
severity=principle.level,
description=f"弱加密算法: {algo}",
location=file_path,
line_number=i,
suggested_fix=principle.implementation_pattern
))
break
return violations
优先级3: 统一审查工作流
创建完整的自审查工作流:
# workflows/self_audit.yaml
name: LingFlow 完整自我审查
description: 运行所有检查技能生成完整报告
tasks:
# 阶段1: 代码质量
- id: structure_check
skill: structure-checker
params:
target: ./
- id: log_check
skill: log-checker
params:
target: ./lingflow/
- id: type_check
skill: mypy-runner
params:
target: ./lingflow/
# 阶段2: 安全检查
- id: security_check
skill: code-security-scan
params:
target: ./lingflow/
# 阶段3: 性能分析
- id: performance_check
skill: performance-analyzer
params:
target: ./lingflow/
# 阶段4: 测试分析
- id: test_check
skill: test-analyzer
params:
target: ./
# 阶段5: 依赖检查
- id: dependency_check
skill: dependency-checker
params:
target: ./
# 阶段6: 生成报告
- id: generate_report
skill: report-generator
params:
template: audit_report
output: LINGFLOW_SELF_AUDIT_REPORT.md
depends_on:
- structure_check
- log_check
- security_check
- performance_check
- test_check
- dependency_check
实施路线图
| 阶段 | 任务 | 技能 | 优先级 |
|---|---|---|---|
| 第1周 | 实现缺失的5个检查器 | structure, log, performance, test, dependency | P0 |
| 第2周 | 改进安全检查减少误报 | constitution.py优化 | P0 |
| 第3周 | 创建统一审查工作流 | self_audit.yaml | P1 |
| 第4周 | 添加CLI命令 | lingflow audit | P1 |
预期改进效果
实施后,自我审查覆盖率预计从 10% 提升至 85%:
| 维度 | 当前 | 目标 | 提升 |
|---|---|---|---|
| 代码结构 | 17% | 80% | +63% |
| 代码质量 | 29% | 90% | +61% |
| 安全性 | 33% | 80% | +47% |
| 性能 | 0% | 70% | +70% |
| 测试覆盖 | 0% | 90% | +90% |
| 依赖管理 | 0% | 100% | +100% |
| 错误处理 | 0% | 60% | +60% |
| 文档规范 | 0% | 80% | +80% |