finish app develop
This commit is contained in:
1
app/__init__.py
Normal file
1
app/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""DOCX compliance analysis application."""
|
||||
222
app/analyzer.py
Normal file
222
app/analyzer.py
Normal file
@@ -0,0 +1,222 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from collections import Counter
|
||||
from typing import Protocol
|
||||
|
||||
import requests
|
||||
|
||||
from app.config import ProviderConfig
|
||||
from app.docx_parser import ParsedDocument
|
||||
from app.report_generator import AnalysisReport
|
||||
from app.skill_loader import Skill
|
||||
|
||||
|
||||
class SupportsPost(Protocol):
    """Structural type for any HTTP client object that exposes ``post``."""

    def post(self, url: str, **kwargs):
        """Send a POST request to ``url``; keyword arguments are passed through."""
|
||||
|
||||
|
||||
# Domain keywords that ``_tokens`` looks up by plain substring match.
# Each keyword found in a text contributes one token, regardless of how many
# times it occurs.
IMPORTANT_TERMS = {
    "需求",
    "接口",
    "测试",
    "合格性",
    "追踪",
    "追溯",
    "配置",
    "质量",
    "部署",
    "安装",
    "验收",
    "设计",
    "资源",
    "风险",
    "计划",
    "说明",
    "文档",
    "CSCI",
    "GJB",
}
|
||||
|
||||
|
||||
def _tokens(text: str) -> list[str]:
    """Return the matching tokens found in ``text``.

    The result is every ASCII word of three or more characters (a letter
    followed by letters, digits, ``_`` or ``-``), lowercased, followed by each
    entry of ``IMPORTANT_TERMS`` that occurs in ``text`` (once per entry, in
    its original case).
    """
    latin_words = [word.lower() for word in re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,}", text)]
    domain_hits = [keyword for keyword in IMPORTANT_TERMS if keyword in text]
    return latin_words + domain_hits
|
||||
|
||||
|
||||
def select_relevant_skills(parsed: ParsedDocument, skills: list[Skill], max_skills: int = 6) -> list[Skill]:
    """Rank ``skills`` by token overlap with the document and keep the best.

    Args:
        parsed: Parsed document; its text, headings and filename are used.
        skills: Candidate skills to score.
        max_skills: Maximum number of skills to return.

    Returns:
        Up to ``max_skills`` skills ordered by descending score, ties broken
        by slug. If no skill scores above zero, the first ``max_skills``
        entries of ``skills`` are returned unchanged.
    """
    heading_text = "\n".join(heading.text for heading in parsed.headings)
    frequencies = Counter(_tokens(parsed.text + "\n" + heading_text))

    ranked: list[tuple[int, Skill]] = []
    for candidate in skills:
        # Only the first 3000 characters of the skill body take part in matching.
        profile = (
            f"{candidate.slug}\n{candidate.name}\n{candidate.description}\n"
            f"{candidate.use_when}\n{candidate.content[:3000]}"
        )
        vocabulary = set(_tokens(profile))
        # Each shared token contributes its frequency in the document.
        points = sum(count for token, count in frequencies.items() if token in vocabulary)
        # Every skill receives one extra point when the upload is a .docx file.
        if parsed.filename.lower().endswith(".docx"):
            points += 1
        if points > 0:
            ranked.append((points, candidate))

    if not ranked:
        return skills[:max_skills]

    ranked.sort(key=lambda pair: (-pair[0], pair[1].slug))
    return [candidate for _, candidate in ranked[:max_skills]]
|
||||
|
||||
|
||||
def build_analysis_prompt(parsed: ParsedDocument, skills: list[Skill]) -> str:
    """Assemble the compliance-review prompt sent to the model.

    The prompt contains fixed instructions, the file name, an outline built
    from at most 80 headings, one section per skill (skill content truncated
    to 6000 characters) and the first 18000 characters of the document text.
    """
    skill_block = "\n".join(
        f"## {skill.slug}\n名称: {skill.name}\n描述: {skill.description}\n适用条件: {skill.use_when}\n规范内容:\n{skill.content[:6000]}"
        for skill in skills
    )

    outline_lines = [f"- H{heading.level} {heading.text}" for heading in parsed.headings[:80]]
    document_outline = "\n".join(outline_lines) if outline_lines else "未识别到标题。"
    excerpt = parsed.text[:18000]

    return f"""你是军用软件文档符合性审查助手。请依据给定 GJB438C/GJB2786 技能规范,分析上传 DOCX 是否符合规范。

请输出中文 Markdown,必须包含以下小节:
1. 总体结论
2. 符合项
3. 不符合项
4. 缺失章节或缺失证据
5. 整改建议
6. 需人工复核事项

要求:
- 每个问题尽量引用文档中的标题、关键词或证据摘要。
- 不要编造未在文档中出现的证据。
- 如果无法判断,标记为“需人工复核”。

# 文件
{parsed.filename}

# 文档目录
{document_outline}

# 待检查技能
{skill_block}

# 文档正文摘录
{excerpt}
"""
|
||||
|
||||
|
||||
class LLMClient:
    """Minimal client that posts a chat-completions request for one provider."""

    def __init__(self, provider: ProviderConfig, session: SupportsPost | None = None, timeout: int = 120) -> None:
        """Store the provider settings, HTTP session and request timeout.

        A new ``requests.Session`` is created when no (truthy) session is given.
        """
        self.provider = provider
        self.session = session if session else requests.Session()
        self.timeout = timeout

    def complete(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        Raises whatever ``raise_for_status`` raises for a failed HTTP response,
        and ``KeyError``/``IndexError`` if the response JSON lacks
        ``choices[0].message.content``.
        """
        provider = self.provider

        headers = {"Content-Type": "application/json"}
        # The literal key "EMPTY" is treated the same as having no key.
        if provider.api_key and provider.api_key != "EMPTY":
            headers["Authorization"] = f"Bearer {provider.api_key}"

        body = {
            "model": provider.model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": provider.temperature,
            "max_tokens": provider.max_tokens,
        }
        response = self.session.post(
            provider.chat_completions_url,
            headers=headers,
            json=body,
            timeout=self.timeout,
        )
        response.raise_for_status()
        first_choice = response.json()["choices"][0]
        return first_choice["message"]["content"]
|
||||
|
||||
|
||||
def heuristic_analysis(parsed: ParsedDocument, skills: list[Skill]) -> AnalysisReport:
    """Build a report from local keyword checks, without calling a model.

    Each required keyword is looked up in the document text and headings; a
    missing keyword yields a finding that needs rectification plus a matching
    recommendation. A document without any headings yields one more finding.
    """
    headings_text = "\n".join(heading.text for heading in parsed.headings)
    body_text = parsed.text
    findings: list[dict[str, str]] = []
    recommendations: list[str] = []

    for keyword in ("范围", "引用文档", "需求", "合格性", "追踪", "接口"):
        if keyword in body_text or keyword in headings_text:
            status, evidence = "符合", "文档中已发现相关表述"
        else:
            status, evidence = "需整改", "未在解析文本中发现明确表述"
            recommendations.append(f"补充或明确“{keyword}”相关章节与证据。")
        findings.append({"status": status, "item": f"检查关键内容:{keyword}", "evidence": evidence})

    if not parsed.headings:
        findings.append({"status": "需整改", "item": "章节结构", "evidence": "未识别到 Word 标题样式"})
        recommendations.append("使用 Word 标题样式组织章节,便于目录和符合性审查。")

    has_issue = any(entry["status"] != "符合" for entry in findings)
    summary = "部分通过,需人工复核" if has_issue else "通过"

    # Fall back to a generic recommendation when every check passed.
    if not recommendations:
        recommendations = ["保持现有章节结构,并由人工进行最终符合性确认。"]

    return AnalysisReport(
        source_filename=parsed.filename,
        provider_name="local",
        model_name="heuristic",
        matched_skills=[skill.slug for skill in skills],
        summary=summary,
        findings=findings,
        recommendations=recommendations,
        raw_model_output="未调用模型,已使用本地启发式规则生成初步分析。",
    )
|
||||
|
||||
|
||||
def report_from_model_output(
    parsed: ParsedDocument,
    skills: list[Skill],
    provider_name: str,
    model_name: str,
    output: str,
) -> AnalysisReport:
    """Wrap raw model output in an ``AnalysisReport``.

    The report carries a single finding whose evidence is the first 1200
    characters of ``output``; the summary and recommendations are extracted
    from the output text, and the full output is kept as ``raw_model_output``.
    """
    model_finding = {"status": "模型分析", "item": "完整分析结果", "evidence": output[:1200]}
    slugs = [skill.slug for skill in skills]
    return AnalysisReport(
        source_filename=parsed.filename,
        provider_name=provider_name,
        model_name=model_name,
        matched_skills=slugs,
        summary=_extract_summary(output),
        findings=[model_finding],
        recommendations=_extract_recommendations(output),
        raw_model_output=output,
    )
|
||||
|
||||
|
||||
def _extract_summary(output: str) -> str:
|
||||
for line in output.splitlines():
|
||||
normalized = line.strip(" #::")
|
||||
if "总体结论" in normalized and len(normalized) > 4:
|
||||
return normalized
|
||||
return "模型已生成分析结果,需人工复核"
|
||||
|
||||
|
||||
def _extract_recommendations(output: str) -> list[str]:
|
||||
recommendations: list[str] = []
|
||||
in_section = False
|
||||
for line in output.splitlines():
|
||||
stripped = line.strip()
|
||||
if "整改建议" in stripped or "修改建议" in stripped:
|
||||
in_section = True
|
||||
continue
|
||||
if in_section and stripped.startswith("#"):
|
||||
break
|
||||
if in_section and stripped.lstrip("-0123456789.、 "):
|
||||
recommendations.append(stripped.lstrip("-0123456789.、 "))
|
||||
return recommendations[:10] or ["按模型分析结果逐项整改,并进行人工复核。"]
|
||||
|
||||
|
||||
def serialize_prompt_debug(prompt: str) -> str:
    """Return indented JSON holding the first 2000 characters of ``prompt``."""
    preview = {"prompt_preview": prompt[:2000]}
    return json.dumps(preview, indent=2, ensure_ascii=False)
|
||||
50
app/config.py
Normal file
50
app/config.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ProviderConfig:
    """Immutable connection settings for one model provider."""

    api_key: str
    base_url: str
    max_tokens: int
    model: str
    temperature: float

    @property
    def chat_completions_url(self) -> str:
        """Return ``base_url`` ending in ``/chat/completions``.

        Trailing slashes are removed first; the suffix is appended only when
        the URL does not already end with it.
        """
        endpoint = "/chat/completions"
        trimmed = self.base_url.rstrip("/")
        return trimmed if trimmed.endswith(endpoint) else f"{trimmed}{endpoint}"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ApiSettings:
    """The selected provider's name paired with its configuration."""

    # Key of the chosen entry in the config file's provider table.
    provider_name: str
    # Connection settings loaded for that entry.
    provider: ProviderConfig
|
||||
|
||||
|
||||
def load_api_config(path: Path | str = Path("configs/api_config.yaml"), provider_name: str | None = None) -> ApiSettings:
    """Load one provider's settings from the YAML API configuration.

    Args:
        path: Location of the YAML file.
        provider_name: Provider to select; when empty, the file's
            ``default_provider`` is used.

    Returns:
        The selected provider name together with its ``ProviderConfig``.

    Raises:
        ValueError: No provider name is available, or the selected provider
            is not listed under ``providers``.
        KeyError: The provider entry lacks ``base_url`` or ``model``.
    """
    config_path = Path(path)
    data = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
    selected_name = provider_name or data.get("default_provider")
    # A key written without a value loads as None; treat it like a missing table.
    providers = data.get("providers") or {}

    if not selected_name:
        raise ValueError("api_config.yaml missing default_provider")
    if selected_name not in providers:
        raise ValueError(f"provider not found in api_config.yaml: {selected_name}")

    provider_data = providers[selected_name]

    def optional(key: str, default: object) -> object:
        """Return the configured value, or ``default`` when absent or null."""
        value = provider_data.get(key)
        return default if value is None else value

    provider = ProviderConfig(
        # Without the null check an empty ``api_key:`` would become the string
        # "None" and be sent as a bearer token.
        api_key=str(optional("api_key", "")),
        base_url=str(provider_data["base_url"]),
        max_tokens=int(optional("max_tokens", 4096)),
        model=str(provider_data["model"]),
        temperature=float(optional("temperature", 0.7)),
    )
    return ApiSettings(provider_name=selected_name, provider=provider)
|
||||
67
app/docx_parser.py
Normal file
67
app/docx_parser.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from docx import Document
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Heading:
    """A heading paragraph found in a parsed document."""

    # Heading depth derived from the paragraph style name.
    level: int
    # Heading text with surrounding whitespace removed.
    text: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ParsedDocument:
    """Text content extracted from one DOCX file."""

    # Name shown in reports (display name or the file's own name).
    filename: str
    # All paragraph texts and table rows joined with newlines.
    text: str
    # Non-empty paragraph texts.
    paragraphs: list[str]
    # Paragraphs whose style marks them as headings.
    headings: list[Heading]
    # One entry per table: rows of cell texts, rows without any text omitted.
    tables: list[list[list[str]]]
|
||||
|
||||
|
||||
def _heading_level(style_name: str) -> int | None:
|
||||
if not style_name.lower().startswith("heading"):
|
||||
return None
|
||||
parts = style_name.split()
|
||||
if parts and parts[-1].isdigit():
|
||||
return int(parts[-1])
|
||||
return 1
|
||||
|
||||
|
||||
def parse_docx(path: Path | str, display_filename: str | None = None) -> ParsedDocument:
    """Read a DOCX file and collect its paragraphs, headings and tables.

    Args:
        path: Location of the DOCX file.
        display_filename: Name to record instead of the file's own name.

    Returns:
        A ``ParsedDocument`` whose ``text`` lists all non-empty paragraphs
        first, followed by the table rows (cells joined with ``" | "``).
    """
    source = Path(path)
    document = Document(source)

    paragraphs: list[str] = []
    headings: list[Heading] = []
    tables: list[list[list[str]]] = []
    text_parts: list[str] = []

    for block in document.paragraphs:
        content = block.text.strip()
        if content:
            paragraphs.append(content)
            text_parts.append(content)
            style_name = block.style.name if block.style else ""
            level = _heading_level(style_name)
            if level is not None:
                headings.append(Heading(level=level, text=content))

    for table in document.tables:
        kept_rows: list[list[str]] = []
        for row in table.rows:
            cells = [cell.text.strip() for cell in row.cells]
            # Rows in which every cell is empty are dropped.
            if not any(cells):
                continue
            kept_rows.append(cells)
            text_parts.append(" | ".join(cells))
        if kept_rows:
            tables.append(kept_rows)

    return ParsedDocument(
        filename=display_filename or source.name,
        text="\n".join(text_parts),
        paragraphs=paragraphs,
        headings=headings,
        tables=tables,
    )
|
||||
300
app/main.py
Normal file
300
app/main.py
Normal file
@@ -0,0 +1,300 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
from typing import Callable
|
||||
|
||||
from fastapi import FastAPI, File, Form, HTTPException, Request, UploadFile
|
||||
from fastapi.responses import FileResponse, HTMLResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.templating import Jinja2Templates
|
||||
|
||||
from app.analyzer import (
|
||||
LLMClient,
|
||||
build_analysis_prompt,
|
||||
heuristic_analysis,
|
||||
report_from_model_output,
|
||||
select_relevant_skills,
|
||||
)
|
||||
from app.config import load_api_config
|
||||
from app.docx_parser import parse_docx
|
||||
from app.report_generator import generate_docx_report, generate_markdown_report
|
||||
from app.skill_loader import load_skill_catalog
|
||||
|
||||
|
||||
# Project root: the directory two levels above this module.
ROOT_DIR = Path(__file__).resolve().parent.parent
# Where uploaded DOCX files are stored.
UPLOAD_DIR = ROOT_DIR / "uploads"
# Where generated reports are written and served from.
OUTPUT_DIR = ROOT_DIR / "outputs"
# Directory holding the skill catalog.
SKILL_DIR = ROOT_DIR / "GJB438C-2021_prd_skills"
# YAML file with the model provider settings.
CONFIG_PATH = ROOT_DIR / "configs" / "api_config.yaml"
# Upload size limit: 30 MiB.
MAX_UPLOAD_BYTES = 30 * 1024 * 1024
# Signature of progress callbacks: (percent, message).
ProgressCallback = Callable[[int, str], None]
|
||||
|
||||
|
||||
@dataclass
class AnalysisTask:
    """Mutable state record of one background analysis job."""

    task_id: str
    source_filename: str
    status: str = "queued"
    progress: int = 0
    message: str = "任务已创建"
    summary: str = ""
    matched_skills: list[str] = field(default_factory=list)
    downloads: dict[str, str] = field(default_factory=dict)
    error: str = ""
    created_at: float = field(default_factory=time.time)
    updated_at: float = field(default_factory=time.time)

    def to_dict(self) -> dict[str, object]:
        """Return the task's fields as a dict, in declaration order.

        List and dict values are the task's own objects, not copies.
        """
        exported = (
            "task_id",
            "source_filename",
            "status",
            "progress",
            "message",
            "summary",
            "matched_skills",
            "downloads",
            "error",
            "created_at",
            "updated_at",
        )
        return {name: getattr(self, name) for name in exported}
|
||||
|
||||
|
||||
class AnalysisTaskStore:
    """In-memory registry of analysis tasks, guarded by a lock."""

    def __init__(self) -> None:
        """Create an empty registry."""
        self._tasks: dict[str, AnalysisTask] = {}
        self._lock = threading.Lock()

    def create(self, source_filename: str) -> AnalysisTask:
        """Register and return a new task with a random hexadecimal id."""
        new_task = AnalysisTask(task_id=uuid4().hex, source_filename=source_filename)
        with self._lock:
            self._tasks[new_task.task_id] = new_task
        return new_task

    def update(
        self,
        task_id: str,
        *,
        status: str | None = None,
        progress: int | None = None,
        message: str | None = None,
        summary: str | None = None,
        matched_skills: list[str] | None = None,
        downloads: dict[str, str] | None = None,
        error: str | None = None,
    ) -> AnalysisTask:
        """Apply every non-``None`` field to the task and refresh ``updated_at``.

        Raises:
            KeyError: ``task_id`` is not registered.
        """
        changes = {
            "status": status,
            "progress": progress,
            "message": message,
            "summary": summary,
            "matched_skills": matched_skills,
            "downloads": downloads,
            "error": error,
        }
        with self._lock:
            task = self._tasks[task_id]
            for attribute, value in changes.items():
                if value is not None:
                    setattr(task, attribute, value)
            task.updated_at = time.time()
            return task

    def get(self, task_id: str) -> AnalysisTask | None:
        """Return the task registered under ``task_id``, or ``None``."""
        with self._lock:
            return self._tasks.get(task_id)
|
||||
|
||||
|
||||
# Process-wide task registry used by the request handlers and worker threads.
TASK_STORE = AnalysisTaskStore()

# FastAPI application, its template loader, and the static asset mount.
app = FastAPI(title="GJB438C DOCX 规范分析")
templates = Jinja2Templates(directory=str(ROOT_DIR / "app" / "templates"))
app.mount("/static", StaticFiles(directory=str(ROOT_DIR / "app" / "static")), name="static")
|
||||
|
||||
|
||||
def analyze_saved_docx(
    upload_path: Path,
    provider: str | None = None,
    use_model: bool = True,
    display_filename: str | None = None,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Analyze a stored DOCX file and write a Markdown report.

    Args:
        upload_path: Location of the uploaded DOCX file.
        provider: Provider name to use; empty selects the configured default.
        use_model: When False, only the local heuristic checks are run and the
            API configuration is not read at all.
        display_filename: Name to show in the report instead of the stored name.
        progress_callback: Optional ``(percent, message)`` callback.

    Returns:
        A dict with the source file name, summary, matched skill slugs,
        download links and the Markdown file name.

    Raises:
        ValueError: ``use_model`` is True and the API configuration cannot
            provide the requested provider.
    """

    def progress(percent: int, message: str) -> None:
        if progress_callback is not None:
            progress_callback(percent, message)

    progress(5, "正在解析 DOCX 文档")
    parsed = parse_docx(upload_path, display_filename=display_filename)
    progress(20, "DOCX 解析完成,正在加载技能规范")
    skills = load_skill_catalog(SKILL_DIR)
    progress(35, "技能规范已加载,正在匹配候选技能")
    selected_skills = select_relevant_skills(parsed, skills)
    progress(50, f"已匹配 {len(selected_skills)} 项技能,正在读取模型配置")

    if use_model:
        # The configuration is only needed on this branch; loading it here
        # keeps local-only analysis working without a valid config file.
        settings = load_api_config(CONFIG_PATH, provider_name=provider or None)
        progress(65, f"正在调用 {settings.provider.model} 进行分析")
        prompt = build_analysis_prompt(parsed, selected_skills)
        try:
            output = LLMClient(settings.provider).complete(prompt)
            report = report_from_model_output(
                parsed,
                selected_skills,
                settings.provider_name,
                settings.provider.model,
                output,
            )
        except Exception as exc:
            # Deliberate best effort: any model failure degrades to the local
            # heuristic report, with the failure recorded in the report itself.
            report = heuristic_analysis(parsed, selected_skills)
            report = report.__class__(
                source_filename=report.source_filename,
                provider_name=settings.provider_name,
                model_name=f"{settings.provider.model} (调用失败,已降级)",
                matched_skills=report.matched_skills,
                summary=f"{report.summary};模型调用失败:{exc}",
                findings=report.findings,
                recommendations=report.recommendations,
                raw_model_output=f"模型调用失败:{exc}\n\n{report.raw_model_output}",
            )
    else:
        progress(70, "已关闭模型分析,正在使用本地规则生成结果")
        report = heuristic_analysis(parsed, selected_skills)

    progress(85, "正在生成 Markdown 分析文档")
    markdown_path = generate_markdown_report(report, OUTPUT_DIR)
    progress(100, "分析完成")

    return {
        "source_filename": parsed.filename,
        "summary": report.summary,
        "matched_skills": report.matched_skills,
        "downloads": {"markdown": f"/download/{markdown_path.name}"},
        "markdown_filename": markdown_path.name,
    }
|
||||
|
||||
|
||||
def _run_analysis_task(
    task_id: str,
    upload_path: Path,
    provider: str | None,
    use_model: bool,
    display_filename: str,
) -> None:
    """Run one analysis and mirror its progress and outcome into the task store.

    Any exception raised during the run is recorded on the task as an error
    instead of propagating.
    """

    def report_progress(percent: int, text: str) -> None:
        TASK_STORE.update(task_id, status="running", progress=percent, message=text)

    try:
        TASK_STORE.update(task_id, status="running", progress=1, message="任务已启动")
        outcome = analyze_saved_docx(
            upload_path,
            provider=provider,
            use_model=use_model,
            display_filename=display_filename,
            progress_callback=report_progress,
        )
        completion = {
            "status": "completed",
            "progress": 100,
            "message": "分析完成",
            "summary": str(outcome["summary"]),
            "matched_skills": list(outcome["matched_skills"]),
            "downloads": dict(outcome["downloads"]),
        }
        TASK_STORE.update(task_id, **completion)
    except Exception as exc:
        TASK_STORE.update(task_id, status="error", progress=100, message="分析失败", error=str(exc))
|
||||
|
||||
|
||||
@app.get("/", response_class=HTMLResponse)
def index(request: Request) -> HTMLResponse:
    """Render the upload page with the default provider name and skill count."""
    settings = load_api_config(CONFIG_PATH)
    skills = load_skill_catalog(SKILL_DIR)
    context = {
        "default_provider": settings.provider_name,
        "skill_count": len(skills),
    }
    return templates.TemplateResponse(request, "index.html", context)
|
||||
|
||||
|
||||
@app.post("/analyze")
async def analyze_docx(
    file: UploadFile = File(...),
    provider: str | None = Form(None),
    use_model: str = Form("true"),
):
    """Accept a DOCX upload and start its analysis in a background thread.

    Returns:
        The new task's id, status URL and initial state.

    Raises:
        HTTPException: 400 when the file is not a ``.docx``; 413 when it is
            larger than ``MAX_UPLOAD_BYTES``.
    """
    if not file.filename or not file.filename.lower().endswith(".docx"):
        raise HTTPException(status_code=400, detail="仅支持上传 .docx 文件")

    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    # Only the final path component of the client-supplied name is used.
    original_name = Path(file.filename).name
    upload_path = UPLOAD_DIR / f"{uuid4().hex}_{original_name}"

    # Read one byte more than the limit at most: enough to detect an oversized
    # upload without loading an arbitrarily large body into memory.
    content = await file.read(MAX_UPLOAD_BYTES + 1)
    if len(content) > MAX_UPLOAD_BYTES:
        raise HTTPException(status_code=413, detail="文件超过 30MB 限制")
    upload_path.write_bytes(content)

    should_use_model = use_model.lower() in {"1", "true", "yes", "on"}
    task = TASK_STORE.create(original_name)
    threading.Thread(
        target=_run_analysis_task,
        args=(task.task_id, upload_path, provider, should_use_model, original_name),
        daemon=True,
    ).start()
    return {
        "task_id": task.task_id,
        "status_url": f"/tasks/{task.task_id}",
        "status": task.status,
        "progress": task.progress,
        "message": "任务已提交",
    }
|
||||
|
||||
|
||||
@app.get("/tasks/{task_id}")
def get_task(task_id: str) -> dict[str, object]:
    """Return the stored state of a task, or respond 404 if it is unknown."""
    found = TASK_STORE.get(task_id)
    if found is not None:
        return found.to_dict()
    raise HTTPException(status_code=404, detail="任务不存在")
|
||||
|
||||
|
||||
@app.get("/download/{filename}")
def download_report(filename: str):
    """Serve a generated report from the output directory.

    Only the final path component of ``filename`` is used. Responds 404 when
    no regular file of that name exists.
    """
    path = OUTPUT_DIR / Path(filename).name
    if not (path.exists() and path.is_file()):
        raise HTTPException(status_code=404, detail="报告不存在")

    known_media_types = {
        ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        ".md": "text/markdown; charset=utf-8",
    }
    media_type = known_media_types.get(path.suffix, "application/octet-stream")
    return FileResponse(path, filename=path.name, media_type=media_type)
|
||||
|
||||
|
||||
@app.post("/cleanup")
def cleanup_runtime_files() -> dict[str, int]:
    """Delete everything inside the upload and output directories.

    Returns:
        ``{"removed": n}`` where ``n`` counts the deleted top-level entries
        (a removed directory tree counts once).
    """
    removed = 0
    for directory in (UPLOAD_DIR, OUTPUT_DIR):
        if directory.exists():
            for entry in directory.iterdir():
                if entry.is_file():
                    entry.unlink()
                elif entry.is_dir():
                    shutil.rmtree(entry)
                else:
                    # Neither a file nor a directory: left in place, not counted.
                    continue
                removed += 1
    return {"removed": removed}
|
||||
108
app/report_generator.py
Normal file
108
app/report_generator.py
Normal file
@@ -0,0 +1,108 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
from docx import Document
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AnalysisReport:
    """Immutable result of one document analysis, ready for rendering."""

    # Name of the analyzed file as shown in the report.
    source_filename: str
    # Provider and model that produced the analysis.
    provider_name: str
    model_name: str
    # Slugs of the skills the document was checked against.
    matched_skills: list[str]
    # Overall conclusion text.
    summary: str
    # Rows of the compliance matrix; the renderers read the keys
    # "status", "item" and "evidence".
    findings: list[dict[str, str]]
    # Suggested corrections.
    recommendations: list[str]
    # Unprocessed analysis text included verbatim in the report.
    raw_model_output: str
|
||||
|
||||
|
||||
def _safe_stem(filename: str) -> str:
|
||||
stem = Path(filename).stem or "analysis"
|
||||
safe = "".join(ch if ch.isalnum() or ch in ("-", "_") else "_" for ch in stem)
|
||||
return safe[:60] or "analysis"
|
||||
|
||||
|
||||
def _report_base_path(report: AnalysisReport, output_dir: Path, suffix: str) -> Path:
    """Create ``output_dir`` if needed and return a fresh report path in it.

    The file name combines the sanitized source stem, eight random
    hexadecimal characters and the given ``suffix`` as extension.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    stem = _safe_stem(report.source_filename)
    unique = uuid4().hex[:8]
    return output_dir / f"{stem}_{unique}_analysis.{suffix}"
|
||||
|
||||
|
||||
def generate_markdown_report(report: AnalysisReport, output_dir: Path | str) -> Path:
    """Write the report as a Markdown file and return its path.

    Args:
        report: Analysis result to render.
        output_dir: Directory to write into; created if missing.

    Returns:
        Path of the written ``.md`` file.
    """

    def table_cell(value: object) -> str:
        """Make ``value`` safe for a single Markdown table cell.

        A literal ``|`` would start a new column and a line break would end
        the row, so ``|`` becomes ``/`` and line breaks become ``<br>``.
        """
        return "<br>".join(str(value).replace("|", "/").splitlines())

    path = _report_base_path(report, Path(output_dir), "md")
    lines = [
        "# DOCX 规范分析报告",
        "",
        "## 基本信息",
        "",
        f"- 源文件:{report.source_filename}",
        f"- 分析时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"- 模型供应商:{report.provider_name}",
        f"- 模型名称:{report.model_name}",
        f"- 命中技能:{', '.join(report.matched_skills) or '无'}",
        "",
        "## 总体结论",
        "",
        report.summary,
        "",
        "## 技能符合性矩阵",
        "",
        "| 状态 | 检查项 | 证据或说明 |",
        "| --- | --- | --- |",
    ]
    for finding in report.findings:
        status = table_cell(finding.get("status", ""))
        item = table_cell(finding.get("item", ""))
        evidence = table_cell(finding.get("evidence", ""))
        lines.append(f"| {status} | {item} | {evidence} |")

    lines.extend(["", "## 修改建议", ""])
    lines.extend(f"- {item}" for item in report.recommendations)
    lines.extend(["", "## 模型分析原文", "", report.raw_model_output])

    path.write_text("\n".join(lines), encoding="utf-8")
    return path
|
||||
|
||||
|
||||
def generate_docx_report(report: AnalysisReport, output_dir: Path | str) -> Path:
    """Write the report as a DOCX file and return its path.

    The document contains the basic information, the summary, a three-column
    findings table, the recommendations as a bulleted list, the raw analysis
    text line by line, and a closing note.
    """
    path = _report_base_path(report, Path(output_dir), "docx")
    document = Document()
    document.add_heading("DOCX 规范分析报告", level=0)

    document.add_heading("基本信息", level=1)
    info_rows = [
        ("源文件", report.source_filename),
        ("分析时间", datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
        ("模型供应商", report.provider_name),
        ("模型名称", report.model_name),
        ("命中技能", ", ".join(report.matched_skills) or "无"),
    ]
    for label, value in info_rows:
        document.add_paragraph(f"{label}:{value}")

    document.add_heading("总体结论", level=1)
    document.add_paragraph(report.summary)

    document.add_heading("技能符合性矩阵", level=1)
    table = document.add_table(rows=1, cols=3)
    table.style = "Table Grid"
    header_cells = table.rows[0].cells
    for column, title in enumerate(("状态", "检查项", "证据或说明")):
        header_cells[column].text = title
    for finding in report.findings:
        cells = table.add_row().cells
        for column, key in enumerate(("status", "item", "evidence")):
            cells[column].text = finding.get(key, "")

    document.add_heading("修改建议", level=1)
    for recommendation in report.recommendations:
        document.add_paragraph(recommendation, style="List Bullet")

    document.add_heading("模型分析原文", level=1)
    raw_lines = report.raw_model_output.splitlines()
    if not raw_lines:
        raw_lines = ["无"]
    for raw_line in raw_lines:
        document.add_paragraph(raw_line)

    document.add_paragraph("说明:模型分析结果需人工复核,不应直接作为正式审查结论。")
    document.save(path)
    return path
|
||||
77
app/skill_loader.py
Normal file
77
app/skill_loader.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Skill:
    """One entry of the skill catalog."""

    # Identifier: the index link text, or the skill's directory name.
    slug: str
    # Display name from the front matter, falling back to the slug.
    name: str
    # Short description of the skill.
    description: str
    # Applicability note from the index; empty for skills not in the index.
    use_when: str
    # Full text of the skill file ("" when the file is missing).
    content: str
    # Location of the skill file.
    path: Path | None
|
||||
|
||||
|
||||
# Matches one Markdown table row of the form
# "| [link text](link target) | second cell | third cell |" and captures the
# link text, the link target and the two following cells.
INDEX_ROW_RE = re.compile(
    r"^\|\s*\[([^\]]+)\]\(([^)]+)\)\s*\|\s*([^|]+?)\s*\|\s*([^|]+?)\s*\|"
)
# Matches a leading block delimited by "---" lines at the very start of the
# text and captures what lies between the delimiters.
FRONT_MATTER_RE = re.compile(r"^---\n(.*?)\n---\n", re.DOTALL)
|
||||
|
||||
|
||||
def _front_matter_value(content: str, key: str) -> str | None:
    """Look up ``key`` in the front-matter block at the start of ``content``.

    Returns:
        The text after the first colon of the first line starting with
        ``key:``, stripped of surrounding whitespace; ``None`` when there is
        no front-matter block or no such line.
    """
    header = FRONT_MATTER_RE.match(content)
    if header is None:
        return None
    prefix = f"{key}:"
    for entry in header.group(1).splitlines():
        if entry.startswith(prefix):
            return entry.partition(":")[2].strip()
    return None
|
||||
|
||||
|
||||
def load_skill_catalog(root: Path | str = Path("GJB438C-2021_prd_skills")) -> list[Skill]:
    """Load all skills found under ``root``.

    Skills listed in ``index.md`` come first, in index order, taking their
    description and applicability from the index row. Every ``*/SKILL.md``
    whose directory name was not already listed is appended afterwards in
    sorted path order, with its description read from the front matter.
    """
    base = Path(root)
    catalog: list[Skill] = []
    indexed_slugs: set[str] = set()

    index_file = base / "index.md"
    index_rows = index_file.read_text(encoding="utf-8").splitlines() if index_file.exists() else []
    for row in index_rows:
        parsed_row = INDEX_ROW_RE.match(row)
        if parsed_row is None:
            continue
        slug, relative_path, description, use_when = (part.strip() for part in parsed_row.groups())
        skill_file = base / relative_path
        # A listed skill whose file is missing is kept with empty content.
        body = skill_file.read_text(encoding="utf-8") if skill_file.exists() else ""
        catalog.append(
            Skill(
                slug=slug,
                name=_front_matter_value(body, "name") or slug,
                description=description,
                use_when=use_when,
                content=body,
                path=skill_file,
            )
        )
        indexed_slugs.add(slug)

    for skill_file in sorted(base.glob("*/SKILL.md")):
        slug = skill_file.parent.name
        if slug not in indexed_slugs:
            body = skill_file.read_text(encoding="utf-8")
            catalog.append(
                Skill(
                    slug=slug,
                    name=_front_matter_value(body, "name") or slug,
                    description=_front_matter_value(body, "description") or "",
                    use_when="",
                    content=body,
                    path=skill_file,
                )
            )

    return catalog
|
||||
72
app/static/app.js
Normal file
72
app/static/app.js
Normal file
@@ -0,0 +1,72 @@
|
||||
// Page elements used by the upload flow, looked up once at load time.
const form = document.querySelector("#upload-form");
// Result area and the parts filled in from the task payload.
const result = document.querySelector("#result");
const summary = document.querySelector("#summary");
const skills = document.querySelector("#skills");
const mdLink = document.querySelector("#download-md");
// Progress bar (driven through its width) and the status line next to it.
const progressBar = document.querySelector("#analysis-progress");
const statusText = document.querySelector("#analysis-status");
// First button inside the form; disabled while an analysis is running.
const button = form.querySelector("button");
|
||||
|
||||
/**
 * Poll a task status URL once per second until the task finishes.
 *
 * Updates the progress bar and status text on every poll.
 *
 * @param {string} statusUrl - URL returning the task state as JSON.
 * @returns {Promise<object>} The final payload once status is "completed".
 * @throws {Error} When the task reports status "error", or when the status
 *   request itself fails (for example 404 for an unknown task).
 */
async function pollTask(statusUrl) {
  while (true) {
    const response = await fetch(statusUrl);

    // A failed request carries no task status, so without this check the
    // loop would never reach a terminal state and would poll forever.
    if (!response.ok) {
      let detail = "";
      try {
        detail = (await response.json()).detail;
      } catch (parseError) {
        // Body was not JSON; fall through to the generic message.
      }
      throw new Error(detail || "分析失败");
    }

    const payload = await response.json();

    progressBar.style.width = `${payload.progress || 0}%`;
    statusText.textContent = payload.message || "分析中";

    if (payload.status === "completed") {
      return payload;
    }

    if (payload.status === "error") {
      throw new Error(payload.error || "分析失败");
    }

    await new Promise((resolve) => setTimeout(resolve, 1000));
  }
}
|
||||
|
||||
// Submit the upload without a page reload, then follow the task until it
// finishes and show its summary, matched skills and download link.
form.addEventListener("submit", async (event) => {
  event.preventDefault();

  // Lock the form and reset the progress display while the task runs.
  button.disabled = true;
  button.textContent = "分析中...";
  result.hidden = true;
  progressBar.style.width = "0%";
  statusText.textContent = "任务提交中...";

  const data = new FormData(form);
  // Send an explicit "false" when the form data carries no use_model entry.
  if (!data.has("use_model")) {
    data.set("use_model", "false");
  }

  try {
    const response = await fetch("/analyze", { method: "POST", body: data });
    const payload = await response.json();
    if (!response.ok) {
      throw new Error(payload.detail || "分析失败");
    }

    result.hidden = false;
    const task = await pollTask(payload.status_url);

    summary.textContent = task.summary;
    skills.innerHTML = "";
    task.matched_skills.forEach((skillName) => {
      const badge = document.createElement("span");
      badge.textContent = skillName;
      skills.appendChild(badge);
    });
    mdLink.href = task.downloads.markdown;
  } catch (error) {
    // Show the failure message in place of the summary.
    summary.textContent = error.message;
    skills.innerHTML = "";
    result.hidden = false;
    statusText.textContent = "分析失败";
  } finally {
    button.disabled = false;
    button.textContent = "开始分析";
  }
});
|
||||
218
app/static/styles.css
Normal file
218
app/static/styles.css
Normal file
@@ -0,0 +1,218 @@
|
||||
/* Page-wide defaults: light colour scheme, font stack, base colours. */
:root {
  color-scheme: light;
  font-family: "Inter", "Segoe UI", Arial, sans-serif;
  background: #f5f7fb;
  color: #172033;
}

* {
  box-sizing: border-box;
}

body {
  margin: 0;
}

/* Form controls do not inherit the font family by default; without this the
   buttons, the select and the file input ignore the stack set on :root. */
button,
input,
select {
  font-family: inherit;
}

/* Layout: a full-height grid that centres the single panel. */
.shell {
  min-height: 100vh;
  display: grid;
  place-items: center;
  padding: 32px;
}

.panel {
  width: min(880px, 100%);
  background: #ffffff;
  border: 1px solid #dbe2ee;
  border-radius: 8px;
  box-shadow: 0 20px 60px rgba(23, 32, 51, 0.08);
  padding: 32px;
}

/* Header: title block on the left, meta badges on the right. */
.header {
  display: flex;
  justify-content: space-between;
  gap: 24px;
  align-items: flex-start;
  margin-bottom: 28px;
}

.eyebrow {
  margin: 0 0 8px;
  color: #5d6d83;
  font-size: 13px;
  letter-spacing: 0;
}

h1,
h2 {
  margin: 0;
}

h1 {
  font-size: 32px;
  font-weight: 700;
}

h2 {
  font-size: 20px;
}

.meta {
  display: flex;
  gap: 8px;
  flex-wrap: wrap;
  justify-content: flex-end;
}

/* Shared badge look for header meta items and matched-skill chips. */
.meta span,
.skills span {
  border: 1px solid #cfd8e6;
  border-radius: 6px;
  padding: 6px 9px;
  color: #44546a;
  background: #f8fafc;
  font-size: 13px;
}

/* Upload form. */
.form {
  display: grid;
  gap: 20px;
}

.drop-zone {
  border: 1px dashed #8aa1bd;
  border-radius: 8px;
  padding: 34px;
  background: #fbfcfe;
  display: grid;
  gap: 8px;
  cursor: pointer;
}

.drop-zone input {
  width: 100%;
}

.drop-title {
  font-size: 18px;
  font-weight: 700;
}

.drop-subtitle {
  color: #5d6d83;
}

.controls {
  display: grid;
  grid-template-columns: 1fr auto;
  gap: 16px;
  align-items: end;
}

label {
  display: grid;
  gap: 8px;
  color: #344054;
  font-size: 14px;
}

select {
  height: 42px;
  border: 1px solid #cfd8e6;
  border-radius: 6px;
  padding: 0 12px;
  background: #ffffff;
}

/* Overrides the grid layout of the generic label rule for the inline checkbox. */
.checkbox {
  display: flex;
  align-items: center;
  gap: 8px;
  height: 42px;
}

/* Primary action look, shared by buttons and download links. */
button,
.downloads a {
  border: 0;
  border-radius: 6px;
  background: #1f6feb;
  color: #ffffff;
  min-height: 44px;
  padding: 0 18px;
  font-weight: 700;
  cursor: pointer;
  text-decoration: none;
  display: inline-flex;
  align-items: center;
  justify-content: center;
}

button:disabled {
  background: #8aa1bd;
  cursor: wait;
}

/* Result section: progress indicator, summary, skills, downloads. */
.result {
  margin-top: 28px;
  border-top: 1px solid #dbe2ee;
  padding-top: 24px;
}

.progress-wrap {
  display: grid;
  gap: 8px;
  margin: 14px 0 18px;
}

.progress-track {
  height: 10px;
  border-radius: 999px;
  background: #e8edf4;
  overflow: hidden;
}

/* Width is set inline; the transition smooths each change. */
.progress-bar {
  height: 100%;
  border-radius: inherit;
  background: linear-gradient(90deg, #1f6feb, #4f8df5);
  transition: width 0.2s ease;
}

.status-text {
  margin: 0;
  color: #5d6d83;
  font-size: 14px;
}

.skills,
.downloads {
  display: flex;
  flex-wrap: wrap;
  gap: 8px;
  margin-top: 16px;
}

/* The last download link gets the darker colour; when the container holds a
   single link, that link is the one styled here. */
.downloads a:last-child {
  background: #334155;
}

/* Narrow screens: tighter padding and single-column header and controls. */
@media (max-width: 680px) {
  .shell {
    padding: 16px;
  }

  .panel {
    padding: 22px;
  }

  .header,
  .controls {
    grid-template-columns: 1fr;
    display: grid;
  }

  .meta {
    justify-content: flex-start;
  }
}
|
||||
67
app/templates/index.html
Normal file
67
app/templates/index.html
Normal file
@@ -0,0 +1,67 @@
|
||||
<!doctype html>
<!-- Upload page for the DOCX compliance analysis; the template variables
     skill_count and default_provider are filled in at render time. -->
<html lang="zh-CN">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>GJB438C DOCX 规范分析</title>
  <link rel="stylesheet" href="/static/styles.css">
</head>
<body>
  <main class="shell">
    <section class="panel">
      <div class="header">
        <div>
          <p class="eyebrow">GJB438C / GJB2786</p>
          <h1>DOCX 规范分析</h1>
        </div>
        <div class="meta">
          <span>{{ skill_count }} 项技能</span>
          <span>默认 {{ default_provider }}</span>
        </div>
      </div>

      <!-- Upload form: file, provider choice and the use_model switch. -->
      <form id="upload-form" class="form">
        <label class="drop-zone">
          <input id="file" name="file" type="file" accept=".docx" required>
          <span class="drop-title">选择 DOCX 文件</span>
          <span class="drop-subtitle">上传后自动匹配技能规范并生成报告</span>
        </label>

        <div class="controls">
          <label>
            模型供应商
            <select name="provider">
              <option value="{{ default_provider }}">默认:{{ default_provider }}</option>
              <option value="intranet">intranet / qwen3-coder</option>
              <option value="deepseek">deepseek / deepseek-chat</option>
            </select>
          </label>
          <label class="checkbox">
            <input type="checkbox" name="use_model" value="true" checked>
            调用模型分析
          </label>
        </div>

        <button type="submit">开始分析</button>
      </form>

      <!-- Result area: starts hidden and is populated by /static/app.js. -->
      <section id="result" class="result" hidden>
        <h2>分析结果</h2>
        <div class="progress-wrap">
          <div class="progress-track">
            <div id="analysis-progress" class="progress-bar" style="width: 0%"></div>
          </div>
          <!-- Live region: the text is rewritten by script while the task is
               polled, so announce changes to assistive technology. -->
          <p id="analysis-status" class="status-text" role="status" aria-live="polite">等待提交文件</p>
        </div>
        <p id="summary"></p>
        <div id="skills" class="skills"></div>
        <div class="downloads">
          <!-- <a id="download-docx" href="#">下载 DOCX 报告</a> -->
          <a id="download-md" href="#">下载 Markdown 报告</a>
        </div>
      </section>
    </section>
  </main>
  <script src="/static/app.js"></script>
</body>
</html>
|
||||
Reference in New Issue
Block a user