Add skill collection selection

This commit is contained in:
kuangji
2026-05-19 13:22:25 +08:00
parent 1e20f84257
commit 6661f3e361
6 changed files with 118 additions and 10 deletions

View File

@@ -68,6 +68,15 @@ def select_relevant_skills(parsed: ParsedDocument, skills: list[Skill], max_skil
return [skill for _, skill in scored[:max_skills]]
def normalize_selected_skill_slugs(selected_slugs: list[str] | None, skills: list[Skill]) -> list[Skill]:
    """Map user-selected slugs onto the loaded skills.

    Args:
        selected_slugs: Slugs chosen by the caller; ``None`` or an empty list
            means "no explicit selection".
        skills: All skills that are available for selection.

    Returns:
        The skills whose ``slug`` appears in ``selected_slugs``, in the order
        the slugs were first given and without repeats. When nothing was
        selected, or none of the slugs match an available skill, the full
        ``skills`` list is returned unchanged.
    """
    if not selected_slugs:
        return skills

    available = {skill.slug: skill for skill in skills}
    # dict.fromkeys drops repeated slugs while keeping first-seen order, so a
    # slug submitted twice does not put the same skill in the result twice.
    unique_slugs = dict.fromkeys(selected_slugs)
    picked = [available[slug] for slug in unique_slugs if slug in available]
    # An all-unknown selection falls back to every skill rather than none.
    return picked or skills
def build_analysis_prompt(parsed: ParsedDocument, skills: list[Skill]) -> str:
skill_sections = []
for skill in skills:

View File

@@ -9,7 +9,7 @@ from uuid import uuid4
from typing import Callable
from fastapi import FastAPI, File, Form, HTTPException, Request, UploadFile
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
@@ -29,12 +29,41 @@ from app.skill_loader import load_skill_catalog
ROOT_DIR = Path(__file__).resolve().parent.parent
UPLOAD_DIR = ROOT_DIR / "uploads"
OUTPUT_DIR = ROOT_DIR / "outputs"
SKILL_DIR = ROOT_DIR / "GJB438C-2021_prd_skills"
SKILL_ROOT = ROOT_DIR / "skills"
DEFAULT_SKILL_COLLECTION = "GJB438C-2021_prd_skills"
SKILL_COLLECTIONS = [
"GJB438B-2009_prd_skills",
"GJB438C-2021_prd_skills",
]
CONFIG_PATH = ROOT_DIR / "configs" / "api_config.yaml"
MAX_UPLOAD_BYTES = 30 * 1024 * 1024
ProgressCallback = Callable[[int, str], None]
def _skill_collection_path(collection_slug: str) -> Path:
    """Resolve a skill collection slug to its directory under ``SKILL_ROOT``.

    The slug can originate from a submitted form field, so it is checked
    against the ``SKILL_COLLECTIONS`` allow-list before it is joined onto
    ``SKILL_ROOT``. Without that check a value such as ``"../other"`` or an
    absolute path would resolve to a directory outside the skills root.

    Args:
        collection_slug: Name of the requested skill collection.

    Returns:
        The collection directory, which is guaranteed to contain ``index.md``.

    Raises:
        HTTPException: With status 400 when the slug is not a registered
            collection, or when its directory or ``index.md`` is missing.
    """
    # Reject unregistered names before touching the filesystem.
    if collection_slug not in SKILL_COLLECTIONS:
        raise HTTPException(status_code=400, detail="技能集合不存在")

    path = SKILL_ROOT / collection_slug
    # is_dir() is already False for a non-existent path.
    if not path.is_dir() or not (path / "index.md").exists():
        raise HTTPException(status_code=400, detail="技能集合不存在")
    return path
def _skill_collection_options() -> list[dict[str, object]]:
    """Describe each configured skill collection that is present on disk.

    Walks ``SKILL_COLLECTIONS`` in order and skips any entry whose directory
    under ``SKILL_ROOT`` is missing or has no ``index.md``.

    Returns:
        One dict per usable collection with the keys ``slug``, ``label``
        (the slug with ``_prd_skills`` removed) and ``skill_count`` (the
        number of skills returned by ``load_skill_catalog``).
    """
    entries: list[dict[str, object]] = []
    for slug in SKILL_COLLECTIONS:
        collection_dir = SKILL_ROOT / slug
        usable = (
            collection_dir.exists()
            and collection_dir.is_dir()
            and (collection_dir / "index.md").exists()
        )
        if usable:
            catalog = load_skill_catalog(collection_dir)
            entries.append(
                dict(
                    slug=slug,
                    label=slug.replace("_prd_skills", ""),
                    skill_count=len(catalog),
                )
            )
    return entries
@dataclass
class AnalysisTask:
task_id: str
@@ -124,6 +153,7 @@ def analyze_saved_docx(
provider: str | None = None,
use_model: bool = True,
display_filename: str | None = None,
skill_collection: str = DEFAULT_SKILL_COLLECTION,
progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
def progress(percent: int, message: str) -> None:
@@ -133,7 +163,7 @@ def analyze_saved_docx(
progress(5, "正在解析 DOCX 文档")
parsed = parse_docx(upload_path, display_filename=display_filename)
progress(20, "DOCX 解析完成,正在加载技能规范")
skills = load_skill_catalog(SKILL_DIR)
skills = load_skill_catalog(_skill_collection_path(skill_collection))
progress(35, "技能规范已加载,正在匹配候选技能")
selected_skills = select_relevant_skills(parsed, skills)
progress(50, f"已匹配 {len(selected_skills)} 项技能,正在读取模型配置")
@@ -186,6 +216,7 @@ def _run_analysis_task(
provider: str | None,
use_model: bool,
display_filename: str,
skill_collection: str = DEFAULT_SKILL_COLLECTION,
) -> None:
def on_progress(progress: int, message: str) -> None:
TASK_STORE.update(task_id, status="running", progress=progress, message=message)
@@ -197,6 +228,7 @@ def _run_analysis_task(
provider=provider,
use_model=use_model,
display_filename=display_filename,
skill_collection=skill_collection,
progress_callback=on_progress,
)
TASK_STORE.update(
@@ -215,13 +247,14 @@ def _run_analysis_task(
@app.get("/", response_class=HTMLResponse)
def index(request: Request) -> HTMLResponse:
settings = load_api_config(CONFIG_PATH)
skills = load_skill_catalog(SKILL_DIR)
return templates.TemplateResponse(
request,
"index.html",
{
"default_provider": settings.provider_name,
"skill_count": len(skills),
"skill_collection_count": len(SKILL_COLLECTIONS),
"skill_collections": _skill_collection_options(),
"default_skill_collection": DEFAULT_SKILL_COLLECTION,
},
)
@@ -231,6 +264,7 @@ async def analyze_docx(
file: UploadFile = File(...),
provider: str | None = Form(None),
use_model: str = Form("true"),
skill_collection: str = Form(DEFAULT_SKILL_COLLECTION),
):
if not file.filename or not file.filename.lower().endswith(".docx"):
raise HTTPException(status_code=400, detail="仅支持上传 .docx 文件")
@@ -248,7 +282,14 @@ async def analyze_docx(
task = TASK_STORE.create(Path(file.filename).name)
threading.Thread(
target=_run_analysis_task,
args=(task.task_id, upload_path, provider, should_use_model, Path(file.filename).name),
args=(
task.task_id,
upload_path,
provider,
should_use_model,
Path(file.filename).name,
skill_collection,
),
daemon=True,
).start()
return {

View File

@@ -31,7 +31,7 @@ def _front_matter_value(content: str, key: str) -> str | None:
return None
def load_skill_catalog(root: Path | str = Path("GJB438C-2021_prd_skills")) -> list[Skill]:
def load_skill_catalog(root: Path | str = Path("skills") / "GJB438C-2021_prd_skills") -> list[Skill]:
root_path = Path(root)
index_path = root_path / "index.md"
skills: list[Skill] = []

View File

@@ -36,12 +36,44 @@
<option value="deepseek">deepseek / deepseek-chat</option>
</select>
</label>
<label>
  技能合集
  <select name="skill_collection">
    {# One option per available collection; the default collection is preselected. #}
    {% for collection in skill_collections %}
    <option value="{{ collection.slug }}" {% if collection.slug == default_skill_collection %}selected{% endif %}>
      {# Separate the label from the count so "GJB438C-2021" and "35" do not run together. #}
      {{ collection.label }}({{ collection.skill_count }} 项)
    </option>
    {% endfor %}
  </select>
</label>
<label class="checkbox">
<input type="checkbox" name="use_model" value="true" checked>
调用模型分析
</label>
</div>
{# 预留后续版本:单个技能集合内的 skill 筛选功能
<section class="skill-picker" aria-label="技能筛选">
<div class="skill-picker-head">
<label for="skill-filter">筛选技能</label>
<span id="skill-count-label">{{ skill_count }} 项</span>
</div>
<input id="skill-filter" type="search" placeholder="输入技能名称、说明或 slug">
<div id="skill-list" class="skill-list">
{% for skill in skills %}
<label class="skill-option" data-skill-item data-skill-key="{{ skill.slug }} {{ skill.name }} {{ skill.description }} {{ skill.use_when }}">
<input type="checkbox" name="selected_skills" value="{{ skill.slug }}">
<span class="skill-main">
<span class="skill-name">{{ skill.name }}</span>
<span class="skill-desc">{{ skill.description or skill.use_when }}</span>
</span>
<span class="skill-slug">{{ skill.slug }}</span>
</label>
{% endfor %}
</div>
</section>
#}
<button type="submit">开始分析</button>
</form>

View File

@@ -4,7 +4,7 @@ from app.skill_loader import load_skill_catalog
def test_load_skill_catalog_reads_index_and_skill_files() -> None:
skills = load_skill_catalog(Path("GJB438C-2021_prd_skills"))
skills = load_skill_catalog(Path("skills") / "GJB438C-2021_prd_skills")
assert len(skills) >= 30
skill_names = {skill.slug for skill in skills}
@@ -12,3 +12,10 @@ def test_load_skill_catalog_reads_index_and_skill_files() -> None:
target = next(skill for skill in skills if skill.slug == "gjb438c-software-requirements-spec-structure")
assert "软件需求规格说明" in target.content
assert target.path.name == "SKILL.md"
def test_load_skill_catalog_reads_gjb438b_collection() -> None:
    """The GJB438B-2009 collection loads and exposes ``gjb438b-`` slugs."""
    collection_dir = Path("skills") / "GJB438B-2009_prd_skills"
    loaded = load_skill_catalog(collection_dir)

    assert len(loaded) > 0
    slugs = [skill.slug for skill in loaded]
    assert any(slug.startswith("gjb438b-") for slug in slugs)

View File

@@ -2,7 +2,7 @@ from pathlib import Path
from docx import Document
from app.main import OUTPUT_DIR, ROOT_DIR, analyze_saved_docx
from app.main import OUTPUT_DIR, ROOT_DIR, analyze_saved_docx, app
def test_index_template_contains_upload_ui() -> None:
@@ -14,9 +14,11 @@ def test_index_template_contains_upload_ui() -> None:
assert "analysis-progress" in html
assert "analysis-status" in html
assert "下载 Markdown 报告" in html
assert "<!-- <a id=\"download-docx\"" in html
assert "download-md" in js
assert "pollTask" in js
assert "skill_collection" in html
assert "预留后续版本:单个技能集合内的 skill 筛选功能" in html
assert not any(route.path == "/skills" for route in app.routes)
def test_analyze_saved_docx_reports_progress(tmp_path: Path) -> None:
@@ -53,3 +55,20 @@ def test_analyze_saved_docx_creates_downloadable_report(tmp_path: Path) -> None:
assert "docx" not in payload["downloads"]
assert payload["downloads"]["markdown"].endswith(".md")
assert (OUTPUT_DIR / Path(payload["downloads"]["markdown"]).name).exists()
def test_analyze_saved_docx_uses_selected_collection(tmp_path: Path) -> None:
    """Analysing with the GJB438B-2009 collection still yields matched skills.

    Builds a minimal DOCX with one heading and one paragraph, then runs the
    analysis with model calls disabled and an explicit ``skill_collection``.
    """
    source_file = tmp_path / "upload.docx"
    doc = Document()
    doc.add_heading("软件需求规格说明", level=1)
    doc.add_paragraph("能力需求、接口需求、合格性规定。")
    doc.save(source_file)

    analysis_kwargs = {
        "provider": "deepseek",
        "use_model": False,
        "skill_collection": "GJB438B-2009_prd_skills",
    }
    result = analyze_saved_docx(source_file, **analysis_kwargs)

    assert result["matched_skills"]