Compare commits
10 Commits
6661f3e361
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
70d7a9578f | ||
|
|
fea4f2b512 | ||
|
|
aa064692ad | ||
|
|
990d872bb8 | ||
|
|
5cb7785a38 | ||
|
|
0f8917d874 | ||
|
|
bb2e55e889 | ||
|
|
23c138e778 | ||
|
|
04639296e2 | ||
|
|
faa6f3da0c |
5
.gitignore
vendored
5
.gitignore
vendored
@@ -12,5 +12,10 @@ plant.md
|
||||
wheelhouse/
|
||||
python-3.12-runtime.tar.gz
|
||||
|
||||
skills/
|
||||
|
||||
|
||||
deploy.md
|
||||
handoff-2026-05-19.md
|
||||
|
||||
prompts_folder/
|
||||
|
||||
119
app/main.py
119
app/main.py
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
import zipfile
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
@@ -31,15 +32,22 @@ UPLOAD_DIR = ROOT_DIR / "uploads"
|
||||
OUTPUT_DIR = ROOT_DIR / "outputs"
|
||||
SKILL_ROOT = ROOT_DIR / "skills"
|
||||
DEFAULT_SKILL_COLLECTION = "GJB438C-2021_prd_skills"
|
||||
SKILL_COLLECTIONS = [
|
||||
"GJB438B-2009_prd_skills",
|
||||
"GJB438C-2021_prd_skills",
|
||||
]
|
||||
CONFIG_PATH = ROOT_DIR / "configs" / "api_config.yaml"
|
||||
MAX_UPLOAD_BYTES = 30 * 1024 * 1024
|
||||
MAX_SKILL_ARCHIVE_BYTES = 50 * 1024 * 1024
|
||||
ProgressCallback = Callable[[int, str], None]
|
||||
|
||||
|
||||
def _discover_skill_collections() -> list[str]:
    """Return the sorted directory names of the skill collections under ``SKILL_ROOT``.

    A directory counts as a collection when it directly contains an
    ``index.md`` file.

    Returns:
        Collection directory names in ascending order, or an empty list when
        ``SKILL_ROOT`` is missing or is not a directory.
    """
    # is_dir() also covers the "does not exist" case and avoids iterdir()
    # raising when SKILL_ROOT happens to be a regular file.
    if not SKILL_ROOT.is_dir():
        return []

    def _is_staging_dir(name: str) -> bool:
        # install_skill_collection_zip extracts into ".<slug>.<hex>.tmp" inside
        # SKILL_ROOT; such a directory holds an index.md while an install is in
        # progress (or after a crash) but is not an installed collection.
        return name.startswith(".") and name.endswith(".tmp")

    return sorted(
        entry.name
        for entry in SKILL_ROOT.iterdir()
        if not _is_staging_dir(entry.name)
        and entry.is_dir()
        and (entry / "index.md").is_file()
    )
|
||||
|
||||
|
||||
def _skill_collection_path(collection_slug: str) -> Path:
|
||||
path = SKILL_ROOT / collection_slug
|
||||
if not path.exists() or not path.is_dir() or not (path / "index.md").exists():
|
||||
@@ -49,10 +57,8 @@ def _skill_collection_path(collection_slug: str) -> Path:
|
||||
|
||||
def _skill_collection_options() -> list[dict[str, object]]:
|
||||
options: list[dict[str, object]] = []
|
||||
for collection_slug in SKILL_COLLECTIONS:
|
||||
for collection_slug in _discover_skill_collections():
|
||||
path = SKILL_ROOT / collection_slug
|
||||
if not path.exists() or not path.is_dir() or not (path / "index.md").exists():
|
||||
continue
|
||||
skills = load_skill_catalog(path)
|
||||
options.append(
|
||||
{
|
||||
@@ -64,6 +70,64 @@ def _skill_collection_options() -> list[dict[str, object]]:
|
||||
return options
|
||||
|
||||
|
||||
def _validate_skill_archive_member(member_name: str) -> None:
    """Reject a zip member name that is empty, absolute, or contains ``..``.

    Args:
        member_name: The entry name as stored in the archive.

    Raises:
        HTTPException: 400 when the name is empty, contains a backslash,
            starts with a slash, is an absolute path, or has a forbidden
            path component.
    """
    candidate = Path(member_name)
    unsafe = (
        not member_name
        or "\\" in member_name
        or member_name.startswith(("/", "\\"))
        or candidate.is_absolute()
    )
    if not unsafe:
        forbidden_parts = {"", ".", ".."}
        unsafe = any(segment in forbidden_parts for segment in candidate.parts)
    if unsafe:
        raise HTTPException(status_code=400, detail="压缩包包含非法路径")
|
||||
|
||||
|
||||
def install_skill_collection_zip(archive_path: Path, collection_slug: str) -> dict[str, object]:
    """Validate a skill-collection zip and install it as ``SKILL_ROOT/<collection_slug>``.

    The archive is extracted into a hidden staging directory inside
    ``SKILL_ROOT``, checked with ``load_skill_catalog``, and only then moved
    into place. An existing collection with the same slug is replaced; it is
    moved aside first and restored if the final move fails, so a failed
    install never destroys the previous version.

    Args:
        archive_path: Location of the uploaded zip file on disk.
        collection_slug: Directory name to install under ``SKILL_ROOT``.

    Returns:
        A dict with ``slug``, ``label`` (slug without ``_prd_skills``) and
        ``skill_count``.

    Raises:
        HTTPException: 400 when the slug is empty, ``.``/``..`` or contains a
            path separator; when the file is not a valid zip; when the archive
            is empty, holds an illegal member path, lacks a root ``index.md``,
            or yields no skills from ``load_skill_catalog``.
    """
    if not collection_slug or collection_slug in {".", ".."}:
        raise HTTPException(status_code=400, detail="技能合集名称无效")
    if "/" in collection_slug or "\\" in collection_slug:
        raise HTTPException(status_code=400, detail="技能合集名称无效")
    if not zipfile.is_zipfile(archive_path):
        raise HTTPException(status_code=400, detail="仅支持有效的 zip 压缩包")

    SKILL_ROOT.mkdir(parents=True, exist_ok=True)
    target_dir = SKILL_ROOT / collection_slug
    token = uuid4().hex
    temp_dir = SKILL_ROOT / f".{collection_slug}.{token}.tmp"
    # Keeps the same ".<slug>.<hex>...tmp" shape as the staging directory.
    backup_dir = SKILL_ROOT / f".{collection_slug}.{token}.bak.tmp"

    try:
        with zipfile.ZipFile(archive_path) as archive:
            members = archive.infolist()
            if not members:
                raise HTTPException(status_code=400, detail="压缩包为空")
            names = [member.filename for member in members]
            # Validate every name before anything is written to disk.
            for name in names:
                _validate_skill_archive_member(name)
            if "index.md" not in names:
                raise HTTPException(status_code=400, detail="技能合集压缩包根目录必须包含 index.md")
            # NOTE(review): the uncompressed size is not bounded here; consider
            # capping sum(member.file_size) if archives come from untrusted users.
            archive.extractall(temp_dir)

        skills = load_skill_catalog(temp_dir)
        if not skills:
            raise HTTPException(status_code=400, detail="技能合集未包含有效 SKILL.md")

        # Move the previous version aside instead of deleting it up front, so
        # it can be put back if the final rename fails.
        previous_moved = False
        if target_dir.exists():
            target_dir.rename(backup_dir)
            previous_moved = True
        try:
            temp_dir.rename(target_dir)
        except OSError:
            if previous_moved:
                backup_dir.rename(target_dir)
            raise
        if previous_moved:
            shutil.rmtree(backup_dir, ignore_errors=True)
    except zipfile.BadZipFile as exc:
        raise HTTPException(status_code=400, detail="仅支持有效的 zip 压缩包") from exc
    finally:
        # Single cleanup path for every failure mode. After a successful
        # install the staging directory has been renamed, so this is a no-op.
        # ignore_errors keeps a cleanup problem from masking the real error.
        shutil.rmtree(temp_dir, ignore_errors=True)

    return {
        "slug": collection_slug,
        "label": collection_slug.replace("_prd_skills", ""),
        "skill_count": len(skills),
    }
|
||||
|
||||
|
||||
@dataclass
|
||||
class AnalysisTask:
|
||||
task_id: str
|
||||
@@ -247,18 +311,55 @@ def _run_analysis_task(
|
||||
@app.get("/", response_class=HTMLResponse)
def index(request: Request) -> HTMLResponse:
    """Render the landing page with the current provider and skill collections.

    Args:
        request: The incoming request, passed through to the template response.

    Returns:
        The rendered ``index.html`` template response.
    """
    settings = load_api_config(CONFIG_PATH)
    # Build the option list once and reuse it for both the count and the
    # dropdown, so the two values cannot disagree and the catalog is not
    # scanned twice per page load.
    skill_collections = _skill_collection_options()
    return templates.TemplateResponse(
        request,
        "index.html",
        {
            "default_provider": settings.provider_name,
            "skill_collection_count": len(skill_collections),
            "skill_collections": skill_collections,
            "default_skill_collection": DEFAULT_SKILL_COLLECTION,
        },
    )
|
||||
|
||||
|
||||
@app.get("/skill-collections")
def list_skill_collections() -> dict[str, object]:
    """Return the currently available skill collections and the default slug.

    Returns:
        A dict with ``collections`` (the result of
        ``_skill_collection_options()``) and ``default_skill_collection``.
    """
    available = _skill_collection_options()
    return {
        "collections": available,
        "default_skill_collection": DEFAULT_SKILL_COLLECTION,
    }
|
||||
|
||||
|
||||
@app.post("/skill-collections/upload")
async def upload_skill_collection(file: UploadFile = File(...)) -> dict[str, object]:
    """Accept a ``.zip`` skill collection, install it, and return the refreshed list.

    The collection slug is the uploaded file name without its ``.zip``
    suffix. The archive is staged under ``UPLOAD_DIR`` and removed again
    whether or not the install succeeds.

    Args:
        file: The uploaded archive.

    Returns:
        A dict with a success ``message``, the installed ``collection`` and
        the full ``collections`` list.

    Raises:
        HTTPException: 400 when the file name is missing or does not end in
            ``.zip``; 413 when the content exceeds ``MAX_SKILL_ARCHIVE_BYTES``;
            plus whatever ``install_skill_collection_zip`` raises.
    """
    uploaded_name = file.filename
    if not (uploaded_name and uploaded_name.lower().endswith(".zip")):
        raise HTTPException(status_code=400, detail="技能合集仅支持上传 .zip 压缩包")

    # NOTE(review): the whole upload is read into memory before the size check.
    payload = await file.read()
    if len(payload) > MAX_SKILL_ARCHIVE_BYTES:
        raise HTTPException(status_code=413, detail="技能合集压缩包超过 50MB 限制")

    # Keep only the final path component of the client-supplied name.
    archive_name = Path(uploaded_name).name
    slug = archive_name[: -len(".zip")]
    staged_archive = UPLOAD_DIR / f"{uuid4().hex}_{archive_name}"
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    staged_archive.write_bytes(payload)

    try:
        installed = install_skill_collection_zip(staged_archive, slug)
    finally:
        staged_archive.unlink(missing_ok=True)

    return {
        "message": f"技能合集 {installed['slug']} 上传成功,已加载 {installed['skill_count']} 项技能",
        "collection": installed,
        "collections": _skill_collection_options(),
    }
|
||||
|
||||
|
||||
@app.post("/analyze")
|
||||
async def analyze_docx(
|
||||
file: UploadFile = File(...),
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
const form = document.querySelector("#upload-form");
|
||||
const skillUploadForm = document.querySelector("#skill-upload-form");
|
||||
const skillCollectionSelect = document.querySelector("#skill-collection");
|
||||
const skillUploadStatus = document.querySelector("#skill-upload-status");
|
||||
const result = document.querySelector("#result");
|
||||
const summary = document.querySelector("#summary");
|
||||
const skills = document.querySelector("#skills");
|
||||
@@ -27,6 +30,64 @@ async function pollTask(statusUrl) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Rebuild the skill-collection dropdown from a list of collections.
 *
 * @param {Array<{slug: string, label: string, skill_count: number}>} collections
 *   Collections to show, in display order.
 * @param {string} [selectedSlug] Slug to select; falls back to the value that
 *   was selected before the rebuild.
 */
function renderSkillCollections(collections, selectedSlug) {
  // Read the current selection before the options are cleared.
  const activeSlug = selectedSlug || skillCollectionSelect.value;
  skillCollectionSelect.innerHTML = "";

  for (const { slug, label, skill_count: skillCount } of collections) {
    const entry = document.createElement("option");
    entry.value = slug;
    entry.textContent = `${label}(${skillCount})`;
    entry.selected = slug === activeSlug;
    skillCollectionSelect.appendChild(entry);
  }
}
|
||||
|
||||
/**
 * Fetch the available skill collections and re-render the dropdown.
 *
 * @param {string} [selectedSlug] Slug to select after the refresh.
 * @throws {Error} When the request fails or the response carries no
 *   collection list.
 */
async function refreshSkillCollections(selectedSlug) {
  const fallbackMessage = "技能合集刷新失败";
  const response = await fetch("/skill-collections");
  // An error response is not guaranteed to be JSON (e.g. an HTML error page
  // from a proxy); parse defensively so the status check below still decides
  // which message the user sees.
  const payload = await response.json().catch(() => null);

  if (!response.ok) {
    // `detail` may be a non-string structure; only show it when it is text.
    const detail = payload && typeof payload.detail === "string" ? payload.detail : "";
    throw new Error(detail || fallbackMessage);
  }
  if (!payload || !Array.isArray(payload.collections)) {
    throw new Error(fallbackMessage);
  }
  renderSkillCollections(payload.collections, selectedSlug);
}
|
||||
|
||||
// Upload a skill-collection zip, then refresh the dropdown and select the
// newly installed collection. Progress and errors go to #skill-upload-status.
skillUploadForm.addEventListener("submit", async (event) => {
  event.preventDefault();

  const uploadButton = skillUploadForm.querySelector("button");
  uploadButton.disabled = true;
  uploadButton.textContent = "上传中...";
  skillUploadStatus.textContent = "正在上传并解压技能合集...";

  try {
    const data = new FormData(skillUploadForm);
    const response = await fetch("/skill-collections/upload", {
      method: "POST",
      body: data,
    });
    // Error responses may not be JSON (e.g. a 413 page from a reverse proxy);
    // parse defensively so the user sees the fallback message instead of a
    // JSON syntax error.
    const payload = await response.json().catch(() => null);
    if (!response.ok) {
      // `detail` may be a non-string structure; only show it when it is text.
      const detail = payload && typeof payload.detail === "string" ? payload.detail : "";
      throw new Error(detail || "上传失败");
    }
    if (!payload || !payload.collection || !Array.isArray(payload.collections)) {
      throw new Error("上传失败");
    }

    renderSkillCollections(payload.collections, payload.collection.slug);
    skillUploadForm.reset();
    skillUploadStatus.textContent = payload.message;
  } catch (error) {
    skillUploadStatus.textContent = error.message;
  } finally {
    // Re-enable the button whether the upload succeeded or failed.
    uploadButton.disabled = false;
    uploadButton.textContent = "上传合集";
  }
});

// Sync the dropdown with the server once on page load.
refreshSkillCollections().catch((error) => {
  skillUploadStatus.textContent = error.message;
});
|
||||
|
||||
form.addEventListener("submit", async (event) => {
|
||||
event.preventDefault();
|
||||
button.disabled = true;
|
||||
|
||||
@@ -80,6 +80,26 @@ h2 {
|
||||
gap: 20px;
|
||||
}
|
||||
|
||||
/* Skill-collection upload card: file input and submit button side by side,
   with the status line on its own row underneath. */
.collection-upload {
  display: grid;
  grid-template-columns: minmax(0, 1fr) auto;
  align-items: end;
  gap: 12px;
  margin-bottom: 24px;
  padding: 16px;
  background: #fbfcfe;
  border: 1px solid #dbe2ee;
  border-radius: 8px;
}

/* The status message spans every grid column. */
.collection-upload .status-text {
  grid-column: 1 / -1;
}

.collection-upload input[type="file"] {
  min-height: 42px;
}
|
||||
|
||||
.drop-zone {
|
||||
border: 1px dashed #8aa1bd;
|
||||
border-radius: 8px;
|
||||
@@ -207,7 +227,8 @@ button:disabled {
|
||||
}
|
||||
|
||||
.header,
|
||||
.controls {
|
||||
.controls,
|
||||
.collection-upload {
|
||||
grid-template-columns: 1fr;
|
||||
display: grid;
|
||||
}
|
||||
|
||||
@@ -15,11 +15,20 @@
|
||||
<h1>DOCX 规范分析</h1>
|
||||
</div>
|
||||
<div class="meta">
|
||||
<span>{{ skill_count }} 项技能</span>
|
||||
<span>{{ skill_collection_count }} 个合集</span>
|
||||
<span>默认 {{ default_provider }}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<form id="skill-upload-form" class="collection-upload">
|
||||
<label>
|
||||
上传技能合集
|
||||
<input id="skill-archive" name="file" type="file" accept=".zip" required>
|
||||
</label>
|
||||
<button type="submit">上传合集</button>
|
||||
<p id="skill-upload-status" class="status-text" role="status">仅支持 zip 压缩包,解压后会进入 skills/ 目录</p>
|
||||
</form>
|
||||
|
||||
<form id="upload-form" class="form">
|
||||
<label class="drop-zone">
|
||||
<input id="file" name="file" type="file" accept=".docx" required>
|
||||
@@ -38,7 +47,7 @@
|
||||
</label>
|
||||
<label>
|
||||
技能合集
|
||||
<select name="skill_collection">
|
||||
<select id="skill-collection" name="skill_collection">
|
||||
{% for collection in skill_collections %}
|
||||
<option value="{{ collection.slug }}" {% if collection.slug == default_skill_collection %}selected{% endif %}>
|
||||
{{ collection.label }}({{ collection.skill_count }})
|
||||
|
||||
25
handoff-2026-05-26-10-56-09.md
Normal file
25
handoff-2026-05-26-10-56-09.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# Handoff - 2026-05-26
|
||||
|
||||
## Completed Tasks
|
||||
- 将技能合集列表从固定常量改为动态扫描 `skills/` 下包含 `index.md` 的目录,解决后台新增 `skills/interesting_physics_skills` 后前端“技能合集”下拉框不显示的问题。
|
||||
- 新增 `GET /skill-collections` 接口,用于前端实时刷新可用技能合集列表。
|
||||
- 新增 `POST /skill-collections/upload` 接口,严格限制上传 `.zip` 技能合集压缩包,并按 `skills/<压缩包文件名去掉.zip>/` 的规范解压安装。
|
||||
- 为 zip 安装流程增加校验:有效 zip、根目录必须包含 `index.md`、拒绝路径穿越、拒绝空包、拒绝无有效 `SKILL.md` 的合集。
|
||||
- 在前端新增简洁的“上传技能合集”区域,上传成功后显示提醒,并立即刷新“技能合集”下拉框且选中新上传的合集。
|
||||
- 修正首页顶部技能合集数量展示,改为显示当前动态发现的合集数量。
|
||||
- 补充测试覆盖动态发现后台新增目录、上传 zip 后解压并进入列表、拒绝非 zip、拒绝非法路径 zip。
|
||||
- 执行验证:`python -m pytest -q` 通过,结果为 `17 passed in 6.91s`;`git diff --check` 通过。
|
||||
- 启动本地 FastAPI 服务用于页面验证,当前地址为 `http://127.0.0.1:8002`。
|
||||
|
||||
## Blockers
|
||||
- 当前 zip 格式按现有 `skills/GJB438B-2009_prd_skills.zip` 规范处理,即 `index.md` 必须位于压缩包根目录;如果后续需要支持“压缩包内再包一层目录”的格式,需要补充规范转换逻辑。
|
||||
- 上传同名合集时当前实现会用新解压内容替换 `skills/<合集名>/`,需要在后续产品设计中确认是否增加覆盖确认、版本备份或回滚能力。
|
||||
- 本地测试中 `fastapi.testclient.TestClient` 在当前环境会挂起,因此测试改为直接调用异步路由函数和安装函数;后续如升级依赖或调整测试环境,可再恢复端到端 HTTP 客户端测试。
|
||||
- 默认端口 `8000` 和 `8001` 启动失败,最终使用 `8002` 启动服务。
|
||||
|
||||
## Next Steps
|
||||
- 明天使用真实技能合集 zip 在浏览器中做一次完整手工验证:上传、成功提示、下拉框刷新、选择新合集并执行 DOCX 分析。
|
||||
- 为上传同名合集补充更明确的管理策略,例如覆盖确认、保留上一版本备份或禁止覆盖。
|
||||
- 评估是否支持多种 zip 打包结构,并在文档中明确技能合集 zip 的标准目录格式。
|
||||
- 考虑增加前端上传状态样式区分,例如成功、失败、校验错误使用不同颜色,但保持当前页面简洁风格。
|
||||
- 如后续要正式部署,补充接口级日志,记录上传合集名称、技能数量、校验失败原因和安装时间。
|
||||
@@ -1,10 +1,40 @@
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
import zipfile
|
||||
|
||||
from docx import Document
|
||||
|
||||
import app.main as main
|
||||
from app.main import OUTPUT_DIR, ROOT_DIR, analyze_saved_docx, app
|
||||
|
||||
|
||||
class FakeUploadFile:
    """Minimal in-memory stand-in for an uploaded file, used to call routes directly.

    Exposes only what the route under test touches: a ``filename`` attribute
    and an awaitable ``read()``.
    """

    def __init__(self, filename: str, content: bytes) -> None:
        self.filename = filename
        self._payload = content

    async def read(self) -> bytes:
        """Return the full content supplied at construction time."""
        return self._payload
|
||||
|
||||
|
||||
def _write_skill_collection_zip(path: Path) -> None:
    """Write a minimal valid skill-collection archive to ``path``.

    The archive holds a root ``index.md`` listing one skill and that skill's
    ``demo-skill/SKILL.md``.
    """
    index_md = "".join(
        [
            "| Skill | Description | Use When |\n",
            "| --- | --- | --- |\n",
            "| [demo-skill](demo-skill/SKILL.md) | 示例技能 | 上传合集测试 |\n",
        ]
    )
    skill_md = "".join(
        [
            "---\n",
            "name: demo-skill\n",
            "description: 示例技能\n",
            "---\n",
            "# Demo Skill\n",
        ]
    )
    # Insertion order is the order in which the members are written.
    members = {"index.md": index_md, "demo-skill/SKILL.md": skill_md}
    with zipfile.ZipFile(path, "w") as archive:
        for member_name, text in members.items():
            archive.writestr(member_name, text)
|
||||
|
||||
|
||||
def test_index_template_contains_upload_ui() -> None:
|
||||
html = (ROOT_DIR / "app" / "templates" / "index.html").read_text(encoding="utf-8")
|
||||
js = (ROOT_DIR / "app" / "static" / "app.js").read_text(encoding="utf-8")
|
||||
@@ -17,10 +47,78 @@ def test_index_template_contains_upload_ui() -> None:
|
||||
assert "download-md" in js
|
||||
assert "pollTask" in js
|
||||
assert "skill_collection" in html
|
||||
assert "skill-upload-form" in html
|
||||
assert "/skill-collections/upload" in js
|
||||
assert "预留后续版本:单个技能集合内的 skill 筛选功能" in html
|
||||
assert not any(route.path == "/skills" for route in app.routes)
|
||||
|
||||
|
||||
def test_skill_collection_options_discover_added_directory(tmp_path: Path, monkeypatch) -> None:
    """A collection directory created on disk shows up in the options list."""
    skills_root = tmp_path / "skills"
    collection_dir = skills_root / "interesting_physics_skills"
    skill_dir = collection_dir / "demo-skill"
    skill_dir.mkdir(parents=True)

    index_rows = [
        "| Skill | Description | Use When |\n",
        "| --- | --- | --- |\n",
        "| [demo-skill](demo-skill/SKILL.md) | 示例技能 | 后台新增合集 |\n",
    ]
    (collection_dir / "index.md").write_text("".join(index_rows), encoding="utf-8")
    (skill_dir / "SKILL.md").write_text(
        "---\nname: demo-skill\ndescription: 示例技能\n---\n# Demo\n",
        encoding="utf-8",
    )
    monkeypatch.setattr(main, "SKILL_ROOT", skills_root)

    discovered = main._skill_collection_options()

    slugs = [entry["slug"] for entry in discovered]
    assert slugs == ["interesting_physics_skills"]
    assert discovered[0]["skill_count"] == 1
|
||||
|
||||
|
||||
def test_upload_skill_collection_zip_extracts_and_lists(tmp_path: Path, monkeypatch) -> None:
    """Uploading a valid zip installs the collection and lists it in the response."""
    skills_root = tmp_path / "skills"
    uploads_root = tmp_path / "uploads"
    monkeypatch.setattr(main, "SKILL_ROOT", skills_root)
    # The route stages the archive under UPLOAD_DIR; redirect it so the test
    # never writes into the project's real uploads directory.
    monkeypatch.setattr(main, "UPLOAD_DIR", uploads_root)
    archive_path = tmp_path / "uploaded_skills.zip"
    _write_skill_collection_zip(archive_path)

    upload = FakeUploadFile("uploaded_skills.zip", archive_path.read_bytes())
    payload = asyncio.run(main.upload_skill_collection(upload))

    assert payload["collection"]["slug"] == "uploaded_skills"
    assert payload["collection"]["skill_count"] == 1
    assert (skills_root / "uploaded_skills" / "index.md").exists()
    assert any(collection["slug"] == "uploaded_skills" for collection in payload["collections"])
    # The staged archive is removed once the install finishes.
    assert not any(uploads_root.iterdir())
|
||||
|
||||
|
||||
def test_upload_skill_collection_rejects_non_zip(tmp_path: Path, monkeypatch) -> None:
    """A file whose name does not end in .zip is rejected with HTTP 400."""
    monkeypatch.setattr(main, "SKILL_ROOT", tmp_path / "skills")
    upload = FakeUploadFile("uploaded_skills.txt", b"not zip")

    rejection = None
    try:
        asyncio.run(main.upload_skill_collection(upload))
    except main.HTTPException as exc:
        rejection = exc

    if rejection is None:
        raise AssertionError("non-zip upload should fail")
    assert rejection.status_code == 400
    assert "zip" in rejection.detail
|
||||
|
||||
|
||||
def test_install_skill_collection_zip_rejects_unsafe_paths(tmp_path: Path, monkeypatch) -> None:
    """A zip containing a ``..`` member is rejected and nothing is installed."""
    skills_root = tmp_path / "skills"
    monkeypatch.setattr(main, "SKILL_ROOT", skills_root)
    # This upload passes the file-name check, so the route stages it under
    # UPLOAD_DIR; redirect that so the real uploads directory stays untouched.
    monkeypatch.setattr(main, "UPLOAD_DIR", tmp_path / "uploads")
    archive_path = tmp_path / "unsafe.zip"
    with zipfile.ZipFile(archive_path, "w") as archive:
        archive.writestr("../index.md", "bad")

    upload = FakeUploadFile("unsafe.zip", archive_path.read_bytes())
    try:
        asyncio.run(main.upload_skill_collection(upload))
    except main.HTTPException as exc:
        assert exc.status_code == 400
        assert "非法路径" in exc.detail
    else:
        raise AssertionError("unsafe zip should fail")

    # The member names are validated before extraction, so no collection
    # directory may appear.
    assert not (skills_root / "unsafe").exists()
|
||||
|
||||
|
||||
def test_analyze_saved_docx_reports_progress(tmp_path: Path) -> None:
|
||||
updates: list[tuple[int, str]] = []
|
||||
docx_path = tmp_path / "progress.docx"
|
||||
|
||||
Reference in New Issue
Block a user