10 Commits

Author SHA1 Message Date
kuangji
70d7a9578f skills upload function handoff 2026-05-26 11:06:41 +08:00
kuangji
fea4f2b512 skills upload function test_web.py 2026-05-26 10:34:22 +08:00
kuangji
aa064692ad skills upload function index.html 2026-05-26 10:33:15 +08:00
kuangji
990d872bb8 skills upload function styles.css 2026-05-26 10:33:00 +08:00
kuangji
5cb7785a38 skills upload function app.js 2026-05-26 10:32:47 +08:00
kuangji
0f8917d874 skills upload function 2026-05-26 10:30:07 +08:00
kuangji
bb2e55e889 ignore prompts_folder 2026-05-26 09:34:23 +08:00
kuangji
23c138e778 ignore handoff-2026-05-19.md 2026-05-26 09:33:40 +08:00
kuangji
04639296e2 ignore deploy.md 2026-05-26 09:33:19 +08:00
kuangji
faa6f3da0c ignore skills folder 2026-05-26 09:32:56 +08:00
7 changed files with 332 additions and 12 deletions

5
.gitignore vendored
View File

@@ -12,5 +12,10 @@ plant.md
wheelhouse/
python-3.12-runtime.tar.gz
skills/
deploy.md
handoff-2026-05-19.md
prompts_folder/

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import shutil
import threading
import time
import zipfile
from dataclasses import dataclass, field
from pathlib import Path
from uuid import uuid4
@@ -31,15 +32,22 @@ UPLOAD_DIR = ROOT_DIR / "uploads"
OUTPUT_DIR = ROOT_DIR / "outputs"
SKILL_ROOT = ROOT_DIR / "skills"
DEFAULT_SKILL_COLLECTION = "GJB438C-2021_prd_skills"
SKILL_COLLECTIONS = [
"GJB438B-2009_prd_skills",
"GJB438C-2021_prd_skills",
]
CONFIG_PATH = ROOT_DIR / "configs" / "api_config.yaml"
MAX_UPLOAD_BYTES = 30 * 1024 * 1024
MAX_SKILL_ARCHIVE_BYTES = 50 * 1024 * 1024
ProgressCallback = Callable[[int, str], None]
def _discover_skill_collections() -> list[str]:
    """Return the sorted names of the skill collections found under SKILL_ROOT.

    A collection is any direct sub-directory of ``SKILL_ROOT`` that contains
    an ``index.md`` file. An empty list is returned when ``SKILL_ROOT`` does
    not exist.

    Directories named ``.<slug>.<hex>.tmp`` are skipped: that is the naming
    pattern ``install_skill_collection_zip`` uses for its staging directory,
    which already contains ``index.md`` while an install is in progress (or
    after a crashed one) and must not be offered as a selectable collection.
    """
    if not SKILL_ROOT.exists():
        return []
    return sorted(
        path.name
        for path in SKILL_ROOT.iterdir()
        if not (path.name.startswith(".") and path.name.endswith(".tmp"))
        and path.is_dir()
        and (path / "index.md").is_file()
    )
def _skill_collection_path(collection_slug: str) -> Path:
path = SKILL_ROOT / collection_slug
if not path.exists() or not path.is_dir() or not (path / "index.md").exists():
@@ -49,10 +57,8 @@ def _skill_collection_path(collection_slug: str) -> Path:
def _skill_collection_options() -> list[dict[str, object]]:
options: list[dict[str, object]] = []
for collection_slug in SKILL_COLLECTIONS:
for collection_slug in _discover_skill_collections():
path = SKILL_ROOT / collection_slug
if not path.exists() or not path.is_dir() or not (path / "index.md").exists():
continue
skills = load_skill_catalog(path)
options.append(
{
@@ -64,6 +70,64 @@ def _skill_collection_options() -> list[dict[str, object]]:
return options
def _validate_skill_archive_member(member_name: str) -> None:
    """Reject a zip member name that could escape the extraction directory.

    Raises:
        HTTPException: status 400 when the name is empty, contains a
            backslash, starts with a slash or backslash, is an absolute
            path, or has an empty, ``.`` or ``..`` path component.
    """
    member_path = Path(member_name)
    looks_rooted = member_name.startswith(("/", "\\")) or member_path.is_absolute()
    if not member_name or "\\" in member_name or looks_rooted:
        raise HTTPException(status_code=400, detail="压缩包包含非法路径")

    for part in member_path.parts:
        if part in ("", ".", ".."):
            raise HTTPException(status_code=400, detail="压缩包包含非法路径")
def install_skill_collection_zip(archive_path: Path, collection_slug: str) -> dict[str, object]:
    """Validate a skill-collection zip and install it as ``SKILL_ROOT/<slug>``.

    The archive is extracted into a uniquely named staging directory inside
    ``SKILL_ROOT``, checked with ``load_skill_catalog``, and only then renamed
    onto the target directory. An existing directory with the same slug is
    deleted and replaced.

    Args:
        archive_path: Path of the zip file on disk.
        collection_slug: Directory name to install under ``SKILL_ROOT``. It
            must be non-empty, not ``.`` or ``..``, and contain no slash or
            backslash.

    Returns:
        A dict with ``slug``, ``label`` (the slug with ``_prd_skills``
        removed) and ``skill_count``.

    Raises:
        HTTPException: status 400 for an invalid slug, a file that is not a
            valid zip, an empty archive, an unsafe member path, a missing
            root-level ``index.md``, or a collection for which
            ``load_skill_catalog`` returns nothing.
    """
    if not collection_slug or collection_slug in {".", ".."}:
        raise HTTPException(status_code=400, detail="技能合集名称无效")
    if "/" in collection_slug or "\\" in collection_slug:
        raise HTTPException(status_code=400, detail="技能合集名称无效")
    if not zipfile.is_zipfile(archive_path):
        raise HTTPException(status_code=400, detail="仅支持有效的 zip 压缩包")

    SKILL_ROOT.mkdir(parents=True, exist_ok=True)
    target_dir = SKILL_ROOT / collection_slug
    # Staging directory lives next to the target so the final rename stays
    # on the same filesystem.
    temp_dir = SKILL_ROOT / f".{collection_slug}.{uuid4().hex}.tmp"
    try:
        with zipfile.ZipFile(archive_path) as archive:
            members = archive.infolist()
            if not members:
                raise HTTPException(status_code=400, detail="压缩包为空")
            names = [member.filename for member in members]
            for name in names:
                _validate_skill_archive_member(name)
            if "index.md" not in names:
                raise HTTPException(status_code=400, detail="技能合集压缩包根目录必须包含 index.md")
            # NOTE(review): the total uncompressed size of the members is not
            # bounded here; only the compressed upload is size-limited by the
            # caller. Consider a cap if archives come from untrusted users.
            archive.extractall(temp_dir)

        skills = load_skill_catalog(temp_dir)
        if not skills:
            raise HTTPException(status_code=400, detail="技能合集未包含有效 SKILL.md")

        if target_dir.exists():
            shutil.rmtree(target_dir)
        temp_dir.rename(target_dir)
        return {
            "slug": collection_slug,
            "label": collection_slug.replace("_prd_skills", ""),
            "skill_count": len(skills),
        }
    except zipfile.BadZipFile as exc:
        raise HTTPException(status_code=400, detail="仅支持有效的 zip 压缩包") from exc
    finally:
        # Single cleanup point for every failure path. After a successful
        # rename the staging directory no longer exists, so this is a no-op;
        # ignore_errors keeps a cleanup failure from masking the real error.
        shutil.rmtree(temp_dir, ignore_errors=True)
@dataclass
class AnalysisTask:
task_id: str
@@ -247,18 +311,55 @@ def _run_analysis_task(
@app.get("/", response_class=HTMLResponse)
def index(request: Request) -> HTMLResponse:
settings = load_api_config(CONFIG_PATH)
skill_collections = _skill_collection_options()
return templates.TemplateResponse(
request,
"index.html",
{
"default_provider": settings.provider_name,
"skill_collection_count": len(SKILL_COLLECTIONS),
"skill_collections": _skill_collection_options(),
"skill_collection_count": len(skill_collections),
"skill_collections": skill_collections,
"default_skill_collection": DEFAULT_SKILL_COLLECTION,
},
)
@app.get("/skill-collections")
def list_skill_collections() -> dict[str, object]:
    """Return the currently available skill collections and the default slug."""
    available = _skill_collection_options()
    return {
        "collections": available,
        "default_skill_collection": DEFAULT_SKILL_COLLECTION,
    }
@app.post("/skill-collections/upload")
async def upload_skill_collection(file: UploadFile = File(...)) -> dict[str, object]:
    """Accept a ``.zip`` skill collection upload and install it.

    The collection slug is the uploaded file name without its ``.zip``
    suffix. The archive is written to ``UPLOAD_DIR`` under a unique name,
    handed to ``install_skill_collection_zip`` and always deleted afterwards.

    Returns:
        A dict with a user-facing ``message``, the installed ``collection``
        and the refreshed ``collections`` list.

    Raises:
        HTTPException: status 400 when the file name is missing or does not
            end in ``.zip``; status 413 when the archive exceeds
            ``MAX_SKILL_ARCHIVE_BYTES``; any error raised by
            ``install_skill_collection_zip``.
    """
    if not file.filename or not file.filename.lower().endswith(".zip"):
        raise HTTPException(status_code=400, detail="技能合集仅支持上传 .zip 压缩包")

    # When the upload object reports its size, reject oversized archives
    # before read() pulls the whole body into memory. Objects without a
    # ``size`` attribute fall through to the length check below.
    declared_size = getattr(file, "size", None)
    if isinstance(declared_size, int) and declared_size > MAX_SKILL_ARCHIVE_BYTES:
        raise HTTPException(status_code=413, detail="技能合集压缩包超过 50MB 限制")

    content = await file.read()
    if len(content) > MAX_SKILL_ARCHIVE_BYTES:
        raise HTTPException(status_code=413, detail="技能合集压缩包超过 50MB 限制")

    # Keep only the final path component of the client-supplied name.
    archive_name = Path(file.filename).name
    collection_slug = archive_name[:-4]  # drop the 4-character ".zip" suffix
    archive_path = UPLOAD_DIR / f"{uuid4().hex}_{archive_name}"
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    archive_path.write_bytes(content)
    try:
        collection = install_skill_collection_zip(archive_path, collection_slug)
    finally:
        # The uploaded archive is only a transport file; never keep it.
        archive_path.unlink(missing_ok=True)

    return {
        "message": f"技能合集 {collection['slug']} 上传成功，已加载 {collection['skill_count']} 项技能",
        "collection": collection,
        "collections": _skill_collection_options(),
    }
@app.post("/analyze")
async def analyze_docx(
file: UploadFile = File(...),

View File

@@ -1,4 +1,7 @@
const form = document.querySelector("#upload-form");
const skillUploadForm = document.querySelector("#skill-upload-form");
const skillCollectionSelect = document.querySelector("#skill-collection");
const skillUploadStatus = document.querySelector("#skill-upload-status");
const result = document.querySelector("#result");
const summary = document.querySelector("#summary");
const skills = document.querySelector("#skills");
@@ -27,6 +30,64 @@ async function pollTask(statusUrl) {
}
}
/**
 * Rebuild the skill-collection <select> from a list of collections.
 *
 * @param {Array<{slug: string, label: string, skill_count: number}>} collections
 *   Entries to render, in display order.
 * @param {string} [selectedSlug] Slug to select; when omitted, the value
 *   selected before the rebuild is kept if it is still present.
 */
function renderSkillCollections(collections, selectedSlug) {
  // Read the previous selection before the options are cleared.
  const wantedSlug = selectedSlug || skillCollectionSelect.value;
  skillCollectionSelect.innerHTML = "";

  for (const entry of collections) {
    const optionNode = document.createElement("option");
    optionNode.value = entry.slug;
    optionNode.textContent = `${entry.label}${entry.skill_count}`;
    if (entry.slug === wantedSlug) {
      optionNode.selected = true;
    }
    skillCollectionSelect.appendChild(optionNode);
  }
}
/**
 * Fetch the current skill collections from the server and re-render the
 * dropdown.
 *
 * @param {string} [selectedSlug] Slug to select after the refresh.
 * @throws {Error} With the server-provided `detail`, or a generic message
 *   when the request fails.
 */
async function refreshSkillCollections(selectedSlug) {
  const response = await fetch("/skill-collections");
  if (!response.ok) {
    // Error responses are not guaranteed to be JSON (e.g. a plain-text 500),
    // so a parse failure must not replace the intended error message.
    const failure = await response.json().catch(() => null);
    throw new Error((failure && failure.detail) || "技能合集刷新失败");
  }
  const payload = await response.json();
  renderSkillCollections(payload.collections, selectedSlug);
}
// Upload the selected skill-collection zip without leaving the page, then
// refresh the dropdown and select the newly installed collection.
skillUploadForm.addEventListener("submit", async (event) => {
  event.preventDefault();
  const uploadButton = skillUploadForm.querySelector("button");
  uploadButton.disabled = true;
  uploadButton.textContent = "上传中...";
  skillUploadStatus.textContent = "正在上传并解压技能合集...";
  try {
    const data = new FormData(skillUploadForm);
    const response = await fetch("/skill-collections/upload", {
      method: "POST",
      body: data,
    });
    if (!response.ok) {
      // Error responses are not guaranteed to be JSON (e.g. a plain-text 500
      // or a proxy-generated 413 page); fall back to the generic message
      // instead of surfacing a JSON parse error.
      const failure = await response.json().catch(() => null);
      throw new Error((failure && failure.detail) || "上传失败");
    }
    const payload = await response.json();
    renderSkillCollections(payload.collections, payload.collection.slug);
    skillUploadForm.reset();
    skillUploadStatus.textContent = payload.message;
  } catch (error) {
    skillUploadStatus.textContent = error.message;
  } finally {
    // Always restore the button, whether the upload succeeded or failed.
    uploadButton.disabled = false;
    uploadButton.textContent = "上传合集";
  }
});
// Refresh the dropdown once when the script loads; a failure is shown in the
// upload status line rather than thrown.
refreshSkillCollections().catch((error) => {
  skillUploadStatus.textContent = error.message;
});
form.addEventListener("submit", async (event) => {
event.preventDefault();
button.disabled = true;

View File

@@ -80,6 +80,26 @@ h2 {
gap: 20px;
}
/* Skill-collection upload panel: a two-column grid with a flexible first
   column and an auto-sized second column, items aligned to the bottom. */
.collection-upload {
  display: grid;
  grid-template-columns: minmax(0, 1fr) auto;
  gap: 12px;
  align-items: end;
  margin-bottom: 24px;
  padding: 16px;
  border: 1px solid #dbe2ee;
  border-radius: 8px;
  background: #fbfcfe;
}

/* The status text spans every column of the panel grid. */
.collection-upload .status-text {
  grid-column: 1 / -1;
}

/* Minimum height for the file input inside the panel. */
.collection-upload input[type="file"] {
  min-height: 42px;
}
.drop-zone {
border: 1px dashed #8aa1bd;
border-radius: 8px;
@@ -207,7 +227,8 @@ button:disabled {
}
.header,
.controls {
.controls,
.collection-upload {
grid-template-columns: 1fr;
display: grid;
}

View File

@@ -15,11 +15,20 @@
<h1>DOCX 规范分析</h1>
</div>
<div class="meta">
<span>{{ skill_count }} 项技能</span>
<span>{{ skill_collection_count }} 个合集</span>
<span>默认 {{ default_provider }}</span>
</div>
</div>
<form id="skill-upload-form" class="collection-upload">
<label>
上传技能合集
<input id="skill-archive" name="file" type="file" accept=".zip" required>
</label>
<button type="submit">上传合集</button>
<p id="skill-upload-status" class="status-text" role="status">仅支持 zip 压缩包,解压后会进入 skills/ 目录</p>
</form>
<form id="upload-form" class="form">
<label class="drop-zone">
<input id="file" name="file" type="file" accept=".docx" required>
@@ -38,7 +47,7 @@
</label>
<label>
技能合集
<select name="skill_collection">
<select id="skill-collection" name="skill_collection">
{% for collection in skill_collections %}
<option value="{{ collection.slug }}" {% if collection.slug == default_skill_collection %}selected{% endif %}>
{{ collection.label }}{{ collection.skill_count }}

View File

@@ -0,0 +1,25 @@
# Handoff - 2026-05-26
## Completed Tasks
- 将技能合集列表从固定常量改为动态扫描 `skills/` 下包含 `index.md` 的目录,解决后台新增 `skills/interesting_physics_skills` 后前端“技能合集”下拉框不显示的问题。
- 新增 `GET /skill-collections` 接口,用于前端实时刷新可用技能合集列表。
- 新增 `POST /skill-collections/upload` 接口,严格限制上传 `.zip` 技能合集压缩包,并按 `skills/<压缩包文件名去掉.zip>/` 的规范解压安装。
- 为 zip 安装流程增加校验:有效 zip、根目录必须包含 `index.md`、拒绝路径穿越、拒绝空包、拒绝无有效 `SKILL.md` 的合集。
- 在前端新增简洁的“上传技能合集”区域,上传成功后显示提醒,并立即刷新“技能合集”下拉框且选中新上传的合集。
- 修正首页顶部技能合集数量展示,改为显示当前动态发现的合集数量。
- 补充测试覆盖动态发现后台新增目录、上传 zip 后解压并进入列表、拒绝非 zip、拒绝非法路径 zip。
- 执行验证：`python -m pytest -q` 通过，结果为 `17 passed in 6.91s`；`git diff --check` 通过。
- 启动本地 FastAPI 服务用于页面验证，当前地址为 `http://127.0.0.1:8002`。
## Blockers
- 当前 zip 格式按现有 `skills/GJB438B-2009_prd_skills.zip` 规范处理,即 `index.md` 必须位于压缩包根目录;如果后续需要支持“压缩包内再包一层目录”的格式,需要补充规范转换逻辑。
- 上传同名合集时当前实现会用新解压内容替换 `skills/<合集名>/`,需要在后续产品设计中确认是否增加覆盖确认、版本备份或回滚能力。
- 本地测试中 `fastapi.testclient.TestClient` 在当前环境会挂起,因此测试改为直接调用异步路由函数和安装函数;后续如升级依赖或调整测试环境,可再恢复端到端 HTTP 客户端测试。
- 默认端口 `8000`、`8001` 启动失败，最终使用 `8002` 启动服务。
## Next Steps
- 明天使用真实技能合集 zip 在浏览器中做一次完整手工验证:上传、成功提示、下拉框刷新、选择新合集并执行 DOCX 分析。
- 为上传同名合集补充更明确的管理策略,例如覆盖确认、保留上一版本备份或禁止覆盖。
- 评估是否支持多种 zip 打包结构,并在文档中明确技能合集 zip 的标准目录格式。
- 考虑增加前端上传状态样式区分,例如成功、失败、校验错误使用不同颜色,但保持当前页面简洁风格。
- 如后续要正式部署,补充接口级日志,记录上传合集名称、技能数量、校验失败原因和安装时间。

View File

@@ -1,10 +1,40 @@
import asyncio
from pathlib import Path
import zipfile
from docx import Document
import app.main as main
from app.main import OUTPUT_DIR, ROOT_DIR, analyze_saved_docx, app
class FakeUploadFile:
    """Minimal in-memory stand-in for an uploaded file, for calling routes directly.

    It exposes ``filename``, ``size`` and an async ``read`` so that route code
    relying on any of them can be exercised without an HTTP client. No read
    cursor is kept: every ``read`` call starts from the beginning of the
    content.
    """

    def __init__(self, filename: str, content: bytes) -> None:
        self.filename = filename
        self.size = len(content)  # byte length of the fake upload
        self._content = content

    async def read(self, size: int = -1) -> bytes:
        """Return the whole content, or its first ``size`` bytes when ``size`` >= 0."""
        if size is None or size < 0:
            return self._content
        return self._content[:size]
def _write_skill_collection_zip(path: Path) -> None:
    """Write a minimal skill-collection zip to ``path``.

    The archive holds a root-level ``index.md`` table with one row and the
    ``demo-skill/SKILL.md`` file that the row links to.
    """
    index_md = (
        "| Skill | Description | Use When |\n"
        "| --- | --- | --- |\n"
        "| [demo-skill](demo-skill/SKILL.md) | 示例技能 | 上传合集测试 |\n"
    )
    skill_md = (
        "---\n"
        "name: demo-skill\n"
        "description: 示例技能\n"
        "---\n"
        "# Demo Skill\n"
    )
    # Insertion order is the member order inside the archive.
    members = {
        "index.md": index_md,
        "demo-skill/SKILL.md": skill_md,
    }
    with zipfile.ZipFile(path, "w") as archive:
        for member_name, text in members.items():
            archive.writestr(member_name, text)
def test_index_template_contains_upload_ui() -> None:
html = (ROOT_DIR / "app" / "templates" / "index.html").read_text(encoding="utf-8")
js = (ROOT_DIR / "app" / "static" / "app.js").read_text(encoding="utf-8")
@@ -17,10 +47,78 @@ def test_index_template_contains_upload_ui() -> None:
assert "download-md" in js
assert "pollTask" in js
assert "skill_collection" in html
assert "skill-upload-form" in html
assert "/skill-collections/upload" in js
assert "预留后续版本:单个技能集合内的 skill 筛选功能" in html
assert not any(route.path == "/skills" for route in app.routes)
def test_skill_collection_options_discover_added_directory(tmp_path: Path, monkeypatch) -> None:
    """A collection directory added on disk is listed without any registration."""
    skills_root = tmp_path / "skills"
    collection_dir = skills_root / "interesting_physics_skills"
    skill_dir = collection_dir / "demo-skill"
    skill_dir.mkdir(parents=True)

    index_file = collection_dir / "index.md"
    index_file.write_text(
        "| Skill | Description | Use When |\n"
        "| --- | --- | --- |\n"
        "| [demo-skill](demo-skill/SKILL.md) | 示例技能 | 后台新增合集 |\n",
        encoding="utf-8",
    )
    skill_file = skill_dir / "SKILL.md"
    skill_file.write_text(
        "---\nname: demo-skill\ndescription: 示例技能\n---\n# Demo\n",
        encoding="utf-8",
    )
    monkeypatch.setattr(main, "SKILL_ROOT", skills_root)

    discovered = main._skill_collection_options()
    slugs = [entry["slug"] for entry in discovered]

    assert slugs == ["interesting_physics_skills"]
    assert discovered[0]["skill_count"] == 1
def test_upload_skill_collection_zip_extracts_and_lists(tmp_path: Path, monkeypatch) -> None:
    """Uploading a valid zip installs the collection and returns it in the list."""
    skills_root = tmp_path / "skills"
    uploads_root = tmp_path / "uploads"
    monkeypatch.setattr(main, "SKILL_ROOT", skills_root)
    # The route writes the archive under UPLOAD_DIR; redirect it so the test
    # never touches the project's real uploads directory.
    monkeypatch.setattr(main, "UPLOAD_DIR", uploads_root)
    archive_path = tmp_path / "uploaded_skills.zip"
    _write_skill_collection_zip(archive_path)
    upload = FakeUploadFile("uploaded_skills.zip", archive_path.read_bytes())

    payload = asyncio.run(main.upload_skill_collection(upload))

    assert payload["collection"]["slug"] == "uploaded_skills"
    assert payload["collection"]["skill_count"] == 1
    assert (skills_root / "uploaded_skills" / "index.md").exists()
    assert any(collection["slug"] == "uploaded_skills" for collection in payload["collections"])
    # The temporary archive copy is removed after installation.
    assert not list(uploads_root.iterdir())
def test_upload_skill_collection_rejects_non_zip(tmp_path: Path, monkeypatch) -> None:
    """A file without a .zip name is rejected with HTTP 400."""
    monkeypatch.setattr(main, "SKILL_ROOT", tmp_path / "skills")
    upload = FakeUploadFile("uploaded_skills.txt", b"not zip")

    rejection = None
    try:
        asyncio.run(main.upload_skill_collection(upload))
    except main.HTTPException as exc:
        rejection = exc

    if rejection is None:
        raise AssertionError("non-zip upload should fail")
    assert rejection.status_code == 400
    assert "zip" in rejection.detail
def test_install_skill_collection_zip_rejects_unsafe_paths(tmp_path: Path, monkeypatch) -> None:
    """A zip whose member path climbs out of the archive root is rejected."""
    monkeypatch.setattr(main, "SKILL_ROOT", tmp_path / "skills")
    # The route writes the archive under UPLOAD_DIR; redirect it so the test
    # never touches the project's real uploads directory.
    monkeypatch.setattr(main, "UPLOAD_DIR", tmp_path / "uploads")
    archive_path = tmp_path / "unsafe.zip"
    with zipfile.ZipFile(archive_path, "w") as archive:
        archive.writestr("../index.md", "bad")
    upload = FakeUploadFile("unsafe.zip", archive_path.read_bytes())

    try:
        asyncio.run(main.upload_skill_collection(upload))
    except main.HTTPException as exc:
        assert exc.status_code == 400
        assert "非法路径" in exc.detail
    else:
        raise AssertionError("unsafe zip should fail")

    # Nothing may have been installed, or written next to the skills root.
    assert not (tmp_path / "skills" / "unsafe").exists()
    assert not (tmp_path / "index.md").exists()
def test_analyze_saved_docx_reports_progress(tmp_path: Path) -> None:
updates: list[tuple[int, str]] = []
docx_path = tmp_path / "progress.docx"