Compare commits

10 Commits

Author SHA1 Message Date
kuangji
70d7a9578f skills upload function handoff 2026-05-26 11:06:41 +08:00
kuangji
fea4f2b512 skills upload function test_web.py 2026-05-26 10:34:22 +08:00
kuangji
aa064692ad skills upload function index.html 2026-05-26 10:33:15 +08:00
kuangji
990d872bb8 skills upload function styles.css 2026-05-26 10:33:00 +08:00
kuangji
5cb7785a38 skills upload function app.js 2026-05-26 10:32:47 +08:00
kuangji
0f8917d874 skills upload function 2026-05-26 10:30:07 +08:00
kuangji
bb2e55e889 ignore prompts_folder 2026-05-26 09:34:23 +08:00
kuangji
23c138e778 ignore handoff-2026-05-19.md 2026-05-26 09:33:40 +08:00
kuangji
04639296e2 ignore deploy.md 2026-05-26 09:33:19 +08:00
kuangji
faa6f3da0c ignore skills folder 2026-05-26 09:32:56 +08:00
7 changed files with 332 additions and 12 deletions

5
.gitignore vendored
View File

@@ -12,5 +12,10 @@ plant.md
wheelhouse/ wheelhouse/
python-3.12-runtime.tar.gz python-3.12-runtime.tar.gz
skills/
deploy.md
handoff-2026-05-19.md
prompts_folder/

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import shutil import shutil
import threading import threading
import time import time
import zipfile
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from uuid import uuid4 from uuid import uuid4
@@ -31,15 +32,22 @@ UPLOAD_DIR = ROOT_DIR / "uploads"
OUTPUT_DIR = ROOT_DIR / "outputs" OUTPUT_DIR = ROOT_DIR / "outputs"
SKILL_ROOT = ROOT_DIR / "skills" SKILL_ROOT = ROOT_DIR / "skills"
DEFAULT_SKILL_COLLECTION = "GJB438C-2021_prd_skills" DEFAULT_SKILL_COLLECTION = "GJB438C-2021_prd_skills"
SKILL_COLLECTIONS = [
"GJB438B-2009_prd_skills",
"GJB438C-2021_prd_skills",
]
CONFIG_PATH = ROOT_DIR / "configs" / "api_config.yaml" CONFIG_PATH = ROOT_DIR / "configs" / "api_config.yaml"
MAX_UPLOAD_BYTES = 30 * 1024 * 1024 MAX_UPLOAD_BYTES = 30 * 1024 * 1024
MAX_SKILL_ARCHIVE_BYTES = 50 * 1024 * 1024
ProgressCallback = Callable[[int, str], None] ProgressCallback = Callable[[int, str], None]
def _discover_skill_collections() -> list[str]:
    """Return the sorted directory names of all installed skill collections.

    A collection is a directory directly under ``SKILL_ROOT`` that contains an
    ``index.md`` file.  Dot-prefixed directories are ignored: the zip installer
    stages uploads in ``.<slug>.<hex>.tmp`` directories inside ``SKILL_ROOT``,
    and those hold an ``index.md`` while an install is in progress (or after an
    interrupted one), so they must not be offered as selectable collections.

    Returns:
        Collection slugs in ascending order; an empty list when ``SKILL_ROOT``
        does not exist.
    """
    if not SKILL_ROOT.exists():
        return []
    return sorted(
        path.name
        for path in SKILL_ROOT.iterdir()
        if not path.name.startswith(".")
        and path.is_dir()
        and (path / "index.md").is_file()
    )
def _skill_collection_path(collection_slug: str) -> Path: def _skill_collection_path(collection_slug: str) -> Path:
path = SKILL_ROOT / collection_slug path = SKILL_ROOT / collection_slug
if not path.exists() or not path.is_dir() or not (path / "index.md").exists(): if not path.exists() or not path.is_dir() or not (path / "index.md").exists():
@@ -49,10 +57,8 @@ def _skill_collection_path(collection_slug: str) -> Path:
def _skill_collection_options() -> list[dict[str, object]]: def _skill_collection_options() -> list[dict[str, object]]:
options: list[dict[str, object]] = [] options: list[dict[str, object]] = []
for collection_slug in SKILL_COLLECTIONS: for collection_slug in _discover_skill_collections():
path = SKILL_ROOT / collection_slug path = SKILL_ROOT / collection_slug
if not path.exists() or not path.is_dir() or not (path / "index.md").exists():
continue
skills = load_skill_catalog(path) skills = load_skill_catalog(path)
options.append( options.append(
{ {
@@ -64,6 +70,64 @@ def _skill_collection_options() -> list[dict[str, object]]:
return options return options
def _validate_skill_archive_member(member_name: str) -> None:
    """Reject a zip member name that could escape the extraction directory.

    A name is refused when it is empty, contains a backslash, starts with a
    path separator, is absolute, or has an empty, ``.`` or ``..`` component.

    Args:
        member_name: The member's file name as recorded in the archive.

    Raises:
        HTTPException: 400 when the name is considered unsafe.
    """
    member_path = Path(member_name)
    forbidden_parts = {"", ".", ".."}
    is_unsafe = (
        not member_name
        or "\\" in member_name
        or member_name.startswith(("/", "\\"))
        or member_path.is_absolute()
        or not forbidden_parts.isdisjoint(member_path.parts)
    )
    if is_unsafe:
        raise HTTPException(status_code=400, detail="压缩包包含非法路径")
def install_skill_collection_zip(archive_path: Path, collection_slug: str) -> dict[str, object]:
    """Validate a skill-collection zip and install it as ``SKILL_ROOT/<collection_slug>``.

    The archive is extracted into a hidden staging directory next to the target
    and only moved into place once it has passed every check, so a rejected
    upload never touches an already installed collection.  An existing
    collection with the same slug is replaced.

    Args:
        archive_path: Location of the uploaded zip file on disk.
        collection_slug: Directory name to install under ``SKILL_ROOT``; must be
            a single path component.

    Returns:
        A dict with the installed ``slug``, its display ``label`` (the slug with
        ``_prd_skills`` removed) and the ``skill_count`` found in the catalog.

    Raises:
        HTTPException: 400 when the slug is invalid, the file is not a valid
            zip, the archive is empty, a member path is unsafe, ``index.md`` is
            missing from the archive root, or no skills could be loaded.
    """
    if not collection_slug or collection_slug in {".", ".."}:
        raise HTTPException(status_code=400, detail="技能合集名称无效")
    if "/" in collection_slug or "\\" in collection_slug:
        raise HTTPException(status_code=400, detail="技能合集名称无效")
    if not zipfile.is_zipfile(archive_path):
        raise HTTPException(status_code=400, detail="仅支持有效的 zip 压缩包")

    SKILL_ROOT.mkdir(parents=True, exist_ok=True)
    target_dir = SKILL_ROOT / collection_slug
    # Stage inside SKILL_ROOT so the final rename stays on one filesystem.
    temp_dir = SKILL_ROOT / f".{collection_slug}.{uuid4().hex}.tmp"
    try:
        with zipfile.ZipFile(archive_path) as archive:
            members = archive.infolist()
            if not members:
                raise HTTPException(status_code=400, detail="压缩包为空")
            names = [member.filename for member in members]
            for name in names:
                _validate_skill_archive_member(name)
            if "index.md" not in names:
                raise HTTPException(status_code=400, detail="技能合集压缩包根目录必须包含 index.md")
            # NOTE(review): nothing bounds the total uncompressed size, so a
            # small archive can expand to far more data on disk; consider
            # capping the sum of ZipInfo.file_size before extracting.
            archive.extractall(temp_dir)

        skills = load_skill_catalog(temp_dir)
        if not skills:
            raise HTTPException(status_code=400, detail="技能合集未包含有效 SKILL.md")

        if target_dir.exists():
            shutil.rmtree(target_dir)
        temp_dir.rename(target_dir)
    except zipfile.BadZipFile as exc:
        raise HTTPException(status_code=400, detail="仅支持有效的 zip 压缩包") from exc
    finally:
        # One cleanup point for every exit path, including BaseException.  After
        # a successful rename the staging directory no longer exists, and
        # ignore_errors keeps a cleanup failure from masking the real error.
        shutil.rmtree(temp_dir, ignore_errors=True)

    return {
        "slug": collection_slug,
        "label": collection_slug.replace("_prd_skills", ""),
        "skill_count": len(skills),
    }
@dataclass @dataclass
class AnalysisTask: class AnalysisTask:
task_id: str task_id: str
@@ -247,18 +311,55 @@ def _run_analysis_task(
@app.get("/", response_class=HTMLResponse) @app.get("/", response_class=HTMLResponse)
def index(request: Request) -> HTMLResponse: def index(request: Request) -> HTMLResponse:
settings = load_api_config(CONFIG_PATH) settings = load_api_config(CONFIG_PATH)
skill_collections = _skill_collection_options()
return templates.TemplateResponse( return templates.TemplateResponse(
request, request,
"index.html", "index.html",
{ {
"default_provider": settings.provider_name, "default_provider": settings.provider_name,
"skill_collection_count": len(SKILL_COLLECTIONS), "skill_collection_count": len(skill_collections),
"skill_collections": _skill_collection_options(), "skill_collections": skill_collections,
"default_skill_collection": DEFAULT_SKILL_COLLECTION, "default_skill_collection": DEFAULT_SKILL_COLLECTION,
}, },
) )
@app.get("/skill-collections")
def list_skill_collections() -> dict[str, object]:
    """Return the currently installed skill collections and the default slug."""
    available = _skill_collection_options()
    return {
        "collections": available,
        "default_skill_collection": DEFAULT_SKILL_COLLECTION,
    }
@app.post("/skill-collections/upload")
async def upload_skill_collection(file: UploadFile = File(...)) -> dict[str, object]:
    """Accept a ``.zip`` upload and install it as a skill collection.

    The collection slug is the uploaded file name without its ``.zip`` suffix.
    The archive is staged under ``UPLOAD_DIR`` with a random prefix and is
    always removed again, whether or not the install succeeds.

    Raises:
        HTTPException: 400 when the file name is missing or does not end in
            ``.zip``; 413 when the payload exceeds ``MAX_SKILL_ARCHIVE_BYTES``;
            plus whatever ``install_skill_collection_zip`` raises.
    """
    uploaded_name = file.filename
    if not uploaded_name or not uploaded_name.lower().endswith(".zip"):
        raise HTTPException(status_code=400, detail="技能合集仅支持上传 .zip 压缩包")

    # The size is only known after the whole body has been read into memory.
    payload = await file.read()
    if len(payload) > MAX_SKILL_ARCHIVE_BYTES:
        raise HTTPException(status_code=413, detail="技能合集压缩包超过 50MB 限制")

    # Keep only the final path component of the client-supplied name.
    archive_name = Path(uploaded_name).name
    slug = archive_name[:-4]
    staged_archive = UPLOAD_DIR / f"{uuid4().hex}_{archive_name}"
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    staged_archive.write_bytes(payload)
    try:
        collection = install_skill_collection_zip(staged_archive, slug)
    finally:
        if staged_archive.exists():
            staged_archive.unlink()

    return {
        "message": f"技能合集 {collection['slug']} 上传成功,已加载 {collection['skill_count']} 项技能",
        "collection": collection,
        "collections": _skill_collection_options(),
    }
@app.post("/analyze") @app.post("/analyze")
async def analyze_docx( async def analyze_docx(
file: UploadFile = File(...), file: UploadFile = File(...),

View File

@@ -1,4 +1,7 @@
const form = document.querySelector("#upload-form"); const form = document.querySelector("#upload-form");
const skillUploadForm = document.querySelector("#skill-upload-form");
const skillCollectionSelect = document.querySelector("#skill-collection");
const skillUploadStatus = document.querySelector("#skill-upload-status");
const result = document.querySelector("#result"); const result = document.querySelector("#result");
const summary = document.querySelector("#summary"); const summary = document.querySelector("#summary");
const skills = document.querySelector("#skills"); const skills = document.querySelector("#skills");
@@ -27,6 +30,64 @@ async function pollTask(statusUrl) {
} }
} }
/**
 * Rebuild the skill-collection <select> from a list of collection records.
 *
 * @param {Array<{slug: string, label: string, skill_count: number}>} collections
 *   Records to render, in display order.
 * @param {string} [selectedSlug] Slug to select; when omitted, the value that
 *   was selected before the rebuild is kept if it is still present.
 */
function renderSkillCollections(collections, selectedSlug) {
  // Capture the current choice before the options are thrown away.
  const preferredSlug = selectedSlug || skillCollectionSelect.value;
  skillCollectionSelect.innerHTML = "";

  for (const { slug, label, skill_count: skillCount } of collections) {
    const option = document.createElement("option");
    option.value = slug;
    option.textContent = `${label}${skillCount}`;
    if (slug === preferredSlug) {
      option.selected = true;
    }
    skillCollectionSelect.appendChild(option);
  }
}
/**
 * Fetch the installed skill collections and re-render the dropdown.
 *
 * @param {string} [selectedSlug] Slug to select after the refresh.
 * @throws {Error} When the request fails; the message is the server's
 *   `detail` string when one is available, otherwise a generic fallback.
 */
async function refreshSkillCollections(selectedSlug) {
  const response = await fetch("/skill-collections");
  if (!response.ok) {
    // An error body is not guaranteed to be JSON (e.g. an HTML page from a
    // proxy); a parse failure must not replace the friendly message.
    const failure = await response.json().catch(() => null);
    const detail = failure && typeof failure.detail === "string" ? failure.detail : "";
    throw new Error(detail || "技能合集刷新失败");
  }
  const payload = await response.json();
  renderSkillCollections(payload.collections, selectedSlug);
}
// Upload the chosen skill-collection archive without leaving the page, then
// refresh the dropdown and select the newly installed collection.
skillUploadForm.addEventListener("submit", async (event) => {
  event.preventDefault();
  const uploadButton = skillUploadForm.querySelector("button");
  uploadButton.disabled = true;
  uploadButton.textContent = "上传中...";
  skillUploadStatus.textContent = "正在上传并解压技能合集...";

  try {
    const response = await fetch("/skill-collections/upload", {
      method: "POST",
      body: new FormData(skillUploadForm),
    });
    if (!response.ok) {
      // An error body may not be JSON, and `detail` may not be a string (for
      // example a list of validation errors); fall back to the generic
      // message instead of showing a parse error or "[object Object]".
      const failure = await response.json().catch(() => null);
      const detail = failure && typeof failure.detail === "string" ? failure.detail : "";
      throw new Error(detail || "上传失败");
    }
    const payload = await response.json();
    renderSkillCollections(payload.collections, payload.collection.slug);
    skillUploadForm.reset();
    skillUploadStatus.textContent = payload.message;
  } catch (error) {
    skillUploadStatus.textContent = error.message;
  } finally {
    // Re-enable the button whether the upload succeeded or failed.
    uploadButton.disabled = false;
    uploadButton.textContent = "上传合集";
  }
});
// Refresh the dropdown once when the script loads; a failure is shown in the
// upload status line rather than left as an unhandled promise rejection.
refreshSkillCollections().catch((error) => {
skillUploadStatus.textContent = error.message;
});
form.addEventListener("submit", async (event) => { form.addEventListener("submit", async (event) => {
event.preventDefault(); event.preventDefault();
button.disabled = true; button.disabled = true;

View File

@@ -80,6 +80,26 @@ h2 {
gap: 20px; gap: 20px;
} }
/* Skill-collection upload panel: a two-column grid with a flexible first
   column and an auto-sized second column, items aligned to the bottom. */
.collection-upload {
display: grid;
grid-template-columns: minmax(0, 1fr) auto;
gap: 12px;
align-items: end;
margin-bottom: 24px;
padding: 16px;
border: 1px solid #dbe2ee;
border-radius: 8px;
background: #fbfcfe;
}
/* The status line spans every grid column. */
.collection-upload .status-text {
grid-column: 1 / -1;
}
/* Minimum height for the file input inside the panel. */
.collection-upload input[type="file"] {
min-height: 42px;
}
.drop-zone { .drop-zone {
border: 1px dashed #8aa1bd; border: 1px dashed #8aa1bd;
border-radius: 8px; border-radius: 8px;
@@ -207,7 +227,8 @@ button:disabled {
} }
.header, .header,
.controls { .controls,
.collection-upload {
grid-template-columns: 1fr; grid-template-columns: 1fr;
display: grid; display: grid;
} }

View File

@@ -15,11 +15,20 @@
<h1>DOCX 规范分析</h1> <h1>DOCX 规范分析</h1>
</div> </div>
<div class="meta"> <div class="meta">
<span>{{ skill_count }} 项技能</span> <span>{{ skill_collection_count }} 个合集</span>
<span>默认 {{ default_provider }}</span> <span>默认 {{ default_provider }}</span>
</div> </div>
</div> </div>
{# Skill-collection upload form. It declares no action of its own; the field
   is named "file" and only .zip files are offered by the picker. The status
   paragraph below carries the result message. #}
<form id="skill-upload-form" class="collection-upload">
<label>
上传技能合集
<input id="skill-archive" name="file" type="file" accept=".zip" required>
</label>
<button type="submit">上传合集</button>
<p id="skill-upload-status" class="status-text" role="status">仅支持 zip 压缩包,解压后会进入 skills/ 目录</p>
</form>
<form id="upload-form" class="form"> <form id="upload-form" class="form">
<label class="drop-zone"> <label class="drop-zone">
<input id="file" name="file" type="file" accept=".docx" required> <input id="file" name="file" type="file" accept=".docx" required>
@@ -38,7 +47,7 @@
</label> </label>
<label> <label>
技能合集 技能合集
<select name="skill_collection"> <select id="skill-collection" name="skill_collection">
{% for collection in skill_collections %} {% for collection in skill_collections %}
<option value="{{ collection.slug }}" {% if collection.slug == default_skill_collection %}selected{% endif %}> <option value="{{ collection.slug }}" {% if collection.slug == default_skill_collection %}selected{% endif %}>
{{ collection.label }}{{ collection.skill_count }} {{ collection.label }}{{ collection.skill_count }}

View File

@@ -0,0 +1,25 @@
# Handoff - 2026-05-26
## Completed Tasks
- 将技能合集列表从固定常量改为动态扫描 `skills/` 下包含 `index.md` 的目录,解决后台新增 `skills/interesting_physics_skills` 后前端“技能合集”下拉框不显示的问题。
- 新增 `GET /skill-collections` 接口,用于前端实时刷新可用技能合集列表。
- 新增 `POST /skill-collections/upload` 接口,严格限制上传 `.zip` 技能合集压缩包,并按 `skills/<压缩包文件名去掉.zip>/` 的规范解压安装。
- 为 zip 安装流程增加校验:有效 zip、根目录必须包含 `index.md`、拒绝路径穿越、拒绝空包、拒绝无有效 `SKILL.md` 的合集。
- 在前端新增简洁的“上传技能合集”区域,上传成功后显示提醒,并立即刷新“技能合集”下拉框且选中新上传的合集。
- 修正首页顶部技能合集数量展示,改为显示当前动态发现的合集数量。
- 补充测试覆盖动态发现后台新增目录、上传 zip 后解压并进入列表、拒绝非 zip、拒绝非法路径 zip。
- 执行验证:`python -m pytest -q` 通过,结果为 `17 passed in 6.91s`;`git diff --check` 通过。
- 启动本地 FastAPI 服务用于页面验证,当前地址为 `http://127.0.0.1:8002`。
## Blockers
- 当前 zip 格式按现有 `skills/GJB438B-2009_prd_skills.zip` 规范处理,即 `index.md` 必须位于压缩包根目录;如果后续需要支持“压缩包内再包一层目录”的格式,需要补充规范转换逻辑。
- 上传同名合集时当前实现会用新解压内容替换 `skills/<合集名>/`,需要在后续产品设计中确认是否增加覆盖确认、版本备份或回滚能力。
- 本地测试中 `fastapi.testclient.TestClient` 在当前环境会挂起,因此测试改为直接调用异步路由函数和安装函数;后续如升级依赖或调整测试环境,可再恢复端到端 HTTP 客户端测试。
- 默认端口 `8000` 和 `8001` 启动失败,最终使用 `8002` 启动服务。
## Next Steps
- 明天使用真实技能合集 zip 在浏览器中做一次完整手工验证:上传、成功提示、下拉框刷新、选择新合集并执行 DOCX 分析。
- 为上传同名合集补充更明确的管理策略,例如覆盖确认、保留上一版本备份或禁止覆盖。
- 评估是否支持多种 zip 打包结构,并在文档中明确技能合集 zip 的标准目录格式。
- 考虑增加前端上传状态样式区分,例如成功、失败、校验错误使用不同颜色,但保持当前页面简洁风格。
- 如后续要正式部署,补充接口级日志,记录上传合集名称、技能数量、校验失败原因和安装时间。

View File

@@ -1,10 +1,40 @@
import asyncio
from pathlib import Path from pathlib import Path
import zipfile
from docx import Document from docx import Document
import app.main as main
from app.main import OUTPUT_DIR, ROOT_DIR, analyze_saved_docx, app from app.main import OUTPUT_DIR, ROOT_DIR, analyze_saved_docx, app
class FakeUploadFile:
    """Minimal in-memory stand-in for an uploaded file.

    Exposes only what the upload route uses: a ``filename`` attribute and an
    awaitable ``read()`` that returns the whole payload.
    """

    def __init__(self, filename: str, content: bytes) -> None:
        self.filename = filename
        self._payload = content

    async def read(self) -> bytes:
        """Return the complete payload; repeated calls return the same bytes."""
        return self._payload
def _write_skill_collection_zip(path: Path) -> None:
    """Write a minimal skill-collection archive to *path*.

    The archive holds a root ``index.md`` table with one row linking to
    ``demo-skill/SKILL.md``, plus that ``SKILL.md`` with a short front-matter
    header.
    """
    index_table = "".join(
        (
            "| Skill | Description | Use When |\n",
            "| --- | --- | --- |\n",
            "| [demo-skill](demo-skill/SKILL.md) | 示例技能 | 上传合集测试 |\n",
        )
    )
    skill_document = "\n".join(
        ["---", "name: demo-skill", "description: 示例技能", "---", "# Demo Skill", ""]
    )
    entries = (
        ("index.md", index_table),
        ("demo-skill/SKILL.md", skill_document),
    )
    with zipfile.ZipFile(path, "w") as bundle:
        for member_name, text in entries:
            bundle.writestr(member_name, text)
def test_index_template_contains_upload_ui() -> None: def test_index_template_contains_upload_ui() -> None:
html = (ROOT_DIR / "app" / "templates" / "index.html").read_text(encoding="utf-8") html = (ROOT_DIR / "app" / "templates" / "index.html").read_text(encoding="utf-8")
js = (ROOT_DIR / "app" / "static" / "app.js").read_text(encoding="utf-8") js = (ROOT_DIR / "app" / "static" / "app.js").read_text(encoding="utf-8")
@@ -17,10 +47,78 @@ def test_index_template_contains_upload_ui() -> None:
assert "download-md" in js assert "download-md" in js
assert "pollTask" in js assert "pollTask" in js
assert "skill_collection" in html assert "skill_collection" in html
assert "skill-upload-form" in html
assert "/skill-collections/upload" in js
assert "预留后续版本:单个技能集合内的 skill 筛选功能" in html assert "预留后续版本:单个技能集合内的 skill 筛选功能" in html
assert not any(route.path == "/skills" for route in app.routes) assert not any(route.path == "/skills" for route in app.routes)
def test_skill_collection_options_discover_added_directory(tmp_path: Path, monkeypatch) -> None:
    """A collection directory dropped into the skills root is listed automatically."""
    skills_root = tmp_path / "skills"
    collection_dir = skills_root / "interesting_physics_skills"
    skill_dir = collection_dir / "demo-skill"
    skill_dir.mkdir(parents=True)

    index_text = "".join(
        (
            "| Skill | Description | Use When |\n",
            "| --- | --- | --- |\n",
            "| [demo-skill](demo-skill/SKILL.md) | 示例技能 | 后台新增合集 |\n",
        )
    )
    (collection_dir / "index.md").write_text(index_text, encoding="utf-8")
    skill_text = "---\nname: demo-skill\ndescription: 示例技能\n---\n# Demo\n"
    (skill_dir / "SKILL.md").write_text(skill_text, encoding="utf-8")

    monkeypatch.setattr(main, "SKILL_ROOT", skills_root)

    discovered = main._skill_collection_options()

    slugs = [entry["slug"] for entry in discovered]
    assert slugs == ["interesting_physics_skills"]
    assert discovered[0]["skill_count"] == 1
def test_upload_skill_collection_zip_extracts_and_lists(tmp_path: Path, monkeypatch) -> None:
    """A valid zip upload is extracted under the skills root and then listed."""
    skills_root = tmp_path / "skills"
    monkeypatch.setattr(main, "SKILL_ROOT", skills_root)
    # The route stages the archive under UPLOAD_DIR; redirect it so the test
    # never writes into the real uploads directory.
    monkeypatch.setattr(main, "UPLOAD_DIR", tmp_path / "uploads")
    archive_path = tmp_path / "uploaded_skills.zip"
    _write_skill_collection_zip(archive_path)
    upload = FakeUploadFile("uploaded_skills.zip", archive_path.read_bytes())

    payload = asyncio.run(main.upload_skill_collection(upload))

    assert payload["collection"]["slug"] == "uploaded_skills"
    assert payload["collection"]["skill_count"] == 1
    assert (skills_root / "uploaded_skills" / "index.md").exists()
    assert any(collection["slug"] == "uploaded_skills" for collection in payload["collections"])
def test_upload_skill_collection_rejects_non_zip(tmp_path: Path, monkeypatch) -> None:
    """An upload whose file name does not end in .zip is refused with HTTP 400."""
    monkeypatch.setattr(main, "SKILL_ROOT", tmp_path / "skills")
    upload = FakeUploadFile("uploaded_skills.txt", b"not zip")

    rejection = None
    try:
        asyncio.run(main.upload_skill_collection(upload))
    except main.HTTPException as exc:
        rejection = exc

    if rejection is None:
        raise AssertionError("non-zip upload should fail")
    assert rejection.status_code == 400
    assert "zip" in rejection.detail
def test_install_skill_collection_zip_rejects_unsafe_paths(tmp_path: Path, monkeypatch) -> None:
    """An archive with a ``..`` member is refused and leaves nothing behind."""
    skills_root = tmp_path / "skills"
    uploads_root = tmp_path / "uploads"
    monkeypatch.setattr(main, "SKILL_ROOT", skills_root)
    # This upload passes the file-name check, so the route stages it under
    # UPLOAD_DIR; redirect it away from the real uploads directory.
    monkeypatch.setattr(main, "UPLOAD_DIR", uploads_root)
    archive_path = tmp_path / "unsafe.zip"
    with zipfile.ZipFile(archive_path, "w") as archive:
        archive.writestr("../index.md", "bad")
    upload = FakeUploadFile("unsafe.zip", archive_path.read_bytes())

    try:
        asyncio.run(main.upload_skill_collection(upload))
    except main.HTTPException as exc:
        assert exc.status_code == 400
        assert "非法路径" in exc.detail
    else:
        raise AssertionError("unsafe zip should fail")

    # No collection was installed and the staged archive was removed.
    assert not (skills_root / "unsafe").exists()
    assert list(uploads_root.iterdir()) == []
def test_analyze_saved_docx_reports_progress(tmp_path: Path) -> None: def test_analyze_saved_docx_reports_progress(tmp_path: Path) -> None:
updates: list[tuple[int, str]] = [] updates: list[tuple[int, str]] = []
docx_path = tmp_path / "progress.docx" docx_path = tmp_path / "progress.docx"