增加代码知识库;修复文档处理内容;增加API设置

This commit is contained in:
2026-05-16 20:20:10 +08:00
parent 69b49d28b2
commit 7aa3ce3294
119 changed files with 182273 additions and 793 deletions

View File

@@ -0,0 +1,60 @@
from __future__ import annotations
import argparse
import contextlib
import json
import logging
import sys
from pathlib import Path
def _ensure_legacy_module_path() -> None:
    """Make the parent of this script's directory importable.

    Resolves this file's absolute path, takes the directory two levels up
    (``parents[1]``) and puts it at the front of ``sys.path`` unless it is
    already listed there, so repeated calls never add duplicates.
    """
    legacy_root = str(Path(__file__).resolve().parents[1])
    if legacy_root in sys.path:
        return
    # Front of the list so modules found here win over same-named ones
    # that appear later on the search path.
    sys.path.insert(0, legacy_root)
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the knowledge-base builder.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so existing zero-argument callers behave exactly as before.
            Passing an explicit list allows programmatic use and testing.

    Returns:
        A namespace with the attributes ``project``, ``output``,
        ``base_name``, ``skip_semantic``, ``embedding_dim`` and
        ``log_level``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            a value cannot be converted (for example a non-integer
            ``--embedding-dim``).
    """
    parser = argparse.ArgumentParser(description="Build a C/C++ code knowledge base.")
    parser.add_argument("--project", required=True, help="Project directory or single source file.")
    parser.add_argument("--output", required=True, help="Directory for FAISS, metadata and graph files.")
    parser.add_argument("--base-name", default=None, help="Output file prefix. Defaults to project/file name.")
    parser.add_argument(
        "--skip-semantic",
        action="store_true",
        help="Skip LLM summaries and real embeddings; writes zero-vector placeholders.",
    )
    parser.add_argument("--embedding-dim", type=int, default=1024)
    parser.add_argument("--log-level", default="INFO")
    return parser.parse_args(argv)
def main() -> int:
    """Run the knowledge-base build and report the produced paths as JSON.

    Parses the command line, configures logging (an unrecognised
    ``--log-level`` name falls back to ``logging.INFO``), makes the legacy
    module directory importable, and calls ``build_code_knowledge_base``.
    Anything written to stdout during the build is redirected to stderr;
    afterwards the three returned paths are printed to stdout as one JSON
    object.

    Returns:
        ``0`` once the JSON summary has been printed. Exceptions raised by
        the build or by JSON serialisation propagate to the caller.
    """
    args = parse_args()

    level = getattr(logging, args.log_level.upper(), logging.INFO)
    logging.basicConfig(level=level)

    # Must run before the import below: graph_builder is resolved through
    # the sys.path entry added here.
    _ensure_legacy_module_path()
    from graph_builder import build_code_knowledge_base

    build_options = {
        "target_path": args.project,
        "output_dir": args.output,
        "semantic": not args.skip_semantic,
        "base_name": args.base_name,
        "embedding_dim": args.embedding_dim,
    }

    # Route stdout output produced during the build to stderr; presumably
    # this keeps stdout reserved for the JSON summary printed below.
    with contextlib.redirect_stdout(sys.stderr):
        graph_path, vector_path, metadata_path = build_code_knowledge_base(**build_options)

    report = {
        "graph_path": graph_path,
        "vector_path": vector_path,
        "metadata_path": metadata_path,
    }
    print(json.dumps(report, ensure_ascii=False, indent=2))
    return 0
# Script entry point: propagate main()'s return value as the process exit
# status (sys.exit raises SystemExit with that value).
if __name__ == "__main__":
    sys.exit(main())