61 lines
1.9 KiB
Python
61 lines
1.9 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import contextlib
|
|
import json
|
|
import logging
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def _ensure_legacy_module_path() -> None:
    """Make the grandparent directory of this file importable.

    Resolves this file's absolute path, takes the directory two levels up
    (the parent of the directory that contains this file) and puts it at
    the front of ``sys.path`` unless that exact string is already listed.
    """
    legacy_root = str(Path(__file__).resolve().parents[1])
    if legacy_root in sys.path:
        # Already registered; leave the existing position untouched.
        return
    # Index 0 so modules found here take precedence over later entries.
    sys.path.insert(0, legacy_root)
|
|
|
|
|
|
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the knowledge-base builder.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is exactly what a call
            without arguments has always done. Passing an explicit list
            allows programmatic use and testing.

    Returns:
        A namespace with the attributes ``project``, ``output``,
        ``base_name``, ``skip_semantic``, ``embedding_dim`` and
        ``log_level``.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or when the
            arguments are invalid (for example a missing ``--project``).
    """
    parser = argparse.ArgumentParser(description="Build a C/C++ code knowledge base.")
    parser.add_argument("--project", required=True, help="Project directory or single source file.")
    parser.add_argument("--output", required=True, help="Directory for FAISS, metadata and graph files.")
    parser.add_argument("--base-name", default=None, help="Output file prefix. Defaults to project/file name.")
    parser.add_argument(
        "--skip-semantic",
        action="store_true",
        help="Skip LLM summaries and real embeddings; writes zero-vector placeholders.",
    )
    parser.add_argument("--embedding-dim", type=int, default=1024)
    # Kept as a free-form string on purpose: the value is interpreted by the
    # caller, so no ``choices`` restriction is applied here.
    parser.add_argument("--log-level", default="INFO")
    return parser.parse_args(argv)
|
|
|
|
|
|
def main() -> int:
    """Run the knowledge-base build from the command line.

    Parses the CLI options, configures logging, extends ``sys.path`` via
    ``_ensure_legacy_module_path``, calls
    ``graph_builder.build_code_knowledge_base`` and prints the three
    returned paths as a JSON object.

    Returns:
        ``0`` once the JSON result has been printed. Exceptions raised by
        argument parsing or by the builder are not caught here.
    """
    args = parse_args()

    # Only accept attributes of ``logging`` that are real numeric levels.
    # A plain ``getattr(..., logging.INFO)`` would also return unrelated
    # attributes (e.g. ``BASIC_FORMAT``), which makes ``basicConfig`` raise
    # instead of falling back to INFO.
    level = getattr(logging, args.log_level.upper(), None)
    unknown_level = not isinstance(level, int)
    if unknown_level:
        level = logging.INFO
    logging.basicConfig(level=level)
    if unknown_level:
        logging.getLogger(__name__).warning(
            "Unknown log level %r; falling back to INFO.", args.log_level
        )

    _ensure_legacy_module_path()

    # Imported here rather than at module top so the sys.path entry added
    # by the call above is in place before the import is resolved.
    from graph_builder import build_code_knowledge_base

    # Anything the builder writes to stdout is sent to stderr instead.
    with contextlib.redirect_stdout(sys.stderr):
        graph_path, vector_path, metadata_path = build_code_knowledge_base(
            target_path=args.project,
            output_dir=args.output,
            semantic=not args.skip_semantic,
            base_name=args.base_name,
            embedding_dim=args.embedding_dim,
        )

    # Printed after the redirect has ended, so the JSON result goes to the
    # real stdout.
    print(json.dumps(
        {
            "graph_path": graph_path,
            "vector_path": vector_path,
            "metadata_path": metadata_path,
        },
        ensure_ascii=False,
        indent=2,
    ))
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: hand main()'s return value to the interpreter as
    # the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|