增加代码知识库;修复文档处理内容;增加API设置
This commit is contained in:
2
RAG-TEST-TOOLS/rag_test_tools/__init__.py
Normal file
2
RAG-TEST-TOOLS/rag_test_tools/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Non-interactive helpers for RAG-TEST-TOOLS."""
|
||||
|
||||
Binary file not shown.
Binary file not shown.
60
RAG-TEST-TOOLS/rag_test_tools/build_code_kb.py
Normal file
60
RAG-TEST-TOOLS/rag_test_tools/build_code_kb.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import contextlib
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _ensure_legacy_module_path() -> None:
    """Put the directory one level above this package at the front of ``sys.path``.

    The directory is the grandparent of this file's resolved location. It is
    inserted only when it is not already listed, so repeated calls leave
    ``sys.path`` unchanged.
    """
    package_parent = str(Path(__file__).resolve().parents[1])
    if package_parent in sys.path:
        return
    # Index 0 gives this directory priority over every other import location.
    sys.path.insert(0, package_parent)
|
||||
|
||||
|
||||
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the code knowledge-base builder.

    Args:
        argv: Argument strings to parse. When ``None`` (the default) argparse
            falls back to ``sys.argv[1:]``, so existing ``parse_args()`` calls
            behave exactly as before. Passing an explicit list lets other
            code and tests drive the parser without touching ``sys.argv``.

    Returns:
        A namespace with the attributes ``project``, ``output``, ``base_name``,
        ``skip_semantic``, ``embedding_dim`` and ``log_level``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or a
            value cannot be converted (for example a non-integer
            ``--embedding-dim``).
    """
    parser = argparse.ArgumentParser(description="Build a C/C++ code knowledge base.")
    parser.add_argument("--project", required=True, help="Project directory or single source file.")
    parser.add_argument("--output", required=True, help="Directory for FAISS, metadata and graph files.")
    parser.add_argument("--base-name", default=None, help="Output file prefix. Defaults to project/file name.")
    parser.add_argument(
        "--skip-semantic",
        action="store_true",
        help="Skip LLM summaries and real embeddings; writes zero-vector placeholders.",
    )
    parser.add_argument("--embedding-dim", type=int, default=1024)
    parser.add_argument("--log-level", default="INFO")
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main() -> int:
    """Build the knowledge base and print the resulting file paths as JSON.

    Steps:
        1. Parse the command-line options and configure logging. A level name
           that is not an attribute of ``logging`` falls back to ``INFO``.
        2. Extend ``sys.path`` and only then import ``graph_builder``, which
           is why the import lives inside the function.
        3. Run the build with stdout redirected to stderr, so anything the
           builder prints does not mix with the JSON report.
        4. Print the three returned paths to stdout as an indented JSON object.

    Returns:
        ``0`` once the report has been printed.
    """
    options = parse_args()
    level = getattr(logging, options.log_level.upper(), logging.INFO)
    logging.basicConfig(level=level)

    # Must run before the import below.
    # NOTE(review): presumably graph_builder sits next to this package - confirm.
    _ensure_legacy_module_path()

    from graph_builder import build_code_knowledge_base

    with contextlib.redirect_stdout(sys.stderr):
        # Unpack inside the redirect so the whole build step stays covered.
        graph_path, vector_path, metadata_path = build_code_knowledge_base(
            target_path=options.project,
            output_dir=options.output,
            semantic=not options.skip_semantic,
            base_name=options.base_name,
            embedding_dim=options.embedding_dim,
        )

    report = {
        "graph_path": graph_path,
        "vector_path": vector_path,
        "metadata_path": metadata_path,
    }
    print(json.dumps(report, ensure_ascii=False, indent=2))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user