Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ site
temp*

# DataJoint secrets (credentials)
.secrets/
.secrets/

# Generated documentation files
src/llms-full.txt
site/llms-full.txt
2 changes: 2 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ services:
# BUILD mode: build static site from pre-executed notebooks
# Install datajoint-python so mkdocstrings can import it to build the API docs
pip install -e /datajoint-python
# Generate llms-full.txt with current git info
python scripts/gen_llms_full.py
mkdocs build --config-file ./mkdocs.yaml
else
echo "Unexpected mode..."
Expand Down
43 changes: 42 additions & 1 deletion scripts/gen_llms_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,14 @@

This script concatenates all markdown documentation into a single file
optimized for LLM consumption.

The generated file is NOT committed to git - it's auto-generated during
the build process with current version metadata.
"""

import json
import subprocess
from datetime import datetime, timezone
from pathlib import Path

# Documentation root
Expand All @@ -24,6 +29,10 @@

HEADER = """# DataJoint Documentation (Full)

Generated: {timestamp}
Commit: {commit}
Branch: {branch}

> DataJoint is a Python framework for building scientific data pipelines with automated computation, integrity constraints, and seamless integration of relational databases with object storage. This documentation covers DataJoint 2.0.

> This file contains the complete documentation for LLM consumption. For an index with links, see /llms.txt
Expand All @@ -33,6 +42,35 @@
"""


def get_git_info() -> dict[str, str]:
    """Collect the version metadata substituted into the file header.

    Returns:
        A dict with three string entries:

        * ``"timestamp"`` - current UTC time as ``YYYY-MM-DD HH:MM:SS UTC``.
        * ``"commit"`` - output of ``git rev-parse --short HEAD``.
        * ``"branch"`` - output of ``git rev-parse --abbrev-ref HEAD``.

        ``"commit"`` and ``"branch"`` fall back to ``"unknown"`` whenever
        git cannot be launched or exits with a non-zero status, so a missing
        or unusable git never aborts the documentation build.
    """
    # git is run from the directory two levels above this script.
    repo_root = Path(__file__).parent.parent

    def _rev_parse(flag: str) -> str:
        """Return ``git rev-parse <flag> HEAD`` output, or "unknown" on failure."""
        try:
            raw = subprocess.check_output(
                ["git", "rev-parse", flag, "HEAD"],
                cwd=repo_root,
                stderr=subprocess.DEVNULL,
            )
        except (subprocess.CalledProcessError, OSError):
            # OSError covers FileNotFoundError (git not installed) as well as
            # PermissionError / NotADirectoryError raised while launching git;
            # CalledProcessError covers a non-zero exit status from git.
            return "unknown"
        return raw.decode().strip()

    return {
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
        "commit": _rev_parse("--short"),
        "branch": _rev_parse("--abbrev-ref"),
    }


def read_markdown_file(filepath: Path) -> str:
"""Read a markdown file and return its content."""
try:
Expand Down Expand Up @@ -79,7 +117,10 @@ def get_doc_files(directory: Path) -> list[Path]:

def generate_llms_full():
"""Generate the llms-full.txt file."""
content_parts = [HEADER]
# Get current git info for version metadata
git_info = get_git_info()
header = HEADER.format(**git_info)
content_parts = [header]

for section_name, section_dir in SECTIONS:
section_path = DOCS_DIR / section_dir
Expand Down
Loading