#!/bin/bash
# Test and inspect summarization for loopflow repo
#
# Usage:
#   ./bin/test-summaries          # Show current status
#   ./bin/test-summaries regen    # Force regenerate
#   ./bin/test-summaries analyze  # Show compression analysis
#   ./bin/test-summaries content  # Show summary structure
#   ./bin/test-summaries pathsets # Show pathset cache entries
#   ./bin/test-summaries test     # Run automated tests
#
# Requires: sqlite3, lfops, uv; reads the lfd database in ~/.lf/.

# -e: abort on unhandled failure; -u: error on unset vars;
# pipefail: a pipeline fails if any stage fails (grep pipes below are
# guarded with || where an empty match is acceptable).
set -euo pipefail

# Single source of truth for the summary database location.
DB="$HOME/.lf/lfd.db"
readonly DB

case "${1:-status}" in
  status)
    echo "=== Summary Status ==="
    # Probe for any loopflow summary row; sqlite3 errors (e.g. missing DB)
    # are intentionally silenced — the else branch explains what to do.
    if sqlite3 "$DB" "SELECT 1 FROM summaries WHERE repo LIKE '%loopflow%' LIMIT 1;" 2>/dev/null | grep -q 1; then
      echo "Single paths:"
      sqlite3 -header -column "$DB" "SELECT path, token_budget, length(content) as chars, model FROM summaries WHERE repo LIKE '%loopflow%' AND path NOT LIKE '%,%' ORDER BY path;"
      echo ""
      echo "Pathsets (cached groups):"
      sqlite3 -header -column "$DB" "SELECT path, token_budget, length(content) as chars, model FROM summaries WHERE repo LIKE '%loopflow%' AND path LIKE '%,%' ORDER BY path;"
      printf '\n=== Staleness Check ===\n'
      # grep exits non-zero when nothing matches; || keeps set -e happy.
      lfops summarize --all 2>&1 | grep -E "up to date|stale|regenerating" || echo "(no output)"
    else
      echo "No summaries found. Run: ./bin/test-summaries regen"
    fi
    ;;

  regen)
    echo "=== Regenerating Summaries ==="
    lfops summarize --all -f
    printf '\n=== Result ===\n'
    sqlite3 "$DB" "SELECT path, token_budget, length(content) as chars, model FROM summaries WHERE repo LIKE '%loopflow%';"
    ;;

  analyze)
    echo "=== Compression Analysis ==="
    uv run python3 << 'EOF'
from pathlib import Path
from loopflow.lfops.summarize import gather_source_content, count_tokens, build_exclude_patterns
from loopflow.lf.config import load_config

repo = Path(".")
config = load_config(repo)
exclude = build_exclude_patterns(config)

dirs = ["src", "swift", "tests", "docs", ".claude", "scripts"]
total = 0
data = []

for d in dirs:
    try:
        content = gather_source_content(Path(d), repo, exclude)
        tokens = count_tokens(content)
        if tokens > 0:
            data.append((d, tokens))
            total += tokens
    except Exception:
        # Directory may be absent or unreadable; skip it rather than abort,
        # but don't swallow SystemExit/KeyboardInterrupt like a bare except.
        pass

print(f"{'Directory':<15} {'Tokens':>10} {'%':>8}")
print("-" * 35)
for name, tokens in sorted(data, key=lambda x: -x[1]):
    print(f"{name:<15} {tokens:>10,} {100*tokens/total:>7.1f}%")
print("-" * 35)
print(f"{'TOTAL':<15} {total:>10,}")
EOF
    ;;

  content)
    echo "=== Summary Structure ==="
    # Fetch the root summary once instead of running the same query twice.
    summary=$(sqlite3 "$DB" "SELECT content FROM summaries WHERE repo LIKE '%loopflow%' AND path='.';")
    # || true: an empty summary has no "## " headings; that's not an error.
    printf '%s\n' "$summary" | grep "^## " | head -20 || true
    printf '\n=== Sample (first 50 lines) ===\n'
    printf '%s\n' "$summary" | head -50
    ;;

  pathsets)
    echo "=== Pathset Cache Entries ==="
    echo ""
    echo "Pathsets are cached groups of subdirs (e.g., 'docs,swift,tests')."
    echo "They're used when small subdirs are summarized together."
    echo ""
    sqlite3 -header -column "$DB" "
      SELECT
        path,
        length(path) - length(replace(path, ',', '')) + 1 as num_paths,
        length(content) as chars,
        source_hash,
        model
      FROM summaries
      WHERE repo LIKE '%loopflow%' AND path LIKE '%,%'
      ORDER BY num_paths DESC, path;
    "
    COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM summaries WHERE repo LIKE '%loopflow%' AND path LIKE '%,%';")
    echo ""
    echo "Total pathset entries: $COUNT"
    ;;

  test)
    echo "=== Running Automated Tests ==="
    uv run pytest tests/test_summarize_integration.py -v
    ;;

  *)
    echo "Usage: $0 [status|regen|analyze|content|pathsets|test]"
    exit 1
    ;;
esac
