#!/usr/bin/env bash
set -euo pipefail

# Print the help text and terminate the script.
# Arguments: $1 - exit status to terminate with (default: 0)
# Outputs:   help text on stdout when the status is 0, on stderr otherwise
usage() {
  local status="${1:-0}"
  local out_fd=1
  # Help that was asked for is regular output; help shown because of a usage
  # error is a diagnostic and must not pollute stdout.
  if [[ "$status" != 0 ]]; then
    out_fd=2
  fi
  cat >&"$out_fd" <<'EOF'
Usage: coderlm <command> --prompt <prompt> [--max-depth N]
               [--allowedTools TOOLS] [--dry-run]

Process large codebases using the RLM (Recursive Language Model) pattern.
The agent discovers relevant files itself using shell tools, then peeks,
decomposes, and recursively calls itself on subsets.

Arguments:
  <command>             Agent to use (claude, codex, gemini, or any command)
  --prompt TEXT         The task for the agent (required)
  --max-depth N         Recursion limit (default: 3)
  --allowedTools TOOLS  Passed to a claude agent as --allowedTools (default: Bash)
  --dry-run             Print the agent command line NUL-separated, then exit
  -h, --help            Show this help

Examples:
  coderlm claude --prompt "Find all TODO comments in src/"
  coderlm claude --prompt "Architecture overview of src/ and lib/"
  coderlm codex --prompt "Review **/*.py for security issues"
EOF
  exit "$status"
}

# --- Argument Parsing ---

[[ $# -eq 0 ]] && usage 0
[[ "$1" == "--help" || "$1" == "-h" ]] && usage 0

# The first positional argument is the agent command line. It is split on
# whitespace so that "cmd --flag" style agents become separate argv words.
agent="$1"; shift
if [[ "$agent" == -* ]]; then
  # An option in the agent slot means <command> was left out.
  echo "Error: missing <command> before $agent" >&2; exit 1
fi
read -ra _agent_cmd <<< "$agent"
if [[ ${#_agent_cmd[@]} -eq 0 ]]; then
  echo "Error: <command> must not be empty" >&2; exit 1
fi

prompt=""
max_depth=3
dry_run=false
allowed_tools="Bash"

# Optional sign, leading zeros dropped, then the digits that are kept.
_int_re='^(-?)0*([0-9]+)$'

while [[ $# -gt 0 ]]; do
  case "$1" in
    --prompt)
      [[ $# -lt 2 ]] && { echo "Error: --prompt requires a value" >&2; exit 1; }
      prompt="$2"; shift 2 ;;
    --max-depth)
      [[ $# -lt 2 ]] && { echo "Error: --max-depth requires a value" >&2; exit 1; }
      # max_depth is later evaluated inside $(( )), where an arbitrary string
      # would be treated as an arithmetic expression; accept integers only and
      # strip leading zeros so the value is never read as octal.
      if [[ ! "$2" =~ $_int_re ]]; then
        echo "Error: --max-depth must be an integer, got '$2'" >&2; exit 1
      fi
      max_depth="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"; shift 2 ;;
    --dry-run)
      dry_run=true; shift ;;
    --allowedTools)
      [[ $# -lt 2 ]] && { echo "Error: --allowedTools requires a value" >&2; exit 1; }
      allowed_tools="$2"; shift 2 ;;
    --help|-h)
      usage 0 ;;
    -*)
      echo "Error: unknown option $1" >&2; exit 1 ;;
    *)
      echo "Error: unexpected argument $1" >&2; exit 1 ;;
  esac
done
unset _int_re

if [[ -z "$prompt" ]]; then
  echo "Error: --prompt is required" >&2
  usage 1
fi

# --- Locate bundled bashrlm.sh, resolving symlinks (npm global install uses symlinks) ---

# Follow the whole symlink chain, not just the first hop: a link may point at
# another link, and bashrlm.sh sits next to the real script file.
_s="${BASH_SOURCE[0]}"
while [[ -L "$_s" ]]; do
  _t="$(readlink "$_s")"
  # A relative link target is relative to the directory that holds the link.
  if [[ "$_t" != /* ]]; then
    _t="$(dirname "$_s")/$_t"
  fi
  _s="$_t"
done
_bashrlm_script="$(cd "$(dirname "$_s")" && pwd)/bashrlm.sh"

# Optional companion prompt: a .md file with the same base name as bashrlm.sh.
_bashrlm_prompt=""
if [[ -f "${_bashrlm_script%.*}.md" ]]; then
  _bashrlm_prompt="$(cat "${_bashrlm_script%.*}.md")"
fi
unset _s _t

# --- System Prompt ---

# Build the system prompt from a here-document. The delimiter is unquoted, so
# ${agent}, $((max_depth - 1)), ${max_depth} and ${_bashrlm_prompt} are
# expanded while the text is read into system_prompt.
# `read -d ''` reads up to a NUL byte; the here-document contains none, so read
# stops at end of input and returns non-zero. `|| true` keeps `set -e` from
# aborting the script on that expected status.
# NOTE(review): the sub-agent command in section 2 is emitted for every
# max_depth value, including 0 and negatives; nothing in this block stops the
# recursion — confirm the limit is enforced elsewhere.
read -r -d '' system_prompt <<SYSPROMPT || true
You are an RLM (Recursive Language Model). You answer queries about a codebase by discovering and exploring files through tool use. The codebase exists in your working directory — you must use tools to discover which files exist and inspect their contents.

You will work iteratively: discover, explore, analyze, decompose, and aggregate. Do NOT try to solve everything in one step.

## Tools
Prefer these when available:
- fd — find files by name/pattern (faster alternative to find)
- rg (ripgrep) — search file contents by pattern
- ast-grep (sg) — structural code search (AST-aware)
- eza — list files with metadata (sizes, permissions, tree view)
- jq — query/transform JSON files
- qsv — query/transform CSV files
- head -n N / tail -n N / sed -n 'start,endp' — read file slices
- wc -l — line counts
- cat — read entire small files

## Strategy

### 1. DISCOVER AND EXPLORE
Before doing any analysis, discover what you're working with:
- Use fd or find to locate files relevant to your task
- Use eza --tree or fd to understand directory structure
- Check file count and sizes (wc -l) to gauge scope
- Sample a few files with head to understand structure and conventions
- Use rg to scan for patterns relevant to your task
- Do NOT read all files into context — peek strategically

### 2. DECOMPOSE FOR LARGE FILE SETS
For large file sets (>20 files), break the work into sub-tasks using recursive sub-agent calls:
  coderlm ${agent} --prompt "<sub-task>" --max-depth $((max_depth - 1))

Decomposition strategies:
- By directory: tell sub-agents to focus on specific directories
- By concern: split "find bugs" into "find null-safety issues", "find error handling issues", etc.
- By scope: for very large codebases, partition into manageable regions per sub-agent

Each sub-agent gets its own context window and can handle substantial file sets. Don't over-split — analyze the file count and sizes first, then determine if you can cover the scope in a few sub-agent calls rather than many small ones.

### 3. ITERATE AND BUILD UP
- Write small commands, observe outputs, then decide next steps
- Save intermediate findings to temp files to accumulate results across steps
- String matching (rg) finds WHERE things are; sub-agents understand WHAT things mean
- When results seem wrong or empty, reconsider your approach before continuing

### 4. AGGREGATE AND ANSWER
- Combine sub-agent results and your own findings into a coherent final answer
- Explicitly address the original query — don't just dump raw findings
- If sub-agents returned partial answers, synthesize them

## Constraints
- Recursion budget: max-depth=${max_depth} (current depth uses 1)
- Don't read all files at once — peek strategically, then go deeper where needed
- Prefer rg/sg over reading entire files when searching for specific patterns
- Minimize redundant work — if a sub-agent already analyzed something, use its results

${_bashrlm_prompt}
SYSPROMPT

# --- Execute Agent ---

# Activate context guards in every non-interactive bash subshell the agent spawns
if [[ -f "$_bashrlm_script" ]]; then
  if [[ -n "${BASH_ENV:-}" && "$BASH_ENV" != "$_bashrlm_script" ]]; then
    # A different BASH_ENV is already in effect: generate a small wrapper that
    # sources the existing file first and the guard script second.
    # The template ends in the X's (no ".sh" suffix) because BSD/macOS mktemp
    # only substitutes trailing X's; BASH_ENV does not need an extension.
    _bashrlm_env_tmp="$(mktemp "${TMPDIR:-/tmp}/.bashrlm_env.XXXXXX")"
    # BASH_ENV stays inside double quotes on purpose: bash expands that value
    # before using it as a file name, and double quotes keep that expansion.
    # The guard script path is a literal path, so it is shell-quoted with %q
    # to survive spaces, quotes, '$' or backticks in the install directory.
    printf '. "%s"\n. %q\n' "$BASH_ENV" "$_bashrlm_script" > "$_bashrlm_env_tmp"
    export BASH_ENV="$_bashrlm_env_tmp"
    # NOTE: this trap only fires when this shell itself exits (dry run, errors).
    # run() ends in `exec`, which replaces the shell without running EXIT
    # traps, so on a real invocation the wrapper file is left behind; it has to
    # outlive this script anyway because the agent's subshells source it.
    trap 'rm -f -- "$_bashrlm_env_tmp"' EXIT
  else
    export BASH_ENV="$_bashrlm_script"
  fi
fi

# Launch the given command line, or just show it when dry_run is "true".
# Globals:   dry_run (read)
# Arguments: the command and its arguments, one word each
# Outputs:   in dry-run mode, every word followed by a NUL byte on stdout
# Never returns to the caller when a command is given: the script either
# exits 0 (dry run) or is replaced by the command via exec.
run() {
  case "$dry_run" in
    true)
      # NUL-terminated words keep arguments with spaces/newlines unambiguous.
      printf '%s\0' "$@"
      exit 0
      ;;
    *)
      exec "$@"
      ;;
  esac
}

# Single-string form of the request, for agents that take one prompt argument:
# the system prompt and the user task wrapped in simple tags.
printf -v combined_prompt '<instructions>\n%s\n</instructions>\n<task>\n%s\n</task>' \
  "$system_prompt" "$prompt"

# Choose the argument layout by substring match on the agent string; patterns
# are tried top to bottom and the first match wins.
case "$agent" in
  *claude*)
    # System prompt and task are passed separately, with a tool allow-list.
    run "${_agent_cmd[@]}" -p \
      --append-system-prompt "$system_prompt" \
      --allowedTools "$allowed_tools" \
      "$prompt"
    ;;
  *codex*)
    run "${_agent_cmd[@]}" exec --full-auto "$combined_prompt"
    ;;
  *gemini*)
    run "${_agent_cmd[@]}" -p "$combined_prompt" --yolo
    ;;
  *)
    # Unknown agent: hand over the combined prompt as the only argument.
    run "${_agent_cmd[@]}" "$combined_prompt"
    ;;
esac
