#!/usr/bin/env sh
# pre-push hook — mirrors CI checks (version-check + lint + typecheck + test + self-analysis)
# Install once with: git config core.hooksPath .githooks
set -e

# Prefer tools from a project-local virtualenv when one exists. Both the
# Windows layout (.venv/Scripts) and the POSIX layout (.venv/bin) are probed;
# a directory probed later ends up earlier on PATH.
for venv_tool_dir in ".venv/Scripts" ".venv/bin"; do
	if [ -d "$venv_tool_dir" ]; then
		PATH="$venv_tool_dir:$PATH"
	fi
done

# Print the name of the first interpreter found on PATH, trying "python"
# before "python3". Prints an empty line when neither is available.
# Always returns 0; callers detect failure by testing for empty output.
resolve_python_cmd() {
	for interpreter in python python3; do
		if command -v "$interpreter" >/dev/null 2>&1; then
			echo "$interpreter"
			return 0
		fi
	done
	echo ""
}

# Pick the interpreter used for every Python-based check; abort the push when
# none can be found.
PYTHON_CMD="$(resolve_python_cmd)"
case "$PYTHON_CMD" in
	"")
		echo ">>> [pre-push] ERROR: Python interpreter not found."
		echo ">>> [pre-push] Ensure a local venv exists (.venv) or python is on PATH."
		exit 1
		;;
esac

# Marker file (inside the git directory) that stores the commit for which the
# local CI checks last succeeded. Falls back to ".git" when git cannot report
# the git directory.
if ! GIT_DIR_PATH=$(git rev-parse --git-dir 2>/dev/null); then
	GIT_DIR_PATH=".git"
fi
LAST_SUCCESS_SHA_FILE="${GIT_DIR_PATH}/.drift-prepush-last-success"

# Escape hatches (export VAR=1 to skip a specific gate):
# DRIFT_SKIP_CHANGELOG, DRIFT_SKIP_VERSION_BUMP,
# DRIFT_SKIP_LOCKFILE, DRIFT_SKIP_DOCSTRING, DRIFT_SKIP_RISK_AUDIT
# DRIFT_SKIP_EVIDENCE_VALIDATION=1 skips content validation of evidence files (emergency only)
# DRIFT_SKIP_HOOKS=1 skips ALL gates (emergency only)
# DRIFT_PYTEST_WORKERS=N overrides the parallel worker cap (default: 4)

# Extended regex for private/non-tool paths that must never be pushed
# (applied to changed paths by reject_push_for_blocked_paths).
blocked_pattern='^(tagesplanung/)'
# Extended regexes for commit subjects of the form "feat:" / "fix:" with an
# optional "(scope)" before the colon; collect_push_context matches them
# case-insensitively.
feature_pattern='^feat(\(.+\))?:'
fix_pattern='^fix(\(.+\))?:'
# Primary location of the study document; can be overridden from the environment.
STUDY_MD_PATH=${STUDY_MD_PATH:-"docs/STUDY.md"}
# Extended regex for changed paths that count as benchmark feature evidence;
# can be overridden from the environment.
BENCHMARK_EVIDENCE_PATTERN=${BENCHMARK_EVIDENCE_PATTERN:-"^benchmark_results/[^/]*_feature_evidence\.json$"}

# Aggregated push state (captured while iterating updated refs)
# feature_commit_detected / fix_commit_detected: set to 1 once a matching commit subject is seen.
# push_changed_files: newline-separated paths added/copied/modified/renamed by the pushed commits.
# push_ref_updates: newline-separated "local_sha|remote_sha" pairs, one per non-deletion ref update.
feature_commit_detected=0
fix_commit_detected=0
push_changed_files=""
push_ref_updates=""

# Accumulate gate inputs for one ref update reported on the hook's stdin.
# Arguments: $1 local ref, $2 local sha, $3 remote ref, $4 remote sha.
# Globals read:    feature_pattern, fix_pattern.
# Globals written: feature_commit_detected, fix_commit_detected,
#                  push_changed_files (newline-separated paths),
#                  push_ref_updates (newline-separated "local_sha|remote_sha").
# Returns 0; a failing git command aborts the hook through `set -e`.
collect_push_context() {
	local_ref="$1"
	local_sha="$2"
	remote_ref="$3"
	remote_sha="$4"

	# Deletion push (local ref removed) can be ignored here.
	if [ "$local_sha" = "0000000000000000000000000000000000000000" ]; then
		return 0
	fi

	if [ "$remote_sha" = "0000000000000000000000000000000000000000" ]; then
		# New remote ref: inspect every commit that is not yet reachable from a
		# remote-tracking ref. "--not --all" must not be used here: it also
		# excludes the local branch being pushed, which leaves the list empty
		# whenever the pushed commit is reachable from a local ref.
		commit_list=$(git rev-list "$local_sha" --not --remotes)
	else
		commit_list=$(git rev-list "$remote_sha..$local_sha")
	fi

	for commit in $commit_list; do
		subject=$(git log -1 --pretty=%s "$commit")
		if echo "$subject" | grep -Eiq "$feature_pattern"; then
			feature_commit_detected=1
		fi

		if echo "$subject" | grep -Eiq "$fix_pattern"; then
			fix_commit_detected=1
		fi

		# --root makes a parentless (initial) commit list its files as additions;
		# without it diff-tree prints nothing for such a commit.
		changed=$(git diff-tree --root --no-commit-id --name-only --diff-filter=ACMR -r "$commit" || true)
		if [ -n "$changed" ]; then
			push_changed_files="$push_changed_files
$changed"
		fi
	done

	push_ref_updates="$push_ref_updates
$local_sha|$remote_sha"
}

# Feature-Evidence gate. Applies only when a "feat:" commit was detected and
# then requires, in this order:
#   1) a changed path under tests/ and one under benchmark_results/ or audit_results/,
#   2) a changed path matching BENCHMARK_EVIDENCE_PATTERN,
#   3) successful content validation of that evidence file (only when the file
#      exists in the working tree and DRIFT_SKIP_EVIDENCE_VALIDATION is not 1),
#   4) an update to the study document, when STUDY_MD_PATH or
#      benchmark_results/Study.md exists.
# Globals read: feature_commit_detected, push_changed_files, push_ref_updates,
#               BENCHMARK_EVIDENCE_PATTERN, STUDY_MD_PATH, PYTHON_CMD.
# Returns 0 when the gate passes or does not apply; otherwise prints the
# reason and exits the hook with status 1.
enforce_feature_evidence_gate() {
	if [ "$feature_commit_detected" -ne 1 ]; then
		return 0
	fi

	has_tests=0
	has_empirical=0
	has_benchmark_feature_evidence=0
	effective_study_md_path=""

	if echo "$push_changed_files" | grep -Eq '^tests/'; then
		has_tests=1
	fi

	if echo "$push_changed_files" | grep -Eq '^(benchmark_results/|audit_results/)'; then
		has_empirical=1
	fi

	if [ "$has_tests" -ne 1 ] || [ "$has_empirical" -ne 1 ]; then
		echo ">>> [pre-push] ERROR: Feature-Evidence-Gate failed."
		echo ">>> [pre-push] Detected feature commit (feat:)."
		echo ">>> [pre-push] Required for feature pushes:"
		echo ">>> [pre-push]   1) test changes under tests/"
		echo ">>> [pre-push]   2) empirical artifact under benchmark_results/ or audit_results/"
		echo ">>> [pre-push] Push blocked until empirical evidence is included."
		exit 1
	fi

	evidence_file=""
	if echo "$push_changed_files" | grep -Eq "$BENCHMARK_EVIDENCE_PATTERN"; then
		has_benchmark_feature_evidence=1
		# Capture the first matching evidence file path for content validation
		evidence_file=$(echo "$push_changed_files" | grep -E "$BENCHMARK_EVIDENCE_PATTERN" | head -n 1)
	fi

	if [ "$has_benchmark_feature_evidence" -ne 1 ]; then
		echo ">>> [pre-push] ERROR: Feature-Evidence-Gate failed."
		echo ">>> [pre-push] Feature push requires a versioned benchmark evidence file"
		echo ">>> [pre-push] (benchmark_results/vX.Y.Z_feature_evidence.json or similar)."
		echo ">>> [pre-push]"
		echo ">>> [pre-push] Generate one with:"
		echo ">>> [pre-push]   python scripts/generate_feature_evidence.py --version X.Y.Z --slug my-feature"
		echo ">>> [pre-push] Add the generated file in this push and try again."
		exit 1
	fi

	# Gate 2b — Evidence Content Validation
	# Verify the evidence file was generated by the authorised script (not hand-crafted).
	if [ "${DRIFT_SKIP_EVIDENCE_VALIDATION:-0}" = "1" ]; then
		echo ">>> [pre-push] WARN: Evidence content validation skipped (DRIFT_SKIP_EVIDENCE_VALIDATION=1)."
	elif [ -n "$evidence_file" ] && [ -f "$evidence_file" ]; then
		# Determine the push head SHA for ancestor checks (use first non-zero local_sha)
		push_head_sha=""
		while IFS='|' read -r local_sha _remote_sha; do
			if [ -n "$local_sha" ] && [ "$local_sha" != "0000000000000000000000000000000000000000" ]; then
				push_head_sha="$local_sha"
				break
			fi
		done <<EOF
$push_ref_updates
EOF
		push_head_arg=""
		if [ -n "$push_head_sha" ]; then
			push_head_arg="--push-head $push_head_sha"
		fi

		# $push_head_arg is intentionally unquoted: it must split into the flag
		# and its value, or vanish entirely when empty.
		if ! $PYTHON_CMD scripts/validate_feature_evidence.py "$evidence_file" \
				--require-generated-by $push_head_arg 2>&1; then
			echo ">>> [pre-push] ERROR: Feature-Evidence-Gate (content validation) failed."
			echo ">>> [pre-push] The evidence file '$evidence_file' failed validation."
			echo ">>> [pre-push]"
			echo ">>> [pre-push] Regenerate it with:"
			echo ">>> [pre-push]   python scripts/generate_feature_evidence.py --version X.Y.Z --slug my-feature"
			echo ">>> [pre-push] Skip (emergency only): DRIFT_SKIP_EVIDENCE_VALIDATION=1 git push"
			exit 1
		fi
	fi

	if [ -f "$STUDY_MD_PATH" ]; then
		effective_study_md_path="$STUDY_MD_PATH"
	elif [ -f "benchmark_results/Study.md" ]; then
		effective_study_md_path="benchmark_results/Study.md"
	fi

	if [ -z "$effective_study_md_path" ]; then
		echo ">>> [pre-push] WARNING: Study.md not found (checked $STUDY_MD_PATH and benchmark_results/Study.md)."
		echo ">>> [pre-push] WARNING: Skipping Study freshness block because the study file may still need to be created."
		return 0
	fi

	# Compare the path literally and against whole lines (-F -x). Interpolating
	# it into a regex would let "." match any character, so a look-alike path
	# could satisfy the check, and other metacharacters in a custom
	# STUDY_MD_PATH could break the match altogether.
	has_study_update=0
	if echo "$push_changed_files" | grep -Fxq -- "$effective_study_md_path"; then
		has_study_update=1
	fi

	if [ "$has_study_update" -ne 1 ]; then
		echo ">>> [pre-push] ERROR: Feature-Evidence-Gate failed: Study freshness check."
		echo ">>> [pre-push] Detected feature commit (feat:) without Study update in $effective_study_md_path."
		echo ">>> [pre-push] Update the Features/Changelog section in $effective_study_md_path and include it in this push."
		exit 1
	fi

	# NOTE: Intentionally no JSON timestamp coherence check here.
	# Reason: reliable timestamp parsing/comparison in strict POSIX sh without adding
	# fragile dependencies (jq/python/date variants) introduces avoidable hook fragility.
}

# Compare two dotted versions by their first three dot-separated fields.
# Arguments: $1 candidate version, $2 baseline version.
# Returns 0 only when the candidate is strictly greater than the baseline;
# returns 1 when it is equal or lower, or when any of the six fields is not a
# plain run of digits.
version_is_strictly_greater() {
	candidate="$1"
	baseline="$2"

	# Pass 1: every field of both versions must be numeric before anything is compared.
	for field_no in 1 2 3; do
		for version_text in "$candidate" "$baseline"; do
			field_value=$(echo "$version_text" | cut -d. -f"$field_no")
			case "$field_value" in
				'' | *[!0-9]*) return 1 ;;
			esac
		done
	done

	# Pass 2: the first differing field (major, then minor, then patch) decides.
	for field_no in 1 2 3; do
		candidate_field=$(echo "$candidate" | cut -d. -f"$field_no")
		baseline_field=$(echo "$baseline" | cut -d. -f"$field_no")
		if [ "$candidate_field" -gt "$baseline_field" ]; then
			return 0
		fi
		if [ "$candidate_field" -lt "$baseline_field" ]; then
			return 1
		fi
	done

	# All three fields are equal.
	return 1
}

# CHANGELOG gate: a push containing a feat: or fix: commit must also change
# CHANGELOG.md at the repository root.
# Globals read: DRIFT_SKIP_CHANGELOG, feature_commit_detected,
#               fix_commit_detected, push_changed_files.
# Returns 0 when the gate passes, is skipped or does not apply; exits the hook
# with status 1 otherwise.
enforce_changelog_gate() {
	case "${DRIFT_SKIP_CHANGELOG:-0}" in
		1)
			echo ">>> [pre-push] WARN: CHANGELOG gate skipped (DRIFT_SKIP_CHANGELOG=1)."
			return 0
			;;
	esac

	if [ "$feature_commit_detected" -eq 1 ] || [ "$fix_commit_detected" -eq 1 ]; then
		if echo "$push_changed_files" | grep -Eq '^CHANGELOG\.md$'; then
			return 0
		fi

		echo ">>> [pre-push] ERROR: CHANGELOG gate failed."
		echo ">>> [pre-push] feat: or fix: commit detected but CHANGELOG.md not updated."
		echo ">>> [pre-push] Add a changelog entry before pushing."
		exit 1
	fi
}

# Version bump gate: when pyproject.toml is part of the push, its version must
# be a MAJOR.MINOR.PATCH value strictly greater than the newest semver tag.
# Globals read: DRIFT_SKIP_VERSION_BUMP, push_changed_files.
# Reads pyproject.toml from the working tree; consults local tags first and
# the tags of remote "origin" only when no local semver tag exists.
# Returns 0 when the gate passes, is skipped, does not apply, or no tag exists
# to compare against; exits the hook with status 1 otherwise.
enforce_version_bump_gate() {
	if [ "${DRIFT_SKIP_VERSION_BUMP:-0}" = "1" ]; then
		echo ">>> [pre-push] WARN: Version bump gate skipped (DRIFT_SKIP_VERSION_BUMP=1)."
		return 0
	fi

	if ! echo "$push_changed_files" | grep -Eq '^pyproject\.toml$'; then
		return 0
	fi

	# "sed -n ... p" prints only a successfully extracted MAJOR.MINOR.PATCH.
	# Without -n/p, sed echoes a non-matching line unchanged, so the emptiness
	# check below would never catch a malformed version string (and such a
	# version would pass silently whenever no tag exists yet).
	current_version=$(grep -E '^version[[:space:]]*=' pyproject.toml | head -n 1 | sed -nE 's/^[^"]*"([0-9]+\.[0-9]+\.[0-9]+)".*$/\1/p' || true)
	if [ -z "$current_version" ]; then
		echo ">>> [pre-push] ERROR: Version bump gate failed."
		echo ">>> [pre-push] pyproject.toml was modified but version ($current_version) is not"
		echo ">>> [pre-push] greater than the last git tag (unknown)."
		echo ">>> [pre-push] Bump the version in pyproject.toml or revert the file."
		exit 1
	fi

	# Prefer local tags to avoid unnecessary network latency.
	# Fallback to remote tags when no local semver tag is available.
	last_tag=$(git tag -l --sort=-version:refname 'v[0-9]*.[0-9]*.[0-9]*' | head -n1 || true)
	if [ -z "$last_tag" ]; then
		last_tag=$(git ls-remote --tags --refs origin 2>/dev/null | grep -Eo 'v[0-9]+\.[0-9]+\.[0-9]+$' | sort -V | tail -n1 || true)
	fi
	if [ -z "$last_tag" ]; then
		echo ">>> [pre-push] WARN: No git tag found. Skipping version bump comparison."
		return 0
	fi

	# Strip the leading "v" so the tag compares as a bare version.
	last_version=$(echo "$last_tag" | sed -E 's/^v//')
	if ! version_is_strictly_greater "$current_version" "$last_version"; then
		echo ">>> [pre-push] ERROR: Version bump gate failed."
		echo ">>> [pre-push] pyproject.toml was modified but version ($current_version) is not"
		echo ">>> [pre-push] greater than the last git tag ($last_tag)."
		echo ">>> [pre-push] Bump the version in pyproject.toml or revert the file."
		exit 1
	fi
}

# Lock-file sync gate: a push that changes pyproject.toml must also change
# uv.lock, and uv.lock must exist in the working tree.
# Globals read: DRIFT_SKIP_LOCKFILE, push_changed_files.
# Returns 0 when the gate passes, is skipped or does not apply; exits the hook
# with status 1 otherwise.
enforce_lockfile_sync_gate() {
	case "${DRIFT_SKIP_LOCKFILE:-0}" in
		1)
			echo ">>> [pre-push] WARN: Lock-file sync gate skipped (DRIFT_SKIP_LOCKFILE=1)."
			return 0
			;;
	esac

	if echo "$push_changed_files" | grep -Eq '^pyproject\.toml$'; then
		if [ ! -f "uv.lock" ]; then
			echo ">>> [pre-push] ERROR: Lock-file sync gate failed."
			echo ">>> [pre-push] uv.lock not found - run 'uv lock' first"
			exit 1
		fi

		if echo "$push_changed_files" | grep -Eq '^uv\.lock$'; then
			return 0
		fi

		echo ">>> [pre-push] ERROR: Lock-file sync gate failed."
		echo ">>> [pre-push] pyproject.toml was modified but uv.lock was not updated."
		echo ">>> [pre-push] Run 'uv lock' and include uv.lock in the push."
		exit 1
	fi

	return 0
}

# Public API docstring gate: for every pushed ref update, scan the diff of
# src/drift/ and flag each file that gains a top-level "def" whose name starts
# with a lowercase letter without also gaining at least one triple-quote line.
# Globals read: DRIFT_SKIP_DOCSTRING, push_changed_files, push_ref_updates.
# Returns 0 when the gate passes, is skipped or does not apply; otherwise
# prints the affected files and exits the hook with status 1.
enforce_public_api_docstring_gate() {
	if [ "${DRIFT_SKIP_DOCSTRING:-0}" = "1" ]; then
		echo ">>> [pre-push] WARN: Public API docstring gate skipped (DRIFT_SKIP_DOCSTRING=1)."
		return 0
	fi

	if ! echo "$push_changed_files" | grep -Eq '^src/drift/'; then
		return 0
	fi

	missing_files=""

	while IFS='|' read -r local_sha remote_sha; do

		# Skip the blank separator lines in push_ref_updates.
		if [ -z "$local_sha" ] || [ -z "$remote_sha" ]; then
			continue
		fi

		if [ "$remote_sha" = "0000000000000000000000000000000000000000" ]; then
			# New remote ref: diff against the merge base with main (or master),
			# falling back to the tip commit alone when neither can be resolved.
			base_sha=$(git merge-base "$local_sha" main 2>/dev/null || true)
			if [ -z "$base_sha" ]; then
				base_sha=$(git merge-base "$local_sha" master 2>/dev/null || true)
			fi
			if [ -n "$base_sha" ]; then
				diff_text=$(git diff "$base_sha..$local_sha" -- src/drift/ || true)
			else
				diff_text=$(git diff "$local_sha~1" "$local_sha" -- src/drift/ 2>/dev/null || true)
			fi
		else
			diff_text=$(git diff "$remote_sha..$local_sha" -- src/drift/ || true)
		fi

		# Weaker, intentionally defensive check:
		# per changed file, if a new top-level public def is added (+def name(...), name starts with [a-z]),
		# require at least one newly added triple-quote line in the same file diff.
		# This favors false-negatives over false-positives to reduce hook fragility.
		missing_in_update=$(echo "$diff_text" | awk '
			/^\+\+\+ b\/src\/drift\// {
				current_file = substr($0, 7)
				next
			}
			/^\+def [a-z][a-zA-Z0-9_]*\(/ {
				if (current_file != "") {
					has_public[current_file] = 1
				}
				next
			}
			/^\+[[:space:]]*"""/ || /^\+[[:space:]]*[\047][\047][\047]/ {
				if (current_file != "") {
					has_docstring[current_file] = 1
				}
				next
			}
			END {
				for (f in has_public) {
					if (!(f in has_docstring)) {
						print f
					}
				}
			}
		' || true)

		if [ -n "$missing_in_update" ]; then
			missing_files="$missing_files
$missing_in_update"
		fi
	done <<EOF
$push_ref_updates
EOF

	# Drop blank lines and duplicates collected across ref updates.
	missing_files=$(echo "$missing_files" | sed '/^[[:space:]]*$/d' | sort -u || true)
	if [ -n "$missing_files" ]; then
		echo ">>> [pre-push] ERROR: Public API docstring gate failed."
		echo ">>> [pre-push] New public function(s) in src/drift/ appear to lack docstrings."
		# Name the offending files so the author knows where to look.
		echo ">>> [pre-push] Affected file(s):"
		echo "$missing_files" | sed 's/^/>>>   - /'
		echo ">>> [pre-push] Add docstrings to all new public functions before pushing."
		exit 1
	fi
}

# Risk audit gate: a push touching src/drift/signals/, src/drift/ingestion/ or
# src/drift/output/ must also change at least one of the four audit artifacts
# under audit_results/, and all four artifacts must exist in the working tree.
# Globals read: DRIFT_SKIP_RISK_AUDIT, push_changed_files.
# Returns 0 when the gate passes, is skipped or does not apply; exits the hook
# with status 1 otherwise.
enforce_risk_audit_gate() {
	case "${DRIFT_SKIP_RISK_AUDIT:-0}" in
		1)
			echo ">>> [pre-push] WARN: Risk audit gate skipped (DRIFT_SKIP_RISK_AUDIT=1)."
			return 0
			;;
	esac

	# Nothing to enforce unless a signal/ingestion/output file was changed.
	if ! echo "$push_changed_files" | grep -Eq '^src/drift/(signals|ingestion|output)/'; then
		return 0
	fi

	# Signal changes detected — require at least one audit artifact update
	if ! echo "$push_changed_files" | grep -Eq '^audit_results/(fmea_matrix|stride_threat_model|fault_trees|risk_register)\.md$'; then
		echo ">>> [pre-push] ERROR: Risk audit gate failed (POLICY §18)."
		echo ">>> [pre-push] Signal/ingestion/output changes detected without audit artifact updates."
		echo ">>> [pre-push]"
		echo ">>> [pre-push] Changed signal-relevant files:"
		echo "$push_changed_files" | grep -E '^src/drift/(signals|ingestion|output)/' | sed 's/^/>>>   - /'
		echo ">>> [pre-push]"
		echo ">>> [pre-push] Required: Update at least one of:"
		echo ">>>   - audit_results/fmea_matrix.md"
		echo ">>>   - audit_results/stride_threat_model.md"
		echo ">>>   - audit_results/fault_trees.md"
		echo ">>>   - audit_results/risk_register.md"
		echo ">>> [pre-push]"
		echo ">>> [pre-push] Skip with: DRIFT_SKIP_RISK_AUDIT=1 git push (emergency only)"
		exit 1
	fi

	# Also verify the four audit artifacts still exist (deletion protection)
	for artifact_stem in fmea_matrix stride_threat_model fault_trees risk_register; do
		artifact="audit_results/${artifact_stem}.md"
		if [ ! -f "$artifact" ]; then
			echo ">>> [pre-push] ERROR: Protected audit artifact missing: $artifact"
			echo ">>> [pre-push] POLICY §18.7 prohibits deletion of audit artifacts."
			exit 1
		fi
	done
}

# Abort the push when any commit being pushed for this ref update adds,
# copies, modifies or renames a path matching blocked_pattern.
# Arguments: $1 local ref, $2 local sha, $3 remote ref, $4 remote sha.
# Globals read: blocked_pattern.
# Returns 0 when nothing is blocked; otherwise prints the offending paths and
# exits the hook with status 1.
reject_push_for_blocked_paths() {
	local_ref="$1"
	local_sha="$2"
	remote_ref="$3"
	remote_sha="$4"

	# Deletion push (local ref removed) can be ignored here.
	if [ "$local_sha" = "0000000000000000000000000000000000000000" ]; then
		return 0
	fi

	if [ "$remote_sha" = "0000000000000000000000000000000000000000" ]; then
		# New remote ref: inspect every commit that is not yet reachable from a
		# remote-tracking ref. "--not --all" must not be used here: it also
		# excludes the local branch being pushed, which leaves the list empty
		# whenever the pushed commit is reachable from a local ref, so blocked
		# paths on a new branch would slip through.
		commit_list=$(git rev-list "$local_sha" --not --remotes)
	else
		commit_list=$(git rev-list "$remote_sha..$local_sha")
	fi

	for commit in $commit_list; do
		# --root makes a parentless (initial) commit list its files as additions;
		# without it diff-tree prints nothing for such a commit.
		blocked_hits=$(git diff-tree --root --no-commit-id --name-only --diff-filter=ACMR -r "$commit" | grep -E "$blocked_pattern" || true)
		if [ -n "$blocked_hits" ]; then
			echo ">>> [pre-push] ERROR: Blocked paths detected in commits for $local_ref -> $remote_ref"
			echo "$blocked_hits"
			echo ">>> [pre-push] Remove these files from the commit history before pushing."
			exit 1
		fi
	done
}

# Each stdin line is split into four fields describing one ref update:
#   <local ref> <local sha> <remote ref> <remote sha>
# Every line is consumed; with DRIFT_SKIP_HOOKS=1 the lines are read but not inspected.
while read local_ref local_sha remote_ref remote_sha; do
	if [ "${DRIFT_SKIP_HOOKS:-0}" != "1" ]; then
		# Fail fast on blocked paths, then record commits/files for the gates below.
		reject_push_for_blocked_paths "$local_ref" "$local_sha" "$remote_ref" "$remote_sha"
		collect_push_context "$local_ref" "$local_sha" "$remote_ref" "$remote_sha"
	fi
done

# Run the policy gates over the aggregated push state. Each gate either
# returns (pass / skipped / not applicable) or exits the hook with status 1.
if [ "${DRIFT_SKIP_HOOKS:-0}" = "1" ]; then
	echo ">>> [pre-push] WARN: All gates skipped (DRIFT_SKIP_HOOKS=1)."
else
	enforce_feature_evidence_gate
	enforce_changelog_gate
	enforce_version_bump_gate
	enforce_lockfile_sync_gate
	enforce_public_api_docstring_gate
	enforce_risk_audit_gate
fi

# Retry shortcut: when the marker file records the current HEAD as the last
# commit that passed the local CI checks, and every pushed ref update points
# at that same commit, skip the expensive checks and let the push continue.
can_reuse_previous_local_ci_success=0
current_head_sha=$(git rev-parse HEAD 2>/dev/null || true)

last_success_sha=""
if [ -n "$current_head_sha" ] && [ -f "$LAST_SUCCESS_SHA_FILE" ]; then
	# Strip CR/LF so a marker written with any line ending compares cleanly.
	last_success_sha=$(cat "$LAST_SUCCESS_SHA_FILE" | tr -d '\r\n')
fi

if [ -n "$last_success_sha" ] && [ "$last_success_sha" = "$current_head_sha" ]; then
	# Assume reuse is possible until a pushed commit other than HEAD shows up.
	can_reuse_previous_local_ci_success=1
	while IFS='|' read -r pushed_sha target_sha; do
		# Blank or incomplete lines carry no ref update and are skipped.
		if [ -n "$pushed_sha" ] && [ -n "$target_sha" ] && [ "$pushed_sha" != "$current_head_sha" ]; then
			can_reuse_previous_local_ci_success=0
			break
		fi
	done <<EOF
$push_ref_updates
EOF
fi

case "$can_reuse_previous_local_ci_success" in
	1)
		echo ">>> [pre-push] Reusing successful local CI checks for unchanged commit $current_head_sha."
		echo ">>> [pre-push] Gates were re-evaluated; expensive checks are skipped for this retry."
		echo ">>> [pre-push] Push continues."
		exit 0
		;;
esac

# Local CI run. The script runs under `set -e`, so the first failing command
# below aborts the hook (and thereby the push) with that command's status.
# NOTE(review): the step labels say "/6" but only steps 1-5 appear here — confirm the intended count.
echo ">>> [pre-push] Starting local CI checks..."

# Unset git context variables that git injects when running hooks.
# Without this, tests that create git repos in tmp dirs inherit GIT_DIR
# from the parent repo and fail with exit 128 ("not a git repository").
unset GIT_DIR GIT_WORK_TREE GIT_INDEX_FILE GIT_OBJECT_DIRECTORY GIT_COMMON_DIR

# 1. Version format (SemVer)
echo ">>> [1/6] Checking SemVer version..."
"$PYTHON_CMD" scripts/check_version.py --check-semver

# 1b. Release discipline
echo ">>> [1b/6] Checking release discipline..."
"$PYTHON_CMD" scripts/check_release_discipline.py

# 1c. Model consistency (signal count, weights, version in docs)
echo ">>> [1c/6] Checking model consistency..."
"$PYTHON_CMD" scripts/check_model_consistency.py

# 1d. Public repo hygiene
echo ">>> [1d/6] Checking public repo hygiene..."
"$PYTHON_CMD" scripts/check_repo_hygiene.py --config .github/repo-guard.blocklist --root-allowlist .github/repo-root-allowlist

# 2. Lint
# ruff is resolved through PATH (which may have been extended with the .venv directories at the top of the script).
echo ">>> [2/6] Running ruff lint..."
ruff check src/ tests/

# 3. Type checking
echo ">>> [3/6] Running mypy..."
"$PYTHON_CMD" -m mypy src/drift

# 4. Tests with coverage (exclude slow smoke tests that clone external repos)
echo ">>> [4/6] Running pytest + coverage (excl. smoke tests)..."
# Parallel mode is chosen when pytest's --help output advertises a worker-count
# option (presumably provided by pytest-xdist); otherwise tests run serially.
if "$PYTHON_CMD" -m pytest --help 2>/dev/null | grep -Eq -- "--numprocesses|-n[[:space:]]+numprocesses|-n[[:space:]]+NUM"; then
	# Worker count defaults to 4 and can be overridden with DRIFT_PYTEST_WORKERS.
	DRIFT_WORKERS="${DRIFT_PYTEST_WORKERS:-4}"
	echo ">>> [4/6] xdist detected: running tests in parallel mode (-n ${DRIFT_WORKERS}, capped to avoid OOM)."
	"$PYTHON_CMD" -m pytest -q --tb=short -n "${DRIFT_WORKERS}" --dist=loadscope --cov --cov-report= --ignore=tests/test_smoke_real_repos.py
else
	echo ">>> [4/6] xdist not available: running tests in serial mode."
	"$PYTHON_CMD" -m pytest -q --tb=short --cov --cov-report= --ignore=tests/test_smoke_real_repos.py
fi

# 5. Self-analysis
# Output is discarded; --exit-zero presumably keeps findings from failing the hook — confirm against the drift CLI.
echo ">>> [5/6] Running self-analysis..."
drift analyze --repo . --format json --exit-zero > /dev/null

# Record the commit that just passed so a retried push of the same HEAD can skip these checks.
if [ -n "$current_head_sha" ]; then
	printf "%s\n" "$current_head_sha" > "$LAST_SUCCESS_SHA_FILE"
fi

echo ">>> [pre-push] All checks passed. Push continues."