Coverage for src/harnessutils/config.py: 83% (102 statements)
« prev ^ index » next — coverage.py v7.13.2, created at 2026-02-12 22:41 -0600
1"""Configuration schema for harness-utils."""
3from dataclasses import dataclass, field
4from pathlib import Path
5from typing import Any
@dataclass
class TruncationConfig:
    """Settings for Tier 1: output truncation.

    The basic caps bound raw output size; the Phase 2 options switch on
    content-aware trimming that keeps the most informative spans.
    """

    # Basic size caps.
    max_lines: int = 2000
    max_bytes: int = 50 * 1024  # 50KB
    direction: str = "head"  # which end to keep: "head" or "tail"

    # Phase 2: content-aware truncation.
    max_tokens: int = 2000  # token-based limit
    use_content_aware: bool = True  # enable content-aware truncation
    preserve_errors: bool = True  # keep all errors/warnings in logs
    json_array_limit: int = 10  # keep first/last N items in JSON arrays
    stacktrace_frame_limit: int = 20  # keep top/bottom N frames in stacktraces
@dataclass
class PruningConfig:
    """Settings for Tier 2: selective pruning.

    Token thresholds gate when pruning may run; the importance-scoring
    weights (Phase 1.2) rank tool outputs for removal, and the
    deduplication knobs (Phase 1.3) target near-identical outputs first.
    """

    # Token-budget thresholds.
    prune_protect: int = 40_000  # keep recent 40K tokens
    prune_minimum: int = 20_000  # only prune if saves 20K+ tokens
    protect_turns: int = 2  # protect last 2 turns
    protected_tools: list[str] = field(
        default_factory=lambda: ["skill_execution", "subtask_invocation"]
    )

    # Importance scoring (Phase 1.2).
    use_importance_scoring: bool = True  # enable smart pruning
    recency_weight: float = 1.0  # weight for recency score
    size_weight: float = -0.5  # weight for size penalty (negative)
    semantic_weight: float = 2.0  # weight for semantic importance
    tool_priority_weight: float = 1.5  # weight for tool type priority
    recency_decay: float = 0.1  # exponential decay rate per turn

    # Tool importance map (higher = more important).
    tool_importance: dict[str, float] = field(
        default_factory=lambda: {
            "read": 50.0,
            "write": 100.0,  # code changes are important
            "edit": 100.0,
            "grep": 30.0,  # search results often repetitive
            "glob": 30.0,
            "bash": 70.0,
            "skill_execution": 150.0,  # complex operations
            "subtask_invocation": 150.0,
            "error": 200.0,  # critical for debugging
        }
    )

    # Semantic boost scores.
    error_boost: float = 500.0  # boost for outputs with errors
    warning_boost: float = 200.0  # boost for warnings
    user_requested_boost: float = 300.0  # user explicitly asked for this

    # Deduplication (Phase 1.3).
    detect_duplicates: bool = True  # enable duplicate detection
    similarity_threshold: float = 0.8  # similarity threshold (0.0-1.0)
    duplicate_lookback: int = 20  # check last N outputs for duplicates
@dataclass
class TokenConfig:
    """Settings for token estimation."""

    # Rough heuristic used to convert character counts to token counts.
    chars_per_token: int = 4
@dataclass
class ModelLimitsConfig:
    """Settings for model limits.

    Fallback limits used when a model-specific limit is not known.
    """

    default_context_limit: int = 200_000  # context window size (tokens)
    default_output_limit: int = 8_192  # max output size (tokens)
@dataclass
class StorageConfig:
    """Settings for the storage layer."""

    # Root directory for persisted data.
    base_path: Path = field(default_factory=lambda: Path("data"))
    # How long truncated outputs are retained, in days.
    retention_days: int = 7
@dataclass
class SummarizationConfig:
    """Settings for Tier 3: summarization."""

    mode: str = "differential"  # "differential" or "full"
    differential_model: str = "claude-3-5-haiku-20241022"  # cheaper model for diffs
    full_model: str = "claude-3-5-sonnet-20241022"  # more capable model for full
    max_messages_since_summary: int = 30  # force full summary if exceeded
@dataclass
class CompactionConfig:
    """Settings for context compaction."""

    auto: bool = True  # enable auto-summarization
    prune: bool = True  # enable pruning

    # Phase 2: predictive overflow detection.
    use_predictive: bool = True  # enable predictive overflow detection
    predictive_lookahead: int = 5  # predict N turns ahead
    predictive_safety_margin: float = 0.8  # trigger at 80% of limit
@dataclass
class HarnessConfig:
    """Main configuration for harness-utils.

    Provides all configuration parameters for context window management
    with sensible defaults from the CTXWINARCH.md specification.
    """

    truncation: TruncationConfig = field(default_factory=TruncationConfig)
    pruning: PruningConfig = field(default_factory=PruningConfig)
    tokens: TokenConfig = field(default_factory=TokenConfig)
    model_limits: ModelLimitsConfig = field(default_factory=ModelLimitsConfig)
    storage: StorageConfig = field(default_factory=StorageConfig)
    compaction: CompactionConfig = field(default_factory=CompactionConfig)
    summarization: SummarizationConfig = field(default_factory=SummarizationConfig)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "HarnessConfig":
        """Create configuration from dictionary.

        Sections absent from ``data`` keep their dataclass defaults.

        Args:
            data: Configuration dictionary keyed by section name.

        Returns:
            HarnessConfig instance

        Raises:
            TypeError: If a section contains an unrecognized key.
        """
        config = cls()

        if "truncation" in data:
            config.truncation = TruncationConfig(**data["truncation"])
        if "pruning" in data:
            # Copy so the caller's dict is not mutated.
            pruning_data = dict(data["pruning"])
            # Bug fix: an explicit empty list must be honored (it disables
            # tool protection); only a missing or None value falls back to
            # PruningConfig's default_factory. The previous truthiness check
            # (`if protected:`) silently replaced [] with the defaults.
            if pruning_data.get("protected_tools") is None:
                pruning_data.pop("protected_tools", None)
            config.pruning = PruningConfig(**pruning_data)
        if "tokens" in data:
            config.tokens = TokenConfig(**data["tokens"])
        if "model_limits" in data:
            config.model_limits = ModelLimitsConfig(**data["model_limits"])
        if "storage" in data:
            # Copy so the Path coercion does not mutate the caller's dict.
            storage_data = data["storage"].copy()
            if "base_path" in storage_data:
                storage_data["base_path"] = Path(storage_data["base_path"])
            config.storage = StorageConfig(**storage_data)
        if "compaction" in data:
            config.compaction = CompactionConfig(**data["compaction"])
        if "summarization" in data:
            config.summarization = SummarizationConfig(**data["summarization"])

        return config

    @classmethod
    def from_toml(cls, path: Path) -> "HarnessConfig":
        """Load configuration from TOML file.

        Args:
            path: Path to TOML configuration file

        Returns:
            HarnessConfig instance
        """
        import tomllib  # local import: stdlib tomllib requires Python 3.11+

        with open(path, "rb") as f:  # tomllib requires a binary file object
            data = tomllib.load(f)

        return cls.from_dict(data)

    @classmethod
    def from_json(cls, path: Path) -> "HarnessConfig":
        """Load configuration from JSON file.

        Args:
            path: Path to JSON configuration file

        Returns:
            HarnessConfig instance
        """
        import json

        with open(path) as f:
            data = json.load(f)

        return cls.from_dict(data)