Coverage for src/harnessutils/config.py: 83%

102 statements  

« prev     ^ index     » next       coverage.py v7.13.2, created at 2026-02-12 22:41 -0600

1"""Configuration schema for harness-utils.""" 

2 

3from dataclasses import dataclass, field 

4from pathlib import Path 

5from typing import Any 

6 

7 

@dataclass
class TruncationConfig:
    """Configuration for Tier 1: Output truncation.

    Controls hard size caps on tool output (lines/bytes) plus the Phase 2
    content-aware limits that preserve high-signal content while trimming.
    """

    max_lines: int = 2000  # Hard cap on the number of lines kept
    max_bytes: int = 50 * 1024  # 50KB hard byte cap
    direction: str = "head"  # Which end to keep: "head" or "tail"

    # Phase 2: Content-aware truncation
    max_tokens: int = 2000  # Token-based limit
    use_content_aware: bool = True  # Enable content-aware truncation
    preserve_errors: bool = True  # Keep all errors/warnings in logs
    json_array_limit: int = 10  # Keep first/last N items in JSON arrays
    stacktrace_frame_limit: int = 20  # Keep top/bottom N frames in stacktraces

22 

23 

@dataclass
class PruningConfig:
    """Configuration for Tier 2: Selective pruning.

    Token budgets and protections decide WHEN pruning runs and what is
    exempt; the importance-scoring weights decide WHICH outputs to drop
    first; the deduplication knobs catch near-identical repeated outputs.
    """

    prune_protect: int = 40_000  # Keep the most recent 40K tokens untouched
    prune_minimum: int = 20_000  # Only prune if it would save 20K+ tokens
    protect_turns: int = 2  # Never prune the last N turns
    # Tool outputs never eligible for pruning (matched by tool name).
    protected_tools: list[str] = field(
        default_factory=lambda: ["skill_execution", "subtask_invocation"]
    )

    # Importance scoring (Phase 1.2)
    use_importance_scoring: bool = True  # Enable smart pruning
    recency_weight: float = 1.0  # Weight for recency score
    size_weight: float = -0.5  # Weight for size penalty (negative: bigger = worse)
    semantic_weight: float = 2.0  # Weight for semantic importance
    tool_priority_weight: float = 1.5  # Weight for tool type priority
    recency_decay: float = 0.1  # Exponential decay rate per turn

    # Tool importance map (higher = more important, i.e. pruned later)
    tool_importance: dict[str, float] = field(
        default_factory=lambda: {
            "read": 50.0,
            "write": 100.0,  # Code changes are important
            "edit": 100.0,
            "grep": 30.0,  # Search results often repetitive
            "glob": 30.0,
            "bash": 70.0,
            "skill_execution": 150.0,  # Complex operations
            "subtask_invocation": 150.0,
            "error": 200.0,  # Critical for debugging
        }
    )

    # Semantic boost scores (added on top of base tool importance)
    error_boost: float = 500.0  # Boost for outputs with errors
    warning_boost: float = 200.0  # Boost for warnings
    user_requested_boost: float = 300.0  # User explicitly asked for this

    # Deduplication (Phase 1.3)
    detect_duplicates: bool = True  # Enable duplicate detection
    similarity_threshold: float = 0.8  # Similarity threshold (0.0-1.0)
    duplicate_lookback: int = 20  # Check last N outputs for duplicates

67 

68 

@dataclass
class TokenConfig:
    """Configuration for token estimation.

    A simple chars-per-token heuristic; no tokenizer dependency.
    """

    chars_per_token: int = 4  # Estimated tokens = len(text) / chars_per_token

74 

75 

@dataclass
class ModelLimitsConfig:
    """Configuration for model limits.

    Fallback limits used when a model does not declare its own.
    """

    default_context_limit: int = 200_000  # Max input context window (tokens)
    default_output_limit: int = 8_192  # Max generated output (tokens)

82 

83 

@dataclass
class StorageConfig:
    """Configuration for storage layer."""

    # Root directory for persisted data; default_factory keeps the mutable
    # Path out of the shared class-level default.
    base_path: Path = field(default_factory=lambda: Path("data"))
    retention_days: int = 7  # How long truncated outputs are retained

90 

91 

@dataclass
class SummarizationConfig:
    """Configuration for Tier 3: Summarization.

    "differential" summarizes only what changed since the last summary
    (cheaper model); "full" re-summarizes the whole history.
    """

    mode: str = "differential"  # "differential" or "full"
    differential_model: str = "claude-3-5-haiku-20241022"  # Cheaper model for diffs
    full_model: str = "claude-3-5-sonnet-20241022"  # More capable model for full
    max_messages_since_summary: int = 30  # Force a full summary if exceeded

100 

101 

@dataclass
class CompactionConfig:
    """Configuration for context compaction.

    Master switches for the pruning/summarization tiers plus the Phase 2
    predictive trigger that compacts before the limit is actually hit.
    """

    auto: bool = True  # Enable auto-summarization
    prune: bool = True  # Enable pruning

    # Phase 2: Predictive overflow detection
    use_predictive: bool = True  # Enable predictive overflow detection
    predictive_lookahead: int = 5  # Predict N turns ahead
    predictive_safety_margin: float = 0.8  # Trigger at 80% of the limit

113 

114 

@dataclass
class HarnessConfig:
    """Main configuration for harness-utils.

    Provides all configuration parameters for context window management
    with sensible defaults from the CTXWINARCH.md specification.
    """

    truncation: TruncationConfig = field(default_factory=TruncationConfig)
    pruning: PruningConfig = field(default_factory=PruningConfig)
    tokens: TokenConfig = field(default_factory=TokenConfig)
    model_limits: ModelLimitsConfig = field(default_factory=ModelLimitsConfig)
    storage: StorageConfig = field(default_factory=StorageConfig)
    compaction: CompactionConfig = field(default_factory=CompactionConfig)
    summarization: SummarizationConfig = field(default_factory=SummarizationConfig)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "HarnessConfig":
        """Create configuration from dictionary.

        Each recognized top-level section replaces the corresponding
        sub-config wholesale; sections absent from ``data`` keep their
        defaults. Unknown keys inside a section raise ``TypeError``
        (the dataclass constructors reject them).

        Args:
            data: Configuration dictionary keyed by section name.

        Returns:
            HarnessConfig instance.
        """
        config = cls()

        if "truncation" in data:
            config.truncation = TruncationConfig(**data["truncation"])
        if "pruning" in data:
            # Pass the section straight through: PruningConfig accepts
            # protected_tools directly. (The previous filter-and-readd
            # logic silently restored the default protected_tools when a
            # caller explicitly supplied an empty list.)
            config.pruning = PruningConfig(**data["pruning"])
        if "tokens" in data:
            config.tokens = TokenConfig(**data["tokens"])
        if "model_limits" in data:
            config.model_limits = ModelLimitsConfig(**data["model_limits"])
        if "storage" in data:
            # Copy before mutating so the caller's dict is left untouched.
            storage_data = data["storage"].copy()
            if "base_path" in storage_data:
                storage_data["base_path"] = Path(storage_data["base_path"])
            config.storage = StorageConfig(**storage_data)
        if "compaction" in data:
            config.compaction = CompactionConfig(**data["compaction"])
        if "summarization" in data:
            config.summarization = SummarizationConfig(**data["summarization"])

        return config

    @classmethod
    def from_toml(cls, path: Path) -> "HarnessConfig":
        """Load configuration from TOML file.

        Args:
            path: Path to TOML configuration file

        Returns:
            HarnessConfig instance
        """
        # Local import: tomllib (stdlib, 3.11+) is only needed on this path.
        import tomllib

        with open(path, "rb") as f:  # tomllib requires a binary stream
            data = tomllib.load(f)

        return cls.from_dict(data)

    @classmethod
    def from_json(cls, path: Path) -> "HarnessConfig":
        """Load configuration from JSON file.

        Args:
            path: Path to JSON configuration file

        Returns:
            HarnessConfig instance
        """
        # Local import mirrors from_toml's lazy-import style.
        import json

        with open(path) as f:
            data = json.load(f)

        return cls.from_dict(data)