Coverage for src/harnessutils/inspection.py: 85%

144 statements  

coverage.py v7.13.2, created at 2026-02-18 08:30 -0600

1"""Context inspection and debugging utilities. 

2 

3Provides observability into context state, pruning decisions, and 

4predicted behavior. Enables agents to understand "what's in context" 

5and "why was X pruned". 

6""" 

7 

8from dataclasses import dataclass 

9from typing import Any 

10 

11from harnessutils.compaction.pruning import PruningDecision, calculate_context_tokens 

12from harnessutils.config import HarnessConfig 

13from harnessutils.models.conversation import Conversation 

14from harnessutils.models.message import Message 

15from harnessutils.models.parts import ToolPart 

16from harnessutils.tokens.exact import count_tokens_fast 

17 

18 

19@dataclass 

20class ContextSummary: 

21 """High-level summary of context state.""" 

22 

23 total_messages: int 

24 total_tokens: int 

25 tool_outputs: int 

26 tool_outputs_compacted: int 

27 summaries: int 

28 protected_tokens: int 

29 prunable_tokens: int 

30 health: str # "good" | "degraded" | "poor" 

31 recommendations: list[str] 

32 
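# Illustrative only: how a caller might populate ContextSummary, deriving the
# health label and recommendations from the metrics (all values are made up):
#
#     ContextSummary(
#         total_messages=42,
#         total_tokens=51_200,
#         tool_outputs=12,
#         tool_outputs_compacted=3,
#         summaries=1,
#         protected_tokens=18_000,
#         prunable_tokens=9_500,
#         health="degraded",
#         recommendations=["Prune duplicate tool outputs before the next turn"],
#     )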

@dataclass
class ImpactPrediction:
    """Prediction of what would happen if tokens are added."""

    would_trigger_pruning: bool
    estimated_pruned_count: int
    would_trigger_overflow: bool
    estimated_tokens_after_pruning: int
    margin_remaining: int  # Tokens before hitting overflow
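# Illustrative only: a prediction for adding a hypothetical 2_000-token tool
# result under a 128k-token context limit (none of these numbers are computed
# by this module):
#
#     ImpactPrediction(
#         would_trigger_pruning=True,
#         estimated_pruned_count=2,
#         would_trigger_overflow=False,
#         estimated_tokens_after_pruning=96_500,
#         margin_remaining=15_500,
#     )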

class ContextInspector:
    """Inspector for querying and analyzing context state.

    Provides full observability into:
    - What's currently in context
    - Why specific items were pruned
    - What would happen if more tokens were added
    - Full audit trail of all decisions
    """

    def __init__(
        self,
        messages: list[Message],
        config: HarnessConfig,
        conversation: Conversation | None = None,
        decisions: list[PruningDecision] | None = None,
    ):
        """Initialize inspector.

        Args:
            messages: Conversation messages to inspect
            config: Harness configuration
            conversation: Optional conversation metadata
            decisions: Optional pruning decisions for audit trail
        """
        self.messages = messages
        self.config = config
        self.conversation = conversation
        self._decisions: list[PruningDecision] = decisions or []
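    # A minimal usage sketch, assuming the surrounding harness already holds the
    # message list and configuration; `messages`, `config`, and `decision_log`
    # below are hypothetical names, not defined in this module:
    #
    #     inspector = ContextInspector(messages, config, decisions=decision_log)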

    def summary(self) -> dict[str, Any]:
        """Get high-level summary of context state.

        Returns:
            Dictionary with context metrics:
            - total_messages: Count of all messages
            - total_tokens: Token count in context
            - tool_outputs: Count of tool outputs
            - tool_outputs_compacted: Count of compacted outputs
            - summaries: Count of summary messages
            - protected_tokens: Tokens in protected window
            - prunable_tokens: Tokens eligible for pruning
        """
        total_messages = len(self.messages)
        total_tokens = calculate_context_tokens(self.messages)

        tool_outputs = 0
        tool_outputs_compacted = 0
        summaries = 0
        protected_tokens = 0
        prunable_tokens = 0

        turns_from_end = 0
        for msg in reversed(self.messages):
            if msg.role == "user":
                turns_from_end += 1

            if msg.summary:
                summaries += 1

            for part in msg.parts:
                if not isinstance(part, ToolPart):
                    continue

                if part.state.status != "completed":
                    continue

                tool_outputs += 1

                if part.state.time and part.state.time.compacted:
                    tool_outputs_compacted += 1
                    continue

                tokens = count_tokens_fast(part.state.output) if part.state.output else 0

                # Check if protected
                is_protected = (
                    turns_from_end < self.config.pruning.protect_turns
                    or part.tool in self.config.pruning.protected_tools
                    or msg.summary
                )

                if is_protected:
                    protected_tokens += tokens
                else:
                    prunable_tokens += tokens

        return {
            "total_messages": total_messages,
            "total_tokens": total_tokens,
            "tool_outputs": tool_outputs,
            "tool_outputs_compacted": tool_outputs_compacted,
            "summaries": summaries,
            "protected_tokens": protected_tokens,
            "prunable_tokens": prunable_tokens,
        }
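    # Reading the result (continuing the hypothetical `inspector` sketched after
    # __init__ above; the keys mirror the docstring, the logic is illustrative):
    #
    #     metrics = inspector.summary()
    #     if metrics["prunable_tokens"] < metrics["protected_tokens"]:
    #         # Most of the context is protected; pruning alone frees little space.
    #         ...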

    def get_pruning_decision(
        self, message_id: str, part_id: str
    ) -> dict[str, Any] | None:
        """Get pruning decision for a specific tool output.

        Args:
            message_id: Message ID
            part_id: Tool part call_id

        Returns:
            Decision details if found:
            - pruned: Whether this output was pruned
            - reason: Decision type (pruned_fifo, kept, etc.)
            - importance_score: Importance score if available
            - duplicate_of: Part ID of duplicate if applicable
            - tokens_saved: Tokens saved by pruning this
            - pruned_at: When the decision was made
            - metadata: Additional context

            None if no decision recorded for this part.
        """
        # This will be populated by the manager when pruning happens.
        # For now, we reconstruct from current state.
        for msg in self.messages:
            if msg.id != message_id:
                continue

            for part in msg.parts:
                if not isinstance(part, ToolPart):
                    continue

                if part.call_id != part_id:
                    continue

                # Found the part - determine its status
                was_compacted = part.state.time and part.state.time.compacted

                if was_compacted:
                    return {
                        "pruned": True,
                        "reason": "compacted",
                        "importance_score": None,
                        "duplicate_of": None,
                        "pruned_at": part.state.time.compacted if part.state.time else None,
                        "tokens_saved": 0,  # Unknown
                        "metadata": {},
                    }
                else:
                    return {
                        "pruned": False,
                        "reason": "in_context",
                        "importance_score": None,
                        "duplicate_of": None,
                        "pruned_at": None,
                        "tokens_saved": 0,
                        "metadata": {},
                    }

        return None
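    # Example lookup (hypothetical IDs; returns None when no matching part exists):
    #
    #     decision = inspector.get_pruning_decision("msg_123", "call_456")
    #     if decision and decision["pruned"]:
    #         print(decision["reason"])  # e.g. "compacted"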

    def predict_impact(self, additional_tokens: int) -> dict[str, Any]:
        """Predict what would happen if N tokens were added.

        Args:
            additional_tokens: Number of tokens to simulate adding

        Returns:
            Prediction of impact:
            - would_trigger_pruning: Would pruning be triggered
            - estimated_pruned_count: How many outputs would be pruned
            - would_trigger_overflow: Would overflow/summarization trigger
            - estimated_tokens_after_pruning: Projected token count after pruning
            - margin_remaining: Tokens before hitting overflow
        """
        current_tokens = calculate_context_tokens(self.messages)
        projected_tokens = current_tokens + additional_tokens

        # Check if pruning would trigger
        would_trigger_pruning = False
        estimated_pruned_count = 0
        tokens_after_pruning = projected_tokens

        # Simple heuristic: if we exceed prune_protect, estimate pruning
        if projected_tokens > self.config.pruning.prune_protect:
            would_trigger_pruning = True

            # Count prunable outputs
            prunable_count = 0
            prunable_tokens = 0
            turns_from_end = 0

            for msg in reversed(self.messages):
                if msg.role == "user":
                    turns_from_end += 1

                if msg.summary:
                    break

                for part in msg.parts:
                    if not isinstance(part, ToolPart):
                        continue

                    if part.state.status != "completed":
                        continue

                    if turns_from_end < self.config.pruning.protect_turns:
                        continue

                    if part.tool in self.config.pruning.protected_tools:
                        continue

                    if part.state.time and part.state.time.compacted:
                        continue

                    tokens = count_tokens_fast(part.state.output) if part.state.output else 0
                    prunable_count += 1
                    prunable_tokens += tokens

            # Estimate how many would be pruned
            overage = projected_tokens - self.config.pruning.prune_protect
            if prunable_tokens >= overage:
                # Estimate based on average token size
                avg_tokens_per_output = (
                    prunable_tokens / prunable_count if prunable_count > 0 else 0
                )
                if avg_tokens_per_output > 0:
                    estimated_pruned_count = int(overage / avg_tokens_per_output) + 1
                tokens_after_pruning = projected_tokens - min(overage, prunable_tokens)

        # Check if overflow would trigger
        context_limit = self.config.model_limits.default_context_limit
        output_reserve = self.config.model_limits.default_output_limit
        usable_limit = context_limit - output_reserve

        would_trigger_overflow = tokens_after_pruning >= usable_limit
        margin_remaining = usable_limit - tokens_after_pruning

        return {
            "would_trigger_pruning": would_trigger_pruning,
            "estimated_pruned_count": estimated_pruned_count,
            "would_trigger_overflow": would_trigger_overflow,
            "estimated_tokens_after_pruning": tokens_after_pruning,
            "margin_remaining": max(0, margin_remaining),
        }
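    # Worked example of the heuristic above (numbers made up): with
    # prune_protect = 50_000, a current context of 48_000 tokens, and 5_000
    # additional tokens, the projection is 53_000 and the overage is 3_000.
    # If 10 prunable outputs hold 8_000 tokens (800 each on average), the
    # estimate is int(3_000 / 800) + 1 = 4 outputs pruned, leaving
    # 53_000 - 3_000 = 50_000 tokens after pruning.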

    def get_audit_trail(self, limit: int = 50) -> list[dict[str, Any]]:
        """Get full audit trail of pruning decisions.

        Args:
            limit: Maximum number of decisions to return

        Returns:
            List of decision records (most recent first)
        """
        return [d.to_dict() for d in self._decisions[:limit]]

    def get_tool_output_tokens(self) -> dict[str, Any]:
        """Get breakdown of tokens by tool type.

        Returns:
            Dictionary with:
            - total: Total tokens in tool outputs
            - by_tool: Token count per tool type
            - prunable: Tokens that could be pruned
            - protected: Tokens in protected outputs
            - prunability_percent: Percentage that is prunable
        """
        total = 0
        by_tool: dict[str, int] = {}
        prunable = 0
        protected = 0

        turns_from_end = 0
        for msg in reversed(self.messages):
            if msg.role == "user":
                turns_from_end += 1

            for part in msg.parts:
                if not isinstance(part, ToolPart):
                    continue

                if part.state.status != "completed":
                    continue

                if part.state.time and part.state.time.compacted:
                    continue

                tokens = count_tokens_fast(part.state.output) if part.state.output else 0
                total += tokens

                # Track by tool
                by_tool[part.tool] = by_tool.get(part.tool, 0) + tokens

                # Check if protected
                is_protected = (
                    turns_from_end < self.config.pruning.protect_turns
                    or part.tool in self.config.pruning.protected_tools
                    or msg.summary
                )

                if is_protected:
                    protected += tokens
                else:
                    prunable += tokens

        prunability_percent = round((prunable / total * 100) if total > 0 else 0, 1)

        return {
            "total": total,
            "by_tool": by_tool,
            "prunable": prunable,
            "protected": protected,
            "prunability_percent": prunability_percent,
        }
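# Example of the token breakdown (hypothetical `inspector`; tool names and values
# are illustrative only):
#
#     breakdown = inspector.get_tool_output_tokens()
#     # => {"total": 24000, "by_tool": {"read_file": 15000, "grep": 9000},
#     #     "prunable": 14000, "protected": 10000, "prunability_percent": 58.3}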