Coverage for src/harnessutils/compaction/summarization.py: 96%
93 statements
« prev ^ index » next — coverage.py v7.13.2, created at 2026-02-12 22:41 -0600
1"""Tier 3: LLM-powered conversation summarization.
3Uses LLM to semantically compress conversation when approaching limit.
4Cost: Expensive (~$0.10-0.50), Latency: ~3-5s.
5"""
7from dataclasses import dataclass
8from typing import Any
10from harnessutils.models.message import Message
11from harnessutils.models.usage import CacheUsage, Usage
12from harnessutils.types import LLMClient
# System prompt for a full (from-scratch) summarization pass. Sent verbatim to
# the LLM; do not edit casually — wording changes alter summarization behavior.
SUMMARIZATION_PROMPT = """You are a helpful AI assistant tasked with summarizing conversations.

When asked to summarize, provide a detailed but concise summary of the conversation.
Focus on information that would be helpful for continuing the conversation, including:
- What was done
- What is currently being worked on
- Which files are being modified
- What needs to be done next
- Key user requests, constraints, or preferences that should persist
- Important technical decisions and why they were made

Your summary should be comprehensive enough to provide context but concise enough
to be quickly understood."""
# System prompt for differential (incremental) summarization: the model receives
# the previous summary plus only the messages since it, and must merge the two.
# Sent verbatim to the LLM; wording changes alter summarization behavior.
DIFFERENTIAL_SUMMARIZATION_PROMPT = """You are a helpful AI assistant tasked with \
updating conversation summaries.

You will be given:
1. A previous summary of the conversation
2. New messages/activity since that summary

Your task is to create an UPDATED summary that:
- Preserves important context from the previous summary
- Incorporates new information from recent messages
- Maintains continuity while staying concise
- Focuses on what's relevant for continuing the conversation

The updated summary should feel like a natural evolution of the previous one,
not a disconnected list of old + new information."""
@dataclass
class SummarizationResult:
    """Result of summarization operation."""

    # Assistant Message carrying the generated summary text (flagged summary=True).
    summary_message: Message
    # Token usage reported by the LLM call that produced the summary.
    tokens_used: Usage
    # Cost of the summarization call as reported by the LLM client.
    cost: float
def _find_last_summary(messages: list[Message]) -> Message | None:
    """Locate the most recent summary message in the conversation.

    Args:
        messages: List of messages to search

    Returns:
        The last message whose ``summary`` flag is truthy, or None if the
        conversation has never been summarized.
    """
    # Walk backwards and take the first summary-flagged message we meet.
    return next((candidate for candidate in reversed(messages) if candidate.summary), None)
def _get_messages_since_summary(
    messages: list[Message], summary_msg: Message
) -> list[Message]:
    """Return every message that comes after the given summary message.

    Args:
        messages: Full list of messages
        summary_msg: The summary message to find messages after

    Returns:
        Messages following ``summary_msg`` (matched by id). If the summary is
        not present in ``messages``, the full list is returned unchanged.
    """
    for position, candidate in enumerate(messages):
        if candidate.id == summary_msg.id:
            return messages[position + 1 :]

    # Summary message not found — fall back to the whole conversation.
    return messages
93def _build_differential_prompt(
94 last_summary: str, new_messages: list[dict[str, Any]]
95) -> list[dict[str, Any]]:
96 """Build prompt for differential summarization.
98 Args:
99 last_summary: Content of the previous summary
100 new_messages: New messages since the last summary
102 Returns:
103 List of messages formatted for differential summarization
104 """
105 prompt_messages = [
106 {
107 "role": "user",
108 "content": f"""Previous summary:
109{last_summary}
111New activity since summary:""",
112 }
113 ]
115 # Add new messages
116 prompt_messages.extend(new_messages)
118 # Add request for updated summary
119 prompt_messages.append(
120 {
121 "role": "user",
122 "content": (
123 "Provide an UPDATED summary that incorporates both the "
124 "previous context and new activity."
125 ),
126 }
127 )
129 return prompt_messages
def is_overflow(usage: Usage, context_limit: int, output_limit: int) -> bool:
    """Check whether the conversation has overflowed the context window.

    The context window must hold both input and output tokens, so the usable
    input space is the context limit minus the reserved output budget.

    Args:
        usage: Token usage from last turn
        context_limit: Maximum context tokens for model
        output_limit: Maximum output tokens for model

    Returns:
        True if input tokens (fresh input plus cache reads) exceed the space
        left after reserving room for output.
    """
    consumed_input = usage.input + usage.cache.read
    return consumed_input > context_limit - output_limit
def summarize_conversation(
    messages: list[Message],
    llm_client: LLMClient,
    parent_message_id: str,
    message_id: str,
    model: str | None = None,
    auto_mode: bool = False,
    config: Any | None = None,
    force_full: bool = False,
) -> SummarizationResult:
    """Summarize conversation using LLM.

    Prefers a cheaper differential pass (update the last summary with only the
    new messages) when a previous summary exists and the number of messages
    since it is within the configured limit; otherwise falls back to a full
    summarization of the entire conversation.

    Args:
        messages: Conversation messages to summarize
        llm_client: LLM client implementation (callback from app)
        parent_message_id: ID of message that triggered summarization
        message_id: ID for the summary message
        model: Optional model to use (cheaper/faster recommended); overrides
            any model taken from ``config``
        auto_mode: Whether this was auto-triggered
            (NOTE(review): currently unused in the body — confirm intent)
        config: Optional SummarizationConfig for differential mode
        force_full: Force full summarization even if differential is available

    Returns:
        SummarizationResult with summary message and metrics
    """
    # Check if differential summarization is possible
    use_differential = False
    last_summary = None
    new_messages_only = messages

    if config and not force_full:
        last_summary = _find_last_summary(messages)
        if last_summary:
            new_messages_only = _get_messages_since_summary(messages, last_summary)

            # Only use differential if we have new messages and haven't exceeded limit
            # (a too-long tail makes differential no cheaper than a full pass).
            max_since_summary = getattr(config, "max_messages_since_summary", 30)
            if new_messages_only and len(new_messages_only) <= max_since_summary:
                use_differential = True

    # Choose model and prompt based on mode
    if use_differential and last_summary:
        # Differential mode: cheaper/faster model
        selected_model = model or getattr(config, "differential_model", None)
        system_prompt = DIFFERENTIAL_SUMMARIZATION_PROMPT

        # Get last summary content — takes the first text part only.
        summary_content = ""
        for part in last_summary.parts:
            if part.type == "text":
                summary_content = getattr(part, "text", "")
                break

        # Convert only new messages
        new_model_messages = _convert_to_model_format(new_messages_only)

        # Build differential prompt
        model_messages = _build_differential_prompt(summary_content, new_model_messages)
    else:
        # Full mode: more capable model
        selected_model = model or getattr(config, "full_model", None)
        system_prompt = SUMMARIZATION_PROMPT

        # Convert all messages
        model_messages = _convert_to_model_format(messages)
        model_messages.append(
            {
                "role": "user",
                "content": "Provide a detailed summary for continuing our conversation.",
            }
        )

    # Single LLM round-trip; selected_model may be None, in which case the
    # client is expected to fall back to its own default.
    response = llm_client.invoke(
        messages=model_messages,
        system=[system_prompt],
        model=selected_model,
    )

    # Response is a plain dict; every field is read defensively with defaults.
    usage_data = response.get("usage", {})
    cache_data = usage_data.get("cache", {})

    usage = Usage(
        input=usage_data.get("input", 0),
        output=usage_data.get("output", 0),
        reasoning=usage_data.get("reasoning", 0),
        cache=CacheUsage(
            read=cache_data.get("read", 0),
            write=cache_data.get("write", 0),
        ),
    )

    cost = response.get("cost", 0.0)

    # Local import — presumably avoids a circular import at module load;
    # verify before hoisting to the top of the file.
    from harnessutils.models.parts import TextPart

    # The summary is recorded as an assistant message flagged summary=True so
    # _find_last_summary can pick it up for future differential passes.
    summary_message = Message(
        id=message_id,
        role="assistant",
        parent_id=parent_message_id,
        summary=True,
        agent="summarization",
        model={"model": response.get("model", model or "unknown")},
        tokens=usage,
        cost=cost,
    )

    summary_message.add_part(TextPart(text=response.get("content", "")))

    return SummarizationResult(
        summary_message=summary_message,
        tokens_used=usage,
        cost=cost,
    )
def _convert_to_model_format(messages: list[Message]) -> list[dict[str, Any]]:
    """Convert internal messages to model format for summarization.

    Args:
        messages: Internal message objects

    Returns:
        List of ``{"role", "content"}`` dicts; messages with no usable text
        content are dropped.
    """
    converted: list[dict[str, Any]] = []

    for message in messages:
        if not message.parts:
            continue

        if message.role == "user":
            # Keep non-ignored text parts only.
            texts = [
                getattr(part, "text", "")
                for part in message.parts
                if part.type == "text" and not getattr(part, "ignored", False)
            ]
            if texts:
                converted.append({"role": "user", "content": "\n".join(texts)})

        elif message.role == "assistant":
            # Errored turns with no partial output carry nothing worth keeping.
            if message.error and not message.has_partial_output():
                continue

            texts = []
            for part in message.parts:
                if part.type == "text":
                    texts.append(getattr(part, "text", ""))
                elif part.type == "reasoning":
                    texts.append(f"[Thinking: {getattr(part, 'text', '')}]")

            if texts:
                converted.append({"role": "assistant", "content": "\n".join(texts)})

    return converted