Coverage for src / harness_utils / compaction / summarization.py: 26%
53 statements
« prev ^ index » next coverage.py v7.13.2, created at 2026-01-31 13:47 -0600
"""Tier 3: LLM-powered conversation summarization.

Uses LLM to semantically compress conversation when approaching limit.
Cost: Expensive (~$0.10-0.50), Latency: ~3-5s.
"""
7from dataclasses import dataclass
8from typing import Any
10from harness_utils.models.message import Message
11from harness_utils.models.usage import CacheUsage, Usage
12from harness_utils.types import LLMClient
# System prompt handed to the summarizer model. It instructs the model to
# capture everything needed to seamlessly continue the conversation after
# compaction (work done, work in flight, files touched, next steps, and
# persistent user constraints/decisions).
SUMMARIZATION_PROMPT = """You are a helpful AI assistant tasked with summarizing conversations.

When asked to summarize, provide a detailed but concise summary of the conversation.
Focus on information that would be helpful for continuing the conversation, including:
- What was done
- What is currently being worked on
- Which files are being modified
- What needs to be done next
- Key user requests, constraints, or preferences that should persist
- Important technical decisions and why they were made

Your summary should be comprehensive enough to provide context but concise enough
to be quickly understood."""
@dataclass
class SummarizationResult:
    """Result of summarization operation."""

    # Assistant Message carrying the generated summary text (built with
    # summary=True so downstream code can distinguish it from normal turns).
    summary_message: Message
    # Token usage incurred by the summarization LLM call itself.
    tokens_used: Usage
    # Dollar cost of the summarization call, as reported by the LLM client.
    cost: float
def is_overflow(usage: Usage, context_limit: int, output_limit: int) -> bool:
    """Check if conversation has overflowed context window.

    The window counts as overflowed once the tokens consumed by the last
    turn (prompt side plus completion side) exceed the context size minus
    the room reserved for the model's next output.

    Args:
        usage: Token usage from last turn
        context_limit: Maximum context tokens for model
        output_limit: Maximum output tokens for model

    Returns:
        True if overflow detected
    """
    # NOTE(review): cache *write* tokens are not counted here — only cache
    # reads contribute to the prompt side. Confirm this matches the
    # provider's token accounting.
    consumed = (
        usage.input
        + usage.cache.read
        + usage.output
        + usage.reasoning
    )
    return consumed > context_limit - output_limit
def summarize_conversation(
    messages: list[Message],
    llm_client: LLMClient,
    parent_message_id: str,
    message_id: str,
    model: str | None = None,
    auto_mode: bool = False,
) -> SummarizationResult:
    """Summarize conversation using LLM.

    Converts the conversation to the model wire format, appends a final user
    request asking for a summary, and invokes the client with the
    summarization system prompt. The reply is wrapped in an assistant
    Message flagged as a summary.

    Args:
        messages: Conversation messages to summarize
        llm_client: LLM client implementation (callback from app)
        parent_message_id: ID of message that triggered summarization
        message_id: ID for the summary message
        model: Optional model to use (cheaper/faster recommended)
        auto_mode: Whether this was auto-triggered

    Returns:
        SummarizationResult with summary message and metrics
    """
    request = _convert_to_model_format(messages)
    request.append({
        "role": "user",
        "content": "Provide a detailed summary for continuing our conversation."
    })

    response = llm_client.invoke(
        messages=request,
        system=[SUMMARIZATION_PROMPT],
        model=model,
    )

    # Missing usage fields default to zero so a sparse response still parses.
    raw_usage = response.get("usage", {})
    raw_cache = raw_usage.get("cache", {})
    usage = Usage(
        input=raw_usage.get("input", 0),
        output=raw_usage.get("output", 0),
        reasoning=raw_usage.get("reasoning", 0),
        cache=CacheUsage(
            read=raw_cache.get("read", 0),
            write=raw_cache.get("write", 0),
        ),
    )
    cost = response.get("cost", 0.0)

    # Local import mirrors the original placement — presumably avoids an
    # import cycle with the parts module (TODO confirm).
    from harness_utils.models.parts import TextPart

    summary = Message(
        id=message_id,
        role="assistant",
        parent_id=parent_message_id,
        summary=True,
        agent="summarization",
        model={"model": response.get("model", model or "unknown")},
        tokens=usage,
        cost=cost,
    )
    summary.add_part(TextPart(text=response.get("content", "")))

    return SummarizationResult(
        summary_message=summary,
        tokens_used=usage,
        cost=cost,
    )
130def _convert_to_model_format(messages: list[Message]) -> list[dict[str, Any]]:
131 """Convert internal messages to model format for summarization.
133 Args:
134 messages: Internal message objects
136 Returns:
137 List of messages in model format
138 """
139 model_messages: list[dict[str, Any]] = []
141 for msg in messages:
142 if len(msg.parts) == 0:
143 continue
145 if msg.role == "user":
146 content_parts = []
147 for part in msg.parts:
148 if part.type == "text" and not getattr(part, "ignored", False):
149 content_parts.append(part.text)
151 if content_parts:
152 model_messages.append({
153 "role": "user",
154 "content": "\n".join(content_parts),
155 })
157 elif msg.role == "assistant":
158 if msg.error and not msg.has_partial_output():
159 continue
161 content_parts = []
162 for part in msg.parts:
163 if part.type == "text":
164 content_parts.append(part.text)
165 elif part.type == "reasoning":
166 content_parts.append(f"[Thinking: {part.text}]")
168 if content_parts:
169 model_messages.append({
170 "role": "assistant",
171 "content": "\n".join(content_parts),
172 })
174 return model_messages