Coverage for src / harness_utils / compaction / summarization.py: 26%

53 statements  

« prev     ^ index     » next       coverage.py v7.13.2, created at 2026-01-31 13:47 -0600

1"""Tier 3: LLM-powered conversation summarization. 

2 

3Uses LLM to semantically compress conversation when approaching limit. 

4Cost: Expensive (~$0.10-0.50), Latency: ~3-5s. 

5""" 

6 

7from dataclasses import dataclass 

8from typing import Any 

9 

10from harness_utils.models.message import Message 

11from harness_utils.models.usage import CacheUsage, Usage 

12from harness_utils.types import LLMClient 

13 

14 

# System prompt sent with every summarization request. This is a runtime
# string the model receives verbatim — edit wording deliberately, since it
# directly shapes summary quality.
SUMMARIZATION_PROMPT = """You are a helpful AI assistant tasked with summarizing conversations.

When asked to summarize, provide a detailed but concise summary of the conversation.
Focus on information that would be helpful for continuing the conversation, including:
- What was done
- What is currently being worked on
- Which files are being modified
- What needs to be done next
- Key user requests, constraints, or preferences that should persist
- Important technical decisions and why they were made

Your summary should be comprehensive enough to provide context but concise enough
to be quickly understood."""

28 

29 

@dataclass
class SummarizationResult:
    """Result of a summarization operation.

    Attributes:
        summary_message: Assistant ``Message`` carrying the generated summary
            text (marked ``summary=True`` by the producer).
        tokens_used: Token usage reported by the LLM for the summarization call.
        cost: Cost reported by the LLM client for the call.
    """

    summary_message: Message
    tokens_used: Usage
    cost: float

37 

38 

def is_overflow(usage: Usage, context_limit: int, output_limit: int) -> bool:
    """Return True when the conversation no longer fits the context window.

    The usable input budget is the model's context window minus the space
    reserved for its output.

    Args:
        usage: Token usage from last turn.
        context_limit: Maximum context tokens for model.
        output_limit: Maximum output tokens for model.

    Returns:
        True if overflow detected.
    """
    # NOTE(review): cache *write* tokens are not counted here, only cache
    # reads — confirm that matches the provider's token accounting.
    consumed = (
        usage.input
        + usage.cache.read
        + usage.output
        + usage.reasoning
    )
    return consumed > context_limit - output_limit

57 

58 

def summarize_conversation(
    messages: list[Message],
    llm_client: LLMClient,
    parent_message_id: str,
    message_id: str,
    model: str | None = None,
    auto_mode: bool = False,
) -> SummarizationResult:
    """Summarize conversation using LLM.

    Args:
        messages: Conversation messages to summarize
        llm_client: LLM client implementation (callback from app)
        parent_message_id: ID of message that triggered summarization
        message_id: ID for the summary message
        model: Optional model to use (cheaper/faster recommended)
        auto_mode: Whether this was auto-triggered. Currently unused in this
            function; kept for interface compatibility with callers.

    Returns:
        SummarizationResult with summary message and metrics
    """
    # Imported at function scope rather than module scope — presumably to
    # avoid a circular import with harness_utils.models; confirm before
    # hoisting to the top of the file. Kept at the top of the function so
    # the dependency is visible up front (was previously buried mid-body).
    from harness_utils.models.parts import TextPart

    model_messages = _convert_to_model_format(messages)

    # Append a final user turn instructing the model to produce the summary.
    model_messages.append({
        "role": "user",
        "content": "Provide a detailed summary for continuing our conversation.",
    })

    response = llm_client.invoke(
        messages=model_messages,
        system=[SUMMARIZATION_PROMPT],
        model=model,
    )

    # Defensively default every usage field: clients may omit any of them.
    usage_data = response.get("usage", {})
    cache_data = usage_data.get("cache", {})

    usage = Usage(
        input=usage_data.get("input", 0),
        output=usage_data.get("output", 0),
        reasoning=usage_data.get("reasoning", 0),
        cache=CacheUsage(
            read=cache_data.get("read", 0),
            write=cache_data.get("write", 0),
        ),
    )

    cost = response.get("cost", 0.0)

    summary_message = Message(
        id=message_id,
        role="assistant",
        parent_id=parent_message_id,
        summary=True,
        agent="summarization",
        model={"model": response.get("model", model or "unknown")},
        tokens=usage,
        cost=cost,
    )

    summary_message.add_part(TextPart(text=response.get("content", "")))

    return SummarizationResult(
        summary_message=summary_message,
        tokens_used=usage,
        cost=cost,
    )

128 

129 

def _convert_to_model_format(messages: list[Message]) -> list[dict[str, Any]]:
    """Convert internal messages to model format for summarization.

    User messages keep only non-ignored text parts. Assistant messages keep
    text parts plus reasoning parts (rendered as bracketed thinking notes);
    errored assistant messages with no partial output are dropped, as are
    messages that yield no content at all.

    Args:
        messages: Internal message objects

    Returns:
        List of messages in model format
    """
    converted: list[dict[str, Any]] = []

    for msg in messages:
        if not msg.parts:
            continue

        if msg.role == "user":
            texts = [
                part.text
                for part in msg.parts
                if part.type == "text" and not getattr(part, "ignored", False)
            ]
            if texts:
                converted.append({
                    "role": "user",
                    "content": "\n".join(texts),
                })

        elif msg.role == "assistant":
            # Skip failed turns that produced nothing usable.
            if msg.error and not msg.has_partial_output():
                continue

            texts = []
            for part in msg.parts:
                if part.type == "text":
                    texts.append(part.text)
                elif part.type == "reasoning":
                    texts.append(f"[Thinking: {part.text}]")

            if texts:
                converted.append({
                    "role": "assistant",
                    "content": "\n".join(texts),
                })

    return converted