Mini Agent 的无限上下文实现

Posted on 11月 15, 2025

MiniMax 的 Agent 是我用过最厉害的通用 AI agent 了,可以说是应了 manus 那句话「真干活」。体感上执行我的 case 时要比 manus 强很多,真帮我干活了。最近他们开源了 Mini Agent,我看了一下,对其中的无限上下文实现比较好奇,便有了这篇笔记。

Mini Agent

✅ 智能上下文管理:自动对会话历史进行摘要,可处理长达可配置 Token 上限的上下文,从而支持无限长的任务。

摘要时机

Agent Loop 循环中

在每次 Agent Loop 循环中,Mini Agent 做的第一件事就是通过 _summarize_messages() 检查消息长度并按需进行总结。

async def run(self) -> str:
    """Run the agent loop until the task completes or max_steps is reached.

    Each iteration: summarize history if needed, call the LLM, print the
    response, then execute any requested tool calls and feed results back.

    Returns:
        The final assistant reply when the model stops calling tools, or an
        error/limit message string on LLM failure or step exhaustion.
    """
    step = 0

    while step < self.max_steps:
        # Summarize the message history first so the context never overflows.
        await self._summarize_messages()

        # Tools offered to the LLM on this call.
        tool_list = list(self.tools.values())

        try:
            response = await self.llm.generate(messages=self.messages, tools=tool_list)
        except Exception as e:
            # Imported lazily to distinguish retry exhaustion from other failures.
            from .retry import RetryExhaustedError

            if isinstance(e, RetryExhaustedError):
                error_msg = f"LLM call failed after {e.attempts} retries\nLast error: {str(e.last_exception)}"
                print(f"\n{Colors.BRIGHT_RED}❌ Retry failed:{Colors.RESET} {error_msg}")
            else:
                error_msg = f"LLM call failed: {str(e)}"
                print(f"\n{Colors.BRIGHT_RED}❌ Error:{Colors.RESET} {error_msg}")
            return error_msg

        # Record the assistant turn in the conversation history.
        assistant_msg = Message(
            role="assistant",
            content=response.content,
            thinking=response.thinking,
            tool_calls=response.tool_calls,
        )
        self.messages.append(assistant_msg)

        # Show the model's thinking, if present.
        if response.thinking:
            print(f"\n{Colors.BOLD}{Colors.MAGENTA}🧠 Thinking:{Colors.RESET}")
            print(f"{Colors.DIM}{response.thinking}{Colors.RESET}")

        # Show the assistant reply.
        if response.content:
            print(f"\n{Colors.BOLD}{Colors.BRIGHT_BLUE}🤖 Assistant:{Colors.RESET}")
            print(f"{response.content}")

        # No tool calls means the task is finished — return the final answer.
        # NOTE(review): response.content may be None here despite the `-> str`
        # annotation — confirm against LLM client behavior.
        if not response.tool_calls:
            return response.content

        # Execute every requested tool call in order.
        for tool_call in response.tool_calls:
            tool_call_id = tool_call.id
            function_name = tool_call.function.name
            arguments = tool_call.function.arguments

            # Truncate long argument values to keep console output readable.
            truncated_args = {}
            for key, value in arguments.items():
                value_str = str(value)
                if len(value_str) > 200:
                    truncated_args[key] = value_str[:200] + "..."
                else:
                    truncated_args[key] = value
            args_json = json.dumps(truncated_args, indent=2, ensure_ascii=False)
            for line in args_json.split("\n"):
                print(f"   {Colors.DIM}{line}{Colors.RESET}")

            # Run the tool; unknown tool names become failed results.
            if function_name not in self.tools:
                result = ToolResult(
                    success=False,
                    content="",
                    error=f"Unknown tool: {function_name}",
                )
            else:
                try:
                    tool = self.tools[function_name]
                    result = await tool.execute(**arguments)
                except Exception as e:
                    # Convert any tool exception into a failed ToolResult so
                    # one bad tool call cannot kill the whole agent loop.
                    import traceback

                    error_detail = f"{type(e).__name__}: {str(e)}"
                    error_trace = traceback.format_exc()
                    result = ToolResult(
                        success=False,
                        content="",
                        error=f"Tool execution failed: {error_detail}\n\nTraceback:\n{error_trace}",
                    )

            # Print the outcome (success output truncated to 300 chars).
            if result.success:
                result_text = result.content
                if len(result_text) > 300:
                    result_text = result_text[:300] + f"{Colors.DIM}...{Colors.RESET}"
                print(f"{Colors.BRIGHT_GREEN}✓ Result:{Colors.RESET} {result_text}")
            else:
                print(f"{Colors.BRIGHT_RED}✗ Error:{Colors.RESET} {Colors.RED}{result.error}{Colors.RESET}")

            # Feed the tool result back into the conversation as a tool message.
            tool_msg = Message(
                role="tool",
                content=result.content if result.success else f"Error: {result.error}",
                tool_call_id=tool_call_id,
                name=function_name,
            )
            self.messages.append(tool_msg)

        step += 1

    # Step budget exhausted without the model finishing the task.
    error_msg = f"Task couldn't be completed after {self.max_steps} steps."
    print(f"\n{Colors.BRIGHT_YELLOW}⚠️  {error_msg}{Colors.RESET}")
    return error_msg

按需摘要检查

当统计的上下文 Token 长度大于 self.token_limit 时进行摘要,token_limit 默认为 80000

默认上下文 token 统计

默认使用 tiktoken + cl100k_base 编码器 精确计算 token 数量,cl100k_base 编码器兼容大多数现代 LLM Model 包括 GPT-4/Claude/M2 当 tiktoken 初始化失败,则使用简单估计 (ps: 这个应该不会失败吧 这段代码不会是 ai 写的吧还是为 之后支持传递编码器 提供健壮性支持?)


async def _summarize_messages(self):
    """Compact the message history once its token count exceeds the limit.

    Strategy (agent mode):
        - keep every user message (these carry the user's intent)
        - summarize the content between each pair of user messages
          (the agent's execution process)
        - if the final round is still executing (agent/tool messages with
          no following user message), summarize that too
        - resulting shape: system -> user1 -> summary1 -> user2 -> summary2
          -> user3 -> summary3 (if still executing)
    """
    # Nothing to do while the history still fits under the token budget.
    if self._estimate_tokens() <= self.token_limit:
        return

# 估算Token数量
def _estimate_tokens(self) -> int:
        """使用 tiktoken 精确计算消息历史中的 token 数量
   使用 cl100k_base 编码器(兼容 GPT-4/Claude/M2)
        """
        try:
            # 使用 cl100k_base 编码器(GPT-4 和大多数现代模型所使用)
            encoding = tiktoken.get_encoding("cl100k_base")
        except Exception:
            # 备用方案:如果 tiktoken 初始化失败,则使用简单估计
            return self._estimate_tokens_fallback()

        total_tokens = 0

        for msg in self.messages:
            # 统计文本内容
            if isinstance(msg.content, str):
                total_tokens += len(encoding.encode(msg.content))
            elif isinstance(msg.content, list):
                for block in msg.content:
                    if isinstance(block, dict):
                        # 将字典转换为字符串进行计算
                        total_tokens += len(encoding.encode(str(block)))

            # 统计 thinking
            if msg.thinking:
                total_tokens += len(encoding.encode(msg.thinking))

            #统计 tool_calls
            if msg.tool_calls:
                total_tokens += len(encoding.encode(str(msg.tool_calls)))

            # 每条消息的元数据开销(大约 4 个 token)
            total_tokens += 4

        return total_tokens

备用估计

备用估计比较简单:直接统计上下文字符串的总字符数,按平均 2.5 个字符折算一个 token,最后将总字符数除以 2.5 作为 Token 计数。

def _estimate_tokens_fallback(self) -> int:
    """Fallback token estimation method (when tiktoken is unavailable)"""
    total_chars = 0
    for msg in self.messages:
        if isinstance(msg.content, str):
            total_chars += len(msg.content)
        elif isinstance(msg.content, list):
            for block in msg.content:
                if isinstance(block, dict):
                    total_chars += len(str(block))

        if msg.thinking:
            total_chars += len(msg.thinking)

        if msg.tool_calls:
            total_chars += len(str(msg.tool_calls))

    # 粗略估计:平均 2.5 个字符 = 1 个 token
    return int(total_chars / 2.5)

摘要算法

摘要内容

跳过第一条消息(假设它是系统提示,即使 msg.role == "user"),获取除第一条消息之外所有 user 消息的索引。然后对每两条 user 消息之间的 AI 和工具消息进行摘要,并以 user message 的形式插入到新消息列表中对应的两条 user 消息之间;如果是最后一条用户消息,则摘要从该用户消息到列表末尾的所有消息。这里将摘要作为 user 消息插入,猜测是为了避免其被重复摘要。具体示意图如下

原始消息序列

+-----------------------------------+
| 0: [SYSTEM] Setup Instruction     |
+-----------------------------------+
| 1: [USER]   Task A description    |
+-----------------------------------+
| 2: [AI]     Thinking for Task A   |
| 3: [AI]     Calling Tool X        |
| 4: [TOOL]   Result from Tool X    |
| 5: [AI]     Response for Task A   |
+-----------------------------------+
| 6: [USER]   Task B description    |
+-----------------------------------+
| 7: [AI]     Thinking for Task B   |
| 8: [AI]     Response for Task B   |
+-----------------------------------+

摘要后消息序列

+-----------------------------------+
| 0: [SYSTEM] Setup Instruction     |
+-----------------------------------+
| 1: [USER]   Task A description    |  <- 原始 User 消息
+-----------------------------------+
| 2: [USER]   [Assistant Execution  |
|             Summary]              |
|             Summary of:           |
|             - AI Thinking for A   |
|             - Tool X call & result|
|             - AI Response for A   |
+-----------------------------------+
| 3: [USER]   Task B description    |  <- 原始 User 消息
+-----------------------------------+
| 4: [USER]   [Assistant Execution  |
|             Summary]              |
|             Summary of:           |
|             - AI Thinking for B   |
|             - AI Response for B   |
+-----------------------------------+
# NOTE(review): this fragment is excerpted from inside an async method
# (it uses `self` and a bare `await`); it is not runnable standalone.
# Indices of every user message, skipping index 0, which is assumed to be
# the system prompt even if its role is "user".
user_indices = [i for i, msg in enumerate(self.messages) if msg.role == "user" and i > 0]
# Rebuild the message list, keeping the system prompt first.
new_messages = [self.messages[0]]  # keep the system prompt
summary_count = 0

# Walk each user message and summarize the execution that followed it.
for i, user_idx in enumerate(user_indices):
    # Keep the original user message verbatim.
    new_messages.append(self.messages[user_idx])

    # Range to summarize: up to the next user message, or to the end of
    # the history when this is the last user message (round still running).
    if i < len(user_indices) - 1:
        next_user_idx = user_indices[i + 1]
    else:
        next_user_idx = len(self.messages)

    # The assistant/tool messages produced during this round.
    execution_messages = self.messages[user_idx + 1 : next_user_idx]

    # Summarize the round (if it produced anything) and insert the summary
    # as a user message, presumably so it will not be re-summarized later.
    if execution_messages:
        summary_text = await self._create_summary(execution_messages, i + 1)
        if summary_text:
            summary_message = Message(
                role="user",
                content=f"[Assistant Execution Summary]\n\n{summary_text}",
            )
            new_messages.append(summary_message)
            summary_count += 1

# Swap in the compacted history.
self.messages = new_messages

new_tokens = self._estimate_tokens()

构建摘要

通过将 llm 消息与 tools 消息拼装起来通过 LLM 进行摘要 如果摘要失败则回退到拼装消息作为摘要返回

+------------------------------------------------------+
| [SYSTEM]                                             |
| "You are an assistant skilled at summarizing..."     |
+------------------------------------------------------+
| [USER]                                               |
| "Please provide a concise summary of...              |
|                                                      |
|  {summary_content}                                   |
|                                                      |
|  Requirements:                                       |
|  1. Focus on...                                      |
|  2. Keep key results...                              |
|  3. Be concise...                                    |
|  ..."                                                |
+------------------------------------------------------+
async def _create_summary(self, messages: list[Message], round_num: int) -> str:
    """Create a summary for one execution round.

    Args:
        messages: the messages of the round to summarize.
        round_num: 1-based round number, used in the transcript header.

    Returns:
        The LLM-generated summary text; the raw transcript if the LLM call
        fails; or "" when there is nothing to summarize.
    """
    if not messages:
        return ""

    # Flatten the round into a plain-text transcript for the summarizer.
    parts = [f"Round {round_num} execution process:\n\n"]
    for msg in messages:
        text = msg.content if isinstance(msg.content, str) else str(msg.content)
        if msg.role == "assistant":
            parts.append(f"Assistant: {text}\n")
            if msg.tool_calls:
                tool_names = [tc.function.name for tc in msg.tool_calls]
                parts.append(f"  → Called tools: {', '.join(tool_names)}\n")
        elif msg.role == "tool":
            parts.append(f"  ← Tool returned: {text}...\n")
    transcript = "".join(parts)

    summary_prompt = f"""Please provide a concise summary of the following Agent execution process:

{transcript}

Requirements:
1. Focus on what tasks were completed and which tools were called
2. Keep key execution results and important findings
3. Be concise and clear, within 1000 words
4. Use English
5. Do not include "user" related content, only summarize the Agent's execution process"""

    # Ask the LLM for a concise summary of the transcript.
    try:
        response = await self.llm.generate(
            messages=[
                Message(
                    role="system",
                    content="You are an assistant skilled at summarizing Agent execution processes.",
                ),
                Message(role="user", content=summary_prompt),
            ]
        )
    except Exception as e:
        print(f"{Colors.BRIGHT_RED}✗ Summary generation failed for round {round_num}: {e}{Colors.RESET}")
        # Fall back to the raw transcript when the LLM call fails.
        return transcript

    print(f"{Colors.BRIGHT_GREEN}✓ Summary for round {round_num} generated successfully{Colors.RESET}")
    return response.content

附录

Mini Agent

MiniMax AI Agent

版权信息

本文原载于 not only security,复制请保留原文出处。

comments powered by Disqus