The Agent Loop in Code
A complete tool-calling agent in ~15 lines of Python.
# The core agent loop -- this is what makes an "agent"
from anthropic import Anthropic

client = Anthropic()

# Tool schema advertised to the model: one tool taking a required "city" string.
tools = [{
    "name": "get_weather",
    "description": "Get current weather for a city",
    "input_schema": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"]
    }
}]

# Conversation history; every tool-calling step appends two messages to it.
messages = [{"role": "user", "content": "Weather in Bengaluru?"}]

while True:
    resp = client.messages.create(
        model="claude-sonnet-4-5",
        tools=tools, messages=messages, max_tokens=1024
    )
    # Only keep looping when the model explicitly asked for a tool. Any other
    # stop reason (a final answer, a token-limit cut-off, ...) ends the loop
    # instead of crashing on a response that contains no tool call.
    if resp.stop_reason != "tool_use":
        break

    # Model wants to call one or more tools -- answer EVERY request, because
    # each tool_use block needs a matching tool_result in the next user turn.
    tool_results = [
        {
            "type": "tool_result",
            "tool_use_id": block.id,
            # NOTE: run_tool is not defined in this snippet; it is assumed to
            # be supplied by the reader and to map (name, input) -> result.
            "content": str(run_tool(block.name, block.input)),
        }
        for block in resp.content
        if block.type == "tool_use"
    ]

    # Append assistant message + tool results, continue loop
    messages.append({"role": "assistant", "content": resp.content})
    messages.append({"role": "user", "content": tool_results})
The `while True` loop IS the agent. The LLM is stateless — it's your loop that gives it memory by passing the full conversation history each time. This is why agents have context window limits.

Production-ready agent loop with error handling
import time
import logging
from anthropic import Anthropic, APIError, RateLimitError
logger = logging.getLogger(__name__)
class ProductionAgentLoop:
    """Tool-calling agent loop with retries, a step limit and token logging.

    Each step sends the whole conversation to the model, runs every tool the
    model asks for through ``tool_router.dispatch(name, input)``, appends the
    results to the conversation and repeats until the model produces a final
    answer or ``max_steps`` is reached.
    """

    # Request settings used for every API call. They are class attributes so
    # a subclass or an instance can override them without changing __init__.
    MODEL = "claude-sonnet-4-5"
    MAX_TOKENS = 4096

    def __init__(self, tools, tool_router, max_steps=15, max_retries=3):
        """Store the configuration and create the API client.

        Args:
            tools: Tool schema list passed as ``tools`` on every request.
            tool_router: Object exposing ``dispatch(name, input)``.
            max_steps: Maximum number of model calls per ``run``.
            max_retries: Maximum attempts per model call on transient errors.
        """
        self.client = Anthropic()
        self.tools = tools
        self.tool_router = tool_router
        self.max_steps = max_steps
        self.max_retries = max_retries

    def run(self, user_message: str, system_prompt: str = "") -> str:
        """Run the agent loop and return the model's final text.

        Args:
            user_message: The initial user turn.
            system_prompt: Optional system prompt; omitted from the request
                when empty.

        Returns:
            The first text block of the final response, "No response
            generated" when that response has no text block, or "Agent
            reached maximum steps without completing" when the step limit
            is hit.
        """
        messages = [{"role": "user", "content": user_message}]
        total_tokens = 0

        for step in range(self.max_steps):
            # Retry loop for transient API errors
            resp = self._call_with_retry(messages, system_prompt)

            # Track token usage
            total_tokens += resp.usage.input_tokens + resp.usage.output_tokens
            logger.info(
                "Step %s: %sin + %sout tokens",
                step, resp.usage.input_tokens, resp.usage.output_tokens,
            )

            tool_calls = [b for b in resp.content if b.type == "tool_use"]

            # Final answer. A response with no tool calls is also terminal
            # even if the stop reason is not "end_turn": there is nothing to
            # execute, and sending back a user turn with an empty content
            # list would not be a meaningful continuation.
            if resp.stop_reason == "end_turn" or not tool_calls:
                if resp.stop_reason != "end_turn":
                    logger.warning(
                        "Stopping on stop_reason=%s with no tool calls",
                        resp.stop_reason,
                    )
                final_text = next(
                    (b.text for b in resp.content if hasattr(b, "text")),
                    "No response generated",
                )
                logger.info(
                    "Completed in %s steps, %s total tokens",
                    step + 1, total_tokens,
                )
                return final_text

            # Process ALL tool calls in this response (model can request
            # multiple); each one gets its own tool_result entry.
            messages.append({"role": "assistant", "content": resp.content})
            tool_results = [
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": str(self._execute_tool(block)),
                }
                for block in tool_calls
            ]
            messages.append({"role": "user", "content": tool_results})

        return "Agent reached maximum steps without completing"

    def _call_with_retry(self, messages, system_prompt):
        """Call the Messages API, retrying transient failures with backoff.

        Rate-limit errors, 5xx responses and errors that carry no HTTP status
        (e.g. connection failures) are retried with exponential backoff
        (1s, 2s, 4s, ...). Other API errors are re-raised immediately.

        Raises:
            RuntimeError: When every attempt failed; the last API error is
                attached as the cause.
        """
        request = {
            "model": self.MODEL,
            "tools": self.tools,
            "messages": messages,
            "max_tokens": self.MAX_TOKENS,
        }
        # Leave the parameter out entirely rather than sending an empty prompt.
        if system_prompt:
            request["system"] = system_prompt

        last_error = None
        for attempt in range(self.max_retries):
            try:
                return self.client.messages.create(**request)
            except RateLimitError as e:
                last_error = e
                reason = "Rate limited"
            except APIError as e:
                # Not every APIError carries a status code (connection errors
                # do not), so read it defensively instead of assuming it.
                status = getattr(e, "status_code", None)
                if status is not None and status < 500:
                    raise
                last_error = e
                reason = "Transient API error"

            # No point sleeping after the final attempt; fall through to raise.
            if attempt + 1 < self.max_retries:
                wait = 2 ** attempt
                logger.warning("%s, retrying in %ss", reason, wait)
                time.sleep(wait)

        raise RuntimeError("Max retries exceeded") from last_error

    def _execute_tool(self, tool_block):
        """Dispatch one tool call; return its result or an error dict.

        Any exception from the router is logged and converted into
        ``{"error": "<message>"}`` so the failure is reported back to the
        model instead of aborting the whole run.
        """
        try:
            return self.tool_router.dispatch(tool_block.name, tool_block.input)
        except Exception as e:
            logger.error("Tool %s failed: %s", tool_block.name, e)
            return {"error": str(e)}
Context window management strategies
As agents loop, the message history grows. Eventually it hits the context window limit. Here are the strategies:
| STRATEGY | HOW IT WORKS | TRADE-OFF | WHEN TO USE |
|---|---|---|---|
| Sliding window | Keep first N + last M messages, drop middle | Loses intermediate reasoning steps | Simple agents with short tasks |
| Summarization | Periodically summarize old messages into a compact summary | LLM call overhead, information loss | Long-running agents (10+ steps) |
| Tool result truncation | Limit tool results to first K characters | May lose important data | Tools that return large payloads |
| Smart pruning | Keep system prompt + all tool calls + results, drop intermediate thoughts | Loses reasoning chain | When tool results are most important |
| External memory | Store important facts in vector store, retrieve when needed | Retrieval quality, latency | Long-horizon agents across sessions |
Typical token usage per agent step:
• System prompt: ~500 tokens
• Tool schemas: ~200 tokens x 10 tools = 2,000 tokens
• User message: ~100 tokens
• Assistant reply: ~300 tokens
• Tool result: ~500 tokens (varies wildly)
• Per step overhead: ~800 tokens (assistant + tool result)
• Available for history: 200K - 2,500 (fixed: system prompt + tool schemas) = ~197,500 tokens
• Max steps before overflow: ~197,500 / 800 ~ 247 steps
But: if tool results are 5KB each -> ~1,250 tokens each
• Max steps: ~197,500 / 1,550 ~ 127 steps
Rule of thumb: budget 1-2K tokens per step, plan for 50-100 steps max.