From 499e5daa43730c79cac30d218a133cb544ea4dc3 Mon Sep 17 00:00:00 2001 From: parteeksingh24 Date: Fri, 5 Sep 2025 11:50:23 -0700 Subject: [PATCH 01/13] Add initial training docs --- .../developers/01-introduction-to-agents.mdx | 554 ++++++++ .../developers/02-anatomy-of-an-agent.mdx | 981 ++++++++++++++ .../Training/developers/03-agent-memory.mdx | 1186 +++++++++++++++++ content/Training/developers/index.mdx | 55 + content/Training/index.mdx | 21 + content/meta.json | 1 + 6 files changed, 2798 insertions(+) create mode 100644 content/Training/developers/01-introduction-to-agents.mdx create mode 100644 content/Training/developers/02-anatomy-of-an-agent.mdx create mode 100644 content/Training/developers/03-agent-memory.mdx create mode 100644 content/Training/developers/index.mdx create mode 100644 content/Training/index.mdx diff --git a/content/Training/developers/01-introduction-to-agents.mdx b/content/Training/developers/01-introduction-to-agents.mdx new file mode 100644 index 00000000..d017417c --- /dev/null +++ b/content/Training/developers/01-introduction-to-agents.mdx @@ -0,0 +1,554 @@ +--- +title: Introduction to Agents +description: Understanding AI agents and the $47B opportunity +--- + +# Module 1: Introduction to Agents + +Welcome to the age of AI agents - autonomous systems that are fundamentally transforming how we build and think about software. + +## The $47B Agent Opportunity + +The AI agents market is exploding - projected to grow from [$5.1B in 2024 to $47.1B by 2030](https://www.marketsandmarkets.com/Market-Reports/ai-agents-market-15761548.html) at a staggering 44.8% CAGR. According to [IBM's latest research](https://www.ibm.com/think/insights/ai-agents-2025-expectations-vs-reality), many developers are already exploring or building AI agents. + +But here's the problem: most developers are trying to build autonomous systems on infrastructure designed for websites, not agents. 
+ +As [Goldman Sachs' infrastructure analysis](https://www.goldmansachs.com/insights/articles/a-generational-infrastructure-buildout-might-hinge-on-ai-agents) points out: + +> "We're trying to run autonomous systems on infrastructure built for click-and-response websites. It's like trying to run a Tesla on roads designed for horses." + +Traditional cloud platforms (AWS Lambda, Google Cloud Functions, Azure Functions) were optimized for: +- **5ms response times** (agents need minutes or hours to think) +- **Stateless execution** (agents need persistent memory) +- **Edge distribution** (agents need GPU proximity) +- **Request-response patterns** (agents need continuous reasoning loops) + +## What Exactly Is an AI Agent? + + +For a comprehensive overview of agents and how they differ from traditional software, see our [What is an Agent?](/Guides/what-is-an-agent) guide. + + +An AI agent is not just another chatbot or API wrapper around an LLM. It's a fundamentally different type of software that combines: + +### The Agent Formula +``` +Agent = LLM + Memory + Tools + Reasoning Loop +``` + +Let's break this down: + +1. **LLM (Large Language Model)**: The "brain" that understands intent and generates responses +2. **Memory**: Both short-term (conversation context) and long-term (persistent knowledge) +3. **Tools**: Capabilities to interact with external systems, APIs, and data sources +4. **Reasoning Loop**: The ability to plan, execute, observe results, and adapt + +### Agents vs. 
Everything Else + +| Traditional API | Chatbot | AI Agent | +|----------------|---------|----------| +| Waits for commands | Responds to messages | Acts autonomously | +| Returns exactly what you ask | Follows scripted patterns | Figures out how to achieve goals | +| Stateless between calls | Maintains conversation context | Remembers everything, learns over time | +| Deterministic output | Limited variation | Adapts based on context | +| Single request-response | Turn-based conversation | Continuous reasoning and action | + +Think of it this way: +- **APIs** are like vending machines - push button, get result +- **Chatbots** are like scripted receptionists - they can talk, but only follow a script +- **Agents** are like smart assistants - they understand goals and figure out how to achieve them + +## The Paradigm Shift: From Deterministic to Non-Deterministic + + +For deeper insights on this shift, read our [Agent Engineering](/Guides/agent-engineering) guide that covers thinking like an agent builder. + + +Traditional software engineering is built on determinism - given the same input, you always get the same output. We write explicit logic for every scenario: + +```python +# Traditional deterministic approach +def process_customer_request(request_type, data): + if request_type == "refund": + if data["amount"] < 100: + return process_refund(data) + else: + return escalate_to_manager(data) + elif request_type == "complaint": + return create_ticket(data) + # ... 
hundreds more conditions +``` + +Agent engineering embraces non-determinism - the agent interprets intent and figures out the best approach: + +```python +# Agent-based approach +async def handle_customer_request(request, context): + # Agent interprets the request + intent = await analyze_intent(request) + + # Agent decides on approach + plan = await create_action_plan(intent, context.customer_history) + + # Agent executes with available tools + result = await execute_plan(plan, context.available_tools) + + # Agent learns from outcome + await update_knowledge(result, context.memory) + + return result +``` + +This shift requires a new mindset: +- **Design for intent**, not implementation +- **Embrace variability** as a feature, not a bug +- **Think in capabilities**, not functions +- **Trust but verify** - use guardrails and observability + +## Why Agents Need Agent-Native Infrastructure + +[Microsoft's analysis](https://blogs.microsoft.com/blog/2025/05/19/microsoft-build-2025-the-age-of-ai-agents-and-building-the-open-agentic-web/) confirms that "most organizations aren't agent-ready" because their infrastructure wasn't built for autonomous systems. 
+ +### The Infrastructure Mismatch + +Traditional cloud platforms face fundamental limitations when running agents: + +| Traditional Cloud | What Agents Need | The Gap | +|-------------------|------------------|---------| +| 15-second timeouts | Long-running processes | Agents time out mid-thought | +| Stateless by default | Persistent memory | Agents forget everything | +| Distributed to edge | GPU proximity | High latency to AI models | +| Pay per request | Continuous operation | Costs explode unexpectedly | +| Human-centric monitoring | Agent observability | Can't debug agent decisions | + +Major cloud providers are scrambling to adapt: +- **AWS** launched Bedrock AgentCore (still in preview) +- **Google** released their Agent Development Kit (ADK) +- **Microsoft** is retrofitting Azure with agent capabilities + +But retrofitting existing infrastructure is like turning a highway into an airport - technically possible, but not optimal. + +## Enter Agentuity: The Agent-Native Cloud + + +Learn more about the Agent-Native Cloud paradigm in our [Agent-Native Cloud](/Guides/agent-native-cloud) guide. + + +While others retrofit, Agentuity was built from day one specifically for agents. This isn't just marketing - it's a fundamental architectural difference. 
+ +### Purpose-Built for Agents + +Agentuity provides what agents actually need: + +- **Long-running processes**: Agents can think for hours, not seconds +- **Persistent memory**: Built-in [key-value](/Guides/key-value), [vector](/Guides/vector-db), and [object storage](/Guides/object-storage) +- **Agent-to-agent communication**: Secure, encrypted [channels between agents](/Guides/agent-communication) +- **Native observability**: Track agent decisions with [built-in tracing](/Guides/agent-tracing) +- **Automatic scaling**: Based on agent workload, not request count +- **Framework agnostic**: Run LangChain, CrewAI, or custom agents side by side + +### The Agentuity Difference + +Instead of asking "How do we make Lambda work for agents?", we asked "What would infrastructure look like if agents were the only thing that mattered?" + +The result is a platform where: +- Agents are first-class citizens, not adapted workloads +- Memory and state are built-in, not bolted on +- Long-running is the default, not an exception +- Agent communication is native, not a hack + +## Your First Agent: Hello, Autonomous World + +Enough theory - let's build something. We'll create an agent that doesn't just respond to "hello" but actually thinks about how to greet you. + +### Prerequisites + + +For detailed setup instructions, see our [Getting Started Guide](/Introduction/getting-started) and [CLI Installation Guide](/CLI/installation). 
+ + +Before we start, make sure you have: +- Python 3.9+ or Node.js 18+ +- Basic command line knowledge +- An Agentuity account (free tier is fine) + +### Quick Setup + +```bash +# Install the Agentuity CLI (see full installation guide for other OS) +brew tap agentuity/tap && brew install agentuity + +# Verify installation +agentuity --version + +# Login to your account +agentuity auth login +``` + +### Creating Your First Agent Project + +Let's create a simple agent that demonstrates the key concepts we've learned: + +```bash +# Create a new agent project +agentuity create hello-agent --template python-starter +# or for TypeScript: +# agentuity create hello-agent --template typescript-starter + +# Navigate to the project +cd hello-agent + +# Install dependencies (Python) +pip install -r requirements.txt +# or for TypeScript: +# npm install +``` + +### Understanding the Agent Structure + +Here's a simple "Hello Agent" that demonstrates the core concepts: + + { + // Get the request data + const data = await request.data.json(); + const name = data.name || 'World'; + + // Log the request (built-in observability) + context.logger.info(\`Received greeting request for \${name}\`); + + // Check if we've seen this user before (persistent memory) + const visitsKey = \`visits_\${name}\`; + const result = await context.kv.get('user-data', visitsKey); + + let greeting: string; + + if (result.exists) { + // Returning visitor + const visitCount = await result.data.json(); + visitCount.count++; + greeting = \`Welcome back, \${name}! This is visit #\${visitCount.count}\`; + + // Update the visit count + await context.kv.set('user-data', visitsKey, JSON.stringify(visitCount)); + } else { + // First-time visitor + greeting = \`Hello, \${name}! 
Welcome to Agentuity.\`; + + // Store first visit + await context.kv.set('user-data', visitsKey, JSON.stringify({ + count: 1, + first_seen: new Date().toISOString() + })); + } + + // Return the response + return response.json({ + message: greeting, + timestamp: new Date().toISOString(), + agent_info: "Running on Agentuity's agent-native infrastructure" + }); +}; + +export default handler;`} /> + +This simple agent demonstrates key concepts: +- **Memory**: Uses key-value storage to remember visitors +- **Logging**: Built-in observability with context.logger +- **State Management**: Tracks visit counts persistently +- **Request/Response**: Handles JSON input and output + +### Testing Your Agent with DevMode + + +DevMode is Agentuity's local development environment that provides instant feedback, complete observability, and a user-friendly web interface for testing agents. + + +Test your agent using Agentuity's DevMode: + +```bash +# Start DevMode - your agent will be available with a web interface +agentuity dev + +# DevMode will start and show: +# 🚀 DevMode ready +# 🌐 Local: http://localhost:3500 +# 🔗 Public: https://[unique-id].agentuity.dev (for external access) +``` + +Once DevMode is running: +1. Open the provided URL in your browser +2. Select your agent from the dropdown +3. Use the pre-configured prompts or write your own test scenarios +4. Click "Run" to execute tests instantly +5. View real-time logs, costs, and performance metrics in the interface + +#### Making Testing Easier with the Welcome Function + +Add a `welcome()` function to your agent to create clickable test scenarios in DevMode: + + { + return { + welcome: "Welcome to my Hello Agent! 
Try these examples:", + prompts: [ + { + data: JSON.stringify({ name: "Sarah" }), + contentType: "application/json" + }, + { + data: JSON.stringify({ name: "DevMode Tester" }), + contentType: "application/json" + }, + { + data: JSON.stringify({ name: "" }), // Test missing name + contentType: "application/json" + } + ] + }; +}; + +const handler: AgentHandler = async (request, response, context) => { + // Your existing agent code...`} /> + +These prompts appear as clickable buttons in the DevMode interface. Try calling the agent multiple times with the same name - it will remember you, and you can see the memory operations in the real-time logs! + +### Deploying to the Agentuity Cloud + +Now let's deploy this agent to production: + +```bash +# Deploy to Agentuity's agent-native infrastructure +agentuity deploy + +# Your agent is now running at: +# https://your-project.agentuity.com/agents/hello +``` + +That's it! Your agent is now: +- Running in long-running infrastructure (no timeout worries) +- Storing memory persistently (survives restarts) +- Fully observable (check logs and traces in the dashboard) +- Auto-scaling based on load + +## Lab: Extending Your Agent + +Now that you have a working agent, let's enhance it with more capabilities. Use DevMode to test each enhancement as you build it. 
+ +### Challenge 1: Add Time-Based Context +Modify your agent to greet differently based on the time of day: + + { + const data = await request.data.json(); + const name = data.name || 'World'; + + const hour = new Date().getHours(); + let timeGreeting: string; + + if (hour < 12) { + timeGreeting = 'Good morning'; + } else if (hour < 17) { + timeGreeting = 'Good afternoon'; + } else { + timeGreeting = 'Good evening'; + } + + return response.json({ + message: \`\${timeGreeting}, \${name}!\`, + local_time: new Date().toISOString() + }); +};`} /> + +### Challenge 2: Track User Preferences +Store and retrieve user preferences using key-value storage: + + { + const data = await request.data.json(); + const userId = data.userId; + const action = data.action; + + if (action === 'set_preference') { + const preference = data.preference; + await context.kv.set('preferences', userId, JSON.stringify(preference)); + return response.json({ message: 'Preference saved' }); + + } else if (action === 'get_preference') { + const result = await context.kv.get('preferences', userId); + if (result.exists) { + const pref = await result.data.json(); + return response.json({ preference: pref }); + } + return response.json({ message: 'No preference found' }); + } +};`} /> + +### Challenge 3: Add Logging and Error Handling +Implement proper error handling and logging: + + { + try { + const data = await request.data.json(); + const name = data.name; + + if (!name) { + context.logger.warn('No name provided in request'); + return response.json({ error: 'Name is required' }); + } + + context.logger.info(\`Processing request for \${name}\`); + + // Your agent logic here + return response.json({ message: \`Hello, \${name}!\` }); + + } catch (error) { + context.logger.error(\`Error processing request: \${error}\`); + return response.json({ error: 'Internal server error' }); + } +};`} /> + +### Testing Your Enhancements + +After implementing each challenge: +1. 
Update your `welcome()` function with relevant test scenarios +2. Run `agentuity dev` to start DevMode +3. Use the DevMode interface to test your changes +4. Monitor the logs to verify your agent behaves correctly +5. Check the Sessions tab to track performance and costs + + +For more examples, check our [Templates](/Introduction/templates) and [Examples](/Examples) sections. + + +## Key Takeaways + +- **Agents are different**: They're autonomous systems, not just API wrappers around LLMs +- **The market is massive**: $47B by 2030, with many developers already building agents +- **Infrastructure matters**: Traditional cloud wasn't built for agents' unique needs +- **Non-determinism is a feature**: Agents adapt and reason; they don't just execute +- **Agentuity is agent-native**: Purpose-built infrastructure for agents, not retrofitted + +## What's Next? + +You've just built and deployed your first agent on infrastructure designed specifically for agents. In the next module, we'll dive deeper into the anatomy of an agent - understanding planning, reasoning loops, tool invocation, and how agents really work under the hood. + +But first, take a moment to experiment with your agent. Try: +- Calling it with different names and at different times +- Checking the logs in the Agentuity dashboard +- Modifying the personality and responses +- Adding more sophisticated memory patterns + +Remember: you're not just learning a new framework - you're learning a fundamentally new way to build software. Welcome to the age of agents! 
+ +--- + +**Ready for Module 2?** [The Anatomy of an Agent](./02-anatomy-of-an-agent) \ No newline at end of file diff --git a/content/Training/developers/02-anatomy-of-an-agent.mdx b/content/Training/developers/02-anatomy-of-an-agent.mdx new file mode 100644 index 00000000..0df46db8 --- /dev/null +++ b/content/Training/developers/02-anatomy-of-an-agent.mdx @@ -0,0 +1,981 @@ +--- +title: The Anatomy of an Agent +description: Understanding how agents work - planning, reasoning, tools, and memory +--- + +# Module 2: The Anatomy of an Agent + +Now that you've built your first agent, let's dive deeper to understand what makes agents tick. We'll explore the core components that transform a simple script into an autonomous, intelligent system. + +## The Agent Lifecycle + +Every agent interaction follows a predictable lifecycle, from receiving a trigger to returning a response. Understanding this flow is crucial for building effective agents. + +```mermaid +graph TD + A[Trigger Event] --> B[Request Handler] + B --> C[Context Initialization] + C --> D[Planning Phase] + D --> E[Reasoning Loop] + E --> F[Tool Invocation] + F --> G[Memory Update] + G --> H[Response Generation] + H --> I[Return Response] + E -->|Continue Loop| E +``` + +Let's explore each phase: + +### 1. Trigger Events + +Agents spring into action when triggered. 
Agentuity supports multiple trigger types: + +| Trigger Type | Description | Use Case | +|-------------|-------------|----------| +| **webhook** | HTTP endpoint call | REST APIs, external integrations | +| **cron** | Scheduled execution | Batch processing, periodic tasks | +| **manual** | Console/CLI invocation | Testing, one-off tasks | +| **agent** | Called by another agent | Multi-agent workflows | +| **sms** | SMS message received | Text-based interactions | +| **email** | Email received | Email automation | +| **queue** | Message queue event | Async processing | + +Each trigger provides different metadata and context: + + { + // Identify the trigger type + const trigger = request.trigger; + + if (trigger === 'webhook') { + // Handle HTTP request + const data = await request.data.json(); + context.logger.info(\`Webhook triggered with data: \${JSON.stringify(data)}\`); + + } else if (trigger === 'cron') { + // Handle scheduled execution + context.logger.info(\`Cron job running at \${new Date().toISOString()}\`); + // No input data for cron triggers + + } else if (trigger === 'agent') { + // Handle agent-to-agent call + const callingAgent = request.metadata.get('source_agent'); + context.logger.info(\`Called by agent: \${callingAgent}\`); + } + + return response.json({ + trigger_type: trigger, + processed_at: new Date().toISOString() + }); +};`} /> + +### 2. Request and Response Handling + +Every agent receives a request and must return a response. 
The request contains the trigger data and metadata, while the response can be in various formats: + +Hello") + + # Binary response (for files) + # return response.binary(file_bytes) + + # Redirect to another agent + # return response.handoff({"name": "other-agent"}, data, metadata={})`} js={`const handler: AgentHandler = async (request, response, context) => { + // Access request data in different formats + + // JSON data (most common) + const jsonData = await request.data.json(); + + // Plain text + const textData = await request.data.text(); + + // Binary data + const binaryData = await request.data.binary(); + + // Base64 encoded + const base64Data = await request.data.base64(); + + // Response options + + // JSON response (most common) + return response.json({ result: 'success' }); + + // Text response + // return response.text('Hello, World!'); + + // HTML response + // return response.html('

Hello

'); + + // Binary response (for files) + // return response.binary(fileBytes); + + // Redirect to another agent + // return response.handoff({ name: 'other-agent' }, { data: jsonData, contentType: 'application/json' }); +};`} /> + +### 3. The Agent Context + +The context object is your agent's gateway to Agentuity's services. It provides access to storage, logging, tracing, and more: + + { + // Logging at different levels + context.logger.debug('Debug information'); + context.logger.info('General information'); + context.logger.warn('Warning message'); + context.logger.error('Error occurred'); + + // Access metadata + const agentId = context.agent.id; + const agentName = context.agent.name; + const projectId = context.projectId; + const runId = context.runId; + + // Key-Value storage + await context.kv.set('cache', 'key', JSON.stringify({ data: 'value' })); + const result = await context.kv.get('cache', 'key'); + + // Vector storage for semantic search + await context.vector.upsert('docs', { + key: 'doc1', + document: 'AI agents are autonomous systems', + metadata: { category: 'intro' } + }); + + // Object storage for files + await context.objectstore.put('files', 'report.pdf', pdfBytes); + + return response.json({ + agent: agentName, + run: runId + }); +};`} /> + +## Planning and Reasoning: The Agent's Brain + +What separates agents from simple scripts is their ability to plan and reason. While Agentuity provides the infrastructure, you implement the intelligence. 
+ +### The Planning Phase + +Planning involves breaking down a complex request into manageable steps: + + { + const data = await request.data.json(); + + // Create a plan + const plan = await createPlan(data, context); + + // Execute the plan + const results = []; + for (const step of plan) { + context.logger.info(\`Executing: \${step.action}\`); + const result = await executeStep(step, context); + results.push(result); + } + + return response.json({ plan, results }); +};`} /> + +### The Reasoning Loop + +The reasoning loop is where agents adapt based on results: + + + +## Tool Invocation: Extending Agent Capabilities + +Agents become powerful when they can use tools. Tools can be APIs, databases, or even other agents: + + Dict[str, Any]: + """Search the web for information.""" + async with httpx.AsyncClient() as client: + response = await client.get( + "https://api.search.com/v1/search", + params={"q": query} + ) + return response.json() + + @staticmethod + async def calculate(expression: str) -> float: + """Perform mathematical calculations.""" + # WARNING: simplified example - eval() below executes arbitrary code and the ops table is unused; use an AST-walking evaluator in production + import ast + import operator as op + + ops = { + ast.Add: op.add, + ast.Sub: op.sub, + ast.Mult: op.mul, + ast.Div: op.truediv, + } + + def eval_expr(expr): + return eval(compile(ast.parse(expr, mode='eval'), '', 'eval')) + + return eval_expr(expression) + + @staticmethod + async def send_email(to: str, subject: str, body: str) -> bool: + """Send an email.""" + # Email sending logic + return True + +async def run(request, response, context): + data = await request.data.json() + action = data.get("action") + + tools = AgentTools() + + if action == "research": + # Use web search tool + results = await tools.web_search(data["query"]) + return response.json({"search_results": results}) + + elif action == "calculate": + # Use calculator tool + result = await tools.calculate(data["expression"]) + return response.json({"result": result}) + + elif action == "notify": + # Use email tool + 
sent = await tools.send_email( + data["email"], + data["subject"], + data["message"] + ) + return response.json({"sent": sent})`} js={`import axios from 'axios'; + +class AgentTools { + static async webSearch(query: string) { + const response = await axios.get('https://api.search.com/v1/search', { + params: { q: query } + }); + return response.data; + } + + static async calculate(expression: string): Promise { + // Use a safe math evaluation library + // This is a simplified example + return eval(expression); // In production, use a safe math parser + } + + static async sendEmail(to: string, subject: string, body: string): Promise { + // Email sending logic + return true; + } +} + +const handler: AgentHandler = async (request, response, context) => { + const data = await request.data.json(); + const action = data.action; + + if (action === 'research') { + // Use web search tool + const results = await AgentTools.webSearch(data.query); + return response.json({ search_results: results }); + + } else if (action === 'calculate') { + // Use calculator tool + const result = await AgentTools.calculate(data.expression); + return response.json({ result }); + + } else if (action === 'notify') { + // Use email tool + const sent = await AgentTools.sendEmail( + data.email, + data.subject, + data.message + ); + return response.json({ sent }); + } +};`} /> + +## Memory Updates: Learning and Persistence + +Memory is what allows agents to learn and improve over time. 
Agentuity provides three types of memory storage: + +### Short-term Memory (Key-Value) +For session data, cache, and temporary state: + + { + const sessionId = request.metadata.get('session_id'); + + // Store conversation context + await context.kv.set( + 'sessions', + sessionId, + JSON.stringify({ + messages: [], + context: {}, + timestamp: new Date().toISOString() + }), + { ttl: 3600 } // Expires in 1 hour + ); +};`} /> + +### Long-term Memory (Vector Storage) +For knowledge base and semantic search: + + { + // Store learned information + await context.vector.upsert('knowledge', { + key: 'fact_001', + document: 'Users prefer concise responses', + metadata: { + learned_from: 'user_feedback', + confidence: 0.85 + } + }); + + // Retrieve relevant knowledge + const results = await context.vector.search('knowledge', { + query: 'How should I format responses?', + limit: 5 + }); +};`} /> + +## Framework Awareness: Choosing the Right Tool + + +Agentuity is framework-agnostic. You can use any AI framework or build custom agents from scratch. + + +### Popular Agent Frameworks + +Different frameworks excel at different tasks. 
Here's when to use each: + +| Framework | Best For | Agentuity Integration | +|-----------|----------|----------------------| +| **LangChain** | Complex chains, RAG applications | Native SDK support | +| **CrewAI** | Multi-agent teams, role-based systems | Deploy via container | +| **AutoGen** | Research, experimentation | Direct deployment | +| **Custom** | Specific requirements, full control | Full platform features | + +### Framework Comparison Example + +Here's the same agent built with different approaches: + + { + const data = await request.data.json(); + // Direct implementation + const result = processData(data); + return response.json(result); +}; + +// LangChain Agent +import { initializeAgentExecutor } from 'langchain/agents'; +import { OpenAI } from 'langchain/llms/openai'; + +const handler: AgentHandler = async (request, response, context) => { + const llm = new OpenAI(); + const agent = await initializeAgentExecutor(tools, llm, 'zero-shot'); + const result = await agent.call({ input: await request.data.text() }); + return response.text(result.output); +}; + +// Custom Multi-Agent System +const handler: AgentHandler = async (request, response, context) => { + const researcher = new Agent('Researcher', 'Find information'); + const writer = new Agent('Writer', 'Create content'); + + const researchResult = await researcher.execute(data); + const writeResult = await writer.execute(researchResult); + return response.json(writeResult); +};`} /> + +### When to Use Which Framework + +- **Use LangChain** when you need: + - Complex reasoning chains + - RAG (Retrieval Augmented Generation) + - Extensive tool integration + - Well-documented patterns + +- **Use CrewAI** when you need: + - Multiple specialized agents + - Role-based collaboration + - Hierarchical task delegation + +- **Use Custom** when you need: + - Maximum performance + - Specific business logic + - Full control over behavior + - Minimal dependencies + +## Lab: Building a Multi-Trigger Agent + 
+Let's put it all together by building an agent that responds to multiple triggers. We'll test it using DevMode's web interface: + + { + const trigger = request.trigger; + context.logger.info(\`Agent triggered via: \${trigger}\`); + + if (trigger === 'webhook') { + // Handle API requests + const data = await request.data.json(); + + // Process based on action + const action = data.action; + if (action === 'store') { + // Store data in KV + await context.kv.set('data', data.key, JSON.stringify(data.value)); + return response.json({ status: 'stored', key: data.key }); + + } else if (action === 'retrieve') { + // Get from KV + const result = await context.kv.get('data', data.key); + if (result.exists) { + const value = await result.data.json(); + return response.json({ found: true, value }); + } + return response.json({ found: false }); + } + + // Handle DevMode simulation of cron for testing + if (data._simulate_cron) { + context.logger.info('Simulating cron trigger in DevMode'); + // Run the same cleanup logic + const report = { + task: 'cleanup (simulated)', + timestamp: new Date().toISOString(), + status: 'completed' + }; + return response.json(report); + } + + } else if (trigger === 'cron') { + // Scheduled cleanup task + context.logger.info('Running scheduled cleanup'); + + // Clean old sessions + await context.kv.delete('sessions', 'old_session'); + + // Generate report + const report = { + task: 'cleanup', + timestamp: new Date().toISOString(), + status: 'completed' + }; + + // Store report + await context.kv.set('reports', 'latest_cleanup', JSON.stringify(report)); + return response.json(report); + + } else if (trigger === 'agent') { + // Handle agent-to-agent communication + const data = await request.data.json(); + + // Process request from another agent + const result = \`Processed request from agent: \${JSON.stringify(data)}\`; + + return response.json({ result }); + + } else { + return response.json({ + error: \`Unknown trigger type: \${trigger}\` + }); 
+ } +}; + +export default handler;`} /> + +### Testing Your Multi-Trigger Agent + + +DevMode makes it easy to test webhook triggers locally. For cron triggers, you can simulate them in DevMode or configure real schedules through the Agentuity Console. + + +#### Setting Up Test Scenarios + +First, enhance your agent with a `welcome()` function to create test scenarios: + + { + return { + welcome: "Multi-Trigger Agent Test Suite", + prompts: [ + // Webhook trigger tests + { + data: JSON.stringify({ action: "store", key: "test", value: "data" }), + contentType: "application/json" + }, + { + data: JSON.stringify({ action: "retrieve", key: "test" }), + contentType: "application/json" + }, + // Simulate what a cron trigger would do + { + data: JSON.stringify({ _simulate_cron: true }), + contentType: "application/json" + }, + // Simulate agent-to-agent communication + { + data: JSON.stringify({ message: "Hello from another agent", source_agent: "agent_123" }), + contentType: "application/json" + } + ] + }; +};`} /> + +#### Testing in DevMode + +1. **Start DevMode:** +```bash +agentuity dev +``` + +2. **Open the DevMode interface** and click the pre-configured prompts to test different scenarios + +3. **Monitor the execution** in the Logs and Sessions tabs + +#### Configuring Cron Triggers for Production + +For actual scheduled execution, configure cron triggers through the [Agentuity Console](https://app.agentuity.com): + +1. Navigate to your agent in the Console +2. Click on **"Add Trigger"** and select **"Cron Job"** +3. Configure the schedule using cron syntax (e.g., `0 * * * *` for hourly) +4. Set the Content Type and optional Payload +5. Enable the trigger and save + + +Learn more about configuring triggers in the [Agent Configuration Guide](/Cloud/agents). 
+ + +## Performance Considerations + +Building efficient agents requires thinking about performance from the start: + +### Cost Optimization + +Effective cost optimization strategies include: + +- **Cache frequently used data** in KV storage to reduce repeated processing +- **Batch operations** when possible to minimize API calls +- **Use appropriate LLM models** - not everything needs GPT-4 +- **Implement early exits** in reasoning loops to avoid unnecessary iterations + +### Latency vs. Capability Trade-offs + +| Approach | Latency | Capability | Use Case | +|----------|---------|------------|----------| +| Simple rules | <100ms | Low | Basic routing | +| Small LLM | 200-500ms | Medium | Classification | +| Large LLM | 1-3s | High | Complex reasoning | +| Multi-agent | 3-10s | Very High | Research tasks | + + { + const data = await request.data.json(); + const complexity = assessComplexity(data); + + if (complexity === 'simple') { + // Use rules-based approach + const result = applyRules(data); + return response.json(result); + + } else if (complexity === 'medium') { + // Use small, fast model + const result = await smallModelProcess(data); + return response.json(result); + + } else { + // Use full agent capabilities + const result = await fullAgentProcess(data, context); + return response.json(result); + } +};`} /> + +## Key Takeaways + +- **Agents follow a lifecycle**: Trigger → Plan → Reason → Execute → Remember → Respond +- **Multiple trigger types**: Choose the right trigger for your use case +- **Context is powerful**: It provides access to all Agentuity services +- **Frameworks are tools**: Pick the right one for your needs, or go custom +- **Memory enables learning**: Use KV for short-term, Vector for long-term +- **Performance matters**: Balance capability with cost and latency + +## What's Next? + +You now understand the core components that make agents work. 
In the next module, we'll dive deep into memory systems - how agents remember, learn, and improve over time. + +But first, experiment with the multi-trigger agent: +- Add a new trigger type (email or SMS) +- Implement a simple reasoning loop +- Try different response formats +- Connect two agents together + +Remember: The anatomy you've learned here is the foundation. The intelligence you add on top is what makes your agents unique. + +--- + +**Ready for Module 3?** [Agent Memory](./03-agent-memory) \ No newline at end of file diff --git a/content/Training/developers/03-agent-memory.mdx b/content/Training/developers/03-agent-memory.mdx new file mode 100644 index 00000000..246ec9e3 --- /dev/null +++ b/content/Training/developers/03-agent-memory.mdx @@ -0,0 +1,1186 @@ +--- +title: Agent Memory +description: How agents remember, learn, and build context over time +--- + +# Module 3: Agent Memory + +Without memory, an agent is just a stateless function. With memory, it becomes a system that learns, adapts, and builds relationships over time. + +## The Memory Challenge + +According to [IBM's research on AI agent memory](https://www.ibm.com/think/topics/ai-agent-memory), the biggest limitation of current LLMs is their inability to retain information between sessions. Every conversation starts from scratch, every user must re-explain their context, and every task begins without learning from the past. + +As highlighted in recent [memory management research](https://medium.com/@nomannayeem/building-ai-agents-that-actually-remember-a-developers-guide-to-memory-management-in-2025-062fd0be80a1): + +> "The difference between a chatbot and an agent is memory. A chatbot responds to the current message. An agent remembers your entire relationship." 
+ +Without memory that persists between sessions, agents face fundamental challenges: +- **Context Loss**: Users repeatedly explain their situation +- **No Learning**: Agents can't improve from past interactions +- **Limited Personalization**: Every user gets the same generic experience +- **Inefficient Operations**: Recomputing information that should be cached + +## Understanding Agent Memory Types + + +For implementation details on Agentuity's storage systems, see our guides on [Key-Value Storage](/Guides/key-value), [Vector Database](/Guides/vector-db), and [Object Storage](/Guides/object-storage). + + +### The Memory Hierarchy + +Just like human memory, agent memory operates at different levels: + +```mermaid +graph TD + A[Working Memory] --> B[Short-term Memory] + B --> C[Long-term Memory] + C --> D[Collective Memory] + + A1[Current Request Context] --> A + B1[Session State / Cache] --> B + C1[User History / Knowledge Base] --> C + D1[Shared Agent Knowledge] --> D +``` + +Let's explore each level: + +### 1. Working Memory (Request Context) +**Lifetime**: Single request +**Purpose**: Process current task +**Storage**: In-memory variables + +```python +# Working memory exists only during request processing +async def run(request, response, context): + # These variables are working memory + user_intent = analyze_request(request) + current_plan = create_plan(user_intent) + execution_result = execute(current_plan) +``` + +### 2. 
Short-term Memory (Session State) +**Lifetime**: Minutes to hours +**Purpose**: Maintain conversation context +**Storage**: Key-Value with TTL + +```python +# Short-term memory for session continuity +async def run(request, response, context): + session_id = request.metadata.get("session_id") + + # Retrieve session context + session_data = await context.kv.get("sessions", session_id) + + if session_data.exists: + conversation = await session_data.data.json() + conversation["messages"].append(user_message) + else: + conversation = {"messages": [user_message], "started": datetime.now()} + + # Update with TTL for automatic cleanup + await context.kv.set("sessions", session_id, conversation, {"ttl": 3600}) +``` + +### 3. Long-term Memory (Persistent Knowledge) +**Lifetime**: Permanent until deleted +**Purpose**: Build user relationships and domain knowledge +**Storage**: Vector database for semantic search + +```python +# Long-term memory for learning and relationships +async def run(request, response, context): + user_id = request.metadata.get("user_id") + + # Store learned preferences + await context.vector.upsert("user_knowledge", { + "key": f"pref_{user_id}_{datetime.now()}", + "document": "User prefers technical explanations with code examples", + "metadata": { + "user_id": user_id, + "confidence": 0.9, + "learned_from": "conversation_analysis" + } + }) + + # Retrieve relevant memories + memories = await context.vector.search( + "user_knowledge", + f"What do I know about user {user_id}?", + limit=5, + similarity=0.7 + ) +``` + +### 4. Collective Memory (Shared Knowledge) +**Lifetime**: Permanent, shared across agents +**Purpose**: Organizational knowledge base +**Storage**: Vector database with access controls + +## Structured vs. 
Unstructured Memory + +Different types of information require different storage approaches: + +### Structured Memory (Key-Value Storage) +Best for discrete, queryable data: +- User preferences +- Session state +- Configuration settings +- Counters and metrics + + { + const userId = request.metadata.get('user_id'); + + // Store structured user profile + const profile = { + name: 'Alice Smith', + preferences: { + language: 'typescript', + timezone: 'America/New_York', + communication_style: 'formal' + }, + metrics: { + total_interactions: 42, + last_seen: new Date().toISOString() + } + }; + + await context.kv.set('users', userId, JSON.stringify(profile)); +};`} /> + +### Unstructured Memory (Vector Storage) +Best for semantic information: +- Conversation history +- Domain knowledge +- User feedback +- Document content + + { + // Store conversation for semantic retrieval + await context.vector.upsert('conversations', { + key: \`conv_\${new Date().toISOString()}\`, + document: "User asked about deploying agents in production. " + + "They're concerned about costs and scaling. 
" + + "Recommended starting with dev environment.", + metadata: { + user_id: userId, + topic: 'deployment', + sentiment: 'cautious', + timestamp: new Date().toISOString() + } + }); + + // Later, retrieve relevant context + const similarConvos = await context.vector.search('conversations', { + query: 'deployment and scaling concerns', + limit: 3 + }); +};`} /> + +### Binary Memory (Object Storage) +Best for files and large data: +- Generated reports +- User uploads +- Media files +- Model outputs + + { + // Store generated report + const reportData = generatePdfReport(analysisResults); + const reportKey = \`reports/\${userId}/\${new Date().toISOString().split('T')[0]}_analysis.pdf\`; + + await context.objectstore.put( + 'documents', + reportKey, + reportData, + { contentType: 'application/pdf' } + ); + + // Create public URL for sharing (expires in 1 hour) + const shareUrl = await context.objectstore.createPublicURL( + 'documents', + reportKey, + 3600000 // milliseconds + ); +};`} /> + +## Memory Patterns and Best Practices + +### Pattern 1: Conversation Memory with Sliding Window + +Keep recent conversation context while managing memory size: + + max_messages: + # Before removing, summarize older messages + summary = await summarize_messages(messages[:10]) + await context.vector.upsert("summaries", { + "key": f"{session_id}_summary_{datetime.now()}", + "document": summary, + "metadata": {"session_id": session_id} + }) + messages = messages[-max_messages:] + + # Process with context + agent_response = await process_with_context(messages) + + # Add agent response + messages.append({ + "role": "agent", + "content": agent_response, + "timestamp": datetime.now().isoformat() + }) + + # Save updated conversation + await context.kv.set("conversations", session_id, { + "messages": messages, + "updated": datetime.now().isoformat() + }, {"ttl": 7200}) # 2 hour TTL + + return response.json({"message": agent_response})`} js={`const handler: AgentHandler = async (request, 
response, context) => { + const sessionId = request.metadata.get('session_id'); + const maxMessages = 20; // Keep last 20 messages + + // Get existing conversation + const result = await context.kv.get('conversations', sessionId); + + let messages = []; + if (result.exists) { + const convo = await result.data.json(); + messages = convo.messages; + } + + // Add new message + messages.push({ + role: 'user', + content: await request.data.text(), + timestamp: new Date().toISOString() + }); + + // Sliding window - keep only recent messages + if (messages.length > maxMessages) { + // Before removing, summarize older messages + const summary = await summarizeMessages(messages.slice(0, 10)); + await context.vector.upsert('summaries', { + key: \`\${sessionId}_summary_\${new Date().toISOString()}\`, + document: summary, + metadata: { session_id: sessionId } + }); + messages = messages.slice(-maxMessages); + } + + // Process with context + const agentResponse = await processWithContext(messages); + + // Add agent response + messages.push({ + role: 'agent', + content: agentResponse, + timestamp: new Date().toISOString() + }); + + // Save updated conversation + await context.kv.set('conversations', sessionId, JSON.stringify({ + messages, + updated: new Date().toISOString() + }), { ttl: 7200 }); // 2 hour TTL + + return response.json({ message: agentResponse }); +};`} /> + +### Pattern 2: Personalization Through Learning + +Build user profiles over time: + + +**Metadata Filtering in Vector Search**: The `metadata` parameter in vector search is used to filter results, not just for returning metadata. When you specify `metadata={"user_id": user_id}` in Python or `metadata: { user_id: userId }` in JavaScript, the search will only return vectors that match those metadata criteria. This is useful for isolating user-specific memories or filtering by any other metadata field you've stored. 
+ + + { + const userId = request.metadata.get('user_id'); + const userInput = await request.data.text(); + + // Retrieve user profile + const profileResult = await context.kv.get('profiles', userId); + + let profile; + if (profileResult.exists) { + profile = await profileResult.data.json(); + } else { + profile = { + interaction_count: 0, + topics: {}, + preferences: {}, + created: new Date().toISOString() + }; + } + + // Update interaction count + profile.interaction_count++; + + // Analyze input for topics and preferences + const analysis = await analyzeUserInput(userInput); + + // Update topic frequencies + for (const topic of analysis.topics) { + profile.topics[topic] = (profile.topics[topic] || 0) + 1; + } + + // Store notable preferences as vectors for semantic search + if (analysis.preferences) { + for (const pref of analysis.preferences) { + await context.vector.upsert('user_preferences', { + key: \`\${userId}_\${hashString(pref)}\`, + document: pref, + metadata: { + user_id: userId, + confidence: analysis.confidence, + learned_at: new Date().toISOString() + } + }); + } + } + + // Get relevant memories for this conversation + const memories = await context.vector.search('user_preferences', { + query: userInput, + limit: 3, + metadata: { user_id: userId } + }); + + // Generate personalized response + const responseText = await generateResponse( + userInput, + profile, + memories + ); + + // Save updated profile + await context.kv.set('profiles', userId, JSON.stringify(profile)); + + return response.json({ + message: responseText, + personalized: true, + interaction_number: profile.interaction_count + }); +};`} /> + +### Pattern 3: Semantic Knowledge Base + +Build a searchable knowledge base from interactions: + + { + const { action } = await request.data.json(); + + if (action === 'learn') { + // Extract and store knowledge + const knowledge = await request.data.json(); + + await context.vector.upsert('knowledge_base', { + key: \`fact_\${Date.now()}\`, + 
document: knowledge.fact, + metadata: { + category: knowledge.category || 'general', + source: knowledge.source || 'user_provided', + confidence: knowledge.confidence || 0.8, + tags: knowledge.tags || [], + created: new Date().toISOString() + } + }); + + return response.json({ status: 'learned', fact: knowledge.fact }); + + } else if (action === 'query') { + // Search the knowledge base + const { query } = await request.data.json(); + + // Semantic search + const results = await context.vector.search('knowledge_base', { + query, + limit: 10, + similarity: 0.6 + }); + + // Format results with relevance scores + const formattedResults = results.map(result => ({ + fact: result.document, + relevance: result.similarity, + metadata: result.metadata + })); + + return response.json({ + query, + results: formattedResults, + count: formattedResults.length + }); + + } else if (action === 'analyze') { + // Analyze knowledge base patterns + const { topic } = await request.data.json(); + + // Get all related facts + const facts = await context.vector.search('knowledge_base', { + query: topic, + limit: 50, + similarity: 0.5 + }); + + // Analyze patterns + const analysis = { + topic, + fact_count: facts.length, + categories: {}, + confidence_avg: 0, + sources: {} + }; + + for (const fact of facts) { + const category = fact.metadata?.category || 'unknown'; + analysis.categories[category] = (analysis.categories[category] || 0) + 1; + + const source = fact.metadata?.source || 'unknown'; + analysis.sources[source] = (analysis.sources[source] || 0) + 1; + + analysis.confidence_avg += fact.metadata?.confidence || 0; + } + + if (facts.length > 0) { + analysis.confidence_avg /= facts.length; + } + + return response.json(analysis); + } +};`} /> + +## Memory Management Strategies + +### TTL (Time To Live) Strategies + +Different memory types require different retention policies: + +| Memory Type | Recommended TTL | Use Case | +|------------|-----------------|----------| +| Session state | 
1-2 hours | Active conversations | +| Daily cache | 24 hours | Frequently accessed data | +| User preferences | No TTL | Permanent personalization | +| Temporary results | 5-15 minutes | Computation cache | +| Audit logs | 30-90 days | Compliance requirements | + +### Memory Sizing Guidelines + +Plan your memory usage based on agent patterns: + +```python +# Calculate memory requirements +def estimate_memory_needs(users, interactions_per_day): + # Key-Value Storage + profile_size = 2 # KB per user profile + session_size = 10 # KB per active session + kv_total = (users * profile_size) + (users * 0.1 * session_size) + + # Vector Storage + vectors_per_user = interactions_per_day * 30 # 30 days retention + vector_size = 0.5 # KB per vector + vector_total = users * vectors_per_user * vector_size + + # Object Storage + reports_per_user = 10 # Average files per user + avg_file_size = 500 # KB per file + object_total = users * reports_per_user * avg_file_size + + return { + "kv_storage_gb": kv_total / 1024 / 1024, + "vector_storage_gb": vector_total / 1024 / 1024, + "object_storage_gb": object_total / 1024 / 1024 + } +``` + +### Memory Cleanup Patterns + +Implement automated cleanup to manage costs: + + { + // Scheduled cleanup agent (triggered by cron) + if (request.trigger === 'cron') { + context.logger.info('Running memory cleanup'); + + // Archive old conversations to object storage + const cutoffDate = new Date(); + cutoffDate.setDate(cutoffDate.getDate() - 30); + + // In production, you'd iterate through keys with a pattern + // This is a simplified example + const oldSessions = await getOldSessions(cutoffDate); + + for (const sessionId of oldSessions) { + // Get conversation + const result = await context.kv.get('conversations', sessionId); + if (result.exists) { + const convo = await result.data.json(); + + // Archive to object storage + const archiveKey = \`archives/\${sessionId}/\${cutoffDate.toISOString()}.json\`; + await context.objectstore.put( + 'archives', + 
archiveKey, + JSON.stringify(convo), + { contentType: 'application/json' } + ); + + // Delete from KV + await context.kv.delete('conversations', sessionId); + + context.logger.info(\`Archived session \${sessionId}\`); + } + } + + return response.json({ + task: 'cleanup', + archived_sessions: oldSessions.length, + timestamp: new Date().toISOString() + }); + } +};`} /> + +## Lab: Building a Memory-Powered Customer Service Agent + +Let's build an agent that remembers customer interactions and provides increasingly personalized support: + + 0.8: + is_recurring = True + break + + # Store this interaction + interaction_key = f"{user_id}_{datetime.now().timestamp()}" + await context.vector.upsert("support_history", { + "key": interaction_key, + "document": message, + "metadata": { + "user_id": user_id, + "timestamp": datetime.now().isoformat(), + "order_id": data.get("order_id"), + "resolved": False + } + }) + + # Add to profile issues + profile["issues"].append({ + "message": message, + "timestamp": datetime.now().isoformat(), + "recurring": is_recurring + }) + + # Generate response based on history + if is_recurring: + response_text = ( + f"I see you've contacted us about this before. " + f"Let me escalate this to our senior support team immediately. " + f"Your case has been prioritized." + ) + priority = "high" + elif profile["interaction_count"] > 5: + response_text = ( + f"Thank you for being a valued customer. " + f"I'm looking into your issue right away." + ) + priority = "medium" + else: + response_text = ( + f"Thank you for contacting support. " + f"I'll help you with your issue." 
+ ) + priority = "normal" + + # Save updated profile + await context.kv.set("customers", user_id, profile) + + # Log for analytics + context.logger.info(f"Support request from {user_id}: {priority} priority") + + return response.json({ + "response": response_text, + "is_recurring": is_recurring, + "interaction_number": profile["interaction_count"], + "priority": priority, + "similar_issues_found": len(past_issues) + }) + + elif action == "get_history": + # Retrieve customer history + profile_result = await context.kv.get("customers", user_id) + + if not profile_result.exists: + return response.json({ + "error": "No history found for this customer" + }) + + profile = await profile_result.data.json() + + # Get recent issues from vector storage + recent_issues = await context.vector.search( + "support_history", + f"all issues for user {user_id}", + limit=10, + metadata={"user_id": user_id} + ) + + return response.json({ + "customer_profile": profile, + "recent_issues": [ + { + "message": issue.document, + "metadata": issue.metadata + } + for issue in recent_issues + ] + }) + + return response.json({ + "error": "Unknown action" + })`} js={`const welcome = () => { + return { + welcome: "Customer Service Agent - I remember you!", + prompts: [ + { + data: JSON.stringify({ + action: "support", + user_id: "customer_123", + message: "My order hasn't arrived yet", + order_id: "ORD-789" + }), + contentType: "application/json" + }, + { + data: JSON.stringify({ + action: "support", + user_id: "customer_123", + message: "Still waiting for my order" + }), + contentType: "application/json" + }, + { + data: JSON.stringify({ + action: "get_history", + user_id: "customer_123" + }), + contentType: "application/json" + } + ] + }; +}; + +const handler: AgentHandler = async (request, response, context) => { + const data = await request.data.json(); + const { action, user_id } = data; + + if (action === 'support') { + const { message } = data; + + // Get customer profile + const 
profileResult = await context.kv.get('customers', user_id); + let profile; + + if (profileResult.exists) { + profile = await profileResult.data.json(); + profile.interaction_count = (profile.interaction_count || 0) + 1; + } else { + profile = { + user_id, + first_contact: new Date().toISOString(), + interaction_count: 1, + issues: [] + }; + } + + // Search for similar past issues + const pastIssues = await context.vector.search('support_history', { + query: message, + limit: 3, + metadata: { user_id } + }); + + // Determine if this is a recurring issue + let isRecurring = false; + for (const issue of pastIssues) { + if (issue.similarity > 0.8) { + isRecurring = true; + break; + } + } + + // Store this interaction + const interactionKey = \`\${user_id}_\${Date.now()}\`; + await context.vector.upsert('support_history', { + key: interactionKey, + document: message, + metadata: { + user_id, + timestamp: new Date().toISOString(), + order_id: data.order_id, + resolved: false + } + }); + + // Add to profile issues + profile.issues.push({ + message, + timestamp: new Date().toISOString(), + recurring: isRecurring + }); + + // Generate response based on history + let responseText, priority; + + if (isRecurring) { + responseText = + "I see you've contacted us about this before. " + + "Let me escalate this to our senior support team immediately. " + + "Your case has been prioritized."; + priority = 'high'; + } else if (profile.interaction_count > 5) { + responseText = + "Thank you for being a valued customer. " + + "I'm looking into your issue right away."; + priority = 'medium'; + } else { + responseText = + "Thank you for contacting support. 
" + + "I'll help you with your issue."; + priority = 'normal'; + } + + // Save updated profile + await context.kv.set('customers', user_id, JSON.stringify(profile)); + + // Log for analytics + context.logger.info(\`Support request from \${user_id}: \${priority} priority\`); + + return response.json({ + response: responseText, + is_recurring: isRecurring, + interaction_number: profile.interaction_count, + priority, + similar_issues_found: pastIssues.length + }); + + } else if (action === 'get_history') { + // Retrieve customer history + const profileResult = await context.kv.get('customers', user_id); + + if (!profileResult.exists) { + return response.json({ + error: 'No history found for this customer' + }); + } + + const profile = await profileResult.data.json(); + + // Get recent issues from vector storage + const recentIssues = await context.vector.search('support_history', { + query: \`all issues for user \${user_id}\`, + limit: 10, + metadata: { user_id } + }); + + return response.json({ + customer_profile: profile, + recent_issues: recentIssues.map(issue => ({ + message: issue.document, + metadata: issue.metadata + })) + }); + } + + return response.json({ + error: 'Unknown action' + }); +}; + +export default handler; +export { welcome };`} /> + +### Testing Your Memory Agent + +1. **Start DevMode:** +```bash +agentuity dev +``` + +2. **Test the scenarios:** + - First support request - notice the standard response + - Second similar request - see how it recognizes the recurring issue + - Check history - view all stored interactions + +3. 
**Observe the memory in action:** + - Watch the logs to see KV and vector operations + - Try different user IDs to see isolated memory + - Test with various message similarities + +## Memory at Scale + +As your agents grow, consider these scaling strategies: + +### Hybrid Memory Architecture +- **Hot data** in KV storage (recent interactions) +- **Warm data** in vector storage (searchable history) +- **Cold data** in object storage (archives) + +### Memory Sharding +- Partition by user ID or tenant +- Distribute across namespaces +- Implement consistent hashing for even distribution + +### Caching Strategies +- Cache frequent queries in KV with short TTL +- Pre-compute common aggregations +- Use edge caching for read-heavy workloads + +## Key Takeaways + +- **Memory transforms agents** from stateless functions to learning systems +- **Choose the right storage**: KV for structured data, Vector for semantic search, Object storage for files +- **Implement retention policies**: Not all memory should be permanent +- **Design for scale**: Plan your memory architecture for growth +- **Privacy matters**: Always consider data retention regulations + +## What's Next? + +Now that your agents can remember, it's time to help them collaborate. In the next module, we'll explore agent-to-agent communication - how multiple specialized agents can work together to solve complex problems. + +But first, experiment with memory patterns: +- Build an agent that learns user preferences over time +- Implement a knowledge base that grows from conversations +- Create memory cleanup strategies +- Test different TTL strategies for various use cases + +Remember: Memory is what transforms an agent from a tool into a partner.
+ +--- + +**Ready for Module 4?** [Agent-to-Agent Collaboration](./04-agent-collaboration) \ No newline at end of file diff --git a/content/Training/developers/index.mdx b/content/Training/developers/index.mdx new file mode 100644 index 00000000..12e34cc6 --- /dev/null +++ b/content/Training/developers/index.mdx @@ -0,0 +1,55 @@ +--- +title: Developer Training +description: Building with Agentuity - Developer Learning Path +--- + +## Why AI Agents, Why Now? + +The AI industry is racing toward a [$47B agent economy by 2030](https://www.marketsandmarkets.com/Market-Reports/ai-agents-market-15761548.html), but [many developers](https://www.ibm.com/think/insights/ai-agents-2025-expectations-vs-reality) are trying to build autonomous systems on infrastructure designed for websites. + +Agents are autonomous systems that remember context, reason through problems, act independently, and collaborate with other agents. This course teaches you **agent engineering fundamentals** while leveraging Agentuity's purpose-built infrastructure. + +## What You'll Build + +By the end of this course, you'll have deployed: +- Production agents with persistent memory (stateful, long-running) +- Multi-agent workflows with seamless agent-to-agent communication +- Framework-agnostic agents (LangChain, CrewAI, or custom) +- Fully observable systems with OpenTelemetry tracing +- Complete dev → staging → production deployment pipelines + +## Time Investment +- **Full course:** ~20 hours (all modules + capstone project) +- **Fast track:** ~8 hours (Modules 1, 3, 6 + mini-project) +- **Framework migration:** ~4 hours (Modules 2 + 6 for developers with existing agent projects) + +## Before You Start + +**Required:** +- Proficiency in Python or JavaScript +- Basic understanding of APIs and cloud deployment +- Command line familiarity + +## Choose Your Path + +### New to AI Agents? 
+Start with [Module 1: Introduction to Agents](./01-introduction-to-agents) to understand how agents differ from traditional software and why they need specialized infrastructure. + +### Built Agents Before? +Jump to [Module 3: Agent Memory](./03-agent-memory) to see how Agentuity handles state differently, or [Module 4: Agent-to-Agent Collaboration](./04-agent-collaboration) for orchestration patterns. + +### Need Production Deployment? +Go straight to [Module 6: Deployment Environments](./06-deployment-environments) to understand Agentuity's dev/staging/prod environments and scaling approach. + +### Migrating from Another Platform? +Check our Framework Migration Guide for specifics on moving from AWS Bedrock, Google ADK, or Azure AI. + +## What Makes This Different + +Unlike generic AI courses, this curriculum: +- Focuses on **production deployment**, not just demos +- Addresses real challenges like memory management, cost optimization, and non-determinism +- Uses actual industry examples (both successes and cautionary tales) +- Teaches **platform-agnostic concepts** alongside Agentuity-specific implementation + +Ready to dive into the world of AI agents? Let's get started with [Module 1: Introduction to Agents](./01-introduction-to-agents). \ No newline at end of file diff --git a/content/Training/index.mdx b/content/Training/index.mdx new file mode 100644 index 00000000..fcbdf7ba --- /dev/null +++ b/content/Training/index.mdx @@ -0,0 +1,21 @@ +--- +title: Training +description: Learn how to build production-ready AI agents with Agentuity +--- + +## Available Training + +### For Developers +Comprehensive hands-on course covering agent fundamentals through production deployment. 
+ +- 7 modules from basics to advanced +- ~20 hours of content +- Hands-on labs and capstone project + +[Start Developer Training →](./developers) + +## Coming Soon + +**For Executives** - Strategic overview of the agent economy + +**For Tech Leads** - Architecture patterns and integration strategies \ No newline at end of file diff --git a/content/meta.json b/content/meta.json index 77852ca1..2d80002e 100644 --- a/content/meta.json +++ b/content/meta.json @@ -3,6 +3,7 @@ "pages": [ "Introduction", "Guides", + "Training", "Cloud", "CLI", "SDKs", From d1fe88c19ea0f2f8940991a5295c073ffbd375a5 Mon Sep 17 00:00:00 2001 From: parteeksingh24 Date: Mon, 8 Sep 2025 18:33:29 -0700 Subject: [PATCH 02/13] Update training docs, add modules 4-7 --- .../developers/01-introduction-to-agents.mdx | 4 +- .../developers/02-anatomy-of-an-agent.mdx | 35 +- .../Training/developers/03-agent-memory.mdx | 42 +- .../developers/04-agent-collaboration.mdx | 849 ++++++++++++++ .../05-guardrails-evals-observability.mdx | 1006 +++++++++++++++++ .../developers/06-deployment-environments.mdx | 247 ++++ .../developers/07-sandbox-capstone.mdx | 389 +++++++ 7 files changed, 2527 insertions(+), 45 deletions(-) create mode 100644 content/Training/developers/04-agent-collaboration.mdx create mode 100644 content/Training/developers/05-guardrails-evals-observability.mdx create mode 100644 content/Training/developers/06-deployment-environments.mdx create mode 100644 content/Training/developers/07-sandbox-capstone.mdx diff --git a/content/Training/developers/01-introduction-to-agents.mdx b/content/Training/developers/01-introduction-to-agents.mdx index d017417c..3d197edb 100644 --- a/content/Training/developers/01-introduction-to-agents.mdx +++ b/content/Training/developers/01-introduction-to-agents.mdx @@ -1,10 +1,8 @@ --- -title: Introduction to Agents +title: "Module 1: Introduction to Agents" description: Understanding AI agents and the $47B opportunity --- -# Module 1: Introduction to Agents - Welcome 
to the age of AI agents - autonomous systems that are fundamentally transforming how we build and think about software. ## The $47B Agent Opportunity diff --git a/content/Training/developers/02-anatomy-of-an-agent.mdx b/content/Training/developers/02-anatomy-of-an-agent.mdx index 0df46db8..99704d74 100644 --- a/content/Training/developers/02-anatomy-of-an-agent.mdx +++ b/content/Training/developers/02-anatomy-of-an-agent.mdx @@ -1,28 +1,23 @@ --- -title: The Anatomy of an Agent +title: "Module 2: The Anatomy of an Agent" description: Understanding how agents work - planning, reasoning, tools, and memory --- -# Module 2: The Anatomy of an Agent - Now that you've built your first agent, let's dive deeper to understand what makes agents tick. We'll explore the core components that transform a simple script into an autonomous, intelligent system. ## The Agent Lifecycle Every agent interaction follows a predictable lifecycle, from receiving a trigger to returning a response. Understanding this flow is crucial for building effective agents. -```mermaid + Let's explore each phase: @@ -172,25 +167,25 @@ The context object is your agent's gateway to Agentuity's services. 
It provides agent_id = context.agent.id agent_name = context.agent.name project_id = context.projectId - run_id = context.runId + session_id = context.sessionId # Key-Value storage await context.kv.set("cache", "key", {"data": "value"}) result = await context.kv.get("cache", "key") # Vector storage for semantic search - await context.vector.upsert("docs", { + await context.vector.upsert("docs", [{ "key": "doc1", "document": "AI agents are autonomous systems", "metadata": {"category": "intro"} - }) + }]) # Object storage for files await context.objectstore.put("files", "report.pdf", pdf_bytes) return response.json({ "agent": agent_name, - "run": run_id + "session": session_id })`} js={`const handler: AgentHandler = async (request, response, context) => { // Logging at different levels context.logger.debug('Debug information'); @@ -202,7 +197,7 @@ The context object is your agent's gateway to Agentuity's services. It provides const agentId = context.agent.id; const agentName = context.agent.name; const projectId = context.projectId; - const runId = context.runId; + const sessionId = context.sessionId; // Key-Value storage await context.kv.set('cache', 'key', JSON.stringify({ data: 'value' })); @@ -220,7 +215,7 @@ The context object is your agent's gateway to Agentuity's services. It provides return response.json({ agent: agentName, - run: runId + session: sessionId }); };`} /> diff --git a/content/Training/developers/03-agent-memory.mdx b/content/Training/developers/03-agent-memory.mdx index 246ec9e3..e6d275e5 100644 --- a/content/Training/developers/03-agent-memory.mdx +++ b/content/Training/developers/03-agent-memory.mdx @@ -1,10 +1,8 @@ --- -title: Agent Memory +title: "Module 3: Agent Memory" description: How agents remember, learn, and build context over time --- -# Module 3: Agent Memory - Without memory, an agent is just a stateless function. With memory, it becomes a system that learns, adapts, and builds relationships over time. 
## The Memory Challenge @@ -31,7 +29,7 @@ For implementation details on Agentuity's storage systems, see our guides on [Ke Just like human memory, agent memory operates at different levels: -```mermaid + Let's explore each level: @@ -93,7 +91,7 @@ async def run(request, response, context): user_id = request.metadata.get("user_id") # Store learned preferences - await context.vector.upsert("user_knowledge", { + await context.vector.upsert("user_knowledge", [{ "key": f"pref_{user_id}_{datetime.now()}", "document": "User prefers technical explanations with code examples", "metadata": { @@ -101,7 +99,7 @@ async def run(request, response, context): "confidence": 0.9, "learned_from": "conversation_analysis" } - }) + }]) # Retrieve relevant memories memories = await context.vector.search( @@ -174,7 +172,7 @@ Best for semantic information: max_messages: # Before removing, summarize older messages summary = await summarize_messages(messages[:10]) - await context.vector.upsert("summaries", { + await context.vector.upsert("summaries", [{ "key": f"{session_id}_summary_{datetime.now()}", "document": summary, "metadata": {"session_id": session_id} - }) + }]) messages = messages[-max_messages:] # Process with context @@ -400,7 +399,7 @@ Build user profiles over time: # Store notable preferences as vectors for semantic search if analysis["preferences"]: for pref in analysis["preferences"]: - await context.vector.upsert("user_preferences", { + await context.vector.upsert("user_preferences", [{ "key": f"{user_id}_{hash(pref)}", "document": pref, "metadata": { @@ -408,7 +407,7 @@ Build user profiles over time: "confidence": analysis["confidence"], "learned_at": datetime.now().isoformat() } - }) + }]) # Get relevant memories for this conversation memories = await context.vector.search( @@ -512,7 +511,7 @@ Build a searchable knowledge base from interactions: # Extract and store knowledge knowledge = await request.data.json() - await context.vector.upsert("knowledge_base", { + await 
context.vector.upsert("knowledge_base", [{ "key": f"fact_{datetime.now().timestamp()}", "document": knowledge["fact"], "metadata": { @@ -522,7 +521,7 @@ Build a searchable knowledge base from interactions: "tags": knowledge.get("tags", []), "created": datetime.now().isoformat() } - }) + }]) return response.json({"status": "learned", "fact": knowledge["fact"]}) @@ -739,13 +738,12 @@ Implement automated cleanup to manage costs: if result.exists: convo = await result.data.json() - # Archive to object storage + # Archive to object storage (auto-detects JSON content type) archive_key = f"archives/{session_id}/{cutoff_date.isoformat()}.json" await context.objectstore.put( "archives", archive_key, - json.dumps(convo), - {"contentType": "application/json"} + json.dumps(convo) ) # Delete from KV @@ -878,7 +876,7 @@ async def run(request, response, context): # Store this interaction interaction_key = f"{user_id}_{datetime.now().timestamp()}" - await context.vector.upsert("support_history", { + await context.vector.upsert("support_history", [{ "key": interaction_key, "document": message, "metadata": { @@ -887,7 +885,7 @@ async def run(request, response, context): "order_id": data.get("order_id"), "resolved": False } - }) + }]) # Add to profile issues profile["issues"].append({ diff --git a/content/Training/developers/04-agent-collaboration.mdx b/content/Training/developers/04-agent-collaboration.mdx new file mode 100644 index 00000000..1a677179 --- /dev/null +++ b/content/Training/developers/04-agent-collaboration.mdx @@ -0,0 +1,849 @@ +--- +title: "Module 4: Agent-to-Agent Collaboration" +description: Building multi-agent systems that work together +--- + +So far, we've built single agents that can reason, act, and remember. But what if one agent can't do everything? What if you need specialized expertise, parallel processing, or simply want to break down complex problems into manageable pieces? + +Welcome to the world of multi-agent systems. + +## Why Multi-Agent Systems? 
+ +According to industry research, multi-agent architectures are becoming the standard for production AI systems. As [Microsoft's Build 2025 announcement](https://blogs.microsoft.com/blog/2025/05/19/microsoft-build-2025-the-age-of-ai-agents-and-building-the-open-agentic-web/) highlighted, the future is about agents working together in an "open agentic web." + +Think about how human organizations work: +- **Specialization**: Different people excel at different tasks +- **Scalability**: Teams can handle more work than individuals +- **Reliability**: If one person is unavailable, others can step in +- **Modularity**: Easy to add new team members with specific skills + +The same principles apply to agent systems. + +## Core Concepts of Agent Collaboration + +### 1. Agent Communication Patterns + +There are three primary ways agents can work together: + +#### Sequential (Chain) +One agent completes its task and passes the result to the next: +``` +User → Research Agent → Summarizer Agent → Response +``` + +#### Parallel (Fan-out/Fan-in) +Multiple agents work simultaneously on different parts: +``` + ┌→ Web Search Agent →┐ +User → Coordinator →→ Database Agent →→ Aggregator → Response + └→ File Agent →┘ +``` + +#### Conditional (Router) +Agents are selected based on the task: +``` +User → Router → [Math Agent | Language Agent | Code Agent] → Response +``` + +### 2. The Handoff Mechanism + +In Agentuity, agents communicate using the `handoff` method. This is like forwarding a phone call - the current agent passes control to another agent, along with any necessary data. 
+ + { + // Receive a research request + const task = await request.data.json(); + + // Decide we need help from the web search agent + if (task.type === 'web_research') { + // Hand off to the web search specialist + return response.handoff( + { name: 'web-search' }, // Target agent + { + data: JSON.stringify({ query: task.query }), // Data as string/buffer + contentType: 'application/json', + metadata: { original_task: task } // Metadata object + } + ); + } + + // Handle other task types... + return response.json({ result: 'Task completed' }); +};`} /> + +### 3. Agent Resolution + +Agents can find each other using either: +- **Name**: Human-readable identifier (e.g., "web-search") +- **ID**: Unique system identifier (e.g., "agent_abc123") + +The context object knows about all agents in your project: + + { + // List all available agents + const availableAgents = context.agents; + context.logger.info(\`Available agents: \${availableAgents.map(a => a.name)}\`); + + // Check if a specific agent exists + if (availableAgents.some(agent => agent.name === 'summarizer')) { + // Agent exists, we can safely hand off + return response.handoff( + { name: 'summarizer' }, + { + data: JSON.stringify({ text: 'Long article to summarize...' }), + contentType: 'application/json' + } + ); + } +};`} /> + +## Building a Research Team + +Let's build a simple research system with three specialized agents working together. 
+ +### The Coordinator Agent + +This agent receives research requests and delegates to specialists: + + { + // Research coordinator that delegates to specialist agents + + const researchRequest = await request.data.json(); + const query = researchRequest.query || ''; + + context.logger.info(\`Coordinating research for: \${query}\`); + + // Step 1: Gather information from web search + const searchResult = await response.handoff( + { name: 'web-search' }, + { + data: JSON.stringify({ query, max_results: 3 }), + contentType: 'application/json' + } + ); + + // Note: In reality, the handoff returns immediately. + // For this example, imagine we're showing the flow. + // The web-search agent's response will go back to the user. + + return searchResult; +};`} /> + +### The Web Search Agent + +A specialist that searches the web and returns results: + + { + // Specialist agent for web searches + + const searchParams = await request.data.json(); + const query = searchParams.query; + const maxResults = searchParams.max_results || 5; + + context.logger.info(\`Searching web for: \${query}\`); + + // Simulate web search (in production, use actual search API) + const searchResults = Array.from({ length: maxResults }, (_, i) => ({ + title: \`Result \${i + 1} for \${query}\`, + snippet: \`This is a relevant snippet about \${query}....\`, + url: \`https://example.com/\${i + 1}\` + })); + + // Now hand off to summarizer to create a coherent summary + return response.handoff( + { name: 'summarizer' }, + { + data: JSON.stringify({ + content: searchResults, + original_query: query + }), + contentType: 'application/json', + metadata: { source: 'web-search' } + } + ); +};`} /> + +### The Summarizer Agent + +Takes search results and creates a concise summary: + + { + // Specialist agent for summarizing content + + const summaryRequest = await request.data.json(); + const content = summaryRequest.content || []; + const originalQuery = summaryRequest.original_query || ''; + + 
context.logger.info(\`Summarizing \${content.length} items\`); + + // Create a summary from the search results + const summaryParts = content.map(item => { + if (typeof item === 'object') { + const title = item.title || ''; + const snippet = item.snippet || ''; + return \`• \${title}: \${snippet}\`; + } + return ''; + }).filter(Boolean); + + const finalSummary = { + query: originalQuery, + summary: \`Here's what I found about '\${originalQuery}':\\n\\n\${summaryParts.join('\\n')}\`, + source_count: content.length, + timestamp: new Date().toISOString() + }; + + // Return the final summary to the user + return response.json(finalSummary); +};`} /> + +## Orchestration Patterns: Industry vs. Agentuity + +### The Industry Challenge + +According to [Microsoft's multi-agent orchestration announcements](https://www.microsoft.com/en-us/microsoft-copilot/blog/copilot-studio/multi-agent-orchestration-maker-controls-and-more-microsoft-copilot-studio-announcements-at-microsoft-build-2025/), most platforms require complex orchestration layers with state management, workflow engines, and coordination services. [AWS's multi-agent design patterns](https://aws.amazon.com/blogs/machine-learning/design-multi-agent-orchestration-with-reasoning-using-amazon-bedrock-and-open-source-frameworks/) show similar complexity, often requiring LangGraph or custom orchestrators. + +### Agentuity's Approach: Simple Handoffs + +While other platforms build complex orchestration layers, Agentuity takes a different approach: **the handoff mechanism**. It's simpler, but with clear trade-offs: + +- **What it does**: Transfers control from one agent to another (like call forwarding) +- **What it doesn't do**: Parallel execution, getting responses back, complex coordination +- **Why it works**: Most real-world agent workflows are actually sequential or conditional + +## Patterns That Work with Agentuity's Handoff + +### 1. 
Sequential Chain Pattern + +Perfect for pipelines where each agent completes its work and passes to the next: + + { + // Pipeline pattern: Each agent enriches the data + + const data = await request.data.json(); + const step = data.step || 1; + + if (step === 1) { + // First agent: Clean the data + data.cleaned = true; + data.step = 2; + return response.handoff({ name: 'processor' }, { data: JSON.stringify(data), contentType: 'application/json' }); + + } else if (step === 2) { + // Second agent: Enrich the data + data.enriched = true; + data.step = 3; + return response.handoff({ name: 'validator' }, { data: JSON.stringify(data), contentType: 'application/json' }); + + } else if (step === 3) { + // Final agent: Validate and return + data.validated = true; + return response.json(data); + } +};`} /> + +### 2. Conditional Routing Pattern + +Route to different agents based on the request type - ideal for dispatcher/router agents: + + { + // Router pattern: Direct to appropriate specialist + + const task = await request.data.json(); + const taskType = (task.type || '').toLowerCase(); + + // Route based on task type + const agentMap = { + math: 'calculator-agent', + translation: 'translator-agent', + search: 'web-search', + summary: 'summarizer' + }; + + const targetAgent = agentMap[taskType]; + + if (targetAgent) { + context.logger.info(\`Routing \${taskType} task to \${targetAgent}\`); + return response.handoff( + { name: targetAgent }, + { + data: JSON.stringify(task.payload || {}), + contentType: 'application/json' + } + ); + } + + // Unknown task type + return response.json({ + error: \`Unknown task type: \${taskType}\`, + available_types: Object.keys(agentMap) + }); +};`} /> + +### 3. Delegation Pattern (Agentuity-Specific) + +A main agent handles most work but delegates specialized tasks. 
Remember: the delegated agent's response goes to the client, not back to the delegator: + + { + // Main agent that delegates when needed + + const task = await request.data.json(); + + // Handle most tasks directly + if (['simple', 'standard'].includes(task.type)) { + const result = processLocally(task); + return response.json(result); + } + + // Delegate complex tasks + if (task.type === 'complex') { + // Note: We won't get the response back + // The specialist will respond directly to client + return response.handoff( + { name: 'specialist-agent' }, + { + data: JSON.stringify(task), + contentType: 'application/json' + } + ); + } +};`} /> + +### 4. Request-Response Pattern + +Understanding that handoff is a one-way transfer is crucial. The final agent in the chain responds to the original client: + +``` +Client Request → Agent A → (handoff) → Agent B → Response to Client + ↓ + (exits) +``` + +This is different from traditional RPC where Agent A would wait for Agent B's response. + +## Agentuity-Specific Best Practices + +### 1. Working with Agent Resolution + +Agentuity provides built-in agent discovery within your project: + + { + // List all agents in your project + const availableAgents = context.agents; + const agentNames = availableAgents.map(a => a.name); + context.logger.info(\`Available agents: \${agentNames}\`); + + // Check if an agent exists before handoff + const targetName = 'specialist-agent'; + if (availableAgents.some(agent => agent.name === targetName)) { + return response.handoff( + { name: targetName }, + { data: JSON.stringify(data), contentType: 'application/json' } + ); + } else { + return response.json({ + error: \`Agent '\${targetName}' not found\`, + available: agentNames + }); + } + + // You can also use agent IDs for more precise targeting + // return response.handoff({ id: 'agent_abc123' }, args); +};`} /> + +### 2. 
Understanding Handoff Limitations + +Agentuity's handoff has built-in safety features and constraints: + + { + // CONSTRAINT 1: Can't handoff to yourself (loop detection) + // This will raise an error: + // return response.handoff({ name: context.agent.name }, args); + + // CONSTRAINT 2: Handoff is one-way (no response back) + // Wrong expectation: + // const result = await response.handoff({ name: 'helper' }, args); // NO! + // Right approach: + return response.handoff({ name: 'helper' }, args); // Helper responds to client + + // CONSTRAINT 3: Only see agents in your project + // Agents are isolated by project for security + + // FEATURE: Metadata persists through handoffs + const metadata = request.metadata || {}; + metadata.chain = (metadata.chain || []).concat(context.agent.name); + return response.handoff( + { name: 'next-agent' }, + { + data: JSON.stringify(data), + contentType: 'application/json', + metadata // Pass context forward + } + ); +};`} /> + +### 3. Leveraging Agentuity's Built-in Features + +Agentuity provides powerful built-in features for multi-agent systems: + + '.join(agent_chain)}") + + return response.handoff( + {"name": "next-agent"}, + processed_data, + {"agent_chain": agent_chain, "original_session_id": context.sessionId} + )`} js={`const handler: AgentHandler = async (request, response, context) => { + // Use context.sessionId for automatic request tracking + context.logger.info(\`Processing request \${context.sessionId} in \${context.agent.name}\`); + + // Built-in OpenTelemetry tracing (automatic spans) + // Each handoff creates a new span in the trace + + // Use agent metadata for debugging + context.logger.debug(\`Agent ID: \${context.agent.id}\`); + context.logger.debug(\`Project ID: \${context.projectId}\`); + + // Track the chain of agents + const metadata = request.metadata || {}; + const agentChain = metadata.agent_chain || []; + agentChain.push(context.agent.name); + + // Log the full chain for debugging + 
context.logger.info(\`Agent chain: \${agentChain.join(' -> ')}\`); + + return response.handoff( + { name: 'next-agent' }, + { + data: JSON.stringify(processedData), + contentType: 'application/json', + metadata: { agent_chain: agentChain, original_session_id: context.sessionId } + } + ); +};`} /> + +## Lab: Build a Document Processing Pipeline + +Create a three-agent system that processes documents: + +1. **intake-agent**: Receives documents and validates format +2. **analyzer-agent**: Extracts key information +3. **storage-agent**: Stores processed results + + { + // Validate and route incoming documents + + const document = await request.data.json(); + + // Validate document format + if (!document.content) { + return response.json({ error: 'Missing document content' }); + } + + if (!['pdf', 'text', 'html'].includes(document.type)) { + return response.json({ error: 'Unsupported document type' }); + } + + // Add processing metadata + document.received_at = new Date().toISOString(); + document.intake_agent = context.agent.name; + + // Pass to analyzer + return response.handoff( + { name: 'analyzer-agent' }, + { + data: JSON.stringify(document), + contentType: 'application/json' + } + ); +}; + +// Analyzer Agent +const analyzerHandler: AgentHandler = async (request, response, context) => { + // Extract key information from documents + + const document = await request.data.json(); + const content = document.content || ''; + + // Simple analysis (in production, use NLP/LLM) + const analysis = { + word_count: content.split(' ').length, + char_count: content.length, + has_numbers: /\d/.test(content), + document_type: document.type, + analyzed_at: new Date().toISOString() + }; + + // Combine with original document + document.analysis = analysis; + + // Pass to storage + return response.handoff( + { name: 'storage-agent' }, + { + data: JSON.stringify(document), + contentType: 'application/json' + } + ); +}; + +// Storage Agent +const storageHandler: AgentHandler = async 
(request, response, context) => { + // Store processed documents + + const document = await request.data.json(); + + // Generate unique ID + const docId = \`doc_\${context.sessionId}\`; + + // Store in KV + await context.kv.set( + 'processed_documents', + docId, + JSON.stringify(document) + ); + + // Return confirmation + return response.json({ + status: 'success', + document_id: docId, + processed_by: [ + document.intake_agent, + 'analyzer-agent', + 'storage-agent' + ], + stored_at: new Date().toISOString() + }); +};`} /> + +## Testing Your Multi-Agent System + +Test the document processing pipeline: + +```bash +# Test with a sample document +curl -X POST https://your-project.agentuity.com/intake-agent \ + -H "Content-Type: application/json" \ + -d '{ + "type": "text", + "content": "This is a test document with some numbers 12345." + }' +``` + +Expected flow: +1. `intake-agent` validates and adds metadata +2. `analyzer-agent` extracts information +3. `storage-agent` saves and returns confirmation + +## Key Takeaways + +- **Multi-agent systems** enable specialization, scalability, and modularity +- **Handoff mechanism** allows agents to delegate tasks to other agents +- **Communication patterns** include sequential, parallel, and conditional routing; Agentuity's handoff directly supports the sequential and conditional ones +- **Best practices** include loop prevention, error handling, and request tracing +- **Agentuity makes it simple** with built-in agent resolution and communication + +## What's Next? + +Now that you can build multi-agent systems, Module 5 will cover **Guardrails, Evals & Observability** - how to ensure your agent teams behave safely and predictably in production. + +Questions to consider: +- How do you prevent agents from taking harmful actions? +- How do you track what decisions agents are making? +- How do you meet compliance and audit requirements? 
+ +Continue to [Module 5: Guardrails, Evals & Observability →](./05-guardrails-evals-observability) \ No newline at end of file diff --git a/content/Training/developers/05-guardrails-evals-observability.mdx b/content/Training/developers/05-guardrails-evals-observability.mdx new file mode 100644 index 00000000..78e55b69 --- /dev/null +++ b/content/Training/developers/05-guardrails-evals-observability.mdx @@ -0,0 +1,1006 @@ +--- +title: "Module 5: Guardrails, Evals & Observability" +description: Making agents reliable, safe, and production-ready +--- + +You've built agents that can think, remember, and collaborate. Now it's time to make them production-ready. + +## The Reality of Production Agents + +Research from NeurIPS 2024 reveals that most AI agents succeed only [30-40% of the time on complex tasks](https://neurips.cc/virtual/2024/panel/agent-systems). When [Salesforce automated 50% of their support cases](https://www.sfchronicle.com/tech/article/salesforce-ai-job-cuts-benioff-21025920.php), the implementation challenges led to significant organizational restructuring. The gap between demo and production? Proper guardrails, systematic evaluation, and comprehensive observability. + +According to [NIST's Generative AI Risk Management framework](https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.1270e2025.pdf), the primary operational risks in AI systems include: +- **Hallucination**: Agents generating plausible but incorrect information +- **Prompt Injection**: Adversarial inputs manipulating agent behavior +- **Resource Consumption**: Uncontrolled usage leading to excessive costs +- **Compliance Drift**: Agents violating domain-specific regulations + +## The Three Pillars of Production Agents + + +Agentuity provides built-in OpenTelemetry integration with automatic instrumentation through the Traceloop SDK, giving you comprehensive observability out of the box. + + +### 1. 
Guardrails: Setting Boundaries + +Guardrails prevent agents from harmful actions while preserving autonomy: + +- **Input Validation**: Schema enforcement, content filtering, size limits +- **Rate Limiting**: Prevent abuse and control costs per user/session +- **Security**: Prompt injection defense ([WASP](https://arxiv.org/abs/2407.01593)), tool permissions +- **Domain Rules**: Compliance checks, output validation, custom constraints + +### 2. Evaluation: Measuring Success + +Systematic evaluation is critical for non-deterministic agents: + +- **Industry Benchmarks**: [SWE-Bench](https://www.swebench.com/) (~20% success), [BrowserGym](https://browsergym.ai), [AgentHarm](https://arxiv.org/abs/2407.01564) +- **Automated Testing**: Unit tests, integration tests, golden datasets +- **Production Metrics**: Success rates, latency, cost per request +- **A/B Testing**: Shadow deployments, gradual rollouts + +### 3. Observability: Seeing Everything + +Agentuity provides automatic OpenTelemetry integration with Traceloop SDK: + +- **What's Tracked**: LLM calls, tool invocations, storage operations, API calls +- **Three Pillars**: Logs (events), Metrics (measurements), Traces (request flow) +- **Console View**: Timeline visualization with color-coded spans + +## Implementing Guardrails + +Let's implement essential guardrails for a financial advisor agent: + +### 1. Schema Validation with Zod & Pydantic + +Runtime validation is critical for agents. TypeScript and Python types only exist at development time - at runtime, your data needs validation. + +For detailed API references, see the [Zod](https://zod.dev) and [Pydantic](https://docs.pydantic.dev/latest/) documentation. + +; + +const handler = async (request, response, context) => { + // 1. Validate structure + const result = UserQuerySchema.safeParse(await request.data.json()); + + if (!result.success) { + return response.json({ + error: 'Invalid request', + details: result.error.issues + }); + } + + // 2. 
Apply domain rules separately + const prohibited = ['insider', 'guaranteed']; + if (prohibited.some(term => result.data.query.toLowerCase().includes(term))) { + return response.json({ error: 'Prohibited terms detected' }); + } + + // 3. Process validated data + const { query, userId } = result.data; + return await processQuery(query, userId); +};`} /> + + +**Key Pattern**: Separate validation from domain rules. Schemas validate structure, your specific rules come after. + + +#### Real-World Example: Validating External API Responses + +Here's how the AI News Digest agent validates Hacker News API responses: + + { + // Fetch and validate stories + const articles = await fetchTopStories(context, 5); + + // Generate AI summary + const summary = await generateSummary(articles); + + // Create and validate digest with Zod + const digestResult = DigestDataSchema.safeParse({ + summary, + sources: articles, + articleCount: articles.length, + timestamp: new Date().toISOString(), + source: 'Hacker News API' + }); + + if (!digestResult.success) { + context.logger.error('Digest validation failed', digestResult.error); + return response.json({ error: 'Failed to create digest' }); + } + + // Store validated digest + await context.kv.set('digest', 'latest', digestResult.data); + + return response.json(digestResult.data); +};`} /> + +#### Using Schemas with AI-Generated Output + +Schemas ensure structured output from AI models: + + 0.8: + agent = await context.get_agent(name=intent.object.agent_type) + return await agent.run(request)`} js={`import { z } from 'zod'; +import { generateObject } from 'ai'; +import { anthropic } from '@ai-sdk/anthropic'; + +const IntentSchema = z.object({ + agentType: z.enum(['support', 'sales', 'technical']), + confidence: z.number().min(0).max(1) +}); + +const handler = async (request, response, context) => { + const userMessage = await request.data.text(); + + // AI generates structured, validated output + const intent = await generateObject({ + 
model: anthropic('claude-3-7-sonnet'), + schema: IntentSchema, // Zod ensures structure + prompt: userMessage + }); + + // intent.object is validated and typed + if (intent.object.confidence > 0.8) { + const agent = await context.getAgent({ name: intent.object.agentType }); + return await agent.run(request); + } +};`} /> + + +See the complete [Conference Concierge implementation](https://github.com/agentuity/agent-AIEWF2025-concierge-template) for a production example of multi-agent routing with schema validation. + + +### 2. Rate Limiting + +Prevent abuse and control costs: + += 100: # 100 requests per hour + return response.json({ + "error": "Rate limit exceeded", + "retry_after": 3600 + }) + + # Increment counter with TTL + await context.kv.set("rate_limits", hour_key, count + 1, {"ttl": 3600}) + + # Process request + return await handle_request(request)`} js={`const handler: AgentHandler = async (request, response, context) => { + const userId = request.metadata.get('user_id'); + const hourKey = 'rate_' + userId + '_' + new Date().getHours(); + + // Check current usage + const usage = await context.kv.get('rate_limits', hourKey); + const count = usage.exists ? await usage.data.json() : 0; + + if (count >= 100) { // 100 requests per hour + return response.json({ + error: 'Rate limit exceeded', + retry_after: 3600 + }); + } + + // Increment counter with TTL + await context.kv.set('rate_limits', hourKey, count + 1, { ttl: 3600 }); + + // Process request + return await handleRequest(request); +};`} /> + +### 3. 
Domain-Specific Rules + +Enforce rules specific to your use case: + + { + const { query } = await request.data.json(); + + // Prohibited terms check + const prohibited = ['guaranteed returns', 'risk-free', 'insider']; + for (const term of prohibited) { + if (query.toLowerCase().includes(term)) { + return response.json({ + error: 'Cannot provide advice on prohibited topics', + reason: 'Term \'' + term + '\' is not allowed' + }); + } + } + + // Generate advice with mandatory disclaimer + const advice = await generateAdvice(query); + + return response.json({ + advice, + disclaimer: 'This is not personalized financial advice.' + }); +};`} /> + +## Evaluation Strategies + +### Define Success Metrics + +Choose metrics that matter for your domain: + +| Metric | Example | How to Measure | +|--------|---------|----------------| +| **Accuracy** | Correct advice | Compare against test cases | +| **Compliance** | No prohibited terms | Check output validation | +| **Performance** | < 5s response | Track in telemetry | +| **Cost** | < $0.10/request | Monitor token usage | + +### Build a Golden Dataset + +Create test cases that cover your critical scenarios: + + r.passed).length / results.length; + return { successRate, results }; +}`} /> + +## Observability with OpenTelemetry + +Agentuity automatically tracks everything through OpenTelemetry: + +### What's Tracked (No Code Required) +- **Agent executions**: Full request/response lifecycle +- **LLM calls**: Prompts, completions, token usage, latency +- **Storage operations**: KV gets/sets, vector searches +- **API calls**: External HTTP requests + +View in the Agentuity console Sessions tab with color-coded timeline visualization. 
+ +### Using the Logger + + { + // Logs appear in Sessions view with trace context + context.logger.info('Processing', { userId }); + + try { + const result = await process(data); + context.logger.info('Success', { count: result.length }); + } catch (error) { + context.logger.error('Failed', { error: error.message }); + throw error; + } +};`} /> + +### Custom Spans for Your Own Operations + +Track important operations with custom spans to understand performance and debug issues: + + { + // Create a span for the entire validation flow + return context.tracer.startActiveSpan('validate-financial-query', async (span) => { + // Add context about this operation + span.setAttribute('user.tier', 'premium'); + span.setAttribute('query.type', 'retirement'); + span.setAttribute('query.length', query.length); + + try { + // Track validation steps + span.addEvent('validation-started'); + + if (hasProhibitedTerms(query)) { + span.addEvent('validation-failed', { reason: 'prohibited-terms' }); + span.setStatus({ code: SpanStatusCode.ERROR, message: 'Prohibited terms detected' }); + return response.json({ error: 'Invalid query' }); + } + + span.addEvent('validation-passed'); + + // Track LLM call separately + const advice = await context.tracer.startActiveSpan('generate-advice', async (llmSpan) => { + llmSpan.setAttribute('model', 'gpt-4'); + const result = await generateAdvice(query); + llmSpan.setAttribute('response.tokens', countTokens(result)); + llmSpan.end(); + return result; + }); + + span.setStatus({ code: SpanStatusCode.OK }); + return response.json({ advice }); + + } catch (error) { + span.recordException(error); + span.setStatus({ code: SpanStatusCode.ERROR, message: error.message }); + throw error; + } finally { + span.end(); + } + }); +};`} /> + +### Performance Best Practices + +Follow these optimization patterns to ensure your agents run efficiently in production: + +| Strategy | Implementation | +|----------|---------------| +| **Cache expensive operations** | Store 
LLM responses in KV with TTL to avoid repeated calls | +| **Use parallel operations** | `Promise.all()` (JS) or `asyncio.gather()` (Python) for concurrent tasks | +| **Fail fast** | Validate inputs early to avoid unnecessary processing | +| **Track token usage** | Add token counts as span attributes to monitor costs | +| **Set meaningful attributes** | Include user tier, request type, and other context for filtering | + +### What You Get Out of the Box + + +**Built-in Observability**: Agentuity provides automatic OpenTelemetry instrumentation with zero configuration required. + + +Everything is tracked automatically: +- **LLM calls**: Model, tokens, latency, and responses +- **Storage operations**: Every KV get/set, vector search, object store operation +- **API calls**: External service interactions and latencies +- **Custom spans**: Your business logic with meaningful attributes +- **Visual debugging**: Color-coded timeline in the console shows execution flow + + +See the [Agent Telemetry Guide](/Guides/agent-telemetry) for advanced tracing and custom spans. + + +## Lab: Production-Ready Financial Advisor + +Let's build a financial advisor progressively, adding production features step by step: + +### Step 1: Basic Validation + + str: + """Generate financial advice - mock implementation for demo.""" + context.logger.info("Generating advice", {"query_preview": query[:50]}) + + # Simple mock responses + advice_map = { + "roth ira": "A Roth IRA is a retirement account with tax-free withdrawals.", + "401k": "A 401(k) offers pre-tax contributions and employer matching.", + "invest": "Start with your risk tolerance and time horizon." 
+ } + + query_lower = query.lower() + for key, advice in advice_map.items(): + if key in query_lower: + return advice + + return "I can help with retirement accounts and investment strategies."`} js={`import { z } from 'zod'; + +// Simple schema for financial queries +const FinancialQuerySchema = z.object({ + query: z.string().min(1).max(500), + portfolioValue: z.number().positive().optional() +}); + +const welcome = () => ({ + welcome: 'Financial Advisor Agent', + prompts: [ + { data: JSON.stringify({ query: 'What is a Roth IRA?' }), contentType: 'application/json' }, + { data: JSON.stringify({ query: 'How do I invest $50000?' }), contentType: 'application/json' } + ] +}); + +const handler = async (request, response, context) => { + // Step 1: Validate input structure + const result = FinancialQuerySchema.safeParse(await request.data.json()); + if (!result.success) { + context.logger.error('Invalid input', { errors: result.error.issues }); + return response.json({ error: 'Invalid request', details: result.error.issues }); + } + + const validated = result.data; + + // Step 2: Apply domain rules + const prohibited = ['guaranteed returns', 'risk-free', 'insider']; + for (const term of prohibited) { + if (validated.query.toLowerCase().includes(term)) { + context.logger.warn('Prohibited term', { term }); + return response.json({ error: 'Cannot advise on ' + term }); + } + } + + // Step 3: Generate response + const advice = await generateFinancialAdvice(validated.query, context); + + return response.json({ + advice, + disclaimer: 'Not personalized financial advice.'
+ }); +}; + +async function generateFinancialAdvice(query, context) { + // Generate financial advice - mock implementation for demo + context.logger.info('Generating advice', { queryPreview: query.substring(0, 50) }); + + // Simple mock responses + const adviceMap = { + 'roth ira': 'A Roth IRA is a retirement account with tax-free withdrawals.', + '401k': 'A 401(k) offers pre-tax contributions and employer matching.', + 'invest': 'Start with your risk tolerance and time horizon.' + }; + + const queryLower = query.toLowerCase(); + for (const [key, advice] of Object.entries(adviceMap)) { + if (queryLower.includes(key)) { + return advice; + } + } + + return 'I can help with retirement accounts and investment strategies.'; +} + +export default handler; +export { welcome };`} /> + +### Step 2: Add Rate Limiting + +Now enhance with rate limiting to prevent abuse: + += 100: # 100 requests per hour + return response.json({ + "error": "Rate limit exceeded", + "retry_after": 3600 + }) + +await context.kv.set("rate_limits", hour_key, count + 1, {"ttl": 3600})`} js={`// Add to the handler after validation: + +// Check rate limit +const userId = request.metadata.get('user_id') || 'anonymous'; +const hourKey = 'rate_' + userId + '_' + new Date().getHours(); +const usage = await context.kv.get('rate_limits', hourKey); +const count = usage.exists ? 
await usage.data.json() : 0; + +if (count >= 100) { // 100 requests per hour + return response.json({ + error: 'Rate limit exceeded', + retry_after: 3600 + }); +} + +await context.kv.set('rate_limits', hourKey, count + 1, { ttl: 3600 });`} /> + +### Step 3: Add Observability + +Finally, add custom spans to track and debug your agent's performance: + + { + // Create a span for the entire request + return context.tracer.startActiveSpan('financial-advisor-request', async (span) => { + const userId = request.metadata.get('user_id') || 'anonymous'; + span.setAttribute('user.id', userId); + span.setAttribute('request.type', 'financial-advice'); + + // Validation with span tracking + span.addEvent('validation-started'); + const result = FinancialQuerySchema.safeParse(await request.data.json()); + if (!result.success) { + span.addEvent('validation-failed', { errors: JSON.stringify(result.error.issues) }); + span.setStatus({ code: SpanStatusCode.ERROR, message: 'Invalid input' }); + context.logger.error('Invalid input', { errors: result.error.issues }); + span.end(); + return response.json({ error: 'Invalid request', details: result.error.issues }); + } + + span.addEvent('validation-passed'); + span.setAttribute('query.length', result.data.query.length); + + const validated = result.data; + + // Domain rules check with tracking + const prohibited = ['guaranteed returns', 'risk-free', 'insider']; + for (const term of prohibited) { + if (validated.query.toLowerCase().includes(term)) { + span.addEvent('compliance-violation', { term }); + span.setStatus({ code: SpanStatusCode.ERROR, message: 'Prohibited term: ' + term }); + span.end(); + return response.json({ error: 'Cannot advise on ' + term }); + } + } + + // Generate advice with nested span + const advice = await context.tracer.startActiveSpan('generate-advice', async (adviceSpan) => { + adviceSpan.setAttribute('query.type', detectQueryType(validated.query)); + const result = await generateFinancialAdvice(validated.query, 
context); + adviceSpan.setAttribute('response.length', result.length); + adviceSpan.end(); + return result; + }); + + span.addEvent('response-generated'); + span.setStatus({ code: SpanStatusCode.OK }); + span.end(); + + return response.json({ + advice, + disclaimer: 'Not personalized financial advice.' + }); + }); +};`} /> + +### Common Validation Pitfalls + +Avoid these common mistakes when implementing validation: + +| Pitfall | Best Practice | +|---------|--------------| +| **Using deprecated validators** | Pydantic v2: Use `@field_validator`, not `@validator` | +| **Throwing on invalid input** | Zod: Use `safeParse` by default, `parse` only when you want to throw | +| **Mixing concerns** | Keep schemas for structure, domain rules separate | +| **I/O in validators** | Never make API calls or database queries inside validators | +| **Poor error messages** | Format validation errors in user-friendly ways before returning | + +### Testing Your Production Agent + +1. **Start DevMode:** +```bash +agentuity dev +``` + +2. **Test guardrails:** + - Try prompt injection attacks + - Exceed rate limits + - Request prohibited advice + +3. **Monitor observability:** + - Watch traces in real-time + - Track custom metrics + - Review logs for issues + +- **Guardrails prevent failures** - Input validation, rate limiting, and domain rules protect your agents +- **Evaluation proves reliability** - Systematic testing with metrics that matter for your domain +- **Observability is automatic** - Agentuity provides OpenTelemetry integration with Traceloop for comprehensive monitoring +- **The console shows everything** - Sessions view with color-coded timeline visualization +- **Production readiness** requires all three pillars working together + +## What's Next? + +Now that your agents are production-ready, it's time to deploy them. In the next module, we'll explore Agentuity's deployment environments - from local development through staging to production. 
+ +But first, experiment with the lab agent: +- Test different guardrail scenarios +- View the telemetry in the console +- Try modifying the evaluation criteria +- Add your own custom spans + +--- + +**Ready for Module 6?** [Deployment Environments](./06-deployment-environments) \ No newline at end of file diff --git a/content/Training/developers/06-deployment-environments.mdx b/content/Training/developers/06-deployment-environments.mdx new file mode 100644 index 00000000..cdd023ef --- /dev/null +++ b/content/Training/developers/06-deployment-environments.mdx @@ -0,0 +1,247 @@ +--- +title: "Module 6: Deployment Environments" +description: From local development to global production +--- + +You've built agents with memory, collaboration, and production-grade guardrails. Now let's deploy them properly. + +## The Reality of Enterprise Agent Deployment + +Research shows that [most organizations aren't "agent-ready"](https://www.ibm.com/think/insights/ai-agents-2025-expectations-vs-reality) - they lack the infrastructure, processes, and understanding needed for production agents. When [Salesforce automated 50% of their support cases](https://www.sfchronicle.com/tech/article/salesforce-ai-job-cuts-benioff-21025920.php), it required massive organizational change alongside the technology. + +The challenge isn't just technical - it's operational: +- How do you test non-deterministic agents? +- How do you safely roll out changes that affect customer interactions? +- How do you monitor agents that make autonomous decisions? +- How do you handle failures gracefully when agents are unpredictable? 
+ +## The Agentuity Environment Model + +Agentuity provides three distinct environments, each with specific purposes: + +### Development Environment +**Where ideas become agents** + +Your local machine running DevMode: +- Instant feedback with hot reload +- Full debugging capabilities +- Mock external services +- Unlimited iterations +- Direct file system access + +```bash +# Start local development +agentuity dev + +# Test with real-time changes +# Edit → Save → Test immediately +``` + +### Staging Environment +**Where agents prove themselves** + + +**Coming Soon**: Dedicated staging environments with production-like infrastructure but isolated from real users. + + +Cloud deployment with test data: +- Production infrastructure +- Synthetic test data +- Full observability +- Performance testing +- Integration testing + +```bash +# Deploy to staging (coming soon) +``` + +### Production Environment +**Where agents serve users** + +Full-scale deployment: +- Auto-scaling infrastructure +- Real user traffic +- Complete monitoring +- High availability +- Disaster recovery + +```bash +# Deploy to production +agentuity deploy +``` + +## Environment Progression + +### Moving Through Environments + +Each environment serves as a gate to the next: + +| Stage | Purpose | Key Validation | Duration | +|-------|---------|----------------|----------| +| **Local Dev** | Build & iterate | Basic functionality | Hours to days | +| **Staging** | Validate at scale | Performance, integration | Days to weeks | +| **Production** | Serve users | Monitoring, optimization | Ongoing | + +### Environment Testing Matrix + +Each environment requires different testing focus: + +| Testing Aspect | Development | Staging | Production | +|----------------|------------|---------|------------| +| **Core Functionality** | Basic input/output validation | Load testing with concurrent users | Success rate metrics | +| **Memory & State** | Memory operations work correctly | Memory cleanup under load | 
Response time percentiles | +| **Error Handling** | Validation catches bad inputs | Rate limiting works correctly | Error rate trends | +| **Observability** | Logs provide debugging info | Meaningful telemetry spans | Cost per request tracking | +| **Integration** | Error messages are helpful | External service integration | User satisfaction signals | +| **Resilience** | Basic error recovery | Failover and recovery testing | Incident response time | + +## Enterprise Deployment Options + + +**Coming Soon**: Advanced deployment options for enterprise customers with specific compliance and security requirements. + + +### Private Cloud Deployments +Deploy Agentuity infrastructure in your own cloud account. More information coming soon. + +### On-Premise Installations +Run Agentuity entirely within your data center. More information coming soon. + +## Configuration Management + +### Environment-Specific Settings + +Different environments need different configurations: + + + +## Real-World Deployment Lessons + +### Enterprise Scale: Salesforce's Journey + +When Salesforce deployed agents to handle 50% of support cases, they learned: +- **Gradual rollout is essential**: Started with 1% of tickets +- **Human oversight needed initially**: Agents flagged uncertain responses +- **Continuous improvement**: Daily model updates based on outcomes +- **Organizational change**: Required retraining support staff + +### Financial Services: 100+ GenAI Applications + +[CB Insights reports over 100 GenAI applications in financial services](https://www.cbinsights.com/research/report/generative-ai-financial-services-applications-2025/). 
Banks deploying agents discovered: +- **Compliance first**: Every response needs audit trails +- **Failover critical**: Instant human handoff when needed +- **Cost management**: Token limits per customer interaction +- **Security paramount**: No customer data in prompts + +## Lab: Deploy Your Financial Advisor + +Let's deploy the financial advisor from Module 5: + +### Step 1: Prepare for Deployment + +```bash +# Ensure all tests pass +npm test # or python -m pytest + +# Check environment variables +agentuity config verify + +# Build for production +agentuity build +``` + +### Step 2: Deploy to Production + +```bash +# Deploy with confirmation +agentuity deploy --confirm + +# Monitor deployment +agentuity logs --follow + +# Check health +agentuity status +``` + +### Step 3: Monitor in Production + +View your deployment in the Agentuity console: +- Real-time metrics +- Session traces +- Error tracking +- Cost analysis + +## What's Next? + +You now understand the deployment journey from local development to production. But how do you build complete, production-ready systems that combine everything you've learned? 
+ +In the next module, we'll tackle the capstone project - building a full agent system that uses: +- Memory and state management +- Multi-agent collaboration +- Production guardrails +- Comprehensive observability +- Proper deployment practices + +But first, experiment with deployment: +- Deploy your agents to production +- Monitor their performance +- Try different configurations +- Practice rollback procedures + +--- + +**Ready for the Capstone?** [Module 7: Sandbox & Capstone Project](./07-sandbox-capstone) \ No newline at end of file diff --git a/content/Training/developers/07-sandbox-capstone.mdx b/content/Training/developers/07-sandbox-capstone.mdx new file mode 100644 index 00000000..619f0747 --- /dev/null +++ b/content/Training/developers/07-sandbox-capstone.mdx @@ -0,0 +1,389 @@ +--- +title: "Module 7: Sandbox & Capstone Project" +description: Bringing it all together in a complete production system +--- + +Time to combine everything you've learned into a complete, production-ready agent system. + +## Coming Soon: Agentic Sandbox + + +**Coming Q2 2025**: The Agentic Sandbox - an interactive environment where you can experiment with agents risk-free, test complex scenarios, and learn through hands-on exploration. + + +### What is the Agentic Sandbox? + +A safe, isolated environment for agent experimentation: +- **Pre-configured scenarios**: Common agent patterns ready to explore +- **Interactive debugging**: Step through agent decisions +- **Instant reset**: Break things and start fresh +- **Guided challenges**: Progressive exercises with hints + +### Sandbox Features (Coming Soon) + +- Mock data sources and APIs +- Simulated user interactions +- Time travel debugging +- Performance profiling +- Collaborative sessions + +## Coming Soon: Training Agent + + +**Coming Q2 2025**: Your AI-powered learning companion that adapts to your progress, answers questions, and provides personalized guidance through the Agentuity platform. 
+ + +### Your Personal Agent Instructor + +An intelligent assistant that helps you learn: +- **Contextual help**: Understands what you're building +- **Debugging assistance**: Helps identify and fix issues +- **Learning paths**: Customized based on your experience +- **Best practices**: Recommends patterns for your use case + +## Capstone Project: Customer Support System + +Let's build a complete customer support system that demonstrates mastery of all previous modules. + +### Project Overview + +**Goal**: Build a multi-agent customer support system that can: +- Handle customer inquiries autonomously +- Escalate complex issues appropriately +- Learn from interactions +- Maintain conversation context +- Ensure compliance and safety + +### System Architecture + + + +### Skills in Practice + +Building this multi-agent system demonstrates the key concepts covered in this training: + +| Module | Key Skills Applied | +|--------|-------------------| +| **Agent Basics** | Request/response handling, error management, welcome messages, trigger configuration | +| **Agent Anatomy** | Lifecycle management, comprehensive logging, context usage, tool integration | +| **Memory Management** | Conversation history, user preferences, session management, cleanup strategies | +| **Multi-Agent Collaboration** | Routing logic, context sharing, smooth handoffs, information preservation | +| **Production Readiness** | Input validation (Zod/Pydantic), rate limiting, observability spans, compliance rules | +| **Deployment** | Environment configuration, production deployment, monitoring setup | + +### Phase 1: Triage Agent + +The entry point for all customer interactions: + + { + // Track the conversation + return context.tracer.startActiveSpan('triage-request', async (span) => { + const sessionId = request.metadata.get('session_id'); + span.setAttribute('session.id', sessionId); + + // Get conversation history + const history = await context.kv.get('conversations', sessionId); + const 
messages = history.exists ? await history.data.json() : []; + + // Analyze customer intent + const customerMessage = await request.data.text(); + messages.push({ role: 'user', content: customerMessage }); + + const intent = await generateObject({ + model: anthropic('claude-3-7-sonnet'), + schema: CustomerIntentSchema, + prompt: \` + Analyze this customer support request: + Message: \${customerMessage} + History: \${JSON.stringify(messages.slice(-5))} + + Categorize as: faq, technical, billing, or urgent + \` + }); + + span.setAttribute('intent.category', intent.object.category); + span.setAttribute('intent.confidence', intent.object.confidence); + + // Route to appropriate agent + if (intent.object.confidence < 0.7 || intent.object.category === 'urgent') { + // Low confidence or urgent = human escalation + span.addEvent('escalating-to-human'); + span.end(); + return response.json({ + action: 'escalate', + message: 'Connecting you with a human agent...', + reason: intent.object.summary + }); + } + + // Hand off to specialist agent + span.end(); + return response.handoff( + { name: intent.object.category + '-agent' }, + { + data: customerMessage, + contentType: 'text/plain', + metadata: { + session_id: sessionId, + intent: intent.object, + conversation_history: messages.slice(-5) // Include recent context + } + } + ); + }); +};`} /> + +### Phase 2: Specialist Agents + +Create specialized agents for each category: + +#### FAQ Agent +- Answers common questions +- Uses vector search for knowledge base +- Tracks which questions are most frequent + +#### Technical Support Agent +- Troubleshoots technical issues +- Guides through diagnostic steps +- Creates tickets for engineering + +#### Billing Agent +- Handles payment questions +- Looks up account information (mock) +- Processes refund requests + +### Phase 3: Memory & Learning + +Implement sophisticated memory: + +```python +# Store interaction outcomes +await context.kv.set( + "outcomes", + 
f"{session_id}_{timestamp}", + { + "category": intent.category, + "resolved": was_resolved, + "escalated": was_escalated, + "satisfaction": customer_rating, + "agent_actions": actions_taken + }, + {"ttl": 30 * 24 * 3600} # Keep for 30 days +) + +# Learn from patterns +outcomes = await context.kv.list("outcomes", prefix=f"{user_id}_") +if frequent_escalations(outcomes): + # Adjust confidence thresholds + # Flag for human review + pass +``` + +### Phase 4: Production Hardening + +Add all production features: + +#### Guardrails +- PII detection and redaction +- Compliance with support policies +- Rate limiting per customer +- Response length limits + +#### Observability +- Custom spans for each decision point +- Track resolution rates +- Monitor escalation patterns +- Cost per interaction + +#### Testing Suite +```python +# test_support_system.py +test_cases = [ + { + "input": "How do I reset my password?", + "expected_category": "faq", + "should_escalate": False + }, + { + "input": "The app crashes when I click submit", + "expected_category": "technical", + "should_escalate": False + }, + { + "input": "I was charged twice!!!", + "expected_category": "billing", + "confidence_threshold": 0.9 # Higher for financial + }, + { + "input": "This is unacceptable! I demand a manager!", + "expected_category": "urgent", + "should_escalate": True + } +] +``` + +### Phase 5: Deployment + +Deploy your complete system: + +```bash +# Test locally +agentuity dev + +# Run test suite +npm test + +# Deploy to production +agentuity deploy + +# Monitor performance +agentuity logs --follow +``` + +## Alternative Capstone Projects + +Not interested in customer support? 
Choose an alternative: + +### Option 2: Research Assistant Team + +Build a multi-agent research system: +- **Research Agent**: Finds relevant papers and articles +- **Summarizer Agent**: Creates concise summaries +- **Fact Checker**: Validates claims against sources +- **Report Generator**: Produces final documents + +Key challenges: +- Citation tracking +- Source credibility scoring +- Handling conflicting information +- Long-form content generation + +### Option 3: DevOps Assistant + +Build an operations helper: +- **Monitor Agent**: Watches system metrics +- **Diagnostics Agent**: Analyzes problems +- **Remediation Agent**: Suggests or implements fixes +- **Reporting Agent**: Creates incident reports + +Key challenges: +- Real-time data processing +- Safe remediation actions +- Alert fatigue prevention +- Root cause analysis + +## Getting Help + +As you build your capstone: + +1. **Review previous modules** - Each contains pieces you'll need +2. **Start simple** - Get basic flow working first +3. **Iterate** - Add features incrementally +4. **Test thoroughly** - Each agent independently, then together +5. **Monitor everything** - You can't fix what you can't see + + +Pro Tip: Build your capstone iteratively. Start with two agents (triage + one specialist), get that working perfectly, then add more complexity. + + +## Congratulations! + +By completing this capstone, you've demonstrated mastery of: +- Agent architecture and design +- Memory and state management +- Multi-agent orchestration +- Production deployment +- Real-world problem solving + +You're now ready to build production agent systems with Agentuity! + +## What's Next? + +- Deploy your capstone to production +- Share your implementation with the community +- Explore advanced patterns +- Build agents for your own use cases +- Contribute to the Agentuity ecosystem + +Welcome to the future of agent development! 
🚀 + +--- + +**Need help?** Join our [Discord community](https://discord.gg/agentuity) or check the [documentation](https://docs.agentuity.com) \ No newline at end of file From e13f57a6c692247978c4e8ba97b738fa93ca4f37 Mon Sep 17 00:00:00 2001 From: parteeksingh24 Date: Tue, 16 Sep 2025 07:41:42 -0700 Subject: [PATCH 03/13] Update training content --- .../developers/01-introduction-to-agents.mdx | 367 ++++--- .../developers/02-anatomy-of-an-agent.mdx | 825 ++++++++-------- .../Training/developers/03-agent-memory.mdx | 101 +- .../developers/04-agent-collaboration.mdx | 917 ++++++++---------- .../05-guardrails-evals-observability.mdx | 114 ++- .../developers/06-deployment-environments.mdx | 121 +-- .../developers/07-sandbox-capstone.mdx | 89 +- 7 files changed, 1212 insertions(+), 1322 deletions(-) diff --git a/content/Training/developers/01-introduction-to-agents.mdx b/content/Training/developers/01-introduction-to-agents.mdx index 3d197edb..43118e66 100644 --- a/content/Training/developers/01-introduction-to-agents.mdx +++ b/content/Training/developers/01-introduction-to-agents.mdx @@ -19,7 +19,6 @@ Traditional cloud platforms (AWS Lambda, Google Cloud Functions, Azure Functions - **5ms response times** (agents need minutes or hours to think) - **Stateless execution** (agents need persistent memory) - **Edge distribution** (agents need GPU proximity) -- **Request-response patterns** (agents need continuous reasoning loops) ## What Exactly Is an AI Agent? @@ -27,7 +26,7 @@ Traditional cloud platforms (AWS Lambda, Google Cloud Functions, Azure Functions For a comprehensive overview of agents and how they differ from traditional software, see our [What is an Agent?](/Guides/what-is-an-agent) guide. -An AI agent is not just another chatbot or API wrapper around an LLM. It's a fundamentally different type of software that combines: +An AI agent is not just another chatbot or API wrapper around an LLM. 
It's a fundamentally different type of software that combines an LLM with **memory, tools, and a reasoning loop**. ### The Agent Formula ``` @@ -64,8 +63,7 @@ For deeper insights on this shift, read our [Agent Engineering](/Guides/agent-en Traditional software engineering is built on determinism - given the same input, you always get the same output. We write explicit logic for every scenario: -```python -# Traditional deterministic approach + Agent engineering embraces non-determinism - the agent interprets intent and figures out the best approach: -```python -# Agent-based approach + { + // Agent interprets the request + const intent = await analyzeIntent(request); + + // Agent decides on approach + const plan = await createActionPlan(intent, context.customerHistory); + + // Agent executes with available tools + const result = await executePlan(plan, context.availableTools); + + // Agent learns from outcome + await updateKnowledge(result, context.memory); + + return result; +};`} /> This shift requires a new mindset: - **Design for intent**, not implementation @@ -119,7 +141,7 @@ Traditional cloud platforms face fundamental limitations when running agents: | Pay per request | Continuous operation | Costs explode unexpectedly | | Human-centric monitoring | Agent observability | Can't debug agent decisions | -Major cloud providers are scrambling to adapt: +Major cloud providers are pushing to adapt to the needs of agents: - **AWS** launched Bedrock AgentCore (still in preview) - **Google** released their Agent Development Kit (ADK) - **Microsoft** is retrofitting Azure with agent capabilities @@ -132,7 +154,7 @@ But retrofitting existing infrastructure is like turning a highway into an airpo Learn more about the Agent-Native Cloud paradigm in our [Agent-Native Cloud](/Guides/agent-native-cloud) guide. -While others retrofit, Agentuity was built from day one specifically for agents. This isn't just marketing - it's a fundamental architectural difference.
+While others retrofit, Agentuity was built from day one specifically for agents. Our agent-native platform puts AI agents at the center of everything we do. ### Purpose-Built for Agents @@ -140,7 +162,7 @@ Agentuity provides what agents actually need: - **Long-running processes**: Agents can think for hours, not seconds - **Persistent memory**: Built-in [key-value](/Guides/key-value), [vector](/Guides/vector-db), and [object storage](/Guides/object-storage) -- **Agent-to-agent communication**: Secure, encrypted [channels between agents](/Guides/agent-communication) +- **Agent-to-agent communication**: Seamless and secure [channels between agents](/Guides/agent-communication) - **Native observability**: Track agent decisions with [built-in tracing](/Guides/agent-tracing) - **Automatic scaling**: Based on agent workload, not request count - **Framework agnostic**: Run LangChain, CrewAI, or custom agents side by side @@ -157,7 +179,7 @@ The result is a platform where: ## Your First Agent: Hello, Autonomous World -Enough theory - let's build something. We'll create an agent that doesn't just respond to "hello" but actually thinks about how to greet you. +Now that you have an understanding of what agents are and why they need specialized infrastructure, let's build your first agent. 
### Prerequisites @@ -206,96 +228,99 @@ pip install -r requirements.txt Here's a simple "Hello Agent" that demonstrates the core concepts: - { + // Get the name from request data + const data = await request.data.json(); + const name = data.name || 'World'; + + // Return a simple greeting + return response.json({ message: \`Hello, \${name}!\` }); +}; + +export default handler;`} /> + +This simple agent demonstrates: +- **Request handling**: Accepts JSON input with a `name` field +- **Response generation**: Returns a personalized greeting +- **Agentuity patterns**: Uses the standard request/response structure +- **Agent-native infrastructure**: Runs on infrastructure built for agents + +### Adding Observability and State + +Let's enhance our agent with logging and a simple state counter: + + { - // Get the request data + # Update the counter + await context.kv.set("stats", "greeting_count", count) + + context.logger.info(f"Greeting #{count} for {name}") + + # Return a greeting with some stats + return response.json({ + "message": f"Hello, {name}!", + "greeting_number": count, + "timestamp": datetime.now().isoformat() + })`} js={`const handler = async (request, response, context) => { + // Log the incoming request (built-in observability) + context.logger.info('Hello agent received a request'); + + // Get the name from request data const data = await request.data.json(); const name = data.name || 'World'; - - // Log the request (built-in observability) - context.logger.info(\`Received greeting request for \${name}\`); - - // Check if we've seen this user before (persistent memory) - const visitsKey = \`visits_\${name}\`; - const result = await context.kv.get('user-data', visitsKey); - - let greeting: string; - - if (result.exists) { - // Returning visitor - const visitCount = await result.data.json(); - visitCount.count++; - greeting = \`Welcome back, \${name}! 
This is visit #\${visitCount.count}\`; - - // Update the visit count - await context.kv.set('user-data', visitsKey, JSON.stringify(visitCount)); + + // Simple counter using KV storage + const counterResult = await context.kv.get('stats', 'greeting_count'); + let count; + if (counterResult.exists) { + count = await counterResult.data.json(); + count++; } else { - // First-time visitor - greeting = \`Hello, \${name}! Welcome to Agentuity.\`; - - // Store first visit - await context.kv.set('user-data', visitsKey, JSON.stringify({ - count: 1, - first_seen: new Date().toISOString() - })); + count = 1; } - - // Return the response + + // Update the counter + await context.kv.set('stats', 'greeting_count', count); + + context.logger.info(\`Greeting #\${count} for \${name}\`); + + // Return a greeting with some stats return response.json({ - message: greeting, - timestamp: new Date().toISOString(), - agent_info: "Running on Agentuity's agent-native infrastructure" + message: \`Hello, \${name}!\`, + greeting_number: count, + timestamp: new Date().toISOString() }); }; export default handler;`} /> -This simple agent demonstrates key concepts: -- **Memory**: Uses key-value storage to remember visitors -- **Logging**: Built-in observability with context.logger -- **State Management**: Tracks visit counts persistently -- **Request/Response**: Handles JSON input and output +This enhanced agent demonstrates: +- **Observability**: Built-in logging with context.logger +- **State Management**: Simple counter using key-value storage +- **Agent Memory**: One storage type (KV) without complexity +- **Request/Response**: JSON input and structured output ### Testing Your Agent with DevMode @@ -380,8 +405,10 @@ Now let's deploy this agent to production: # Deploy to Agentuity's agent-native infrastructure agentuity deploy -# Your agent is now running at: -# https://your-project.agentuity.com/agents/hello +# Your deployment will show: +# Deployment successful +# Agent URL: 
https://[unique-id].agentuity.com +# Dashboard: https://console.agentuity.com/projects/[project-id] ``` That's it! Your agent is now: @@ -394,34 +421,75 @@ That's it! Your agent is now: Now that you have a working agent, let's enhance it with more capabilities. Use DevMode to test each enhancement as you build it. -### Challenge 1: Add Time-Based Context -Modify your agent to greet differently based on the time of day: +### Challenge 1: Add Error Handling +Enhance your agent with proper error handling and validation: + + { + try { + const data = await request.data.json(); + const name = data.name; + + if (!name) { + context.logger.warn('No name provided in request'); + return response.json({ error: 'Name is required' }); + } + + context.logger.info(\`Processing request for \${name}\`); + + // Your agent logic here + return response.json({ message: \`Hello, \${name}!\` }); + + } catch (error) { + context.logger.error(\`Error processing request: \${error}\`); + return response.json({ error: 'Internal server error' }); + } +};`} /> + +### Challenge 2: Add Time-Based Context +Enhance your agent to greet differently based on the time of day: { + })`} js={`const handler = async (request, response, context) => { const data = await request.data.json(); const name = data.name || 'World'; - + + // Time-based greeting const hour = new Date().getHours(); - let timeGreeting: string; - + let timeGreeting; if (hour < 12) { timeGreeting = 'Good morning'; } else if (hour < 17) { @@ -429,90 +497,16 @@ async def run(request, response, context): } else { timeGreeting = 'Good evening'; } - + + // Include your existing counter logic here return response.json({ message: \`\${timeGreeting}, \${name}!\`, local_time: new Date().toISOString() }); };`} /> -### Challenge 2: Track User Preferences -Store and retrieve user preferences using key-value storage: - - { - const data = await request.data.json(); - const userId = data.userId; - const action = data.action; - - if (action === 
'set_preference') { - const preference = data.preference; - await context.kv.set('preferences', userId, JSON.stringify(preference)); - return response.json({ message: 'Preference saved' }); - - } else if (action === 'get_preference') { - const result = await context.kv.get('preferences', userId); - if (result.exists) { - const pref = await result.data.json(); - return response.json({ preference: pref }); - } - return response.json({ message: 'No preference found' }); - } -};`} /> - -### Challenge 3: Add Logging and Error Handling -Implement proper error handling and logging: - - { - try { - const data = await request.data.json(); - const name = data.name; - - if (!name) { - context.logger.warn('No name provided in request'); - return response.json({ error: 'Name is required' }); - } - - context.logger.info(\`Processing request for \${name}\`); - - // Your agent logic here - return response.json({ message: \`Hello, \${name}!\` }); - - } catch (error) { - context.logger.error(\`Error processing request: \${error}\`); - return response.json({ error: 'Internal server error' }); - } -};`} /> +### Challenge 3: Combine Features +Combine error handling, time-based greetings, and your counter into one robust agent. ### Testing Your Enhancements @@ -541,11 +535,12 @@ You've just built and deployed your first agent on infrastructure designed speci But first, take a moment to experiment with your agent. Try: - Calling it with different names and at different times -- Checking the logs in the Agentuity dashboard -- Modifying the personality and responses -- Adding more sophisticated memory patterns +- Checking the logs in the DevMode interface +- Calling it multiple times to see the counter increment +- Testing error cases like missing names +- Monitoring the key-value storage in the dashboard -Remember: aside from just learning a new framework, you're learning a fundamentally new way to build software. Welcome to the age of agents! 
+Remember: you're not just learning a new framework, you're learning a fundamentally new way to build software. Welcome to the age of agents! --- diff --git a/content/Training/developers/02-anatomy-of-an-agent.mdx b/content/Training/developers/02-anatomy-of-an-agent.mdx index 99704d74..d5aa0a49 100644 --- a/content/Training/developers/02-anatomy-of-an-agent.mdx +++ b/content/Training/developers/02-anatomy-of-an-agent.mdx @@ -11,12 +11,12 @@ Every agent interaction follows a predictable lifecycle, from receiving a trigge Let's explore each phase: @@ -225,306 +225,159 @@ What separates agents from simple scripts is their ability to plan and reason. W ### The Planning Phase -Planning involves breaking down a complex request into manageable steps: +Planning is how agents break down complex requests into actionable steps. According to [OpenAI's practical guide to building agents](https://cdn.openai.com/business-guides-and-resources/a-practical-guide-to-building-agents.pdf), providing smaller, clearer steps from dense resources helps minimize ambiguity and helps the model better follow instructions. Effective agent planning involves: - { - const data = await request.data.json(); - - // Create a plan - const plan = await createPlan(data, context); - - // Execute the plan - const results = []; - for (const step of plan) { - context.logger.info(\`Executing: \${step.action}\`); - const result = await executeStep(step, context); - results.push(result); - } - - return response.json({ plan, results }); -};`} /> +Modern agents use LLMs for dynamic planning rather than hard-coded decision trees, allowing them to adapt their approach based on context and available resources. ### The Reasoning Loop -The reasoning loop is where agents adapt based on results: +The reasoning loop is where agents continuously evaluate and adapt their approach. 
[The ReAct paper](https://arxiv.org/abs/2210.03629) shows that combining reasoning and acting in iterative loops significantly improves agent performance. - +The basic pattern follows: +1. **Observe**: Analyze current state and available information +2. **Think**: Reason about the best next action +3. **Act**: Execute the chosen action +4. **Reflect**: Evaluate the results and learn +5. **Repeat**: Continue until goal is achieved or constraints are met + +This iterative approach allows agents to handle uncertainty and recover from errors - key capabilities that distinguish agents from simple scripts. ## Tool Invocation: Extending Agent Capabilities -Agents become powerful when they can use tools. Tools can be APIs, databases, or even other agents: +Agents become powerful when they can use external tools and services: Dict[str, Any]: - """Search the web for information.""" +async def use_web_search(query: str, context) -> Dict[str, Any]: + """Search the web using a real API.""" + try: + # Example using a hypothetical search API + api_key = os.getenv("SEARCH_API_KEY") + if not api_key: + context.logger.warn("No search API key found") + return {"error": "Search not available"} + async with httpx.AsyncClient() as client: response = await client.get( - "https://api.search.com/v1/search", - params={"q": query} + "https://api.searxng.org/search", + params={"q": query, "format": "json"} ) return response.json() - - @staticmethod - async def calculate(expression: str) -> float: - """Perform mathematical calculations.""" - # Safe evaluation of math expressions - import ast - import operator as op - - ops = { - ast.Add: op.add, - ast.Sub: op.sub, - ast.Mult: op.mul, - ast.Div: op.truediv, - } - - def eval_expr(expr): - return eval(compile(ast.parse(expr, mode='eval'), '', 'eval')) - - return eval_expr(expression) - - @staticmethod - async def send_email(to: str, subject: str, body: str) -> bool: - """Send an email.""" - # Email sending logic - return True 
+ except Exception as e: + context.logger.error(f"Search failed: {str(e)}") + return {"error": "Search failed"} + +async def call_another_agent(agent_name: str, data: dict, context) -> Dict[str, Any]: + """Call another agent within the same project.""" + try: + agent = await context.getAgent({"name": agent_name}) + result = await agent.run({ + "data": data, + "contentType": "application/json" + }) + return await result.data.json() + except Exception as e: + context.logger.error(f"Agent call failed: {str(e)}") + return {"error": "Agent call failed"} async def run(request, response, context): data = await request.data.json() - action = data.get("action") - - tools = AgentTools() - - if action == "research": - # Use web search tool - results = await tools.web_search(data["query"]) - return response.json({"search_results": results}) - - elif action == "calculate": - # Use calculator tool - result = await tools.calculate(data["expression"]) - return response.json({"result": result}) - - elif action == "notify": - # Use email tool - sent = await tools.send_email( - data["email"], - data["subject"], - data["message"] - ) - return response.json({"sent": sent})`} js={`import axios from 'axios'; - -class AgentTools { - static async webSearch(query: string) { - const response = await axios.get('https://api.search.com/v1/search', { - params: { q: query } - }); - return response.data; - } - - static async calculate(expression: string): Promise { - // Use a safe math evaluation library - // This is a simplified example - return eval(expression); // In production, use a safe math parser + tool = data.get("tool") + + if tool == "search": + result = await use_web_search(data["query"], context) + elif tool == "delegate": + result = await call_another_agent(data["agent"], data["task"], context) + else: + result = {"error": "Unknown tool"} + + return response.json(result)`} js={`async function useWebSearch(query: string, context: any): Promise { + try { + // Example using a 
hypothetical search API + const apiKey = process.env.SEARCH_API_KEY; + if (!apiKey) { + context.logger.warn('No search API key found'); + return { error: 'Search not available' }; + } + + const response = await fetch('https://api.searxng.org/search?' + + new URLSearchParams({ q: query, format: 'json' })); + return await response.json(); + } catch (error) { + context.logger.error(\`Search failed: \${error}\`); + return { error: 'Search failed' }; } - - static async sendEmail(to: string, subject: string, body: string): Promise { - // Email sending logic - return true; +} + +async function callAnotherAgent(agentName: string, data: any, context: any): Promise { + try { + const agent = await context.getAgent({ name: agentName }); + const result = await agent.run({ + data, + contentType: 'application/json' + }); + return await result.data.json(); + } catch (error) { + context.logger.error(\`Agent call failed: \${error}\`); + return { error: 'Agent call failed' }; } } const handler: AgentHandler = async (request, response, context) => { const data = await request.data.json(); - const action = data.action; - - if (action === 'research') { - // Use web search tool - const results = await AgentTools.webSearch(data.query); - return response.json({ search_results: results }); - - } else if (action === 'calculate') { - // Use calculator tool - const result = await AgentTools.calculate(data.expression); - return response.json({ result }); - - } else if (action === 'notify') { - // Use email tool - const sent = await AgentTools.sendEmail( - data.email, - data.subject, - data.message - ); - return response.json({ sent }); + const tool = data.tool; + + let result; + if (tool === 'search') { + result = await useWebSearch(data.query, context); + } else if (tool === 'delegate') { + result = await callAnotherAgent(data.agent, data.task, context); + } else { + result = { error: 'Unknown tool' }; } + + return response.json(result); };`} /> -## Memory Updates: Learning and Persistence +## 
Memory Patterns: How Agents Remember -Memory is what allows agents to learn and improve over time. Agentuity provides three types of memory storage: +Memory enables agents to learn and maintain state across interactions. The duration and type of memory depend on your use case and TTL settings. -### Short-term Memory (Key-Value) -For session data, cache, and temporary state: +### Session Memory +For temporary data that should expire: - { + ) + + # Retrieve session data + session_data = await context.kv.get("sessions", session_id) + if session_data.exists: + data = await session_data.data.json() + context.logger.info(f"Session active for {len(data['messages'])} messages")`} js={`const handler: AgentHandler = async (request, response, context) => { const sessionId = request.metadata.get('session_id'); - - // Store conversation context + + // Store temporary session data with TTL await context.kv.set( 'sessions', sessionId, @@ -537,42 +390,104 @@ For session data, cache, and temporary state: ); };`} /> -### Long-term Memory (Vector Storage) -For knowledge base and semantic search: +### Persistent Memory +For data that should survive restarts: { + const userId = request.metadata.get('user_id'); + + // Store user preferences permanently (no TTL) + await context.kv.set( + 'user_profiles', + userId, + { + name: 'Alice', + preferences: { format: 'concise' }, + created_at: new Date().toISOString() + } + // No TTL = permanent storage + ); + + // Store large files in object storage + await context.objectstore.put( + 'user_documents', + \`\${userId}/resume.pdf\`, + pdfBytes + ); +};`} /> + +### Searchable Memory +For semantic search and knowledge retrieval: + + { - // Store learned information - await context.vector.upsert('knowledge', { - key: 'fact_001', - document: 'Users prefer concise responses', + "How should I format my responses?", + limit=5 + ) + + # Use the knowledge to improve responses + if results: + context.logger.info(f"Found {len(results)} relevant insights") + 
for result in results: + context.logger.info(f"Insight: {result.document}")`} js={`const handler: AgentHandler = async (request, response, context) => { + // Store searchable knowledge + await context.vector.upsert('knowledge', [{ + key: 'user_feedback_001', + document: 'Users prefer concise responses with examples', metadata: { learned_from: 'user_feedback', - confidence: 0.85 + confidence: 0.85, + category: 'response_style' } - }); - - // Retrieve relevant knowledge - const results = await context.vector.search('knowledge', { - query: 'How should I format responses?', - limit: 5 - }); + }]); + + // Search for relevant knowledge + const results = await context.vector.search( + 'knowledge', + 'How should I format my responses?', + { limit: 5 } + ); + + // Use the knowledge to improve responses + if (results.length > 0) { + context.logger.info(\`Found \${results.length} relevant insights\`); + results.forEach(result => { + context.logger.info(\`Insight: \${result.document}\`); + }); + } };`} /> ## Framework Awareness: Choosing the Right Tool @@ -585,12 +500,12 @@ Agentuity is framework-agnostic. You can use any AI framework or build custom ag Different frameworks excel at different tasks. 
Here's when to use each: -| Framework | Best For | Agentuity Integration | -|-----------|----------|----------------------| -| **LangChain** | Complex chains, RAG applications | Native SDK support | -| **CrewAI** | Multi-agent teams, role-based systems | Deploy via container | -| **AutoGen** | Research, experimentation | Direct deployment | -| **Custom** | Specific requirements, full control | Full platform features | +| Framework | Best For | Language Support | Agentuity Integration | +|-----------|----------|------------------|----------------------| +| **LangChain** | Complex chains, RAG applications | Python, TypeScript | Native SDK support | +| **CrewAI** | Multi-agent teams, role-based systems | Python | Container deployment | +| **AutoGen** | Research, conversational AI | Python | Direct deployment | +| **Custom** | Specific requirements, full control | Python, TypeScript, Bun | Full platform features | ### Framework Comparison Example @@ -653,165 +568,202 @@ const handler: AgentHandler = async (request, response, context) => { ### When to Use Which Framework -- **Use LangChain** when you need: - - Complex reasoning chains - - RAG (Retrieval Augmented Generation) - - Extensive tool integration - - Well-documented patterns - -- **Use CrewAI** when you need: - - Multiple specialized agents - - Role-based collaboration - - Hierarchical task delegation +| Framework | Use When You Need | Avoid When You Need | +|-----------|-------------------|-------------------| +| **LangChain** | Complex reasoning chains, RAG applications, extensive data integrations | Simple agents, minimal dependencies, lightweight solutions | +| **CrewAI** | Multi-agent teams, role-based collaboration, specialized workflows | Single-agent tasks, simple request-response patterns | +| **AutoGen** | Conversational AI, research applications, experimental workflows | Production deployments, strict performance requirements | +| **Custom** | Full control, specific requirements, optimized 
performance | Rapid prototyping, standard use cases, extensive tooling needs | -- **Use Custom** when you need: - - Maximum performance - - Specific business logic - - Full control over behavior - - Minimal dependencies +## Lab: Building a Weather Agent with Multiple Triggers -## Lab: Building a Multi-Trigger Agent - -Let's put it all together by building an agent that responds to multiple triggers. We'll test it using DevMode's web interface: +Let's build an agent that demonstrates different behaviors based on how it's triggered. This weather agent will show you how the same code can serve different purposes: { const trigger = request.trigger; - context.logger.info(\`Agent triggered via: \${trigger}\`); - + context.logger.info(\`Weather agent triggered via: \${trigger}\`); + if (trigger === 'webhook') { - // Handle API requests - const data = await request.data.json(); - - // Process based on action - const action = data.action; - if (action === 'store') { - // Store data in KV - await context.kv.set('data', data.key, JSON.stringify(data.value)); - return response.json({ status: 'stored', key: data.key }); - - } else if (action === 'retrieve') { - // Get from KV - const result = await context.kv.get('data', data.key); - if (result.exists) { - const value = await result.data.json(); - return response.json({ found: true, value }); - } - return response.json({ found: false }); + // Manual requests for specific cities + let city = 'London'; + try { + const data = await request.data.json(); + city = data.city || 'London'; + } catch { + city = await request.data.text() || 'London'; } - - // Handle DevMode simulation of cron for testing - if (data._simulate_cron) { - context.logger.info('Simulating cron trigger in DevMode'); - // Run the same cleanup logic - const report = { - task: 'cleanup (simulated)', - timestamp: new Date().toISOString(), - status: 'completed' - }; - return response.json(report); + + context.logger.info(\`Manual weather request for: \${city}\`); + + 
// Check cache first (10 minutes TTL) + const cacheKey = \`weather_\${city.toLowerCase()}\`; + const cached = await context.kv.get('weather_cache', cacheKey); + + if (cached.exists) { + const weather = await cached.data.json(); + context.logger.info(\`Returning cached weather for \${city}\`); + return response.json({ + ...weather, + cached: true, + cache_age_minutes: 5 // Mock age + }); } - - } else if (trigger === 'cron') { - // Scheduled cleanup task - context.logger.info('Running scheduled cleanup'); - - // Clean old sessions - await context.kv.delete('sessions', 'old_session'); - - // Generate report - const report = { - task: 'cleanup', - timestamp: new Date().toISOString(), - status: 'completed' + + // Mock weather data (in production, call real API) + const weather = { + city, + temperature: 22, + description: 'partly cloudy', + humidity: 65, + timestamp: new Date().toISOString() }; - - // Store report - await context.kv.set('reports', 'latest_cleanup', JSON.stringify(report)); - return response.json(report); - - } else if (trigger === 'agent') { - // Handle agent-to-agent communication - const data = await request.data.json(); - - // Process request from another agent - const result = \`Processed request from agent: \${JSON.stringify(data)}\`; - - return response.json({ result }); - + + // Cache for 10 minutes + await context.kv.set('weather_cache', cacheKey, weather, { ttl: 600 }); + + return response.json({ ...weather, cached: false }); + + } else if (trigger === 'cron') { + // Batch update for major cities + const cities = ['London', 'New York', 'Tokyo', 'Sydney']; + context.logger.info(\`Cron job: Updating weather for \${cities.length} cities\`); + + const weatherData: Record = {}; + cities.forEach(city => { + // Mock weather data with variation + weatherData[city] = { + temperature: 20 + (city.length % 15), // Mock variation + description: 'sunny', + updated: new Date().toISOString() + }; + }); + + // Store daily weather report + await 
context.kv.set('weather_reports', 'daily', weatherData, { ttl: 86400 }); + + return response.json({ + task: 'weather_update', + cities_updated: cities.length, + timestamp: new Date().toISOString() + }); + + } else if (trigger === 'manual') { + // DevMode testing - show available commands + return response.json({ + message: 'Weather Agent DevMode', + commands: [ + 'Test webhook: POST {"city": "Paris"}', + 'Test cron: Use simulate button in DevMode' + ], + current_time: new Date().toISOString() + }); + } else { return response.json({ - error: \`Unknown trigger type: \${trigger}\` + error: \`Unsupported trigger: \${trigger}\`, + supported: ['webhook', 'cron', 'manual'] }); } -}; - -export default handler;`} /> +};`} /> -### Testing Your Multi-Trigger Agent +### Testing Your Weather Agent -DevMode makes it easy to test webhook triggers locally. For cron triggers, you can simulate them in DevMode or configure real schedules through the Agentuity Console. +DevMode makes it easy to test all trigger types. You can simulate webhooks, test manual triggers, and even simulate cron jobs through the interface. -#### Setting Up Test Scenarios +#### What This Agent Demonstrates + +- **Webhook trigger**: Handles real-time weather requests with caching +- **Cron trigger**: Batch updates for multiple cities +- **Manual trigger**: DevMode testing and debugging interface +- **Caching strategy**: Uses TTL to balance freshness and performance +- **Error handling**: Graceful fallbacks for different input types -First, enhance your agent with a `welcome()` function to create test scenarios: +Try calling this agent with different cities and watch how it caches responses for efficiency! "The difference between a chatbot and an agent is memory. A chatbot responds to the current message. An agent remembers your entire relationship." 
@@ -25,21 +25,16 @@ This creates fundamental challenges: For implementation details on Agentuity's storage systems, see our guides on [Key-Value Storage](/Guides/key-value), [Vector Database](/Guides/vector-db), and [Object Storage](/Guides/object-storage). -### The Memory Hierarchy +### Memory Types Overview -Just like human memory, agent memory operates at different levels: +Agent memory serves different purposes based on how long data needs to persist: - +| Memory Type | Duration | Storage | Purpose | +|-------------|----------|---------|----------| +| **Working Memory** | Single request | In-memory variables | Process current task | +| **Session Memory** | Minutes to hours | KV with TTL | Maintain conversation context | +| **Persistent Memory** | Until deleted | KV without TTL | Store user preferences, profiles | +| **Searchable Memory** | Until deleted | Vector storage | Semantic search and retrieval | Let's explore each level: @@ -163,12 +158,12 @@ Best for discrete, queryable data: await context.kv.set('users', userId, JSON.stringify(profile)); };`} /> -### Unstructured Memory (Vector Storage) -Best for semantic information: -- Conversation history -- Domain knowledge -- User feedback -- Document content +### Searchable Memory (Vector Storage) +Best for semantic search and knowledge retrieval: +- Conversation history you want to search through +- Domain knowledge and learned information +- User feedback and insights +- Document content that needs to be findable { - // Receive a research request + // Basic handoff example + const task = await request.data.json(); - + // Decide we need help from the web search agent if (task.type === 'web_research') { // Hand off to the web search specialist return response.handoff( { name: 'web-search' }, // Target agent { - data: JSON.stringify({ query: task.query }), // Data as string/buffer + data: JSON.stringify({ query: task.query }), // Data as string contentType: 'application/json', metadata: { original_task: task } // 
Metadata object } ); } - + // Handle other task types... return response.json({ result: 'Task completed' }); };`} /> -### 3. Agent Resolution +## Agent Resolution Agents can find each other using either: - **Name**: Human-readable identifier (e.g., "web-search") @@ -96,7 +107,7 @@ The context object knows about all agents in your project: # List all available agents available_agents = context.agents context.logger.info(f"Available agents: {[a.name for a in available_agents]}") - + # Check if a specific agent exists if any(agent.name == "summarizer" for agent in context.agents): # Agent exists, we can safely hand off @@ -107,7 +118,7 @@ The context object knows about all agents in your project: // List all available agents const availableAgents = context.agents; context.logger.info(\`Available agents: \${availableAgents.map(a => a.name)}\`); - + // Check if a specific agent exists if (availableAgents.some(agent => agent.name === 'summarizer')) { // Agent exists, we can safely hand off @@ -121,196 +132,6 @@ The context object knows about all agents in your project: } };`} /> -## Building a Research Team - -Let's build a simple research system with three specialized agents working together. - -### The Coordinator Agent - -This agent receives research requests and delegates to specialists: - - { - // Research coordinator that delegates to specialist agents - - const researchRequest = await request.data.json(); - const query = researchRequest.query || ''; - - context.logger.info(\`Coordinating research for: \${query}\`); - - // Step 1: Gather information from web search - const searchResult = await response.handoff( - { name: 'web-search' }, - { - data: JSON.stringify({ query, max_results: 3 }), - contentType: 'application/json' - } - ); - - // Note: In reality, the handoff returns immediately. - // For this example, imagine we're showing the flow. - // The web-search agent's response will go back to the user. 
- - return searchResult; -};`} /> - -### The Web Search Agent - -A specialist that searches the web and returns results: - - { - // Specialist agent for web searches - - const searchParams = await request.data.json(); - const query = searchParams.query; - const maxResults = searchParams.max_results || 5; - - context.logger.info(\`Searching web for: \${query}\`); - - // Simulate web search (in production, use actual search API) - const searchResults = Array.from({ length: maxResults }, (_, i) => ({ - title: \`Result \${i + 1} for \${query}\`, - snippet: \`This is a relevant snippet about \${query}....\`, - url: \`https://example.com/\${i + 1}\` - })); - - // Now hand off to summarizer to create a coherent summary - return response.handoff( - { name: 'summarizer' }, - { - data: JSON.stringify({ - content: searchResults, - original_query: query - }), - contentType: 'application/json', - metadata: { source: 'web-search' } - } - ); -};`} /> - -### The Summarizer Agent - -Takes search results and creates a concise summary: - - { - // Specialist agent for summarizing content - - const summaryRequest = await request.data.json(); - const content = summaryRequest.content || []; - const originalQuery = summaryRequest.original_query || ''; - - context.logger.info(\`Summarizing \${content.length} items\`); - - // Create a summary from the search results - const summaryParts = content.map(item => { - if (typeof item === 'object') { - const title = item.title || ''; - const snippet = item.snippet || ''; - return \`• \${title}: \${snippet}\`; - } - return ''; - }).filter(Boolean); - - const finalSummary = { - query: originalQuery, - summary: \`Here's what I found about '\${originalQuery}':\\n\\n\${summaryParts.join('\\n')}\`, - source_count: content.length, - timestamp: new Date().toISOString() - }; - - // Return the final summary to the user - return response.json(finalSummary); -};`} /> - -## Orchestration Patterns: Industry vs. 
Agentuity - -### The Industry Challenge - -According to [Microsoft's multi-agent orchestration announcements](https://www.microsoft.com/en-us/microsoft-copilot/blog/copilot-studio/multi-agent-orchestration-maker-controls-and-more-microsoft-copilot-studio-announcements-at-microsoft-build-2025/), most platforms require complex orchestration layers with state management, workflow engines, and coordination services. [AWS's multi-agent design patterns](https://aws.amazon.com/blogs/machine-learning/design-multi-agent-orchestration-with-reasoning-using-amazon-bedrock-and-open-source-frameworks/) show similar complexity, often requiring LangGraph or custom orchestrators. - -### Agentuity's Approach: Simple Handoffs - -While other platforms build complex orchestration layers, Agentuity takes a different approach: **the handoff mechanism**. It's simpler, but with clear trade-offs: - -- **What it does**: Transfers control from one agent to another (like call forwarding) -- **What it doesn't do**: Parallel execution, getting responses back, complex coordination -- **Why it works**: Most real-world agent workflows are actually sequential or conditional - ## Patterns That Work with Agentuity's Handoff ### 1. 
Sequential Chain Pattern @@ -319,43 +140,43 @@ Perfect for pipelines where each agent completes its work and passes to the next { // Pipeline pattern: Each agent enriches the data - + const data = await request.data.json(); const step = data.step || 1; - + if (step === 1) { // First agent: Clean the data data.cleaned = true; data.step = 2; return response.handoff({ name: 'processor' }, { data: JSON.stringify(data), contentType: 'application/json' }); - + } else if (step === 2) { // Second agent: Enrich the data data.enriched = true; data.step = 3; return response.handoff({ name: 'validator' }, { data: JSON.stringify(data), contentType: 'application/json' }); - + } else if (step === 3) { // Final agent: Validate and return data.validated = true; @@ -369,10 +190,10 @@ Route to different agents based on the request type - ideal for dispatcher/route { // Router pattern: Direct to appropriate specialist - + const task = await request.data.json(); const taskType = (task.type || '').toLowerCase(); - + // Route based on task type const agentMap = { math: 'calculator-agent', @@ -407,20 +228,20 @@ Route to different agents based on the request type - ideal for dispatcher/route search: 'web-search', summary: 'summarizer' }; - + const targetAgent = agentMap[taskType]; - + if (targetAgent) { context.logger.info(\`Routing \${taskType} task to \${targetAgent}\`); return response.handoff( { name: targetAgent }, - { + { data: JSON.stringify(task.payload || {}), contentType: 'application/json' } ); } - + // Unknown task type return response.json({ error: \`Unknown task type: \${taskType}\`, @@ -428,53 +249,68 @@ Route to different agents based on the request type - ideal for dispatcher/route }); };`} /> -### 3. Delegation Pattern (Agentuity-Specific) +### 3. Orchestrator Pattern (Smart Routing) -A main agent handles most work but delegates specialized tasks. 
Remember: the delegated agent's response goes to the client, not back to the delegator: +A central agent analyzes requests and routes to appropriate specialists: { - // Main agent that delegates when needed - - const task = await request.data.json(); - - // Handle most tasks directly - if (['simple', 'standard'].includes(task.type)) { - const result = processLocally(task); - return response.json(result); - } - - // Delegate complex tasks - if (task.type === 'complex') { - // Note: We won't get the response back - // The specialist will respond directly to client + {"name": "technical-expert"}, + {"query": user_prompt, "type": "technical"} + ) + elif "price" in user_prompt.lower() or "cost" in user_prompt.lower(): + return response.handoff( + {"name": "sales-expert"}, + {"query": user_prompt, "type": "sales"} + ) + elif "help" in user_prompt.lower() or "problem" in user_prompt.lower(): + return response.handoff( + {"name": "support-expert"}, + {"query": user_prompt, "type": "support"} + ) + else: + return response.text("I can help with technical, sales, or support questions. 
How can I assist you?")`} js={`const handler: AgentHandler = async (request, response, context) => { + // Smart orchestrator that analyzes intent before routing + + const userPrompt = await request.data.text(); + + // Simple intent analysis (in production, use LLM) + if (userPrompt.toLowerCase().includes('technical') || userPrompt.toLowerCase().includes('code')) { + return response.handoff( + { name: 'technical-expert' }, + { + data: JSON.stringify({ query: userPrompt, type: 'technical' }), + contentType: 'application/json' + } + ); + } else if (userPrompt.toLowerCase().includes('price') || userPrompt.toLowerCase().includes('cost')) { + return response.handoff( + { name: 'sales-expert' }, + { + data: JSON.stringify({ query: userPrompt, type: 'sales' }), + contentType: 'application/json' + } + ); + } else if (userPrompt.toLowerCase().includes('help') || userPrompt.toLowerCase().includes('problem')) { return response.handoff( - { name: 'specialist-agent' }, - { - data: JSON.stringify(task), + { name: 'support-expert' }, + { + data: JSON.stringify({ query: userPrompt, type: 'support' }), contentType: 'application/json' } ); + } else { + return response.text('I can help with technical, sales, or support questions. How can I assist you?'); } };`} /> -### 4. Request-Response Pattern +### 4. Understanding Handoff Flow Understanding that handoff is a one-way transfer is crucial. The final agent in the chain responds to the original client: @@ -486,71 +322,24 @@ Client Request → Agent A → (handoff) → Agent B → Response to Client This is different from traditional RPC where Agent A would wait for Agent B's response. -## Agentuity-Specific Best Practices - -### 1. 
Working with Agent Resolution +## Understanding Handoff Constraints -Agentuity provides built-in agent discovery within your project: - - { - // List all agents in your project - const availableAgents = context.agents; - const agentNames = availableAgents.map(a => a.name); - context.logger.info(\`Available agents: \${agentNames}\`); - - // Check if an agent exists before handoff - const targetName = 'specialist-agent'; - if (availableAgents.some(agent => agent.name === targetName)) { - return response.handoff( - { name: targetName }, - { data: JSON.stringify(data), contentType: 'application/json' } - ); - } else { - return response.json({ - error: \`Agent '\${targetName}' not found\`, - available: agentNames - }); - } - - // You can also use agent IDs for more precise targeting - // return response.handoff({ id: 'agent_abc123' }, args); -};`} /> - -### 2. Understanding Handoff Limitations - -Agentuity's handoff has built-in safety features and constraints: +Agentuity's handoff has built-in safety features and important constraints to understand: -### 3. 
Leveraging Agentuity's Built-in Features +## Agentuity's Built-in Features Agentuity provides powerful built-in features for multi-agent systems: '.join(agent_chain)}") - + return response.handoff( {"name": "next-agent"}, processed_data, @@ -615,22 +407,22 @@ Agentuity provides powerful built-in features for multi-agent systems: )`} js={`const handler: AgentHandler = async (request, response, context) => { // Use context.sessionId for automatic request tracking context.logger.info(\`Processing request \${context.sessionId} in \${context.agent.name}\`); - + // Built-in OpenTelemetry tracing (automatic spans) // Each handoff creates a new span in the trace - + // Use agent metadata for debugging context.logger.debug(\`Agent ID: \${context.agent.id}\`); context.logger.debug(\`Project ID: \${context.projectId}\`); - + // Track the chain of agents const metadata = request.metadata || {}; const agentChain = metadata.agent_chain || []; agentChain.push(context.agent.name); - + // Log the full chain for debugging context.logger.info(\`Agent chain: \${agentChain.join(' -> ')}\`); - + return response.handoff( { name: 'next-agent' }, { @@ -641,201 +433,294 @@ Agentuity provides powerful built-in features for multi-agent systems: ); };`} /> -## Lab: Build a Document Processing Pipeline - -Create a three-agent system that processes documents: - -1. **intake-agent**: Receives documents and validates format -2. **analyzer-agent**: Extracts key information -3. 
**storage-agent**: Stores processed results - - { + // Research orchestrator that routes to specialist agents + + const researchRequest = await request.data.json(); + const query = researchRequest.query || ''; + const researchType = researchRequest.type || 'general'; + + context.logger.info(\`Orchestrating research for: \${query}\`); + + // Route based on research type + if (researchType === 'web' || query.toLowerCase().includes('search')) { + // Route to web search specialist return response.handoff( - {"name": "analyzer-agent"}, - document # Pass document as data - ) - -# Analyzer Agent -async def run(request, response, context): - """Extract key information from documents.""" - - document = await request.data.json() - content = document.get("content", "") - - # Simple analysis (in production, use NLP/LLM) - analysis = { - "word_count": len(content.split()), - "char_count": len(content), - "has_numbers": any(c.isdigit() for c in content), - "document_type": document.get("type"), - "analyzed_at": datetime.now().isoformat() + { name: 'web-search' }, + { + data: JSON.stringify({ query, max_results: 5 }), + contentType: 'application/json', + metadata: { orchestrator: 'research-orchestrator', original_type: researchType } + } + ); + } else if (researchType === 'summary' || query.toLowerCase().includes('summarize')) { + // Route directly to summarizer if we have content + const content = researchRequest.content || ''; + if (content) { + return response.handoff( + { name: 'summarizer' }, + { + data: JSON.stringify({ content, query }), + contentType: 'application/json', + metadata: { orchestrator: 'research-orchestrator' } + } + ); } - - # Combine with original document - document["analysis"] = analysis - - # Pass to storage - return response.handoff( - {"name": "storage-agent"}, - document # Pass document as data - ) - -# Storage Agent -async def run(request, response, context): - """Store processed documents.""" - - document = await request.data.json() - - # Generate 
unique ID - doc_id = f"doc_{context.sessionId}" - - # Store in KV - await context.kv.set( - "processed_documents", - doc_id, - document - ) - - # Return confirmation - return response.json({ - "status": "success", - "document_id": doc_id, - "processed_by": [ - document.get("intake_agent"), - "analyzer-agent", - "storage-agent" - ], - "stored_at": datetime.now().isoformat() - })`} js={`// Intake Agent -const intakeHandler: AgentHandler = async (request, response, context) => { - // Validate and route incoming documents - - const document = await request.data.json(); - - // Validate document format - if (!document.content) { - return response.json({ error: 'Missing document content' }); } - - if (!['pdf', 'text', 'html'].includes(document.type)) { - return response.json({ error: 'Unsupported document type' }); - } - - // Add processing metadata - document.received_at = new Date().toISOString(); - document.intake_agent = context.agent.name; - - // Pass to analyzer - return response.handoff( - { name: 'analyzer-agent' }, - { - data: JSON.stringify(document), - contentType: 'application/json' - } - ); -}; - -// Analyzer Agent -const analyzerHandler: AgentHandler = async (request, response, context) => { - // Extract key information from documents - - const document = await request.data.json(); - const content = document.content || ''; - - // Simple analysis (in production, use NLP/LLM) - const analysis = { - word_count: content.split(' ').length, - char_count: content.length, - has_numbers: /\d/.test(content), - document_type: document.type, - analyzed_at: new Date().toISOString() - }; - - // Combine with original document - document.analysis = analysis; - - // Pass to storage + + // Default: explain what we can do + return response.json({ + message: 'I can help with web research or summarization.', + examples: [ + { type: 'web', query: 'latest AI developments' }, + { type: 'summary', content: 'text to summarize', query: 'key points' } + ] + }); +};`} /> + +### The Web 
Search Agent + +A specialist that searches the web and automatically forwards to summarizer: + + { + // Specialist agent for web searches + + const searchParams = await request.data.json(); + const query = searchParams.query; + const maxResults = searchParams.max_results || 5; + + context.logger.info(\`Searching web for: \${query}\`); + + // Simulate web search (in production, use actual search API) + const searchResults = Array.from({ length: maxResults }, (_, i) => ({ + title: \`Result \${i + 1} for \${query}\`, + snippet: \`This is a relevant snippet about \${query} with detailed information...\`, + url: \`https://example.com/article-\${i + 1}\`, + relevance_score: 0.9 - (i * 0.1) + })); + + // Automatically hand off to summarizer for final processing return response.handoff( - { name: 'storage-agent' }, - { - data: JSON.stringify(document), - contentType: 'application/json' + { name: 'summarizer' }, + { + data: JSON.stringify({ + content: searchResults, + original_query: query, + source: 'web_search' + }), + contentType: 'application/json', + metadata: { previous_agent: 'web-search', search_count: searchResults.length } } ); -}; - -// Storage Agent -const storageHandler: AgentHandler = async (request, response, context) => { - // Store processed documents - - const document = await request.data.json(); - - // Generate unique ID - const docId = \`doc_\${context.sessionId}\`; - - // Store in KV - await context.kv.set( - 'processed_documents', - docId, - JSON.stringify(document) - ); - - // Return confirmation - return response.json({ - status: 'success', - document_id: docId, - processed_by: [ - document.intake_agent, - 'analyzer-agent', - 'storage-agent' - ], - stored_at: new Date().toISOString() - }); };`} /> -## Testing Your Multi-Agent System +### The Summarizer Agent + +Takes search results and creates a final research summary: -Test the document processing pipeline: + { + // Specialist agent for summarizing research content + + const summaryRequest = await 
request.data.json(); + const content = summaryRequest.content || []; + const originalQuery = summaryRequest.original_query || ''; + const source = summaryRequest.source || 'unknown'; + + context.logger.info(\`Summarizing \${Array.isArray(content) ? content.length : 1} items for query: \${originalQuery}\`); + + // Process different content types + let finalSummary; + + if (source === 'web_search' && Array.isArray(content)) { + // Handle web search results + const summaryParts = content.slice(0, 3).map((item, i) => ({ + rank: i + 1, + title: item.title || '', + summary: item.snippet || '', + source_url: item.url || '', + relevance: item.relevance_score || 0 + })); + + finalSummary = { + query: originalQuery, + summary: \`Research Summary for '\${originalQuery}'\`, + key_findings: summaryParts, + total_sources: content.length, + research_completed_at: new Date().toISOString(), + agent_chain: ['orchestrator', 'web-search', 'summarizer'] + }; + } else { + // Handle direct text content + finalSummary = { + query: originalQuery, + summary: \`Summary of content for '\${originalQuery}'\`, + content_length: String(content).length, + processed_at: new Date().toISOString(), + agent_chain: ['orchestrator', 'summarizer'] + }; + } + + // Return the final research summary to the user + return response.json(finalSummary); +};`} /> + +## Testing Your Research Orchestrator + +Test the research orchestrator system: ```bash -# Test with a sample document -curl -X POST https://your-project.agentuity.com/intake-agent \ +# Test web research routing +curl -X POST https://your-project.agentuity.com/agents/orchestrator \ + -H "Content-Type: application/json" \ + -d '{ + "type": "web", + "query": "latest developments in AI agents" + }' + +# Test direct summarization routing +curl -X POST https://your-project.agentuity.com/agents/orchestrator \ -H "Content-Type: application/json" \ -d '{ - "type": "text", - "content": "This is a test document with some numbers 12345." 
+ "type": "summary", + "query": "key points", + "content": "Long text content to be summarized..." }' ``` -Expected flow: -1. `intake-agent` validates and adds metadata -2. `analyzer-agent` extracts information -3. `storage-agent` saves and returns confirmation +Expected flow for web research: +1. `orchestrator` analyzes request and routes to web-search +2. `web-search` finds results and forwards to summarizer +3. `summarizer` creates final research summary and responds to client ## Key Takeaways - **Multi-agent systems** enable specialization, scalability, and modularity -- **Handoff mechanism** allows agents to delegate tasks to other agents -- **Orchestration patterns** include sequential, parallel, and conditional routing -- **Best practices** include loop prevention, error handling, and request tracing -- **Agentuity makes it simple** with built-in agent resolution and communication +- **Handoff mechanism** allows agents to delegate tasks in a simple, sequential manner +- **Orchestration patterns** include sequential chains, conditional routing, and smart orchestration +- **Constraints matter**: Handoffs are one-way, agents can't call themselves, and responses go directly to clients +- **Agentuity simplifies** multi-agent coordination with built-in agent resolution and communication ## What's Next? @@ -846,4 +731,4 @@ Questions to consider: - How do you track what decisions agents are making? - How do you ensure compliance and audit requirements? 
-Continue to [Module 5: Guardrails & Observability →](./05-guardrails-observability) \ No newline at end of file +Continue to [Module 5: Guardrails & Observability →](./05-guardrails-evals-observability) \ No newline at end of file diff --git a/content/Training/developers/05-guardrails-evals-observability.mdx b/content/Training/developers/05-guardrails-evals-observability.mdx index 78e55b69..56654feb 100644 --- a/content/Training/developers/05-guardrails-evals-observability.mdx +++ b/content/Training/developers/05-guardrails-evals-observability.mdx @@ -7,9 +7,9 @@ You've built agents that can think, remember, and collaborate. Now it's time to ## The Reality of Production Agents -Research from NeurIPS 2024 reveals that most AI agents succeed only [30-40% of the time on complex tasks](https://neurips.cc/virtual/2024/panel/agent-systems). When [Salesforce automated 50% of their support cases](https://www.sfchronicle.com/tech/article/salesforce-ai-job-cuts-benioff-21025920.php), the implementation challenges led to significant organizational restructuring. The gap between demo and production? Proper guardrails, systematic evaluation, and comprehensive observability. +When [Salesforce deployed their Agentforce AI agents](https://www.hr-brew.com/stories/2025/03/04/salesforce-ai-agents-reskilling), they discovered that success required more than just technology - it demanded a comprehensive reskilling strategy for their 72,000+ employees. As their EVP of talent growth noted: "This rise of digital labor powered by AI agents is truly reshaping the way our businesses operate." The gap between demo and production? Proper guardrails, systematic evaluation, and comprehensive observability. 
-According to [NIST's Generative AI Risk Management framework](https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.1270e2025.pdf), the primary operational risks in AI systems include: +According to [NIST's AI Risk Management framework](https://www.nist.gov/itl/ai-risk-management-framework), the primary operational risks in AI systems include: - **Hallucination**: Agents generating plausible but incorrect information - **Prompt Injection**: Adversarial inputs manipulating agent behavior - **Resource Consumption**: Uncontrolled usage leading to excessive costs @@ -34,7 +34,7 @@ Guardrails prevent agents from harmful actions while preserving autonomy: Systematic evaluation is critical for non-deterministic agents: -- **Industry Benchmarks**: [SWE-Bench](https://www.swebench.com/) (~20% success), [BrowserGym](https://browsergym.ai), [AgentHarm](https://arxiv.org/abs/2407.01564) +- **Industry Benchmarks**: [SWE-Bench](https://www.swebench.com/) (success rate on software engineering tasks) - **Automated Testing**: Unit tests, integration tests, golden datasets - **Production Metrics**: Success rates, latency, cost per request - **A/B Testing**: Shadow deployments, gradual rollouts @@ -297,6 +297,114 @@ const handler = async (request, response, context) => { } };`} /> +### Orchestrators as Natural Guardrails + +Remember the orchestrator pattern from Module 4? Orchestrators are your first line of defense: + + 1000: + return response.json({"error": "Request too long"}) + + # 2. Analyze intent with validation + intent = await analyze_intent(user_prompt, schema=ValidatedIntent) + + # 3. Apply routing rules + if intent.sensitive: + # Route sensitive requests to specialized agent + return response.handoff( + {"name": "compliance-agent"}, + {"query": user_prompt, "flagged": True} + ) + + if intent.confidence < 0.7: + # Low confidence = fallback to human + return response.json({ + "message": "I'll connect you with a specialist", + "transfer": "human" + }) + + # 4. 
Route to appropriate agent + agent_map = { + "technical": "tech-expert", + "sales": "sales-agent", + "support": "support-agent" + } + + return response.handoff( + {"name": agent_map[intent.category]}, + {"query": user_prompt} + )`} js={`import { z } from 'zod'; + +const ValidatedIntentSchema = z.object({ + category: z.enum(['technical', 'sales', 'support']), + confidence: z.number(), + sensitive: z.boolean().default(false) +}); + +const orchestratorWithGuardrails = async (request, response, context) => { + const userPrompt = await request.data.text(); + + // 1. Input validation + if (userPrompt.length > 1000) { + return response.json({ error: 'Request too long' }); + } + + // 2. Analyze intent with validation + const { object: intent } = await analyzeIntent(userPrompt, { + schema: ValidatedIntentSchema + }); + + // 3. Apply routing rules + if (intent.sensitive) { + // Route sensitive requests to specialized agent + return response.handoff( + { name: 'compliance-agent' }, + { + data: JSON.stringify({ query: userPrompt, flagged: true }), + contentType: 'application/json' + } + ); + } + + if (intent.confidence < 0.7) { + // Low confidence = fallback to human + return response.json({ + message: "I'll connect you with a specialist", + transfer: 'human' + }); + } + + // 4. Route to appropriate agent + const agentMap = { + technical: 'tech-expert', + sales: 'sales-agent', + support: 'support-agent' + }; + + return response.handoff( + { name: agentMap[intent.category] }, + { data: userPrompt, contentType: 'text/plain' } + ); +};`} /> + +**Why Orchestrators Make Great Guardrails:** +1. **Single validation point** - Check all inputs before routing +2. **Access control** - Decide who can access which agents +3. **Fallback handling** - Gracefully handle edge cases +4. **Audit trail** - Log all routing decisions +5. 
**Rate limiting** - Apply per-user limits before delegation + See the complete [Conference Concierge implementation](https://github.com/agentuity/agent-AIEWF2025-concierge-template) for a production example of multi-agent routing with schema validation. diff --git a/content/Training/developers/06-deployment-environments.mdx b/content/Training/developers/06-deployment-environments.mdx index cdd023ef..c9e53f4d 100644 --- a/content/Training/developers/06-deployment-environments.mdx +++ b/content/Training/developers/06-deployment-environments.mdx @@ -7,9 +7,9 @@ You've built agents with memory, collaboration, and production-grade guardrails. ## The Reality of Enterprise Agent Deployment -Research shows that [most organizations aren't "agent-ready"](https://www.ibm.com/think/insights/ai-agents-2025-expectations-vs-reality) - they lack the infrastructure, processes, and understanding needed for production agents. When [Salesforce automated 50% of their support cases](https://www.sfchronicle.com/tech/article/salesforce-ai-job-cuts-benioff-21025920.php), it required massive organizational change alongside the technology. +Research shows that [most organizations aren't "agent-ready"](https://www.ibm.com/think/insights/ai-agents-2025-expectations-vs-reality) - they lack the infrastructure, processes, and understanding needed for production agents. When [Salesforce deployed their Agentforce AI agents](https://www.hr-brew.com/stories/2025/03/04/salesforce-ai-agents-reskilling), it required massive organizational change alongside the technology. -The challenge isn't just technical - it's operational: +The challenge involves both technical and operational considerations: - How do you test non-deterministic agents? - How do you safely roll out changes that affect customer interactions? - How do you monitor agents that make autonomous decisions? 
@@ -25,9 +25,8 @@ Agentuity provides three distinct environments, each with specific purposes: Your local machine running DevMode: - Instant feedback with hot reload - Full debugging capabilities -- Mock external services -- Unlimited iterations -- Direct file system access +- Observability provided by Traceloop (OpenTelemetry integration) +- Access to Agentuity API features (memory, etc) ```bash # Start local development @@ -45,11 +44,9 @@ agentuity dev Cloud deployment with test data: -- Production infrastructure -- Synthetic test data - Full observability -- Performance testing -- Integration testing +- Options for different deployment configurations +- Ability to use different keys and environment variables ```bash # Deploy to staging (coming soon) @@ -60,10 +57,7 @@ Cloud deployment with test data: Full-scale deployment: - Auto-scaling infrastructure -- Real user traffic -- Complete monitoring -- High availability -- Disaster recovery +- Complete monitoring, with the exact same experience as local development ```bash # Deploy to production @@ -74,26 +68,26 @@ agentuity deploy ### Moving Through Environments -Each environment serves as a gate to the next: +The typical flow moves from local development to production with clear validation at each stage: -| Stage | Purpose | Key Validation | Duration | -|-------|---------|----------------|----------| -| **Local Dev** | Build & iterate | Basic functionality | Hours to days | -| **Staging** | Validate at scale | Performance, integration | Days to weeks | -| **Production** | Serve users | Monitoring, optimization | Ongoing | +| Stage | Purpose | Key Focus | Typical Timeline | +|-------|---------|-----------|------------------| +| **Local Dev** | Build & iterate rapidly | Feature development | Hours to days | +| **Staging** | Validate production readiness | Performance & integration testing | Days to weeks | +| **Production** | Serve real users | Monitoring & optimization | Ongoing | -### Environment Testing Matrix 
+### Environment Validation Focus -Each environment requires different testing focus: +Each environment emphasizes different aspects of your agent's readiness: -| Testing Aspect | Development | Staging | Production | -|----------------|------------|---------|------------| -| **Core Functionality** | Basic input/output validation | Load testing with concurrent users | Success rate metrics | -| **Memory & State** | Memory operations work correctly | Memory cleanup under load | Response time percentiles | -| **Error Handling** | Validation catches bad inputs | Rate limiting works correctly | Error rate trends | -| **Observability** | Logs provide debugging info | Meaningful telemetry spans | Cost per request tracking | -| **Integration** | Error messages are helpful | External service integration | User satisfaction signals | -| **Resilience** | Basic error recovery | Failover and recovery testing | Incident response time | +| Focus Area | Development | Staging | Production | +|------------|------------|---------|------------| +| **Functionality** | Core features work | Load & stress testing | Success metrics | +| **Memory & State** | Memory operations function | Memory performance under load | Response time tracking | +| **Error Handling** | Validation works | Rate limiting effective | Error rate monitoring | +| **Observability** | Debug information available | Telemetry accuracy | Performance insights | +| **Integration** | Error messages helpful | External service testing | User experience metrics | +| **Resilience** | Basic error recovery | Failover mechanisms | Incident response | ## Enterprise Deployment Options @@ -102,10 +96,10 @@ Each environment requires different testing focus: ### Private Cloud Deployments -Deploy Agentuity infrastructure in your own cloud account. More information coming soon. +Deploy Agentuity infrastructure in your own cloud account. More information coming soon! 
### On-Premise Installations -Run Agentuity entirely within your data center. More information coming soon. +Run Agentuity entirely within your data center. More information coming soon! ## Configuration Management @@ -113,61 +107,16 @@ Run Agentuity entirely within your data center. More information coming soon. Different environments need different configurations: - +{/* Environment-specific settings are typically configured through the Agentuity web console. +Screenshots showing the web UI for environment configuration will be added here. + +The web console allows you to set: +- Environment variables per deployment +- Rate limiting and caching settings +- Feature flags and deployment options +- Observability and monitoring configuration */} + +*Environment-specific settings are managed through the Agentuity web console. Screenshots of the configuration interface will be provided here.* ## Real-World Deployment Lessons diff --git a/content/Training/developers/07-sandbox-capstone.mdx b/content/Training/developers/07-sandbox-capstone.mdx index 619f0747..fa28933a 100644 --- a/content/Training/developers/07-sandbox-capstone.mdx +++ b/content/Training/developers/07-sandbox-capstone.mdx @@ -5,6 +5,20 @@ description: Bringing it all together in a complete production system Time to combine everything you've learned into a complete, production-ready agent system. +## Building Your Capstone + +As you work on your capstone project: + +1. **Review previous modules** - Each contains pieces you'll need +2. **Start simple** - Get basic flow working first +3. **Iterate** - Add features incrementally +4. **Test thoroughly** - Each agent independently, then together +5. **Monitor everything** - You can't fix what you can't see + + +Pro Tip: Build your capstone iteratively. Start with two agents (triage + one specialist), get that working perfectly, then add more complexity. 
+ + ## Coming Soon: Agentic Sandbox @@ -321,47 +335,56 @@ agentuity logs --follow Not interested in customer support? Choose an alternative: -### Option 2: Research Assistant Team +### Option 2: Deep Research System -Build a multi-agent research system: -- **Research Agent**: Finds relevant papers and articles -- **Summarizer Agent**: Creates concise summaries -- **Fact Checker**: Validates claims against sources -- **Report Generator**: Produces final documents +Build a comprehensive research and analysis system: +- **Research Coordinator**: Routes queries to appropriate researchers +- **Web Research Agent**: Searches and gathers online sources +- **Academic Research Agent**: Finds scholarly articles and papers +- **Analysis Agent**: Synthesizes information across sources +- **Report Generator**: Creates structured research reports +- **Fact Verification Agent**: Cross-checks claims and citations Key challenges: -- Citation tracking -- Source credibility scoring -- Handling conflicting information -- Long-form content generation +- Multi-source information synthesis +- Citation accuracy and tracking +- Handling conflicting or outdated information +- Maintaining research quality standards +- Long-form document generation -### Option 3: DevOps Assistant +### Option 3: Financial Analysis System -Build an operations helper: -- **Monitor Agent**: Watches system metrics -- **Diagnostics Agent**: Analyzes problems -- **Remediation Agent**: Suggests or implements fixes -- **Reporting Agent**: Creates incident reports +Build a financial research and advisory system: +- **Market Research Agent**: Gathers financial data and news +- **Risk Assessment Agent**: Analyzes investment risks +- **Portfolio Advisor**: Provides investment recommendations +- **Compliance Agent**: Ensures regulatory adherence +- **Report Generator**: Creates financial summaries Key challenges: -- Real-time data processing -- Safe remediation actions -- Alert fatigue prevention -- Root cause analysis +- 
Real-time financial data processing +- Risk calculation and modeling +- Regulatory compliance requirements +- Investment strategy optimization +- Market sentiment analysis + +### Option 4: Content Creation Pipeline + +Build a multi-agent content production system: +- **Content Planner**: Develops content strategies and schedules +- **Research Agent**: Gathers information and sources +- **Writer Agent**: Creates draft content +- **Editor Agent**: Reviews and improves content quality +- **SEO Optimizer**: Optimizes for search and engagement +- **Publisher Agent**: Handles final formatting and distribution -## Getting Help - -As you build your capstone: - -1. **Review previous modules** - Each contains pieces you'll need -2. **Start simple** - Get basic flow working first -3. **Iterate** - Add features incrementally -4. **Test thoroughly** - Each agent independently, then together -5. **Monitor everything** - You can't fix what you can't see +Key challenges: +- Content quality consistency +- Brand voice maintenance +- Multi-format content adaptation +- Publishing workflow coordination +- Performance analytics integration - -Pro Tip: Build your capstone iteratively. Start with two agents (triage + one specialist), get that working perfectly, then add more complexity. - ## Congratulations! @@ -386,4 +409,4 @@ Welcome to the future of agent development! 🚀 --- -**Need help?** Join our [Discord community](https://discord.gg/agentuity) or check the [documentation](https://docs.agentuity.com) \ No newline at end of file +**Ready to dive deeper?** Check the [documentation](https://docs.agentuity.com) for advanced patterns and deployment guides. 
\ No newline at end of file From e209008366931805219bcd679dd08bad62ea8762 Mon Sep 17 00:00:00 2001 From: parteeksingh24 Date: Wed, 24 Sep 2025 13:13:41 -0700 Subject: [PATCH 04/13] Update code examples & content --- .../developers/01-introduction-to-agents.mdx | 261 ++-- .../developers/02-anatomy-of-an-agent.mdx | 625 +++++---- .../Training/developers/03-agent-memory.mdx | 570 ++++---- .../developers/04-agent-collaboration.mdx | 1149 ++++++++++------- ... => 05-observability-guardrails-evals.mdx} | 1085 ++++++++-------- .../developers/06-deployment-environments.mdx | 184 +-- .../developers/07-sandbox-capstone.mdx | 511 +++----- content/Training/developers/index.mdx | 24 +- content/Training/index.mdx | 4 +- 9 files changed, 2188 insertions(+), 2225 deletions(-) rename content/Training/developers/{05-guardrails-evals-observability.mdx => 05-observability-guardrails-evals.mdx} (57%) diff --git a/content/Training/developers/01-introduction-to-agents.mdx b/content/Training/developers/01-introduction-to-agents.mdx index 43118e66..c4c39984 100644 --- a/content/Training/developers/01-introduction-to-agents.mdx +++ b/content/Training/developers/01-introduction-to-agents.mdx @@ -20,6 +20,11 @@ Traditional cloud platforms (AWS Lambda, Google Cloud Functions, Azure Functions - **Stateless execution** (agents need persistent memory) - **Edge distribution** (agents need GPU proximity) +This makes them poorly suited for: +- **Long-running reasoning** processes that require extended compute time +- **Persistent state management** across multiple interactions +- **Complex agent workflows** that span multiple reasoning cycles + ## What Exactly Is an AI Agent? 
@@ -141,10 +146,7 @@ Traditional cloud platforms face fundamental limitations when running agents: | Pay per request | Continuous operation | Costs explode unexpectedly | | Human-centric monitoring | Agent observability | Can't debug agent decisions | -Major cloud providers are pushing to adapt to the needs of agents: -- **AWS** launched Bedrock AgentCore (still in preview) -- **Google** released their Agent Development Kit (ADK) -- **Microsoft** is retrofitting Azure with agent capabilities +Major cloud providers like AWS, Google, and Microsoft are working to adapt their platforms for agents. But retrofitting existing infrastructure is like turning a highway into an airport - technically possible, but not optimal. @@ -154,9 +156,11 @@ But retrofitting existing infrastructure is like turning a highway into an airpo Learn more about the Agent-Native Cloud paradigm in our [Agent-Native Cloud](/Guides/agent-native-cloud) guide. -While others retrofit, Agentuity was built from day one specifically for agents. Our agent-native platform puts AI agents at the center of everything we do. +While others retrofit, Agentuity was built from day one specifically for agents. Agentuity's agent-native platform puts AI agents at the center of everything. + +### The Agentuity Difference: Purpose-Built for Agents -### Purpose-Built for Agents +Instead of asking "How do we make Lambda work for agents?", we asked "What would infrastructure look like if agents were the only thing that mattered?" 
Agentuity provides what agents actually need: @@ -165,17 +169,9 @@ Agentuity provides what agents actually need: - **Agent-to-agent communication**: Seamless and secure [channels between agents](/Guides/agent-communication) - **Native observability**: Track agent decisions with [built-in tracing](/Guides/agent-tracing) - **Automatic scaling**: Based on agent workload, not request count -- **Framework agnostic**: Run LangChain, CrewAI, or custom agents side by side +- **Framework agnostic**: Run LangChain, CrewAI, or custom agents side by side - and easily integrate with popular tools like the Vercel AI SDK for streamlined AI development -### The Agentuity Difference - -Instead of asking "How do we make Lambda work for agents?", we asked "What would infrastructure look like if agents were the only thing that mattered?" - -The result is a platform where: -- Agents are first-class citizens, not adapted workloads -- Memory and state are built-in, not bolted on -- Long-running is the default, not an exception -- Agent communication is native, not a hack +The result is a platform where agents are first-class citizens, not adapted workloads; memory and state are built-in, not bolted on; long-running is the default, not an exception; and agent communication is native, not a hack. 
## Your First Agent: Hello, Autonomous World @@ -195,9 +191,24 @@ Before we start, make sure you have: ### Quick Setup ```bash -# Install the Agentuity CLI (see full installation guide for other OS) +# Install the Agentuity CLI + +# NPM (if you have Node.js) +npm install -g @agentuity/cli + +# Bun (if you have Bun) +bun install -g @agentuity/cli + +# macOS (Homebrew) brew tap agentuity/tap && brew install agentuity +# Universal installer (Windows/Linux/macOS) +curl -fsS https://agentuity.sh | sh +# Note: Verify the checksum from https://agentuity.com/install for security + +# Direct download from releases +# Visit https://github.com/agentuity/cli/releases for your platform + # Verify installation agentuity --version @@ -211,39 +222,41 @@ Let's create a simple agent that demonstrates the key concepts we've learned: ```bash # Create a new agent project -agentuity create hello-agent --template python-starter -# or for TypeScript: -# agentuity create hello-agent --template typescript-starter +agentuity create hello-agent --template typescript-starter +# or for Python: +# agentuity create hello-agent --template python-starter # Navigate to the project cd hello-agent -# Install dependencies (Python) -pip install -r requirements.txt -# or for TypeScript: -# npm install +# Install dependencies (TypeScript) +npm install +# or for Python: +# pip install -r requirements.txt ``` ### Understanding the Agent Structure Here's a simple "Hello Agent" that demonstrates the core concepts: - { + return response.json({"message": f"Hello, {name}!"})`} js={`import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { // Get the name from request data const data = await request.data.json(); const name = data.name || 'World'; // Return a simple greeting return response.json({ message: \`Hello, \${name}!\` }); -}; - -export default handler;`} /> +}`} /> This 
simple agent demonstrates: - **Request handling**: Accepts JSON input with a `name` field @@ -256,8 +269,9 @@ This simple agent demonstrates: Let's enhance our agent with logging and a simple state counter: { + })`} js={`import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { // Log the incoming request (built-in observability) context.logger.info('Hello agent received a request'); @@ -312,12 +328,10 @@ async def run(request, response, context): greeting_number: count, timestamp: new Date().toISOString() }); -}; - -export default handler;`} /> +}`} /> This enhanced agent demonstrates: -- **Observability**: Built-in logging with context.logger +- **Comprehensive Observability**: Built-in logging with context.logger plus automatic OpenTelemetry tracing that captures all operations (storage calls, external requests) as structured spans without any additional code - **State Management**: Simple counter using key-value storage - **Agent Memory**: One storage type (KV) without complexity - **Request/Response**: JSON input and structured output @@ -351,7 +365,9 @@ Once DevMode is running: Add a `welcome()` function to your agent to create clickable test scenarios in DevMode: - { +async def run(request: AgentRequest, response: AgentResponse, context: AgentContext): + # Your existing agent code...`} js={`import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export const welcome = () => { return { welcome: "Welcome to my Hello Agent! 
Try these examples:", prompts: [ @@ -392,7 +410,7 @@ async def run(request, response, context): }; }; -const handler: AgentHandler = async (request, response, context) => { +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { // Your existing agent code...`} /> These prompts appear as clickable buttons in the DevMode interface. Try calling the agent multiple times with the same name - it will remember you, and you can see the memory operations in the real-time logs! @@ -424,101 +442,98 @@ Now that you have a working agent, let's enhance it with more capabilities. Use ### Challenge 1: Add Error Handling Enhance your agent with proper error handling and validation: - { - try { - const data = await request.data.json(); - const name = data.name; - - if (!name) { - context.logger.warn('No name provided in request'); - return response.json({ error: 'Name is required' }); - } - - context.logger.info(\`Processing request for \${name}\`); - - // Your agent logic here - return response.json({ message: \`Hello, \${name}!\` }); - - } catch (error) { - context.logger.error(\`Error processing request: \${error}\`); - return response.json({ error: 'Internal server error' }); - } -};`} /> - -### Challenge 2: Add Time-Based Context -Enhance your agent to greet differently based on the time of day: - - { + # Your agent logic here...`} js={`// Key pattern: Defensive error handling +try { const data = await request.data.json(); - const name = data.name || 'World'; + const name = data.name; - // Time-based greeting - const hour = new Date().getHours(); - let timeGreeting; - if (hour < 12) { - timeGreeting = 'Good morning'; - } else if (hour < 17) { - timeGreeting = 'Good afternoon'; - } else { - timeGreeting = 'Good evening'; + if (!name) { + context.logger.warn('No name provided in request'); + return response.json({ error: 'Name is required' }); } - // Include your existing counter logic here - return response.json({ - message: 
\`\${timeGreeting}, \${name}!\`, - local_time: new Date().toISOString() - }); -};`} /> + // Your agent logic here...`} /> -### Challenge 3: Combine Features -Combine error handling, time-based greetings, and your counter into one robust agent. +### Challenge 2: Handle Multiple Input Types +Enhance your agent to handle both JSON and text inputs gracefully: -### Testing Your Enhancements + -After implementing each challenge: -1. Update your `welcome()` function with relevant test scenarios -2. Run `agentuity dev` to start DevMode -3. Use the DevMode interface to test your changes -4. Monitor the logs to verify your agent behaves correctly -5. Check the Sessions tab to track performance and costs +### Challenge 3: Complete Production Implementation + +Now let's combine all features (and add some simple storage provided by Agentuity) into a fully functional agent. + +This complete implementation demonstrates: +- **Error handling** with try/catch +- **Multiple content types** (JSON and text input support) +- **Statistics tracking** (personal greeting counts) +- **Memory management** (stores only the last 5 names) +- **TTL storage** (24-hour expiration on stats) +- **DevMode integration** with clickable test scenarios via `welcome()` +- **Logging** for debugging and monitoring + +### Build This Agent Yourself + +Ready to implement this agent? Follow our complete examples: + + + +## Testing Your Complete Agent + +After implementing this complete version: +1. Run `agentuity dev` to start DevMode +2. Use the clickable prompts from the `welcome()` function +3. Test with both JSON and text inputs +4. Call the agent multiple times with the same name to see statistics +5. Monitor the logs to see detailed operation tracking +6. Check the key-value storage to see the personal greeting counts +7. Check the Sessions tab for performance metrics -For more examples, check our [Templates](/Introduction/templates) and [Examples](/Examples) sections. 
+This complete implementation shows production patterns you'll use throughout the training modules. Notice how we handle errors gracefully, store data efficiently, and provide rich observability. ## Key Takeaways @@ -544,4 +559,4 @@ Remember: you're not just learning a new framework, you're learning a fundamenta --- -**Ready for Module 2?** [The Anatomy of an Agent](./02-anatomy-of-an-agent) \ No newline at end of file +**Ready for Module 2?** [The Anatomy of an Agent](./02-anatomy-of-an-agent) diff --git a/content/Training/developers/02-anatomy-of-an-agent.mdx b/content/Training/developers/02-anatomy-of-an-agent.mdx index d5aa0a49..49d35b3c 100644 --- a/content/Training/developers/02-anatomy-of-an-agent.mdx +++ b/content/Training/developers/02-anatomy-of-an-agent.mdx @@ -37,187 +37,202 @@ Agents spring into action when triggered. Agentuity supports multiple trigger ty Each trigger provides different metadata and context: - { + })`} js={`import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { // Identify the trigger type const trigger = request.trigger; - + if (trigger === 'webhook') { // Handle HTTP request const data = await request.data.json(); context.logger.info(\`Webhook triggered with data: \${JSON.stringify(data)}\`); - + } else if (trigger === 'cron') { // Handle scheduled execution context.logger.info(\`Cron job running at \${new Date().toISOString()}\`); // No input data for cron triggers - + } else if (trigger === 'agent') { // Handle agent-to-agent call const callingAgent = request.metadata.get('source_agent'); context.logger.info(\`Called by agent: \${callingAgent}\`); } - + return response.json({ trigger_type: trigger, processed_at: new Date().toISOString() }); -};`} /> +}`} /> ### 2. Request and Response Handling Every agent receives a request and must return a response. 
The request contains the trigger data and metadata, while the response can be in various formats: -Hello") - + # Binary response (for files) # return response.binary(file_bytes) - + # Redirect to another agent - # return response.handoff({"name": "other-agent"}, data, metadata={})`} js={`const handler: AgentHandler = async (request, response, context) => { + # return response.handoff({"name": "other-agent"}, data, metadata={})`} js={`import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { // Access request data in different formats - + // JSON data (most common) const jsonData = await request.data.json(); - + // Plain text const textData = await request.data.text(); - + // Binary data const binaryData = await request.data.binary(); - - // Base64 encoded - const base64Data = await request.data.base64(); - + // Response options - + // JSON response (most common) return response.json({ result: 'success' }); - + // Text response // return response.text('Hello, World!'); - + // HTML response // return response.html('

Hello

'); - + // Binary response (for files) // return response.binary(fileBytes); - + // Redirect to another agent // return response.handoff({ name: 'other-agent' }, { data: jsonData, contentType: 'application/json' }); -};`} /> +}`} /> ### 3. The Agent Context -The context object is your agent's gateway to Agentuity's services. It provides access to storage, logging, tracing, and more: +The context object is your agent's gateway to Agentuity's services. It provides access to storage, logging, tracing, agent communication, and more: - { + })`} js={`import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { // Logging at different levels context.logger.debug('Debug information'); context.logger.info('General information'); context.logger.warn('Warning message'); context.logger.error('Error occurred'); - + // Access metadata const agentId = context.agent.id; const agentName = context.agent.name; const projectId = context.projectId; const sessionId = context.sessionId; - + // Key-Value storage - await context.kv.set('cache', 'key', JSON.stringify({ data: 'value' })); + await context.kv.set('cache', 'key', { data: 'value' }); const result = await context.kv.get('cache', 'key'); - + // Vector storage for semantic search - await context.vector.upsert('docs', { + await context.vector.upsert('docs', [{ key: 'doc1', document: 'AI agents are autonomous systems', metadata: { category: 'intro' } - }); - + }]); + // Object storage for files await context.objectstore.put('files', 'report.pdf', pdfBytes); - + + // Get reference to another agent for inter-agent communication + const otherAgent = context.getAgent({ name: 'specialist-agent' }); + // const agentResponse = await otherAgent.run({ data: 'Hello from triage agent' }); + return response.json({ agent: agentName, session: sessionId }); -};`} /> +}`} /> ## Planning and Reasoning: The Agent's Brain 
@@ -254,8 +269,9 @@ Agents become powerful when they can use external tools and services: Dict[str, Any]: +async def use_web_search(query: str, context: AgentContext) -> Dict[str, Any]: """Search the web using a real API.""" try: # Example using a hypothetical search API @@ -274,10 +290,10 @@ async def use_web_search(query: str, context) -> Dict[str, Any]: context.logger.error(f"Search failed: {str(e)}") return {"error": "Search failed"} -async def call_another_agent(agent_name: str, data: dict, context) -> Dict[str, Any]: +async def call_another_agent(agent_name: str, data: dict, context: AgentContext) -> Dict[str, Any]: """Call another agent within the same project.""" try: - agent = await context.getAgent({"name": agent_name}) + agent = context.get_agent(agent_name) result = await agent.run({ "data": data, "contentType": "application/json" @@ -287,7 +303,7 @@ async def call_another_agent(agent_name: str, data: dict, context) -> Dict[str, context.logger.error(f"Agent call failed: {str(e)}") return {"error": "Agent call failed"} -async def run(request, response, context): +async def run(request: AgentRequest, response: AgentResponse, context: AgentContext): data = await request.data.json() tool = data.get("tool") @@ -298,7 +314,9 @@ async def run(request, response, context): else: result = {"error": "Unknown tool"} - return response.json(result)`} js={`async function useWebSearch(query: string, context: any): Promise { + return response.json(result)`} js={`import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +async function useWebSearch(query: string, context: AgentContext): Promise { try { // Example using a hypothetical search API const apiKey = process.env.SEARCH_API_KEY; @@ -316,9 +334,9 @@ async def run(request, response, context): } } -async function callAnotherAgent(agentName: string, data: any, context: any): Promise { +async function callAnotherAgent(agentName: string, data: any, context: AgentContext): Promise { try { - 
const agent = await context.getAgent({ name: agentName }); + const agent = context.getAgent({ name: agentName }); const result = await agent.run({ data, contentType: 'application/json' @@ -330,7 +348,7 @@ async function callAnotherAgent(agentName: string, data: any, context: any): Pro } } -const handler: AgentHandler = async (request, response, context) => { +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { const data = await request.data.json(); const tool = data.tool; @@ -344,7 +362,7 @@ const handler: AgentHandler = async (request, response, context) => { } return response.json(result); -};`} /> +}`} /> ## Memory Patterns: How Agents Remember @@ -354,8 +372,9 @@ Memory enables agents to learn and maintain state across interactions. The durat For temporary data that should expire: { + context.logger.info(f"Session active for {len(data['messages'])} messages")`} js={`import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { const sessionId = request.metadata.get('session_id'); // Store temporary session data with TTL await context.kv.set( 'sessions', sessionId, - JSON.stringify({ + { messages: [], - context: {}, + user_preferences: {}, timestamp: new Date().toISOString() - }), + }, { ttl: 3600 } // Expires in 1 hour ); -};`} /> +}`} /> ### Persistent Memory For data that should survive restarts: - { + )`} js={`import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { const userId = request.metadata.get('user_id'); // Store user preferences permanently (no TTL) @@ -434,12 +460,14 @@ For data that should survive restarts: \`\${userId}/resume.pdf\`, pdfBytes ); -};`} /> +}`} /> ### Searchable Memory For semantic search and knowledge retrieval: 
- { + context.logger.info(f"Insight: {result.document}")`} js={`import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { // Store searchable knowledge await context.vector.upsert('knowledge', [{ key: 'user_feedback_001', @@ -488,7 +518,7 @@ For semantic search and knowledge retrieval: context.logger.info(\`Insight: \${result.document}\`); }); } -};`} /> +}`} /> ## Framework Awareness: Choosing the Right Tool @@ -505,6 +535,8 @@ Different frameworks excel at different tasks. Here's when to use each: | **LangChain** | Complex chains, RAG applications | Python, TypeScript | Native SDK support | | **CrewAI** | Multi-agent teams, role-based systems | Python | Container deployment | | **AutoGen** | Research, conversational AI | Python | Direct deployment | +| **Mastra** | TypeScript agents, workflows, modern DX | TypeScript | Full compatibility | +| **Vercel AI SDK** | UI-integrated agents, streaming, web apps | TypeScript | Direct integration | | **Custom** | Specific requirements, full control | Python, TypeScript, Bun | Full platform features | ### Framework Comparison Example @@ -512,7 +544,9 @@ Different frameworks excel at different tasks. 
Here's when to use each: Here's the same agent built with different approaches: { +import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { const data = await request.data.json(); // Direct implementation const result = processData(data); return response.json(result); -}; +} // LangChain Agent import { initializeAgentExecutor } from 'langchain/agents'; import { OpenAI } from 'langchain/llms/openai'; +import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; -const handler: AgentHandler = async (request, response, context) => { +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { const llm = new OpenAI(); const agent = await initializeAgentExecutor(tools, llm, 'zero-shot'); const result = await agent.call({ input: await request.data.text() }); return response.text(result.output); -}; +} // Custom Multi-Agent System -const handler: AgentHandler = async (request, response, context) => { +import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { const researcher = new Agent('Researcher', 'Find information'); const writer = new Agent('Writer', 'Create content'); - + const researchResult = await researcher.execute(data); const writeResult = await writer.execute(researchResult); return response.json(writeResult); -};`} /> +}`} /> ### When to Use Which Framework @@ -577,258 +618,198 @@ const handler: AgentHandler = async (request, response, context) => { ## Lab: Building a Weather Agent with Multiple Triggers -Let's build an agent that demonstrates different behaviors based on how it's triggered. 
This weather agent will show you how the same code can serve different purposes: +Let's build an agent that demonstrates different behaviors based on how it's triggered - the same agent code serves different purposes depending on the trigger type. - - if trigger == "webhook": - # Manual requests for specific cities - try: - data = await request.data.json() - city = data.get("city", "London") - except: - city = await request.data.text() or "London" - - context.logger.info(f"Manual weather request for: {city}") - - # Check cache first (10 minutes TTL) - cache_key = f"weather_{city.lower()}" - cached = await context.kv.get("weather_cache", cache_key) - - if cached.exists: - weather = await cached.data.json() - context.logger.info(f"Returning cached weather for {city}") - return response.json({ - **weather, - "cached": True, - "cache_age_minutes": 5 # Mock age - }) - - # Mock weather data (in production, call real API) - weather = { - "city": city, - "temperature": 22, - "description": "partly cloudy", - "humidity": 65, - "timestamp": datetime.now().isoformat() - } +### Key Implementation: External API Integration + +Real-world agents integrate with external services using proper error handling: - # Cache for 10 minutes - await context.kv.set("weather_cache", cache_key, weather, {"ttl": 600}) + { - const trigger = request.trigger; - context.logger.info(\`Weather agent triggered via: \${trigger}\`); + if not all([grid_id, grid_x, grid_y]): + raise Exception("Invalid grid data from NWS points API") - if (trigger === 'webhook') { - // Manual requests for specific cities - let city = 'London'; - try { - const data = await request.data.json(); - city = data.city || 'London'; - } catch { - city = await request.data.text() || 'London'; - } + # Step 2: Get forecast using grid coordinates + forecast_url = f"https://api.weather.gov/gridpoints/{grid_id}/{grid_x},{grid_y}/forecast" + # ... 
fetch and process forecast data - context.logger.info(\`Manual weather request for: \${city}\`); + except Exception as e: + context.logger.error(f"Weather API failed for {location_name}: {e}") + raise`} js={`// Key pattern: NWS API two-step process with proper error handling +async function fetchNWSWeather(lat: number, lon: number, locationName: string, context: AgentContext): Promise { + try { + // Step 1: Get grid coordinates from lat/lon + const pointsUrl = \`https://api.weather.gov/points/\${lat},\${lon}\`; - // Check cache first (10 minutes TTL) - const cacheKey = \`weather_\${city.toLowerCase()}\`; - const cached = await context.kv.get('weather_cache', cacheKey); + const pointsResponse = await fetch(pointsUrl, { + headers: { 'User-Agent': USER_AGENT }, + signal: AbortSignal.timeout(10000) // 10 second timeout + }); - if (cached.exists) { - const weather = await cached.data.json(); - context.logger.info(\`Returning cached weather for \${city}\`); - return response.json({ - ...weather, - cached: true, - cache_age_minutes: 5 // Mock age - }); + if (!pointsResponse.ok) { + throw new Error(\`NWS points API failed: \${pointsResponse.status} \${pointsResponse.statusText}\`); } - // Mock weather data (in production, call real API) - const weather = { - city, - temperature: 22, - description: 'partly cloudy', - humidity: 65, - timestamp: new Date().toISOString() - }; - - // Cache for 10 minutes - await context.kv.set('weather_cache', cacheKey, weather, { ttl: 600 }); + const pointsData = await pointsResponse.json() as NWSPointsResponse; + const gridProps = pointsData.properties; - return response.json({ ...weather, cached: false }); + // Validate and extract grid properties + if (!gridProps || gridProps.gridId === undefined || gridProps.gridX === undefined || gridProps.gridY === undefined) { + throw new Error('Could not extract grid information from NWS points API response.'); + } - } else if (trigger === 'cron') { - // Batch update for major cities - const cities = 
['London', 'New York', 'Tokyo', 'Sydney']; - context.logger.info(\`Cron job: Updating weather for \${cities.length} cities\`); - - const weatherData: Record = {}; - cities.forEach(city => { - // Mock weather data with variation - weatherData[city] = { - temperature: 20 + (city.length % 15), // Mock variation - description: 'sunny', - updated: new Date().toISOString() - }; - }); + // Step 2: Get forecast using grid coordinates + const forecastUrl = \`https://api.weather.gov/gridpoints/\${gridProps.gridId}/\${gridProps.gridX},\${gridProps.gridY}/forecast\`; + // ... fetch and process forecast data - // Store daily weather report - await context.kv.set('weather_reports', 'daily', weatherData, { ttl: 86400 }); + } catch (error) { + context.logger.error(\`Weather API failed for \${locationName}: \${error}\`); + throw new Error(\`Weather service unavailable: \${error instanceof Error ? error.message : 'Unknown error'}\`); + } +}`} /> + +### Key Implementation: Smart Caching Strategies + +Different caching approaches based on use case and trigger: + + +}`} /> -### Testing Your Weather Agent +### Build This Agent Yourself - -DevMode makes it easy to test all trigger types. You can simulate webhooks, test manual triggers, and even simulate cron jobs through the interface. - +Ready to implement this agent? 
Follow our complete examples: + + + +### What This Agent Demonstrates + +- **Trigger-based behavior**: Same agent code behaves differently based on trigger type +- **External API integration**: Real National Weather Service API calls with proper error handling +- **Smart caching strategies**: Different TTL based on use case (cron vs manual) +- **Production patterns**: Timeouts, defensive typing, comprehensive logging +- **AI enhancement**: Vercel AI SDK for weather interpretation -#### What This Agent Demonstrates - -- **Webhook trigger**: Handles real-time weather requests with caching -- **Cron trigger**: Batch updates for multiple cities -- **Manual trigger**: DevMode testing and debugging interface -- **Caching strategy**: Uses TTL to balance freshness and performance -- **Error handling**: Graceful fallbacks for different input types - -Try calling this agent with different cities and watch how it caches responses for efficiency! - - { - return { - welcome: "Multi-Trigger Agent Test Suite", - prompts: [ - // Webhook trigger tests - { - data: JSON.stringify({ action: "store", key: "test", value: "data" }), - contentType: "application/json" - }, - { - data: JSON.stringify({ action: "retrieve", key: "test" }), - contentType: "application/json" - }, - // Simulate what a cron trigger would do - { - data: JSON.stringify({ _simulate_cron: true }), - contentType: "application/json" - }, - // Simulate agent-to-agent communication - { - data: JSON.stringify({ message: "Hello from another agent", source_agent: "agent_123" }), - contentType: "application/json" - } - ] - }; -};`} /> - -#### Testing in DevMode +The complete examples show you how to build real agents that integrate external APIs and use multiple triggers with caching and error handling. + +## Testing Your Weather Agent 1. **Start DevMode:** ```bash agentuity dev ``` -2. **Open the DevMode interface** and click the pre-configured prompts to test different scenarios - -3. 
**Monitor the execution** in the Logs and Sessions tabs +2. **Test the weather agent** with different city names and trigger types +3. **Monitor the execution** in the Logs and Sessions tabs to see caching behavior -#### Configuring Cron Triggers for Production +### Configuring Cron Triggers for Production For actual scheduled execution, configure cron triggers through the [Agentuity Console](https://app.agentuity.com): @@ -852,20 +833,24 @@ Effective cost optimization strategies include: - **Cache frequently used data** in KV storage to reduce repeated processing - **Batch operations** when possible to minimize API calls -- **Use appropriate LLM models** - use reasoning models (like GPT-4) only when you need complex reasoning +- **Use appropriate LLM models** - use reasoning models only when you need complex reasoning - **Consider fast inference providers** - Groq offers high-speed inference for many models - **Implement early exits** in reasoning loops to avoid unnecessary iterations ### Latency vs. 
Capability Trade-offs +Different approaches offer varying trade-offs between speed and intelligence: + | Approach | Latency | Capability | Use Case | |----------|---------|------------|----------| -| Simple rules | <100ms | Low | Basic routing | -| Small LLM | 200-500ms | Medium | Classification | -| Large LLM | 1-3s | High | Complex reasoning | -| Multi-agent | 3-10s | Very High | Research tasks | +| Simple rules | Fastest | Low | Basic routing, simple classification | +| Small LLM | Fast | Medium | Text classification, simple reasoning | +| Large LLM | Moderate | High | Complex reasoning, nuanced understanding | +| Multi-agent | Slowest | Very High | Research, complex workflows | + + { + return response.json(result)`} js={`import type { AgentRequest, AgentResponse, AgentContext } from "@agentuity/sdk"; + +export default async function Agent(request: AgentRequest, response: AgentResponse, context: AgentContext) { const data = await request.data.json(); const complexity = assessComplexity(data); - + if (complexity === 'simple') { // Use rules-based approach const result = applyRules(data); return response.json(result); - + } else if (complexity === 'medium') { // Use small, fast model const result = await smallModelProcess(data); return response.json(result); - + } else { // Use full agent capabilities const result = await fullAgentProcess(data, context); return response.json(result); } -};`} /> +}`} /> ## Key Takeaways @@ -926,4 +913,4 @@ Remember: The anatomy you've learned here is the foundation. 
The intelligence yo --- -**Ready for Module 3?** [Agent Memory](./03-agent-memory) \ No newline at end of file +**Ready for Module 3?** [Agent Memory](./03-agent-memory) diff --git a/content/Training/developers/03-agent-memory.mdx b/content/Training/developers/03-agent-memory.mdx index 1a446855..b9b99844 100644 --- a/content/Training/developers/03-agent-memory.mdx +++ b/content/Training/developers/03-agent-memory.mdx @@ -60,7 +60,7 @@ async def run(request, response, context): ```python # Short-term memory for session continuity async def run(request, response, context): - session_id = request.metadata.get("session_id") + session_id = context.sessionId # Retrieve session context session_data = await context.kv.get("sessions", session_id) @@ -81,10 +81,12 @@ async def run(request, response, context): **Storage**: Vector database for semantic search ```python +from datetime import datetime + # Long-term memory for learning and relationships async def run(request, response, context): user_id = request.metadata.get("user_id") - + # Store learned preferences await context.vector.upsert("user_knowledge", [{ "key": f"pref_{user_id}_{datetime.now()}", @@ -95,7 +97,7 @@ async def run(request, response, context): "learned_from": "conversation_analysis" } }]) - + # Retrieve relevant memories memories = await context.vector.search( "user_knowledge", @@ -155,7 +157,7 @@ Best for discrete, queryable data: } }; - await context.kv.set('users', userId, JSON.stringify(profile)); + await context.kv.set('users', userId, profile); };`} /> ### Searchable Memory (Vector Storage) @@ -203,7 +205,8 @@ Best for semantic search and knowledge retrieval: // Later, retrieve relevant context const similarConvos = await context.vector.search('conversations', { query: 'deployment and scaling concerns', - limit: 3 + limit: 3, + similarity: 0.7 }); };`} /> @@ -214,11 +217,18 @@ Best for files and large data: - Media files - Model outputs - { + // Assume analysisResults and userId are defined above + 
const analysisResults = { data: 'sample analysis' }; // placeholder + const userId = request.metadata.get('user_id'); + // Store generated report const reportData = generatePdfReport(analysisResults); const reportKey = \`reports/\${userId}/\${new Date().toISOString().split('T')[0]}_analysis.pdf\`; @@ -259,7 +273,7 @@ Best for files and large data: Keep recent conversation context while managing memory size: { - const sessionId = request.metadata.get('session_id'); + const sessionId = context.sessionId; const maxMessages = 20; // Keep last 20 messages // Get existing conversation @@ -348,10 +362,10 @@ Keep recent conversation context while managing memory size: }); // Save updated conversation - await context.kv.set('conversations', sessionId, JSON.stringify({ + await context.kv.set('conversations', sessionId, { messages, updated: new Date().toISOString() - }), { ttl: 7200 }); // 2 hour TTL + }, { ttl: 7200 }); // 2 hour TTL return response.json({ message: agentResponse }); };`} /> @@ -475,6 +489,7 @@ Build user profiles over time: const memories = await context.vector.search('user_preferences', { query: userInput, limit: 3, + similarity: 0.7, metadata: { user_id: userId } }); @@ -486,7 +501,7 @@ Build user profiles over time: ); // Save updated profile - await context.kv.set('profiles', userId, JSON.stringify(profile)); + await context.kv.set('profiles', userId, profile); return response.json({ message: responseText, @@ -779,347 +794,242 @@ Implement automated cleanup to manage costs: } };`} /> -## Lab: Building a Memory-Powered Customer Service Agent +## Lab: Building a Docs Q&A Agent with Memory -Let's build an agent that remembers customer interactions and provides increasingly personalized support: +Let's build an agent that helps users find documentation using semantic search and learns from user interactions to provide better responses over time. 
- 0.8: - is_recurring = True - break - - # Store this interaction - interaction_key = f"{user_id}_{datetime.now().timestamp()}" - await context.vector.upsert("support_history", [{ - "key": interaction_key, - "document": message, + 5: - response_text = ( - f"Thank you for being a valued customer. " - f"I'm looking into your issue right away." - ) - priority = "medium" - else: - response_text = ( - f"Thank you for contacting support. " - f"I'll help you with your issue." - ) - priority = "normal" - - # Save updated profile - await context.kv.set("customers", user_id, profile) - - # Log for analytics - context.logger.info(f"Support request from {user_id}: {priority} priority") - - return response.json({ - "response": response_text, - "is_recurring": is_recurring, - "interaction_number": profile["interaction_count"], - "priority": priority, - "similar_issues_found": len(past_issues) - }) - - elif action == "get_history": - # Retrieve customer history - profile_result = await context.kv.get("customers", user_id) - - if not profile_result.exists: - return response.json({ - "error": "No history found for this customer" - }) - - profile = await profile_result.data.json() - - # Get recent issues from vector storage - recent_issues = await context.vector.search( - "support_history", - f"all issues for user {user_id}", - limit=10, - metadata={"user_id": user_id} - ) - - return response.json({ - "customer_profile": profile, - "recent_issues": [ - { - "message": issue.document, - "metadata": issue.metadata - } - for issue in recent_issues - ] - }) - + return response.json({ - "error": "Unknown action" - })`} js={`const welcome = () => { - return { - welcome: "Customer Service Agent - I remember you!", - prompts: [ - { - data: JSON.stringify({ - action: "support", - user_id: "customer_123", - message: "My order hasn't arrived yet", - order_id: "ORD-789" - }), - contentType: "application/json" - }, + "status": "indexed", + "chunks": len(chunks), + "file": file_name + })`} js={`// 
Key pattern: Upload, store, and index documents +async function handleUpload(request: AgentRequest, response: AgentResponse, context: AgentContext) { + const fileContent = await request.data.text(); + const fileName = request.metadata.get('filename') || 'document.txt'; + + // 1. Store in object storage with proper UTF-8 encoding + const binaryData = new TextEncoder().encode(fileContent); + await context.objectstore.put(OBJECT_STORAGE_BUCKET, fileName, binaryData); + + // 2. Chunk and embed for search + const chunks = chunkDocument(fileContent, 500); + + for (let i = 0; i < chunks.length; i++) { + const chunkId = \`\${fileName}_chunk_\${i}\`; + // TypeScript vector storage uses single object format + await context.vector.upsert(VECTOR_STORAGE_NAME, { + key: chunkId, + document: chunks[i], + metadata: { + sourceFile: fileName, + chunkIndex: i, + uploadedAt: new Date().toISOString() + } + }); + } + + return response.json({ + status: 'indexed', + chunks: chunks.length, + file: fileName + }); +}`} /> + +### Key Implementation: Semantic Search with Context + +Agents combine search results with user history for intelligent responses: + + + +### Key Implementation: AI-Powered Documentation Q&A + +Combining search results with AI for intelligent documentation responses: + + { - const data = await request.data.json(); - const { action, user_id } = data; - - if (action === 'support') { - const { message } = data; - - // Get customer profile - const profileResult = await context.kv.get('customers', user_id); - let profile; - - if (profileResult.exists) { - profile = await profileResult.data.json(); - profile.interaction_count = (profile.interaction_count || 0) + 1; - } else { - profile = { - user_id, - first_contact: new Date().toISOString(), - interaction_count: 1, - issues: [] - }; - } - - // Search for similar past issues - const pastIssues = await context.vector.search('support_history', { - query: message, - limit: 3, - metadata: { user_id } - }); - - // Determine if this 
is a recurring issue - let isRecurring = false; - for (const issue of pastIssues) { - if (issue.similarity > 0.8) { - isRecurring = true; - break; - } - } - - // Store this interaction - const interactionKey = \`\${user_id}_\${Date.now()}\`; - await context.vector.upsert('support_history', { - key: interactionKey, - document: message, - metadata: { - user_id, - timestamp: new Date().toISOString(), - order_id: data.order_id, - resolved: false - } - }); - - // Add to profile issues - profile.issues.push({ - message, - timestamp: new Date().toISOString(), - recurring: isRecurring - }); - - // Generate response based on history - let responseText, priority; - - if (isRecurring) { - responseText = - "I see you've contacted us about this before. " + - "Let me escalate this to our senior support team immediately. " + - "Your case has been prioritized."; - priority = 'high'; - } else if (profile.interaction_count > 5) { - responseText = - "Thank you for being a valued customer. " + - "I'm looking into your issue right away."; - priority = 'medium'; - } else { - responseText = - "Thank you for contacting support. 
" + - "I'll help you with your issue."; - priority = 'normal'; - } - - // Save updated profile - await context.kv.set('customers', user_id, JSON.stringify(profile)); - - // Log for analytics - context.logger.info(\`Support request from \${user_id}: \${priority} priority\`); - - return response.json({ - response: responseText, - is_recurring: isRecurring, - interaction_number: profile.interaction_count, - priority, - similar_issues_found: pastIssues.length - }); - - } else if (action === 'get_history') { - // Retrieve customer history - const profileResult = await context.kv.get('customers', user_id); - - if (!profileResult.exists) { - return response.json({ - error: 'No history found for this customer' - }); - } - - const profile = await profileResult.data.json(); - - // Get recent issues from vector storage - const recentIssues = await context.vector.search('support_history', { - query: \`all issues for user \${user_id}\`, - limit: 10, - metadata: { user_id } - }); - - return response.json({ - customer_profile: profile, - recent_issues: recentIssues.map(issue => ({ - message: issue.document, - metadata: issue.metadata - })) - }); - } - - return response.json({ - error: 'Unknown action' + return response.choices[0].message.content.strip()`} js={`// Key pattern: AI-powered documentation Q&A with context building +async function buildSmartResponse(query: string, searchResults: any[], context: AgentContext) { + // Build context from top search results (metadata contains full content) + const docContext = searchResults + .slice(0, 2) // Use top 2 results for context + .map(result => result.metadata?.content || '') + .filter(content => content) // Remove any empty results + .join('\\n\\n'); + + // Generate AI response using Vercel AI SDK + const { text: aiAnswer } = await generateText({ + model: openai('gpt-4o-mini'), + prompt: \`Answer this question about Agentuity based on the documentation provided. 
+ +Documentation context: +\${docContext || 'No relevant documentation found.'} + +Question: \${query} + +Provide a helpful, concise answer in 2-3 sentences. If no context is available, politely indicate that.\` }); -}; -export default handler; -export { welcome };`} /> + return aiAnswer; +}`} /> + +### Build This Agent Yourself + +Ready to implement this agent? Follow our complete examples: + + + +### What This Agent Demonstrates + +- **Semantic search**: Vector similarity for finding relevant documentation +- **User learning**: Tracking search patterns and feedback to improve responses +- **Context building**: Combining search results with conversation history +- **Feedback loops**: Learning from user interactions to boost helpful content +- **Personalization**: Adapting responses based on user's experience level + +The complete examples show you how to build intelligent documentation agents that get smarter over time through user interaction. + -### Testing Your Memory Agent +## Testing Your Docs Q&A Agent 1. **Start DevMode:** ```bash agentuity dev ``` -2. **Test the scenarios:** - - First support request - notice the standard response - - Second similar request - see how it recognizes the recurring issue - - Check history - view all stored interactions +2. **Test the documentation search:** + - Ask questions about different documentation topics + - Notice how semantic search finds relevant content + - Test similar queries to see cached vs fresh results 3. **Observe the memory in action:** - - Watch the logs to see KV and vector operations - - Try different user IDs to see isolated memory - - Test with various message similarities + - Watch the logs to see vector search operations + - Try different user patterns to see personalized responses + - Test feedback loops by rating search results ## Memory at Scale @@ -1158,4 +1068,4 @@ Remember: Memory is what transforms an agent from a tool into a partner. 
--- -**Ready for Module 4?** [Agent-to-Agent Collaboration](./04-agent-collaboration) \ No newline at end of file +**Ready for Module 4?** [Agent-to-Agent Collaboration](./04-agent-collaboration) diff --git a/content/Training/developers/04-agent-collaboration.mdx b/content/Training/developers/04-agent-collaboration.mdx index ada674f1..a67e1d24 100644 --- a/content/Training/developers/04-agent-collaboration.mdx +++ b/content/Training/developers/04-agent-collaboration.mdx @@ -43,50 +43,56 @@ User → Orchestrator →→ Sales Agent ## Industry Challenges vs. Agentuity's Approach -According to [Microsoft's multi-agent orchestration announcements](https://www.microsoft.com/en-us/microsoft-copilot/blog/copilot-studio/multi-agent-orchestration-maker-controls-and-more-microsoft-copilot-studio-announcements-at-microsoft-build-2025/), most platforms require complex orchestration layers with state management, workflow engines, and coordination services. [AWS's multi-agent design patterns](https://aws.amazon.com/blogs/machine-learning/design-multi-agent-orchestration-with-reasoning-using-amazon-bedrock-and-open-source-frameworks/) show similar complexity, often requiring LangGraph or custom orchestrators. +According to [Microsoft's multi-agent orchestration announcements](https://www.microsoft.com/en-us/microsoft-copilot/blog/copilot-studio/multi-agent-orchestration-maker-controls-and-more-microsoft-copilot-studio-announcements-at-microsoft-build-2025/), most platforms require complex orchestration layers with state management, workflow engines, and coordination services. [AWS's multi-agent design patterns](https://aws.amazon.com/blogs/machine-learning/design-multi-agent-orchestration-with-reasoning-using-amazon-bedrock-and-open-source-frameworks/) show similar complexity, often requiring custom orchestrators and complex coordination mechanisms. 
-### Agentuity's Approach: Simple Handoffs +### Agentuity's Approach: Multiple Communication Methods -While other platforms build complex orchestration layers, Agentuity takes a different approach: **the handoff mechanism**. It's simpler, but with clear trade-offs: +While other platforms build complex orchestration layers, Agentuity provides flexible agent communication options: +**The handoff mechanism** is the simpler option, with a deliberately narrow scope: - **What it does**: Transfers control from one agent to another (like call forwarding) - **What it doesn't do**: Parallel execution, getting responses back, complex coordination - **Why it works**: Most real-world agent workflows are actually sequential or conditional -## The Handoff Mechanism +Agentuity also supports other communication patterns, including parallel execution through `context.getAgent()`, for more complex coordination when needed. -In Agentuity, agents communicate using the `handoff` method. This is like forwarding a phone call - the current agent passes control to another agent, along with any necessary data. +## Agent Communication Methods + +Agentuity provides two primary ways for agents to communicate with each other, each suited for different scenarios: + +### 1. Delegation with `response.handoff()` + +Use handoff when you want to delegate control to another agent and exit. This is like forwarding a phone call - the current agent passes control to another agent, along with any necessary data. 
{ - // Basic handoff example + // Delegation example: route to specialist and exit const task = await request.data.json(); - // Decide we need help from the web search agent + // Decide we need help from a specialist if (task.type === 'web_research') { - // Hand off to the web search specialist + // Delegate to web search specialist (we exit, they respond to client) return response.handoff( { name: 'web-search' }, // Target agent { - data: JSON.stringify({ query: task.query }), // Data as string + data: JSON.stringify({ query: task.query }), contentType: 'application/json', - metadata: { original_task: task } // Metadata object + metadata: { original_task: task } } ); } @@ -95,286 +101,137 @@ In Agentuity, agents communicate using the `handoff` method. This is like forwar return response.json({ result: 'Task completed' }); };`} /> -## Agent Resolution - -Agents can find each other using either: -- **Name**: Human-readable identifier (e.g., "web-search") -- **ID**: Unique system identifier (e.g., "agent_abc123") - -The context object knows about all agents in your project: - - { - // List all available agents - const availableAgents = context.agents; - context.logger.info(\`Available agents: \${availableAgents.map(a => a.name)}\`); - - // Check if a specific agent exists - if (availableAgents.some(agent => agent.name === 'summarizer')) { - // Agent exists, we can safely hand off - return response.handoff( - { name: 'summarizer' }, - { - data: JSON.stringify({ text: 'Long article to summarize...' }), - contentType: 'application/json' - } - ); - } -};`} /> - -## Patterns That Work with Agentuity's Handoff - -### 1. Sequential Chain Pattern - -Perfect for pipelines where each agent completes its work and passes to the next: - - { - // Pipeline pattern: Each agent enriches the data +Use getAgent when you need to stay in control and process responses from other agents. This enables parallel execution, conditional logic, and result aggregation. 
- const data = await request.data.json(); - const step = data.step || 1; - - if (step === 1) { - // First agent: Clean the data - data.cleaned = true; - data.step = 2; - return response.handoff({ name: 'processor' }, { data: JSON.stringify(data), contentType: 'application/json' }); - - } else if (step === 2) { - // Second agent: Enrich the data - data.enriched = true; - data.step = 3; - return response.handoff({ name: 'validator' }, { data: JSON.stringify(data), contentType: 'application/json' }); - - } else if (step === 3) { - // Final agent: Validate and return - data.validated = true; - return response.json(data); - } -};`} /> + { - // Router pattern: Direct to appropriate specialist - - const task = await request.data.json(); - const taskType = (task.type || '').toLowerCase(); - - // Route based on task type - const agentMap = { - math: 'calculator-agent', - translation: 'translator-agent', - search: 'web-search', - summary: 'summarizer' - }; - - const targetAgent = agentMap[taskType]; - - if (targetAgent) { - context.logger.info(\`Routing \${taskType} task to \${targetAgent}\`); - return response.handoff( - { name: targetAgent }, - { - data: JSON.stringify(task.payload || {}), - contentType: 'application/json' - } - ); + // Request-response example: coordinate multiple agents + + const { query } = await request.data.json(); + + // Get references to specialist agents + const webAgent = await context.getAgent({ name: 'web-search' }); + const newsAgent = await context.getAgent({ name: 'news-search' }); + + // Execute searches in parallel and process responses + const results = await Promise.allSettled([ + webAgent.run({ data: JSON.stringify({ query, source: 'web' }), contentType: 'application/json' }), + newsAgent.run({ data: JSON.stringify({ query, source: 'news' }), contentType: 'application/json' }) + ]); + + // Process and combine results (we stay in control) + const combinedResults = []; + const sources = ['web', 'news']; + + for (let i = 0; i < 
results.length; i++) { + if (results[i].status === 'fulfilled') { + const data = await results[i].value.data.json(); + combinedResults.push({ + source: sources[i], + results: data + }); + } } - // Unknown task type + // We respond to client with aggregated results return response.json({ - error: \`Unknown task type: \${taskType}\`, - available_types: Object.keys(agentMap) + query, + combined_results: combinedResults, + total_sources: combinedResults.length }); };`} /> -### 3. Orchestrator Pattern (Smart Routing) +### When to Use Each Method + +| Method | Use When | Control Flow | Response Handling | Constraints | +|--------|----------|--------------|-------------------|-------------| +| **`handoff`** | Sequential workflows, simple routing, delegation | One-way, exit current agent | Target agent responds to client | No response back, can't hand off to itself, project isolation | +| **`getAgent`** | Parallel execution, conditional logic, result aggregation | Stay in control | You process responses, then respond | More complex, manual coordination required | -A central agent analyzes requests and routes to appropriate specialists: +### Agent Discovery + +Agents can find each other using either name or ID. 
The context object knows about all agents in your project: { - // Smart orchestrator that analyzes intent before routing - const userPrompt = await request.data.text(); + # Option 2: Get response and stay in control + # summarizer = context.get_agent("summarizer") + # result = await summarizer.run({"text": "Long article..."}) + # summary = await result.data.json() + # return response.json({"processed_summary": summary})`} js={`const handler: AgentHandler = async (request, response, context) => { + // List all available agents + const availableAgents = context.agents; + context.logger.info(\`Available agents: \${availableAgents.map(a => a.name)}\`); - // Simple intent analysis (in production, use LLM) - if (userPrompt.toLowerCase().includes('technical') || userPrompt.toLowerCase().includes('code')) { - return response.handoff( - { name: 'technical-expert' }, - { - data: JSON.stringify({ query: userPrompt, type: 'technical' }), - contentType: 'application/json' - } - ); - } else if (userPrompt.toLowerCase().includes('price') || userPrompt.toLowerCase().includes('cost')) { - return response.handoff( - { name: 'sales-expert' }, - { - data: JSON.stringify({ query: userPrompt, type: 'sales' }), - contentType: 'application/json' - } - ); - } else if (userPrompt.toLowerCase().includes('help') || userPrompt.toLowerCase().includes('problem')) { + // Check if a specific agent exists before communicating + if (availableAgents.some(agent => agent.name === 'summarizer')) { + // Agent exists, safe to use either method + + // Option 1: Delegate and exit return response.handoff( - { name: 'support-expert' }, + { name: 'summarizer' }, { - data: JSON.stringify({ query: userPrompt, type: 'support' }), + data: JSON.stringify({ text: 'Long article to summarize...' }), contentType: 'application/json' } ); - } else { - return response.text('I can help with technical, sales, or support questions. How can I assist you?'); - } -};`} /> - -### 4. 
Understanding Handoff Flow - -Understanding that handoff is a one-way transfer is crucial. The final agent in the chain responds to the original client: - -``` -Client Request → Agent A → (handoff) → Agent B → Response to Client - ↓ - (exits) -``` - -This is different from traditional RPC where Agent A would wait for Agent B's response. - -## Understanding Handoff Constraints - -Agentuity's handoff has built-in safety features and important constraints to understand: - - { - // CONSTRAINT 1: Can't handoff to yourself (loop detection) - // This will raise an error: - // return response.handoff({ name: context.agent.name }, args); - - // CONSTRAINT 2: Handoff is one-way (no response back) - // Wrong expectation: - // const result = await response.handoff({ name: 'helper' }, args); // NO! - // Right approach: - return response.handoff({ name: 'helper' }, { - data: JSON.stringify(data), - contentType: 'application/json' - }); // Helper responds to client - - // CONSTRAINT 3: Only see agents in your project - // Agents are isolated by project for security - - // FEATURE: Metadata persists through handoffs - const metadata = request.metadata || {}; - metadata.chain = (metadata.chain || []).concat(context.agent.name); - return response.handoff( - { name: 'next-agent' }, - { - data: JSON.stringify(data), - contentType: 'application/json', - metadata // Pass context forward - } - ); + // Option 2: Get response and stay in control + // const summarizer = await context.getAgent({ name: 'summarizer' }); + // const result = await summarizer.run({ + // data: JSON.stringify({ text: 'Long article...' 
}), + // contentType: 'application/json' + // }); + // const summary = await result.data.json(); + // return response.json({ processed_summary: summary }); + } };`} /> ## Agentuity's Built-in Features @@ -433,286 +290,580 @@ Agentuity provides powerful built-in features for multi-agent systems: ); };`} /> -## Lab: Build a Research Orchestrator System +## Common Communication Patterns + +### 1. Sequential Chain Pattern + +Perfect for pipelines where each agent completes its work and passes to the next. **Best approach: `handoff`** - Natural fit for one-way delegation through a pipeline. + +**Cleaner Agent** - processes raw data: + { + // Data cleaner agent - first in the pipeline + + const data = await request.data.json(); + context.logger.info('Cleaning incoming data'); + + // Clean and validate the data + const cleanedData = { + original: data, + cleaned: true, + processed_by: context.agent.name, + timestamp: new Date().toISOString() + }; + + // Pass to processor agent + return response.handoff({ name: 'processor' }, { + data: JSON.stringify(cleanedData), + contentType: 'application/json' + }); +};`} /> + +**Processor Agent** - enriches cleaned data: + { + // Data processor agent - enriches cleaned data -### The Orchestrator Agent + const data = await request.data.json(); + context.logger.info('Processing cleaned data'); + + // Add enrichments + const enrichedData = { + ...data, + enriched: true, + enrichments: ['validation', 'normalization', 'metadata'], + processed_by: context.agent.name + }; -This agent analyzes requests and delegates to appropriate specialists: + // Pass to final validator + return response.handoff({ name: 'validator' }, { + data: JSON.stringify(enrichedData), + contentType: 'application/json' + }); +};`} /> +**Validator Agent** - final validation and response: { + // Validator agent - final step, responds to client + + const data = await request.data.json(); + context.logger.info('Final validation'); + + // Final validation and response to 
client + const finalResult = { + ...data, + validated: true, + pipeline_complete: true, + final_processor: context.agent.name, + pipeline_chain: ['cleaner', 'processor', 'validator'] + }; + + // Respond directly to client (no further handoff) + return response.json(finalResult); +};`} /> + +### 2. Conditional Routing Pattern + +Route to different agents based on the request type - ideal for dispatcher/router agents. **Best approach: `handoff`** - Simple delegation based on input analysis. + + { - // Research orchestrator that routes to specialist agents + // Router pattern: Direct to appropriate specialist - const researchRequest = await request.data.json(); - const query = researchRequest.query || ''; - const researchType = researchRequest.type || 'general'; + const task = await request.data.json(); + const taskType = (task.type || '').toLowerCase(); - context.logger.info(\`Orchestrating research for: \${query}\`); + // Route based on task type + const agentMap = { + math: 'calculator-agent', + translation: 'translator-agent', + search: 'web-search', + summary: 'summarizer' + }; + + const targetAgent = agentMap[taskType]; - // Route based on research type - if (researchType === 'web' || query.toLowerCase().includes('search')) { - // Route to web search specialist + if (targetAgent) { + context.logger.info(\`Routing \${taskType} task to \${targetAgent}\`); return response.handoff( - { name: 'web-search' }, + { name: targetAgent }, { - data: JSON.stringify({ query, max_results: 5 }), - contentType: 'application/json', - metadata: { orchestrator: 'research-orchestrator', original_type: researchType } + data: JSON.stringify(task.payload || {}), + contentType: 'application/json' } ); - } else if (researchType === 'summary' || query.toLowerCase().includes('summarize')) { - // Route directly to summarizer if we have content - const content = researchRequest.content || ''; - if (content) { - return response.handoff( - { name: 'summarizer' }, - { - data: JSON.stringify({ 
content, query }), - contentType: 'application/json', - metadata: { orchestrator: 'research-orchestrator' } - } - ); - } } - // Default: explain what we can do + // Unknown task type return response.json({ - message: 'I can help with web research or summarization.', - examples: [ - { type: 'web', query: 'latest AI developments' }, - { type: 'summary', content: 'text to summarize', query: 'key points' } - ] + error: \`Unknown task type: \${taskType}\`, + available_types: Object.keys(agentMap) }); };`} /> -### The Web Search Agent +### 3. Parallel Execution Pattern -A specialist that searches the web and automatically forwards to summarizer: +Execute multiple agents simultaneously and aggregate results. **Best approach: `getAgent`** - Requires collecting and processing responses from multiple agents. - { - // Specialist agent for web searches + ({ - title: \`Result \${i + 1} for \${query}\`, - snippet: \`This is a relevant snippet about \${query} with detailed information...\`, - url: \`https://example.com/article-\${i + 1}\`, - relevance_score: 0.9 - (i * 0.1) - })); + # Get multiple specialist agents + web_agent = context.get_agent("web-search") + academic_agent = context.get_agent("academic-search") + news_agent = context.get_agent("news-search") - // Automatically hand off to summarizer for final processing - return response.handoff( - { name: 'summarizer' }, - { - data: JSON.stringify({ - content: searchResults, - original_query: query, - source: 'web_search' - }), - contentType: 'application/json', - metadata: { previous_agent: 'web-search', search_count: searchResults.length } + # Execute all searches in parallel + results = await asyncio.gather( + web_agent.run({"query": query, "source": "web"}), + academic_agent.run({"query": query, "source": "academic"}), + news_agent.run({"query": query, "source": "news"}), + return_exceptions=True + ) + + # Process and combine results + combined_results = [] + sources = ["web", "academic", "news"] + + for i, result in 
enumerate(results): + if not isinstance(result, Exception): + data = await result.data.json() + combined_results.append({ + "source": sources[i], + "results": data.get("results", []), + "count": len(data.get("results", [])) + }) + + # Return aggregated results + return response.json({ + "query": query, + "research_results": combined_results, + "total_sources": len(combined_results), + "execution_pattern": "parallel" + })`} js={`const handler: AgentHandler = async (request, response, context) => { + // Parallel execution example: comprehensive research + + const { query } = await request.data.json(); + + // Get multiple specialist agents + const webAgent = await context.getAgent({ name: 'web-search' }); + const academicAgent = await context.getAgent({ name: 'academic-search' }); + const newsAgent = await context.getAgent({ name: 'news-search' }); + + // Execute all searches in parallel + const results = await Promise.allSettled([ + webAgent.run({ data: JSON.stringify({ query, source: 'web' }), contentType: 'application/json' }), + academicAgent.run({ data: JSON.stringify({ query, source: 'academic' }), contentType: 'application/json' }), + newsAgent.run({ data: JSON.stringify({ query, source: 'news' }), contentType: 'application/json' }) + ]); + + // Process and combine results + const combinedResults = []; + const sources = ['web', 'academic', 'news']; + + for (let i = 0; i < results.length; i++) { + if (results[i].status === 'fulfilled') { + const data = await results[i].value.data.json(); + combinedResults.push({ + source: sources[i], + results: data.results || [], + count: (data.results || []).length + }); } - ); + } + + // Return aggregated results + return response.json({ + query, + research_results: combinedResults, + total_sources: combinedResults.length, + execution_pattern: 'parallel' + }); };`} /> -### The Summarizer Agent +### 4. 
Smart Orchestration Pattern -Takes search results and creates a final research summary: +A central agent analyzes requests, makes decisions based on responses, and orchestrates complex workflows. **Best approach: `getAgent`** - Requires response processing for intelligent decision-making. { - // Specialist agent for summarizing research content - - const summaryRequest = await request.data.json(); - const content = summaryRequest.content || []; - const originalQuery = summaryRequest.original_query || ''; - const source = summaryRequest.source || 'unknown'; - - context.logger.info(\`Summarizing \${Array.isArray(content) ? content.length : 1} items for query: \${originalQuery}\`); - - // Process different content types - let finalSummary; - - if (source === 'web_search' && Array.isArray(content)) { - // Handle web search results - const summaryParts = content.slice(0, 3).map((item, i) => ({ - rank: i + 1, - title: item.title || '', - summary: item.snippet || '', - source_url: item.url || '', - relevance: item.relevance_score || 0 - })); - - finalSummary = { - query: originalQuery, - summary: \`Research Summary for '\${originalQuery}'\`, - key_findings: summaryParts, - total_sources: content.length, - research_completed_at: new Date().toISOString(), - agent_chain: ['orchestrator', 'web-search', 'summarizer'] - }; + # Low complexity: handle directly + return response.json({ + "approach": "direct", + "result": f"Simple answer for: {query}", + "confidence": "low" + })`} js={`const handler: AgentHandler = async (request, response, context) => { + // Smart orchestration with decision-making + + const { query } = await request.data.json(); + + // First, analyze the request complexity + const analyzer = await context.getAgent({ name: 'complexity-analyzer' }); + const analysis = await analyzer.run({ + data: JSON.stringify({ query }), + contentType: 'application/json' + }); + const analysisData = await analysis.data.json(); + + // Make routing decisions based on analysis + if 
(analysisData.complexity === 'high') { + // High complexity: get multiple perspectives + const researchAgent = await context.getAgent({ name: 'research-specialist' }); + const expertAgent = await context.getAgent({ name: 'domain-expert' }); + + const researchResult = await researchAgent.run({ + data: JSON.stringify({ query }), + contentType: 'application/json' + }); + const expertResult = await expertAgent.run({ + data: JSON.stringify({ query }), + contentType: 'application/json' + }); + + // Combine insights for comprehensive response + return response.json({ + approach: 'comprehensive', + research: await researchResult.data.json(), + expert_opinion: await expertResult.data.json(), + confidence: 'high' + }); + + } else if (analysisData.complexity === 'medium') { + // Medium complexity: single specialist + const specialist = await context.getAgent({ name: 'general-specialist' }); + const result = await specialist.run({ + data: JSON.stringify({ query }), + contentType: 'application/json' + }); + + return response.json({ + approach: 'specialist', + result: await result.data.json(), + confidence: 'medium' + }); + } else { - // Handle direct text content - finalSummary = { - query: originalQuery, - summary: \`Summary of content for '\${originalQuery}'\`, - content_length: String(content).length, - processed_at: new Date().toISOString(), - agent_chain: ['orchestrator', 'summarizer'] - }; + // Low complexity: handle directly + return response.json({ + approach: 'direct', + result: \`Simple answer for: \${query}\`, + confidence: 'low' + }); } - - // Return the final research summary to the user - return response.json(finalSummary); };`} /> -## Testing Your Research Orchestrator +## Lab: Build a Conference Concierge System + +Build a concierge system that routes user requests to different specialized agents using structured AI responses. This shows how agents can work together by analyzing user intent and routing to the right specialist. 
-Test the research orchestrator system: +### Key Implementation: Intent Analysis with Zod Validation +The core pattern shows how agents use structured AI responses to decide where to route requests: + + + +### Key Implementation: Routing to Specialized Agents + +The concierge routes to different agents based on what the user needs: + + + +### Key Implementation: Conversation Memory and Context + +Sophisticated orchestrators maintain conversation state across agent interactions: + + + +### Build This Project Yourself + +Ready to implement this project? Follow our complete examples: +
+ + + TypeScript + +
+ + Python (Coming Soon) +
+
+ +### What This System Demonstrates + +- **Structured AI responses**: Zod and Pydantic schemas ensure reliable routing decisions +- **Agent specialization**: Different agents handle SF questions, conference info, and developer support +- **Conversation memory**: KV storage keeps track of context across agent interactions +- **Practical patterns**: Confidence thresholds, fallback handling, and session management +- **Intent-based routing**: AI analyzes user messages to figure out which agent can help + +The complete examples show how to build agent coordination systems that route users to the right specialists based on AI analysis. + +## Testing Your Conference Concierge + +1. **Start DevMode:** ```bash -# Test web research routing -curl -X POST https://your-project.agentuity.com/agents/orchestrator \ - -H "Content-Type: application/json" \ - -d '{ - "type": "web", - "query": "latest developments in AI agents" - }' - -# Test direct summarization routing -curl -X POST https://your-project.agentuity.com/agents/orchestrator \ - -H "Content-Type: application/json" \ - -d '{ - "type": "summary", - "query": "key points", - "content": "Long text content to be summarized..." - }' +agentuity dev ``` -Expected flow for web research: -1. `orchestrator` analyzes request and routes to web-search -2. `web-search` finds results and forwards to summarizer -3. `summarizer` creates final research summary and responds to client +2. **Test different intents:** + - **Local questions**: "Where's the best coffee near Moscone Center?" + - **Conference questions**: "What time does the keynote start?" + - **Developer questions**: "How do I implement agent handoffs?" + +3. **Watch the routing in action:** + - Check logs to see intent analysis and confidence scores + - See how conversation context helps routing get better over time + - Test edge cases where confidence is too low ## Key Takeaways @@ -724,11 +875,11 @@ Expected flow for web research: ## What's Next? 
-Now that you can build multi-agent systems, Module 5 will cover **Guardrails & Observability** - how to ensure your agent teams behave safely and predictably in production. +Now that you can build multi-agent systems, Module 5 will cover **Observability, Guardrails, & Evals** - how to ensure your agent teams behave safely and predictably in production. Questions to consider: - How do you prevent agents from taking harmful actions? - How do you track what decisions agents are making? - How do you ensure compliance and audit requirements? -Continue to [Module 5: Guardrails & Observability →](./05-guardrails-evals-observability) \ No newline at end of file +Continue to [Module 5: Observability, Guardrails, & Evals →](./05-observability-guardrails-evals) diff --git a/content/Training/developers/05-guardrails-evals-observability.mdx b/content/Training/developers/05-observability-guardrails-evals.mdx similarity index 57% rename from content/Training/developers/05-guardrails-evals-observability.mdx rename to content/Training/developers/05-observability-guardrails-evals.mdx index 56654feb..c07e7408 100644 --- a/content/Training/developers/05-guardrails-evals-observability.mdx +++ b/content/Training/developers/05-observability-guardrails-evals.mdx @@ -1,5 +1,5 @@ --- -title: "Module 5: Guardrails, Evals & Observability" +title: "Module 5: Observability, Guardrails, & Evals" description: Making agents reliable, safe, and production-ready --- @@ -18,54 +18,210 @@ According to [NIST's AI Risk Management framework](https://www.nist.gov/itl/ai-r ## The Three Pillars of Production Agents -Agentuity provides built-in OpenTelemetry integration with automatic instrumentation through Traceloop SDK, giving you comprehensive observability out of the box. +Agentuity provides built-in OpenTelemetry integration with automatic instrumentation, giving you comprehensive observability out of the box. -### 1. Guardrails: Setting Boundaries +### 1. 
Observability: Seeing Everything -Guardrails prevent agents from harmful actions while preserving autonomy: +Agentuity's key advantage is automatic OpenTelemetry integration with zero configuration: + +- **What's Tracked**: LLM calls, tool invocations, storage operations, API calls +- **Observability Triad**: Logs (events), Metrics (measurements), Traces (request flow) +- **Console View**: Timeline visualization with color-coded spans +- **Zero Setup**: Works out of the box - no instrumentation code required + +### 2. Guardrails: Setting Boundaries + +Guardrails prevent agents from taking harmful actions while preserving their autonomy and keeping them within their intended scope: - **Input Validation**: Schema enforcement, content filtering, size limits -- **Rate Limiting**: Prevent abuse and control costs per user/session +- **Rate Limiting**: Prevent abuse and control costs per user/session - **Security**: Prompt injection defense ([WASP](https://arxiv.org/abs/2407.01593)), tool permissions - **Domain Rules**: Compliance checks, output validation, custom constraints -### 2. Evaluation: Measuring Success +### 3. 
Evaluation: Measuring Success Systematic evaluation is critical for non-deterministic agents: -- **Industry Benchmarks**: [SWE-Bench](https://www.swebench.com/) (success rate on software engineering tasks) -- **Automated Testing**: Unit tests, integration tests, golden datasets -- **Production Metrics**: Success rates, latency, cost per request -- **A/B Testing**: Shadow deployments, gradual rollouts +- **Real-World Benchmarks**: [τ-Bench](https://sierra.ai/blog/benchmarking-ai-agents) (dynamic agent interactions), [TheAgentCompany](https://metadesignsolutions.com/benchmarking-ai-agents-in-2025-top-tools-metrics-performance-testing-strategies/) (multi-step professional tasks) +- **Golden Datasets**: Domain-specific test cases with expected outcomes +- **Production Metrics**: Success rates, latency, cost per request, goal completion rates +- **A/B Testing**: Shadow deployments, gradual rollouts, real user feedback -### 3. Observability: Seeing Everything +## Observability with OpenTelemetry -Agentuity provides automatic OpenTelemetry integration with Traceloop SDK: +Agentuity automatically tracks everything through OpenTelemetry: -- **What's Tracked**: LLM calls, tool invocations, storage operations, API calls -- **Three Pillars**: Logs (events), Metrics (measurements), Traces (request flow) -- **Console View**: Timeline visualization with color-coded spans +### What's Tracked (No Code Required) +- **Agent executions**: Full request/response lifecycle +- **LLM calls**: Prompts, completions, token usage, latency +- **Storage operations**: KV gets/sets, vector searches +- **API calls**: External HTTP requests + +View in the Agentuity console Sessions tab with color-coded timeline visualization. 
+ +### Using the Logger + + { + // Logs appear in Sessions view with trace context + context.logger.info('Processing', { userId }); + + try { + const result = await process(data); + context.logger.info('Success', { count: result.length }); + } catch (error) { + context.logger.error('Failed', { error: error.message }); + throw error; + } +};`} /> + +### Custom Spans for Your Own Operations + +Track important operations with custom spans to understand performance and debug issues: + + { + // Create a span for the entire validation flow + return context.tracer.startActiveSpan('validate-financial-query', async (span) => { + // Add context about this operation + span.setAttribute('user.tier', 'premium'); + span.setAttribute('query.type', 'retirement'); + span.setAttribute('query.length', query.length); + + try { + // Track validation steps + span.addEvent('validation-started'); + + if (hasProhibitedTerms(query)) { + span.addEvent('validation-failed', { reason: 'prohibited-terms' }); + span.setStatus({ code: SpanStatusCode.ERROR, message: 'Prohibited terms detected' }); + return response.json({ error: 'Invalid query' }); + } + + span.addEvent('validation-passed'); + + // Track LLM call separately + const advice = await context.tracer.startActiveSpan('generate-advice', async (llmSpan) => { + llmSpan.setAttribute('model', 'gpt-4'); + const result = await generateAdvice(query); + llmSpan.setAttribute('response.tokens', countTokens(result)); + llmSpan.end(); + return result; + }); + + span.setStatus({ code: SpanStatusCode.OK }); + return response.json({ advice }); + + } catch (error) { + span.recordException(error); + span.setStatus({ code: SpanStatusCode.ERROR, message: error.message }); + throw error; + } finally { + span.end(); + } + }); +};`} /> + +### Performance Best Practices + +Follow these optimization patterns to ensure your agents run efficiently in production: + +| Strategy | Implementation | +|----------|---------------| +| **Cache expensive operations** | Store 
LLM responses in KV with TTL to avoid repeated calls | +| **Use parallel operations** | `Promise.all()` (JS) or `asyncio.gather()` (Python) for concurrent tasks | +| **Fail fast** | Validate inputs early to avoid unnecessary processing | +| **Track token usage** | Add token counts as span attributes to monitor costs | +| **Set meaningful attributes** | Include user tier, request type, and other context for filtering | + +### What You Get Out of the Box + + +**Built-in Observability**: Agentuity provides automatic OpenTelemetry instrumentation with zero configuration required. + + +Everything is tracked automatically: +- **LLM calls**: Model, tokens, latency, and responses +- **Storage operations**: Every KV get/set, vector search, object store operation +- **API calls**: External service interactions and latencies +- **Custom spans**: Your business logic with meaningful attributes +- **Visual debugging**: Color-coded timeline in the console shows execution flow + + +See the [Agent Telemetry Guide](/Guides/agent-telemetry) for advanced tracing and custom spans. + ## Implementing Guardrails -Let's implement essential guardrails for a financial advisor agent: +Let's implement essential guardrails for production agent systems, using patterns from our Conference Concierge system: ### 1. Schema Validation with Zod & Pydantic Runtime validation is critical for agents. TypeScript and Python types only exist at development time - at runtime, your data needs validation. +**Zod** (TypeScript) and **Pydantic** (Python) are popular validation libraries in their respective ecosystems. Zod provides TypeScript-first schema validation with static type inference, while Pydantic offers high-performance validation for Python with type hints integration. Both solve the same problem: ensuring runtime data matches your type expectations. + For detailed API references, see the [Zod](https://zod.dev) and [Pydantic](https://docs.pydantic.dev/latest/) documentation. 
- 0 if provided + +class ContentEvaluation(BaseModel): + approved: bool + reason: str + category: Literal['safe', 'risky', 'prohibited'] + confidence: float = Field(ge=0.0, le=1.0) -async def run(request, response, context): +async def run(request: AgentRequest, response: AgentResponse, context: AgentContext): # 1. Validate structure try: raw_data = await request.data.json() @@ -76,55 +232,148 @@ async def run(request, response, context): 'details': e.errors() }) - # 2. Apply domain rules separately - prohibited = ['insider', 'guaranteed'] - if any(term in validated.query.lower() for term in prohibited): - return response.json({'error': 'Prohibited terms detected'}) - - # 3. Process validated data - return await process_query(validated.query, validated.user_id)`} js={`import { z } from 'zod'; + # 2. Use LLM jury for content evaluation + evaluation = await evaluate_with_jury(validated.query, context) + if not evaluation.approved: + return response.json({ + 'error': 'Content policy violation', + 'reason': evaluation.reason + }) + + # 3. Process validated and approved data + return await process_query(validated.query, validated.user_id) + +async def evaluate_with_jury(query: str, context) -> ContentEvaluation: + """Use LLM jury to evaluate content appropriateness.""" + jury_prompt = f''' + Evaluate this financial query for compliance and safety: + "{query}" + + Check for: + - Prohibited financial advice (guaranteed returns, insider trading) + - Inappropriate requests (illegal activities) + - Scam indicators or harmful content + + Respond with your evaluation as structured data. 
+ ''' + + # Use structured generation with Python anthropic SDK + from anthropic import AsyncAnthropic + import json + + client = AsyncAnthropic() + response = await client.messages.create( + model="claude-3-haiku-20240307", + max_tokens=1000, + messages=[{"role": "user", "content": jury_prompt}] + ) + + # Parse response and validate with Pydantic + try: + response_data = json.loads(response.content[0].text) + evaluation = ContentEvaluation(**response_data) + except (json.JSONDecodeError, ValidationError) as e: + # Fallback to safe evaluation + evaluation = ContentEvaluation( + approved=False, + reason="Failed to parse AI evaluation", + category="prohibited", + confidence=0.0 + ) + + context.logger.info("Jury evaluation", { + "approved": evaluation.approved, + "category": evaluation.category, + "confidence": evaluation.confidence + }) + + return evaluation`} js={`import { z } from 'zod'; +import { generateObject } from 'ai'; +import { anthropic } from '@ai-sdk/anthropic'; // Define clean schema for structure const UserQuerySchema = z.object({ query: z.string().min(1).max(1000), userId: z.string(), - portfolioValue: z.number().positive().optional() + portfolioValue: z.number().positive().optional() // Must be > 0 if provided +}); + +// Content evaluation schema for LLM jury +const ContentEvaluationSchema = z.object({ + approved: z.boolean(), + reason: z.string(), + category: z.enum(['safe', 'risky', 'prohibited']), + confidence: z.number().min(0).max(1) }); -// TypeScript type from schema +// TypeScript types from schemas type UserQuery = z.infer<typeof UserQuerySchema>; +type ContentEvaluation = z.infer<typeof ContentEvaluationSchema>; const handler = async (request, response, context) => { // 1. Validate structure const result = UserQuerySchema.safeParse(await request.data.json()); - + if (!result.success) { return response.json({ error: 'Invalid request', details: result.error.issues }); } - - // 2. 
Apply domain rules separately - const prohibited = ['insider', 'guaranteed']; - if (prohibited.some(term => result.data.query.toLowerCase().includes(term))) { - return response.json({ error: 'Prohibited terms detected' }); + + // 2. Use LLM jury for content evaluation + const evaluation = await evaluateWithJury(result.data.query, context); + if (!evaluation.approved) { + return response.json({ + error: 'Content policy violation', + reason: evaluation.reason + }); } - - // 3. Process validated data + + // 3. Process validated and approved data const { query, userId } = result.data; return await processQuery(query, userId); +}; + +async function evaluateWithJury(query: string, context): Promise<ContentEvaluation> { + const juryPrompt = \` + Evaluate this financial query for compliance and safety: + "\${query}" + + Check for: + - Prohibited financial advice (guaranteed returns, insider trading) + - Inappropriate requests (illegal activities) + - Scam indicators or harmful content + + Respond with your evaluation as structured data. + \`; + + const evaluation = await generateObject({ + model: anthropic('claude-3-haiku-20240307'), + schema: ContentEvaluationSchema, + prompt: juryPrompt + }); + + context.logger.info('Jury evaluation', { + approved: evaluation.object.approved, + category: evaluation.object.category, + confidence: evaluation.object.confidence + }); + + return evaluation.object; };`} /> **Key Pattern**: Separate validation from domain rules. Schemas validate structure, your specific rules come after. + #### Real-World Example: Validating External API Responses Here's how the AI News Digest agent validates Hacker News API responses: - { } };`} /> -### Orchestrators as Natural Guardrails +### 3. Orchestrators as Natural Guardrails Remember the orchestrator pattern from Module 4? 
Orchestrators are your first line of defense: @@ -324,7 +573,8 @@ async def orchestrator_with_guardrails(request, response, context): # Route sensitive requests to specialized agent return response.handoff( {"name": "compliance-agent"}, - {"query": user_prompt, "flagged": True} + user_prompt, + metadata={"flagged": True} ) if intent.confidence < 0.7: @@ -343,7 +593,7 @@ async def orchestrator_with_guardrails(request, response, context): return response.handoff( {"name": agent_map[intent.category]}, - {"query": user_prompt} + user_prompt )`} js={`import { z } from 'zod'; const ValidatedIntentSchema = z.object({ @@ -413,7 +663,9 @@ See the complete [Conference Concierge implementation](https://github.com/agentu Prevent abuse and control costs: - -### 3. Domain-Specific Rules +### 4. Domain-Specific Rules Enforce rules specific to your use case: - 1000: + return response.json({ + "error": "Query too long", + "max_length": 1000 + }) + + # Process valid request + result = await process_query(query, context) + return response.json({ - "advice": advice, - "disclaimer": "This is not personalized financial advice." 
+ "response": result, + "processed_by": context.agent.name })`} js={`const handler: AgentHandler = async (request, response, context) => { const { query } = await request.data.json(); - - // Prohibited terms check - const prohibited = ['guaranteed returns', 'risk-free', 'insider']; - for (const term of prohibited) { + + // Domain-specific validation rules + const inappropriateRequests = ['hack', 'illegal', 'private information']; + for (const term of inappropriateRequests) { if (query.toLowerCase().includes(term)) { return response.json({ - error: 'Cannot provide advice on prohibited topics', - reason: 'Term \'' + term + '\' is not allowed' + error: 'Cannot assist with inappropriate requests', + reason: \`Request contains prohibited content: '\${term}'\` }); } } - - // Generate advice with mandatory disclaimer - const advice = await generateAdvice(query); - + + // Length validation + if (query.length > 1000) { + return response.json({ + error: 'Query too long', + maxLength: 1000 + }); + } + + // Process valid request + const result = await processQuery(query, context); + return response.json({ - advice, - disclaimer: 'This is not personalized financial advice.' + response: result, + processedBy: context.agent.name }); };`} /> ## Evaluation Strategies + +**Coming Soon**: Agentuity's integrated prompt library and automated evaluation suite will provide built-in prompt management and continuous evaluation workflows. 
+ + ### Define Success Metrics Choose metrics that matter for your domain: | Metric | Example | How to Measure | |--------|---------|----------------| -| **Accuracy** | Correct advice | Compare against test cases | -| **Compliance** | No prohibited terms | Check output validation | +| **Accuracy** | Correct routing | Compare against test cases | +| **Compliance** | Appropriate responses | Check output validation | | **Performance** | < 5s response | Track in telemetry | | **Cost** | < $0.10/request | Monitor token usage | @@ -515,30 +786,39 @@ Choose metrics that matter for your domain: Create test cases that cover your critical scenarios: - content.includes(term)); + } else if (test.expected.routedTo) { + // Check correct routing decision + const routedAgent = response.routedTo; + passed = routedAgent === test.expected.routedTo; + + // Also check response content + const content = (response.response || '').toLowerCase(); for (const term of test.expected.contains || []) { if (!content.includes(term)) { passed = false; break; } } - - // Check prohibited terms - for (const term of test.expected.excludes || []) { - if (content.includes(term)) { - passed = false; - break; - } - } } results.push({ @@ -649,466 +934,188 @@ async function evaluateAgent(agentHandler, testCases) { return { successRate, results }; }`} /> -## Observability with OpenTelemetry - -Agentuity automatically tracks everything through OpenTelemetry: - -### What's Tracked (No Code Required) -- **Agent executions**: Full request/response lifecycle -- **LLM calls**: Prompts, completions, token usage, latency -- **Storage operations**: KV gets/sets, vector searches -- **API calls**: External HTTP requests - -View in the Agentuity console Sessions tab with color-coded timeline visualization. 
- -### Using the Logger - - { - // Logs appear in Sessions view with trace context - context.logger.info('Processing', { userId }); - - try { - const result = await process(data); - context.logger.info('Success', { count: result.length }); - } catch (error) { - context.logger.error('Failed', { error: error.message }); - throw error; - } -};`} /> - -### Custom Spans for Your Own Operations - -Track important operations with custom spans to understand performance and debug issues: - - { - // Create a span for the entire validation flow - return context.tracer.startActiveSpan('validate-financial-query', async (span) => { - // Add context about this operation - span.setAttribute('user.tier', 'premium'); - span.setAttribute('query.type', 'retirement'); - span.setAttribute('query.length', query.length); - - try { - // Track validation steps - span.addEvent('validation-started'); - - if (hasProhibitedTerms(query)) { - span.addEvent('validation-failed', { reason: 'prohibited-terms' }); - span.setStatus({ code: SpanStatusCode.ERROR, message: 'Prohibited terms detected' }); - return response.json({ error: 'Invalid query' }); - } - - span.addEvent('validation-passed'); - - // Track LLM call separately - const advice = await context.tracer.startActiveSpan('generate-advice', async (llmSpan) => { - llmSpan.setAttribute('model', 'gpt-4'); - const result = await generateAdvice(query); - llmSpan.setAttribute('response.tokens', countTokens(result)); - llmSpan.end(); - return result; - }); - - span.setStatus({ code: SpanStatusCode.OK }); - return response.json({ advice }); - - } catch (error) { - span.recordException(error); - span.setStatus({ code: SpanStatusCode.ERROR, message: error.message }); - throw error; - } finally { - span.end(); - } - }); -};`} /> +Instead of building a new agent from scratch, let's learn validation patterns from Module 4's Conference Concierge - a working system that shows useful validation techniques. 
-### Performance Best Practices +### Key Pattern: Structured LLM Output Validation -Follow these optimization patterns to ensure your agents run efficiently in production: +The concierge uses validation schemas to ensure AI generates reliable routing decisions: -| Strategy | Implementation | -|----------|---------------| -| **Cache expensive operations** | Store LLM responses in KV with TTL to avoid repeated calls | -| **Use parallel operations** | `Promise.all()` (JS) or `asyncio.gather()` (Python) for concurrent tasks | -| **Fail fast** | Validate inputs early to avoid unnecessary processing | -| **Track token usage** | Add token counts as span attributes to monitor costs | -| **Set meaningful attributes** | Include user tier, request type, and other context for filtering | + -**Built-in Observability**: Agentuity provides automatic OpenTelemetry instrumentation with zero configuration required. -
+# Use AI to analyze intent with validation +try: + ai_response = await generate_structured_intent(user_message) + intent = UserIntent(**ai_response) # Pydantic validates structure +except ValidationError: + # Safe fallback for invalid data + intent = UserIntent(agent_type='dev_experience', confidence=0.5) -Everything is tracked automatically: -- **LLM calls**: Model, tokens, latency, and responses -- **Storage operations**: Every KV get/set, vector search, object store operation -- **API calls**: External service interactions and latencies -- **Custom spans**: Your business logic with meaningful attributes -- **Visual debugging**: Color-coded timeline in the console shows execution flow +# Validated data enables confident decision making +if intent.confidence < 0.8: + return response.json({ + "message": "I'll connect you with a human specialist" + })`} js={`// TypeScript - Zod validation for AI-generated routing decisions +import { z } from 'zod'; +import { generateObject } from 'ai'; - -See the [Agent Telemetry Guide](/Guides/agent-telemetry) for advanced tracing and custom spans. 
- +const UserIntentSchema = z.object({ + agentType: z.enum(['sf_local_guide', 'conference_expert', 'dev_experience']), + confidence: z.number().min(0).max(1) +}); -## Lab: Production-Ready Financial Advisor +// Validation ensures reliable AI output structure +const { object: userIntent } = await generateObject({ + model: anthropic('claude-3-5-sonnet-20241022'), + schema: UserIntentSchema, // Zod prevents invalid structures + prompt: \`Analyze this user message and determine routing: "\${userMessage}"\` +}); -Let's build a financial advisor progressively, adding production features step by step: +// Validated data enables confident decision making +if (userIntent.confidence < 0.8) { + return response.json({ + message: "I'll connect you with a human specialist" + }); +}`} /> -### Step 1: Basic Validation +### Key Pattern: Input Validation - str: - """Generate financial advice - mock implementation for demo.""" - context.logger.info("Generating advice", {"query_preview": query[:50]}) - - # Simple mock responses - advice_map = { - "roth ira": "A Roth IRA is a retirement account with tax-free withdrawals.", - "401k": "A 401(k) offers pre-tax contributions and employer matching.", - "invest": "Start with your risk tolerance and time horizon." 
- } - - query_lower = query.lower() - for key, advice in advice_map.items(): - if key in query_lower: - return advice - - return "I can help with retirement accounts and investment strategies."`} js={`import { z } from 'zod'; - -// Simple schema for financial queries -const FinancialQuerySchema = z.object({ - query: z.string().min(1).max(500), - portfolioValue: z.number().positive().optional() -}); + context.logger.warning("Invalid input structure", {"errors": e.errors()}) + return response.json({ + "error": "Invalid request format", + "details": [{"field": err["loc"][-1], "issue": err["msg"]} for err in e.errors()] + }) -const welcome = () => ({ - welcome: 'Financial Advisor Agent', - prompts: [ - { data: JSON.stringify({ query: 'What is a Roth IRA?' }), contentType: 'application/json' }, - { data: JSON.stringify({ query: 'How do I invest $50000?' }), contentType: 'application/json' } - ] -}); + # Proceed with validated, type-safe data + return await process_conversation(validated, context) +``` -const handler = async (request, response, context) => { - // Step 1: Validate input structure - const result = FinancialQuerySchema.safeParse(await request.data.json()); - if (!result.success) { - context.logger.error('Invalid input', { errors: result.error.issues }); - return response.json({ error: 'Invalid request', details: result.error.issues }); - } - - const validated = result.data; - - // Step 2: Apply domain rules - const prohibited = ['guaranteed returns', 'risk-free', 'insider']; - for (const term of prohibited) { - if (validated.query.toLowerCase().includes(term)) { - context.logger.warning('Prohibited term', { term }); - return response.json({ error: 'Cannot advise on ' + term }); +### Key Pattern: Domain-Specific Guardrails + +The concierge applies context-aware validation rules: + +```typescript +// Context-aware validation based on agent routing +async function validateAgentRouting( + userIntent: UserIntent, + conversationHistory: ConversationRecord, + ctx: 
AgentContext +) { + // Apply routing-specific validation rules + if (userIntent.agentType === 'dev_experience') { + // Technical questions require minimum context + if (userIntent.context.length < 10) { + ctx.logger.info('Insufficient context for technical routing'); + return { valid: false, reason: 'Please provide more detail about your question' }; } } - - // Step 3: Generate response - const advice = await generateFinancialAdvice(validated.query, context); - - return response.json({ - advice, - disclaimer: 'Not personalized financial advice.' - }); -}; -async function generateFinancialAdvice(query, context) { - // Generate financial advice - mock implementation for demo - context.logger.info('Generating advice', { queryPreview: query.substring(0, 50) }); - - // Simple mock responses - const adviceMap = { - 'roth ira': 'A Roth IRA is a retirement account with tax-free withdrawals.', - '401k': 'A 401(k) offers pre-tax contributions and employer matching.', - 'invest': 'Start with your risk tolerance and time horizon.' 
- }; - - const queryLower = query.toLowerCase(); - for (const [key, advice] of Object.entries(adviceMap)) { - if (queryLower.includes(key)) { - return advice; + // Location questions need geographic context + if (userIntent.agentType === 'sf_local_guide') { + const hasLocationContext = /\b(san francisco|sf|bay area|moscone)\b/i.test(userIntent.context); + if (!hasLocationContext) { + return { valid: false, reason: 'Please specify your San Francisco location or area of interest' }; } } - - return 'I can help with retirement accounts and investment strategies.'; -} - -export default handler; -export { welcome };`} /> - -### Step 2: Add Rate Limiting - -Now enhance with rate limiting to prevent abuse: - -= 100: # 100 requests per hour - return response.json({ - "error": "Rate limit exceeded", - "retry_after": 3600 - }) -await context.kv.set("rate_limits", hour_key, count + 1, {"ttl": 3600})`} js={`// Add to the handler after validation: - -// Check rate limit -const userId = request.metadata.get('user_id') || 'anonymous'; -const hourKey = 'rate_' + userId + '_' + new Date().getHours(); -const usage = await context.kv.get('rate_limits', hourKey); -const count = usage.exists ? 
await usage.data.json() : 0; - -if (count >= 100) { // 100 requests per hour - return response.json({ - error: 'Rate limit exceeded', - retry_after: 3600 - }); + return { valid: true }; } +``` -await context.kv.set('rate_limits', hourKey, count + 1, { ttl: 3600 });`} /> - -### Step 3: Add Observability +### Pattern: Error Recovery and Fallbacks -Finally, add custom spans to track and debug your agent's performance: +Good validation includes graceful error recovery: - { - // Create a span for the entire request - return context.tracer.startActiveSpan('financial-advisor-request', async (span) => { - const userId = request.metadata.get('user_id') || 'anonymous'; - span.setAttribute('user.id', userId); - span.setAttribute('request.type', 'financial-advice'); - - // Validation with span tracking - span.addEvent('validation-started'); - const result = FinancialQuerySchema.safeParse(await request.data.json()); - if (!result.success) { - span.addEvent('validation-failed', { errors: JSON.stringify(result.error.issues) }); - span.setStatus({ code: SpanStatusCode.ERROR, message: 'Invalid input' }); - context.logger.error('Invalid input', { errors: result.error.issues }); - span.end(); - return response.json({ error: 'Invalid request', details: result.error.issues }); - } - - span.addEvent('validation-passed'); - span.setAttribute('query.length', result.data.query.length); - - const validated = result.data; - - // Domain rules check with tracking - const prohibited = ['guaranteed returns', 'risk-free', 'insider']; - for (const term of prohibited) { - if (validated.query.toLowerCase().includes(term)) { - span.addEvent('compliance-violation', { term }); - span.setStatus({ code: SpanStatusCode.ERROR, message: 'Prohibited term: ' + term }); - span.end(); - return response.json({ error: 'Cannot advise on ' + term }); - } + # Strategy 3: Safe defaults + return { + "status": "default", + "data": ConversationRequest( + message="I need help", + 
session_id=f"emergency_{int(time.time())}" + ) } - - // Generate advice with nested span - const advice = await context.tracer.startActiveSpan('generate-advice', async (adviceSpan) => { - adviceSpan.setAttribute('query.type', detectQueryType(validated.query)); - const result = await generateFinancialAdvice(validated.query, context); - adviceSpan.setAttribute('response.length', result.length); - adviceSpan.end(); - return result; - }); - - span.addEvent('response-generated'); - span.setStatus({ code: SpanStatusCode.OK }); - span.end(); - - return response.json({ - advice, - disclaimer: 'Not personalized financial advice.' - }); - }); -};`} /> +``` -### Common Validation Pitfalls +## 🚀 Coming Soon: Integrated Prompts & Evaluations -Avoid these common mistakes when implementing validation: +Agentuity is working on prompt management and evaluation features: -| Pitfall | Best Practice | -|---------|--------------| -| **Using deprecated validators** | Pydantic v2: Use `@field_validator`, not `@validator` | -| **Throwing on invalid input** | Zod: Use `safeParse` by default, `parse` only when you want to throw | -| **Mixing concerns** | Keep schemas for structure, domain rules separate | -| **I/O in validators** | Never make API calls or database queries inside validators | -| **Poor error messages** | Format validation errors in user-friendly ways before returning | +### **Code-First Prompts** +- Version-controlled prompts in your codebase alongside agent logic +- Prompt templates with variable substitution -### Testing Your Production Agent +### **Automated Evaluations** +- Evals that run automatically after every agent session (configurable) +- Custom evaluation criteria specific to your domain and use cases +- Integration with your existing CI/CD pipeline -1. **Start DevMode:** -```bash -agentuity dev -``` +### **Experimentation Mode** +- Multi-model comparisons (e.g., GPT-5, Claude, Gemini) on identical prompts -2. 
**Test guardrails:** - - Try prompt injection attacks - - Exceed rate limits - - Request prohibited advice + +These features will provide better visibility into agent behavior and systematic ways to improve agent performance over time. + -3. **Monitor observability:** - - Watch traces in real-time - - Track custom metrics - - Review logs for issues +## Key Takeaways - **Guardrails prevent failures** - Input validation, rate limiting, and domain rules protect your agents -- **Evaluation proves reliability** - Systematic testing with metrics that matter for your domain -- **Observability is automatic** - Agentuity provides OpenTelemetry integration with Traceloop for comprehensive monitoring +- **Evaluation proves reliability** - Systematic testing with metrics that matter for your domain +- **Observability is automatic** - Agentuity provides OpenTelemetry integration for comprehensive monitoring - **The console shows everything** - Sessions view with color-coded timeline visualization - **Production readiness** requires all three pillars working together ## What's Next? -Now that your agents are production-ready, it's time to deploy them. In the next module, we'll explore Agentuity's deployment environments - from local development through staging to production. - -But first, experiment with the lab agent: -- Test different guardrail scenarios -- View the telemetry in the console -- Try modifying the evaluation criteria -- Add your own custom spans +Now that you understand the fundamentals of production-ready agents, it's time to explore deployment. In the next module, we'll cover Agentuity's deployment environments - from local development through staging to production. 
--- -**Ready for Module 6?** [Deployment Environments](./06-deployment-environments) \ No newline at end of file +**Ready for Module 6?** [Deployment Environments](./06-deployment-environments) diff --git a/content/Training/developers/06-deployment-environments.mdx b/content/Training/developers/06-deployment-environments.mdx index c9e53f4d..c314763c 100644 --- a/content/Training/developers/06-deployment-environments.mdx +++ b/content/Training/developers/06-deployment-environments.mdx @@ -5,16 +5,6 @@ description: From local development to global production You've built agents with memory, collaboration, and production-grade guardrails. Now let's deploy them properly. -## The Reality of Enterprise Agent Deployment - -Research shows that [most organizations aren't "agent-ready"](https://www.ibm.com/think/insights/ai-agents-2025-expectations-vs-reality) - they lack the infrastructure, processes, and understanding needed for production agents. When [Salesforce deployed their Agentforce AI agents](https://www.hr-brew.com/stories/2025/03/04/salesforce-ai-agents-reskilling), it required massive organizational change alongside the technology. - -The challenge involves both technical and operational considerations: -- How do you test non-deterministic agents? -- How do you safely roll out changes that affect customer interactions? -- How do you monitor agents that make autonomous decisions? -- How do you handle failures gracefully when agents are unpredictable? 
- ## The Agentuity Environment Model Agentuity provides three distinct environments, each with specific purposes: @@ -25,7 +15,7 @@ Agentuity provides three distinct environments, each with specific purposes: Your local machine running DevMode: - Instant feedback with hot reload - Full debugging capabilities -- Observability provided by Traceloop (OpenTelemetry integration) +- Observability provided by OpenTelemetry integration - Access to Agentuity API features (memory, etc) ```bash @@ -48,12 +38,12 @@ Cloud deployment with test data: - Options for different deployment configurations - Ability to use different keys and environment variables -```bash -# Deploy to staging (coming soon) -``` + +Staging environment deployment is coming soon. For now, you can test locally with DevMode and deploy directly to production. + ### Production Environment -**Where agents serve users** +**Where agents live** Full-scale deployment: - Auto-scaling infrastructure @@ -64,6 +54,47 @@ Full-scale deployment: agentuity deploy ``` +## Preview Deployments + +**Testing changes in the cloud before production** + +Preview deployments allow you to test agent modifications in a cloud environment automatically when you create pull requests, giving you confidence before merging to production. + +### How Preview Deployments Work + +1. **Enable in Console**: Turn on preview deployments for your repository +2. **Create Pull Request**: Open a PR with your agent changes +3. **Automatic Deployment**: Agentuity automatically deploys your changes to a preview environment +4. **Test with Unique URLs**: Each preview gets unique URLs with git hash tagging +5. **Merge with Confidence**: Test thoroughly before merging to production + +### Setting Up Preview Deployments + +Enable preview deployments in your repository settings through the Agentuity console with a simple toggle switch. 
+ +### PR Integration + +When you create a pull request, Agentuity automatically comments with: +- Direct links to your preview deployment +- Curl examples for API testing +- Deployment logs and monitoring links +- Configuration details from your agentuity.yaml + +### Preview Environment Access + +Each preview deployment gets tagged URLs that include your git commit hash, allowing precise testing of specific changes. + + +**Current Status**: Preview deployments share production resources for storage (KV, vector, object store). Full resource isolation is coming soon. + + +### Benefits for Development Teams + +- **Collaborative testing**: Share preview links with team members and stakeholders +- **Integration validation**: Test with real external APIs in a cloud environment +- **Performance insights**: See how changes perform under cloud conditions +- **Risk reduction**: Catch issues before they reach production users + ## Environment Progression ### Moving Through Environments @@ -73,6 +104,7 @@ The typical flow moves from local development to production with clear validatio | Stage | Purpose | Key Focus | Typical Timeline | |-------|---------|-----------|------------------| | **Local Dev** | Build & iterate rapidly | Feature development | Hours to days | +| **Preview** | Test changes in the cloud | PR validation & team collaboration | Minutes to hours | | **Staging** | Validate production readiness | Performance & integration testing | Days to weeks | | **Production** | Serve real users | Monitoring & optimization | Ongoing | @@ -80,14 +112,14 @@ The typical flow moves from local development to production with clear validatio Each environment emphasizes different aspects of your agent's readiness: -| Focus Area | Development | Staging | Production | -|------------|------------|---------|------------| -| **Functionality** | Core features work | Load & stress testing | Success metrics | -| **Memory & State** | Memory operations function | Memory performance under load | 
Response time tracking | -| **Error Handling** | Validation works | Rate limiting effective | Error rate monitoring | -| **Observability** | Debug information available | Telemetry accuracy | Performance insights | -| **Integration** | Error messages helpful | External service testing | User experience metrics | -| **Resilience** | Basic error recovery | Failover mechanisms | Incident response | +| Focus Area | Development | Preview | Staging | Production | +|------------|------------|---------|---------|------------| +| **Functionality** | Core features work | Change validation | Load & stress testing | Success metrics | +| **Memory & State** | Memory operations function | Data integration testing | Memory performance under load | Response time tracking | +| **Error Handling** | Validation works | Edge case testing | Rate limiting effective | Error rate monitoring | +| **Observability** | Debug information available | Cloud behavior verification | Telemetry accuracy | Performance insights | +| **Integration** | Error messages helpful | Real API testing | External service testing | User experience metrics | +| **Resilience** | Basic error recovery | Team collaboration | Failover mechanisms | Incident response | ## Enterprise Deployment Options @@ -96,10 +128,53 @@ Each environment emphasizes different aspects of your agent's readiness: ### Private Cloud Deployments -Deploy Agentuity infrastructure in your own cloud account. More information coming soon! + +**Complete infrastructure control via CLI** + + +**Coming Soon**: Create and manage your own Agentuity clusters with full control over infrastructure, compliance, and data sovereignty. 
+ + +#### Multi-Cloud Support +Deploy Agentuity infrastructure on your preferred cloud provider with full region selection: +- **Amazon Web Services (AWS)** - All AWS regions +- **Microsoft Azure** - Global Azure regions +- **Google Cloud Platform (GCP)** - Worldwide GCP regions +- **VMware** - On-premise and private cloud deployments + +#### Flexible Cluster Sizing +Choose the right cluster size for your workload: + +| Size | Use Case | Typical Workload | +|------|----------|------------------| +| **Dev** | Development & testing | Individual developers, proof of concepts | +| **Small** | Team deployments | Small teams, departmental use | +| **Medium** | Production workloads | Multiple teams, moderate scale | +| **Large** | Enterprise scale | High-volume production, multiple business units | + +#### Complete Cluster Management +Create and manage clusters through the command-line interface with full control over: + +- **Cluster Creation**: New cluster provisioning across supported providers +- **Region Selection**: Choose optimal geographic deployment locations +- **Machine Management**: List, remove, and monitor individual cluster machines +- **Status Monitoring**: Real-time cluster health and performance visibility + +#### Deployment Options +- **Automated Setup**: Streamlined cloud provider integration +- **Manual Setup**: Step-by-step guidance for custom configurations +- **Full Control**: Complete networking, security, and resource customization + +#### Enterprise Benefits +- **Data Sovereignty**: Keep all data within your infrastructure boundaries +- **Compliance Control**: Meet specific regulatory and security requirements +- **Performance Optimization**: Deploy closer to your users and data sources +- **Cost Management**: Direct control over resource allocation and scaling ### On-Premise Installations -Run Agentuity entirely within your data center. More information coming soon! 
+**VMware-based deployments for complete on-premise control** + +Run Agentuity entirely within your data center using VMware infrastructure. Perfect for organizations requiring complete air-gapped deployments or specific hardware control. ## Configuration Management @@ -107,16 +182,9 @@ Run Agentuity entirely within your data center. More information coming soon! Different environments need different configurations: -{/* Environment-specific settings are typically configured through the Agentuity web console. -Screenshots showing the web UI for environment configuration will be added here. - -The web console allows you to set: -- Environment variables per deployment -- Rate limiting and caching settings -- Feature flags and deployment options -- Observability and monitoring configuration */} - -*Environment-specific settings are managed through the Agentuity web console. Screenshots of the configuration interface will be provided here.* + +Environment-specific settings are configured in the Agentuity web console. More information coming soon. + ## Real-World Deployment Lessons @@ -136,44 +204,6 @@ When Salesforce deployed agents to handle 50% of support cases, they learned: - **Cost management**: Token limits per customer interaction - **Security paramount**: No customer data in prompts -## Lab: Deploy Your Financial Advisor - -Let's deploy the financial advisor from Module 5: - -### Step 1: Prepare for Deployment - -```bash -# Ensure all tests pass -npm test # or python -m pytest - -# Check environment variables -agentuity config verify - -# Build for production -agentuity build -``` - -### Step 2: Deploy to Production - -```bash -# Deploy with confirmation -agentuity deploy --confirm - -# Monitor deployment -agentuity logs --follow - -# Check health -agentuity status -``` - -### Step 3: Monitor in Production - -View your deployment in the Agentuity console: -- Real-time metrics -- Session traces -- Error tracking -- Cost analysis - ## What's Next? 
You now understand the deployment journey from local development to production. But how do you build complete, production-ready systems that combine everything you've learned? @@ -185,12 +215,6 @@ In the next module, we'll tackle the capstone project - building a full agent sy - Comprehensive observability - Proper deployment practices -But first, experiment with deployment: -- Deploy your agents to production -- Monitor their performance -- Try different configurations -- Practice rollback procedures - --- -**Ready for the Capstone?** [Module 7: Sandbox & Capstone Project](./07-sandbox-capstone) \ No newline at end of file +**Ready for the Capstone?** [Module 7: Sandbox & Capstone Project](./07-sandbox-capstone) diff --git a/content/Training/developers/07-sandbox-capstone.mdx b/content/Training/developers/07-sandbox-capstone.mdx index fa28933a..d779acbc 100644 --- a/content/Training/developers/07-sandbox-capstone.mdx +++ b/content/Training/developers/07-sandbox-capstone.mdx @@ -1,9 +1,9 @@ --- -title: "Module 7: Sandbox & Capstone Project" -description: Bringing it all together in a complete production system +title: "Module 7: Advanced Multi-Agent Research System" +description: Building sophisticated agent coordination with recursive research --- -Time to combine everything you've learned into a complete, production-ready agent system. +Time to build an advanced multi-agent research system that combines everything you've learned. ## Building Your Capstone @@ -16,7 +16,7 @@ As you work on your capstone project: 5. **Monitor everything** - You can't fix what you can't see -Pro Tip: Build your capstone iteratively. Start with two agents (triage + one specialist), get that working perfectly, then add more complexity. +Pro Tip: Build your research system iteratively. Start with orchestrator + web search agents, get that working perfectly, then add recursive research and report generation. 
## Coming Soon: Agentic Sandbox @@ -55,358 +55,215 @@ An intelligent assistant that helps you learn: - **Learning paths**: Customized based on your experience - **Best practices**: Recommends patterns for your use case -## Capstone Project: Customer Support System +## Capstone Project: Deep Research System -Let's build a complete customer support system that demonstrates mastery of all previous modules. +Build an advanced multi-agent research system that demonstrates mastery of sophisticated agent coordination, recursive algorithms, and real-world API integration. ### Project Overview -**Goal**: Build a multi-agent customer support system that can: -- Handle customer inquiries autonomously -- Escalate complex issues appropriately -- Learn from interactions -- Maintain conversation context -- Ensure compliance and safety +**Goal**: Create a research system that can: +- Coordinate multiple specialized research agents +- Conduct recursive deep-dive investigations +- Accumulate knowledge across research iterations +- Generate comprehensive structured reports +- Handle complex multi-source information synthesis ### System Architecture -### Skills in Practice - -Building this multi-agent system demonstrates the key concepts covered in this training: - -| Module | Key Skills Applied | -|--------|-------------------| -| **Agent Basics** | Request/response handling, error management, welcome messages, trigger configuration | -| **Agent Anatomy** | Lifecycle management, comprehensive logging, context usage, tool integration | -| **Memory Management** | Conversation history, user preferences, session management, cleanup strategies | -| **Multi-Agent Collaboration** | Routing logic, context sharing, smooth handoffs, information preservation | -| **Production Readiness** | Input validation (Zod/Pydantic), rate limiting, observability spans, compliance rules | -| **Deployment** | Environment configuration, production deployment, monitoring setup | - -### Phase 1: Triage Agent - 
-The entry point for all customer interactions: - - { - // Track the conversation - return context.tracer.startActiveSpan('triage-request', async (span) => { - const sessionId = request.metadata.get('session_id'); - span.setAttribute('session.id', sessionId); - - // Get conversation history - const history = await context.kv.get('conversations', sessionId); - const messages = history.exists ? await history.data.json() : []; - - // Analyze customer intent - const customerMessage = await request.data.text(); - messages.push({ role: 'user', content: customerMessage }); - - const intent = await generateObject({ - model: anthropic('claude-3-7-sonnet'), - schema: CustomerIntentSchema, - prompt: \` - Analyze this customer support request: - Message: \${customerMessage} - History: \${JSON.stringify(messages.slice(-5))} - - Categorize as: faq, technical, billing, or urgent - \` - }); - - span.setAttribute('intent.category', intent.object.category); - span.setAttribute('intent.confidence', intent.object.confidence); - - // Route to appropriate agent - if (intent.object.confidence < 0.7 || intent.object.category === 'urgent') { - // Low confidence or urgent = human escalation - span.addEvent('escalating-to-human'); - span.end(); - return response.json({ - action: 'escalate', - message: 'Connecting you with a human agent...', - reason: intent.object.summary - }); - } - - // Hand off to specialist agent - span.end(); - return response.handoff( - { name: intent.object.category + '-agent' }, - { - data: customerMessage, - contentType: 'text/plain', - metadata: { - session_id: sessionId, - intent: intent.object, - conversation_history: messages.slice(-5) // Include recent context - } - } - ); - }); -};`} /> - -### Phase 2: Specialist Agents - -Create specialized agents for each category: - -#### FAQ Agent -- Answers common questions -- Uses vector search for knowledge base -- Tracks which questions are most frequent - -#### Technical Support Agent -- Troubleshoots technical issues 
-- Guides through diagnostic steps -- Creates tickets for engineering - -#### Billing Agent -- Handles payment questions -- Looks up account information (mock) -- Processes refund requests - -### Phase 3: Memory & Learning - -Implement sophisticated memory: - -```python -# Store interaction outcomes -await context.kv.set( - "outcomes", - f"{session_id}_{timestamp}", - { - "category": intent.category, - "resolved": was_resolved, - "escalated": was_escalated, - "satisfaction": customer_rating, - "agent_actions": actions_taken - }, - {"ttl": 30 * 24 * 3600} # Keep for 30 days -) - -# Learn from patterns -outcomes = await context.kv.list("outcomes", prefix=f"{user_id}_") -if frequent_escalations(outcomes): - # Adjust confidence thresholds - # Flag for human review - pass -``` - -### Phase 4: Production Hardening - -Add all production features: - -#### Guardrails -- PII detection and redaction -- Compliance with support policies -- Rate limiting per customer -- Response length limits - -#### Observability -- Custom spans for each decision point -- Track resolution rates -- Monitor escalation patterns -- Cost per interaction - -#### Testing Suite -```python -# test_support_system.py -test_cases = [ - { - "input": "How do I reset my password?", - "expected_category": "faq", - "should_escalate": False - }, - { - "input": "The app crashes when I click submit", - "expected_category": "technical", - "should_escalate": False - }, - { - "input": "I was charged twice!!!", - "expected_category": "billing", - "confidence_threshold": 0.9 # Higher for financial - }, - { - "input": "This is unacceptable! 
I demand a manager!", - "expected_category": "urgent", - "should_escalate": True - } -] -``` - -### Phase 5: Deployment - -Deploy your complete system: - -```bash -# Test locally -agentuity dev - -# Run test suite -npm test +### Skills Progression -# Deploy to production -agentuity deploy - -# Monitor performance -agentuity logs --follow -``` +This sophisticated system builds on concepts from every previous module: -## Alternative Capstone Projects +| Module | Skills Applied in Deep Research | +|--------|--------------------------------| +| **Module 1** | Request handling, error management → Complex orchestration | +| **Module 2** | Trigger behaviors → Agent coordination patterns | +| **Module 3** | Memory management → Research state accumulation | +| **Module 4** | Multi-agent basics → Advanced orchestration | +| **Module 5** | Validation → Research quality assurance | +| **Module 6** | Deployment → Production research system | -Not interested in customer support? Choose an alternative: +### Key Implementation: Research Orchestration -### Option 2: Deep Research System +The orchestrator coordinates the entire research workflow: -Build a comprehensive research and analysis system: -- **Research Coordinator**: Routes queries to appropriate researchers -- **Web Research Agent**: Searches and gathers online sources -- **Academic Research Agent**: Finds scholarly articles and papers -- **Analysis Agent**: Synthesizes information across sources -- **Report Generator**: Creates structured research reports -- **Fact Verification Agent**: Cross-checks claims and citations + + +### Key Implementation: Recursive Research Pattern + +The researcher agent conducts deep, iterative research: + += MAX_DEPTH or sufficient_information(accumulated_knowledge): + return accumulated_knowledge + + # Conduct research iteration + new_findings = await search_and_analyze(query, context) + accumulated_knowledge.extend(new_findings) + + # Generate follow-up questions based on findings + 
follow_up_queries = await generate_follow_ups(accumulated_knowledge) + + # Recursive calls for deeper research + for follow_up in follow_up_queries: + deeper_knowledge = await conduct_research( + follow_up, accumulated_knowledge, depth + 1, context + ) + accumulated_knowledge.extend(deeper_knowledge) + + return accumulated_knowledge`} js={`// TypeScript - Recursive research with learning accumulation +const conductResearch = async ( + query: string, + accumulatedKnowledge: ResearchFinding[], + depth: number, + context: AgentContext +) => { + if (depth >= MAX_DEPTH || sufficientInformation(accumulatedKnowledge)) { + return accumulatedKnowledge; + } + + // Conduct research iteration + const newFindings = await searchAndAnalyze(query, context); + accumulatedKnowledge.push(...newFindings); + + // Generate follow-up questions based on findings + const followUpQueries = await generateFollowUps(accumulatedKnowledge); + + // Recursive calls for deeper research + for (const followUp of followUpQueries) { + const deeperKnowledge = await conductResearch( + followUp, accumulatedKnowledge, depth + 1, context + ); + accumulatedKnowledge.push(...deeperKnowledge); + } -### Option 4: Content Creation Pipeline + return accumulatedKnowledge; +};`} /> -Build a multi-agent content production system: -- **Content Planner**: Develops content strategies and schedules -- **Research Agent**: Gathers information and sources -- **Writer Agent**: Creates draft content -- **Editor Agent**: Reviews and improves content quality -- **SEO Optimizer**: Optimizes for search and engagement -- **Publisher Agent**: Handles final formatting and distribution +### Key Implementation: Smart Caching Strategies + +Different caching approaches based on research depth and complexity: + + + +### Build This Project Yourself + +Ready to implement this project? Follow our complete examples: + +
+ + + TypeScript + +
+ + Python (Coming Soon) +
+
+ +### What This System Demonstrates + +- **Advanced multi-agent orchestration**: Coordinated workflow between specialized research agents +- **Recursive algorithms**: Self-improving research with learning accumulation across iterations +- **External API integration**: Real-world web search with intelligent filtering and relevance evaluation +- **State accumulation**: Complex memory management and knowledge aggregation +- **Production patterns**: Error handling, rate limiting, comprehensive observability + +The complete examples show how to build sophisticated research systems that demonstrate mastery of advanced agent concepts. + +## Testing Your Deep Research System + +1. **Start DevMode:** +```bash +agentuity dev +``` -Key challenges: -- Content quality consistency -- Brand voice maintenance -- Multi-format content adaptation -- Publishing workflow coordination -- Performance analytics integration +2. **Test research requests:** + - **Simple topics**: "Benefits of renewable energy" + - **Complex topics**: "Impact of quantum computing on cryptography" + - **Technical subjects**: "Latest developments in machine learning architectures" +3. **Monitor the orchestration:** + - Watch logs to see agent coordination and recursive research calls + - Observe how research accumulates across multiple iterations + - See how the report generator synthesizes the accumulated findings +## Key Takeaways -## Congratulations! 
+By building this deep research system, you've mastered: -By completing this capstone, you've demonstrated mastery of: -- Agent architecture and design -- Memory and state management -- Multi-agent orchestration -- Production deployment -- Real-world problem solving +- **Multi-agent orchestration**: Coordinating specialized agents for complex workflows +- **Recursive algorithms**: Self-improving systems with learning accumulation +- **Production integration**: Real-world APIs with intelligent filtering and caching +- **Advanced memory management**: State accumulation across multiple agent interactions +- **Sophisticated observability**: Comprehensive tracking of complex agent workflows -You're now ready to build production agent systems with Agentuity! +You're now ready to build advanced production agent systems with Agentuity! ## What's Next? -- Deploy your capstone to production -- Share your implementation with the community -- Explore advanced patterns -- Build agents for your own use cases -- Contribute to the Agentuity ecosystem +- Deploy your research system to production +- Experiment with different research domains +- Build agents for your own complex use cases +- Share your implementations with the community -Welcome to the future of agent development! 🚀 +Welcome to advanced agent development! 🚀 --- - -**Ready to dive deeper?** Check the [documentation](https://docs.agentuity.com) for advanced patterns and deployment guides. 
\ No newline at end of file diff --git a/content/Training/developers/index.mdx b/content/Training/developers/index.mdx index 12e34cc6..c8e6bd6a 100644 --- a/content/Training/developers/index.mdx +++ b/content/Training/developers/index.mdx @@ -15,7 +15,7 @@ By the end of this course, you'll have deployed: - Production agents with persistent memory (stateful, long-running) - Multi-agent workflows with seamless agent-to-agent communication - Framework-agnostic agents (LangChain, CrewAI, or custom) -- Fully observable systems with OpenTelemetry tracing +- Fully observable systems with [OpenTelemetry](https://opentelemetry.io/) tracing - Complete dev → staging → production deployment pipelines ## Time Investment @@ -33,16 +33,18 @@ By the end of this course, you'll have deployed: ## Choose Your Path ### New to AI Agents? -Start with [Module 1: Introduction to Agents](./01-introduction-to-agents) to understand how agents differ from traditional software and why they need specialized infrastructure. +Start with [Module 1: Introduction to Agents](./developers/01-introduction-to-agents) to understand how agents differ from traditional software and why they need specialized infrastructure. ### Built Agents Before? -Jump to [Module 3: Agent Memory](./03-agent-memory) to see how Agentuity handles state differently, or [Module 4: Agent-to-Agent Collaboration](./04-agent-collaboration) for orchestration patterns. +Jump to [Module 3: Agent Memory](./developers/03-agent-memory) to see how Agentuity handles state differently, or [Module 4: Agent-to-Agent Collaboration](./developers/04-agent-collaboration) for orchestration patterns. ### Need Production Deployment? -Go straight to [Module 6: Deployment Environments](./06-deployment-environments) to understand Agentuity's dev/staging/prod environments and scaling approach. 
+Go straight to [Module 6: Deployment Environments](./developers/06-deployment-environments) to understand Agentuity's dev/staging/prod environments and scaling approach. ### Migrating from Another Platform? -Check our Framework Migration Guide for specifics on moving from AWS Bedrock, Google ADK, or Azure AI. + +Framework Migration Guide coming soon for developers moving from AWS Bedrock, Google ADK, or Azure AI. + ## What Makes This Different @@ -52,4 +54,14 @@ Unlike generic AI courses, this curriculum: - Uses actual industry examples (both successes and cautionary tales) - Teaches **platform-agnostic concepts** alongside Agentuity-specific implementation -Ready to dive into the world of AI agents? Let's get started with [Module 1: Introduction to Agents](./01-introduction-to-agents). \ No newline at end of file +Ready to dive into the world of AI agents? Let's get started with [Module 1: Introduction to Agents](./developers/01-introduction-to-agents). + +## All Modules + +- [Module 1: Introduction to Agents](./developers/01-introduction-to-agents) +- [Module 2: Anatomy of an Agent](./developers/02-anatomy-of-an-agent) +- [Module 3: Agent Memory](./developers/03-agent-memory) +- [Module 4: Agent Collaboration](./developers/04-agent-collaboration) +- [Module 5: Observability, Guardrails, & Evals](./developers/05-observability-guardrails-evals) +- [Module 6: Deployment Environments](./developers/06-deployment-environments) +- [Module 7: Sandbox Capstone](./developers/07-sandbox-capstone) diff --git a/content/Training/index.mdx b/content/Training/index.mdx index fcbdf7ba..207ae796 100644 --- a/content/Training/index.mdx +++ b/content/Training/index.mdx @@ -12,10 +12,10 @@ Comprehensive hands-on course covering agent fundamentals through production dep - ~20 hours of content - Hands-on labs and capstone project -[Start Developer Training →](./developers) +[Start Developer Training →](Training/developers) ## Coming Soon **For Executives** - Strategic overview of the 
agent economy -**For Tech Leads** - Architecture patterns and integration strategies \ No newline at end of file +**For Tech Leads** - Architecture patterns and integration strategies From 7500edae430fe5f47cb874a628825abe3de3ebaa Mon Sep 17 00:00:00 2001 From: parteeksingh24 Date: Wed, 24 Sep 2025 13:49:38 -0700 Subject: [PATCH 05/13] Tweaks --- .../developers/02-anatomy-of-an-agent.mdx | 11 +++++++++++ .../05-observability-guardrails-evals.mdx | 17 +++++++++++++++-- .../developers/06-deployment-environments.mdx | 12 ++++++------ .../Training/developers/07-sandbox-capstone.mdx | 2 +- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/content/Training/developers/02-anatomy-of-an-agent.mdx b/content/Training/developers/02-anatomy-of-an-agent.mdx index 49d35b3c..c3a79084 100644 --- a/content/Training/developers/02-anatomy-of-an-agent.mdx +++ b/content/Training/developers/02-anatomy-of-an-agent.mdx @@ -731,6 +731,11 @@ async def handle_manual_trigger(request: AgentRequest, response: AgentResponse, context.logger.info(f"Returning cached weather for {location_query}") return response.json(cached_weather) + # Resolve location to coordinates (geocoding step) + coordinates = await geocode_location(location_query) + if not coordinates: + return response.json({"error": "Unable to find location coordinates"}) + # Fetch fresh data and cache with shorter TTL weather = await fetch_nws_weather(coordinates["lat"], coordinates["lon"], location_query, context) await context.kv.set("weather", cache_key, weather, {"ttl": 300}) # 5 minutes @@ -756,6 +761,12 @@ async function handleManualTrigger(request: AgentRequest, response: AgentRespons return response.json(cachedWeather); } + // Resolve location to coordinates (geocoding step) + const coordinates = await geocodeLocation(locationQuery); + if (!coordinates) { + return response.json({ error: 'Unable to find location coordinates' }); + } + // Fetch fresh data and cache with shorter TTL const weather = await 
fetchNWSWeather(coordinates.lat, coordinates.lon, locationQuery, context); await context.kv.set('weather', cacheKey, weather, { ttl: 300 }); // 5 minutes diff --git a/content/Training/developers/05-observability-guardrails-evals.mdx b/content/Training/developers/05-observability-guardrails-evals.mdx index c07e7408..db1348cc 100644 --- a/content/Training/developers/05-observability-guardrails-evals.mdx +++ b/content/Training/developers/05-observability-guardrails-evals.mdx @@ -63,6 +63,10 @@ View in the Agentuity console Sessions tab with color-coded timeline visualizati ### Using the Logger { + // Extract data from request + const data = await request.data.json(); + const userId = request.metadata.get('user_id'); + // Logs appear in Sessions view with trace context context.logger.info('Processing', { userId }); @@ -91,6 +99,9 @@ Track important operations with custom spans to understand performance and debug { + const { query = '' } = await request.data.json(); + // Create a span for the entire validation flow return context.tracer.startActiveSpan('validate-financial-query', async (span) => { // Add context about this operation @@ -258,10 +271,10 @@ async def evaluate_with_jury(query: str, context) -> ContentEvaluation: ''' # Use structured generation with Python anthropic SDK - from anthropic import Anthropic + from anthropic import AsyncAnthropic import json - client = Anthropic() + client = AsyncAnthropic() response = await client.messages.create( model="claude-3-haiku-20240307", max_tokens=1000, diff --git a/content/Training/developers/06-deployment-environments.mdx b/content/Training/developers/06-deployment-environments.mdx index c314763c..cc6ca52c 100644 --- a/content/Training/developers/06-deployment-environments.mdx +++ b/content/Training/developers/06-deployment-environments.mdx @@ -29,8 +29,8 @@ agentuity dev ### Staging Environment **Where agents prove themselves** - -**Coming Soon**: Dedicated staging environments with production-like infrastructure 
but isolated from real users. + +**Now Available**: Staging environments are here! Test your agents with production-like infrastructure before going live. Cloud deployment with test data: @@ -38,10 +38,6 @@ Cloud deployment with test data: - Options for different deployment configurations - Ability to use different keys and environment variables - -Staging environment deployment is coming soon. For now, you can test locally with DevMode and deploy directly to production. - - ### Production Environment **Where agents live** @@ -58,6 +54,10 @@ agentuity deploy **Testing changes in the cloud before production** + +**Preview deployments are here!** Automatically test your PR changes in the cloud before merging. + + Preview deployments allow you to test agent modifications in a cloud environment automatically when you create pull requests, giving you confidence before merging to production. ### How Preview Deployments Work diff --git a/content/Training/developers/07-sandbox-capstone.mdx b/content/Training/developers/07-sandbox-capstone.mdx index d779acbc..c05d39b7 100644 --- a/content/Training/developers/07-sandbox-capstone.mdx +++ b/content/Training/developers/07-sandbox-capstone.mdx @@ -120,7 +120,7 @@ const researchPlan = await generateObject({ }); // Coordinate multi-step research workflow -const researcher = await ctx.getAgent({ name: 'deep-researcher' }); +const researcher = await context.getAgent({ name: 'deep-researcher' }); const researchResult = await researcher.run({ data: JSON.stringify({ query, From 4a8cf871aa1a28bc300eb5e27c2eff5b06426925 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Wed, 1 Oct 2025 07:24:09 -0600 Subject: [PATCH 06/13] updating tutorial api reader --- app/api/tutorials/[id]/route.ts | 10 ++- .../[id]/steps/[stepNumber]/route.ts | 21 ++++--- app/api/tutorials/route.ts | 43 +++++-------- content/tutorials.json | 61 +++++++++++++++++++ lib/tutorial/index.ts | 40 ++++++++++++ lib/tutorial/schemas.ts | 2 +- 6 files changed, 139 
insertions(+), 38 deletions(-) create mode 100644 content/tutorials.json diff --git a/app/api/tutorials/[id]/route.ts b/app/api/tutorials/[id]/route.ts index 434fa476..9b4245cd 100644 --- a/app/api/tutorials/[id]/route.ts +++ b/app/api/tutorials/[id]/route.ts @@ -1,7 +1,7 @@ import { NextRequest, NextResponse } from 'next/server'; -import { join } from 'path'; import { parseTutorialMDXCached } from '@/lib/tutorial/mdx-parser'; import { TutorialIdParamsSchema } from '@/lib/tutorial/schemas'; +import { getTutorialFilePath } from '@/lib/tutorial'; interface RouteParams { params: Promise<{ id: string }>; @@ -21,7 +21,13 @@ export async function GET(request: NextRequest, { params }: RouteParams) { } const { id } = validationResult.data; - const filePath = join(process.cwd(), 'content', 'Tutorial', `${id}.mdx`); + const filePath = await getTutorialFilePath(id); + if (!filePath) { + return NextResponse.json( + { success: false, error: 'Tutorial not found' }, + { status: 404 } + ); + } const parsed = await parseTutorialMDXCached(filePath); diff --git a/app/api/tutorials/[id]/steps/[stepNumber]/route.ts b/app/api/tutorials/[id]/steps/[stepNumber]/route.ts index d881bf75..53979777 100644 --- a/app/api/tutorials/[id]/steps/[stepNumber]/route.ts +++ b/app/api/tutorials/[id]/steps/[stepNumber]/route.ts @@ -1,7 +1,7 @@ import { NextRequest, NextResponse } from 'next/server'; -import { join } from 'path'; import { parseTutorialMDXCached } from '@/lib/tutorial/mdx-parser'; import { StepParamsSchema } from '@/lib/tutorial/schemas'; +import { getTutorialFilePath } from '@/lib/tutorial'; interface RouteParams { params: Promise<{ id: string; stepNumber: string }>; @@ -10,8 +10,7 @@ interface RouteParams { export async function GET(request: NextRequest, { params }: RouteParams) { try { const rawParams = await params; - - // Validate and transform parameters with Zod + const validationResult = StepParamsSchema.safeParse(rawParams); if (!validationResult.success) { return 
NextResponse.json( @@ -19,12 +18,18 @@ export async function GET(request: NextRequest, { params }: RouteParams) { { status: 400 } ); } - + const { id, stepNumber: stepNum } = validationResult.data; - - const filePath = join(process.cwd(), 'content', 'Tutorial', `${id}.mdx`); + + const filePath = await getTutorialFilePath(id); + if (!filePath) { + return NextResponse.json( + { success: false, error: 'Tutorial not found' }, + { status: 404 } + ); + } const parsed = await parseTutorialMDXCached(filePath); - + const step = parsed.steps.find(s => s.stepNumber === stepNum); if (!step) { return NextResponse.json( @@ -32,7 +37,7 @@ export async function GET(request: NextRequest, { params }: RouteParams) { { status: 404 } ); } - + return NextResponse.json({ success: true, data: { diff --git a/app/api/tutorials/route.ts b/app/api/tutorials/route.ts index 934f0891..15c7a2a5 100644 --- a/app/api/tutorials/route.ts +++ b/app/api/tutorials/route.ts @@ -1,57 +1,46 @@ import { NextResponse } from 'next/server'; -import { readdir } from 'fs/promises'; import { join } from 'path'; import { parseTutorialMDXCached } from '@/lib/tutorial/mdx-parser'; import { TutorialListItemSchema, type TutorialListItem } from '@/lib/tutorial/schemas'; +import { getTutorialsConfig } from '@/lib/tutorial'; export async function GET() { try { - const tutorialsDir = join(process.cwd(), 'content', 'Tutorial'); - - // Check if Tutorial directory exists, if not create it for future use - let entries: string[]; - try { - entries = await readdir(tutorialsDir); - } catch (error) { - // Return empty array if Tutorial directory doesn't exist yet - return NextResponse.json([]); - } - - const mdxFiles = entries.filter(file => file.endsWith('.mdx')); - + const config = await getTutorialsConfig(); + const tutorials = await Promise.all( - mdxFiles.map(async (file): Promise => { + config.tutorials.map(async (tutorialMeta): Promise => { try { - const filePath = join(tutorialsDir, file); + const filePath = 
join(process.cwd(), 'content', tutorialMeta.path); const parsed = await parseTutorialMDXCached(filePath); - + const tutorialItem = { - id: file.replace('.mdx', ''), - title: parsed.metadata.title, - description: parsed.metadata.description, - totalSteps: parsed.metadata.totalSteps, - difficulty: parsed.metadata.difficulty, - estimatedTime: parsed.metadata.estimatedTime, + id: tutorialMeta.id, + title: tutorialMeta.title, + description: tutorialMeta.description, + totalSteps: parsed.metadata.totalSteps || parsed.steps.length, + difficulty: tutorialMeta.difficulty, + estimatedTime: tutorialMeta.estimatedTime, }; // Validate the tutorial list item const validationResult = TutorialListItemSchema.safeParse(tutorialItem); if (!validationResult.success) { - console.warn(`Invalid tutorial item ${file}:`, validationResult.error.message); + console.warn(`Invalid tutorial item ${tutorialMeta.id}:`, validationResult.error.message); return null; } return validationResult.data; } catch (error) { - console.warn(`Failed to parse tutorial ${file}:`, error); + console.warn(`Failed to parse tutorial ${tutorialMeta.id} at ${tutorialMeta.path}:`, error); return null; } }) ); - + // Filter out failed tutorials const validTutorials = tutorials.filter(tutorial => tutorial !== null); - + return NextResponse.json(validTutorials); } catch (error) { console.error('Failed to load tutorials:', error); diff --git a/content/tutorials.json b/content/tutorials.json new file mode 100644 index 00000000..ae2db185 --- /dev/null +++ b/content/tutorials.json @@ -0,0 +1,61 @@ +{ + "_comment": "This file defines all available tutorials for the tutorial API endpoints. The tutorial API (/api/tutorials) reads from this file to determine which MDX files to process as tutorials, ensuring only intentionally marked files are included. 
Each tutorial entry must have a corresponding MDX file at the specified path.", + "tutorials": [ + { + "id": "01-introduction-to-agents", + "title": "Module 1: Introduction to Agents", + "description": "Understanding AI agents and the $47B opportunity", + "path": "Training/developers/01-introduction-to-agents.mdx", + "difficulty": "beginner", + "estimatedTime": "15 minutes" + }, + { + "id": "02-anatomy-of-an-agent", + "title": "Module 2: The Anatomy of an Agent", + "description": "Understanding how agents work - planning, reasoning, tools, and memory", + "path": "Training/developers/02-anatomy-of-an-agent.mdx", + "difficulty": "beginner", + "estimatedTime": "20 minutes" + }, + { + "id": "03-agent-memory", + "title": "Module 3: Agent Memory", + "description": "How agents remember, learn, and build context over time", + "path": "Training/developers/03-agent-memory.mdx", + "difficulty": "beginner", + "estimatedTime": "55 minutes" + }, + { + "id": "04-agent-collaboration", + "title": "Module 4: Agent-to-Agent Collaboration", + "description": "Building multi-agent systems that work together", + "path": "Training/developers/04-agent-collaboration.mdx", + "difficulty": "intermediate", + "estimatedTime": "30 minutes" + }, + { + "id": "05-observability-guardrails-evals", + "title": "Module 5: Observability, Guardrails, & Evals", + "description": "Making agents reliable, safe, and production-ready", + "path": "Training/developers/05-observability-guardrails-evals.mdx", + "difficulty": "intermediate", + "estimatedTime": "25 minutes" + }, + { + "id": "06-deployment-environments", + "title": "Module 6: Deployment Environments", + "description": "From local development to global production", + "path": "Training/developers/06-deployment-environments.mdx", + "difficulty": "intermediate", + "estimatedTime": "20 minutes" + }, + { + "id": "07-sandbox-capstone", + "title": "Module 7: Advanced Multi-Agent Research System", + "description": "Building sophisticated agent coordination 
with recursive research", + "path": "Training/developers/07-sandbox-capstone.mdx", + "difficulty": "advanced", + "estimatedTime": "45 minutes" + } + ] +} \ No newline at end of file diff --git a/lib/tutorial/index.ts b/lib/tutorial/index.ts index ff0ffffd..bc3ff7f9 100644 --- a/lib/tutorial/index.ts +++ b/lib/tutorial/index.ts @@ -1,3 +1,43 @@ // Tutorial state management exports export * from './types'; export * from './state-manager'; + +// Tutorial configuration utilities +import { join } from 'path'; +import { readFile } from 'fs/promises'; + +interface TutorialMeta { + id: string; + title: string; + description: string; + path: string; + difficulty?: string; + estimatedTime?: string; +} + +interface TutorialsConfig { + tutorials: TutorialMeta[]; +} + +/** + * Reads the tutorials.json configuration file + */ +export async function getTutorialsConfig(): Promise { + const configPath = join(process.cwd(), 'content', 'tutorials.json'); + const configContent = await readFile(configPath, 'utf-8'); + return JSON.parse(configContent); +} + +/** + * Gets the full file path for a specific tutorial based on tutorials.json + */ +export async function getTutorialFilePath(tutorialId: string): Promise { + const config = await getTutorialsConfig(); + const tutorial = config.tutorials.find(t => t.id === tutorialId); + + if (!tutorial) { + return null; + } + + return join(process.cwd(), 'content', tutorial.path); +} diff --git a/lib/tutorial/schemas.ts b/lib/tutorial/schemas.ts index 6f64921b..30716f2e 100644 --- a/lib/tutorial/schemas.ts +++ b/lib/tutorial/schemas.ts @@ -4,7 +4,7 @@ import { z } from 'zod'; export const TutorialMetadataSchema = z.object({ title: z.string().min(1, 'Title is required'), description: z.string().min(1, 'Description is required'), - totalSteps: z.number().positive('Total steps must be a positive number'), + totalSteps: z.number().positive('Total steps must be a positive number').optional(), difficulty: z.enum(['beginner', 'intermediate', 
'advanced']).optional(), estimatedTime: z.string().optional() }); From cf514cbecf5306ef32b964aa23e552a93ea84807 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Wed, 1 Oct 2025 07:24:33 -0600 Subject: [PATCH 07/13] add .env.example to agent-docs --- agent-docs/.env.example | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 agent-docs/.env.example diff --git a/agent-docs/.env.example b/agent-docs/.env.example new file mode 100644 index 00000000..a8b9bfbb --- /dev/null +++ b/agent-docs/.env.example @@ -0,0 +1,4 @@ +TUTORIAL_API_URL=http://localhost:3201 +VECTOR_STORE_NAME=docs +# Run `agentuity auth` -- this will be populated in .env +AGENTUITY_SDK_KEY= \ No newline at end of file From 480fee898caf1a92b316d8d4d2bcf08b2a37ebd9 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Sat, 4 Oct 2025 15:05:28 -0600 Subject: [PATCH 08/13] safe-guarding file reading --- app/api/page-content/route.ts | 12 ++- app/api/tutorials/route.ts | 9 +- components/CodeFromFiles.tsx | 25 ++--- lib/tutorial/index.ts | 18 +++- lib/tutorial/mdx-parser.ts | 22 ++--- lib/utils/secure-path.ts | 168 ++++++++++++++++++++++++++++++++++ lib/validation/middleware.ts | 6 +- 7 files changed, 216 insertions(+), 44 deletions(-) create mode 100644 lib/utils/secure-path.ts diff --git a/app/api/page-content/route.ts b/app/api/page-content/route.ts index c91c3939..4187afa7 100644 --- a/app/api/page-content/route.ts +++ b/app/api/page-content/route.ts @@ -1,5 +1,6 @@ import type { NextRequest } from 'next/server'; import docsJson from '@/content/docs.json'; +import { validatePathString } from '@/lib/utils/secure-path'; interface Doc { file: string; @@ -18,8 +19,15 @@ export async function GET(request: NextRequest) { return new Response('Path parameter required', { status: 400 }); } - if (path.includes('..') || path.includes('\\') || path.startsWith('/')) { - return new Response('Invalid path parameter', { status: 400 }); + // Validate path for security issues (but don't require leading slash for this API) 
+ const validation = validatePathString(path); + if (!validation.valid) { + return new Response(`Invalid path parameter: ${validation.error}`, { status: 400 }); + } + + // Additional check: path shouldn't start with '/' for this specific API + if (path.startsWith('/')) { + return new Response('Invalid path parameter: path should not start with "/"', { status: 400 }); } const doc = docs.find( diff --git a/app/api/tutorials/route.ts b/app/api/tutorials/route.ts index 15c7a2a5..aac96d44 100644 --- a/app/api/tutorials/route.ts +++ b/app/api/tutorials/route.ts @@ -1,8 +1,7 @@ import { NextResponse } from 'next/server'; -import { join } from 'path'; import { parseTutorialMDXCached } from '@/lib/tutorial/mdx-parser'; import { TutorialListItemSchema, type TutorialListItem } from '@/lib/tutorial/schemas'; -import { getTutorialsConfig } from '@/lib/tutorial'; +import { getTutorialsConfig, getTutorialFilePath } from '@/lib/tutorial'; export async function GET() { try { @@ -11,7 +10,11 @@ export async function GET() { const tutorials = await Promise.all( config.tutorials.map(async (tutorialMeta): Promise => { try { - const filePath = join(process.cwd(), 'content', tutorialMeta.path); + const filePath = await getTutorialFilePath(tutorialMeta.id); + if (!filePath) { + console.warn(`Tutorial file not found for ${tutorialMeta.id}`); + return null; + } const parsed = await parseTutorialMDXCached(filePath); const tutorialItem = { diff --git a/components/CodeFromFiles.tsx b/components/CodeFromFiles.tsx index a3f97467..ead65df5 100644 --- a/components/CodeFromFiles.tsx +++ b/components/CodeFromFiles.tsx @@ -1,8 +1,8 @@ import React from 'react'; -import { readFile } from 'fs/promises'; import path from 'path'; import CodeBlock from '@/app/chat/components/CodeBlock'; import { Tabs, Tab } from 'fumadocs-ui/components/tabs'; +import { readSecureFile } from '@/lib/utils/secure-path'; export interface CodeFromFilesSnippet { path: string; // repo-root-relative, e.g. 
"/examples/poc-tutorial/src/agent.ts" @@ -41,20 +41,15 @@ export default async function CodeFromFiles(props: CodeFromFilesProps) { return null; } - const repoRoot = process.cwd(); - const loaded = await Promise.all( snippets.map(async (s) => { - if (!s.path.startsWith('/')) { - throw new Error('CodeFromFiles: each snippet.path must start with "/" (repo-root-relative)'); - } - const absolutePath = path.resolve(repoRoot, `.${s.path}`); - if (!absolutePath.startsWith(repoRoot)) { - throw new Error('CodeFromFiles: resolved path escapes repository root'); - } - let fileContent = ''; + let content = ''; try { - fileContent = await readFile(absolutePath, 'utf-8'); + content = await readSecureFile(s.path, { + from: s.from, + to: s.to, + requireLeadingSlash: true + }); } catch (error) { const message = error instanceof Error ? error.message : String(error); return { @@ -63,14 +58,10 @@ export default async function CodeFromFiles(props: CodeFromFilesProps) { content: `// Failed to load ${s.path}: ${message}`, }; } - const lines = fileContent.split(/\r?\n/); - const startIdx = Math.max(0, (s.from ? s.from - 1 : 0)); - const endIdx = Math.min(lines.length, s.to ? 
s.to : lines.length); - const sliced = lines.slice(startIdx, endIdx).join('\n'); return { label: s.title || path.basename(s.path) || s.lang || 'code', lang: s.lang || inferLanguageFromExtension(s.path) || 'text', - content: sliced, + content, }; }) ); diff --git a/lib/tutorial/index.ts b/lib/tutorial/index.ts index bc3ff7f9..6ca11394 100644 --- a/lib/tutorial/index.ts +++ b/lib/tutorial/index.ts @@ -5,6 +5,7 @@ export * from './state-manager'; // Tutorial configuration utilities import { join } from 'path'; import { readFile } from 'fs/promises'; +import { resolveSecurePath } from '@/lib/utils/secure-path'; interface TutorialMeta { id: string; @@ -29,7 +30,8 @@ export async function getTutorialsConfig(): Promise { } /** - * Gets the full file path for a specific tutorial based on tutorials.json + * Gets the full file path for a specific tutorial based on tutorials.json. + * Validates that the tutorial path doesn't escape the content directory. */ export async function getTutorialFilePath(tutorialId: string): Promise { const config = await getTutorialsConfig(); @@ -39,5 +41,17 @@ export async function getTutorialFilePath(tutorialId: string): Promise { const fileContent = await readFile(filePath, 'utf-8'); @@ -218,21 +218,11 @@ function parseSnippetObject(objectString: string): { async function loadSnippetContent(snippet: { path: string; from?: number; to?: number }): Promise { try { - const repoRoot = process.cwd(); - const absolutePath = path.resolve(repoRoot, `.${snippet.path}`); - - // Security check - if (!absolutePath.startsWith(repoRoot)) { - throw new Error('Path escapes repository root'); - } - - const fileContent = await readFile(absolutePath, 'utf-8'); - const lines = fileContent.split(/\r?\n/); - - const startIdx = Math.max(0, (snippet.from ? snippet.from - 1 : 0)); - const endIdx = Math.min(lines.length, snippet.to ? 
snippet.to : lines.length); - - return lines.slice(startIdx, endIdx).join('\n'); + return await readSecureFile(snippet.path, { + from: snippet.from, + to: snippet.to, + requireLeadingSlash: true + }); } catch (error) { return `// Failed to load ${snippet.path}: ${error}`; } diff --git a/lib/utils/secure-path.ts b/lib/utils/secure-path.ts new file mode 100644 index 00000000..eac9cf43 --- /dev/null +++ b/lib/utils/secure-path.ts @@ -0,0 +1,168 @@ +import path from 'path'; +import { readFile } from 'fs/promises'; +import { z } from 'zod'; + +export class PathSecurityError extends Error { + constructor(message: string) { + super(message); + this.name = 'PathSecurityError'; + } +} + +export interface SecurePathOptions { + /** + * Base directory to resolve paths against (defaults to process.cwd()) + */ + baseDir?: string; + /** + * Whether to require the path to start with '/' (repo-relative) + */ + requireLeadingSlash?: boolean; +} + +export interface ReadSecureFileOptions extends SecurePathOptions { + /** + * Starting line number (1-indexed, inclusive) + */ + from?: number; + /** + * Ending line number (1-indexed, inclusive) + */ + to?: number; +} + +/** + * Validates a path string for security issues without resolving it. + * Checks for path traversal attempts and suspicious characters. + */ +export function validatePathString(pathStr: string): { valid: boolean; error?: string } { + if (!pathStr || pathStr.trim().length === 0) { + return { valid: false, error: 'Path cannot be empty' }; + } + + // Check for path traversal attempts + if (pathStr.includes('..')) { + return { valid: false, error: 'Path contains ".." (path traversal attempt)' }; + } + + // Check for backslashes (Windows-style paths can be problematic) + if (pathStr.includes('\\')) { + return { valid: false, error: 'Path contains backslashes' }; + } + + return { valid: true }; +} + +/** + * Resolves a path safely within a base directory. + * Ensures the resolved path doesn't escape the base directory. 
+ * + * @param inputPath - The path to resolve (should start with '/' for repo-relative paths) + * @param options - Configuration options + * @returns Absolute path that's guaranteed to be within the base directory + * @throws {PathSecurityError} If the path is invalid or escapes the base directory + */ +export function resolveSecurePath(inputPath: string, options: SecurePathOptions = {}): string { + const { + baseDir = process.cwd(), + requireLeadingSlash = true + } = options; + + // Validate the path string + const validation = validatePathString(inputPath); + if (!validation.valid) { + throw new PathSecurityError(validation.error!); + } + + // Check if path starts with '/' if required + if (requireLeadingSlash && !inputPath.startsWith('/')) { + throw new PathSecurityError( + 'Path must start with "/" (repo-relative path expected)' + ); + } + + // Resolve the path + // If path starts with '/', treat it as repo-relative by prepending '.' + const pathToResolve = inputPath.startsWith('/') ? `.${inputPath}` : inputPath; + const absolutePath = path.resolve(baseDir, pathToResolve); + + // Security check: ensure the resolved path is within the base directory. + // Compare via path.relative instead of a raw string prefix: a prefix test + // would accept sibling directories such as "<baseDir>-other". + const relativeToBase = path.relative(baseDir, absolutePath); + const escapesBase = + relativeToBase === '..' || + relativeToBase.startsWith(`..${path.sep}`) || + path.isAbsolute(relativeToBase); + if (escapesBase) { + throw new PathSecurityError( + `Resolved path escapes base directory: ${relativeToBase}` + ); + } + + return absolutePath; +} + +/** + * Reads a file securely with optional line range extraction. + * Combines path resolution, security checks, and file reading. 
+ * + * @param inputPath - The path to read (should start with '/' for repo-relative paths) + * @param options - Configuration options including line range + * @returns File content (optionally sliced to specified line range) + * @throws {PathSecurityError} If the path is invalid or escapes the base directory + * @throws {Error} If the file cannot be read + */ +export async function readSecureFile( + inputPath: string, + options: ReadSecureFileOptions = {} +): Promise { + const { from, to, ...pathOptions } = options; + + // Resolve the path securely + const absolutePath = resolveSecurePath(inputPath, pathOptions); + + // Read the file + const fileContent = await readFile(absolutePath, 'utf-8'); + + // If no line range specified, return full content + if (from === undefined && to === undefined) { + return fileContent; + } + + // Extract line range + const lines = fileContent.split(/\r?\n/); + const startIdx = Math.max(0, (from ? from - 1 : 0)); + const endIdx = Math.min(lines.length, to ? to : lines.length); + + return lines.slice(startIdx, endIdx).join('\n'); +} + +/** + * Zod schema for validating path strings. + * Can be used in API route parameter validation. + */ +export const SecurePathStringSchema = z.string() + .min(1, 'Path cannot be empty') + .refine( + (p) => !p.includes('..'), + 'Path contains ".." (path traversal attempt)' + ) + .refine( + (p) => !p.includes('\\'), + 'Path contains backslashes' + ); + +/** + * Zod schema for validating repo-relative paths (must start with '/'). + */ +export const RepoRelativePathSchema = SecurePathStringSchema + .refine( + (p) => p.startsWith('/'), + 'Path must start with "/" (repo-relative path expected)' + ); + +/** + * Zod schema for validating simple path identifiers (no slashes, no dots). + * Useful for validating tutorial IDs, page paths, etc. 
+ */ +export const PathIdentifierSchema = z.string() + .min(1, 'Identifier cannot be empty') + .refine( + (id) => !id.includes('..') && !id.includes('/') && !id.includes('\\'), + 'Identifier contains invalid characters (path traversal attempt)' + ); + diff --git a/lib/validation/middleware.ts b/lib/validation/middleware.ts index b04e3c7b..36ed8e7d 100644 --- a/lib/validation/middleware.ts +++ b/lib/validation/middleware.ts @@ -1,6 +1,7 @@ import { NextRequest, NextResponse } from 'next/server'; import { z } from 'zod'; import { MessageSchema } from '@/app/chat/types'; +import { PathIdentifierSchema } from '@/lib/utils/secure-path'; export interface ValidationError { field: string; @@ -101,10 +102,7 @@ export const StepNumberSchema = z.string().transform((val, ctx) => { return stepIndex; }); -export const TutorialIdSchema = z.string().min(1, 'must be a non-empty string').refine( - (id) => !id.includes('..') && !id.includes('/') && !id.includes('\\'), - 'contains invalid characters (path traversal attempt)' -); +export const TutorialIdSchema = PathIdentifierSchema; export function validateStepNumber(stepNumber: string): ValidationResult { const result = StepNumberSchema.safeParse(stepNumber); From fea3010e18b6db5acbcb7edc190303231bedd49a Mon Sep 17 00:00:00 2001 From: parteeksingh24 Date: Mon, 13 Oct 2025 11:35:12 -0700 Subject: [PATCH 09/13] Refine docs and update training agent - Update module content to provide step-by-step guidance - Add code step files (`examples/training`) for agent to use - Set training steps to a minimum of 1 (no step 0 state) - Use `dynamic-codeblock` in docs content and agent output - Remove modules 6 and 7 from `tutorials.json` (no training steps) - Update system prompts to use `askDocsAgentTool` more often --- .../src/agents/agent-pulse/context/builder.ts | 6 +- agent-docs/src/agents/agent-pulse/tools.ts | 20 +- app/chat/components/CodeBlock.tsx | 47 +- app/chat/types.ts | 7 +- components/CodeFromFiles.tsx | 4 +- 
.../developers/01-introduction-to-agents.mdx | 395 +++---- .../developers/02-anatomy-of-an-agent.mdx | 874 +++++---------- .../Training/developers/03-agent-memory.mdx | 978 ++++------------- .../developers/04-agent-collaboration.mdx | 991 +++++------------- .../05-observability-guardrails-evals.mdx | 860 +++------------ .../developers/07-sandbox-capstone.mdx | 281 +++-- content/Training/developers/index.mdx | 11 +- content/tutorials.json | 16 - .../training/01-intro/step1-basic-agent.py | 9 + .../training/01-intro/step1-basic-agent.ts | 14 + .../training/01-intro/step2-with-logging.py | 14 + .../training/01-intro/step2-with-logging.ts | 19 + .../training/01-intro/step3-with-state.py | 28 + .../training/01-intro/step3-with-state.ts | 34 + .../training/01-intro/step4-with-errors.py | 41 + .../training/01-intro/step4-with-errors.ts | 53 + .../step1-understanding-triggers.py | 29 + .../step1-understanding-triggers.ts | 35 + .../training/02-anatomy/step2-request-data.py | 44 + .../training/02-anatomy/step2-request-data.ts | 50 + .../02-anatomy/step3-structured-logging.py | 50 + .../02-anatomy/step3-structured-logging.ts | 60 ++ .../training/02-anatomy/step4-kv-caching.py | 47 + .../training/02-anatomy/step4-kv-caching.ts | 55 + .../02-anatomy/step5-response-formats.py | 43 + .../02-anatomy/step5-response-formats.ts | 48 + .../03-memory/step1-session-memory.py | 49 + .../03-memory/step1-session-memory.ts | 55 + .../03-memory/step2-persistent-preferences.py | 56 + .../03-memory/step2-persistent-preferences.ts | 63 ++ .../training/03-memory/step3-vector-basics.py | 80 ++ .../training/03-memory/step3-vector-basics.ts | 85 ++ .../03-memory/step4-vector-filtering.py | 88 ++ .../03-memory/step4-vector-filtering.ts | 92 ++ .../03-memory/step5-object-storage.py | 52 + .../03-memory/step5-object-storage.ts | 57 + .../step1-basic-handoff.py | 8 + .../step1-basic-handoff.ts | 13 + .../step2-data-formats.py | 29 + .../step2-data-formats.ts | 34 + .../step3-run-and-wait.py | 26 + 
.../step3-run-and-wait.ts | 33 + .../step4-parallel-execution.py | 33 + .../step4-parallel-execution.ts | 36 + .../step5-structured-responses.py | 68 ++ .../step5-structured-responses.ts | 54 + .../step1-child-loggers.py | 41 + .../step1-child-loggers.ts | 50 + .../step2-input-validation.py | 49 + .../step2-input-validation.ts | 57 + .../step3-ai-output-validation.py | 66 ++ .../step3-ai-output-validation.ts | 55 + .../step4-custom-spans.py | 64 ++ .../step4-custom-spans.ts | 71 ++ lib/tutorial/mdx-parser.ts | 7 +- lib/tutorial/schemas.ts | 13 +- lib/validation/middleware.ts | 7 +- 62 files changed, 3362 insertions(+), 3292 deletions(-) create mode 100644 examples/training/01-intro/step1-basic-agent.py create mode 100644 examples/training/01-intro/step1-basic-agent.ts create mode 100644 examples/training/01-intro/step2-with-logging.py create mode 100644 examples/training/01-intro/step2-with-logging.ts create mode 100644 examples/training/01-intro/step3-with-state.py create mode 100644 examples/training/01-intro/step3-with-state.ts create mode 100644 examples/training/01-intro/step4-with-errors.py create mode 100644 examples/training/01-intro/step4-with-errors.ts create mode 100644 examples/training/02-anatomy/step1-understanding-triggers.py create mode 100644 examples/training/02-anatomy/step1-understanding-triggers.ts create mode 100644 examples/training/02-anatomy/step2-request-data.py create mode 100644 examples/training/02-anatomy/step2-request-data.ts create mode 100644 examples/training/02-anatomy/step3-structured-logging.py create mode 100644 examples/training/02-anatomy/step3-structured-logging.ts create mode 100644 examples/training/02-anatomy/step4-kv-caching.py create mode 100644 examples/training/02-anatomy/step4-kv-caching.ts create mode 100644 examples/training/02-anatomy/step5-response-formats.py create mode 100644 examples/training/02-anatomy/step5-response-formats.ts create mode 100644 examples/training/03-memory/step1-session-memory.py create mode 
100644 examples/training/03-memory/step1-session-memory.ts create mode 100644 examples/training/03-memory/step2-persistent-preferences.py create mode 100644 examples/training/03-memory/step2-persistent-preferences.ts create mode 100644 examples/training/03-memory/step3-vector-basics.py create mode 100644 examples/training/03-memory/step3-vector-basics.ts create mode 100644 examples/training/03-memory/step4-vector-filtering.py create mode 100644 examples/training/03-memory/step4-vector-filtering.ts create mode 100644 examples/training/03-memory/step5-object-storage.py create mode 100644 examples/training/03-memory/step5-object-storage.ts create mode 100644 examples/training/04-agent-communication/step1-basic-handoff.py create mode 100644 examples/training/04-agent-communication/step1-basic-handoff.ts create mode 100644 examples/training/04-agent-communication/step2-data-formats.py create mode 100644 examples/training/04-agent-communication/step2-data-formats.ts create mode 100644 examples/training/04-agent-communication/step3-run-and-wait.py create mode 100644 examples/training/04-agent-communication/step3-run-and-wait.ts create mode 100644 examples/training/04-agent-communication/step4-parallel-execution.py create mode 100644 examples/training/04-agent-communication/step4-parallel-execution.ts create mode 100644 examples/training/04-agent-communication/step5-structured-responses.py create mode 100644 examples/training/04-agent-communication/step5-structured-responses.ts create mode 100644 examples/training/05-observability-guardrails/step1-child-loggers.py create mode 100644 examples/training/05-observability-guardrails/step1-child-loggers.ts create mode 100644 examples/training/05-observability-guardrails/step2-input-validation.py create mode 100644 examples/training/05-observability-guardrails/step2-input-validation.ts create mode 100644 examples/training/05-observability-guardrails/step3-ai-output-validation.py create mode 100644 
examples/training/05-observability-guardrails/step3-ai-output-validation.ts create mode 100644 examples/training/05-observability-guardrails/step4-custom-spans.py create mode 100644 examples/training/05-observability-guardrails/step4-custom-spans.ts diff --git a/agent-docs/src/agents/agent-pulse/context/builder.ts b/agent-docs/src/agents/agent-pulse/context/builder.ts index ca48a4f1..63a0221f 100644 --- a/agent-docs/src/agents/agent-pulse/context/builder.ts +++ b/agent-docs/src/agents/agent-pulse/context/builder.ts @@ -24,7 +24,11 @@ You have access to various tools you can use -- use when appropriate! === TOOL-USAGE RULES (must follow) === - startTutorialById must only be used when user select a tutorial. If the user starts a new tutorial, the step number should be set to one. Valid step is between 1 and totalSteps of the specific tutorial. -- Treat askDocsAgentTool as a search helper; ignore results you judge irrelevant. +- **askDocsAgentTool usage:** + - ALWAYS use askDocsAgentTool for questions about the Agentuity SDK, platform features, APIs, or CLI commands. + - Examples: AgentContext, AgentRequest, AgentResponse, ctx.logger, ctx.vector, resp.json, deployment, authentication, agent configuration. + - For non-Agentuity questions (general programming concepts), you may answer directly without the tool. + - Treat doc results as authoritative. If docs don't cover it, inform the user. === RESPONSE STYLE (format guidelines) === - Begin with a short answer, then elaborate if necessary. 
diff --git a/agent-docs/src/agents/agent-pulse/tools.ts b/agent-docs/src/agents/agent-pulse/tools.ts index 89f2e501..6ca21035 100644 --- a/agent-docs/src/agents/agent-pulse/tools.ts +++ b/agent-docs/src/agents/agent-pulse/tools.ts @@ -38,8 +38,15 @@ export async function createTools(context: ToolContext) { const data = tutorialResponse.data const totalSteps = tutorialResponse.data.totalSteps; + + // Guard: reject invalid step numbers (steps are 1-indexed) + if (stepNumber < 1) { + return `Step number must be 1 or greater. You requested step ${stepNumber}.`; + } + + // Guard: reject steps beyond tutorial length if (stepNumber > totalSteps) { - return `This tutorial only has ${totalSteps} steps. You either reached the end of the tutorial or selected an incorrect step number.`; + return `This tutorial only has ${totalSteps} step${totalSteps === 1 ? '' : 's'}. You requested step ${stepNumber}.`; } state.setAction({ type: ActionType.START_TUTORIAL_STEP, @@ -57,7 +64,16 @@ export async function createTools(context: ToolContext) { * This tool doesn't use state - it returns data directly */ const askDocsAgentTool = tool({ - description: "Query the Agentuity Development Documentation agent using RAG (Retrieval Augmented Generation) to get relevant documentation and answers about the Agentuity platform, APIs, and development concepts", + description: `Query the Agentuity Development Documentation agent using RAG (Retrieval Augmented Generation). + +USE THIS TOOL when users ask about: +- Agentuity SDK objects (AgentContext, AgentRequest, AgentResponse) +- SDK methods (ctx.logger, ctx.vector, ctx.kv, resp.json, resp.stream, etc.) +- Platform features (deployment, authentication, agent configuration, etc.) +- CLI commands (agentuity agent create, deploy, etc.) +- Agentuity APIs and development workflows (e.g. agent.run, agent.create, etc.) 
+ +This tool provides authoritative, up-to-date documentation specific to the Agentuity platform.`, parameters: z.object({ query: z.string().describe("The question or query to send to the query function"), }), diff --git a/app/chat/components/CodeBlock.tsx b/app/chat/components/CodeBlock.tsx index 8900cb92..4a236ccb 100644 --- a/app/chat/components/CodeBlock.tsx +++ b/app/chat/components/CodeBlock.tsx @@ -1,7 +1,6 @@ 'use client'; -import { useState, useEffect, useRef } from 'react'; -import { Copy, Check } from 'lucide-react'; +import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock'; interface SimpleCodeBlockProps { content: string; @@ -12,51 +11,17 @@ export default function CodeBlock({ content, language }: SimpleCodeBlockProps) { - const [code, setCode] = useState(content); - const [copied, setCopied] = useState(false); - const textareaRef = useRef(null); - - useEffect(() => { - setCode(content); - // Auto-size the textarea to its content within max height - if (textareaRef.current) { - const el = textareaRef.current; - el.style.height = 'auto'; - const maxPx = 500; - const newHeight = Math.min(el.scrollHeight, maxPx); - el.style.height = `${newHeight}px`; - } - }, [content]); - - const copyToClipboard = () => { - navigator.clipboard.writeText(code); - setCopied(true); - setTimeout(() => setCopied(false), 2000); - }; - return (
- {/* Header */} + {/* Custom header with language label */}
{language} -
- -
-