Building AI Agents
This section covers the main usage patterns of BaseAgent: basic inference, streaming, model settings, tools, structured output, RAG pipelines, and YAML configuration.
pip install rakam-systems-agent[all]
Available extras:
| Extra | What it adds |
|---|---|
| llm-providers | openai, mistralai, tiktoken |
| all | Everything above |
Run a basic agent
import asyncio
from dotenv import load_dotenv
load_dotenv()
from rakam_systems_agent import BaseAgent
async def main():
    """Create a minimal agent and run a single inference call."""
    assistant = BaseAgent(
        name="my_assistant",
        model="openai:gpt-4o",
        system_prompt="You are a helpful assistant.",
    )
    # arun() is the asynchronous single-shot entry point.
    reply = await assistant.arun("What is Python?")
    print(reply.output_text)

asyncio.run(main())
Stream responses
async def main():
    """Stream a response chunk-by-chunk as it is generated."""
    storyteller = BaseAgent(name="stream_agent", model="openai:gpt-4o")
    print("Response: ", end="", flush=True)
    # astream() yields text fragments; flush keeps the console output live.
    async for fragment in storyteller.astream("Tell me a short story."):
        print(fragment, end="", flush=True)
    print()

asyncio.run(main())
Customize model settings
import asyncio

from rakam_systems_core.interfaces import ModelSettings

async def main():
    """Run one call with per-request sampling overrides."""
    agent = BaseAgent(
        name="creative",
        model="openai:gpt-4o",
        system_prompt="You are a creative writer."
    )
    # ModelSettings applies to this call only: temperature=0.9 raises
    # sampling randomness, max_tokens=100 caps the reply length.
    # NOTE: the original snippet used `await` at module level, which is a
    # SyntaxError outside an async function — it must run inside main().
    result = await agent.arun(
        "Write a haiku about programming.",
        model_settings=ModelSettings(temperature=0.9, max_tokens=100)
    )
    print(result.output_text)

asyncio.run(main())
Add tools
import asyncio
from rakam_systems_agent import BaseAgent
from rakam_systems_core.interfaces.tool import ToolComponent
def get_weather(city: str) -> str:
    """Return a canned weather report for *city* (demo stub)."""
    return "Weather in {}: 22°C, Sunny".format(city)
# Wrap the plain function as a tool the agent can invoke; the JSON schema
# tells the model which arguments the tool accepts.
weather_tool = ToolComponent.from_function(
    function=get_weather,
    name="get_weather",
    description="Get current weather for a city",
    json_schema={
        "type": "object",
        "properties": {"city": {"type": "string", "description": "City name"}},
        "required": ["city"]
    }
)
async def main():
    """Let the model invoke get_weather through tool calling."""
    weather_agent = BaseAgent(
        name="tool_agent",
        model="openai:gpt-4o",
        tools=[weather_tool],
    )
    answer = await weather_agent.arun("What's the weather in Paris?")
    print(answer.output_text)

asyncio.run(main())
Use structured output
import asyncio
from pydantic import BaseModel, Field
from rakam_systems_agent import BaseAgent
class MovieReview(BaseModel):
    """Structured review the critic agent must return (validated by Pydantic)."""

    title: str = Field(description="Movie title")
    # Score constrained to the inclusive range [0, 10].
    rating: float = Field(ge=0, le=10)
    summary: str = Field(description="Brief summary")
    # Overall thumbs-up / thumbs-down verdict.
    recommended: bool
async def main():
    """Ask for a review and receive a validated MovieReview instance."""
    critic = BaseAgent(
        name="critic",
        model="openai:gpt-4o",
        output_type=MovieReview,
    )
    outcome = await critic.arun("Review the movie 'Inception'")
    # With output_type set, result.output is parsed into the declared model.
    review: MovieReview = outcome.output
    print(f"Rating: {review.rating}/10")
    verdict = "Yes" if review.recommended else "No"
    print(f"Recommended: {verdict}")

asyncio.run(main())
Build a RAG pipeline
Combine agents and vector store for question-answering over your documents:
import asyncio
from rakam_systems_agent import BaseAgent
from rakam_systems_vectorstore import FaissStore, Node, NodeMetadata
from rakam_systems_core.interfaces.tool import ToolComponent
# 1. Create vector store with your documents
# NOTE(review): initialising=True appears to build a fresh FAISS index at
# base_index_path rather than load an existing one — confirm against FaissStore docs.
store = FaissStore(name="kb", base_index_path="./kb_index",
    embedding_model="Snowflake/snowflake-arctic-embed-m", initialising=True)
# Each Node pairs a text chunk with metadata identifying its source and order.
kb_nodes = [
    Node(content="Our company was founded in 2020.", metadata=NodeMetadata(source_file_uuid="info", position=0)),
    Node(content="We offer AI Assistant at $99/month.", metadata=NodeMetadata(source_file_uuid="info", position=1)),
]
# Embed the nodes and register them under the "knowledge" collection.
store.create_collection_from_nodes("knowledge", kb_nodes)
# 2. Create search tool
def search_kb(query: str) -> str:
    """Search the knowledge base and return the matching chunks joined by newlines."""
    # Top-3 nearest neighbours; the second return value is ignored here.
    results, _ = store.search(collection_name="knowledge", query=query, number=3)
    # NOTE(review): assumes each results value unpacks as (_, content, _) —
    # verify against the FaissStore.search return format.
    return "\n".join([content for _, (_, content, _) in results.items()])
# Expose the retrieval function as a tool so the agent can decide when to search.
search_tool = ToolComponent.from_function(
    function=search_kb, name="search_kb",
    description="Search company knowledge base",
    json_schema={
        "type": "object",
        "properties": {"query": {"type": "string"}},
        "required": ["query"]
    }
)
# 3. Create RAG agent
async def main():
    """Answer a question by retrieving from the knowledge base first."""
    rag_agent = BaseAgent(
        name="rag_agent",
        model="openai:gpt-4o",
        system_prompt="Use search_kb tool to find information. Answer based on retrieved docs.",
        tools=[search_tool],
    )
    reply = await rag_agent.arun("How much does your product cost?")
    print(reply.output_text)  # "$99/month"

asyncio.run(main())
Add chat history
Enable multi-turn conversations by persisting message history between calls. The message_history parameter is available on arun(), ainfer(), and astream().
Install the extra:
pip install rakam-systems-agent[all]
JSONChatHistory stores history in a local file — the simplest option for development and single-instance deployments:
import asyncio
from dotenv import load_dotenv
load_dotenv()
from rakam_systems_agent import BaseAgent
from rakam_systems_agent.components.chat_history import JSONChatHistory
async def main():
    """Hold a two-turn conversation persisted through JSONChatHistory."""
    assistant = BaseAgent(
        name="chat_assistant",
        model="openai:gpt-4o",
        system_prompt="You are a helpful assistant.",
    )
    store = JSONChatHistory(config={"storage_path": "./chat_history.json"})
    session = "user-123"

    # Turn 1 — no prior history yet for a fresh session.
    prior = store.get_message_history(session)
    first = await assistant.arun("My name is Alice.", message_history=prior)
    store.save_messages(session, first.metadata["messages"])
    print(first.output_text)

    # Turn 2 — the saved exchange gives the agent its memory of turn 1.
    prior = store.get_message_history(session)
    second = await assistant.arun("What is my name?", message_history=prior)
    store.save_messages(session, second.metadata["messages"])
    print(second.output_text)  # "Your name is Alice."

asyncio.run(main())
The pattern is always: get → run → save.
1. `history.get_message_history(chat_id)` — load prior messages (returns `None` for new sessions, which is fine)
2. `agent.arun(..., message_history=messages)` — run with the loaded history
3. `history.save_messages(chat_id, result.metadata["messages"])` — persist the full exchange
Variants: For multi-instance or production deployments, swap JSONChatHistory for SQLChatHistory (SQLite) or PostgresChatHistory (PostgreSQL) — the get_message_history / save_messages API is identical across all three.
from rakam_systems_agent.components.chat_history import SQLChatHistory, PostgresChatHistory
# The two lines below are alternatives — pick ONE backend for your deployment.
# SQLite: single-file database, fine for one-machine setups.
history = SQLChatHistory(config={"db_path": "./chat.db"})
# PostgreSQL: shared server, suitable for multi-instance production.
history = PostgresChatHistory(config={"connection_string": "postgresql://user:pass@host/db"})
Configure an agent with YAML
Create agents from config files — no code changes needed to switch models or prompts:
config/agent.yaml:
version: "1.0"
agents:
  assistant:
    name: "assistant"
    llm_config:
      model: "openai:gpt-4o"
      temperature: 0.7
    system_prompt: "You are a helpful assistant."
Use in code:
import asyncio
from rakam_systems_core.config_loader import ConfigurationLoader

loader = ConfigurationLoader()
config = loader.load_from_yaml("config/agent.yaml")
# Instantiate the "assistant" agent declared under `agents:` in the YAML.
agent = loader.create_agent("assistant", config)
# Capture and print the reply — the original discarded the run result,
# unlike every other example in this guide.
result = asyncio.run(agent.arun("Hello!"))
print(result.output_text)