Building a Minimal Agent from Scratch using Granite 3.3

2025-04-26

Agents are the new hype. Dozens of frameworks are popping up every day.

To really understand something, I like to build it from scratch and get a feel for how it works. So I decided to build a minimal agent from scratch using Granite 3.3.

To be an agent, a model needs to have the choice to perform certain actions in order to achieve a goal. In this case, I will give the agent the ability to call a magic function so that it can tell the user what the function returns for the number the user provides.

The full code is available in this Colab Notebook.

I've created a modified version using Groq, Tavily Search, and Riza in this Colab Notebook.

1. Building the Toolbox

Let's start by building our toolbox:

# define toolbox

def magic_function(x: int) -> int:
    """
    Apply the demo "magic" transformation to a number.

    :param x: The number to transform.
    :return: ``x`` increased by two.
    """
    magic_offset = 2
    result = x + magic_offset
    return result

# Registry of callable tools, keyed by the name the model uses to request them.
toolbox = dict(magic_function=magic_function)

2. Defining the Response Schema

The response from the model can be one of the following:

- a function call
- a final response message to indicate that the agent is done
- a tool call error
- a regular message

Our execution loop should parse the response and interpret it accordingly.

# define response classes

from dataclasses import dataclass


import jinja2
from tabulate import tabulate
from colorama import Fore, Style


# Delimiters the model is instructed to wrap its structured output in.
# A tool call is "<tool_call>{json}" and a final answer is "<final_response>{json}".
# Both use the same end marker (presumably the model's end-of-text token;
# verify against the tokenizer).
TOOL_CALL_STRING = "<tool_call>"
TOOL_CALL_END_STRING = "<|end_of_text|>"
FINAL_RESPONSE_STRING = "<final_response>"
FINAL_RESPONSE_END_STRING = "<|end_of_text|>"

# Jinja2 environment used to render prompt templates (the system message is built from it).
environment = jinja2.Environment()


@dataclass
class Response:
    """Common base class for every kind of parsed model response."""
    pass


@dataclass
class ToolCallError(Response):
    """Response returned by the parsers when a tagged payload cannot be decoded."""
    message: str  # short description of what was wrong with the payload


@dataclass
class ToolCall(Response):
    """
    A request from the model to invoke one of the available tools.

    :param tool: Name of the tool to call.
    :param arguments: Keyword arguments to pass to the tool.
    """
    tool: str
    arguments: dict

    @classmethod
    def parse(cls, message: str):
        """
        Parses a message if it has a tool call in the format
        ``TOOL_CALL_STRING + '{"name": ..., "args": {...}}' + TOOL_CALL_END_STRING``.

        :param message: The message to parse.
        :return: A ToolCall object if the message contains a well-formed tool call,
            a ToolCallError if the tool call marker is present but the payload is
            malformed, otherwise None.
        """
        import json
        if TOOL_CALL_STRING not in message:
            return None
        try:
            # Keep only the text between the start marker and the (optional) end marker.
            payload = message.split(TOOL_CALL_STRING)[1]
            payload = payload.split(TOOL_CALL_END_STRING)[0]
            tool_call = json.loads(payload)
            return cls(tool=tool_call["name"], arguments=tool_call["args"])
        except (json.JSONDecodeError, KeyError, TypeError):
            # TypeError covers payloads that are valid JSON but not an object
            # (e.g. a list, string or number), where indexing by "name" fails.
            return ToolCallError(message="Invalid tool call format")


@dataclass
class FinalResponse(Response):
    """
    The model's final answer, signalling that the agent is done.

    :param message: The text of the final answer.
    """
    message: str

    @classmethod
    def parse(cls, message: str):
        """
        Parses a message if it has a final response in the format
        ``FINAL_RESPONSE_STRING + '{"message": ...}' + FINAL_RESPONSE_END_STRING``.

        :param message: The message to parse.
        :return: A FinalResponse object if the message contains a well-formed final
            response, a ToolCallError if the final response marker is present but
            the payload is malformed, otherwise None.
        """
        import json
        if FINAL_RESPONSE_STRING not in message:
            return None
        try:
            # Keep only the text between the start marker and the (optional) end marker.
            payload = message.split(FINAL_RESPONSE_STRING)[1]
            payload = payload.split(FINAL_RESPONSE_END_STRING)[0]
            final_response = json.loads(payload)
            return cls(message=final_response["message"])
        except (json.JSONDecodeError, KeyError, TypeError):
            # TypeError covers payloads that are valid JSON but not an object
            # (e.g. a list, string or number), where indexing by "message" fails.
            return ToolCallError(message="Invalid final response format")


@dataclass
class MessageResponse(Response):
    """Plain assistant text that carries no tool call or final response payload."""
    content: str

    @classmethod
    def parse(cls, message: str):
        """
        Wraps a raw model message in a MessageResponse.

        :param message: The message to wrap.
        :return: A MessageResponse whose content is the message, unchanged.
        """
        wrapped = cls(content=message)
        return wrapped

3. Utility Methods for Parsing and Logging

Let's define some utility methods for parsing and logging

# utility methods


def parse_response(response_message: str) -> Response:
    """
    Classifies a raw model reply.

    The structured parsers are tried in order (tool call first, then final
    response); the first one that yields a result wins. A reply matching
    neither is wrapped as a plain message.
    """
    for response_type in (ToolCall, FinalResponse):
        parsed = response_type.parse(response_message)
        if parsed:
            return parsed
    return MessageResponse.parse(response_message)


def log(color: str, message: str):
    """
    Prints the message prefixed with the given colorama color code and
    followed by a style reset.
    """
    colored_line = "{}{}{}".format(color, message, Style.RESET_ALL)
    print(colored_line)


def print_message(message: dict):
    """
    Prints the message content in a tabular format and assigns colors to each role using colorama.
    """
    role = message["role"]
    content = message["content"]
    if role == "user":
        color = Fore.CYAN
    elif role == "assistant":
        color = Fore.GREEN
    elif role == "function":
        color = Fore.YELLOW
    else:
        color = Fore.WHITE
    log(color, f"{role}: {content}")


def print_messages(messages: list):
    """
    Prints the turn id and the message content in a tabular format and assigns colors to each role using colorama.
    """
    table = []
    for i, message in enumerate(messages):
        role = message["role"]
        content = message["content"]
        if role == "user":
            color = Fore.CYAN
        elif role == "assistant":
            color = Fore.GREEN
        elif role == "function":
            color = Fore.YELLOW
        else:
            color = Fore.WHITE
        table.append([i, color + role + Style.RESET_ALL, content])
    print(tabulate(table, headers=["Turn ID", "Role", "Message"], tablefmt="fancy_grid", stralign="left", maxcolwidths=[None, None, 120]))

4. Loading the Model and Calling It

Let's use the snippet provided in the Granite 3.3 Model Card

from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
import torch

# Model identifier handed to from_pretrained, and the device to place the model on.
model_path = "ibm-granite/granite-3.3-2b-instruct"
device = "cuda"

# Load the model in bfloat16 and map it onto `device`.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device,
    torch_dtype=torch.bfloat16,
)
# Tokenizer from the same path as the model.
tokenizer = AutoTokenizer.from_pretrained(model_path)

Next, let's abstract the logic of calling the model into a function:

def run_model(messages: list[dict[str, str]]) -> str:
    """
    Generates the model's next reply for a chat history.

    :param messages: The conversation so far, as dicts with "role" and "content" keys.
    :return: The newly generated text only, decoded with special tokens skipped.
    """
    encoded = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        thinking=True,
        return_dict=True,
        add_generation_prompt=True,
    ).to(device)
    prompt_ids = encoded["input_ids"]

    # Log the rendered prompt, special tokens included, to show what the model sees.
    rendered_prompt = tokenizer.decode(prompt_ids[0], skip_special_tokens=False)
    log(Fore.LIGHTYELLOW_EX + Style.BRIGHT, f"Formatted input: {rendered_prompt}")

    # The seed is reset before every generation call.
    set_seed(42)
    generated = model.generate(**encoded, max_new_tokens=8192)

    # Drop the prompt tokens so only the continuation is decoded.
    prompt_length = prompt_ids.shape[1]
    new_tokens = generated[0, prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

Now let's define the system message that will be used to instruct the model on how to behave.

5. Defining the System Message

# define initial messages
#
# The system prompt is written as a Jinja2 template. Rendering it substitutes the
# four delimiter constants (tool call start/end, final response start/end) into the
# format examples, so the prompt and the parsers share the same markers.
# The prompt also lists the available tools; here that is only magic_function,
# described by hand rather than generated from the toolbox.
# NOTE(review): the tool description inside the prompt uses trailing commas, so it is
# not strict JSON. It is only shown to the model as text; confirm whether that matters.

system_message = environment.from_string("""
You are a tool-calling agent who has access to some tools.
The user will ask you to do some tasks, and you will call the appropriate tool, if you need to, to get the result.

INSTRUCTIONS:

- If you need to call a tool, respond with a message in the following format:
    ```
    {{tool_call_string}}{"name": "tool_name", "args": {"arg1": "value1", "arg2": "value2"}}{{tool_call_end_string}}
    ```
- If you have a final response, respond with a message in the following format:
    ```
    {{final_response_string}}{"message": "Final response message"}{{final_response_end_string}}
    ```
- Think step by step and do not skip any steps.
- When you call a tool, you will get the result back in a message with the role "function".

You have access to the following tools:

-   {
      "name": "magic_function",
        "description": "A function that performs a magic function on a number.",
        "parameters": {
            "type": "object",
            "properties": {
                "x": {
                    "type": "integer",
                    "description": "The input number to the magic function.",
                },
            },
            "required": ["x"],
        },
    }
""").render(tool_call_string=TOOL_CALL_STRING, tool_call_end_string=TOOL_CALL_END_STRING, final_response_string=FINAL_RESPONSE_STRING, final_response_end_string=FINAL_RESPONSE_END_STRING)

# Show the rendered prompt so the substituted delimiters can be checked by eye.
print(system_message)

5.1 (Optional) Adding an Example for In-Context Learning

Let's also add an example for in-context learning

# The question we actually want the agent to answer.
message = "What's the value of magic_function(3)?"

# Conversation seed: the system prompt, then one worked example
# (user question -> assistant tool call -> function result -> assistant final response),
# and finally the real user question.
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": "What's the value of magic_function(6)?"},
    {"role": "assistant", "content": "To find the value of magic_function(6), we need to call the magic_function with x=6. The function will return the result.\n" + TOOL_CALL_STRING +
        '{"name": "magic_function", "args": {"x": 6}}' + TOOL_CALL_END_STRING},
    {"role": "function", "content": "8"},
    {"role": "assistant", "content": FINAL_RESPONSE_STRING +
        '{"message": "The value of magic_function(6) is 8."}' + FINAL_RESPONSE_END_STRING},
    {"role": "user", "content": message},
]

# One-off model call to inspect the raw reply for this conversation.
print(run_model(messages))

6. Defining the Agent Loop

Now let's define the agent loop that will execute the tool calls and print the results.

max_iterations = 5
current_iteration = 0
while current_iteration &#x3C; max_iterations:
    current_iteration += 1
    log(Fore.CYAN, f"Iteration {current_iteration} of {max_iterations}")
    response_message = run_model(messages)
    log(Fore.CYAN, f"Response message: {response_message}")
    parsed_response: Response = parse_response(response_message)
    match parsed_response:
        case ToolCall(tool=tool, arguments=arguments):
            messages.append({"role": "assistant", "content": response_message + TOOL_CALL_END_STRING})
            log(Fore.YELLOW, f"Tool call: {tool} with arguments: {arguments}")
            if tool in toolbox:
                try:
                    result = toolbox[tool](**arguments)
                    messages.append({"role": "function", "content": str(result)})
                    log(Fore.YELLOW, f"Tool result: {result}")
                except Exception as e:
                    messages.append({"role": "function", "content": str(e)})
                    log(Fore.RED, f"Tool error: {e}")
            else:
                messages.append({"role": "function", "content": f"Tool '{tool}' not found."})
                log(Fore.RED, f"Tool '{tool}' not found.")
        case FinalResponse(message=message):
            messages.append({"role": "assistant", "content": response_message + FINAL_RESPONSE_END_STRING})
            log(Fore.GREEN, f"Final response: {message}")
            break
        case MessageResponse(content=content):
            messages.append({"role": "assistant", "content": response_message})
            log(Fore.GREEN, f"Message response: {content}")


print("Updated messages list:")
print_messages(messages)

This is the core of any agent. You need to:

- Give the model the choice to perform certain actions.
- Define the response schema and how to parse it.
- Define the execution loop that will execute the tool calls and print the results.