2025-04-26
Agents are the new hype, and new frameworks are popping up every day.
For me, the best way to understand something is to build it from scratch and get a feel for how it works. So I decided to build a minimal agent from scratch using Granite 3.3.
To be an agent, a model needs to be able to choose to perform certain actions in order to achieve a goal. In this case, I will give the agent the ability to call a magic function and tell the user what it returns for the number they provide.
The full code is available in this Colab Notebook.
I've created a modified version using Groq, Tavily Search, and Riza in this Colab Notebook.
Let's start by building our toolbox:
# define toolbox
def magic_function(x: int) -> int:
"""
A function that performs a magic function on a number.
:param x: The input number to the magic function.
:return: The result of the magic function.
"""
return x + 2
toolbox = {
"magic_function": magic_function,
}
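Since the toolbox is just a dictionary mapping tool names to plain Python callables, dispatching a call is a lookup followed by a keyword-argument call. A quick sanity check of the mechanism the agent loop will use later:
# sanity check: dispatch a call through the toolbox by name, exactly as the agent loop will do
tool_name, tool_args = "magic_function", {"x": 3}
print(toolbox[tool_name](**tool_args))  # 5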
The response from the model can be either a tool call, a final response, or a plain message. Our execution loop should parse the response and interpret it accordingly.
# define response classes
from dataclasses import dataclass
import jinja2
from tabulate import tabulate
from colorama import Fore, Style
TOOL_CALL_STRING = "<tool_call>"
TOOL_CALL_END_STRING = "<|end_of_text|>"
FINAL_RESPONSE_STRING = "<final_response>"
FINAL_RESPONSE_END_STRING = "<|end_of_text|>"
environment = jinja2.Environment()
@dataclass
class Response:
pass
@dataclass
class ToolCallError(Response):
message: str
@dataclass
class ToolCall(Response):
tool: str
arguments: dict
@classmethod
def parse(cls, message: str):
"""
Parses a message if it contains a tool call in the <tool_call>{...}<|end_of_text|> format.
:param message: The message to parse.
:return: A ToolCall object if the message contains a tool call, otherwise None.
"""
import json
if TOOL_CALL_STRING in message:
try:
tool_call = message.split(TOOL_CALL_STRING)[1]
tool_call = tool_call.split(TOOL_CALL_END_STRING)[0]
tool_call = json.loads(tool_call)
return cls(tool=tool_call["name"], arguments=tool_call["args"])
except (json.JSONDecodeError, KeyError):
return ToolCallError(message="Invalid tool call format")
return None
@dataclass
class FinalResponse(Response):
message: str
@classmethod
def parse(cls, message: str):
"""
Parses a message if it contains a final response in the <final_response>{...}<|end_of_text|> format.
:param message: The message to parse.
:return: A FinalResponse object if the message contains a final response, otherwise None.
"""
import json
if FINAL_RESPONSE_STRING in message:
try:
final_response = message.split(FINAL_RESPONSE_STRING)[1]
final_response = final_response.split(FINAL_RESPONSE_END_STRING)[0]
final_response = json.loads(final_response)
return cls(message=final_response["message"])
except (json.JSONDecodeError, KeyError):
return ToolCallError(message="Invalid final response format")
return None
@dataclass
class MessageResponse(Response):
content: str
@classmethod
def parse(cls, message: str):
"""
Wraps a message that is neither a tool call nor a final response.
:param message: The message to wrap.
:return: A MessageResponse object containing the message content.
"""
return cls(message)
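To make the expected formats concrete, here is what the parsers return for a couple of hand-written strings (illustrative only, not real model output):
# illustrative only: hand-written strings in the tool call and final response formats
print(ToolCall.parse('<tool_call>{"name": "magic_function", "args": {"x": 3}}<|end_of_text|>'))
# ToolCall(tool='magic_function', arguments={'x': 3})
print(FinalResponse.parse('<final_response>{"message": "The result is 5."}<|end_of_text|>'))
# FinalResponse(message='The result is 5.')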
Let's define some utility functions for parsing and logging:
# utility methods
def parse_response(response_message: str) -> Response:
"""
Parses the response message to determine if it is a tool call, final response, or an error.
"""
tool_call = ToolCall.parse(response_message)
if tool_call:
return tool_call
final_response = FinalResponse.parse(response_message)
if final_response:
return final_response
return MessageResponse.parse(response_message)
def log(color: str, message: str):
"""
Logs the message with the specified color using colorama.
"""
print(f"{color}{message}{Style.RESET_ALL}")
def print_message(message: dict):
"""
Prints the message content in a tabular format and assigns colors to each role using colorama.
"""
role = message["role"]
content = message["content"]
if role == "user":
color = Fore.CYAN
elif role == "assistant":
color = Fore.GREEN
elif role == "function":
color = Fore.YELLOW
else:
color = Fore.WHITE
log(color, f"{role}: {content}")
def print_messages(messages: list):
"""
Prints the turn id and the message content in a tabular format and assigns colors to each role using colorama.
"""
table = []
for i, message in enumerate(messages):
role = message["role"]
content = message["content"]
if role == "user":
color = Fore.CYAN
elif role == "assistant":
color = Fore.GREEN
elif role == "function":
color = Fore.YELLOW
else:
color = Fore.WHITE
table.append([i, color + role + Style.RESET_ALL, content])
print(tabulate(table, headers=["Turn ID", "Role", "Message"], tablefmt="fancy_grid", stralign="left", maxcolwidths=[None, None, 120]))
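A quick illustration of the table printer on a toy conversation:
# illustrative only: render a toy conversation with the table printer
print_messages([
    {"role": "user", "content": "What's the value of magic_function(3)?"},
    {"role": "assistant", "content": "I'll call the tool."},
    {"role": "function", "content": "5"},
])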
Let's load the model and tokenizer using the snippet provided in the Granite 3.3 Model Card:
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
import torch
model_path = "ibm-granite/granite-3.3-2b-instruct"
device = "cuda"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map=device,
torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(
model_path
)
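The model card snippet assumes a CUDA GPU. If you don't have one, a small tweak (not part of the original snippet) is to pick the device dynamically before loading the model:
# assumption: a CUDA GPU may not be available; fall back to CPU
# (slower, and bfloat16 is best swapped for float32 there)
device = "cuda" if torch.cuda.is_available() else "cpu"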
Let's abstract the logic of calling the model into a function:
def run_model(messages: list[dict[str, str]]) -> str:
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", thinking=True, return_dict=True, add_generation_prompt=True).to(device)
formatted_input = tokenizer.decode(input_ids["input_ids"][0], skip_special_tokens=False)
log(Fore.LIGHTYELLOW_EX + Style.BRIGHT, f"Formatted input: {formatted_input}")
set_seed(42)
output = model.generate(
**input_ids,
max_new_tokens=8192,
)
prediction = tokenizer.decode(output[0, input_ids["input_ids"].shape[1]:], skip_special_tokens=True)
return prediction
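A quick smoke test of the wrapper (the exact reply will depend on the model):
# smoke test: a single plain call through the wrapper; the reply text will vary
print(run_model([{"role": "user", "content": "Say hello in one short sentence."}]))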
Now let's define the system message that will be used to instruct the model on how to behave.
# define initial messages
system_message = environment.from_string("""
You are a tool-calling agent who has access to some tools.
The user will ask you to do some tasks, and you will call the appropriate tool, if you need to, to get the result.
INSTRUCTIONS:
- If you need to call a tool, respond with a message in the following format:
```
{{tool_call_string}}{"name": "tool_name", "args": {"arg1": "value1", "arg2": "value2"}}{{tool_call_end_string}}
```
- If you have a final response, respond with a message in the following format:
```
{{final_response_string}}{"message": "Final response message"}{{final_response_end_string}}
```
- Think step by step and do not skip any steps.
- When you call a tool, you will get the result back in a message with the role "function".
You have access to the following tools:
- {
"name": "magic_function",
"description": "A function that performs a magic function on a number.",
"parameters": {
"type": "object",
"properties": {
"x": {
"type": "integer",
"description": "The input number to the magic function.",
},
},
"required": ["x"],
},
}
""").render(tool_call_string=TOOL_CALL_STRING, tool_call_end_string=TOOL_CALL_END_STRING, final_response_string=FINAL_RESPONSE_STRING, final_response_end_string=FINAL_RESPONSE_END_STRING)
print(system_message)
Let's also add an example conversation for in-context learning:
message = "What's the value of magic_function(3)?"
messages = [
{"role": "system", "content": system_message},
{"role": "user", "content": "What's the value of magic_function(6)?"},
{"role": "assistant", "content": "To find the value of magic_function(6), we need to call the magic_function with x=6. The function will return the result.\n" + TOOL_CALL_STRING +
'{"name": "magic_function", "args": {"x": 6}}' + TOOL_CALL_END_STRING},
{"role": "function", "content": "8"},
{"role": "assistant", "content": FINAL_RESPONSE_STRING +
'{"message": "The value of magic_function(6) is 8."}' + FINAL_RESPONSE_END_STRING},
{"role": "user", "content": message},
]
print(run_model(messages))
Now let's define the agent loop that will execute the tool calls and print the results.
max_iterations = 5
current_iteration = 0
while current_iteration < max_iterations:
current_iteration += 1
log(Fore.CYAN, f"Iteration {current_iteration} of {max_iterations}")
response_message = run_model(messages)
log(Fore.CYAN, f"Response message: {response_message}")
parsed_response: Response = parse_response(response_message)
match parsed_response:
case ToolCall(tool=tool, arguments=arguments):
messages.append({"role": "assistant", "content": response_message + TOOL_CALL_END_STRING})
log(Fore.YELLOW, f"Tool call: {tool} with arguments: {arguments}")
if tool in toolbox:
try:
result = toolbox[tool](**arguments)
messages.append({"role": "function", "content": str(result)})
log(Fore.YELLOW, f"Tool result: {result}")
except Exception as e:
messages.append({"role": "function", "content": str(e)})
log(Fore.RED, f"Tool error: {e}")
else:
messages.append({"role": "function", "content": f"Tool '{tool}' not found."})
log(Fore.RED, f"Tool '{tool}' not found.")
case FinalResponse(message=message):
messages.append({"role": "assistant", "content": response_message + FINAL_RESPONSE_END_STRING})
log(Fore.GREEN, f"Final response: {message}")
break
case ToolCallError(message=error_message):
# a malformed tool call or final response: feed the parser error back to the model so it can retry with a valid format
messages.append({"role": "function", "content": error_message})
log(Fore.RED, f"Tool call error: {error_message}")
case MessageResponse(content=content):
messages.append({"role": "assistant", "content": response_message})
log(Fore.GREEN, f"Message response: {content}")
print("Updated messages list:")
print_messages(messages)
This is the core of any agent. You need to give the model the choice to perform certain actions in order to achieve a goal, execute those actions when it asks for them, and feed the results back until it produces a final response.