Skip to content

Commit 20b0449

Browse files
committed
feat: add methods to create and execute langgraph AI agent
1 parent 61f5400 commit 20b0449

File tree

11 files changed

+302
-14
lines changed

11 files changed

+302
-14
lines changed

.vscode/launch.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"program": "${file}",
99
"console": "integratedTerminal",
1010
"justMyCode": false,
11-
"envFile": "${workspaceFolder}/.vscode/.env"
11+
"envFile": "${workspaceFolder}/.env"
1212
}
1313
]
1414
}

.vscode/settings.json

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,6 @@
2323
"python.testing.pytestArgs": [
2424
"."
2525
],
26-
"python.envFile": "${workspaceFolder}/.vscode/.env",
27-
"python-envs.pythonProjects": [
28-
{
29-
"path": "",
30-
"envManager": "ms-python.python:venv",
31-
"packageManager": "ms-python.python:pip"
32-
}
33-
],
3426
"ruff.organizeImports": true,
3527
"cucumberautocomplete.steps": [
3628
"steps/**/*.py",

CHANGELOG.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Toolium Changelog
22
=================
33

4-
v3.7.1
4+
v3.8.0
55
------
66

77
*Release date: In development*
@@ -10,6 +10,7 @@ v3.7.1
1010
- Configure ruff for linting and formatting files, replacing flake8 and black
1111
- Add text analysis tool to get an overall match of a text against a list of expected characteristics
1212
using AI libraries that come with the `ai` extra dependency
13+
- Add langgraph methods to create a ReAct AI agent that tests the behavior of other AI agents or LLMs
1314

1415
v3.7.0
1516
------

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3.7.1.dev0
1+
3.8.0.dev0

docs/ai_utils.rst

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,37 @@ custom behavior after accuracy scenario execution, like calling Allure `after_sc
226226
227227
# Monkey-patch the hook
228228
accuracy.after_accuracy_scenario = custom_after_accuracy_scenario
229+
230+
231+
AI agents for testing
232+
---------------------
233+
234+
Toolium provides utilities to create and execute AI agents in your tests using the langgraph library, allowing you to
235+
simulate complex user interactions or validate AI-generated responses.
236+
237+
You can create an AI agent using the `create_react_agent` function from the `toolium.utils.ai_utils.ai_agent` module.
238+
This function allows you to create a ReAct agent, which is a type of AI agent that can reason and act based on the
239+
conversation history and tool interactions. You must specify the system message with AI testing agent instructions
240+
and the tool method that the agent can use to send requests to the system under test and receive responses.
241+
242+
.. image:: react_agent.png
243+
:alt: ReAct Agent Flow Diagram
244+
245+
Once you have created an AI agent, you can execute it using the `execute_agent` function from the same module. This
246+
function will run the agent and log all conversation messages and tool calls, providing insights into the agent's
247+
behavior and the interactions it had during execution.
248+
You can also provide previous messages to the agent to give it context for its reasoning and actions.
249+
250+
.. code-block:: python
251+
252+
from toolium.utils.ai_utils.ai_agent import create_react_agent, execute_agent
253+
254+
# Create a ReAct agent with a system message and a tool method
255+
system_message = "You are an assistant that helps users find TV content based on their preferences."
256+
tool_method = tv_recommendations # This should be a function that the agent can call as a tool
257+
model_name = 'gpt-4o-mini' # Specify the model to use for the agent
258+
259+
agent = create_react_agent(system_message, tool_method=tool_method, model_name=model_name)
260+
261+
# Execute the agent and log all interactions
262+
final_state = execute_agent(agent)

docs/react_agent.png

10.4 KB
Loading

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,9 @@ select = [
129129
"RUF", # Ruff-specific rules
130130
]
131131
# Rules to ignore
132-
ignore = []
132+
ignore = [
133+
"COM812", # flake8-commas missing-trailing-comma (conflict with ruff format)
134+
]
133135

134136
[tool.ruff.lint.isort]
135137
combine-as-imports = true

requirements_ai.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1+
langchain-openai~=1.1 # OpenAI LLMs in AI agents
2+
langgraph~=1.0 # AI agents
13
spacy~=3.8.7
24
sentence-transformers~=5.1
3-
transformers==4.56.2; python_version < '3.10'
4-
openai~=1.108
5+
openai~=1.108 # OpenAI LLMs

toolium/test/conftest.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
"""
2+
Copyright 2026 Telefónica Innovación Digital, S.L.
3+
This file is part of Toolium.
4+
5+
Licensed under the Apache License, Version 2.0 (the "License");
6+
you may not use this file except in compliance with the License.
7+
You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
"""
17+
18+
import logging
19+
import os
20+
21+
import pytest
22+
23+
24+
def pytest_configure(config):  # noqa: ARG001
    """Configure logging for all tests in this directory and subdirectories.

    Replaces the handlers of the root logger with a console handler and a file handler, both at DEBUG level.
    The log file is ``output/toolium_tests.log`` next to this file and is truncated on every run.

    :param config: pytest configuration object (unused)
    """
    log_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    console_handler.setFormatter(log_formatter)

    # File handler: anchor the output folder to this file instead of the current working directory,
    # so the log always lands in the same place no matter where pytest is launched from
    log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'output')
    os.makedirs(log_dir, exist_ok=True)
    log_file_path = os.path.join(log_dir, 'toolium_tests.log')
    file_handler = logging.FileHandler(log_file_path, mode='w', encoding='utf-8')
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(log_formatter)

    # Configure root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)

    # Clear any existing handlers to avoid duplicates
    root_logger.handlers.clear()

    # Add our handlers
    root_logger.addHandler(console_handler)
    root_logger.addHandler(file_handler)

    # Ensure specific toolium loggers use DEBUG level
    logging.getLogger('toolium').setLevel(logging.DEBUG)
    logging.getLogger('toolium.utils.ai_utils.ai_agent').setLevel(logging.DEBUG)
56+
57+
58+
@pytest.fixture(scope='session', autouse=True)
def setup_logging():
    """Session-level autouse fixture kept as a hook for additional logging setup and cleanup.

    It currently performs no work of its own: it yields immediately and has no teardown code.
    """
    # This fixture runs automatically for all tests (autouse=True), once per session (scope='session')
    # Additional logging setup can be done here if needed
    yield  # noqa: PT022
    # Cleanup can be done here if needed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"""
2+
Copyright 2026 Telefónica Innovación Digital, S.L.
3+
This file is part of Toolium.
4+
5+
Licensed under the Apache License, Version 2.0 (the "License");
6+
you may not use this file except in compliance with the License.
7+
You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
"""
17+
18+
import json
19+
import logging
20+
21+
import pytest
22+
23+
from toolium.utils.ai_utils.ai_agent import create_react_agent, execute_agent
24+
25+
# Global variable to keep track of mock responses in the agent
26+
mock_response_id = 0
27+
28+
logger = logging.getLogger(__name__)
29+
30+
31+
def tv_recommendations(user_question):  # noqa: ARG001
    """
    Tool to help users find TV content.
    Asks questions to the user to understand their preferences and then recommends specific content.
    Takes into account previous questions to make increasingly accurate recommendations.

    :param user_question: The question from the user to the tool
    :returns: A response from the tool based on the user's question
    """
    # NOTE(review): the docstring above is left untouched on purpose; it presumably doubles as the tool
    # description shown to the agent model, so rewording it could change the agent behaviour - confirm.
    global mock_response_id

    # Mock implementation: the question is ignored and canned responses are returned in order
    mocked_responses = [
        'Hola, ¿hoy te encuentras triste o feliz?',
        '¿Te gustaría que busque contenidos cómicos o de acción?',
        'He encontrado estas series que pueden gustarte: "The Office", "Parks and Recreation" and "Brooklyn Nine-Nine"',
    ]

    # Normalise the counter on read so a stale or externally modified value can never index out of range,
    # then advance it, looping back to the start after the last response
    current_index = mock_response_id % len(mocked_responses)
    mock_response_id = (current_index + 1) % len(mocked_responses)
    return mocked_responses[current_index]
51+
52+
53+
# System message passed to create_react_agent in test_react_agent. It tells the model to play a user who is
# looking for TV content, to stop as soon as concrete recommendations arrive (or after 5 questions), and to
# answer with a JSON object holding the "result" and "analysis" keys that the test parses afterwards.
TV_CONTENT_SYSTEM_MESSAGE = (
    'You are a user looking for TV content. '
    'To do this, you will be helped by an assistant who will guide you with questions. '
    "Answer the assistant's questions until it recommends specific content to you. "
    'CRITICAL RULE: As soon as the TV assistant responds with concrete results, '
    '(I found ..., Here you have ...), stop asking questions immediately, analyze the response '
    "and return an analysis about the assistant's performance, to see if it answered correctly. "
    'If after 5 questions, the assistant has not given any recommendation, do not continue asking '
    'and return the analysis. '
    'Respond in JSON format: '
    '{"result": RESULT, "analysis": "your analysis"} '
    'where RESULT = true if it worked well and returned relevant content, false if not.'
)
66+
67+
68+
# @pytest.mark.skip('This test relies on mocked responses and is meant for demonstration purposes')
def test_react_agent():
    """Run a ReAct agent against the mocked TV tool and check the JSON verdict of its last message.

    The test fails with an AssertionError when the last message cannot be read or parsed as JSON, when the
    parsed value is not an object with a ``result`` key, or when ``result`` is not ``True``.
    """
    # NOTE(review): with the skip marker above disabled, this presumably calls a real 'gpt-4o-mini' model
    # and needs valid credentials - confirm this is intended outside of local demonstrations.
    agent = create_react_agent(TV_CONTENT_SYSTEM_MESSAGE, tool_method=tv_recommendations, model_name='gpt-4o-mini')
    agent_results = execute_agent(agent)

    # Check if the agent's final response contains a valid JSON with the expected structure and analyze the result
    try:
        ai_agent_response = json.loads(agent_results['messages'][-1].content)
    except (KeyError, IndexError, AttributeError, TypeError, json.JSONDecodeError) as e:
        # TypeError/AttributeError cover a missing state, a message without content or non-string content
        raise AssertionError('AI Agent did not return a valid response') from e

    # Valid JSON is not enough: it must be an object carrying the verdict, otherwise fail with a clear message
    has_expected_structure = isinstance(ai_agent_response, dict) and 'result' in ai_agent_response
    assert has_expected_structure, f'AI Agent response does not have the expected structure: {ai_agent_response!r}'

    # The analysis is only used to explain a failure, so its absence must not hide the real verdict
    analysis = ai_agent_response.get('analysis')
    error_message = f'TV recommendations use case did not return a valid response: {analysis}'
    assert ai_agent_response['result'] is True, error_message

0 commit comments

Comments
 (0)