feat: add methods to create and execute langgraph AI agent

rgonalo · rgonalo · commit 20b0449f8000 · 2026-02-17T12:59:56.000+01:00
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -8,7 +8,7 @@
       "program": "${file}",
       "console": "integratedTerminal",
       "justMyCode": false,
-      "envFile": "${workspaceFolder}/.vscode/.env"
+      "envFile": "${workspaceFolder}/.env"
     }
   ]
 }
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -23,14 +23,6 @@
   "python.testing.pytestArgs": [
     "."
   ],
-  "python.envFile": "${workspaceFolder}/.vscode/.env",
-  "python-envs.pythonProjects": [
-    {
-      "path": "",
-      "envManager": "ms-python.python:venv",
-      "packageManager": "ms-python.python:pip"
-    }
-  ],
   "ruff.organizeImports": true,
   "cucumberautocomplete.steps": [
     "steps/**/*.py",
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,7 +1,7 @@
 Toolium Changelog
 =================
 
-v3.7.1
+v3.8.0
 ------
 
 *Release date: In development*
@@ -10,6 +10,7 @@ v3.7.1
 - Configure ruff for linting and formatting files, replacing flake8 and black
 - Add text analysis tool to get an overall match of a text against a list of expected characteristics
   using AI libraries that come with the `ai` extra dependency
+- Add langgraph methods to create a ReAct AI agent that tests the behavior of other AI agents or LLMs
 
 v3.7.0
 ------
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-3.7.1.dev0
+3.8.0.dev0
diff --git a/docs/ai_utils.rst b/docs/ai_utils.rst
@@ -226,3 +226,37 @@ custom behavior after accuracy scenario execution, like calling Allure `after_sc
 
     # Monkey-patch the hook
     accuracy.after_accuracy_scenario = custom_after_accuracy_scenario
+
+
+AI agents for testing
+---------------------
+
+Toolium provides utilities to create and execute AI agents in your tests using the langgraph library, allowing you to
+simulate complex user interactions or validate AI-generated responses.
+
+You can create an AI agent using the `create_react_agent` function from the `toolium.utils.ai_utils.ai_agent` module.
+This function allows you to create a ReAct agent, which is a type of AI agent that can reason and act based on the
+conversation history and tool interactions. You must specify a system message with the testing agent's instructions
+and the tool method that the agent can use to send requests to the system under test and receive responses.
+
+.. image:: react_agent.png
+   :alt: ReAct Agent Flow Diagram
+
+Once you have created an AI agent, you can execute it using the `execute_agent` function from the same module. This
+function will run the agent and log all conversation messages and tool calls, providing insights into the agent's
+behavior and the interactions it had during execution.
+You can also provide previous messages to the agent to give it context for its reasoning and actions.
+
+.. code-block:: python
+
+    from toolium.utils.ai_utils.ai_agent import create_react_agent, execute_agent
+
+    # Create a ReAct agent with a system message and a tool method
+    system_message = "You are an assistant that helps users find TV content based on their preferences."
+    tool_method = tv_recommendations  # This should be a function that the agent can call as a tool
+    model_name = 'gpt-4o-mini'  # Specify the model to use for the agent
+
+    agent = create_react_agent(system_message, tool_method=tool_method, model_name=model_name)
+
+    # Execute the agent and log all interactions
+    final_state = execute_agent(agent)
diff --git a/docs/react_agent.png b/docs/react_agent.png
diff --git a/pyproject.toml b/pyproject.toml
@@ -129,7 +129,9 @@ select = [
     "RUF", # Ruff-specific rules
 ]
 # Rules to ignore
-ignore = []
+ignore = [
+    "COM812",  # flake8-commas: missing-trailing-comma (conflicts with ruff format)
+]
 
 [tool.ruff.lint.isort]
 combine-as-imports = true
diff --git a/requirements_ai.txt b/requirements_ai.txt
@@ -1,4 +1,5 @@
+langchain-openai~=1.1       # OpenAI LLMs in AI agents
+langgraph~=1.0              # AI agents
 spacy~=3.8.7
 sentence-transformers~=5.1
-transformers==4.56.2; python_version < '3.10'
-openai~=1.108
+openai~=1.108               # OpenAI LLMs
diff --git a/toolium/test/conftest.py b/toolium/test/conftest.py
@@ -0,0 +1,64 @@
+"""
+Copyright 2026 Telefónica Innovación Digital, S.L.
+This file is part of Toolium.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import logging
+import os
+
+import pytest
+
+
+def pytest_configure(config):  # noqa: ARG001
+    """Configure logging for all tests in this directory and subdirectories."""
+    # Configure logging to show DEBUG messages and save to file
+    log_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+    # Console handler
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.DEBUG)
+    console_handler.setFormatter(log_formatter)
+
+    # File handler
+    log_dir = os.path.join('toolium', 'test', 'output')
+    os.makedirs(log_dir, exist_ok=True)
+    log_file_path = os.path.join(log_dir, 'toolium_tests.log')
+    file_handler = logging.FileHandler(log_file_path, mode='w', encoding='utf-8')
+    file_handler.setLevel(logging.DEBUG)
+    file_handler.setFormatter(log_formatter)
+
+    # Configure root logger
+    root_logger = logging.getLogger()
+    root_logger.setLevel(logging.DEBUG)
+
+    # Clear any existing handlers to avoid duplicates
+    root_logger.handlers.clear()
+
+    # Add our handlers
+    root_logger.addHandler(console_handler)
+    root_logger.addHandler(file_handler)
+
+    # Ensure specific toolium loggers use DEBUG level
+    logging.getLogger('toolium').setLevel(logging.DEBUG)
+    logging.getLogger('toolium.utils.ai_utils.ai_agent').setLevel(logging.DEBUG)
+
+
+@pytest.fixture(scope='session', autouse=True)
+def setup_logging():
+    """
+    Session-scoped autouse fixture reserved for additional logging setup and cleanup.
+
+    It currently performs no work before or after the yield.
+    """
+    # Setup placeholder: additional logging configuration can be added before the yield
+    yield  # noqa: PT022
+    # Teardown placeholder: cleanup can be added after the yield
diff --git a/toolium/test/utils/ai_utils/test_ai_agent.py b/toolium/test/utils/ai_utils/test_ai_agent.py
@@ -0,0 +1,79 @@
+"""
+Copyright 2026 Telefónica Innovación Digital, S.L.
+This file is part of Toolium.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import logging
+
+import pytest
+
+from toolium.utils.ai_utils.ai_agent import create_react_agent, execute_agent
+
+# Index of the next mocked response returned by tv_recommendations; kept at module level so that it persists
+# across the successive tool calls made by the agent
+mock_response_id = 0
+
+logger = logging.getLogger(__name__)
+
+
+def tv_recommendations(user_question):  # noqa: ARG001
+    """
+    Tool to help users find TV content.
+    Asks questions to the user to understand their preferences and then recommends specific content.
+    Takes into account previous questions to make increasingly accurate recommendations.
+
+    :param user_question: The question from the user to the tool
+    :returns: A response from the tool based on the user's question
+    """
+    mocked_responses = [
+        'Hola, ¿hoy te encuentras triste o feliz?',
+        '¿Te gustaría que busque contenidos cómicos o de acción?',
+        'He encontrado estas series que pueden gustarte: "The Office", "Parks and Recreation" and "Brooklyn Nine-Nine"',
+    ]
+
+    # Return the next response in the list for each call, and loop back to the start after the last one
+    global mock_response_id
+    response = mocked_responses[mock_response_id]
+    mock_response_id = mock_response_id + 1 if mock_response_id < len(mocked_responses) - 1 else 0
+    return response
+
+
+# System prompt for the testing agent: it plays a user talking to the TV assistant tool and must finish with a
+# JSON verdict containing the "result" and "analysis" keys that test_react_agent reads
+TV_CONTENT_SYSTEM_MESSAGE = (
+    'You are a user looking for TV content. '
+    'To do this, you will be helped by an assistant who will guide you with questions. '
+    "Answer the assistant's questions until it recommends specific content to you. "
+    'CRITICAL RULE: As soon as the TV assistant responds with concrete results, '
+    '(I found ..., Here you have ...), stop asking questions immediately, analyze the response '
+    "and return an analysis about the assistant's performance, to see if it answered correctly. "
+    'If after 5 questions, the assistant has not given any recommendation, do not continue asking '
+    'and return the analysis. '
+    'Respond in JSON format: '
+    '{"result": RESULT, "analysis": "your analysis"} '
+    'where RESULT = true if it worked well and returned relevant content, false if not.'
+)
+
+
+# @pytest.mark.skip('This test relies on mocked responses and is meant for demonstration purposes')
+def test_react_agent():
+    agent = create_react_agent(TV_CONTENT_SYSTEM_MESSAGE, tool_method=tv_recommendations, model_name='gpt-4o-mini')
+    agent_results = execute_agent(agent)
+
+    # Check if the agent's final response contains a valid JSON with the expected structure and analyze the result
+    try:
+        ai_agent_response = json.loads(agent_results['messages'][-1].content)
+    except (KeyError, IndexError, json.JSONDecodeError) as e:
+        raise AssertionError('AI Agent did not return a valid response') from e
+    error_message = f'TV recommendations use case did not return a valid response: {ai_agent_response["analysis"]}'
+    assert ai_agent_response['result'] is True, error_message
diff --git a/toolium/utils/ai_utils/ai_agent.py b/toolium/utils/ai_utils/ai_agent.py
@@ -0,0 +1,115 @@
+"""
+Copyright 2026 Telefónica Innovación Digital, S.L.
+This file is part of Toolium.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import logging
+
+# AI library imports must be optional to allow installing Toolium without `ai` extra dependency
+try:
+    from langchain_core.messages import SystemMessage
+    from langchain_core.tools import Tool
+    from langchain_openai import AzureChatOpenAI
+    from langgraph.graph import END, START, MessagesState, StateGraph
+    from langgraph.prebuilt import ToolNode, tools_condition
+except ImportError:
+    AzureChatOpenAI = None
+
+from toolium.driver_wrappers_pool import DriverWrappersPool
+
+# Configure logger
+logger = logging.getLogger(__name__)
+
+
+def create_react_agent(system_message, tool_method, tool_description=None, model_name=None):
+    """
+    Creates a ReAct agent using the provided system message, tool method and model name.
+
+    :param system_message: The system message to set the behavior of the assistant
+    :param tool_method: The method that the agent can use as a tool
+    :param tool_description: Optional custom description for the tool. If not provided, uses the method's docstring
+    :param model_name: The name of the model to use (optional)
+    :returns: A compiled ReAct agent graph
+    """
+    if AzureChatOpenAI is None:
+        raise ImportError(
+            "AzureChatOpenAI is not installed. Please run 'pip install toolium[ai]' to use langgraph features",
+        )
+
+    # Define LLM with bound tools
+    config = DriverWrappersPool.get_default_wrapper().config
+    model_name = model_name or config.get_optional('AI', 'openai_model', 'gpt-4o-mini')
+    llm = AzureChatOpenAI(model=model_name)
+
+    # Create tools with custom description if provided
+    if tool_description:
+        tools = [Tool(name=tool_method.__name__, description=tool_description, func=tool_method)]
+    else:
+        tools = [tool_method]
+
+    llm_with_tools = llm.bind_tools(tools)
+
+    # System message
+    sys_msg = SystemMessage(content=system_message)
+
+    # Node
+    def assistant(state: MessagesState):
+        return {'messages': [llm_with_tools.invoke([sys_msg] + state['messages'])]}
+
+    # Build graph
+    builder = StateGraph(MessagesState)
+    builder.add_node('assistant', assistant)
+    builder.add_node('tools', ToolNode(tools))
+    builder.add_edge(START, 'assistant')
+    builder.add_conditional_edges(
+        'assistant',
+        tools_condition,
+    )
+    builder.add_edge('tools', 'assistant')
+    builder.add_edge('assistant', END)
+
+    # Compile graph
+    logger.info('Creating ReAct agent with model %s and tools %s', model_name, tools)
+    graph = builder.compile()
+    return graph
+
+
+def execute_agent(ai_agent, previous_messages=None):
+    """
+    Executes the given AI agent and logs all conversation messages and tool calls.
+
+    :param ai_agent: The AI agent to be executed
+    :param previous_messages: Optional list of previous messages with the tool to provide context to the agent
+    :returns: The final state of the agent after execution
+    """
+    logger.info('Executing AI agent with previous messages: %s', previous_messages)
+    initial_state = MessagesState(messages=previous_messages or [])
+    final_state = ai_agent.invoke(initial_state)
+
+    # Log all conversation messages and tool calls to help with debugging and understanding the agent's behavior
+    logger.info('AI agent execution completed with %d messages', len(final_state['messages']))
+    for msg in final_state['messages']:
+        if msg.type == 'ai' and hasattr(msg, 'tool_calls') and msg.tool_calls:
+            for tool_call in msg.tool_calls:
+                logger.debug(
+                    '%s: calling to %s tool with args %s',
+                    msg.type.upper(),
+                    tool_call['name'],
+                    tool_call['args'],
+                )
+        else:
+            logger.debug('%s: %s', msg.type.upper(), msg.content)
+
+    return final_state

Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@`
`8`	`8`	`"program": "${file}",`
`9`	`9`	`"console": "integratedTerminal",`
`10`	`10`	`"justMyCode": false,`
`11`		`- "envFile": "${workspaceFolder}/.vscode/.env"`
	`11`	`+ "envFile": "${workspaceFolder}/.env"`
`12`	`12`	`}`
`13`	`13`	`]`
`14`	`14`	`}`