Merged pull request: changes from all commits.
**README.md** (7 additions, 5 deletions)

````diff
@@ -10,7 +10,7 @@
 <a href="https://github.com/Agent-One-Lab/AgentFly" target="_blank"><img alt="Static Badge" src="https://img.shields.io/github/stars/Agent-One-Lab/AgentFly?style=for-the-badge&logo=github&color=a2d2ff"></a>
 </p>
 <p align="center">
-<a href="./assets/images/wechat.jpg" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/WeChat-%23e9edc9?style=for-the-badge&logo=wechat"></a>
+<a href="https://agent-one-lab.github.io/assets/agentfly/wechat.jpg" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/WeChat-%23e9edc9?style=for-the-badge&logo=wechat"></a>
 <a href="https://discord.gg/Ze5Z9QhhJ3" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/discord-%23dedbd2?style=for-the-badge&logo=discord"></a>
 </p>
 <p align="center">
@@ -39,16 +39,18 @@ AgentFly is an extensible framework for building LLM agents with reinforcement learning.

 ## News

-**08/2025 Multi-Modal (Vision) Agent Training Support** - Thanks to the powerful template system, AgentFly now supports training vision-language agents! 🎉 Train agents that can see and understand visual content, including GUI automation and image-based QA. See our [predefined training examples](docs/examples/predefined_training_examples.md) for ready-to-use scripts.
+**12/2025 verl update**: Updated verl to version 0.6.x.
+
+**08/2025 Multi-Modal (Vision) Agent Training Support**: Thanks to the powerful template system, AgentFly now supports training vision-language agents! 🎉 Train agents that can see and understand visual content, including GUI automation and image-based QA. See our [predefined training examples](docs/examples/predefined_training_examples.md) for ready-to-use scripts.

 ---

-**08/2025 Chat Template System** - A flexible framework for creating conversation templates with multi-model support, vision capabilities, and tool integration. [Learn more →](docs/chat_template/)
+**08/2025 Chat Template System**: A flexible framework for creating conversation templates with multi-model support, vision capabilities, and tool integration. [Learn more →](docs/chat_template/)

 ## Installation
 **Option 1**: One-line installation:
 ```
-bash install.sh # Assumes conda with Python 3.10.x
+bash install.sh # Assumes conda with Python 3.12.x
 ```
 **Option 2**: Customized installation
@@ -144,7 +146,7 @@ During training, `question` will be used to format the input messages, while oth
 #### 2. Tools & Rewards
 You can use any existing tool listed in the [documentation](https://agentfly.readthedocs.io/), or define a tool by decorating a function with `@tool`. The output should either be a string or a dictionary containing `observation` as a key.
 ```python
-@reward(name="customized_tool")
+@tool(name="customized_tool")
 def customized_tool(arg1, arg2):
     # tool logic here
 ```
````
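For reference, a complete tool in this style might look like the following minimal sketch. The function, its argument, and the return value are invented for illustration; only the `@tool(name=...)` decorator form and the contract that the output is a string or a dict with an `observation` key come from the README, and the import path is an assumption:

```python
from agentfly.tools import tool  # assumed import path

@tool(name="city_weather")
def city_weather(city: str):
    # A real tool would do work here (API call, code execution, ...).
    # Return the documented dictionary form, with the result under `observation`.
    return {"observation": f"The weather in {city} is sunny."}
```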
**agentfly/agents/agent_base.py** (11 additions, 2 deletions)

```diff
@@ -55,7 +55,7 @@ def __init__(
         log_file: str = "agent",
         streaming: str = "console",
         debug: bool = False,
-        monitors: List[str] = [],
+        monitors: List[str] = ["wandb"],
         wandb_project_name: str = None,
         wandb_run_name: str = None,
         local_cache_dir: str = None,
@@ -184,6 +184,12 @@ def _preprocess_messages(self, messages: List[Dict]):

         return messages_list.to_list()

+    def _preprocess_backends(self):
+        self.llm_engine.preprocess()
+
+    def _postprocess_backends(self):
+        self.llm_engine.postprocess()
+
     def _initialize_monitor(self, monitors: List[str]) -> None:
         for monitor in monitors:
             if monitor == "local":
@@ -212,14 +218,17 @@ async def run(self,

         """
         processed_messages = self._preprocess_messages(messages)
+        self._preprocess_backends()

-        return await self.run_async(
+        await self.run_async(
             processed_messages,
             max_turns=max_turns,
             generation_config=generation_config,
             **kwargs,
         )

+        self._postprocess_backends()
+
     def set_llm_engine(self, llm_engine: Any, tokenizer: Any, processor: Any):
         assert self.backend == "async_verl", "Only async verl backend is supported for now"
```
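With these hooks, `run()` now brackets generation with the backend's wake/sleep cycle (implemented for the verl backend further down in this diff). A minimal usage sketch: the model name is illustrative, and the message shape is assumed from the `_preprocess_messages(self, messages: List[Dict])` signature.

```python
import asyncio
from agentfly.agents.specialized.hf_agent import HFAgent  # module path as in this diff

async def main():
    agent = HFAgent("Qwen/Qwen2.5-7B-Instruct", monitors=["wandb"])
    messages = [{"role": "user", "content": "What is 2 + 2?"}]
    # Wakes the backend, runs up to 3 turns, then puts the backend back to sleep.
    await agent.run(messages, max_turns=3)

asyncio.run(main())
```

One consequence of dropping the `return` is that `run()` itself no longer passes the `run_async` result back to the caller.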
**agentfly/agents/llm_backends/backend_configs.py** (17 additions, 6 deletions)

```diff
@@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Optional, Dict, Any, List

 from vllm import AsyncEngineArgs
@@ -39,7 +39,7 @@ class VLLMConfig:



-@dataclass
+@dataclass(init=False)
 class AsyncVLLMConfig:
     """Configuration for the async vLLM backend with engine arguments. The arguments are the same as vLLM's, which can
     be found at https://docs.vllm.ai/en/latest/configuration/engine_args.html. Some important arguments:
@@ -53,10 +53,21 @@ class AsyncVLLMConfig:
         data_parallel_size (int): Data parallel size.
         tensor_parallel_size (int): Tensor parallel size.
     """
-    engine_args: AsyncEngineArgs = AsyncEngineArgs()
-
-    def __init__(self, **kwargs):
-        self.engine_args = AsyncEngineArgs(**kwargs)
+    engine_args: AsyncEngineArgs
+
+    def __init__(self, engine_args: Optional[AsyncEngineArgs] = None, **kwargs):
+        """Initialize AsyncVLLMConfig.
+
+        Args:
+            engine_args: Optional AsyncEngineArgs instance. If provided, kwargs are ignored.
+            **kwargs: Arguments passed to AsyncEngineArgs if engine_args is not provided.
+        """
+        if engine_args is not None:
+            self.engine_args = engine_args
+        elif kwargs:
+            self.engine_args = AsyncEngineArgs(**kwargs)
+        else:
+            self.engine_args = AsyncEngineArgs()


 @dataclass
```
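Because the custom `__init__` replaces the dataclass-generated one, `init=False` keeps the two from conflicting, and dropping the class-level `AsyncEngineArgs()` default avoids sharing one mutable instance across configs. A sketch of the three construction paths this `__init__` supports (model name and parallelism value are illustrative):

```python
from vllm import AsyncEngineArgs
from agentfly.agents.llm_backends.backend_configs import AsyncVLLMConfig

# 1. Pass a prebuilt AsyncEngineArgs instance; extra kwargs would be ignored.
config = AsyncVLLMConfig(engine_args=AsyncEngineArgs(model="Qwen/Qwen2.5-7B-Instruct"))

# 2. Forward keyword arguments straight through to AsyncEngineArgs.
config = AsyncVLLMConfig(model="Qwen/Qwen2.5-7B-Instruct", tensor_parallel_size=2)

# 3. No arguments at all: fall back to vLLM's default engine arguments.
config = AsyncVLLMConfig()
```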
**agentfly/agents/llm_backends/llm_backends.py** (39 additions, 4 deletions)

```diff
@@ -3,8 +3,6 @@
 This module provides a unified interface to different LLM implementations.
 """
 import asyncio
-from asyncore import loop
-from collections import deque
 import copy
 from functools import partial
 import time
@@ -54,6 +52,10 @@ def apply_chat_template(self, messages_list: List[List[Dict]], template: str, ad
             vision_inputs.append(chat.vision_inputs())

         return prompts, vision_inputs
+
+    def prepare(self):
+        """Prepare the backend"""
+        pass

     def generate(self, messages_list: str, **kwargs) -> str:
         """Generate text from prompt"""
@@ -404,6 +406,18 @@ def __init__(self, llm_engine, model_name_or_path: str, template: str, max_lengt
             trust_remote_code=True,
         )
         self.llm_engine = llm_engine
+
+    def preprocess(self):
+        """Wake the engine (and the reward-model manager, if present) before generation."""
+        self.llm_engine.wake_up()
+        if self.llm_engine.reward_model_manager:
+            self.llm_engine.reward_model_manager.wake_up()
+
+    def postprocess(self):
+        """Put the engine (and the reward-model manager, if present) back to sleep."""
+        self.llm_engine.sleep()
+        if self.llm_engine.reward_model_manager:
+            self.llm_engine.reward_model_manager.sleep()

     def _process_inputs(self, prompts: List[str], vision_inputs: Dict[str, List[PIL.Image.Image]]):
         inputs = []
@@ -433,13 +447,31 @@ def _convert_to_openai_chat_without_tool_call_processing(self, messages: list) -
             if "tool_choice" in message:
                 del message["tool_choice"]
         return messages
+
+    def _process_messages(self, messages: List[Dict]):
+        new_messages = []
+        for message in messages:
+            new_message = {}
+            new_message.update(message)
+            if isinstance(message["content"], list):
+                if len(message["content"]) == 1:
+                    assert message["content"][0]["type"] == "text"
+                    new_message["content"] = message["content"][0]["text"]
+                else:
+                    new_message["content"] = message["content"]
+
+            new_messages.append(new_message)
+        return new_messages

     async def generate_async(self, messages_list: str, **kwargs) -> str:
         """Generate text from prompt using Verl"""
         # We need to build a DataProto from the prompts

         generation_config = {}
         tensors = torch.ones(len(messages_list), dtype=torch.int64)
-        # messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list]
+        messages_list = [self._process_messages(messages) for messages in messages_list]
+        messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list]
         tools = kwargs.get("tools", None)
         tools_list = np.array([tools] * len(messages_list))
@@ -453,8 +485,11 @@ async def generate_async(self, messages_list: str, **kwargs) -> str:

         batch = DataProto.from_single_dict(data, meta_info={"n": n, "temperature": temperature})

-        gen_batch_output = await self.llm_engine.generate_sequences_async(batch, **generation_config)
-        response_texts = gen_batch_output.batch['responses'].tolist()  # np.array of strings with length BS
+        gen_batch_output = await self.llm_engine.generate_sequences_async(batch)
+        response_ids = gen_batch_output.batch['responses'].tolist()  # token-id sequences, one per prompt
+        assert len(response_ids) == len(messages_list)
+        response_texts = [self.tokenizer.decode(response_id, skip_special_tokens=True) for response_id in response_ids]

         return response_texts
```
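To make the new `_process_messages` pass concrete, here are hand-built messages showing the shapes involved; only a single-element, text-only content list is collapsed to a plain string, and everything else passes through unchanged:

```python
messages = [
    # Single text part: collapsed to the plain string "Hello".
    {"role": "user", "content": [{"type": "text", "text": "Hello"}]},
    # Multiple parts (e.g. text plus image): the list is kept as-is.
    {"role": "user", "content": [{"type": "text", "text": "Describe this"},
                                 {"type": "image", "image": "page.png"}]},
    # Plain-string content: copied through untouched.
    {"role": "assistant", "content": "4"},
]
```

Downstream, `generate_async` now treats the returned `responses` as token-id sequences and decodes them with the backend tokenizer, rather than assuming ready-made strings.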
**agentfly/agents/specialized/hf_agent.py** (4 additions, 0 deletions)

```diff
@@ -5,12 +5,16 @@
 from typing import List
 from ..agent_base import BaseAgent
 from ..parsers import extract_tool_calls
+import logging
+
+logger = logging.getLogger(__file__)

 class HFAgent(BaseAgent):
     def __init__(self, model_name_or_path: str, **kwargs):
         super().__init__(model_name_or_path, **kwargs)

     def parse(self, responses: List[str], **kwargs) -> List[Dict]:
+        logger.debug(f"[HFAgent] Responses: {responses}")
        new_messages_list = []
         for response in responses:
             tool_calls = extract_tool_calls(response)
```
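The new debug line is only visible when logging is configured at DEBUG level; a minimal sketch using the standard library:

```python
import logging

# Enable DEBUG output so the [HFAgent] response dump appears in the logs.
logging.basicConfig(level=logging.DEBUG)
```

Note that `logging.getLogger(__file__)` names the logger after the module's file path, so any per-logger filtering has to use that path rather than a dotted module name.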
**agentfly/envs/manager/resource.py** (4 additions, 4 deletions)

```diff
@@ -19,8 +19,8 @@ def cleanup_envs():
     for env in tqdm(GLOBAL_ENVS):
         env.close()

-import atexit, signal
+# import atexit, signal

-atexit.register(cleanup_envs)
-for sig in [signal.SIGTERM, signal.SIGINT]:
-    signal.signal(sig, cleanup_envs)
+# atexit.register(cleanup_envs)
+# for sig in [signal.SIGTERM, signal.SIGINT]:
+#     signal.signal(sig, cleanup_envs)
```
**agentfly/tests/scripts/test_cpu_runs.sh** (4 additions, 5 deletions)

```diff
@@ -3,8 +3,7 @@
 # Test CPU runs


-pytest -x agentfly/tests/unit/tools/
-pytest -x agentfly/tests/unit/envs/
-pytest -x agentfly/tests/unit/rewards/
-
-pytest -x agentfly/tests/unit/templates/
+pytest -x agentfly/tests/unit/tools/ || exit 1
+pytest -x agentfly/tests/unit/envs/ || exit 1
+pytest -x agentfly/tests/unit/rewards/ || exit 1
+pytest -x agentfly/tests/unit/templates/ || exit 1
```
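`pytest -x` already stops at the first failing test within a suite; the appended `|| exit 1` additionally makes the script abort as soon as any suite fails, instead of continuing with the remaining suites. A `set -e` at the top of the script would be an equivalent way to get this fail-fast behavior.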