Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
128 commits
Select commit Hold shift + click to select a range
905bf4b
Update README.md
rifoagenadi Jul 21, 2025
43e19ba
Merge branch 'main' of github.com:Agent-One-Lab/AgentFly
Reason-Wang Jul 21, 2025
31a1e10
update docs
Reason-Wang Jul 21, 2025
dedcd49
change submodule url
Reason-Wang Jul 21, 2025
b1f5051
update docs
Reason-Wang Jul 21, 2025
cececf6
Merge remote-tracking branch 'origin/main' into update/docs
bitalov Jul 21, 2025
6e16a11
update doc
bitalov Jul 21, 2025
afefc08
test upd
bitalov Jul 21, 2025
befb7ed
cleaning code
bitalov Jul 21, 2025
1a64425
undo commit
bitalov Jul 21, 2025
f3e3ed3
Set cache folder
Reason-Wang Jul 21, 2025
798e2af
Convert code tool input to string
Reason-Wang Jul 22, 2025
d3ebc82
Remove deprecated release_env
Reason-Wang Jul 22, 2025
1eda3ef
Update readme
Reason-Wang Jul 23, 2025
c84f19b
Update readme
Reason-Wang Jul 23, 2025
ad2438c
Add autodownload data in verl
Reason-Wang Jul 23, 2025
120012e
Update readme
Reason-Wang Jul 23, 2025
044bad4
Update react agent
Reason-Wang Jul 23, 2025
b6283c9
Add new math reward
Reason-Wang Jul 24, 2025
dc75bbe
Update demo video
Reason-Wang Jul 24, 2025
6a1dab6
Merge branch 'main' of github.com:Agent-One-Lab/AgentFly
Reason-Wang Jul 24, 2025
8e07586
Update Readme
Reason-Wang Jul 24, 2025
b7415a1
Update readme
Reason-Wang Jul 24, 2025
80554c6
Update readme
Reason-Wang Jul 24, 2025
f6d67da
Create LICENSE
Reason-Wang Jul 24, 2025
b207819
Add Github Action CI script (#1)
rifoagenadi Jul 25, 2025
4352649
update requirements
Reason-Wang Jul 26, 2025
7bd72bc
Add download tool data
Reason-Wang Jul 26, 2025
3dfca80
Merge main
Reason-Wang Jul 26, 2025
a904940
Validate tool call arguments
Reason-Wang Jul 26, 2025
4fa9d9a
fix bug
Reason-Wang Jul 26, 2025
b012555
Update react agent and verl
Reason-Wang Jul 27, 2025
ad39ac1
Add streaming option and update docs
Reason-Wang Jul 27, 2025
017edf5
Remove deprecated file
Reason-Wang Jul 27, 2025
18719f2
Merge pull request #2 from Agent-One-Lab/streaming
Reason-Wang Jul 27, 2025
293dc09
Update wechat group
Reason-Wang Jul 28, 2025
6721184
Fix bug, update verl
Reason-Wang Jul 29, 2025
782771b
Fix openai agent
Reason-Wang Aug 5, 2025
3573ce5
Add vision templates
Reason-Wang Aug 7, 2025
efff2ce
Update wechat
Reason-Wang Aug 7, 2025
b576489
Test multi-modal training
Reason-Wang Aug 9, 2025
842209b
Update verl
Reason-Wang Aug 9, 2025
b5dd7a6
Merge branch 'agents'
Reason-Wang Aug 9, 2025
c867ce7
Fix conflicts
Reason-Wang Aug 9, 2025
5e69c1a
Add template tests
Reason-Wang Aug 9, 2025
909949d
More powerful, flexible template system
Reason-Wang Aug 12, 2025
e332a9b
Merge pull request #3 from Agent-One-Lab/agents
Reason-Wang Aug 12, 2025
051fb8c
Add mock tests, fix training bug
Reason-Wang Aug 13, 2025
a5cccf9
Merge pull request #4 from Agent-One-Lab/agents
Reason-Wang Aug 13, 2025
3bbb91c
GUI Agent
yongxinwang-ai Aug 13, 2025
3f49d46
GUI Agent
yongxinwang-ai Aug 13, 2025
85d70af
GUI Agent
yongxinwang-ai Aug 13, 2025
298001c
Fix bug
Reason-Wang Aug 14, 2025
f1aa31e
Fix conflicts
Reason-Wang Aug 14, 2025
394f58b
Merge branch 'main' into multimodal
Reason-Wang Aug 14, 2025
4f585d4
Fix bug
Reason-Wang Aug 14, 2025
8b047c7
GUI Agent
yongxinwang-ai Aug 14, 2025
041cd8c
GUI Agent
yongxinwang-ai Aug 14, 2025
b075468
GUI Agent
yongxinwang-ai Aug 14, 2025
5b17da2
Add multimodal training
Reason-Wang Aug 14, 2025
ce9a755
Update wechat group
Reason-Wang Aug 14, 2025
ab0bfac
Merge pull request #5 from Agent-One-Lab/multimodal
Reason-Wang Aug 14, 2025
6db0393
Remove printing in gui agent
Reason-Wang Aug 15, 2025
9730582
Add docs for template system
Reason-Wang Aug 15, 2025
2f2b8dd
Merge branch 'main' of github.com:Agent-One-Lab/AgentFly
Reason-Wang Aug 15, 2025
f0c1b66
Train qwen2.5-vl gui agent
Reason-Wang Aug 15, 2025
14b614f
Add training examples in docs
Reason-Wang Aug 15, 2025
2cdf609
Update Readme
Reason-Wang Aug 15, 2025
9b8ba17
Make tool content to be pure observation, fix tests
Reason-Wang Aug 15, 2025
58683e7
Merge pull request #6 from Agent-One-Lab/agents
Reason-Wang Aug 15, 2025
81291a9
Merge branch 'main' into multimodal
Reason-Wang Aug 19, 2025
27ea7b1
Fix multimodal train bug
Reason-Wang Aug 21, 2025
94dd7a8
Fix bug in multi-modal training
Reason-Wang Aug 24, 2025
46893e1
Update Readme
Reason-Wang Aug 24, 2025
ad38d5a
Update Readme
Reason-Wang Aug 24, 2025
0fef647
Merge pull request #7 from Agent-One-Lab/multimodal
Reason-Wang Aug 24, 2025
f192c50
Refactor agents into module
Reason-Wang Aug 24, 2025
b0641ab
Update wechat group
Reason-Wang Aug 24, 2025
a2d290a
Add Messages class to manage messages
Reason-Wang Aug 28, 2025
f86421b
Update docs
Reason-Wang Aug 30, 2025
8b588d3
Update github CI
Reason-Wang Aug 30, 2025
14796a9
Remove cache
Reason-Wang Aug 30, 2025
7309317
Update github CI
Reason-Wang Aug 30, 2025
2669162
Update github CI
Reason-Wang Aug 30, 2025
f9786e1
Update github CI
Reason-Wang Aug 30, 2025
36aa54f
Update dependencies
Reason-Wang Aug 30, 2025
bd36ea4
Make CI time longer
Reason-Wang Aug 30, 2025
2dc0b82
Update CI test files
Reason-Wang Aug 30, 2025
e6e313b
Merge pull request #9 from Agent-One-Lab/agents
Reason-Wang Aug 30, 2025
3112b21
Update doc requirement
Reason-Wang Aug 30, 2025
918da75
Update verl
Reason-Wang Aug 30, 2025
aa53067
Update docs
Reason-Wang Aug 30, 2025
2066390
Update llm backends and docs
Reason-Wang Aug 30, 2025
23da7ed
Merge pull request #10 from Agent-One-Lab/agents
Reason-Wang Aug 30, 2025
09582ad
Update trained agents and docs
Reason-Wang Sep 1, 2025
e77c793
Merge pull request #12 from Agent-One-Lab/agents
Reason-Wang Sep 1, 2025
d3ae31a
Refine docs
Reason-Wang Sep 4, 2025
55fa450
Refine docs
Reason-Wang Sep 5, 2025
597d17b
Merge pull request #13 from Agent-One-Lab/agents
Reason-Wang Sep 5, 2025
66666a3
Update wechat group
Reason-Wang Sep 7, 2025
b2188a2
Update verl
Reason-Wang Sep 7, 2025
505cbf2
Add templates
Reason-Wang Sep 9, 2025
3a2a66e
Add qwen3 template
Reason-Wang Sep 11, 2025
f51ae36
Merge branch 'main' into agents
Reason-Wang Sep 11, 2025
12b217f
Merge pull request #14 from Agent-One-Lab/agents
Reason-Wang Sep 11, 2025
6b3ff7b
Update templates
Reason-Wang Sep 12, 2025
6b4b1d2
Merge branch 'agents' of github.com:Agent-One-Lab/AgentFly into agents
Reason-Wang Sep 12, 2025
844a5cb
Merge pull request #15 from Agent-One-Lab/agents
Reason-Wang Sep 12, 2025
00da66c
update wechat
Reason-Wang Sep 15, 2025
d8c602c
Improve local logs
Reason-Wang Sep 21, 2025
b7c8a3f
Fix bug
Reason-Wang Sep 21, 2025
5aa9ecd
Fix bug
Reason-Wang Sep 21, 2025
fc96f46
Add template
Reason-Wang Sep 21, 2025
0da8306
Merge branch 'main' into agents
Reason-Wang Sep 21, 2025
95da9c2
Add vlm_as_judge rewards
Reason-Wang Sep 29, 2025
3731853
Add vlm_as_judge rewards
Reason-Wang Sep 29, 2025
2f387ae
Add vlm_as_judge rewards
Reason-Wang Sep 29, 2025
8f5fa7b
Add vlm_as_judge rewards
Reason-Wang Sep 29, 2025
c1331fc
Add vlm_as_judge rewards and training
Reason-Wang Sep 29, 2025
155230a
Merge conflicts
Reason-Wang Sep 29, 2025
b168973
Fix client backend bugs
Reason-Wang Oct 8, 2025
daa7e59
Fix conflicts
Reason-Wang Oct 8, 2025
6e8c79e
Fix conflicts
Reason-Wang Oct 8, 2025
c17aabf
merge conflicts
Reason-Wang Oct 8, 2025
53748d7
Fix bug
Reason-Wang Oct 8, 2025
08647e5
Merge main
Reason-Wang Oct 8, 2025
62f8352
Fix bug
Reason-Wang Oct 8, 2025
97566ab
Fix bug
Reason-Wang Oct 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,10 @@ tests/e2e/toy_examples/deepspeed/synchronous/output.txt
*.lock

# data
data/
*.parquet
agentfly/agents/data/*
test_cache/

# local logs
logs
Expand All @@ -133,6 +135,10 @@ data/
test_cache/
/*.jpg
/*.png
slurm/
*.err
*.out
*.log

# Notebooks
agentfly/tests/*.ipynb
Expand All @@ -146,3 +152,11 @@ test_outputs/
agentfly/data/
*.ipynb

# training scripts
training_scripts/
verl/training_scripts/

# training scripts
training_scripts/
verl/training_scripts/

32 changes: 25 additions & 7 deletions agentfly/agents/agent_base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from abc import ABC, abstractmethod
from collections import defaultdict
from datetime import datetime
import json
from .utils.messages import MessagesList
from ..templates.templates import get_template
from ..__init__ import AGENT_DATA_DIR
from .. import AGENT_DATA_DIR
from .llm_backends import (
AsyncVLLMBackend,
AsyncVerlBackend,
Expand All @@ -23,6 +24,7 @@
import logging
from .chain.streaming_observer import ConsoleStreamObserver, StreamingManager
from .utils.tokenizer import create_processor, create_tokenizer
from ..utils.monitor import JsonlSink, Monitor, WandbSink
try:
from verl.protocol import DataProto
except ImportError:
Expand Down Expand Up @@ -51,10 +53,12 @@ def __init__(
backend_config: Any = None,
reward_fn: Callable = None,
log_file: str = "agent",
project_name: str = None,
run_name: str = None,
streaming: str = "console",
debug: bool = False,
monitors: List[str] = [],
wandb_project_name: str = None,
wandb_run_name: str = None,
local_cache_dir: str = None,
**kwargs # To pass other unused arguments
):
"""
Expand Down Expand Up @@ -94,7 +98,6 @@ def __init__(

# Create appropriate tokenizer for trajectory processing
self.tokenizer = create_tokenizer(model_name_or_path)

self.processor = create_processor(model_name_or_path)

self._reward_fn = reward_fn
Expand All @@ -104,8 +107,12 @@ def __init__(
else:
self.jinja_template = get_template(self.template).jinja_template()

self.project_name = project_name
self.run_name = run_name
self.wandb_project_name = wandb_project_name
self.wandb_run_name = wandb_run_name
self.local_cache_dir = local_cache_dir
self.local_run_cache_dir = None
self._initialize_monitor(monitors)

self.streaming_manager = StreamingManager()
if streaming == "console":
self.streaming_manager.add_observer(ConsoleStreamObserver())
Expand Down Expand Up @@ -177,6 +184,17 @@ def _preprocess_messages(self, messages: List[Dict]):

return messages_list.to_list()

def _initialize_monitor(self, monitors: List[str]) -> None:
    """Register a metric sink on ``Monitor`` for every requested monitor name.

    Args:
        monitors: Names of the monitors to enable. ``"local"`` registers a
            ``JsonlSink`` pointing at a per-run directory built from
            ``self.local_cache_dir``, the base name of
            ``self.model_name_or_path`` and the current timestamp.
            ``"wandb"`` registers a ``WandbSink`` configured with
            ``self.wandb_project_name`` and ``self.wandb_run_name``.
            ``None`` is treated like an empty list.

    Raises:
        ValueError: If ``"local"`` is requested while ``self.local_cache_dir``
            is ``None``, or if a monitor name is not supported.
    """
    for monitor in monitors or ():
        if monitor == "local":
            # Explicit raise instead of ``assert``: asserts are stripped under
            # ``python -O`` and the failure would then surface as an opaque
            # TypeError from os.path.join.
            if self.local_cache_dir is None:
                raise ValueError("local_cache_dir must be set when using local monitor.")
            self.local_run_cache_dir = os.path.join(
                self.local_cache_dir,
                os.path.basename(self.model_name_or_path),
                datetime.now().strftime("%Y%m%d_%H%M%S"),
            )
            # The trailing slash is kept as in the original sink path.
            Monitor.add_sink("jsonl", JsonlSink(f"{self.local_run_cache_dir}/"))
        elif monitor == "wandb":
            Monitor.add_sink(
                "wandb",
                WandbSink(project=self.wandb_project_name, run_name=self.wandb_run_name),
            )
        else:
            raise ValueError(f"Monitor {monitor} is not supported.")

async def run(self,
messages: Union[List[dict], np.ndarray, Dict],
max_turns: int,
Expand Down Expand Up @@ -392,4 +410,4 @@ def get_verl_data_proto(self):
batch = DataProto.from_single_dict(inputs, meta_info={"use_agent": True})

return batch


20 changes: 12 additions & 8 deletions agentfly/agents/chain/chain_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ def __init__(self):
self.terminal_status = ["terminal", "finish"]
self.global_step = 0
self.finished_chains_count = 0
self.initialize_monitor()
self.monitor_info = defaultdict(list)

def reset(self) -> None:
Expand Down Expand Up @@ -333,7 +332,7 @@ async def _run_single_chain(self,
await done_queue.put((chain_id, chain, current_node))

self.finished_chains_count += 1
self.monitor_chain()
self.monitor_chain(trajectory=current_node.messages.messages)

async def _generate_response(self, current_node, tools, depth, chain_id, enable_streaming):
"""Generate response with optional streaming support."""
Expand Down Expand Up @@ -485,7 +484,6 @@ async def _finalize_chain(self, chain_id, chain, current_node, depth):

await self.release_resources(chain_id)


async def release_resources(self, id: str) -> None:
for tool in self.tools:
if isinstance(tool, Tool):
Expand All @@ -498,10 +496,6 @@ async def set_tools(self, id: str, env_args: Dict[str, Any]) -> None:
if isinstance(tool, Tool):
await tool.set_env(id, env_args)

def initialize_monitor(self) -> None:
    """Register the jsonl and wandb sinks on ``Monitor``.

    The jsonl sink writes to ``demo_metrics.jsonl`` under ``AGENT_DATA_DIR``;
    the wandb sink is configured from ``self.project_name`` and
    ``self.run_name``.
    """
    jsonl_target = f"{AGENT_DATA_DIR}/demo_metrics.jsonl"
    Monitor.add_sink("jsonl", JsonlSink(jsonl_target))

    wandb_sink = WandbSink(project=self.project_name, run_name=self.run_name)
    Monitor.add_sink("wandb", wandb_sink)

def monitor_step(self) -> None:
messages = self.get_messages()
avg_turns = 0
Expand Down Expand Up @@ -589,9 +583,19 @@ def monitor_step(self) -> None:
emit(evt)


def monitor_chain(self) -> None:
def monitor_chain(self, trajectory) -> None:
    """Record per-chain monitoring info and emit the finished trajectory.

    Appends the current count of finished chains to ``self.monitor_info`` and,
    for every stateful tool with a positive pool size, its used environment
    size. The trajectory is then serialized to JSON and emitted as a text
    metric addressed to the ``jsonl`` sink.

    Args:
        trajectory: Messages of the finished chain, passed through
            ``serialize_for_json`` before being dumped.
    """
    self.monitor_info['Agent/chains'].append(self.finished_chains_count)

    for tool in self.tools:
        if not (tool.is_stateful and tool.pool_size > 0):
            continue
        env_size_key = f"Agent/Tool/{tool.name}/used_env_size"
        self.monitor_info[env_size_key].append(tool.used_env_size)

    # Trajectories are sent to the local jsonl sink only; logging them to
    # wandb would need a lot of bandwidth.
    trajectory_json = json.dumps(serialize_for_json(trajectory), indent=2)
    trajectory_event = MetricEvent(
        sinks=["jsonl"],
        kind="text",
        name="Agent/rollout/trajectory",
        value=trajectory_json,
        x=self.global_step,
        x_name="Agent/rollout/step",
    )
    emit(trajectory_event)
Loading