From 77a80ccfde0d3d420a8ffe5949f1dd87a8a18079 Mon Sep 17 00:00:00 2001 From: Aayush Date: Mon, 29 Apr 2024 06:22:04 +0000 Subject: [PATCH 01/10] changed by aayush --- setup.py | 2 +- syllabus/core/task_interface/task_wrapper.py | 2 +- .../minigrid_task_wrapper_verma.py | 28 + .../training_scripts/cleanrl_procgen_plr.py | 48 +- .../examples/training_scripts/command.txt | 1 + .../training_scripts/requirements.txt | 131 ++ .../training_scripts/test_minigrid_wrapper.py | 267 ++++ .../training_scripts/wandb/latest-run | 1 + .../training_scripts/cleanrl_procgen_plr.py | 522 ++++++ .../files/conda-environment.yaml | 165 ++ .../files/config.yaml | 130 ++ .../files/diff.patch | 13 + ...out.tfevents.1713840453.f411843fc70b.297.0 | 1 + .../files/requirements.txt | 146 ++ ...8eada0f5549603091d9236731d61fe0bce82.patch | 566 +++++++ .../files/wandb-metadata.json | 167 ++ .../files/wandb-summary.json | 1 + .../run-rpqjdbky.wandb | Bin 0 -> 258994 bytes .../training_scripts/cleanrl_procgen_plr.py | 528 ++++++ .../files/conda-environment.yaml | 165 ++ .../files/config.yaml | 130 ++ .../files/diff.patch | 13 + ...ut.tfevents.1713840773.f411843fc70b.1109.0 | 1 + .../files/requirements.txt | 146 ++ ...8eada0f5549603091d9236731d61fe0bce82.patch | 1417 ++++++++++++++++ .../files/wandb-metadata.json | 167 ++ .../files/wandb-summary.json | 1 + .../run-x38taylu.wandb | Bin 0 -> 97707 bytes .../training_scripts/cleanrl_procgen_plr.py | 522 ++++++ .../files/conda-environment.yaml | 165 ++ .../files/config.yaml | 126 ++ .../files/diff.patch | 122 ++ ...ut.tfevents.1713841239.f411843fc70b.1794.0 | 1 + .../files/requirements.txt | 146 ++ ...8eada0f5549603091d9236731d61fe0bce82.patch | 1408 ++++++++++++++++ .../files/wandb-metadata.json | 167 ++ .../files/wandb-summary.json | 1 + .../run-37l9hfvl.wandb | Bin 0 -> 51518 bytes .../training_scripts/cleanrl_procgen_plr.py | 524 ++++++ .../files/conda-environment.yaml | 165 ++ .../files/config.yaml | 130 ++ .../files/diff.patch | 133 ++ ...ut.tfevents.1713845400.f411843fc70b.2432.0 | 1 + .../files/requirements.txt | 146 ++ ...8eada0f5549603091d9236731d61fe0bce82.patch | 1419 ++++++++++++++++ .../files/wandb-metadata.json | 167 ++ .../files/wandb-summary.json | 1 + .../run-maennc1u.wandb | Bin 0 -> 5972 bytes .../training_scripts/cleanrl_procgen_plr.py | 524 ++++++ .../files/conda-environment.yaml | 165 ++ .../files/config.yaml | 126 ++ .../files/diff.patch | 142 ++ ...8eada0f5549603091d9236731d61fe0bce82.patch | 1421 +++++++++++++++++ .../files/wandb-metadata.json | 167 ++ .../files/wandb-summary.json | 1 + .../run-4m0uhqaw.wandb | Bin 0 -> 2455 bytes 56 files changed, 12621 insertions(+), 28 deletions(-) create mode 100644 syllabus/examples/task_wrappers/minigrid_task_wrapper_verma.py create mode 100644 syllabus/examples/training_scripts/command.txt create mode 100644 syllabus/examples/training_scripts/requirements.txt create mode 100644 syllabus/examples/training_scripts/test_minigrid_wrapper.py create mode 120000 syllabus/examples/training_scripts/wandb/latest-run create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/conda-environment.yaml create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/config.yaml create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/diff.patch create mode 120000 
syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/events.out.tfevents.1713840453.f411843fc70b.297.0 create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/requirements.txt create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-metadata.json create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-summary.json create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/run-rpqjdbky.wandb create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/conda-environment.yaml create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/config.yaml create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/diff.patch create mode 120000 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/events.out.tfevents.1713840773.f411843fc70b.1109.0 create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/requirements.txt create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-metadata.json create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-summary.json create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/run-x38taylu.wandb create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/conda-environment.yaml create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/config.yaml create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/diff.patch create mode 120000 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/events.out.tfevents.1713841239.f411843fc70b.1794.0 create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/requirements.txt create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-metadata.json create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-summary.json create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/run-37l9hfvl.wandb create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/conda-environment.yaml create mode 100644 
syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/config.yaml create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/diff.patch create mode 120000 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/events.out.tfevents.1713845400.f411843fc70b.2432.0 create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/requirements.txt create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-metadata.json create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-summary.json create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/run-maennc1u.wandb create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/conda-environment.yaml create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/config.yaml create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/diff.patch create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-metadata.json create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-summary.json create mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/run-4m0uhqaw.wandb diff --git a/setup.py b/setup.py index 31e09f23..22a94e88 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ extras = dict() -extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] +extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] extras['all'] = extras['test'] + extras['docs'] diff --git a/syllabus/core/task_interface/task_wrapper.py b/syllabus/core/task_interface/task_wrapper.py index 89c94ff3..e44e9b5a 100644 --- a/syllabus/core/task_interface/task_wrapper.py +++ b/syllabus/core/task_interface/task_wrapper.py @@ -96,4 +96,4 @@ def __getattr__(self, attr): # return env_attr # def get_current_task(self): -# return self.current_task \ No newline at end of file +# return self.current_task diff --git a/syllabus/examples/task_wrappers/minigrid_task_wrapper_verma.py b/syllabus/examples/task_wrappers/minigrid_task_wrapper_verma.py new file mode 100644 index 00000000..3e36b8e7 --- /dev/null +++ b/syllabus/examples/task_wrappers/minigrid_task_wrapper_verma.py @@ -0,0 +1,28 @@ +import gymnasium as gym +import numpy as np +from syllabus.core import TaskWrapper +from syllabus.task_space import TaskSpace +from minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper + + +class MinigridTaskWrapperVerma(TaskWrapper): + def __init__(self, env: gym.Env, env_id, seed=0): + super().__init__(env) + self.task_space = TaskSpace(gym.spaces.Discrete(200), list(np.arange(0, 200))) + self.env_id = env_id + self.task = seed + 
self.episode_return = 0 + + env_fn = [partial(self._make_minigrid_env, env_name, seeds[i]) for i in range(num_envs)] + + self.observation_space = self.env.observation_space + + @staticmethod + def _make_minigrid_env(env_name, seed): + self.seed(seed) + env = FullyObsWrapper(env) + env = ImgObsWrapper(env) + return env + + def seed(self, seed): + self.env.gym_env.unwrapped._venv.seed(int(seed), 0) diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py index dabcd500..a0cdc2af 100644 --- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py @@ -24,7 +24,7 @@ from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum from syllabus.examples.models import ProcgenAgent -from syllabus.examples.task_wrappers import ProcgenTaskWrapper +from syllabus.examples.task_wrappers import ProcgenTaskWrapper, MinigridTaskWrapper from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs @@ -136,7 +136,7 @@ def thunk(): env = MultiProcessingSyncWrapper( env, curriculum.get_components(), - update_on_step=curriculum.requires_step_updates, + update_on_step=False, task_space=env.task_space, ) return env @@ -150,37 +150,31 @@ def wrap_vecenv(vecenv): return vecenv -def full_level_replay_evaluate( +def slow_level_replay_evaluate( env_name, policy, num_episodes, device, - num_levels=1 # Not used + num_levels=0 ): policy.eval() eval_envs = ProcgenEnv( - num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False + num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False ) eval_envs = VecExtractDictObs(eval_envs, "rgb") eval_envs = wrap_vecenv(eval_envs) - - # Seed environments - seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] - for i, seed in enumerate(seeds): - eval_envs.seed(seed, i) - eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes + eval_episode_rewards = [] - while -1 in eval_episode_rewards: + while len(eval_episode_rewards) < num_episodes: with torch.no_grad(): eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) for i, info in enumerate(infos): - if 'episode' in info.keys() and eval_episode_rewards[i] == -1: - eval_episode_rewards[i] = info['episode']['r'] + if 'episode' in info.keys(): + eval_episode_rewards.append(info['episode']['r']) mean_returns = np.mean(eval_episode_rewards) stddev_returns = np.std(eval_episode_rewards) @@ -251,7 +245,7 @@ def get_value(obs): ) # wandb.run.log_code("./syllabus/examples") - writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) + writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) writer.add_text( "hyperparameters", "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), @@ -271,7 +265,9 @@ def get_value(obs): if args.curriculum: sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") sample_env = GymV21CompatibilityV0(env=sample_env) + # code to edit sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) + # sample_env 
= MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) # Intialize Curriculum Method if args.curriculum_method == "plr": @@ -485,13 +481,13 @@ def get_value(obs): mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ) - full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( + slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ) mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ) - full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( + slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ) @@ -510,17 +506,17 @@ def get_value(obs): writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) - writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) - writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) + writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) - writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) - writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) + writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) diff --git a/syllabus/examples/training_scripts/command.txt b/syllabus/examples/training_scripts/command.txt new file mode 100644 index 00000000..ab135b53 --- /dev/null +++ b/syllabus/examples/training_scripts/command.txt @@ -0,0 +1 @@ +python cleanrl_procgen_plr.py --curriculum True --track True --env-id "bigfish" diff --git a/syllabus/examples/training_scripts/requirements.txt 
b/syllabus/examples/training_scripts/requirements.txt new file mode 100644 index 00000000..505ff718 --- /dev/null +++ b/syllabus/examples/training_scripts/requirements.txt @@ -0,0 +1,131 @@ +absl-py==2.1.0 +aiosignal==1.3.1 +alabaster==0.7.13 +attrs==23.2.0 +Babel==2.14.0 +beautifulsoup4==4.12.3 +cachetools==5.3.3 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpickle==3.0.0 +cmake==3.29.2 +contourpy==1.1.1 +cycler==0.12.1 +dm-tree==0.1.8 +docutils==0.20.1 +exceptiongroup==1.2.0 +Farama-Notifications==0.0.4 +filelock==3.13.4 +fonttools==4.51.0 +frozenlist==1.4.1 +fsspec==2024.3.1 +furo==2024.1.29 +future==1.0.0 +glcontext==2.5.0 +glfw==1.12.0 +google-auth==2.29.0 +google-auth-oauthlib==1.0.0 +grpcio==1.62.1 +gym==0.23.0 +gym-notices==0.0.8 +gymnasium==0.28.1 +idna==3.7 +imageio==2.34.0 +imageio-ffmpeg==0.3.0 +imagesize==1.4.1 +importlib_metadata==7.1.0 +importlib_resources==6.4.0 +iniconfig==2.0.0 +jax-jumpy==1.0.0 +Jinja2==3.1.3 +jsonschema==4.21.1 +jsonschema-specifications==2023.12.1 +kiwisolver==1.4.5 +lazy_loader==0.4 +lz4==4.3.3 +Markdown==3.6 +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +matplotlib==3.7.5 +mdurl==0.1.2 +moderngl==5.10.0 +mpmath==1.3.0 +msgpack==1.0.8 +networkx==3.1 +numpy==1.24.4 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.19.3 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nvtx-cu12==12.1.105 +oauthlib==3.2.2 +packaging==24.0 +pandas==2.0.3 +pillow==10.3.0 +pkgutil_resolve_name==1.3.10 +pluggy==1.4.0 +-e git+https://github.com/RyanNavillus/procgen.git@ed4be818681701c52cdf5d4996d1ceca7083e368#egg=procgen +protobuf==5.26.1 +py-cpuinfo==9.0.0 +pyarrow==15.0.2 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pycparser==2.22 +pyenchant==3.2.2 +pyglet==1.4.11 +Pygments==2.17.2 +pyparsing==3.1.2 +pytest==8.1.1 +pytest-benchmark==4.0.0 +python-dateutil==2.9.0.post0 +pytz==2024.1 +PyWavelets==1.4.1 +PyYAML==6.0.1 +ray==2.10.0 +referencing==0.34.0 +requests==2.31.0 +requests-oauthlib==2.0.0 +rich==13.7.1 +rpds-py==0.18.0 +rsa==4.9 +scikit-image==0.21.0 +scipy==1.10.0 +shellingham==1.5.4 +Shimmy==1.3.0 +six==1.16.0 +snowballstemmer==2.2.0 +soupsieve==2.5 +Sphinx==7.1.2 +sphinx-basic-ng==1.0.0b2 +sphinx-tabs==3.4.5 +sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-spelling==8.0.0 +-e git+https://github.com/RoseyGreenBlue/Syllabus.git@6e36433fbb5c0e990358d7f895d976086dbfb65e#egg=Syllabus_RL +sympy==1.12 +tensorboard==2.14.0 +tensorboard-data-server==0.7.2 +tensorboardX==2.6.2.2 +tifffile==2023.7.10 +tomli==2.0.1 +torch==2.2.2 +triton==2.2.0 +typer==0.12.3 +typing_extensions==4.11.0 +tzdata==2024.1 +urllib3==2.2.1 +Werkzeug==3.0.2 +zipp==3.18.1 diff --git a/syllabus/examples/training_scripts/test_minigrid_wrapper.py b/syllabus/examples/training_scripts/test_minigrid_wrapper.py new file mode 100644 index 00000000..092d8848 --- /dev/null +++ b/syllabus/examples/training_scripts/test_minigrid_wrapper.py @@ -0,0 +1,267 @@ +import argparse +import os, sys +import random +import time +from collections import deque +from distutils.util import strtobool + +import gym as openai_gym +import gymnasium as gym +import numpy as np +import procgen # noqa: F401 
+from procgen import ProcgenEnv +import torch +import torch.nn as nn +import torch.optim as optim +from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 +from torch.utils.tensorboard import SummaryWriter + +from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum +from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum +from syllabus.examples.models import ProcgenAgent +from syllabus.examples.task_wrappers import ProcgenTaskWrapper, MinigridTaskWrapper +from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs +sys.path.append("/data/averma/MARL/Syllabus/syllabus/examples/task_wrappers") +from minigrid_task_wrapper_verma import * + + +def parse_args(): + # fmt: off + parser = argparse.ArgumentParser() + parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), + help="the name of this experiment") + parser.add_argument("--seed", type=int, default=1, + help="seed of the experiment") + parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="if toggled, `torch.backends.cudnn.deterministic=False`") + parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="if toggled, cuda will be enabled by default") + parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="if toggled, this experiment will be tracked with Weights and Biases") + parser.add_argument("--wandb-project-name", type=str, default="syllabus", + help="the wandb's project name") + parser.add_argument("--wandb-entity", type=str, default=None, + help="the entity (team) of wandb's project") + parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="weather to capture videos of the agent performances (check out `videos` folder)") + parser.add_argument("--logging-dir", type=str, default=".", + help="the base directory for logging and wandb storage.") + + # Algorithm specific arguments + parser.add_argument("--env-id", type=str, default="starpilot", + help="the id of the environment") + parser.add_argument("--total-timesteps", type=int, default=int(25e6), + help="total timesteps of the experiments") + parser.add_argument("--learning-rate", type=float, default=5e-4, + help="the learning rate of the optimizer") + parser.add_argument("--num-envs", type=int, default=64, + help="the number of parallel game environments") + parser.add_argument("--num-steps", type=int, default=256, + help="the number of steps to run in each environment per policy rollout") + parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="Toggle learning rate annealing for policy and value networks") + parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Use GAE for advantage computation") + parser.add_argument("--gamma", type=float, default=0.999, + help="the discount factor gamma") + parser.add_argument("--gae-lambda", type=float, default=0.95, + help="the lambda for the general advantage estimation") + parser.add_argument("--num-minibatches", type=int, default=8, + help="the number of mini-batches") + parser.add_argument("--update-epochs", type=int, default=3, + help="the K epochs to update the policy") + parser.add_argument("--norm-adv", type=lambda 
x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Toggles advantages normalization") + parser.add_argument("--clip-coef", type=float, default=0.2, + help="the surrogate clipping coefficient") + parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") + parser.add_argument("--ent-coef", type=float, default=0.01, + help="coefficient of the entropy") + parser.add_argument("--vf-coef", type=float, default=0.5, + help="coefficient of the value function") + parser.add_argument("--max-grad-norm", type=float, default=0.5, + help="the maximum norm for the gradient clipping") + parser.add_argument("--target-kl", type=float, default=None, + help="the target KL divergence threshold") + + # Procgen arguments + parser.add_argument("--full-dist", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Train on full distribution of levels.") + + # Curriculum arguments + parser.add_argument("--curriculum", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="if toggled, this experiment will use curriculum learning") + parser.add_argument("--curriculum-method", type=str, default="plr", + help="curriculum method to use") + parser.add_argument("--num-eval-episodes", type=int, default=10, + help="the number of episodes to evaluate the agent on after each policy update.") + + args = parser.parse_args() + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + # fmt: on + return args + + +PROCGEN_RETURN_BOUNDS = { + "coinrun": (5, 10), + "starpilot": (2.5, 64), + "caveflyer": (3.5, 12), + "dodgeball": (1.5, 19), + "fruitbot": (-1.5, 32.4), + "chaser": (0.5, 13), + "miner": (1.5, 13), + "jumper": (3, 10), + "leaper": (3, 10), + "maze": (5, 10), + "bigfish": (1, 40), + "heist": (3.5, 10), + "climber": (2, 12.6), + "plunder": (4.5, 30), + "ninja": (3.5, 10), + "bossfight": (0.5, 13), +} + + +def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) + if curriculum is not None: + env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, + curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, + ) + return env + return thunk + + +def wrap_vecenv(vecenv): + vecenv.is_vector_env = True + vecenv = VecMonitor(venv=vecenv, filename=None, keep_buf=100) + vecenv = VecNormalize(venv=vecenv, ob=False, ret=True) + return vecenv + + +def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, + num_levels=0 +): + policy.eval() + + eval_envs = ProcgenEnv( + num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [] + + while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): + if 'episode' in info.keys(): + 
eval_episode_rewards.append(info['episode']['r']) + + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) + policy.train() + return mean_returns, stddev_returns, normalized_mean_returns + + +def level_replay_evaluate( + env_name, + policy, + num_episodes, + device, + num_levels=0 +): + policy.eval() + + eval_envs = ProcgenEnv( + num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): + if 'episode' in info.keys() and eval_episode_rewards[i] == -1: + eval_episode_rewards[i] = info['episode']['r'] + + # print(eval_episode_rewards) + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) + policy.train() + return mean_returns, stddev_returns, normalized_mean_returns + + +def make_value_fn(): + def get_value(obs): + obs = np.array(obs) + with torch.no_grad(): + return agent.get_value(torch.Tensor(obs).to(device)) + return get_value + +def print_values(obj): + describer = obj.__dict__ + for key in describer.keys(): + print(f"{key}: {describer[key]}") + print() + + +if __name__ == "__main__": + args = parse_args() + run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + torch.backends.cudnn.deterministic = args.torch_deterministic + + device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") + print("Device:", device) + + # Curriculum setup + curriculum = None + if args.curriculum: + print("args:\n--------------") + print(f"{args}\n-------------\n") + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) + procgen_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) + minigrid_env = MinigridTaskWrapperVerma(sample_env, args.env_id, seed=args.seed) + # print() + # print("procgen_env attr") + print_values(procgen_env.env) + + # seeds = [int.from_bytes(os.urandom(4), byteorder="little") for _ in range(args.num_envs)] + seeds = [int(s) for s in np.random.choice(10, args.num_envs)] + print(seeds) + + # print("procgen_env.env attr:") + # print_values(procgen_env.env) + # + # print("procgen_env.env.gym_env attr:") + # print_values(procgen_env.env.gym_env) + + diff --git a/syllabus/examples/training_scripts/wandb/latest-run b/syllabus/examples/training_scripts/wandb/latest-run new file mode 120000 index 00000000..459ba0d0 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/latest-run @@ -0,0 +1 @@ +run-20240423_041359-4m0uhqaw \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py 
b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py new file mode 100644 index 00000000..d2d54b58 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py @@ -0,0 +1,522 @@ +""" An example applying Syllabus Prioritized Level Replay to Procgen. This code is based on https://github.com/facebookresearch/level-replay/blob/main/train.py + +NOTE: In order to efficiently change the seed of a procgen environment directly without reinitializing it, +we rely on Minqi Jiang's custom branch of procgen found here: https://github.com/minqi/procgen +""" +import argparse +import os +import random +import time +from collections import deque +from distutils.util import strtobool + +import gym as openai_gym +import gymnasium as gym +import numpy as np +import procgen # noqa: F401 +from procgen import ProcgenEnv +import torch +import torch.nn as nn +import torch.optim as optim +from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 +from torch.utils.tensorboard import SummaryWriter + +from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum +from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum +from syllabus.examples.models import ProcgenAgent +from syllabus.examples.task_wrappers import ProcgenTaskWrapper +from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + +def parse_args(): + # fmt: off + parser = argparse.ArgumentParser() + parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), + help="the name of this experiment") + parser.add_argument("--seed", type=int, default=1, + help="seed of the experiment") + parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="if toggled, `torch.backends.cudnn.deterministic=False`") + parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="if toggled, cuda will be enabled by default") + parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="if toggled, this experiment will be tracked with Weights and Biases") + parser.add_argument("--wandb-project-name", type=str, default="syllabus", + help="the wandb's project name") + parser.add_argument("--wandb-entity", type=str, default=None, + help="the entity (team) of wandb's project") + parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="weather to capture videos of the agent performances (check out `videos` folder)") + parser.add_argument("--logging-dir", type=str, default=".", + help="the base directory for logging and wandb storage.") + + # Algorithm specific arguments + parser.add_argument("--env-id", type=str, default="starpilot", + help="the id of the environment") + parser.add_argument("--total-timesteps", type=int, default=int(25e6), + help="total timesteps of the experiments") + parser.add_argument("--learning-rate", type=float, default=5e-4, + help="the learning rate of the optimizer") + parser.add_argument("--num-envs", type=int, default=64, + help="the number of parallel game environments") + parser.add_argument("--num-steps", type=int, default=256, + help="the number of steps to run in each environment 
per policy rollout") + parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="Toggle learning rate annealing for policy and value networks") + parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Use GAE for advantage computation") + parser.add_argument("--gamma", type=float, default=0.999, + help="the discount factor gamma") + parser.add_argument("--gae-lambda", type=float, default=0.95, + help="the lambda for the general advantage estimation") + parser.add_argument("--num-minibatches", type=int, default=8, + help="the number of mini-batches") + parser.add_argument("--update-epochs", type=int, default=3, + help="the K epochs to update the policy") + parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Toggles advantages normalization") + parser.add_argument("--clip-coef", type=float, default=0.2, + help="the surrogate clipping coefficient") + parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") + parser.add_argument("--ent-coef", type=float, default=0.01, + help="coefficient of the entropy") + parser.add_argument("--vf-coef", type=float, default=0.5, + help="coefficient of the value function") + parser.add_argument("--max-grad-norm", type=float, default=0.5, + help="the maximum norm for the gradient clipping") + parser.add_argument("--target-kl", type=float, default=None, + help="the target KL divergence threshold") + + # Procgen arguments + parser.add_argument("--full-dist", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Train on full distribution of levels.") + + # Curriculum arguments + parser.add_argument("--curriculum", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="if toggled, this experiment will use curriculum learning") + parser.add_argument("--curriculum-method", type=str, default="plr", + help="curriculum method to use") + parser.add_argument("--num-eval-episodes", type=int, default=10, + help="the number of episodes to evaluate the agent on after each policy update.") + + args = parser.parse_args() + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + # fmt: on + return args + + +PROCGEN_RETURN_BOUNDS = { + "coinrun": (5, 10), + "starpilot": (2.5, 64), + "caveflyer": (3.5, 12), + "dodgeball": (1.5, 19), + "fruitbot": (-1.5, 32.4), + "chaser": (0.5, 13), + "miner": (1.5, 13), + "jumper": (3, 10), + "leaper": (3, 10), + "maze": (5, 10), + "bigfish": (1, 40), + "heist": (3.5, 10), + "climber": (2, 12.6), + "plunder": (4.5, 30), + "ninja": (3.5, 10), + "bossfight": (0.5, 13), +} + + +def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) + if curriculum is not None: + env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, + curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, + ) + return env + return thunk + + +def wrap_vecenv(vecenv): + vecenv.is_vector_env = True + vecenv = VecMonitor(venv=vecenv, filename=None, keep_buf=100) + vecenv = 
VecNormalize(venv=vecenv, ob=False, ret=True) + return vecenv + + +def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, + num_levels=0 +): + policy.eval() + + eval_envs = ProcgenEnv( + num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [] + + while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): + if 'episode' in info.keys(): + eval_episode_rewards.append(info['episode']['r']) + + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) + policy.train() + return mean_returns, stddev_returns, normalized_mean_returns + + +def level_replay_evaluate( + env_name, + policy, + num_episodes, + device, + num_levels=0 +): + policy.eval() + + eval_envs = ProcgenEnv( + num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): + if 'episode' in info.keys() and eval_episode_rewards[i] == -1: + eval_episode_rewards[i] = info['episode']['r'] + + # print(eval_episode_rewards) + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) + policy.train() + return mean_returns, stddev_returns, normalized_mean_returns + + +def make_value_fn(): + def get_value(obs): + obs = np.array(obs) + with torch.no_grad(): + return agent.get_value(torch.Tensor(obs).to(device)) + return get_value + + +if __name__ == "__main__": + args = parse_args() + run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" + if args.track: + import wandb + + wandb.init( + project=args.wandb_project_name, + entity=args.wandb_entity, + sync_tensorboard=True, + config=vars(args), + name=run_name, + monitor_gym=True, + save_code=True, + dir=args.logging_dir + ) + # wandb.run.log_code("./syllabus/examples") + + writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), + ) + + # TRY NOT TO MODIFY: seeding + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + torch.backends.cudnn.deterministic = args.torch_deterministic + + device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") + print("Device:", device) + + # Curriculum setup + curriculum = None + if args.curriculum: + 
sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) + sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) + + # Intialize Curriculum Method + if args.curriculum_method == "plr": + print("Using prioritized level replay.") + curriculum = PrioritizedLevelReplay( + sample_env.task_space, + sample_env.observation_space, + num_steps=args.num_steps, + num_processes=args.num_envs, + gamma=args.gamma, + gae_lambda=args.gae_lambda, + task_sampler_kwargs_dict={"strategy": "value_l1"}, + get_value=make_value_fn(), + ) + elif args.curriculum_method == "dr": + print("Using domain randomization.") + curriculum = DomainRandomization(sample_env.task_space) + elif args.curriculum_method == "lp": + print("Using learning progress.") + curriculum = LearningProgressCurriculum(sample_env.task_space) + elif args.curriculum_method == "sq": + print("Using sequential curriculum.") + curricula = [] + stopping = [] + for i in range(199): + curricula.append(i + 1) + stopping.append("steps>=50000") + curricula.append(list(range(i + 1))) + stopping.append("steps>=50000") + curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) + else: + raise ValueError(f"Unknown curriculum method {args.curriculum_method}") + curriculum = make_multiprocessing_curriculum(curriculum) + del sample_env + + # env setup + print("Creating env") + envs = gym.vector.AsyncVectorEnv( + [ + make_env( + args.env_id, + args.seed + i, + curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) + ] + ) + envs = wrap_vecenv(envs) + + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( + envs.single_observation_space.shape, + envs.single_action_space.n, + arch="large", + base_kwargs={'recurrent': False, 'hidden_size': 256} + ).to(device) + optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) + + # ALGO Logic: Storage setup + obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) + actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) + logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) + rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) + dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + values = torch.zeros((args.num_steps, args.num_envs)).to(device) + + # TRY NOT TO MODIFY: start the game + global_step = 0 + start_time = time.time() + next_obs, _ = envs.reset() + next_obs = torch.Tensor(next_obs).to(device) + next_done = torch.zeros(args.num_envs).to(device) + num_updates = args.total_timesteps // args.batch_size + episode_rewards = deque(maxlen=10) + completed_episodes = 0 + + for update in range(1, num_updates + 1): + # Annealing the rate if instructed to do so. + if args.anneal_lr: + frac = 1.0 - (update - 1.0) / num_updates + lrnow = frac * args.learning_rate + optimizer.param_groups[0]["lr"] = lrnow + + for step in range(0, args.num_steps): + global_step += 1 * args.num_envs + obs[step] = next_obs + dones[step] = next_done + + # ALGO LOGIC: action logic + with torch.no_grad(): + action, logprob, _, value = agent.get_action_and_value(next_obs) + values[step] = value.flatten() + actions[step] = action + logprobs[step] = logprob + + # TRY NOT TO MODIFY: execute the game and log data. 
+ next_obs, reward, term, trunc, info = envs.step(action.cpu().numpy()) + done = np.logical_or(term, trunc) + rewards[step] = torch.tensor(reward).to(device).view(-1) + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + completed_episodes += sum(done) + + for item in info: + if "episode" in item.keys(): + episode_rewards.append(item['episode']['r']) + print(f"global_step={global_step}, episodic_return={item['episode']['r']}") + writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) + if curriculum is not None: + curriculum.log_metrics(writer, global_step) + break + + # bootstrap value if not done + with torch.no_grad(): + next_value = agent.get_value(next_obs).reshape(1, -1) + if args.gae: + advantages = torch.zeros_like(rewards).to(device) + lastgaelam = 0 + for t in reversed(range(args.num_steps)): + if t == args.num_steps - 1: + nextnonterminal = 1.0 - next_done + nextvalues = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + nextvalues = values[t + 1] + delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] + advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam + returns = advantages + values + else: + returns = torch.zeros_like(rewards).to(device) + for t in reversed(range(args.num_steps)): + if t == args.num_steps - 1: + nextnonterminal = 1.0 - next_done + next_return = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + next_return = returns[t + 1] + returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return + advantages = returns - values + + # flatten the batch + b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) + b_logprobs = logprobs.reshape(-1) + b_actions = actions.reshape((-1,) + envs.single_action_space.shape) + b_advantages = advantages.reshape(-1) + b_returns = returns.reshape(-1) + b_values = values.reshape(-1) + + # Optimizing the policy and value network + b_inds = np.arange(args.batch_size) + clipfracs = [] + for epoch in range(args.update_epochs): + np.random.shuffle(b_inds) + for start in range(0, args.batch_size, args.minibatch_size): + end = start + args.minibatch_size + mb_inds = b_inds[start:end] + + _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds]) + logratio = newlogprob - b_logprobs[mb_inds] + ratio = logratio.exp() + + with torch.no_grad(): + # calculate approx_kl http://joschu.net/blog/kl-approx.html + old_approx_kl = (-logratio).mean() + approx_kl = ((ratio - 1) - logratio).mean() + clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()] + + mb_advantages = b_advantages[mb_inds] + if args.norm_adv: + mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) + + # Policy loss + pg_loss1 = -mb_advantages * ratio + pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + args.clip_coef) + pg_loss = torch.max(pg_loss1, pg_loss2).mean() + + # Value loss + newvalue = newvalue.view(-1) + if args.clip_vloss: + v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 + v_clipped = b_values[mb_inds] + torch.clamp( + newvalue - b_values[mb_inds], + -args.clip_coef, + args.clip_coef, + ) + v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 + v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) + v_loss = 0.5 * v_loss_max.mean() + else: + v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() + + 
entropy_loss = entropy.mean() + loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef + + optimizer.zero_grad() + loss.backward() + nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) + optimizer.step() + + if args.target_kl is not None: + if approx_kl > args.target_kl: + break + + y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() + var_y = np.var(y_true) + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent + mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) + writer.add_scalar("charts/episode_returns", np.mean(episode_rewards), global_step) + writer.add_scalar("losses/value_loss", v_loss.item(), global_step) + writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step) + writer.add_scalar("losses/entropy", entropy_loss.item(), global_step) + writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step) + writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step) + writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step) + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) + + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) + + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) + + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() + writer.close() diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/conda-environment.yaml 
b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/conda-environment.yaml new file mode 100644 index 00000000..cd0b0b09 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/conda-environment.yaml @@ -0,0 +1,165 @@ +name: test2_py +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - ca-certificates=2024.3.11=h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.3=he6710b0_2 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - ncurses=6.4=h6a678d5_0 + - openssl=1.1.1w=h7f8727e_0 + - pip=23.3.1=py38h06a4308_0 + - python=3.8.5=h7579374_1 + - readline=8.2=h5eee18b_0 + - setuptools=68.2.2=py38h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - wheel=0.41.2=py38h06a4308_0 + - xz=5.4.6=h5eee18b_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - absl-py==2.1.0 + - aiosignal==1.3.1 + - alabaster==0.7.13 + - appdirs==1.4.4 + - attrs==23.2.0 + - babel==2.14.0 + - beautifulsoup4==4.12.3 + - cachetools==5.3.3 + - certifi==2024.2.2 + - cffi==1.16.0 + - charset-normalizer==3.3.2 + - click==8.1.7 + - cloudpickle==3.0.0 + - cmake==3.29.2 + - contourpy==1.1.1 + - cycler==0.12.1 + - dm-tree==0.1.8 + - docker-pycreds==0.4.0 + - docutils==0.20.1 + - exceptiongroup==1.2.0 + - farama-notifications==0.0.4 + - filelock==3.13.4 + - fonttools==4.51.0 + - frozenlist==1.4.1 + - fsspec==2024.3.1 + - furo==2024.1.29 + - future==1.0.0 + - gitdb==4.0.11 + - gitpython==3.1.43 + - glcontext==2.5.0 + - glfw==1.12.0 + - google-auth==2.29.0 + - google-auth-oauthlib==1.0.0 + - grpcio==1.62.1 + - gym==0.23.0 + - gym-notices==0.0.8 + - gymnasium==0.28.1 + - idna==3.7 + - imageio==2.34.0 + - imageio-ffmpeg==0.3.0 + - imagesize==1.4.1 + - importlib-metadata==7.1.0 + - importlib-resources==6.4.0 + - iniconfig==2.0.0 + - jax-jumpy==1.0.0 + - jinja2==3.1.3 + - jsonschema==4.21.1 + - jsonschema-specifications==2023.12.1 + - kiwisolver==1.4.5 + - lazy-loader==0.4 + - lz4==4.3.3 + - markdown==3.6 + - markdown-it-py==3.0.0 + - markupsafe==2.1.5 + - matplotlib==3.7.5 + - mdurl==0.1.2 + - moderngl==5.10.0 + - mpmath==1.3.0 + - msgpack==1.0.8 + - networkx==3.1 + - numpy==1.24.4 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==8.9.2.26 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - nvidia-cusparse-cu12==12.1.0.106 + - nvidia-nccl-cu12==2.19.3 + - nvidia-nvjitlink-cu12==12.4.127 + - nvidia-nvtx-cu12==12.1.105 + - oauthlib==3.2.2 + - packaging==24.0 + - pandas==2.0.3 + - pillow==10.3.0 + - pkgutil-resolve-name==1.3.10 + - pluggy==1.4.0 + - protobuf==4.25.3 + - psutil==5.9.8 + - py-cpuinfo==9.0.0 + - pyarrow==15.0.2 + - pyasn1==0.6.0 + - pyasn1-modules==0.4.0 + - pycparser==2.22 + - pyenchant==3.2.2 + - pyglet==1.4.11 + - pygments==2.17.2 + - pyparsing==3.1.2 + - pytest==8.1.1 + - pytest-benchmark==4.0.0 + - python-dateutil==2.9.0.post0 + - pytz==2024.1 + - pywavelets==1.4.1 + - pyyaml==6.0.1 + - ray==2.10.0 + - referencing==0.34.0 + - requests==2.31.0 + - requests-oauthlib==2.0.0 + - rich==13.7.1 + - rpds-py==0.18.0 + - rsa==4.9 + - scikit-image==0.21.0 + - scipy==1.10.0 + - sentry-sdk==1.45.0 + - setproctitle==1.3.3 + - shellingham==1.5.4 + - shimmy==1.3.0 + - six==1.16.0 + - smmap==5.0.1 + - snowballstemmer==2.2.0 + - soupsieve==2.5 + - sphinx==7.1.2 + - sphinx-basic-ng==1.0.0b2 + - 
sphinx-tabs==3.4.5 + - sphinxcontrib-applehelp==1.0.4 + - sphinxcontrib-devhelp==1.0.2 + - sphinxcontrib-htmlhelp==2.0.1 + - sphinxcontrib-jsmath==1.0.1 + - sphinxcontrib-qthelp==1.0.3 + - sphinxcontrib-serializinghtml==1.1.5 + - sphinxcontrib-spelling==8.0.0 + - syllabus-rl==0.5 + - sympy==1.12 + - tensorboard==2.14.0 + - tensorboard-data-server==0.7.2 + - tensorboardx==2.6.2.2 + - tifffile==2023.7.10 + - tomli==2.0.1 + - torch==2.2.2 + - triton==2.2.0 + - typer==0.12.3 + - typing-extensions==4.11.0 + - tzdata==2024.1 + - urllib3==2.2.1 + - wandb==0.16.6 + - werkzeug==3.0.2 + - zipp==3.18.1 +prefix: /home/user/miniconda/envs/test2_py + diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/config.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/config.yaml new file mode 100644 index 00000000..60afaf32 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/config.yaml @@ -0,0 +1,130 @@ +wandb_version: 1 + +exp_name: + desc: null + value: cleanrl_procgen_plr +seed: + desc: null + value: 1 +torch_deterministic: + desc: null + value: true +cuda: + desc: null + value: true +track: + desc: null + value: true +wandb_project_name: + desc: null + value: syllabus +wandb_entity: + desc: null + value: null +capture_video: + desc: null + value: false +logging_dir: + desc: null + value: . +env_id: + desc: null + value: bigfish +total_timesteps: + desc: null + value: 25000000 +learning_rate: + desc: null + value: 0.0005 +num_envs: + desc: null + value: 64 +num_steps: + desc: null + value: 256 +anneal_lr: + desc: null + value: false +gae: + desc: null + value: true +gamma: + desc: null + value: 0.999 +gae_lambda: + desc: null + value: 0.95 +num_minibatches: + desc: null + value: 8 +update_epochs: + desc: null + value: 3 +norm_adv: + desc: null + value: true +clip_coef: + desc: null + value: 0.2 +clip_vloss: + desc: null + value: true +ent_coef: + desc: null + value: 0.01 +vf_coef: + desc: null + value: 0.5 +max_grad_norm: + desc: null + value: 0.5 +target_kl: + desc: null + value: null +full_dist: + desc: null + value: true +curriculum: + desc: null + value: true +curriculum_method: + desc: null + value: plr +num_eval_episodes: + desc: null + value: 10 +batch_size: + desc: null + value: 16384 +minibatch_size: + desc: null + value: 2048 +_wandb: + desc: null + value: + code_path: code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py + python_version: 3.8.5 + cli_version: 0.16.6 + framework: torch + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1713840447.0 + t: + 1: + - 1 + - 30 + - 55 + 2: + - 1 + - 30 + - 55 + 3: + - 13 + - 16 + - 23 + - 35 + 4: 3.8.5 + 5: 0.16.6 + 8: + - 5 + 13: linux-x86_64 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/diff.patch b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/diff.patch new file mode 100644 index 00000000..0a6b4640 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/diff.patch @@ -0,0 +1,13 @@ +diff --git a/setup.py b/setup.py +index 31e09f2..22a94e8 100644 +--- a/setup.py ++++ b/setup.py +@@ -2,7 +2,7 @@ from setuptools import find_packages, setup + + + extras = dict() +-extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] ++extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] + extras['docs'] = 
['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] + extras['all'] = extras['test'] + extras['docs'] + diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/events.out.tfevents.1713840453.f411843fc70b.297.0 b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/events.out.tfevents.1713840453.f411843fc70b.297.0 new file mode 120000 index 00000000..b47732cd --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/events.out.tfevents.1713840453.f411843fc70b.297.0 @@ -0,0 +1 @@ +/data/averma/MARL/Syllabus/syllabus/examples/training_scripts/runs/{run_name}/events.out.tfevents.1713840453.f411843fc70b.297.0 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/requirements.txt b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/requirements.txt new file mode 100644 index 00000000..7f33d240 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/requirements.txt @@ -0,0 +1,146 @@ +Babel==2.14.0 +Farama-Notifications==0.0.4 +GitPython==3.1.43 +Jinja2==3.1.3 +Markdown==3.6 +MarkupSafe==2.1.5 +PyWavelets==1.4.1 +PyYAML==6.0.1 +Pygments==2.17.2 +Shimmy==1.3.0 +Sphinx==7.1.2 +Syllabus-RL==0.5 +Werkzeug==3.0.2 +absl-py==2.1.0 +aiosignal==1.3.1 +alabaster==0.7.13 +appdirs==1.4.4 +attrs==23.2.0 +beautifulsoup4==4.12.3 +cachetools==5.3.3 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpickle==3.0.0 +cmake==3.29.2 +colorama==0.4.6 +contourpy==1.1.1 +cycler==0.12.1 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +docutils==0.20.1 +exceptiongroup==1.2.0 +filelock==3.13.4 +fonttools==4.51.0 +frozenlist==1.4.1 +fsspec==2024.3.1 +furo==2024.1.29 +future==1.0.0 +gitdb==4.0.11 +glcontext==2.5.0 +glfw==1.12.0 +google-auth-oauthlib==1.0.0 +google-auth==2.29.0 +grpcio==1.62.1 +gym-notices==0.0.8 +gym==0.23.0 +gymnasium==0.28.1 +idna==3.7 +imageio-ffmpeg==0.3.0 +imageio==2.34.0 +imagesize==1.4.1 +importlib_metadata==7.1.0 +importlib_resources==6.4.0 +iniconfig==2.0.0 +jax-jumpy==1.0.0 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +kiwisolver==1.4.5 +lazy_loader==0.4 +lz4==4.3.3 +markdown-it-py==3.0.0 +matplotlib==3.7.5 +mdurl==0.1.2 +moderngl==5.10.0 +mpmath==1.3.0 +msgpack==1.0.8 +networkx==3.1 +numpy==1.24.4 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.19.3 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nvtx-cu12==12.1.105 +oauthlib==3.2.2 +packaging==24.0 +pandas==2.0.3 +pillow==10.3.0 +pip==23.3.1 +pkgutil_resolve_name==1.3.10 +pluggy==1.4.0 +procgen==0.9.5+ed4be81 +protobuf==4.25.3 +psutil==5.9.8 +psutil==5.9.8 +py-cpuinfo==9.0.0 +pyarrow==15.0.2 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pycparser==2.22 +pyenchant==3.2.2 +pyglet==1.4.11 +pyparsing==3.1.2 +pytest-benchmark==4.0.0 +pytest==8.1.1 +python-dateutil==2.9.0.post0 +pytz==2024.1 +ray==2.10.0 +referencing==0.34.0 +requests-oauthlib==2.0.0 +requests==2.31.0 +rich==13.7.1 +rpds-py==0.18.0 +rsa==4.9 +scikit-image==0.21.0 +scipy==1.10.0 +sentry-sdk==1.45.0 +setproctitle==1.2.2 +setproctitle==1.3.3 +setuptools==68.2.2 +shellingham==1.5.4 +six==1.16.0 +smmap==5.0.1 +snowballstemmer==2.2.0 +soupsieve==2.5 +sphinx-basic-ng==1.0.0b2 +sphinx-tabs==3.4.5 
+sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-spelling==8.0.0 +sympy==1.12 +tensorboard-data-server==0.7.2 +tensorboard==2.14.0 +tensorboardX==2.6.2.2 +tifffile==2023.7.10 +tomli==2.0.1 +torch==2.2.2 +triton==2.2.0 +typer==0.12.3 +typing_extensions==4.11.0 +tzdata==2024.1 +urllib3==2.2.1 +wandb==0.16.6 +wheel==0.41.2 +zipp==3.18.1 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch new file mode 100644 index 00000000..2da133fd --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch @@ -0,0 +1,566 @@ +diff --git a/setup.py b/setup.py +index 31e09f2..22a94e8 100644 +--- a/setup.py ++++ b/setup.py +@@ -2,7 +2,7 @@ from setuptools import find_packages, setup + + + extras = dict() +-extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] ++extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] + extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] + extras['all'] = extras['test'] + extras['docs'] + +diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py +index 9515df4..f89828b 100644 +--- a/syllabus/curricula/plr/plr_wrapper.py ++++ b/syllabus/curricula/plr/plr_wrapper.py +@@ -312,10 +312,10 @@ class PrioritizedLevelReplay(Curriculum): + """ + Log the task distribution to the provided tensorboard writer. 
+ """ +- super().log_metrics(writer, step) ++ # super().log_metrics(writer, step) + metrics = self._task_sampler.metrics() + writer.add_scalar("curriculum/proportion_seen", metrics["proportion_seen"], step) + writer.add_scalar("curriculum/score", metrics["score"], step) +- for task in list(self.task_space.tasks)[:10]: +- writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) +- writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) ++ # for task in list(self.task_space.tasks)[:10]: ++ # writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) ++ # writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py +index a6d469e..8f1cc34 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py ++++ b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py +@@ -14,6 +14,7 @@ import gym as openai_gym + import gymnasium as gym + import numpy as np + import procgen # noqa: F401 ++from procgen import ProcgenEnv + import torch + import torch.nn as nn + import torch.optim as optim +@@ -21,10 +22,10 @@ from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 + from torch.utils.tensorboard import SummaryWriter + + from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum +-from syllabus.curricula import DomainRandomization, LearningProgressCurriculum, CentralizedPrioritizedLevelReplay ++from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum + from syllabus.examples.models import ProcgenAgent + from syllabus.examples.task_wrappers import ProcgenTaskWrapper +-from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize ++from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + + def parse_args(): +@@ -46,6 +47,8 @@ def parse_args(): + help="the entity (team) of wandb's project") + parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="weather to capture videos of the agent performances (check out `videos` folder)") ++ parser.add_argument("--logging-dir", type=str, default=".", ++ help="the base directory for logging and wandb storage.") + + # Algorithm specific arguments + parser.add_argument("--env-id", type=str, default="starpilot", +@@ -124,15 +127,15 @@ PROCGEN_RETURN_BOUNDS = { + } + + +-def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): ++def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) +- env = ProcgenTaskWrapper(env, env_id, seed=seed) +- if curriculum_components is not None: ++ if curriculum is not None: ++ env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, +- curriculum_components, ++ curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, + ) +@@ -147,7 +150,7 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def level_replay_evaluate( ++def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -155,28 +158,24 @@ def level_replay_evaluate( 
+ num_levels=0 + ): + policy.eval() +- eval_envs = gym.vector.SyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) +- for i in range(1) +- ] ++ ++ eval_envs = ProcgenEnv( ++ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) +- +- eval_episode_rewards = [] + eval_obs, _ = eval_envs.reset() ++ eval_episode_rewards = [] + + while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + +- eval_obs, _, truncs, terms, infos = eval_envs.step(np.array([eval_action.cpu().numpy()])) +- +- for info in infos: ++ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) ++ for i, info in enumerate(infos): + if 'episode' in info.keys(): + eval_episode_rewards.append(info['episode']['r']) + +- eval_envs.close() + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +@@ -185,8 +184,7 @@ def level_replay_evaluate( + return mean_returns, stddev_returns, normalized_mean_returns + + +-def fast_level_replay_evaluate( +- eval_envs, ++def level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -194,9 +192,13 @@ def fast_level_replay_evaluate( + num_levels=0 + ): + policy.eval() +- possible_seeds = np.arange(0, num_levels + 1) +- eval_obs, _ = eval_envs.reset(seed=list(np.random.choice(possible_seeds, size=num_episodes))) + ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False ++ ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") ++ eval_envs = wrap_vecenv(eval_envs) ++ eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: +@@ -231,10 +233,11 @@ if __name__ == "__main__": + name=run_name, + monitor_gym=True, + save_code=True, +- # dir="/fs/nexus-scratch/rsulli/" ++ dir=args.logging_dir + ) +- wandb.run.log_code("./syllabus/examples") +- writer = SummaryWriter(f"./runs/{run_name}") ++ # wandb.run.log_code("./syllabus/examples") ++ ++ writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), +@@ -250,7 +253,7 @@ if __name__ == "__main__": + print("Device:", device) + + # Curriculum setup +- task_queue = update_queue = None ++ curriculum = None + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) +@@ -273,6 +276,16 @@ if __name__ == "__main__": + elif args.curriculum_method == "lp": + print("Using learning progress.") + curriculum = LearningProgressCurriculum(sample_env.task_space) ++ elif args.curriculum_method == "sq": ++ print("Using sequential curriculum.") ++ curricula = [] ++ stopping = [] ++ for i in range(199): ++ curricula.append(i + 1) ++ stopping.append("steps>=50000") ++ curricula.append(list(range(i + 1))) ++ stopping.append("steps>=50000") ++ curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) + else: + raise ValueError(f"Unknown curriculum method {args.curriculum_method}") + curriculum = 
make_multiprocessing_curriculum(curriculum) +@@ -285,7 +298,7 @@ if __name__ == "__main__": + make_env( + args.env_id, + args.seed + i, +- curriculum_components=curriculum.get_components() if args.curriculum else None, ++ curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) +@@ -293,22 +306,6 @@ if __name__ == "__main__": + ) + envs = wrap_vecenv(envs) + +- test_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=0) +- for i in range(args.num_eval_episodes) +- ] +- ) +- test_eval_envs = wrap_vecenv(test_eval_envs) +- +- train_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=200) +- for i in range(args.num_eval_episodes) +- ] +- ) +- train_eval_envs = wrap_vecenv(train_eval_envs) +- + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( +@@ -369,6 +366,8 @@ if __name__ == "__main__": + print(f"global_step={global_step}, episodic_return={item['episode']['r']}") + writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) ++ if curriculum is not None: ++ curriculum.log_metrics(writer, global_step) + break + + # Syllabus curriculum update +@@ -388,8 +387,6 @@ if __name__ == "__main__": + }, + } + curriculum.update(update) +- #if args.curriculum: +- # curriculum.log_metrics(writer, global_step) + + # bootstrap value if not done + with torch.no_grad(): +@@ -487,8 +484,18 @@ if __name__ == "__main__": + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent +- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) +- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) ++ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) ++ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) +@@ -502,12 +509,21 @@ if __name__ == "__main__": + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) ++ + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) + 
writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) ++ + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) ++ + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +index e13c22e..d2d54b5 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py ++++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +@@ -14,6 +14,7 @@ import gym as openai_gym + import gymnasium as gym + import numpy as np + import procgen # noqa: F401 ++from procgen import ProcgenEnv + import torch + import torch.nn as nn + import torch.optim as optim +@@ -24,7 +25,7 @@ from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curri + from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum + from syllabus.examples.models import ProcgenAgent + from syllabus.examples.task_wrappers import ProcgenTaskWrapper +-from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize ++from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + + def parse_args(): +@@ -126,18 +127,17 @@ PROCGEN_RETURN_BOUNDS = { + } + + +-def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): ++def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) +- env = ProcgenTaskWrapper(env, env_id, seed=seed) +- if curriculum_components is not None: ++ if curriculum is not None: ++ env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, +- curriculum_components, ++ curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, +- buffer_size=4, + ) + return env + return thunk +@@ -150,7 +150,7 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def level_replay_evaluate( ++def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -158,28 +158,24 @@ def level_replay_evaluate( + num_levels=0 + ): + policy.eval() +- eval_envs = gym.vector.SyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) +- for i in range(1) +- ] ++ ++ eval_envs = ProcgenEnv( ++ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = 
wrap_vecenv(eval_envs) +- +- eval_episode_rewards = [] + eval_obs, _ = eval_envs.reset() ++ eval_episode_rewards = [] + + while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + +- eval_obs, _, truncs, terms, infos = eval_envs.step(np.array([eval_action.cpu().numpy()])) +- +- for info in infos: ++ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) ++ for i, info in enumerate(infos): + if 'episode' in info.keys(): + eval_episode_rewards.append(info['episode']['r']) + +- eval_envs.close() + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +@@ -188,8 +184,7 @@ def level_replay_evaluate( + return mean_returns, stddev_returns, normalized_mean_returns + + +-def fast_level_replay_evaluate( +- eval_envs, ++def level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -198,15 +193,12 @@ def fast_level_replay_evaluate( + ): + policy.eval() + +- # Choose evaluation seeds +- if num_levels == 0: +- seeds = np.random.randint(0, 2 ** 16 - 1, size=num_episodes) +- else: +- seeds = np.random.choice(np.arange(0, num_levels), size=num_episodes) +- +- seed_envs = [(int(seed), env) for seed, env in zip(seeds, range(num_episodes))] +- eval_obs, _ = eval_envs.reset(seed=seed_envs) +- ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False ++ ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") ++ eval_envs = wrap_vecenv(eval_envs) ++ eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: +@@ -251,7 +243,7 @@ if __name__ == "__main__": + save_code=True, + dir=args.logging_dir + ) +- wandb.run.log_code(os.path.join(args.logging_dir, "/syllabus/examples")) ++ # wandb.run.log_code("./syllabus/examples") + + writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) + writer.add_text( +@@ -316,7 +308,7 @@ if __name__ == "__main__": + make_env( + args.env_id, + args.seed + i, +- curriculum_components=curriculum.get_components() if args.curriculum else None, ++ curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) +@@ -324,22 +316,6 @@ if __name__ == "__main__": + ) + envs = wrap_vecenv(envs) + +- test_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=0) +- for i in range(args.num_eval_episodes) +- ] +- ) +- test_eval_envs = wrap_vecenv(test_eval_envs) +- +- train_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=200) +- for i in range(args.num_eval_episodes) +- ] +- ) +- train_eval_envs = wrap_vecenv(train_eval_envs) +- + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( +@@ -500,8 +476,18 @@ if __name__ == "__main__": + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent +- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) +- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = 
fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) ++ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) ++ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) +@@ -515,12 +501,21 @@ if __name__ == "__main__": + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) ++ + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) ++ + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) ++ + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() +diff --git a/syllabus/examples/utils/vecenv.py b/syllabus/examples/utils/vecenv.py +index 6e5a0a9..af3b187 100644 +--- a/syllabus/examples/utils/vecenv.py ++++ b/syllabus/examples/utils/vecenv.py +@@ -1,7 +1,6 @@ + import time + from collections import deque + +-import gym + import numpy as np + + +@@ -154,12 +153,20 @@ class VecEnvObservationWrapper(VecEnvWrapper): + pass + + def reset(self): +- obs, infos = self.venv.reset() ++ outputs = self.venv.reset() ++ if len(outputs) == 2: ++ obs, infos = outputs ++ else: ++ obs, infos = outputs, {} + return self.process(obs), infos + + def step_wait(self): +- print(self.venv) +- obs, rews, terms, truncs, infos = self.venv.step_wait() ++ env_outputs = self.venv.step_wait() ++ if len(env_outputs) == 4: ++ obs, rews, terms, infos = env_outputs ++ truncs = np.zeros_like(terms) ++ else: ++ obs, rews, terms, truncs, infos = env_outputs + return self.process(obs), rews, terms, truncs, infos + + +@@ -209,7 +216,10 @@ class VecNormalize(VecEnvWrapper): 
+ + def reset(self, seed=None): + self.ret = np.zeros(self.num_envs) +- obs, infos = self.venv.reset(seed=seed) ++ if seed is not None: ++ obs, infos = self.venv.reset(seed=seed) ++ else: ++ obs, infos = self.venv.reset() + return self._obfilt(obs), infos + + +@@ -228,7 +238,10 @@ class VecMonitor(VecEnvWrapper): + self.eplen_buf = deque([], maxlen=keep_buf) + + def reset(self, seed=None): +- obs, infos = self.venv.reset(seed=seed) ++ if seed is not None: ++ obs, infos = self.venv.reset(seed=seed) ++ else: ++ obs, infos = self.venv.reset() + self.eprets = np.zeros(self.num_envs, 'f') + self.eplens = np.zeros(self.num_envs, 'i') + return obs, infos +@@ -239,7 +252,8 @@ class VecMonitor(VecEnvWrapper): + self.eprets += rews + self.eplens += 1 + # Convert dict of lists to list of dicts +- infos = [dict(zip(infos, t)) for t in zip(*infos.values())] ++ if isinstance(infos, dict): ++ infos = [dict(zip(infos, t)) for t in zip(*infos.values())] + newinfos = list(infos[:]) + for i in range(len(dones)): + if dones[i]: diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-metadata.json b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-metadata.json new file mode 100644 index 00000000..e72715df --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-metadata.json @@ -0,0 +1,167 @@ +{ + "os": "Linux-3.10.0-1160.11.1.el7.x86_64-x86_64-with-glibc2.10", + "python": "3.8.5", + "heartbeatAt": "2024-04-23T02:47:28.253643", + "startedAt": "2024-04-23T02:47:27.596549", + "docker": null, + "cuda": "10.1.243", + "args": [ + "--curriculum", + "True", + "--track", + "True", + "--env-id", + "bigfish" + ], + "state": "running", + "program": "cleanrl_procgen_plr.py", + "codePathLocal": "cleanrl_procgen_plr.py", + "codePath": "syllabus/examples/training_scripts/cleanrl_procgen_plr.py", + "git": { + "remote": "https://github.com/RoseyGreenBlue/Syllabus.git", + "commit": "6e36433fbb5c0e990358d7f895d976086dbfb65e" + }, + "email": "djhaayusv04@gmail.com", + "root": "/data/averma/MARL/Syllabus", + "host": "f411843fc70b", + "username": "root", + "executable": "/home/user/miniconda/envs/test2_py/bin/python", + "cpu_count": 12, + "cpu_count_logical": 24, + "cpu_freq": { + "current": 1281.8734583333332, + "min": 1200.0, + "max": 3700.0 + }, + "cpu_freq_per_core": [ + { + "current": 1288.073, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1204.858, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1270.434, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1199.877, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1294.714, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.085, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1295.751, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1202.575, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1202.575, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1233.288, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1325.427, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1274.377, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1481.066, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1259.436, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.5, + "min": 1200.0, + "max": 3700.0 + 
}, + { + "current": 1208.801, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1227.062, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1449.108, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1313.598, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1253.002, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1763.293, + "min": 1200.0, + "max": 3700.0 + } + ], + "disk": { + "/": { + "total": 5952.626953125, + "used": 988.7798461914062 + } + }, + "memory": { + "total": 251.63711166381836 + } +} diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-summary.json b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-summary.json new file mode 100644 index 00000000..15bc4d38 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-summary.json @@ -0,0 +1 @@ +{"global_step": 81856, "_timestamp": 1713840633.2564902, "_runtime": 185.6468482017517, "_step": 520, "charts/episodic_return": 0.0, "charts/episodic_length": 79.0, "curriculum/proportion_seen": 0.0, "curriculum/score": 0.0, "charts/learning_rate": 0.0005000000237487257, "charts/episode_returns": 1.2000000476837158, "losses/value_loss": 0.10687784850597382, "losses/policy_loss": -0.0007377793081104755, "losses/entropy": 2.6995317935943604, "losses/old_approx_kl": 0.0001328418729826808, "losses/approx_kl": 0.0014047222211956978, "losses/clipfrac": 0.0, "losses/explained_variance": 0.0861138105392456, "charts/SPS": 390.0, "test_eval/mean_episode_return": 1.399999976158142, "test_eval/normalized_mean_eval_return": 0.010256409645080566, "test_eval/stddev_eval_return": 1.399999976158142, "test_eval/slow_mean_episode_return": 0.5, "test_eval/slow_normalized_mean_eval_return": -0.012820512987673283, "test_eval/slow_stddev_eval_return": 0.5, "train_eval/mean_episode_return": 0.4000000059604645, "train_eval/normalized_mean_train_return": -0.015384615398943424, "train_eval/stddev_train_return": 0.4000000059604645, "train_eval/slow_mean_episode_return": 1.100000023841858, "train_eval/slow_normalized_mean_train_return": 0.0025641031097620726, "train_eval/slow_stddev_train_return": 1.100000023841858, "curriculum/completed_episodes": 500.0, "_wandb": {"runtime": 204}} \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/run-rpqjdbky.wandb b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/run-rpqjdbky.wandb new file mode 100644 index 0000000000000000000000000000000000000000..3278c7155636059b0ec6c6e516aaf73d957ef6d9 GIT binary patch literal 258994 zcmeFad0-UP^2ci+Yy--sqERH`3hFS^+jQf;UHAQR-R?ENkPHwANz8&EE@2NK5cU89 zkwwC$K|z8b1W*in*aM0NL5M6N>;Vzqw`LN^>7G7gdOGj@^Z5O|_u?dd=F?TD&N)?e zs@lJP=VSYpU)6GC&exu+!s>_h4Qm|{>5)ZE6WprONp?w^TZ$IsjvWO}^~8Eziq=t> z6L!_AMh-{)euQU<_iAXts5l?aqmjeDPAciuGf&TIrXnR zDWZN#ulV@rj%g{a7u0pMcGUOwOo~c~?(Xdyc3tb3cyDwHIbzJQTk4cM-uKepY zjykEy(J|dB|9-2Z*?Z9mu^s90UA-}>6~j7OU;ajp>l}^zKjKYDjZ5u?-%Lo0k9V|i zTpbggl$w_8jp`8>>rLz%R;N>Re2Ul6+|e*Tv2*9Rgw9d1amjtdYPWW_c3kVI>rLnp z6^F0fpmoQ%&Yj{?y0peuZ=9N#8XX^%8rR*MlIl%L=^IvGkX^n%jvE|TgFQKco|GJ& zinmeM<-|v_qp_oYLR$AIJQ2T8OOYH+9S!JTR6L@#AS?J94Wbhgym)v#zC^`$Yvici zIT{c0znjL6I-R4tcgM@A<8o@6hTn1EB=wCSGl@-bkC^H$-3ivZI-!L27hzXK!j$ zxA;o&;NNwEm>`0b)XE`f+_dE6xR|u~H2kiA+?yOtul&oX?%vcci4anq);>ME!O_(B zqI=*qdXwT(5@U(JmfHn^I(%;}#tkg!JeSFHMULER>q8+K2t zH4$R1Uq7rYEG(~fSVY4vVd?l^VPRRd!dll6ovKrA3k7s5PUW#$cZYRt6d#w6*7Lre zsuHD0bHd(oIJ=~#CZ)7**RFG1YL~Q*&X~mR?Oshx@%DNu+3QVsEI!TK?zM_}?!=EY 
zj?EDHg~tkh5`=MVjAsbh`=P0QI?}K{qA{$Ucv?J5bKOJ7Vy@npB z#e*BjR(Qo&kuemFe{g!Y+op05jjG(VbETS9D(#X|7Sa~ZiK_FsscN(@p>0guPv&@E zbVA3VO1RR3O1)-B$(Q1M;cf7e6`NV;Y&q&YXet{X+qz>=c~n2Oy6V(1)xclP7Bl`C z8taP-m%gc-(`I(nd7|(&EBjh`CrV#ii(jSLII$zp}(P%O8{7 zV|LvCX4!KpZ*`k(o_pV{cv5b&_x^8|KPudfu{(=DiwfLKU!sm?yZ@@}QI$tD!F{h? z^ibJn(VblPzFGdP5JGp`>eMz>kBy57r%##_evI>lwT$l=R0bWbfvL9iLXZ(kC-XPv*70FK_&&Nfq^vFssmD##O1}CIzPAD@SJES>X{so_ z6cLU8hPet30sOa0(tnF-jp1D$)vj&!>Yvs6>V3?E$11m~RVk=u)k>G786juO%w1*b zlxzM`^QNUglAWv3YCr8fj~DMe3%@fdc{#o_D7f@rc<{}6u4eec6O~&%TH;6M_gRH6 zENk})92;v_%l(6XV;=h()B0?r-)JiRZ_IpPgH%fBWbr1y5IXZ(lDr}k>fjodn`eD$ z%WT3p#bRX4?T1!HFrqKVze{J8(=0|}&v6CvEMlwxMx9k&Jn~lrVVq$x3bLl6RS}E@ zFSlMv7-wY|7gKu4Qw(v#T_q(<=*TrOvAUFEc#g$LUp@}4ieOZ8x_|B-6jbSa?Mq-9t_SV&kKUtCmJ$osE<(k!HT`$$Ylt>Z)Di~l9ObzEFTcQ1qZh+ z{OWi!{mb*;*Sp`sufFIup;BYN|IbbR8t;qz-i#j+dawE4*~agGYjA^Lm3QC!ZS@)n z4DvNyJ)5Xo<+I(YU~R!*MYlrwhe0R#M=L};O{3$2yjyh~BG#??m`>IqjEgKr+O6K1 zatvHCG-`CTIy5heFfOqe8M6kXRguxrVQ|cGa@AcfcGX>$)~?Kb+hjG7FyE@lG733O zE!P?+kgM)Wv8%4I(bTbf#cCq0_;It8_IZ_sNZ&BPDf12Clej*@RhD?-B}d}yjzuXAC0{E%;Z=q-H$fU)vzAJaRk&#>c zN%^K6XCKSGW0QPO4viPby#trx_+l^dqWtvYbd)$($`3Cn(crrGgXCuyrxW4f;$N#c zx>!*yq4;ztJGqp@xstZ*8fQFg=Vs#<)pc9T)kVT3nyC^vi^GGBv;kcQA1_11=JY>( zSH;!kLQoa66ppvZp=W)$&IX@^HojJ-Or<-OYaXj?del^<#7$0|e&O)f&IiIJXt)9( z?1sc=1*qWAk^BjRIP z`aC0dTxe5$Yq zrM}>sP zM8!t>63`8cdN5{z@|axE$z>BCa)&X*VP-fG>HZay8iypsDYCu42=|L*3eeVwZ9+RM~3dCVQ6yH zXhms>8F5g$&Bhzlde8wG(KiO{z-Kl3z!=cO$w= zbzXIDR+1%oW7=+f5+rO5**x5jM8>NXGF~!U+a7wV@(i+d7cE=E0l~vYZY2b3{Y+&y zKr>mE@Cs}sP+&GgDmx<7%uHsrZ^>KsCC%SmHRf*@M<^N@(^wq*uXToynDN8f>5yuj zK~dIg_po^gR>$|-TBVcIshg=tmA=qnlePKKuQJaW)EkZR#J=L3GKbuMNv0xI`XVpU zt&_hBdCnI6_sUK3CwDg$DX$fNU(V5<$?^m@?3s*8ntZYE%?HSz+{081^US8F38N>Ak=d#LN(BaXB1-SRkT3SzwsY4IMlTj4Z|VrNDuNOI>->d;(VNA{ z?YU)>0s|>v24RePcI|J3(TByz+^`3&ieQX2x35eXeOZj8CH;;nFpzX*5JphggSQAH znZ?K-Hv+ASV4NKGU#ZSdKN&{v#PbRa+_6()U{x-?Te<9??(u}tpT)@CJr1pkV1zEL zI+d(3K!$O9b02wJ7?j#WeV4%*h z(i+&`m);s3bKUg_V=#-6c5x(H6~XA#pji@O3}G?y&Mup-z)-D$CDf^0eb-haj1(3l zdGLC)DuR)HH^i`9o8vB45M?SU1tenIE#_A zHx0QYY<@S~c`BVs2;=HQpHw1ij9@YH*HY$=h&5)Hz4QlRjASu#7O%`zV5ru>62kcE z^cx9;F^a{==(rKBieNPPxt1g#H=4!B-QDxD0t31a${{X<@l4%$=LjQ}#mMhH5Uq+} zoZ0=s4YI}<79(@Rkzw)}J<1xZtl~l#?Z3SH1YwM2F$&h6L8~GdNe@3?i7>{o7`fg1 zPEcZ~>8A=|Jp9I!4->|C79)SbP_!z7G2xLO7YJhli;=Q2WuY7c*N9byxX{+H;Aoz- zZcJn`QZCLyt0EZXUz~Q0Feb4WnMcpS>d*nT{iw;dwiVsbNji&ToSwje--s@AW1TmF` zNZK-}sT=|#HOgs9s7ICE^1~K_n8rfnFB;WU2;%I`^A>`b&O)U08uXmO33j~QBCCOGA|?iNIr^h(R?-8n^&EfB5;y5EB^BmA9+ zU9b)p=LF|lo^yDN=^9=payV#v6g?!y;KjWme>{rDS8IZg$pt5q8&^Ew+IApnTGC%` zb)EeEKz)LqsRa%H1P|Jd=ji0$sz#sY3V0c>)JD)=8!q=yfD2J;)?aRQ|GKxyujc_h zOT)E>Do>i1UC6aY(NElW;hLJvM6@TTH24r6N1$fsg|BIogJ zjbV-2C0s8mZZ%QaFI{S$^BD2i01RtnZBYat(l-c%vEe_fw{=+XJi?x%g$-v3Z#F{> zL+FmsF==4h;2K+Z zVAC56y`@QA9EBv20)lWm1PNK6xy@SfyF^KXah?V=xnj7`bV-vRC(wg-1_IiP0)rkn z%&qu+vf0dLTNE*%5q0qS8XX*Q0`78+#1c&6{Bf-p-$ojb;kL$*LDe1v^brvA;{TZG z%w|88cFaVRc!tI#PF0C`uzOEaGWJ3(W8*VCW4p0* z+5HUn8ZXT2p*$Q2Pr4BhLsv(RTFDjL=Gsy1KZnWKi?oc*slrD46t^Aw6bFA~>l&M` z${9S11DhhfNK&-BJt(rsV!JdI1$)D;h>3LABCD#hyl|G&JVJhBgJ3_iO9*>zM8u;j3|Thh+1DWI*{GL2mddCYjb z6m$fujeq8%b%{TP`#-AXIZ8+@g>VQqDA-WgT-A{m(~} zr(Du?1=N$YAj_6C(Ph~7j!Vr~$k;2ijE$*)hmEBH732APGrca3EIz-daAg-qN-;9I z@$+4jHOq`^$GiL6cZdP?%3^PFp+_`v=`^{@lpBg?DlSZ{#ib9Xri^~R3VD-P6?>D5 z{IJ~@393v@AGB`;VXS5`QkEr6kwxgK)Vqp18I~zM#!rV|k?IDnVKI`j`=eD6r}#gA zeZoZ;Ygvq(W7F3wFjOnm30dRm>nSJ68d)-oSqspr2u9tw$x?)V9gC4VW63TB25!&N^1yVY`)dH#mJjGVX7R1 zju8gUR1eR)JdLbzfW^qVIUTKvVDx`Kb17jQWHC|)F5IEOFs@SF6JxjcCX8GbBW2?% zv?_v;Kj3n9G`{dqag_e5lrNEWs9RMwN)JgpwY+Ab)i3|!#|BcA{%~=WzOd1PuBlap 
zAWj`{|Cb<+un>uNS3V<$z@bIiql9+p>l1F(CWxafMBeF*&j>-pe>A@>K^$WtGMB9R zmmES>=UK=SXVYuXB8cNGMDFq}{}O^Q4SxDrf;hoKj|A7qY~~0Rs*b#3>dcb?(vDLJ$R2>X$o* z?>nvKSwqr|wr34eh8`y_6>_~T;N*MO47f9+;&yO5A*Bk>njLOl*6rZ@$KR&V^MF31 z?rf2K4LoNH^h$BX&d;lGh|E=s?el*52K)n457x5_+B?y{Y>Y$j8m(=s)u@*Ex`oObZf z>QGY$=TUnC;Z5{FgBWK5)IY?vH8vh(_k_OFF_dBi=hb5b7#6%cy(H*@Tm~=SVXjAQ zz^GQn2#|t7avhL|Km>#7-rjC?U*1bQt^DGhR^e_TbMDdsa(@YeNh)q1A%97ak7tgh zoz{ioomOG1=^NLeRgs<6dUwr%)L}1bby)a%cyklNMou?GM0mqgFO|^h^}oj|XJn{w zpn3(YhslXeLvdKf4z2VqQD4%aCf5*`8VP7#Ka+1bms-!(Q27ghtx-xHmobnLPTOR*hkF;j*8df<^(_rs+o*aT zr-wrV4W{N#XSz#v0Mh>h<4jD&0rK7ydIxW|h-=!N%0j-8kf1jmJjyO02GM4ngM;HN#5oRGiKpKfV4$46r*^ zDbkxMJhrFpnJO=+=C&JGP;HLuJt@}LsY;RFOp&wT*c`!vYAxLVY(N;DWf+;u7At%n zFqAT+fd^muIJo!q<0<6x=)z*;cUzBEMSLF1YTcSi7+qP6^wo2>DllAXu|8apDm_Mx z$_-BwMmH8Ctnk&6EmXEbvB8;9aMoPwBv?_wJ^VRo0By03yF%mmY zn4rKw`mI4@-Fqzm`GS&Xc8GtjCCM$Z9V_Yg)O79(rcnHfq9!$ge2R%d-j*67P( zrGaJUjP61h{aB2wsW;H72*$s|LLMZH{wzk` z>GD!T*v&YNnyKn9|MnNc7|LSgEL$X4YN%k}Z)FH$7>kj9y+Dz8 z1Yf6789Nm+2Dc%M;VeexY0J-lo*C>4Wn0`dzP#*n#D-nIufmlU_^$;#}Y;= zi;=Um_jUz_aq^Zwv>5p+VT@rh@`n#bt0EXJA0J?j!WWLM(#nPUbEZnyr2MJW#lf;v z5hvVc25m*&i<&X;U9>r_N~;HxzFs$DTFB2mboU0-c2qZ6NRm&^j?Ej zMaIO}ueV-C7*kk`l%9LXDlk+f2!(o7(1G_KqoF;O#Ynt(46TY_T)nY(2zgMZX?akP zY{2uN*r_lCVt+g@p>Dt)C2)=Rpt#||x8UY5JRrE4$$C&+yIa20ny{y9VRLH2ke3kE zPLP?UpF_osdIaE}9;&~(Ac}(M7LtijG=;@>e|+tOa_52lR~6M70q_=aZdwyI${y3b zO`f$eJh}NqC_NPbV|!2v!fW-Aj5>VQjJwQJ&KvbAQP0q#hF6B?5_SMv=l(6J-0*CT z*mnS^Jyh<*3%LvwvX^C1IlX=Am6gh1q-%9?IB|F*8x?GD@ljvjiiZu)L;$dHtsJ3N zluvP>raS8!by*%h)^9A0?3o(a|0=3HQ_zOOTa=)Ih>zRj zl}Se+GvZSDuGGh8Y4ma2OXo&=P>#mwcId}Fc$tC$7};1lp!|c_o)`CyT?nq19N8}0 zdvBhqOU9n9Wo#-6p%u`?dOetDfyK68 z?75~N8GDYFvEhT_Vmm!9$Wm~z^``LvB;$bEfzUO=u_|F)m!n>Nm znj)x$;UBfzc$GpsQji>82(3Y`59y(7E{@AN>(f)Mhf3;rk~G*J?z)*eHi8EBe64{EhZxt^E)P^!_}B!+8`vo176@!7vT!gh zxv*WqO2o`(?!J3tz9VC2Xc-$0G#)mR3y}!HU%d1Nb^vm5P&XA(YXmY;8P9H!DfYQM zp@;WpP%h4bDylV{kVFTuCr%f~B@YS>^CodUXjdRijhc*9G!F*_WRJ+kwKu>0>ykdo zLJd=scaCRjq-7zoiQmKZsJ(%h&CvfK=7H4P%qZ9H0jK8)-B3^ zuptM)pZ-l1at%(+pu(uogMk_>F0Npc!dlwa@2xXCh-Nk) zd?{1m_C$K3(tC?de(7VcFJ?6SnB1N#Shr{X@;hi%#O)cnb;?%4Sjl1(4BE0miJ=zl z6T%o?e`FkCtYR_JdR{}TA{b5Yezu-4Ra?N^gxG-5f6w#u^#M z$ZSEepqJmCkwqA5S&a0>Nrx2HfS*sTK?jNCJ41T~LZ!;Mp@vQr6RxGzm?L>QY{jJyjI z(W(f>ro7wIOtpo@$VuM2MS)>l$&xMCKW|`<0|Ju_-o|iz{hsv>meMjkscl* zeY&ja1Ld;&^g-hyH0yuO zU-ixGOqMvoLgb&A93h85#Y*Ma7P3U|6-}Qfh&&m@$bAt)5EU19Pa}wvEJXVB#ec{l z5Xd(OqHo(PHwoet3z5}%?H@uA<6ipWbDEz{7tc>cW2|#lclpr|i4I1cnVJrLC5+~$ zGsW{$VXLWgwxLy#`DxvM$B!V4vn)pL#hC*Y7`RJnkTHHJ6E>AF&aoJIHy5E*5sbS} z_3d^JUwB@_k>{csE1HoQ2*ShVqmv9q9xI-A-W!NW4BZZf)`$n!CoRYbU}6&Ph}hS! 
zP^>XuBi85w6wi@|j1n&{DRObLWA5Ny}RezZ$obBdF&_xPxbH zeQf`uP)Z*?Qd!_JKz$WOiAVH?pC0bw_J(jNkO7BA{rA_)vBU6^%Bu3R3+ zFW1uZi29lqHLu1B<}!#TI#7^G*VJ5W1Jd8*wg$Er>lU<=Y*@5qQ)7JzWdE{vCkQU=E~#VdNp*(E$-ISwS?9E9BSNhiFh=*BI2uW8}zMz|>6eMzjuyV1%bdS4xji2UH_r9Uky?Iq-(7llr1i=jcQmwZg z4L}Nk!%X$Da2FD_bZnH6$P8+ay=I615cN$hYIrhvYnBJPF8|A*=9$`Re6~8M9ne<6 zDvcU&*i5k0-a0Q|d77wiX;EVWAu%+s`P-PKsP$p6Y)x4gLU0EI$- z_GOMh^l&?Fq2P)YY2RUw!rI#Yu48iyty_sz)f4{6U8@ZZD>|r@pkL5ln&bUTl2NW( zxc)`=jj%LkYma+F^FMED&u) zUNywDowB*inI8UTbE593NsVKNX6hTLk>rR`!!ubjaA+ zmG|R^W}@z-MGgNkuYW_85tZw?(7Fc?tMTdi4zmmLw;nnmIBk&XW~tp{)=d3`s5@&> z!{N-cH8ks_dV2oMWdTrQ2Q_x!C@GJ!+ctUOZLjCk*UH#pK;5Nkk?dBX*Ep}|g*6H% zFV=d4oV4?O|STe#0-w zwcV|1k=R&KTwvjKJZllxcAc29CNf?UYrOQ$vlJ>_kOMX<=3`y*AE{bZcNU{y&=NtF zu5Xg;Z3v?Wi;;AG*?vVIgh#`mKKSmi++k#mo-9Vj()DOn#2QyW-q(~cda)Qe`6If@ z^JuWSP_YKqi_#~clz+WXEyC!{Via7SfL28?`gyaq5k?;tBjwOFh1eK$wN+wp@B^0~ zS&Xdoj>E+;UfCD4mN1f8jGSG&7RjuEJ*rK`8d#!Aj}cij>N&#b$6_Rn zJB(IEtkLt07j6?qe-sgVx-=lhE_!|ynRMKLKp*CjEvc1 z_9`$C*HR8~?2<}vjkoqzmIRdsu^1_rrlM65j4ux~e338)vlzK|?;KNNsObp`VN4AF z>n2%a2#b-C(*3v?#?g;zyh#`-EJi`E-T4X(hgz-xoSCJ!#^eu8%?V>Di;=VO5Ly+n z#-GtYUm}cQEJoJu^uJ_zK$va~s{oq*>4a#)7|vp3?_Y#gMKG4M?eGR+j9@X6a+l}G zG2jDJnW==Vajive8exoNF>=oppj8o!HG})MkH!~{s#>H*bX`hkO5b?BJe#@!9dE-H z@il8ky-TSZqpKFF5fwI?yeXxE5X83Y@0}osR2Cw$Td(SJ2-S);LgV|XokLzFh%qcg z>ZXC!g&@`(cU~oku`ERH!TAm35O%c-6hbRsyZBM(31S=zk$5z-fe^%viQn%gi192$ z>V#=;$swF-yAUCW+_z@GO%M}Uh=L{a-V%aHZuou&f|$rcq^+L+p&UY0MOSD?_*vSP z4+&xt3z3$-^g|(tax-6=L=b5#MC#pjUn?Ngvg3sIgVme5q)L;MS%~Z%+rAcpc;}0D zT?t|e3z2tcT8tb5=VaynL#R8s`g|opOl2W5Z!L=vg2=7>c?LmDVDLbdj*eyJ)$a zQM}w0wwiWfI$9N3?tXc2{;YHO!t|=@7yNOjo9i01TT!^hs_(+!g|rN)qNDTzyDe18 z&5n>8#Q95lFmQW+QJo7(=K(!a%Wp&BAdWv2a{P!I`4`(U09qN*!d+by6?CD_ByIyU zzHg_++OEe#Qp|RiCN(djg_0J~+M@gve>{UOQseT^;jcn%qY?=er^c;xCQ^+|oa=wy z@=yn#t<}MKb)xK`c0oSCPgw(-{{by!2gmh(gz=zP;(+i1+rjO#!e{g$=s8-@R4++0 zSrnBWq!cEurLm^-^NftoqjQ+O4ph#8@Dd7!BDai%cHG=GZv&0$xmx|3$|dojQC>|a z4}+MvK@J-ZF^2bVsKHrrwI8B}c9{r~)8hPk`RX4jVmq&@YIzaKE5w`2kgLlVe_EvAr>OzW#+~ zw1w(3ng=03?lkU(j2E_VJZhS+xP3eTLPVfOtlC8(db`sro5!t|cP5PPMT^%W^~Fna zg7M~YuLa7t`k9lw9jV>;ZHm)uLr4uVOehga8G$8xxYgQh<2MJXhcDLX;p8&o4Q%KT zp?)G?lSj|i(1r@Yd}c>_7>rGMb=a{mJ=|*RKPO0<#+PVe!^ftLjh~C?|NoL(#_Qrx zJ_`UgCPoixGJ9;S zoE#D~Uas`kJ2anVYRqR=IR1Ej9AZ`|lj&cAu*?4?TYds@nvwm5g6XKhh&u`F(uMgw z^R*O$UakdA!p@pUYzS|ngdio0^VTlCBZ$}k=}ZOMN$ISZ$5E_S2JODm>Zw-w+Adi`lc#|2N9)3?|D$pKePhyF|>5EOXLV?*?U575ALA_FIP}AKcj-{aj zPxB;~T5nJXKn2xl#=!{NM!MKyBl%ipz4lJnxa}h{^(rk>!*j`{#;%XH_dHwcQM>(5 z1y`CHBG$-@!__71nb~c)&C=+xN55+Y=+#wQ*=ts<)N9py;Z`?CD(bo~19LBYXqM@0I9>xD=9!lL6MLu2D&!hi5ZhsH+6 znPc16YZ4q85$y}c#mXkZ5z)afMa8si9qD_;6pTNiO@gC+Z6jlTz-L;(@ zMQ@2WN5(||@O-0tFEtW)eup0Y^$~i0StXxe{@U3h&+obNKe$ieyVg~0HRJyCOTRr2 z&u`Sd=NHm4EFml;tc@=&DlFvv*FR|%(!70SWLS&V@gecW|Ke*G78M)mix0t5^hL&p z6hCX}uO*%~t`161TkW|?PhNV0O@i@EW8y-;_k|_2j`PLiamV{2zc;rCi}yVb!-TdI zd2+``)NevhZhgromz3S1t3c1Im;23%8~CmbRa>pN|KxJAyLJt#GxPqF`!OcU7t%W3 z7Z(y05gid86WuZ_#24KrJ|w{xpI{4(Z66XD(IOEYskOFT|o<|%xhp>5;BVq<+RLuqh^M!*)*(~R@s`9&8#y!$Ub z-#9GphxjJJjT`^emi|rXVP1XWC_T)LB_C#LdUuhBnOE*wGg@voRc$r@{=-b{(i;zR z;{TTqlVUoU3hqCtk`M9wm^eJG!Y6FR6I}7xoO1L8HG8Gb{)3T|29`nsk4V<}IIqCpb7@(;OC^gfMJ$O73|O zRg)&VlA|{wI<|EJMsVR;^Sty|k%i&OuJ2Z-C$_8P6HD4VN#uzwcBi>%0Pki7aOTM= z|E~;S&&aek^e}TuKFs7x(?lNT`+tl}rUAT%8NgW`(*Lg>z@5w+f2Sw-cgZK1-f5P| z6MXK;aZA_WyY^OX)#v^IPV6xkPw>rtIvv8&Ub`Xm3BI_=8(z8jHw6(`f!Qb|H) z(AMxkRHxVM>NMG4`cKzsSNo0|h{0x@K~vF55(Y08ChRo<)^!P+Y^&44?rLB#))958 zQVc57CwE@6X={-NdlTI07q=C!LzJ}Olr)!~A)zEEHKpoP!1_(XPUadVhSVNl#NA)B zHZUCSY~Blt)k6G?b4tIo3REq_SjhJ+o0iyR% zyKlm;L##gqQmZ``tOZ~*nbnw(UP<0 
zCm^lw;98edrX~|xz|GrN6=8tb3DdGUt&Kk$0CDONdxOF+F;l z3a&8vdz=QiyAn3dCbjWoF5qq8D%kOz*1^p|;nehrD(54GyK#k$_l)EU&o~#cF9Z

[GIT binary patch data omitted]
+                stopping.append("steps>=50000")
+            curricula.append(list(range(i + 1)))
+            stopping.append("steps>=50000")
+            curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space)
+        else:
+            raise ValueError(f"Unknown curriculum method {args.curriculum_method}")
+        curriculum = make_multiprocessing_curriculum(curriculum)
+        del sample_env
+
+    # env setup
+    print("Creating env")
+    envs = gym.vector.AsyncVectorEnv(
+        [
+            make_env(
+                args.env_id,
+                args.seed + i,
+                curriculum=curriculum if args.curriculum else None,
+                num_levels=1 if args.curriculum else 0
+            )
+            for i in range(args.num_envs)
+        ]
+    )
+    envs = wrap_vecenv(envs)
+
+    assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported"
+    print("Creating agent")
+    agent = ProcgenAgent(
+        envs.single_observation_space.shape,
+        envs.single_action_space.n,
+        arch="large",
+        base_kwargs={'recurrent': False, 'hidden_size': 256}
+    ).to(device)
+    optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5)
+
+    # ALGO Logic: Storage setup
+    obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device)
+    actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device)
+    logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device)
+    rewards = torch.zeros((args.num_steps, args.num_envs)).to(device)
+    dones = torch.zeros((args.num_steps, args.num_envs)).to(device)
+    values = torch.zeros((args.num_steps, args.num_envs)).to(device)
+
+    # TRY NOT TO MODIFY: start the game
+    global_step = 0
+    start_time = time.time()
+    next_obs, _ = envs.reset()
+    next_obs = torch.Tensor(next_obs).to(device)
+    next_done = torch.zeros(args.num_envs).to(device)
+    num_updates = args.total_timesteps // args.batch_size
+    episode_rewards = deque(maxlen=10)
+    completed_episodes = 0
+
+    for update in range(1, num_updates + 1):
+        # Annealing the rate if instructed to do so.
+        if args.anneal_lr:
+            frac = 1.0 - (update - 1.0) / num_updates
+            lrnow = frac * args.learning_rate
+            optimizer.param_groups[0]["lr"] = lrnow
+
+        for step in range(0, args.num_steps):
+            global_step += 1 * args.num_envs
+            obs[step] = next_obs
+            dones[step] = next_done
+
+            # ALGO LOGIC: action logic
+            with torch.no_grad():
+                action, logprob, _, value = agent.get_action_and_value(next_obs)
+                values[step] = value.flatten()
+            actions[step] = action
+            logprobs[step] = logprob
+
+            # TRY NOT TO MODIFY: execute the game and log data.
+            next_obs, reward, term, trunc, info = envs.step(action.cpu().numpy())
+            done = np.logical_or(term, trunc)
+            rewards[step] = torch.tensor(reward).to(device).view(-1)
+            next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
+            completed_episodes += sum(done)
+
+            for item in info:
+                if "episode" in item.keys():
+                    episode_rewards.append(item['episode']['r'])
+                    print(f"global_step={global_step}, episodic_return={item['episode']['r']}")
+                    writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step)
+                    writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step)
+                    if curriculum is not None:
+                        curriculum.log_metrics(writer, global_step)
+                    break
+
+        # bootstrap value if not done
+        with torch.no_grad():
+            next_value = agent.get_value(next_obs).reshape(1, -1)
+            if args.gae:
+                advantages = torch.zeros_like(rewards).to(device)
+                lastgaelam = 0
+                for t in reversed(range(args.num_steps)):
+                    if t == args.num_steps - 1:
+                        nextnonterminal = 1.0 - next_done
+                        nextvalues = next_value
+                    else:
+                        nextnonterminal = 1.0 - dones[t + 1]
+                        nextvalues = values[t + 1]
+                    delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
+                    advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam
+                returns = advantages + values
+            else:
+                returns = torch.zeros_like(rewards).to(device)
+                for t in reversed(range(args.num_steps)):
+                    if t == args.num_steps - 1:
+                        nextnonterminal = 1.0 - next_done
+                        next_return = next_value
+                    else:
+                        nextnonterminal = 1.0 - dones[t + 1]
+                        next_return = returns[t + 1]
+                    returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
+                advantages = returns - values
+
+        # flatten the batch
+        b_obs = obs.reshape((-1,) + envs.single_observation_space.shape)
+        b_logprobs = logprobs.reshape(-1)
+        b_actions = actions.reshape((-1,) + envs.single_action_space.shape)
+        b_advantages = advantages.reshape(-1)
+        b_returns = returns.reshape(-1)
+        b_values = values.reshape(-1)
+
+        # Optimizing the policy and value network
+        b_inds = np.arange(args.batch_size)
+        clipfracs = []
+        for epoch in range(args.update_epochs):
+            np.random.shuffle(b_inds)
+            for start in range(0, args.batch_size, args.minibatch_size):
+                end = start + args.minibatch_size
+                mb_inds = b_inds[start:end]
+
+                _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds])
+                logratio = newlogprob - b_logprobs[mb_inds]
+                ratio = logratio.exp()
+
+                with torch.no_grad():
+                    # calculate approx_kl http://joschu.net/blog/kl-approx.html
+                    old_approx_kl = (-logratio).mean()
+                    approx_kl = ((ratio - 1) - logratio).mean()
+                    clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()]
+
+                mb_advantages = b_advantages[mb_inds]
+                if args.norm_adv:
+                    mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8)
+
+                # Policy loss
+                pg_loss1 = -mb_advantages * ratio
+                pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + args.clip_coef)
+                pg_loss = torch.max(pg_loss1, pg_loss2).mean()
+
+                # Value loss
+                newvalue = newvalue.view(-1)
+                if args.clip_vloss:
+                    v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2
+                    v_clipped = b_values[mb_inds] + torch.clamp(
+                        newvalue - b_values[mb_inds],
+                        -args.clip_coef,
+                        args.clip_coef,
+                    )
+                    v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2
+                    v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped)
+                    v_loss = 0.5 * v_loss_max.mean()
+                else:
+                    v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean()
+
+                entropy_loss = entropy.mean()
+                loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef
+
+                optimizer.zero_grad()
+                loss.backward()
+                nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
+                optimizer.step()
+
+            if args.target_kl is not None:
+                if approx_kl > args.target_kl:
+                    break
+
+        y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
+        var_y = np.var(y_true)
+        explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y
+
+        # Evaluate agent
+        mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate(
+            args.env_id, agent, args.num_eval_episodes, device, num_levels=0
+        )
+        full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate(
+            args.env_id, agent, args.num_eval_episodes, device, num_levels=0
+        )
+        mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate(
+            args.env_id, agent, args.num_eval_episodes, device, num_levels=200
+        )
+        full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate(
+            args.env_id, agent, args.num_eval_episodes, device, num_levels=200
+        )
+
+        # TRY NOT TO MODIFY: record rewards for plotting purposes
+        writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step)
+        writer.add_scalar("charts/episode_returns", np.mean(episode_rewards), global_step)
+        writer.add_scalar("losses/value_loss", v_loss.item(), global_step)
+        writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step)
+        writer.add_scalar("losses/entropy", entropy_loss.item(), global_step)
+        writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step)
+        writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step)
+        writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step)
+        writer.add_scalar("losses/explained_variance", explained_var, global_step)
+        print("SPS:", int(global_step / (time.time() - start_time)))
+        writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)
+
+        writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step)
+        writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step)
+        writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step)
+        writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step)
+        writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step)
+        writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step)
+
+        writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step)
+        writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step)
+        writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step)
+        writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step)
+        writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step)
+        writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step)
+
+        writer.add_scalar("curriculum/completed_episodes", completed_episodes, step)
+
+    envs.close()
+    writer.close()
diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/conda-environment.yaml
b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/conda-environment.yaml new file mode 100644 index 00000000..cd0b0b09 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/conda-environment.yaml @@ -0,0 +1,165 @@ +name: test2_py +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - ca-certificates=2024.3.11=h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.3=he6710b0_2 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - ncurses=6.4=h6a678d5_0 + - openssl=1.1.1w=h7f8727e_0 + - pip=23.3.1=py38h06a4308_0 + - python=3.8.5=h7579374_1 + - readline=8.2=h5eee18b_0 + - setuptools=68.2.2=py38h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - wheel=0.41.2=py38h06a4308_0 + - xz=5.4.6=h5eee18b_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - absl-py==2.1.0 + - aiosignal==1.3.1 + - alabaster==0.7.13 + - appdirs==1.4.4 + - attrs==23.2.0 + - babel==2.14.0 + - beautifulsoup4==4.12.3 + - cachetools==5.3.3 + - certifi==2024.2.2 + - cffi==1.16.0 + - charset-normalizer==3.3.2 + - click==8.1.7 + - cloudpickle==3.0.0 + - cmake==3.29.2 + - contourpy==1.1.1 + - cycler==0.12.1 + - dm-tree==0.1.8 + - docker-pycreds==0.4.0 + - docutils==0.20.1 + - exceptiongroup==1.2.0 + - farama-notifications==0.0.4 + - filelock==3.13.4 + - fonttools==4.51.0 + - frozenlist==1.4.1 + - fsspec==2024.3.1 + - furo==2024.1.29 + - future==1.0.0 + - gitdb==4.0.11 + - gitpython==3.1.43 + - glcontext==2.5.0 + - glfw==1.12.0 + - google-auth==2.29.0 + - google-auth-oauthlib==1.0.0 + - grpcio==1.62.1 + - gym==0.23.0 + - gym-notices==0.0.8 + - gymnasium==0.28.1 + - idna==3.7 + - imageio==2.34.0 + - imageio-ffmpeg==0.3.0 + - imagesize==1.4.1 + - importlib-metadata==7.1.0 + - importlib-resources==6.4.0 + - iniconfig==2.0.0 + - jax-jumpy==1.0.0 + - jinja2==3.1.3 + - jsonschema==4.21.1 + - jsonschema-specifications==2023.12.1 + - kiwisolver==1.4.5 + - lazy-loader==0.4 + - lz4==4.3.3 + - markdown==3.6 + - markdown-it-py==3.0.0 + - markupsafe==2.1.5 + - matplotlib==3.7.5 + - mdurl==0.1.2 + - moderngl==5.10.0 + - mpmath==1.3.0 + - msgpack==1.0.8 + - networkx==3.1 + - numpy==1.24.4 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==8.9.2.26 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - nvidia-cusparse-cu12==12.1.0.106 + - nvidia-nccl-cu12==2.19.3 + - nvidia-nvjitlink-cu12==12.4.127 + - nvidia-nvtx-cu12==12.1.105 + - oauthlib==3.2.2 + - packaging==24.0 + - pandas==2.0.3 + - pillow==10.3.0 + - pkgutil-resolve-name==1.3.10 + - pluggy==1.4.0 + - protobuf==4.25.3 + - psutil==5.9.8 + - py-cpuinfo==9.0.0 + - pyarrow==15.0.2 + - pyasn1==0.6.0 + - pyasn1-modules==0.4.0 + - pycparser==2.22 + - pyenchant==3.2.2 + - pyglet==1.4.11 + - pygments==2.17.2 + - pyparsing==3.1.2 + - pytest==8.1.1 + - pytest-benchmark==4.0.0 + - python-dateutil==2.9.0.post0 + - pytz==2024.1 + - pywavelets==1.4.1 + - pyyaml==6.0.1 + - ray==2.10.0 + - referencing==0.34.0 + - requests==2.31.0 + - requests-oauthlib==2.0.0 + - rich==13.7.1 + - rpds-py==0.18.0 + - rsa==4.9 + - scikit-image==0.21.0 + - scipy==1.10.0 + - sentry-sdk==1.45.0 + - setproctitle==1.3.3 + - shellingham==1.5.4 + - shimmy==1.3.0 + - six==1.16.0 + - smmap==5.0.1 + - snowballstemmer==2.2.0 + - soupsieve==2.5 + - sphinx==7.1.2 + - sphinx-basic-ng==1.0.0b2 + - 
sphinx-tabs==3.4.5 + - sphinxcontrib-applehelp==1.0.4 + - sphinxcontrib-devhelp==1.0.2 + - sphinxcontrib-htmlhelp==2.0.1 + - sphinxcontrib-jsmath==1.0.1 + - sphinxcontrib-qthelp==1.0.3 + - sphinxcontrib-serializinghtml==1.1.5 + - sphinxcontrib-spelling==8.0.0 + - syllabus-rl==0.5 + - sympy==1.12 + - tensorboard==2.14.0 + - tensorboard-data-server==0.7.2 + - tensorboardx==2.6.2.2 + - tifffile==2023.7.10 + - tomli==2.0.1 + - torch==2.2.2 + - triton==2.2.0 + - typer==0.12.3 + - typing-extensions==4.11.0 + - tzdata==2024.1 + - urllib3==2.2.1 + - wandb==0.16.6 + - werkzeug==3.0.2 + - zipp==3.18.1 +prefix: /home/user/miniconda/envs/test2_py + diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/config.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/config.yaml new file mode 100644 index 00000000..d65c04d2 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/config.yaml @@ -0,0 +1,130 @@ +wandb_version: 1 + +exp_name: + desc: null + value: cleanrl_procgen_plr +seed: + desc: null + value: 1 +torch_deterministic: + desc: null + value: true +cuda: + desc: null + value: true +track: + desc: null + value: true +wandb_project_name: + desc: null + value: syllabus +wandb_entity: + desc: null + value: null +capture_video: + desc: null + value: false +logging_dir: + desc: null + value: . +env_id: + desc: null + value: bigfish +total_timesteps: + desc: null + value: 25000000 +learning_rate: + desc: null + value: 0.0005 +num_envs: + desc: null + value: 64 +num_steps: + desc: null + value: 256 +anneal_lr: + desc: null + value: false +gae: + desc: null + value: true +gamma: + desc: null + value: 0.999 +gae_lambda: + desc: null + value: 0.95 +num_minibatches: + desc: null + value: 8 +update_epochs: + desc: null + value: 3 +norm_adv: + desc: null + value: true +clip_coef: + desc: null + value: 0.2 +clip_vloss: + desc: null + value: true +ent_coef: + desc: null + value: 0.01 +vf_coef: + desc: null + value: 0.5 +max_grad_norm: + desc: null + value: 0.5 +target_kl: + desc: null + value: null +full_dist: + desc: null + value: true +curriculum: + desc: null + value: true +curriculum_method: + desc: null + value: plr +num_eval_episodes: + desc: null + value: 10 +batch_size: + desc: null + value: 16384 +minibatch_size: + desc: null + value: 2048 +_wandb: + desc: null + value: + code_path: code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py + python_version: 3.8.5 + cli_version: 0.16.6 + framework: torch + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1713840768.0 + t: + 1: + - 1 + - 30 + - 55 + 2: + - 1 + - 30 + - 55 + 3: + - 13 + - 16 + - 23 + - 35 + 4: 3.8.5 + 5: 0.16.6 + 8: + - 5 + 13: linux-x86_64 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/diff.patch b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/diff.patch new file mode 100644 index 00000000..0a6b4640 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/diff.patch @@ -0,0 +1,13 @@ +diff --git a/setup.py b/setup.py +index 31e09f2..22a94e8 100644 +--- a/setup.py ++++ b/setup.py +@@ -2,7 +2,7 @@ from setuptools import find_packages, setup + + + extras = dict() +-extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] ++extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] + extras['docs'] = 
['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] + extras['all'] = extras['test'] + extras['docs'] + diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/events.out.tfevents.1713840773.f411843fc70b.1109.0 b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/events.out.tfevents.1713840773.f411843fc70b.1109.0 new file mode 120000 index 00000000..c1b8de5c --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/events.out.tfevents.1713840773.f411843fc70b.1109.0 @@ -0,0 +1 @@ +/data/averma/MARL/Syllabus/syllabus/examples/training_scripts/runs/{run_name}/events.out.tfevents.1713840773.f411843fc70b.1109.0 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/requirements.txt b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/requirements.txt new file mode 100644 index 00000000..7f33d240 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/requirements.txt @@ -0,0 +1,146 @@ +Babel==2.14.0 +Farama-Notifications==0.0.4 +GitPython==3.1.43 +Jinja2==3.1.3 +Markdown==3.6 +MarkupSafe==2.1.5 +PyWavelets==1.4.1 +PyYAML==6.0.1 +Pygments==2.17.2 +Shimmy==1.3.0 +Sphinx==7.1.2 +Syllabus-RL==0.5 +Werkzeug==3.0.2 +absl-py==2.1.0 +aiosignal==1.3.1 +alabaster==0.7.13 +appdirs==1.4.4 +attrs==23.2.0 +beautifulsoup4==4.12.3 +cachetools==5.3.3 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpickle==3.0.0 +cmake==3.29.2 +colorama==0.4.6 +contourpy==1.1.1 +cycler==0.12.1 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +docutils==0.20.1 +exceptiongroup==1.2.0 +filelock==3.13.4 +fonttools==4.51.0 +frozenlist==1.4.1 +fsspec==2024.3.1 +furo==2024.1.29 +future==1.0.0 +gitdb==4.0.11 +glcontext==2.5.0 +glfw==1.12.0 +google-auth-oauthlib==1.0.0 +google-auth==2.29.0 +grpcio==1.62.1 +gym-notices==0.0.8 +gym==0.23.0 +gymnasium==0.28.1 +idna==3.7 +imageio-ffmpeg==0.3.0 +imageio==2.34.0 +imagesize==1.4.1 +importlib_metadata==7.1.0 +importlib_resources==6.4.0 +iniconfig==2.0.0 +jax-jumpy==1.0.0 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +kiwisolver==1.4.5 +lazy_loader==0.4 +lz4==4.3.3 +markdown-it-py==3.0.0 +matplotlib==3.7.5 +mdurl==0.1.2 +moderngl==5.10.0 +mpmath==1.3.0 +msgpack==1.0.8 +networkx==3.1 +numpy==1.24.4 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.19.3 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nvtx-cu12==12.1.105 +oauthlib==3.2.2 +packaging==24.0 +pandas==2.0.3 +pillow==10.3.0 +pip==23.3.1 +pkgutil_resolve_name==1.3.10 +pluggy==1.4.0 +procgen==0.9.5+ed4be81 +protobuf==4.25.3 +psutil==5.9.8 +psutil==5.9.8 +py-cpuinfo==9.0.0 +pyarrow==15.0.2 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pycparser==2.22 +pyenchant==3.2.2 +pyglet==1.4.11 +pyparsing==3.1.2 +pytest-benchmark==4.0.0 +pytest==8.1.1 +python-dateutil==2.9.0.post0 +pytz==2024.1 +ray==2.10.0 +referencing==0.34.0 +requests-oauthlib==2.0.0 +requests==2.31.0 +rich==13.7.1 +rpds-py==0.18.0 +rsa==4.9 +scikit-image==0.21.0 +scipy==1.10.0 +sentry-sdk==1.45.0 +setproctitle==1.2.2 +setproctitle==1.3.3 +setuptools==68.2.2 +shellingham==1.5.4 +six==1.16.0 +smmap==5.0.1 +snowballstemmer==2.2.0 +soupsieve==2.5 +sphinx-basic-ng==1.0.0b2 
+sphinx-tabs==3.4.5 +sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-spelling==8.0.0 +sympy==1.12 +tensorboard-data-server==0.7.2 +tensorboard==2.14.0 +tensorboardX==2.6.2.2 +tifffile==2023.7.10 +tomli==2.0.1 +torch==2.2.2 +triton==2.2.0 +typer==0.12.3 +typing_extensions==4.11.0 +tzdata==2024.1 +urllib3==2.2.1 +wandb==0.16.6 +wheel==0.41.2 +zipp==3.18.1 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch new file mode 100644 index 00000000..5b0b6409 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch @@ -0,0 +1,1417 @@ +diff --git a/setup.py b/setup.py +index 31e09f2..22a94e8 100644 +--- a/setup.py ++++ b/setup.py +@@ -2,7 +2,7 @@ from setuptools import find_packages, setup + + + extras = dict() +-extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] ++extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] + extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] + extras['all'] = extras['test'] + extras['docs'] + +diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py +index 03284da..4ca9aeb 100644 +--- a/syllabus/core/curriculum_base.py ++++ b/syllabus/core/curriculum_base.py +@@ -76,7 +76,7 @@ class Curriculum: + """ + self.completed_tasks += 1 + +- def update_on_step(self, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: ++ def update_on_step(self, task: typing.Any, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: + """ Update the curriculum with the current step results from the environment. + + :param obs: Observation from teh environment +@@ -88,7 +88,7 @@ class Curriculum: + """ + raise NotImplementedError("This curriculum does not require step updates. Set update_on_step for the environment sync wrapper to False to improve performance and prevent this error.") + +- def update_on_step_batch(self, step_results: List[typing.Tuple[int, int, int, int, int]], env_id: int = None) -> None: ++ def update_on_step_batch(self, step_results: List[typing.Tuple[Any, Any, int, int, int, int]], env_id: int = None) -> None: + """Update the curriculum with a batch of step results from the environment. + + This method can be overridden to provide a more efficient implementation. It is used +@@ -96,9 +96,9 @@ class Curriculum: + + :param step_results: List of step results + """ +- obs, rews, terms, truncs, infos = tuple(step_results) ++ tasks, obs, rews, terms, truncs, infos = tuple(step_results) + for i in range(len(obs)): +- self.update_on_step(obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) ++ self.update_on_step(tasks[i], obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) + + def update_on_episode(self, episode_return: float, episode_length: int, episode_task: Any, env_id: int = None) -> None: + """Update the curriculum with episode results from the environment. 
+diff --git a/syllabus/core/curriculum_sync_wrapper.py b/syllabus/core/curriculum_sync_wrapper.py +index 6e069d8..f986643 100644 +--- a/syllabus/core/curriculum_sync_wrapper.py ++++ b/syllabus/core/curriculum_sync_wrapper.py +@@ -29,6 +29,14 @@ class CurriculumWrapper: + def tasks(self): + return self.task_space.tasks + ++ @property ++ def requires_step_updates(self): ++ return self.curriculum.requires_step_updates ++ ++ @property ++ def requires_episode_updates(self): ++ return self.curriculum.requires_episode_updates ++ + def get_tasks(self, task_space=None): + return self.task_space.get_tasks(gym_space=task_space) + +diff --git a/syllabus/core/environment_sync_wrapper.py b/syllabus/core/environment_sync_wrapper.py +index c995aa1..6edee7c 100644 +--- a/syllabus/core/environment_sync_wrapper.py ++++ b/syllabus/core/environment_sync_wrapper.py +@@ -19,7 +19,8 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + def __init__(self, + env, + components: MultiProcessingComponents, +- update_on_step: bool = True, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? ++ update_on_step: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? ++ update_on_progress: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? + batch_size: int = 100, + buffer_size: int = 2, # Having an extra task in the buffer minimizes wait time at reset + task_space: TaskSpace = None, +@@ -34,6 +35,7 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + self.update_queue = components.update_queue + self.task_space = task_space + self.update_on_step = update_on_step ++ self.update_on_progress = update_on_progress + self.batch_size = batch_size + self.global_task_completion = global_task_completion + self.task_progress = 0.0 +@@ -125,17 +127,21 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + def _package_step_updates(self): + step_batch = { + "update_type": "step_batch", +- "metrics": ([self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), ++ "metrics": ([self._tasks[:self._batch_step], self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), + "env_id": self.instance_id, + "request_sample": False + } +- task_batch = { +- "update_type": "task_progress_batch", +- "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), +- "env_id": self.instance_id, +- "request_sample": False +- } +- return [step_batch, task_batch] ++ update = [step_batch] ++ ++ if self.update_on_progress: ++ task_batch = { ++ "update_type": "task_progress_batch", ++ "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), ++ "env_id": self.instance_id, ++ "request_sample": False ++ } ++ update.append(task_batch) ++ return update + + def add_task(self, task): + update = { +diff --git a/syllabus/curricula/annealing_box.py b/syllabus/curricula/annealing_box.py +index 6c565ec..101981c 100644 +--- a/syllabus/curricula/annealing_box.py ++++ b/syllabus/curricula/annealing_box.py +@@ -49,8 +49,8 @@ class AnnealingBoxCurriculum(Curriculum): + """ + # Linear annealing from start_values to end_values + annealed_values = ( +- self.start_values + (self.end_values - self.start_values) * +- np.minimum(self.current_step, self.total_steps) / self.total_steps ++ 
self.start_values + (self.end_values - self.start_values) * ++ np.minimum(self.current_step, self.total_steps) / self.total_steps + ) + +- return [annealed_values.copy() for _ in range(k)] +\ No newline at end of file ++ return [annealed_values.copy() for _ in range(k)] +diff --git a/syllabus/curricula/noop.py b/syllabus/curricula/noop.py +index f6bd5dc..fb5d8ae 100644 +--- a/syllabus/curricula/noop.py ++++ b/syllabus/curricula/noop.py +@@ -28,7 +28,7 @@ class NoopCurriculum(Curriculum): + """ + pass + +- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: + """ + Update the curriculum with the current step results from the environment. + """ +diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py +index 9515df4..9c808dd 100644 +--- a/syllabus/curricula/plr/plr_wrapper.py ++++ b/syllabus/curricula/plr/plr_wrapper.py +@@ -23,16 +23,15 @@ class RolloutStorage(object): + get_value=None, + ): + self.num_steps = num_steps +- self.buffer_steps = num_steps * 2 # Hack to prevent overflow from lagging updates. ++ self.buffer_steps = num_steps * 4 # Hack to prevent overflow from lagging updates. + self.num_processes = num_processes + self._requires_value_buffers = requires_value_buffers + self._get_value = get_value + self.tasks = torch.zeros(self.buffer_steps, num_processes, 1, dtype=torch.int) + self.masks = torch.ones(self.buffer_steps + 1, num_processes, 1) + self.obs = [[[0] for _ in range(self.num_processes)]] * self.buffer_steps +- self._fill = torch.zeros(self.buffer_steps, num_processes, 1) + self.env_steps = [0] * num_processes +- self.should_update = False ++ self.ready_buffers = set() + + if requires_value_buffers: + self.returns = torch.zeros(self.buffer_steps + 1, num_processes, 1) +@@ -46,12 +45,10 @@ class RolloutStorage(object): + self.action_log_dist = torch.zeros(self.buffer_steps, num_processes, action_space.n) + + self.num_steps = num_steps +- self.step = 0 + + def to(self, device): + self.masks = self.masks.to(device) + self.tasks = self.tasks.to(device) +- self._fill = self._fill.to(device) + if self._requires_value_buffers: + self.rewards = self.rewards.to(device) + self.value_preds = self.value_preds.to(device) +@@ -59,108 +56,79 @@ class RolloutStorage(object): + else: + self.action_log_dist = self.action_log_dist.to(device) + +- def insert(self, masks, action_log_dist=None, value_preds=None, rewards=None, tasks=None): +- if self._requires_value_buffers: +- assert (value_preds is not None and rewards is not None), "Selected strategy requires value_preds and rewards" +- if len(rewards.shape) == 3: +- rewards = rewards.squeeze(2) +- self.value_preds[self.step].copy_(torch.as_tensor(value_preds)) +- self.rewards[self.step].copy_(torch.as_tensor(rewards)[:, None]) +- self.masks[self.step + 1].copy_(torch.as_tensor(masks)[:, None]) +- else: +- self.action_log_dist[self.step].copy_(action_log_dist) +- if tasks is not None: +- assert isinstance(tasks[0], int), "Provided task must be an integer" +- self.tasks[self.step].copy_(torch.as_tensor(tasks)[:, None]) +- self.step = (self.step + 1) % self.num_steps +- + def insert_at_index(self, env_index, mask=None, action_log_dist=None, obs=None, reward=None, task=None, steps=1): +- if env_index >= self.num_processes: +- warnings.warn(f"Env index {env_index} is greater than the number of processes {self.num_processes}. 
Using index {env_index % self.num_processes} instead.") +- env_index = env_index % self.num_processes +- + step = self.env_steps[env_index] + end_step = step + steps +- # Update buffer fill traacker, and check for common usage errors. +- try: +- if end_step > len(self._fill): +- raise IndexError +- self._fill[step:end_step, env_index] = 1 +- except IndexError as e: +- if any(self._fill[:][env_index] == 0): +- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. Your value for PLR's num_processes may be too high.") from e +- else: +- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. Your value for PLR's num_processes may be too low.") from e + + if mask is not None: + self.masks[step + 1:end_step + 1, env_index].copy_(torch.as_tensor(mask[:, None])) ++ + if obs is not None: + for s in range(step, end_step): + self.obs[s][env_index] = obs[s - step] ++ + if reward is not None: + self.rewards[step:end_step, env_index].copy_(torch.as_tensor(reward[:, None])) ++ + if action_log_dist is not None: + self.action_log_dist[step:end_step, env_index].copy_(torch.as_tensor(action_log_dist[:, None])) ++ + if task is not None: + try: +- task = int(task) ++ int(task[0]) + except TypeError: +- assert isinstance(task, int), f"Provided task must be an integer, got {task} with type {type(task)} instead." +- self.tasks[step:end_step, env_index].copy_(torch.as_tensor(task)) +- else: +- self.env_steps[env_index] += steps +- # Hack for now, we call insert_at_index twice +- while all(self._fill[self.step] == 1): +- self.step = (self.step + 1) % self.buffer_steps +- # Check if we have enough steps to compute a task sampler update +- if self.step == self.num_steps + 1: +- self.should_update = True +- +- def _get_values(self): ++ assert isinstance(task, int), f"Provided task must be an integer, got {task[0]} with type {type(task[0])} instead." ++ self.tasks[step:end_step, env_index].copy_(torch.as_tensor(np.array(task)[:, None])) ++ ++ self.env_steps[env_index] += steps ++ if env_index not in self.ready_buffers and self.env_steps[env_index] >= self.num_steps: ++ self.ready_buffers.add(env_index) ++ ++ def _get_values(self, env_index): + if self._get_value is None: + raise UsageError("Selected strategy requires value predictions. Please provide get_value function.") +- for step in range(self.num_steps): +- values = self._get_value(self.obs[step]) ++ for step in range(0, self.num_steps, self.num_processes): ++ obs = self.obs[step: step + self.num_processes][env_index] ++ values = self._get_value(obs) ++ ++ # Reshape values if necessary + if len(values.shape) == 3: + warnings.warn(f"Value function returned a 3D tensor of shape {values.shape}. Attempting to squeeze last dimension.") + values = torch.squeeze(values, -1) + if len(values.shape) == 1: + warnings.warn(f"Value function returned a 1D tensor of shape {values.shape}. 
Attempting to unsqueeze last dimension.") + values = torch.unsqueeze(values, -1) +- self.value_preds[step].copy_(values) + +- def after_update(self): ++ self.value_preds[step: step + self.num_processes, env_index].copy_(values) ++ ++ def after_update(self, env_index): + # After consuming the first num_steps of data, remove them and shift the remaining data in the buffer +- self.tasks[0: self.num_steps].copy_(self.tasks[self.num_steps: self.buffer_steps]) +- self.masks[0: self.num_steps].copy_(self.masks[self.num_steps: self.buffer_steps]) +- self.obs[0: self.num_steps][:] = self.obs[self.num_steps: self.buffer_steps][:] ++ self.tasks = self.tasks.roll(-self.num_steps, 0) ++ self.masks = self.masks.roll(-self.num_steps, 0) ++ self.obs[0:][env_index] = self.obs[self.num_steps: self.buffer_steps][env_index] + + if self._requires_value_buffers: +- self.returns[0: self.num_steps].copy_(self.returns[self.num_steps: self.buffer_steps]) +- self.rewards[0: self.num_steps].copy_(self.rewards[self.num_steps: self.buffer_steps]) +- self.value_preds[0: self.num_steps].copy_(self.value_preds[self.num_steps: self.buffer_steps]) ++ self.returns = self.returns.roll(-self.num_steps, 0) ++ self.rewards = self.rewards.roll(-self.num_steps, 0) ++ self.value_preds = self.value_preds.roll(-self.num_steps, 0) + else: +- self.action_log_dist[0: self.num_steps].copy_(self.action_log_dist[self.num_steps: self.buffer_steps]) ++ self.action_log_dist = self.action_log_dist.roll(-self.num_steps, 0) + +- self._fill[0: self.num_steps].copy_(self._fill[self.num_steps: self.buffer_steps]) +- self._fill[self.num_steps: self.buffer_steps].copy_(0) ++ self.env_steps[env_index] -= self.num_steps ++ self.ready_buffers.remove(env_index) + +- self.env_steps = [steps - self.num_steps for steps in self.env_steps] +- self.should_update = False +- self.step = self.step - self.num_steps +- +- def compute_returns(self, gamma, gae_lambda): ++ def compute_returns(self, gamma, gae_lambda, env_index): + assert self._requires_value_buffers, "Selected strategy does not use compute_rewards." +- self._get_values() ++ self._get_values(env_index) + gae = 0 + for step in reversed(range(self.rewards.size(0), self.num_steps)): + delta = ( +- self.rewards[step] +- + gamma * self.value_preds[step + 1] * self.masks[step + 1] +- - self.value_preds[step] ++ self.rewards[step, env_index] ++ + gamma * self.value_preds[step + 1, env_index] * self.masks[step + 1, env_index] ++ - self.value_preds[step, env_index] + ) +- gae = delta + gamma * gae_lambda * self.masks[step + 1] * gae +- self.returns[step] = gae + self.value_preds[step] ++ gae = delta + gamma * gae_lambda * self.masks[step + 1, env_index] * gae ++ self.returns[step, env_index] = gae + self.value_preds[step, env_index] + + + def null(x): +@@ -252,11 +220,15 @@ class PrioritizedLevelReplay(Curriculum): + else: + return [self._task_sampler.sample() for _ in range(k)] + +- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: + """ + Update the curriculum with the current step results from the environment. + """ + assert env_id is not None, "env_id must be provided for PLR updates." ++ if env_id >= self._num_processes: ++ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. 
Using index {env_id % self._num_processes} instead.") ++ env_id = env_id % self._num_processes ++ + # Update rollouts + self._rollouts.insert_at_index( + env_id, +@@ -266,14 +238,22 @@ class PrioritizedLevelReplay(Curriculum): + obs=np.array([obs]), + ) + ++ # Update task sampler ++ if env_id in self._rollouts.ready_buffers: ++ self._update_sampler(env_id) ++ + def update_on_step_batch( +- self, step_results: List[Tuple[Any, int, bool, bool, Dict]], env_id: int = None ++ self, step_results: List[Tuple[int, Any, int, bool, bool, Dict]], env_id: int = None + ) -> None: + """ + Update the curriculum with a batch of step results from the environment. + """ + assert env_id is not None, "env_id must be provided for PLR updates." +- obs, rews, terms, truncs, infos = step_results ++ if env_id >= self._num_processes: ++ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. Using index {env_id % self._num_processes} instead.") ++ env_id = env_id % self._num_processes ++ ++ tasks, obs, rews, terms, truncs, infos = step_results + self._rollouts.insert_at_index( + env_id, + mask=np.logical_not(np.logical_or(terms, truncs)), +@@ -281,25 +261,19 @@ class PrioritizedLevelReplay(Curriculum): + reward=rews, + obs=obs, + steps=len(rews), ++ task=tasks, + ) + +- def update_task_progress(self, task: Any, success_prob: float, env_id: int = None) -> None: +- """ +- Update the curriculum with a task and its success probability upon +- success or failure. +- """ +- assert env_id is not None, "env_id must be provided for PLR updates." +- self._rollouts.insert_at_index( +- env_id, +- task=task, +- ) + # Update task sampler +- if self._rollouts.should_update: +- if self._task_sampler.requires_value_buffers: +- self._rollouts.compute_returns(self._gamma, self._gae_lambda) +- self._task_sampler.update_with_rollouts(self._rollouts) +- self._rollouts.after_update() +- self._task_sampler.after_update() ++ if env_id in self._rollouts.ready_buffers: ++ self._update_sampler(env_id) ++ ++ def _update_sampler(self, env_id): ++ if self._task_sampler.requires_value_buffers: ++ self._rollouts.compute_returns(self._gamma, self._gae_lambda, env_id) ++ self._task_sampler.update_with_rollouts(self._rollouts, env_id) ++ self._rollouts.after_update(env_id) ++ self._task_sampler.after_update() + + def _enumerate_tasks(self, space): + assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete" +@@ -312,10 +286,10 @@ class PrioritizedLevelReplay(Curriculum): + """ + Log the task distribution to the provided tensorboard writer. 
+ """ +- super().log_metrics(writer, step) ++ # super().log_metrics(writer, step) + metrics = self._task_sampler.metrics() + writer.add_scalar("curriculum/proportion_seen", metrics["proportion_seen"], step) + writer.add_scalar("curriculum/score", metrics["score"], step) +- for task in list(self.task_space.tasks)[:10]: +- writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) +- writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) ++ # for task in list(self.task_space.tasks)[:10]: ++ # writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) ++ # writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) +diff --git a/syllabus/curricula/plr/task_sampler.py b/syllabus/curricula/plr/task_sampler.py +index 15ad485..c1e97a1 100644 +--- a/syllabus/curricula/plr/task_sampler.py ++++ b/syllabus/curricula/plr/task_sampler.py +@@ -73,7 +73,7 @@ class TaskSampler: + 'Must provide action space to PLR if using "policy_entropy", "least_confidence", or "min_margin" strategies' + ) + +- def update_with_rollouts(self, rollouts): ++ def update_with_rollouts(self, rollouts, actor_id=None): + if self.strategy == "random": + return + +@@ -93,7 +93,7 @@ class TaskSampler: + else: + raise ValueError(f"Unsupported strategy, {self.strategy}") + +- self._update_with_rollouts(rollouts, score_function) ++ self._update_with_rollouts(rollouts, score_function, actor_index=actor_id) + + def update_task_score(self, actor_index, task_idx, score, num_steps): + score = self._partial_update_task_score(actor_index, task_idx, score, num_steps, done=True) +@@ -165,14 +165,15 @@ class TaskSampler: + def requires_value_buffers(self): + return self.strategy in ["gae", "value_l1", "one_step_td_error"] + +- def _update_with_rollouts(self, rollouts, score_function): ++ def _update_with_rollouts(self, rollouts, score_function, actor_index=None): + tasks = rollouts.tasks + if not self.requires_value_buffers: + policy_logits = rollouts.action_log_dist + done = ~(rollouts.masks > 0) + total_steps, num_actors = rollouts.tasks.shape[:2] + +- for actor_index in range(num_actors): ++ actors = [actor_index] if actor_index is not None else range(num_actors) ++ for actor_index in actors: + done_steps = done[:, actor_index].nonzero()[:total_steps, 0] + start_t = 0 + +diff --git a/syllabus/curricula/sequential.py b/syllabus/curricula/sequential.py +index baa1263..ec3b8b0 100644 +--- a/syllabus/curricula/sequential.py ++++ b/syllabus/curricula/sequential.py +@@ -177,9 +177,9 @@ class SequentialCurriculum(Curriculum): + if self.current_curriculum.requires_episode_updates: + self.current_curriculum.update_on_episode(episode_return, episode_len, episode_task, env_id) + +- def update_on_step(self, obs, rew, term, trunc, info, env_id=None): ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id=None): + if self.current_curriculum.requires_step_updates: +- self.current_curriculum.update_on_step(obs, rew, term, trunc, info, env_id) ++ self.current_curriculum.update_on_step(task, obs, rew, term, trunc, info, env_id) + + def update_on_step_batch(self, step_results, env_id=None): + if self.current_curriculum.requires_step_updates: +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py +index a6d469e..b848d69 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py 
++++ b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py +@@ -14,6 +14,7 @@ import gym as openai_gym + import gymnasium as gym + import numpy as np + import procgen # noqa: F401 ++from procgen import ProcgenEnv + import torch + import torch.nn as nn + import torch.optim as optim +@@ -21,10 +22,10 @@ from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 + from torch.utils.tensorboard import SummaryWriter + + from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum +-from syllabus.curricula import DomainRandomization, LearningProgressCurriculum, CentralizedPrioritizedLevelReplay ++from syllabus.curricula import CentralizedPrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum + from syllabus.examples.models import ProcgenAgent + from syllabus.examples.task_wrappers import ProcgenTaskWrapper +-from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize ++from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + + def parse_args(): +@@ -46,6 +47,8 @@ def parse_args(): + help="the entity (team) of wandb's project") + parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="weather to capture videos of the agent performances (check out `videos` folder)") ++ parser.add_argument("--logging-dir", type=str, default=".", ++ help="the base directory for logging and wandb storage.") + + # Algorithm specific arguments + parser.add_argument("--env-id", type=str, default="starpilot", +@@ -124,15 +127,15 @@ PROCGEN_RETURN_BOUNDS = { + } + + +-def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): ++def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) +- env = ProcgenTaskWrapper(env, env_id, seed=seed) +- if curriculum_components is not None: ++ if curriculum is not None: ++ env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, +- curriculum_components, ++ curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, + ) +@@ -147,36 +150,38 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def level_replay_evaluate( ++def full_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, +- num_levels=0 ++ num_levels=1 # Not used + ): + policy.eval() +- eval_envs = gym.vector.SyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) +- for i in range(1) +- ] ++ ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False + ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + +- eval_episode_rewards = [] ++ # Seed environments ++ seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] ++ for i, seed in enumerate(seeds): ++ eval_envs.seed(seed, i) ++ + eval_obs, _ = eval_envs.reset() ++ eval_episode_rewards = [-1] * num_episodes + +- while len(eval_episode_rewards) < num_episodes: ++ while -1 in eval_episode_rewards: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + +- eval_obs, _, truncs, terms, infos = 
eval_envs.step(np.array([eval_action.cpu().numpy()])) +- +- for info in infos: +- if 'episode' in info.keys(): +- eval_episode_rewards.append(info['episode']['r']) ++ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) ++ for i, info in enumerate(infos): ++ if 'episode' in info.keys() and eval_episode_rewards[i] == -1: ++ eval_episode_rewards[i] = info['episode']['r'] + +- eval_envs.close() + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +@@ -185,8 +190,7 @@ def level_replay_evaluate( + return mean_returns, stddev_returns, normalized_mean_returns + + +-def fast_level_replay_evaluate( +- eval_envs, ++def level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -194,9 +198,13 @@ def fast_level_replay_evaluate( + num_levels=0 + ): + policy.eval() +- possible_seeds = np.arange(0, num_levels + 1) +- eval_obs, _ = eval_envs.reset(seed=list(np.random.choice(possible_seeds, size=num_episodes))) + ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False ++ ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") ++ eval_envs = wrap_vecenv(eval_envs) ++ eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: +@@ -231,10 +239,11 @@ if __name__ == "__main__": + name=run_name, + monitor_gym=True, + save_code=True, +- # dir="/fs/nexus-scratch/rsulli/" ++ dir=args.logging_dir + ) +- wandb.run.log_code("./syllabus/examples") +- writer = SummaryWriter(f"./runs/{run_name}") ++ # wandb.run.log_code("./syllabus/examples") ++ ++ writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), +@@ -250,7 +259,7 @@ if __name__ == "__main__": + print("Device:", device) + + # Curriculum setup +- task_queue = update_queue = None ++ curriculum = None + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) +@@ -273,6 +282,16 @@ if __name__ == "__main__": + elif args.curriculum_method == "lp": + print("Using learning progress.") + curriculum = LearningProgressCurriculum(sample_env.task_space) ++ elif args.curriculum_method == "sq": ++ print("Using sequential curriculum.") ++ curricula = [] ++ stopping = [] ++ for i in range(199): ++ curricula.append(i + 1) ++ stopping.append("steps>=50000") ++ curricula.append(list(range(i + 1))) ++ stopping.append("steps>=50000") ++ curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) + else: + raise ValueError(f"Unknown curriculum method {args.curriculum_method}") + curriculum = make_multiprocessing_curriculum(curriculum) +@@ -285,7 +304,7 @@ if __name__ == "__main__": + make_env( + args.env_id, + args.seed + i, +- curriculum_components=curriculum.get_components() if args.curriculum else None, ++ curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) +@@ -293,22 +312,6 @@ if __name__ == "__main__": + ) + envs = wrap_vecenv(envs) + +- test_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=0) +- for i in range(args.num_eval_episodes) +- ] +- ) +- test_eval_envs = wrap_vecenv(test_eval_envs) +- +- train_eval_envs = 
gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=200) +- for i in range(args.num_eval_episodes) +- ] +- ) +- train_eval_envs = wrap_vecenv(train_eval_envs) +- + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( +@@ -369,6 +372,8 @@ if __name__ == "__main__": + print(f"global_step={global_step}, episodic_return={item['episode']['r']}") + writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) ++ if curriculum is not None: ++ curriculum.log_metrics(writer, global_step) + break + + # Syllabus curriculum update +@@ -388,8 +393,6 @@ if __name__ == "__main__": + }, + } + curriculum.update(update) +- #if args.curriculum: +- # curriculum.log_metrics(writer, global_step) + + # bootstrap value if not done + with torch.no_grad(): +@@ -487,8 +490,18 @@ if __name__ == "__main__": + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent +- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) +- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) ++ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) ++ full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) +@@ -502,12 +515,21 @@ if __name__ == "__main__": + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) ++ + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) ++ + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) +- 
writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) ++ writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) ++ + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +index e13c22e..dabcd50 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py ++++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +@@ -14,6 +14,7 @@ import gym as openai_gym + import gymnasium as gym + import numpy as np + import procgen # noqa: F401 ++from procgen import ProcgenEnv + import torch + import torch.nn as nn + import torch.optim as optim +@@ -24,7 +25,7 @@ from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curri + from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum + from syllabus.examples.models import ProcgenAgent + from syllabus.examples.task_wrappers import ProcgenTaskWrapper +-from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize ++from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + + def parse_args(): +@@ -126,18 +127,17 @@ PROCGEN_RETURN_BOUNDS = { + } + + +-def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): ++def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) +- env = ProcgenTaskWrapper(env, env_id, seed=seed) +- if curriculum_components is not None: ++ if curriculum is not None: ++ env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, +- curriculum_components, +- update_on_step=False, ++ curriculum.get_components(), ++ update_on_step=curriculum.requires_step_updates, + task_space=env.task_space, +- buffer_size=4, + ) + return env + return thunk +@@ -150,36 +150,38 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def level_replay_evaluate( ++def full_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, +- num_levels=0 ++ num_levels=1 # Not used + ): + policy.eval() +- eval_envs = gym.vector.SyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) +- for i in range(1) +- ] ++ ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False + ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + +- eval_episode_rewards = [] ++ # Seed environments ++ seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] ++ for i, seed in enumerate(seeds): ++ eval_envs.seed(seed, i) ++ + eval_obs, _ = eval_envs.reset() ++ eval_episode_rewards = [-1] * num_episodes + +- while len(eval_episode_rewards) < num_episodes: ++ while -1 in eval_episode_rewards: + with torch.no_grad(): + eval_action, _, _, _ = 
policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + +- eval_obs, _, truncs, terms, infos = eval_envs.step(np.array([eval_action.cpu().numpy()])) +- +- for info in infos: +- if 'episode' in info.keys(): +- eval_episode_rewards.append(info['episode']['r']) ++ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) ++ for i, info in enumerate(infos): ++ if 'episode' in info.keys() and eval_episode_rewards[i] == -1: ++ eval_episode_rewards[i] = info['episode']['r'] + +- eval_envs.close() + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +@@ -188,8 +190,7 @@ def level_replay_evaluate( + return mean_returns, stddev_returns, normalized_mean_returns + + +-def fast_level_replay_evaluate( +- eval_envs, ++def level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -198,15 +199,12 @@ def fast_level_replay_evaluate( + ): + policy.eval() + +- # Choose evaluation seeds +- if num_levels == 0: +- seeds = np.random.randint(0, 2 ** 16 - 1, size=num_episodes) +- else: +- seeds = np.random.choice(np.arange(0, num_levels), size=num_episodes) +- +- seed_envs = [(int(seed), env) for seed, env in zip(seeds, range(num_episodes))] +- eval_obs, _ = eval_envs.reset(seed=seed_envs) +- ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False ++ ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") ++ eval_envs = wrap_vecenv(eval_envs) ++ eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: +@@ -251,7 +249,7 @@ if __name__ == "__main__": + save_code=True, + dir=args.logging_dir + ) +- wandb.run.log_code(os.path.join(args.logging_dir, "/syllabus/examples")) ++ # wandb.run.log_code("./syllabus/examples") + + writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) + writer.add_text( +@@ -316,7 +314,7 @@ if __name__ == "__main__": + make_env( + args.env_id, + args.seed + i, +- curriculum_components=curriculum.get_components() if args.curriculum else None, ++ curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) +@@ -324,22 +322,6 @@ if __name__ == "__main__": + ) + envs = wrap_vecenv(envs) + +- test_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=0) +- for i in range(args.num_eval_episodes) +- ] +- ) +- test_eval_envs = wrap_vecenv(test_eval_envs) +- +- train_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=200) +- for i in range(args.num_eval_episodes) +- ] +- ) +- train_eval_envs = wrap_vecenv(train_eval_envs) +- + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( +@@ -500,8 +482,18 @@ if __name__ == "__main__": + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent +- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) +- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) ++ mean_eval_returns, 
stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) ++ full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) +@@ -515,12 +507,21 @@ if __name__ == "__main__": + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) ++ + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) ++ + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) +- writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) ++ writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) ++ + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() +diff --git a/syllabus/examples/utils/vecenv.py b/syllabus/examples/utils/vecenv.py +index 6e5a0a9..af3b187 100644 +--- a/syllabus/examples/utils/vecenv.py ++++ b/syllabus/examples/utils/vecenv.py +@@ -1,7 +1,6 @@ + import time + from collections import deque + +-import gym + import numpy as np + + +@@ -154,12 +153,20 @@ class VecEnvObservationWrapper(VecEnvWrapper): + pass + + def reset(self): +- obs, infos = self.venv.reset() ++ outputs = self.venv.reset() ++ if len(outputs) == 2: ++ obs, infos = outputs ++ else: ++ obs, infos = outputs, {} + return self.process(obs), infos + + def step_wait(self): +- print(self.venv) +- obs, rews, terms, truncs, infos = self.venv.step_wait() ++ env_outputs = self.venv.step_wait() ++ if len(env_outputs) == 4: ++ obs, rews, terms, infos = env_outputs ++ truncs = np.zeros_like(terms) ++ else: ++ obs, rews, terms, truncs, infos = env_outputs + return self.process(obs), rews, terms, truncs, infos + + 
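The VecEnvObservationWrapper changes in the hunk above normalize old-style gym step returns (4-tuple) to the gymnasium 5-tuple API. A minimal standalone sketch of the same pattern follows; the function name is illustrative and not part of this patch:

    import numpy as np

    def normalize_step_outputs(outputs):
        # Older gym VecEnvs return (obs, rews, dones, infos); gymnasium-style
        # envs return (obs, rews, terms, truncs, infos). When truncation info
        # is missing, pad it with zeros so callers can always unpack 5 values.
        if len(outputs) == 4:
            obs, rews, terms, infos = outputs
            truncs = np.zeros_like(terms)
        else:
            obs, rews, terms, truncs, infos = outputs
        return obs, rews, terms, truncs, infos
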
+@@ -209,7 +216,10 @@ class VecNormalize(VecEnvWrapper): + + def reset(self, seed=None): + self.ret = np.zeros(self.num_envs) +- obs, infos = self.venv.reset(seed=seed) ++ if seed is not None: ++ obs, infos = self.venv.reset(seed=seed) ++ else: ++ obs, infos = self.venv.reset() + return self._obfilt(obs), infos + + +@@ -228,7 +238,10 @@ class VecMonitor(VecEnvWrapper): + self.eplen_buf = deque([], maxlen=keep_buf) + + def reset(self, seed=None): +- obs, infos = self.venv.reset(seed=seed) ++ if seed is not None: ++ obs, infos = self.venv.reset(seed=seed) ++ else: ++ obs, infos = self.venv.reset() + self.eprets = np.zeros(self.num_envs, 'f') + self.eplens = np.zeros(self.num_envs, 'i') + return obs, infos +@@ -239,7 +252,8 @@ class VecMonitor(VecEnvWrapper): + self.eprets += rews + self.eplens += 1 + # Convert dict of lists to list of dicts +- infos = [dict(zip(infos, t)) for t in zip(*infos.values())] ++ if isinstance(infos, dict): ++ infos = [dict(zip(infos, t)) for t in zip(*infos.values())] + newinfos = list(infos[:]) + for i in range(len(dones)): + if dones[i]: +diff --git a/syllabus/task_space/task_space.py b/syllabus/task_space/task_space.py +index 316e2f2..1ef674b 100644 +--- a/syllabus/task_space/task_space.py ++++ b/syllabus/task_space/task_space.py +@@ -7,20 +7,53 @@ from gymnasium.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Sp + + class TaskSpace(): + def __init__(self, gym_space: Union[Space, int], tasks=None): +- if isinstance(gym_space, int): +- # Syntactic sugar for discrete space +- gym_space = Discrete(gym_space) ++ ++ if not isinstance(gym_space, Space): ++ gym_space = self._create_gym_space(gym_space) + + self.gym_space = gym_space + +- # Autogenerate task names for discrete spaces +- if isinstance(gym_space, Discrete): +- if tasks is None: +- tasks = range(gym_space.n) ++ # Autogenerate task names ++ if tasks is None: ++ tasks = self._generate_task_names(gym_space) + + self._tasks = set(tasks) if tasks is not None else None + self._encoder, self._decoder = self._make_task_encoder(gym_space, tasks) + ++ def _create_gym_space(self, gym_space): ++ if isinstance(gym_space, int): ++ # Syntactic sugar for discrete space ++ gym_space = Discrete(gym_space) ++ elif isinstance(gym_space, tuple): ++ # Syntactic sugar for discrete space ++ gym_space = MultiDiscrete(gym_space) ++ elif isinstance(gym_space, list): ++ # Syntactic sugar for tuple space ++ spaces = [] ++ for i, value in enumerate(gym_space): ++ spaces[i] = self._create_gym_space(value) ++ gym_space = Tuple(spaces) ++ elif isinstance(gym_space, dict): ++ # Syntactic sugar for dict space ++ spaces = {} ++ for key, value in gym_space.items(): ++ spaces[key] = self._create_gym_space(value) ++ gym_space = Dict(spaces) ++ return gym_space ++ ++ def _generate_task_names(self, gym_space): ++ if isinstance(gym_space, Discrete): ++ tasks = tuple(range(gym_space.n)) ++ elif isinstance(gym_space, MultiDiscrete): ++ tasks = [tuple(range(dim)) for dim in gym_space.nvec] ++ elif isinstance(gym_space, Tuple): ++ tasks = [self._generate_task_names(value) for value in gym_space.spaces] ++ elif isinstance(gym_space, Dict): ++ tasks = {key: tuple(self._generate_task_names(value)) for key, value in gym_space.spaces.items()} ++ else: ++ tasks = None ++ return tasks ++ + def _make_task_encoder(self, space, tasks): + if isinstance(space, Discrete): + assert space.n == len(tasks), f"Number of tasks ({space.n}) must match number of discrete options ({len(tasks)})" +@@ -28,14 +61,46 @@ class TaskSpace(): + self._decode_map = {i: 
task for i, task in enumerate(tasks)} + encoder = lambda task: self._encode_map[task] if task in self._encode_map else None + decoder = lambda task: self._decode_map[task] if task in self._decode_map else None ++ ++ elif isinstance(space, Box): ++ encoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None ++ decoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None + elif isinstance(space, Tuple): +- for i, task in enumerate(tasks): +- assert self.count_tasks(space.spaces[i]) == len(task), "Each task must have number of components equal to Tuple space length. Got {len(task)} components and space length {self.count_tasks(space.spaces[i])}." ++ ++ assert len(space.spaces) == len(tasks), f"Number of task ({len(space.spaces)})must match options in Tuple ({len(tasks)})" + results = [list(self._make_task_encoder(s, t)) for (s, t) in zip(space.spaces, tasks)] + encoders = [r[0] for r in results] + decoders = [r[1] for r in results] + encoder = lambda task: [e(t) for e, t in zip(encoders, task)] + decoder = lambda task: [d(t) for d, t in zip(decoders, task)] ++ ++ elif isinstance(space, MultiDiscrete): ++ assert len(space.nvec) == len(tasks), f"Number of steps in a tasks ({len(space.nvec)}) must match number of discrete options ({len(tasks)})" ++ ++ combinations = [p for p in itertools.product(*tasks)] ++ encode_map = {task: i for i, task in enumerate(combinations)} ++ decode_map = {i: task for i, task in enumerate(combinations)} ++ ++ encoder = lambda task: encode_map[task] if task in encode_map else None ++ decoder = lambda task: decode_map[task] if task in decode_map else None ++ ++ elif isinstance(space, Dict): ++ ++ def helper(task, spaces, tasks, action="encode"): ++ # Iteratively encodes or decodes each space in the dictionary ++ output = {} ++ if (isinstance(spaces, dict) or isinstance(spaces, Dict)): ++ for key, value in spaces.items(): ++ if (isinstance(value, dict) or isinstance(value, Dict)): ++ temp = helper(task[key], value, tasks[key], action) ++ output.update({key: temp}) ++ else: ++ encoder, decoder = self._make_task_encoder(value, tasks[key]) ++ output[key] = encoder(task[key]) if action == "encode" else decoder(task[key]) ++ return output ++ ++ encoder = lambda task: helper(task, space.spaces, tasks, "encode") ++ decoder = lambda task: helper(task, space.spaces, tasks, "decode") + else: + encoder = lambda task: task + decoder = lambda task: task +@@ -152,6 +217,7 @@ class TaskSpace(): + return Discrete(self.gym_space.n + amount) + + def sample(self): ++ assert isinstance(self.gym_space, Discrete) or isinstance(self.gym_space, Box) or isinstance(self.gym_space, Dict) or isinstance(self.gym_space, Tuple) + return self.decode(self.gym_space.sample()) + + def list_tasks(self): +diff --git a/syllabus/task_space/test_task_space.py b/syllabus/task_space/test_task_space.py +index 0ec6b4e..109d0a7 100644 +--- a/syllabus/task_space/test_task_space.py ++++ b/syllabus/task_space/test_task_space.py +@@ -2,33 +2,148 @@ import gymnasium as gym + from syllabus.task_space import TaskSpace + + if __name__ == "__main__": ++ # Discrete Tests + task_space = TaskSpace(gym.spaces.Discrete(3), ["a", "b", "c"]) ++ + assert task_space.encode("a") == 0, f"Expected 0, got {task_space.encode('a')}" + assert task_space.encode("b") == 1, f"Expected 1, got {task_space.encode('b')}" + assert task_space.encode("c") == 2, f"Expected 2, got {task_space.encode('c')}" +- assert task_space.encode("d") == None, f"Expected None, got 
{task_space.encode('d')}" ++ assert task_space.encode("d") is None, f"Expected None, got {task_space.encode('d')}" + + assert task_space.decode(0) == "a", f"Expected a, got {task_space.decode(0)}" + assert task_space.decode(1) == "b", f"Expected b, got {task_space.decode(1)}" + assert task_space.decode(2) == "c", f"Expected c, got {task_space.decode(2)}" +- assert task_space.decode(3) == None, f"Expected None, got {task_space.decode(3)}" ++ assert task_space.decode(3) is None, f"Expected None, got {task_space.decode(3)}" + print("Discrete tests passed!") + ++ # MultiDiscrete Tests ++ task_space = TaskSpace(gym.spaces.MultiDiscrete([3, 2]), [("a", "b", "c"), (1, 0)]) ++ ++ assert task_space.encode(('a', 1)) == 0, f"Expected 0, got {task_space.encode(('a', 1))}" ++ assert task_space.encode(('b', 0)) == 3, f"Expected 3, got {task_space.encode(('b', 0))}" ++ assert task_space.encode(('c', 1)) == 4, f"Expected 4, got {task_space.encode(('c', 1))}" ++ ++ assert task_space.decode(3) == ('b', 0), f"Expected ('b', 0), got {task_space.decode(3)}" ++ assert task_space.decode(5) == ('c', 0), f"Expected ('c', 0), got {task_space.decode(5)}" ++ print("MultiDiscrete tests passed!") ++ ++ # Box Tests + task_space = TaskSpace(gym.spaces.Box(low=0, high=1, shape=(2,)), [(0, 0), (0, 1), (1, 0), (1, 1)]) ++ + assert task_space.encode([0.0, 0.0]) == [0.0, 0.0], f"Expected [0.0, 0.0], got {task_space.encode([0.0, 0.0])}" + assert task_space.encode([0.0, 0.1]) == [0.0, 0.1], f"Expected [0.0, 0.1], got {task_space.encode([0.0, 0.1])}" + assert task_space.encode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.encode([0.1, 0.1])}" + assert task_space.encode([1.0, 0.1]) == [1.0, 0.1], f"Expected [1.0, 0.1], got {task_space.encode([1.0, 0.1])}" + assert task_space.encode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.encode([1.0, 1.0])}" +- assert task_space.encode([1.2, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" +- assert task_space.encode([1.0, 1.2]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" +- assert task_space.encode([-0.1, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([1.2, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([1.0, 1.2]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([-0.1, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" + + assert task_space.decode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.decode([1.0, 1.0])}" + assert task_space.decode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.decode([0.1, 0.1])}" +- assert task_space.decode([-0.1, 1.0]) == None, f"Expected None, got {task_space.decode([1.2, 1.0])}" ++ assert task_space.decode([-0.1, 1.0]) is None, f"Expected None, got {task_space.decode([1.2, 1.0])}" + print("Box tests passed!") + ++ # Tuple Tests ++ task_spaces = (gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3)) ++ task_names = ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")) ++ task_space = TaskSpace(gym.spaces.Tuple(task_spaces), task_names) ++ ++ assert task_space.encode((('a', 0), 'Y')) == [1, 1], f"Expected 0, got {task_space.encode((('a', 1),'Y'))}" ++ assert task_space.decode([0, 1]) == [('a', 1), 'Y'], f"Expected 0, got {task_space.decode([0, 1])}" ++ print("Tuple tests passed!") ++ ++ # Dictionary Tests ++ task_spaces = gym.spaces.Dict({ ++ "ext_controller": gym.spaces.MultiDiscrete([5, 2, 
2]), ++ "inner_state": gym.spaces.Dict( ++ { ++ "charge": gym.spaces.Discrete(10), ++ "system_checks": gym.spaces.Tuple((gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3))), ++ "job_status": gym.spaces.Dict( ++ { ++ "task": gym.spaces.Discrete(5), ++ "progress": gym.spaces.Box(low=0, high=1, shape=(2,)), ++ } ++ ), ++ } ++ ), ++ }) ++ task_names = { ++ "ext_controller": [("a", "b", "c", "d", "e"), (1, 0), ("X", "Y")], ++ "inner_state": { ++ "charge": [0, 1, 13, 3, 94, 35, 6, 37, 8, 9], ++ "system_checks": ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")), ++ "job_status": { ++ "task": ["A", "B", "C", "D", "E"], ++ "progress": [(0, 0), (0, 1), (1, 0), (1, 1)], ++ } ++ } ++ } ++ task_space = TaskSpace(task_spaces, task_names) ++ ++ test_val = { ++ "ext_controller": ('b', 1, 'X'), ++ 'inner_state': { ++ 'charge': 1, ++ 'system_checks': [('a', 0), 'Y'], ++ 'job_status': {'task': 'C', 'progress': [0.0, 0.0]} ++ } ++ } ++ decode_val = { ++ "ext_controller": 4, ++ "inner_state": { ++ "charge": 1, ++ "system_checks": [1, 1], ++ "job_status": {"progress": [0.0, 0.0], "task": 2}, ++ }, ++ } ++ ++ assert task_space.encode(test_val) == decode_val, f"Expected {decode_val}, \n but got {task_space.encode(test_val)}" ++ assert task_space.decode(decode_val) == test_val, f"Expected {test_val}, \n but got {task_space.decode(decode_val)}" ++ ++ test_val_2 = { ++ "ext_controller": ("e", 1, "Y"), ++ "inner_state": { ++ "charge": 37, ++ "system_checks": [("b", 0), "Z"], ++ "job_status": {"progress": [0.0, 0.1], "task": "D"}, ++ }, ++ } ++ decode_val_2 = { ++ "ext_controller": 17, ++ "inner_state": { ++ "charge": 7, ++ "system_checks": [3, 2], ++ "job_status": {"progress": [0.0, 0.1], "task": 3}, ++ }, ++ } ++ ++ assert task_space.encode(test_val_2) == decode_val_2, f"Expected {decode_val_2}, \n but got {task_space.encode(test_val_2)}" ++ assert task_space.decode(decode_val_2) == test_val_2, f"Expected {test_val_2}, \n but got {task_space.decode(decode_val_2)}" ++ ++ test_val_3 = { ++ "ext_controller": ("e", 1, "X"), ++ "inner_state": { ++ "charge": 8, ++ "system_checks": [("c", 0), "X"], ++ "job_status": {"progress": [0.5, 0.1], "task": "E"}, ++ }, ++ } ++ decode_val_3 = { ++ "ext_controller": 16, ++ "inner_state": { ++ "charge": 8, ++ "system_checks": [5, 0], ++ "job_status": {"progress": [0.5, 0.1], "task": 4}, ++ }, ++ } ++ ++ assert task_space.encode(test_val_3) == decode_val_3, f"Expected {decode_val_3}, \n but got {task_space.encode(test_val_3)}" ++ assert task_space.decode(decode_val_3) == test_val_3, f"Expected {test_val_3}, \n but got {task_space.decode(decode_val_3)}" ++ ++ print("Dictionary tests passed!") ++ + # Test syntactic sugar + task_space = TaskSpace(3) + assert task_space.encode(0) == 0, f"Expected 0, got {task_space.encode(0)}" +@@ -36,4 +151,32 @@ if __name__ == "__main__": + assert task_space.encode(2) == 2, f"Expected 2, got {task_space.encode(2)}" + assert task_space.encode(3) is None, f"Expected None, got {task_space.encode(3)}" + ++ task_space = TaskSpace((2, 4)) ++ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" ++ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" ++ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" ++ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" ++ ++ task_space = TaskSpace((2, 4)) ++ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" ++ assert task_space.encode((0, 1)) == 1, 
f"Expected 1, got {task_space.encode((0, 1))}" ++ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" ++ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" ++ ++ task_space = TaskSpace({"map": 5, "level": (4, 10), "difficulty": 3}) ++ ++ encoding = task_space.encode({"map": 0, "level": (0, 0), "difficulty": 0}) ++ expected = {"map": 0, "level": 0, "difficulty": 0} ++ ++ encoding = task_space.encode({"map": 4, "level": (3, 9), "difficulty": 2}) ++ expected = {"map": 4, "level": 39, "difficulty": 2} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" ++ ++ encoding = task_space.encode({"map": 2, "level": (2, 0), "difficulty": 1}) ++ expected = {"map": 2, "level": 20, "difficulty": 1} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" ++ ++ encoding = task_space.encode({"map": 5, "level": (2, 11), "difficulty": -1}) ++ expected = {"map": None, "level": None, "difficulty": None} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" + print("All tests passed!") +diff --git a/syllabus/tests/utils.py b/syllabus/tests/utils.py +index 314a29c..98bac82 100644 +--- a/syllabus/tests/utils.py ++++ b/syllabus/tests/utils.py +@@ -57,7 +57,7 @@ def run_episode(env, new_task=None, curriculum=None, env_id=0): + action = env.action_space.sample() + obs, rew, term, trunc, info = env.step(action) + if curriculum and curriculum.requires_step_updates: +- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) ++ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) + curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) + ep_rew += rew + ep_len += 1 +@@ -87,7 +87,7 @@ def run_set_length(env, curriculum=None, episodes=None, steps=None, env_id=0, en + action = env.action_space.sample() + obs, rew, term, trunc, info = env.step(action) + if curriculum and curriculum.requires_step_updates: +- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) ++ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) + curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) + ep_rew += rew + ep_len += 1 +diff --git a/tests/multiprocessing_smoke_tests.py b/tests/multiprocessing_smoke_tests.py +index 9db9f47..b788179 100644 +--- a/tests/multiprocessing_smoke_tests.py ++++ b/tests/multiprocessing_smoke_tests.py +@@ -21,23 +21,23 @@ nethack_env = create_nethack_env() + cartpole_env = create_cartpole_env() + + curricula = [ +- (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), +- (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), +- # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), +- (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), +- (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { +- "get_value": get_test_values, +- "device": "cpu", +- "num_processes": N_ENVS, +- "num_steps": 2048 +- }), +- (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), +- (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { +- 'start_values': [-0.02, 0.02], +- 'end_values': [-0.3, 0.3], +- 'total_steps': [10] +- 
}), +- (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), ++ (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), ++ (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), ++ # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), ++ (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), ++ (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { ++ "get_value": get_test_values, ++ "device": "cpu", ++ "num_processes": N_ENVS, ++ "num_steps": 2048 ++ }), ++ (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), ++ (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { ++ 'start_values': [-0.02, 0.02], ++ 'end_values': [-0.3, 0.3], ++ 'total_steps': [10] ++ }), ++ (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), + ] + + test_names = [curriculum_args[0].__name__ for curriculum_args in curricula] diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-metadata.json b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-metadata.json new file mode 100644 index 00000000..df99c501 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-metadata.json @@ -0,0 +1,167 @@ +{ + "os": "Linux-3.10.0-1160.11.1.el7.x86_64-x86_64-with-glibc2.10", + "python": "3.8.5", + "heartbeatAt": "2024-04-23T02:52:49.357411", + "startedAt": "2024-04-23T02:52:48.774085", + "docker": null, + "cuda": "10.1.243", + "args": [ + "--curriculum", + "True", + "--track", + "True", + "--env-id", + "bigfish" + ], + "state": "running", + "program": "cleanrl_procgen_plr.py", + "codePathLocal": "cleanrl_procgen_plr.py", + "codePath": "syllabus/examples/training_scripts/cleanrl_procgen_plr.py", + "git": { + "remote": "https://github.com/RoseyGreenBlue/Syllabus.git", + "commit": "63dc8f62e4d9d567eb92bb2f6c2bb186a0dc8ffb" + }, + "email": "djhaayusv04@gmail.com", + "root": "/data/averma/MARL/Syllabus", + "host": "f411843fc70b", + "username": "root", + "executable": "/home/user/miniconda/envs/test2_py/bin/python", + "cpu_count": 12, + "cpu_count_logical": 24, + "cpu_freq": { + "current": 1328.0466666666666, + "min": 1200.0, + "max": 3700.0 + }, + "cpu_freq_per_core": [ + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1223.12, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.5, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1202.368, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1493.518, + "min": 1200.0, + "max": 
3700.0 + }, + { + "current": 1231.213, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.5, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1237.023, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1261.096, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1385.815, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1214.819, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1288.073, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1245.947, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1352.197, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1891.748, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1248.229, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1375.024, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1279.772, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 2322.143, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1231.835, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1364.648, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1199.877, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.915, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1284.96, + "min": 1200.0, + "max": 3700.0 + } + ], + "disk": { + "/": { + "total": 5952.626953125, + "used": 988.7798233032227 + } + }, + "memory": { + "total": 251.63711166381836 + } +} diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-summary.json b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-summary.json new file mode 100644 index 00000000..b7bc9960 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-summary.json @@ -0,0 +1 @@ +{"global_step": 32704, "_timestamp": 1713840830.7411258, "_runtime": 61.95500087738037, "_step": 194, "charts/episodic_return": 0.0, "charts/episodic_length": 20.0, "curriculum/proportion_seen": 0.2800000011920929, "curriculum/score": 0.0, "charts/learning_rate": 0.0005000000237487257, "charts/episode_returns": 1.5, "losses/value_loss": 0.5320312976837158, "losses/policy_loss": -0.0004545035772025585, "losses/entropy": 2.707151412963867, "losses/old_approx_kl": 0.00045591534581035376, "losses/approx_kl": 0.0008768583065830171, "losses/clipfrac": 0.0, "losses/explained_variance": 0.004385650157928467, "charts/SPS": 383.0, "test_eval/mean_episode_return": 0.5, "test_eval/normalized_mean_eval_return": -0.012820512987673283, "test_eval/stddev_eval_return": 0.9219544529914856, "test_eval/full_mean_episode_return": 0.699999988079071, "test_eval/full_normalized_mean_eval_return": -0.007692308165132999, "test_eval/full_stddev_eval_return": 0.6403123736381531, "train_eval/mean_episode_return": 1.100000023841858, "train_eval/normalized_mean_train_return": 0.0025641031097620726, "train_eval/stddev_train_return": 2.662705183029175, "train_eval/full_mean_episode_return": 0.699999988079071, "train_eval/full_normalized_mean_train_return": -0.007692308165132999, "train_eval/full_stddev_train_return": 0.7810249924659729, "curriculum/completed_episodes": 103.0, "_wandb": {"runtime": 77}} \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/run-x38taylu.wandb b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/run-x38taylu.wandb new file mode 100644 index 0000000000000000000000000000000000000000..a0ecdb3ffbbb4ce7a0d34b7da4ae641a3f4942b3 GIT binary patch literal 97707 
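The normalized_mean_*_return values in the wandb-summary above come from the min-max normalization applied inside the evaluation functions. A minimal sketch, assuming easy-mode bigfish return bounds of (1, 40) for PROCGEN_RETURN_BOUNDS (the full table is not visible in this patch):

    PROCGEN_RETURN_BOUNDS = {"bigfish": (1, 40)}  # assumed easy-mode bounds

    def normalized_return(mean_return, env_id="bigfish"):
        # Rescale raw episode returns using the per-game bounds table.
        env_min, env_max = PROCGEN_RETURN_BOUNDS[env_id]
        return (mean_return - env_min) / (env_max - env_min)

    # normalized_return(0.5) == (0.5 - 1) / 39 ~= -0.0128, which matches the
    # test_eval/normalized_mean_eval_return value logged in the summary above.
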
zlCBo%x$m3D9w2UPW?@tx`T?t&f$?+t!na7Mer92m_Z#<{1_oO6ErPM-;IJPEMgs|V+#wTXw!-j>dG}jI;%RK&fhd+Fu~Z$!l+)k9;=#xaiIF{5^^B6 z={OK%F^ytN1nmtO(P$V`2u z1F|}*($WBPxG-B4`2aMYvYSEXlCN$31JPiIE)5JiyNL#LcpyZ=jq=Zia$tGpRYL<@ z{QZ=4X=GO|Xk|?~c)CsS7m{+5{fG%kP-ngM4&|WaqiWnVW@j(YocrBxE zQa~XF#X04!m--q(b`2?RH-{wQH$6#!bvC4eo8x#-cqLj4C$N|ZT1h}f3VQ)s5%DD( zkI3!`iAM{PK!cr;$$Zp>EjReDFy1(;AW`&oy^KkAYD503o+kJ=rX{7T)UO8#8f zA^NeHmB#8S?$(%;q%>a)ZdbfDCMhlEj@0yo zoD|<(_89!6q-SM02z3u{X2Q)KOh6oP{$qOy;-2~t^G`1_0ddSlWgq^8N9~PF9&f9| zSTpIX4$(txwIE}=rKkF0bFzGyv8hREN%84v3Eo&=TKBBjY+qJ3AD7W9HYKS`Y(}r_ zZs}<*hZLKYl6}=8Q(27F58!um0q7ZI5etIrGaAyr`zLx)T+qjL+8j;_094Q(c7&hQ!Vn?xHynH zJ1(xCIH9_uCD;qz%p{tjERCf;;P)a14FrU&Z3Y2o-aCf@Ye zg+i2(Ff}ticEg6jxr^aHg<>>YjgL*}+uIccS1b?*csSNM!Q6bx$i@ai&9bMlJdOzm#UCf`&?jhe;z9iy+%Q|FC;>s9HFA;iFA^%+<_Xt25d;4Nnc z-1iNhc04k9U_FJtb!f-vw=Lyhqx5;>sWP^_gEAoLull^KI5yHWFTDfbZ!;ByIT4wh zTF1-ky`wuuzuvT#rt#QMmpjIhMAX!mh}^kj%}K=EQ@_o45l{O&GC8IWcuDEU9i!W7 zY)#|e^?@;`XOgKoS>M!@4IbaI@sGNy=x2z*h>30L~4KfIjdQe1+ zM-wq|tZqo4e-L0xba=~{m*)&M2@uC5G5=0vCILz`aZP6uAc8NCL!@zSI+Fl7hl@Wq z2@oA>oKQO>Pd&U11Dp7|8F{MNZO!o^rqKmv2&!kdp}j$H*M6uP-A3Q!>IMv|huon6 z$k%54ZynFtRW3p?ba@-d6O~4yn`3 z0`!P9xlnHwAk7!3n~le*iG-Pa?z-`JoSI5J=Fy#-$)_e0CiW>~?)(!9)pSC*FVGM? z|EFp~A+h2)H%CxSDI`!a2e0vjD-MbCVI(vjCgpo!vCR u(myFsO)HuZdG1p+t$;|9b*2IG%3eq?H4U&$y=j0Lp$T5H;{dUF-2Vp%6Yh`z literal 0 HcmV?d00001 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py new file mode 100644 index 00000000..abf656b5 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py @@ -0,0 +1,522 @@ +""" An example applying Syllabus Prioritized Level Replay to Procgen. 
This code is based on https://github.com/facebookresearch/level-replay/blob/main/train.py + +NOTE: In order to efficiently change the seed of a procgen environment directly without reinitializing it, +we rely on Minqi Jiang's custom branch of procgen found here: https://github.com/minqi/procgen +""" +import argparse +import os +import random +import time +from collections import deque +from distutils.util import strtobool + +import gym as openai_gym +import gymnasium as gym +import numpy as np +import procgen # noqa: F401 +from procgen import ProcgenEnv +import torch +import torch.nn as nn +import torch.optim as optim +from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 +from torch.utils.tensorboard import SummaryWriter + +from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum +from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum +from syllabus.examples.models import ProcgenAgent +from syllabus.examples.task_wrappers import ProcgenTaskWrapper +from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + +def parse_args(): + # fmt: off + parser = argparse.ArgumentParser() + parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), + help="the name of this experiment") + parser.add_argument("--seed", type=int, default=1, + help="seed of the experiment") + parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="if toggled, `torch.backends.cudnn.deterministic=False`") + parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="if toggled, cuda will be enabled by default") + parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="if toggled, this experiment will be tracked with Weights and Biases") + parser.add_argument("--wandb-project-name", type=str, default="syllabus", + help="the wandb's project name") + parser.add_argument("--wandb-entity", type=str, default=None, + help="the entity (team) of wandb's project") + parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="weather to capture videos of the agent performances (check out `videos` folder)") + parser.add_argument("--logging-dir", type=str, default=".", + help="the base directory for logging and wandb storage.") + + # Algorithm specific arguments + parser.add_argument("--env-id", type=str, default="starpilot", + help="the id of the environment") + parser.add_argument("--total-timesteps", type=int, default=int(25e6), + help="total timesteps of the experiments") + parser.add_argument("--learning-rate", type=float, default=5e-4, + help="the learning rate of the optimizer") + parser.add_argument("--num-envs", type=int, default=64, + help="the number of parallel game environments") + parser.add_argument("--num-steps", type=int, default=256, + help="the number of steps to run in each environment per policy rollout") + parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="Toggle learning rate annealing for policy and value networks") + parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Use GAE for advantage computation") + parser.add_argument("--gamma", type=float, default=0.999, + help="the discount factor 
gamma") + parser.add_argument("--gae-lambda", type=float, default=0.95, + help="the lambda for the general advantage estimation") + parser.add_argument("--num-minibatches", type=int, default=8, + help="the number of mini-batches") + parser.add_argument("--update-epochs", type=int, default=3, + help="the K epochs to update the policy") + parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Toggles advantages normalization") + parser.add_argument("--clip-coef", type=float, default=0.2, + help="the surrogate clipping coefficient") + parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") + parser.add_argument("--ent-coef", type=float, default=0.01, + help="coefficient of the entropy") + parser.add_argument("--vf-coef", type=float, default=0.5, + help="coefficient of the value function") + parser.add_argument("--max-grad-norm", type=float, default=0.5, + help="the maximum norm for the gradient clipping") + parser.add_argument("--target-kl", type=float, default=None, + help="the target KL divergence threshold") + + # Procgen arguments + parser.add_argument("--full-dist", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Train on full distribution of levels.") + + # Curriculum arguments + parser.add_argument("--curriculum", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="if toggled, this experiment will use curriculum learning") + parser.add_argument("--curriculum-method", type=str, default="plr", + help="curriculum method to use") + parser.add_argument("--num-eval-episodes", type=int, default=10, + help="the number of episodes to evaluate the agent on after each policy update.") + + args = parser.parse_args() + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + # fmt: on + return args + + +PROCGEN_RETURN_BOUNDS = { + "coinrun": (5, 10), + "starpilot": (2.5, 64), + "caveflyer": (3.5, 12), + "dodgeball": (1.5, 19), + "fruitbot": (-1.5, 32.4), + "chaser": (0.5, 13), + "miner": (1.5, 13), + "jumper": (3, 10), + "leaper": (3, 10), + "maze": (5, 10), + "bigfish": (1, 40), + "heist": (3.5, 10), + "climber": (2, 12.6), + "plunder": (4.5, 30), + "ninja": (3.5, 10), + "bossfight": (0.5, 13), +} + + +def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) + if curriculum is not None: + env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, + curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, + ) + return env + return thunk + + +def wrap_vecenv(vecenv): + vecenv.is_vector_env = True + vecenv = VecMonitor(venv=vecenv, filename=None, keep_buf=100) + vecenv = VecNormalize(venv=vecenv, ob=False, ret=True) + return vecenv + + +def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, + num_levels=0 +): + policy.eval() + + eval_envs = ProcgenEnv( + num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + eval_obs, _ = eval_envs.reset() + 
eval_episode_rewards = [] + + while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): + if 'episode' in info.keys(): + eval_episode_rewards.append(info['episode']['r']) + + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) + policy.train() + return mean_returns, stddev_returns, normalized_mean_returns + + +def level_replay_evaluate( + env_name, + policy, + num_episodes, + device, + num_levels=0 +): + policy.eval() + + eval_envs = ProcgenEnv( + num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): + if 'episode' in info.keys() and eval_episode_rewards[i] == -1: + eval_episode_rewards[i] = info['episode']['r'] + + # print(eval_episode_rewards) + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) + policy.train() + return mean_returns, stddev_returns, normalized_mean_returns + + +def make_value_fn(): + def get_value(obs): + obs = np.array(obs) + with torch.no_grad(): + return agent.get_value(torch.Tensor(obs).to(device)) + return get_value + + +if __name__ == "__main__": + args = parse_args() + run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" + if args.track: + import wandb + + wandb.init( + project=args.wandb_project_name, + entity=args.wandb_entity, + sync_tensorboard=True, + config=vars(args), + name=run_name, + monitor_gym=True, + save_code=True, + dir=args.logging_dir + ) + # wandb.run.log_code("./syllabus/examples") + + writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), + ) + + # TRY NOT TO MODIFY: seeding + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + torch.backends.cudnn.deterministic = args.torch_deterministic + + device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") + print("Device:", device) + + # Curriculum setup + curriculum = None + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) + sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) + + # Intialize Curriculum Method + if args.curriculum_method == "plr": + print("Using prioritized level replay.") + curriculum = PrioritizedLevelReplay( + sample_env.task_space, + sample_env.observation_space, + num_steps=args.num_steps, + num_processes=args.num_envs, + 
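+                # num_steps and num_processes size PLR's internal rollout storage,
+                # so they should match the PPO rollout shape
+                # (args.num_steps x args.num_envs). The "value_l1" strategy below
+                # requires value predictions, which is why get_value=make_value_fn()
+                # is passed.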
gamma=args.gamma, + gae_lambda=args.gae_lambda, + task_sampler_kwargs_dict={"strategy": "value_l1"}, + get_value=make_value_fn(), + ) + elif args.curriculum_method == "dr": + print("Using domain randomization.") + curriculum = DomainRandomization(sample_env.task_space) + elif args.curriculum_method == "lp": + print("Using learning progress.") + curriculum = LearningProgressCurriculum(sample_env.task_space) + elif args.curriculum_method == "sq": + print("Using sequential curriculum.") + curricula = [] + stopping = [] + for i in range(199): + curricula.append(i + 1) + stopping.append("steps>=50000") + curricula.append(list(range(i + 1))) + stopping.append("steps>=50000") + curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) + else: + raise ValueError(f"Unknown curriculum method {args.curriculum_method}") + curriculum = make_multiprocessing_curriculum(curriculum) + del sample_env + + # env setup + print("Creating env") + envs = gym.vector.AsyncVectorEnv( + [ + make_env( + args.env_id, + args.seed + i, + curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) + ] + ) + envs = wrap_vecenv(envs) + + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( + envs.single_observation_space.shape, + envs.single_action_space.n, + arch="large", + base_kwargs={'recurrent': False, 'hidden_size': 256} + ).to(device) + optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) + + # ALGO Logic: Storage setup + obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) + actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) + logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) + rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) + dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + values = torch.zeros((args.num_steps, args.num_envs)).to(device) + + # TRY NOT TO MODIFY: start the game + global_step = 0 + start_time = time.time() + next_obs, _ = envs.reset() + next_obs = torch.Tensor(next_obs).to(device) + next_done = torch.zeros(args.num_envs).to(device) + num_updates = args.total_timesteps // args.batch_size + episode_rewards = deque(maxlen=10) + completed_episodes = 0 + + for update in range(1, num_updates + 1): + # Annealing the rate if instructed to do so. + if args.anneal_lr: + frac = 1.0 - (update - 1.0) / num_updates + lrnow = frac * args.learning_rate + optimizer.param_groups[0]["lr"] = lrnow + + for step in range(0, args.num_steps): + global_step += 1 * args.num_envs + obs[step] = next_obs + dones[step] = next_done + + # ALGO LOGIC: action logic + with torch.no_grad(): + action, logprob, _, value = agent.get_action_and_value(next_obs) + values[step] = value.flatten() + actions[step] = action + logprobs[step] = logprob + + # TRY NOT TO MODIFY: execute the game and log data. 
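+            # envs.step follows the gymnasium 5-tuple API (obs, reward, terminated,
+            # truncated, info); terminated and truncated are OR-ed into a single
+            # done flag that masks bootstrapping in the GAE / returns computation
+            # below.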
+ next_obs, reward, term, trunc, info = envs.step(action.cpu().numpy()) + done = np.logical_or(term, trunc) + rewards[step] = torch.tensor(reward).to(device).view(-1) + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + completed_episodes += sum(done) + + for item in info: + if "episode" in item.keys(): + episode_rewards.append(item['episode']['r']) + print(f"global_step={global_step}, episodic_return={item['episode']['r']}") + writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) + if curriculum is not None: + curriculum.log_metrics(writer, global_step) + break + + # bootstrap value if not done + with torch.no_grad(): + next_value = agent.get_value(next_obs).reshape(1, -1) + if args.gae: + advantages = torch.zeros_like(rewards).to(device) + lastgaelam = 0 + for t in reversed(range(args.num_steps)): + if t == args.num_steps - 1: + nextnonterminal = 1.0 - next_done + nextvalues = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + nextvalues = values[t + 1] + delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] + advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam + returns = advantages + values + else: + returns = torch.zeros_like(rewards).to(device) + for t in reversed(range(args.num_steps)): + if t == args.num_steps - 1: + nextnonterminal = 1.0 - next_done + next_return = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + next_return = returns[t + 1] + returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return + advantages = returns - values + + # flatten the batch + b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) + b_logprobs = logprobs.reshape(-1) + b_actions = actions.reshape((-1,) + envs.single_action_space.shape) + b_advantages = advantages.reshape(-1) + b_returns = returns.reshape(-1) + b_values = values.reshape(-1) + + # Optimizing the policy and value network + b_inds = np.arange(args.batch_size) + clipfracs = [] + for epoch in range(args.update_epochs): + np.random.shuffle(b_inds) + for start in range(0, args.batch_size, args.minibatch_size): + end = start + args.minibatch_size + mb_inds = b_inds[start:end] + + _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds]) + logratio = newlogprob - b_logprobs[mb_inds] + ratio = logratio.exp() + + with torch.no_grad(): + # calculate approx_kl http://joschu.net/blog/kl-approx.html + old_approx_kl = (-logratio).mean() + approx_kl = ((ratio - 1) - logratio).mean() + clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()] + + mb_advantages = b_advantages[mb_inds] + if args.norm_adv: + mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) + + # Policy loss + pg_loss1 = -mb_advantages * ratio + pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + args.clip_coef) + pg_loss = torch.max(pg_loss1, pg_loss2).mean() + + # Value loss + newvalue = newvalue.view(-1) + if args.clip_vloss: + v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 + v_clipped = b_values[mb_inds] + torch.clamp( + newvalue - b_values[mb_inds], + -args.clip_coef, + args.clip_coef, + ) + v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 + v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) + v_loss = 0.5 * v_loss_max.mean() + else: + v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() + + 
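+                # The minibatch objective assembled next is the standard PPO loss:
+                #   loss = pg_loss - ent_coef * entropy + vf_coef * v_loss
+                # i.e. the clipped policy surrogate, an entropy bonus weighted by
+                # --ent-coef, and the (optionally clipped) value loss weighted by
+                # --vf-coef.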
entropy_loss = entropy.mean() + loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef + + optimizer.zero_grad() + loss.backward() + nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) + optimizer.step() + + if args.target_kl is not None: + if approx_kl > args.target_kl: + break + + y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() + var_y = np.var(y_true) + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent + mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) + writer.add_scalar("charts/episode_returns", np.mean(episode_rewards), global_step) + writer.add_scalar("losses/value_loss", v_loss.item(), global_step) + writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step) + writer.add_scalar("losses/entropy", entropy_loss.item(), global_step) + writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step) + writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step) + writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step) + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) + + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) + + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) + + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() + writer.close() diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/conda-environment.yaml 
b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/conda-environment.yaml new file mode 100644 index 00000000..cd0b0b09 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/conda-environment.yaml @@ -0,0 +1,165 @@ +name: test2_py +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - ca-certificates=2024.3.11=h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.3=he6710b0_2 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - ncurses=6.4=h6a678d5_0 + - openssl=1.1.1w=h7f8727e_0 + - pip=23.3.1=py38h06a4308_0 + - python=3.8.5=h7579374_1 + - readline=8.2=h5eee18b_0 + - setuptools=68.2.2=py38h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - wheel=0.41.2=py38h06a4308_0 + - xz=5.4.6=h5eee18b_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - absl-py==2.1.0 + - aiosignal==1.3.1 + - alabaster==0.7.13 + - appdirs==1.4.4 + - attrs==23.2.0 + - babel==2.14.0 + - beautifulsoup4==4.12.3 + - cachetools==5.3.3 + - certifi==2024.2.2 + - cffi==1.16.0 + - charset-normalizer==3.3.2 + - click==8.1.7 + - cloudpickle==3.0.0 + - cmake==3.29.2 + - contourpy==1.1.1 + - cycler==0.12.1 + - dm-tree==0.1.8 + - docker-pycreds==0.4.0 + - docutils==0.20.1 + - exceptiongroup==1.2.0 + - farama-notifications==0.0.4 + - filelock==3.13.4 + - fonttools==4.51.0 + - frozenlist==1.4.1 + - fsspec==2024.3.1 + - furo==2024.1.29 + - future==1.0.0 + - gitdb==4.0.11 + - gitpython==3.1.43 + - glcontext==2.5.0 + - glfw==1.12.0 + - google-auth==2.29.0 + - google-auth-oauthlib==1.0.0 + - grpcio==1.62.1 + - gym==0.23.0 + - gym-notices==0.0.8 + - gymnasium==0.28.1 + - idna==3.7 + - imageio==2.34.0 + - imageio-ffmpeg==0.3.0 + - imagesize==1.4.1 + - importlib-metadata==7.1.0 + - importlib-resources==6.4.0 + - iniconfig==2.0.0 + - jax-jumpy==1.0.0 + - jinja2==3.1.3 + - jsonschema==4.21.1 + - jsonschema-specifications==2023.12.1 + - kiwisolver==1.4.5 + - lazy-loader==0.4 + - lz4==4.3.3 + - markdown==3.6 + - markdown-it-py==3.0.0 + - markupsafe==2.1.5 + - matplotlib==3.7.5 + - mdurl==0.1.2 + - moderngl==5.10.0 + - mpmath==1.3.0 + - msgpack==1.0.8 + - networkx==3.1 + - numpy==1.24.4 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==8.9.2.26 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - nvidia-cusparse-cu12==12.1.0.106 + - nvidia-nccl-cu12==2.19.3 + - nvidia-nvjitlink-cu12==12.4.127 + - nvidia-nvtx-cu12==12.1.105 + - oauthlib==3.2.2 + - packaging==24.0 + - pandas==2.0.3 + - pillow==10.3.0 + - pkgutil-resolve-name==1.3.10 + - pluggy==1.4.0 + - protobuf==4.25.3 + - psutil==5.9.8 + - py-cpuinfo==9.0.0 + - pyarrow==15.0.2 + - pyasn1==0.6.0 + - pyasn1-modules==0.4.0 + - pycparser==2.22 + - pyenchant==3.2.2 + - pyglet==1.4.11 + - pygments==2.17.2 + - pyparsing==3.1.2 + - pytest==8.1.1 + - pytest-benchmark==4.0.0 + - python-dateutil==2.9.0.post0 + - pytz==2024.1 + - pywavelets==1.4.1 + - pyyaml==6.0.1 + - ray==2.10.0 + - referencing==0.34.0 + - requests==2.31.0 + - requests-oauthlib==2.0.0 + - rich==13.7.1 + - rpds-py==0.18.0 + - rsa==4.9 + - scikit-image==0.21.0 + - scipy==1.10.0 + - sentry-sdk==1.45.0 + - setproctitle==1.3.3 + - shellingham==1.5.4 + - shimmy==1.3.0 + - six==1.16.0 + - smmap==5.0.1 + - snowballstemmer==2.2.0 + - soupsieve==2.5 + - sphinx==7.1.2 + - sphinx-basic-ng==1.0.0b2 + - 
sphinx-tabs==3.4.5 + - sphinxcontrib-applehelp==1.0.4 + - sphinxcontrib-devhelp==1.0.2 + - sphinxcontrib-htmlhelp==2.0.1 + - sphinxcontrib-jsmath==1.0.1 + - sphinxcontrib-qthelp==1.0.3 + - sphinxcontrib-serializinghtml==1.1.5 + - sphinxcontrib-spelling==8.0.0 + - syllabus-rl==0.5 + - sympy==1.12 + - tensorboard==2.14.0 + - tensorboard-data-server==0.7.2 + - tensorboardx==2.6.2.2 + - tifffile==2023.7.10 + - tomli==2.0.1 + - torch==2.2.2 + - triton==2.2.0 + - typer==0.12.3 + - typing-extensions==4.11.0 + - tzdata==2024.1 + - urllib3==2.2.1 + - wandb==0.16.6 + - werkzeug==3.0.2 + - zipp==3.18.1 +prefix: /home/user/miniconda/envs/test2_py + diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/config.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/config.yaml new file mode 100644 index 00000000..2d9b2af7 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/config.yaml @@ -0,0 +1,126 @@ +wandb_version: 1 + +exp_name: + desc: null + value: cleanrl_procgen_plr +seed: + desc: null + value: 1 +torch_deterministic: + desc: null + value: true +cuda: + desc: null + value: true +track: + desc: null + value: true +wandb_project_name: + desc: null + value: syllabus +wandb_entity: + desc: null + value: null +capture_video: + desc: null + value: false +logging_dir: + desc: null + value: . +env_id: + desc: null + value: bigfish +total_timesteps: + desc: null + value: 25000000 +learning_rate: + desc: null + value: 0.0005 +num_envs: + desc: null + value: 64 +num_steps: + desc: null + value: 256 +anneal_lr: + desc: null + value: false +gae: + desc: null + value: true +gamma: + desc: null + value: 0.999 +gae_lambda: + desc: null + value: 0.95 +num_minibatches: + desc: null + value: 8 +update_epochs: + desc: null + value: 3 +norm_adv: + desc: null + value: true +clip_coef: + desc: null + value: 0.2 +clip_vloss: + desc: null + value: true +ent_coef: + desc: null + value: 0.01 +vf_coef: + desc: null + value: 0.5 +max_grad_norm: + desc: null + value: 0.5 +target_kl: + desc: null + value: null +full_dist: + desc: null + value: true +curriculum: + desc: null + value: true +curriculum_method: + desc: null + value: plr +num_eval_episodes: + desc: null + value: 10 +batch_size: + desc: null + value: 16384 +minibatch_size: + desc: null + value: 2048 +_wandb: + desc: null + value: + code_path: code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py + python_version: 3.8.5 + cli_version: 0.16.6 + framework: torch + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1713841235.0 + t: + 1: + - 1 + - 30 + - 55 + 3: + - 13 + - 16 + - 23 + - 35 + 4: 3.8.5 + 5: 0.16.6 + 8: + - 5 + 13: linux-x86_64 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/diff.patch b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/diff.patch new file mode 100644 index 00000000..ab53c5d0 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/diff.patch @@ -0,0 +1,122 @@ +diff --git a/setup.py b/setup.py +index 31e09f2..22a94e8 100644 +--- a/setup.py ++++ b/setup.py +@@ -2,7 +2,7 @@ from setuptools import find_packages, setup + + + extras = dict() +-extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] ++extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] + extras['docs'] = ['sphinx-tabs', 
'sphinxcontrib-spelling', 'furo'] + extras['all'] = extras['test'] + extras['docs'] + +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +index dabcd50..abf656b 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py ++++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +@@ -136,7 +136,7 @@ def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + env = MultiProcessingSyncWrapper( + env, + curriculum.get_components(), +- update_on_step=curriculum.requires_step_updates, ++ update_on_step=False, + task_space=env.task_space, + ) + return env +@@ -150,37 +150,31 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def full_level_replay_evaluate( ++def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, +- num_levels=1 # Not used ++ num_levels=0 + ): + policy.eval() + + eval_envs = ProcgenEnv( +- num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False ++ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) +- +- # Seed environments +- seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] +- for i, seed in enumerate(seeds): +- eval_envs.seed(seed, i) +- + eval_obs, _ = eval_envs.reset() +- eval_episode_rewards = [-1] * num_episodes ++ eval_episode_rewards = [] + +- while -1 in eval_episode_rewards: ++ while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): +- if 'episode' in info.keys() and eval_episode_rewards[i] == -1: +- eval_episode_rewards[i] = info['episode']['r'] ++ if 'episode' in info.keys(): ++ eval_episode_rewards.append(info['episode']['r']) + + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) +@@ -251,7 +245,7 @@ if __name__ == "__main__": + ) + # wandb.run.log_code("./syllabus/examples") + +- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) ++ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), +@@ -485,13 +479,13 @@ if __name__ == "__main__": + mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) +- full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( ++ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) +- full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( ++ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( + 
args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + +@@ -510,17 +504,17 @@ if __name__ == "__main__": + + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) +- writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) ++ writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) + + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) +- writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) +- writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) +- writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) +- writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) ++ writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) + + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/events.out.tfevents.1713841239.f411843fc70b.1794.0 b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/events.out.tfevents.1713841239.f411843fc70b.1794.0 new file mode 120000 index 00000000..f3066c3f --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/events.out.tfevents.1713841239.f411843fc70b.1794.0 @@ -0,0 +1 @@ +/data/averma/MARL/Syllabus/syllabus/examples/training_scripts/runs/bigfish__cleanrl_procgen_plr__1__1713841233/events.out.tfevents.1713841239.f411843fc70b.1794.0 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/requirements.txt b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/requirements.txt new file mode 100644 index 00000000..7f33d240 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/requirements.txt @@ -0,0 +1,146 @@ +Babel==2.14.0 +Farama-Notifications==0.0.4 +GitPython==3.1.43 +Jinja2==3.1.3 +Markdown==3.6 +MarkupSafe==2.1.5 +PyWavelets==1.4.1 +PyYAML==6.0.1 +Pygments==2.17.2 +Shimmy==1.3.0 +Sphinx==7.1.2 +Syllabus-RL==0.5 +Werkzeug==3.0.2 +absl-py==2.1.0 +aiosignal==1.3.1 +alabaster==0.7.13 +appdirs==1.4.4 +attrs==23.2.0 
+beautifulsoup4==4.12.3 +cachetools==5.3.3 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpickle==3.0.0 +cmake==3.29.2 +colorama==0.4.6 +contourpy==1.1.1 +cycler==0.12.1 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +docutils==0.20.1 +exceptiongroup==1.2.0 +filelock==3.13.4 +fonttools==4.51.0 +frozenlist==1.4.1 +fsspec==2024.3.1 +furo==2024.1.29 +future==1.0.0 +gitdb==4.0.11 +glcontext==2.5.0 +glfw==1.12.0 +google-auth-oauthlib==1.0.0 +google-auth==2.29.0 +grpcio==1.62.1 +gym-notices==0.0.8 +gym==0.23.0 +gymnasium==0.28.1 +idna==3.7 +imageio-ffmpeg==0.3.0 +imageio==2.34.0 +imagesize==1.4.1 +importlib_metadata==7.1.0 +importlib_resources==6.4.0 +iniconfig==2.0.0 +jax-jumpy==1.0.0 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +kiwisolver==1.4.5 +lazy_loader==0.4 +lz4==4.3.3 +markdown-it-py==3.0.0 +matplotlib==3.7.5 +mdurl==0.1.2 +moderngl==5.10.0 +mpmath==1.3.0 +msgpack==1.0.8 +networkx==3.1 +numpy==1.24.4 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.19.3 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nvtx-cu12==12.1.105 +oauthlib==3.2.2 +packaging==24.0 +pandas==2.0.3 +pillow==10.3.0 +pip==23.3.1 +pkgutil_resolve_name==1.3.10 +pluggy==1.4.0 +procgen==0.9.5+ed4be81 +protobuf==4.25.3 +psutil==5.9.8 +psutil==5.9.8 +py-cpuinfo==9.0.0 +pyarrow==15.0.2 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pycparser==2.22 +pyenchant==3.2.2 +pyglet==1.4.11 +pyparsing==3.1.2 +pytest-benchmark==4.0.0 +pytest==8.1.1 +python-dateutil==2.9.0.post0 +pytz==2024.1 +ray==2.10.0 +referencing==0.34.0 +requests-oauthlib==2.0.0 +requests==2.31.0 +rich==13.7.1 +rpds-py==0.18.0 +rsa==4.9 +scikit-image==0.21.0 +scipy==1.10.0 +sentry-sdk==1.45.0 +setproctitle==1.2.2 +setproctitle==1.3.3 +setuptools==68.2.2 +shellingham==1.5.4 +six==1.16.0 +smmap==5.0.1 +snowballstemmer==2.2.0 +soupsieve==2.5 +sphinx-basic-ng==1.0.0b2 +sphinx-tabs==3.4.5 +sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-spelling==8.0.0 +sympy==1.12 +tensorboard-data-server==0.7.2 +tensorboard==2.14.0 +tensorboardX==2.6.2.2 +tifffile==2023.7.10 +tomli==2.0.1 +torch==2.2.2 +triton==2.2.0 +typer==0.12.3 +typing_extensions==4.11.0 +tzdata==2024.1 +urllib3==2.2.1 +wandb==0.16.6 +wheel==0.41.2 +zipp==3.18.1 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch new file mode 100644 index 00000000..7be08932 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch @@ -0,0 +1,1408 @@ +diff --git a/setup.py b/setup.py +index 31e09f2..22a94e8 100644 +--- a/setup.py ++++ b/setup.py +@@ -2,7 +2,7 @@ from setuptools import find_packages, setup + + + extras = dict() +-extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] ++extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 
'shimmy'] + extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] + extras['all'] = extras['test'] + extras['docs'] + +diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py +index 03284da..4ca9aeb 100644 +--- a/syllabus/core/curriculum_base.py ++++ b/syllabus/core/curriculum_base.py +@@ -76,7 +76,7 @@ class Curriculum: + """ + self.completed_tasks += 1 + +- def update_on_step(self, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: ++ def update_on_step(self, task: typing.Any, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: + """ Update the curriculum with the current step results from the environment. + + :param obs: Observation from teh environment +@@ -88,7 +88,7 @@ class Curriculum: + """ + raise NotImplementedError("This curriculum does not require step updates. Set update_on_step for the environment sync wrapper to False to improve performance and prevent this error.") + +- def update_on_step_batch(self, step_results: List[typing.Tuple[int, int, int, int, int]], env_id: int = None) -> None: ++ def update_on_step_batch(self, step_results: List[typing.Tuple[Any, Any, int, int, int, int]], env_id: int = None) -> None: + """Update the curriculum with a batch of step results from the environment. + + This method can be overridden to provide a more efficient implementation. It is used +@@ -96,9 +96,9 @@ class Curriculum: + + :param step_results: List of step results + """ +- obs, rews, terms, truncs, infos = tuple(step_results) ++ tasks, obs, rews, terms, truncs, infos = tuple(step_results) + for i in range(len(obs)): +- self.update_on_step(obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) ++ self.update_on_step(tasks[i], obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) + + def update_on_episode(self, episode_return: float, episode_length: int, episode_task: Any, env_id: int = None) -> None: + """Update the curriculum with episode results from the environment. +diff --git a/syllabus/core/curriculum_sync_wrapper.py b/syllabus/core/curriculum_sync_wrapper.py +index 6e069d8..f986643 100644 +--- a/syllabus/core/curriculum_sync_wrapper.py ++++ b/syllabus/core/curriculum_sync_wrapper.py +@@ -29,6 +29,14 @@ class CurriculumWrapper: + def tasks(self): + return self.task_space.tasks + ++ @property ++ def requires_step_updates(self): ++ return self.curriculum.requires_step_updates ++ ++ @property ++ def requires_episode_updates(self): ++ return self.curriculum.requires_episode_updates ++ + def get_tasks(self, task_space=None): + return self.task_space.get_tasks(gym_space=task_space) + +diff --git a/syllabus/core/environment_sync_wrapper.py b/syllabus/core/environment_sync_wrapper.py +index c995aa1..6edee7c 100644 +--- a/syllabus/core/environment_sync_wrapper.py ++++ b/syllabus/core/environment_sync_wrapper.py +@@ -19,7 +19,8 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + def __init__(self, + env, + components: MultiProcessingComponents, +- update_on_step: bool = True, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? ++ update_on_step: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? ++ update_on_progress: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? 
+ batch_size: int = 100, + buffer_size: int = 2, # Having an extra task in the buffer minimizes wait time at reset + task_space: TaskSpace = None, +@@ -34,6 +35,7 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + self.update_queue = components.update_queue + self.task_space = task_space + self.update_on_step = update_on_step ++ self.update_on_progress = update_on_progress + self.batch_size = batch_size + self.global_task_completion = global_task_completion + self.task_progress = 0.0 +@@ -125,17 +127,21 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + def _package_step_updates(self): + step_batch = { + "update_type": "step_batch", +- "metrics": ([self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), ++ "metrics": ([self._tasks[:self._batch_step], self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), + "env_id": self.instance_id, + "request_sample": False + } +- task_batch = { +- "update_type": "task_progress_batch", +- "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), +- "env_id": self.instance_id, +- "request_sample": False +- } +- return [step_batch, task_batch] ++ update = [step_batch] ++ ++ if self.update_on_progress: ++ task_batch = { ++ "update_type": "task_progress_batch", ++ "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), ++ "env_id": self.instance_id, ++ "request_sample": False ++ } ++ update.append(task_batch) ++ return update + + def add_task(self, task): + update = { +diff --git a/syllabus/curricula/annealing_box.py b/syllabus/curricula/annealing_box.py +index 6c565ec..101981c 100644 +--- a/syllabus/curricula/annealing_box.py ++++ b/syllabus/curricula/annealing_box.py +@@ -49,8 +49,8 @@ class AnnealingBoxCurriculum(Curriculum): + """ + # Linear annealing from start_values to end_values + annealed_values = ( +- self.start_values + (self.end_values - self.start_values) * +- np.minimum(self.current_step, self.total_steps) / self.total_steps ++ self.start_values + (self.end_values - self.start_values) * ++ np.minimum(self.current_step, self.total_steps) / self.total_steps + ) + +- return [annealed_values.copy() for _ in range(k)] +\ No newline at end of file ++ return [annealed_values.copy() for _ in range(k)] +diff --git a/syllabus/curricula/noop.py b/syllabus/curricula/noop.py +index f6bd5dc..fb5d8ae 100644 +--- a/syllabus/curricula/noop.py ++++ b/syllabus/curricula/noop.py +@@ -28,7 +28,7 @@ class NoopCurriculum(Curriculum): + """ + pass + +- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: + """ + Update the curriculum with the current step results from the environment. + """ +diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py +index 9515df4..9c808dd 100644 +--- a/syllabus/curricula/plr/plr_wrapper.py ++++ b/syllabus/curricula/plr/plr_wrapper.py +@@ -23,16 +23,15 @@ class RolloutStorage(object): + get_value=None, + ): + self.num_steps = num_steps +- self.buffer_steps = num_steps * 2 # Hack to prevent overflow from lagging updates. ++ self.buffer_steps = num_steps * 4 # Hack to prevent overflow from lagging updates. 
+ self.num_processes = num_processes + self._requires_value_buffers = requires_value_buffers + self._get_value = get_value + self.tasks = torch.zeros(self.buffer_steps, num_processes, 1, dtype=torch.int) + self.masks = torch.ones(self.buffer_steps + 1, num_processes, 1) + self.obs = [[[0] for _ in range(self.num_processes)]] * self.buffer_steps +- self._fill = torch.zeros(self.buffer_steps, num_processes, 1) + self.env_steps = [0] * num_processes +- self.should_update = False ++ self.ready_buffers = set() + + if requires_value_buffers: + self.returns = torch.zeros(self.buffer_steps + 1, num_processes, 1) +@@ -46,12 +45,10 @@ class RolloutStorage(object): + self.action_log_dist = torch.zeros(self.buffer_steps, num_processes, action_space.n) + + self.num_steps = num_steps +- self.step = 0 + + def to(self, device): + self.masks = self.masks.to(device) + self.tasks = self.tasks.to(device) +- self._fill = self._fill.to(device) + if self._requires_value_buffers: + self.rewards = self.rewards.to(device) + self.value_preds = self.value_preds.to(device) +@@ -59,108 +56,79 @@ class RolloutStorage(object): + else: + self.action_log_dist = self.action_log_dist.to(device) + +- def insert(self, masks, action_log_dist=None, value_preds=None, rewards=None, tasks=None): +- if self._requires_value_buffers: +- assert (value_preds is not None and rewards is not None), "Selected strategy requires value_preds and rewards" +- if len(rewards.shape) == 3: +- rewards = rewards.squeeze(2) +- self.value_preds[self.step].copy_(torch.as_tensor(value_preds)) +- self.rewards[self.step].copy_(torch.as_tensor(rewards)[:, None]) +- self.masks[self.step + 1].copy_(torch.as_tensor(masks)[:, None]) +- else: +- self.action_log_dist[self.step].copy_(action_log_dist) +- if tasks is not None: +- assert isinstance(tasks[0], int), "Provided task must be an integer" +- self.tasks[self.step].copy_(torch.as_tensor(tasks)[:, None]) +- self.step = (self.step + 1) % self.num_steps +- + def insert_at_index(self, env_index, mask=None, action_log_dist=None, obs=None, reward=None, task=None, steps=1): +- if env_index >= self.num_processes: +- warnings.warn(f"Env index {env_index} is greater than the number of processes {self.num_processes}. Using index {env_index % self.num_processes} instead.") +- env_index = env_index % self.num_processes +- + step = self.env_steps[env_index] + end_step = step + steps +- # Update buffer fill traacker, and check for common usage errors. +- try: +- if end_step > len(self._fill): +- raise IndexError +- self._fill[step:end_step, env_index] = 1 +- except IndexError as e: +- if any(self._fill[:][env_index] == 0): +- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. Your value for PLR's num_processes may be too high.") from e +- else: +- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. 
Your value for PLR's num_processes may be too low.") from e + + if mask is not None: + self.masks[step + 1:end_step + 1, env_index].copy_(torch.as_tensor(mask[:, None])) ++ + if obs is not None: + for s in range(step, end_step): + self.obs[s][env_index] = obs[s - step] ++ + if reward is not None: + self.rewards[step:end_step, env_index].copy_(torch.as_tensor(reward[:, None])) ++ + if action_log_dist is not None: + self.action_log_dist[step:end_step, env_index].copy_(torch.as_tensor(action_log_dist[:, None])) ++ + if task is not None: + try: +- task = int(task) ++ int(task[0]) + except TypeError: +- assert isinstance(task, int), f"Provided task must be an integer, got {task} with type {type(task)} instead." +- self.tasks[step:end_step, env_index].copy_(torch.as_tensor(task)) +- else: +- self.env_steps[env_index] += steps +- # Hack for now, we call insert_at_index twice +- while all(self._fill[self.step] == 1): +- self.step = (self.step + 1) % self.buffer_steps +- # Check if we have enough steps to compute a task sampler update +- if self.step == self.num_steps + 1: +- self.should_update = True +- +- def _get_values(self): ++ assert isinstance(task, int), f"Provided task must be an integer, got {task[0]} with type {type(task[0])} instead." ++ self.tasks[step:end_step, env_index].copy_(torch.as_tensor(np.array(task)[:, None])) ++ ++ self.env_steps[env_index] += steps ++ if env_index not in self.ready_buffers and self.env_steps[env_index] >= self.num_steps: ++ self.ready_buffers.add(env_index) ++ ++ def _get_values(self, env_index): + if self._get_value is None: + raise UsageError("Selected strategy requires value predictions. Please provide get_value function.") +- for step in range(self.num_steps): +- values = self._get_value(self.obs[step]) ++ for step in range(0, self.num_steps, self.num_processes): ++ obs = self.obs[step: step + self.num_processes][env_index] ++ values = self._get_value(obs) ++ ++ # Reshape values if necessary + if len(values.shape) == 3: + warnings.warn(f"Value function returned a 3D tensor of shape {values.shape}. Attempting to squeeze last dimension.") + values = torch.squeeze(values, -1) + if len(values.shape) == 1: + warnings.warn(f"Value function returned a 1D tensor of shape {values.shape}. 
Attempting to unsqueeze last dimension.") + values = torch.unsqueeze(values, -1) +- self.value_preds[step].copy_(values) + +- def after_update(self): ++ self.value_preds[step: step + self.num_processes, env_index].copy_(values) ++ ++ def after_update(self, env_index): + # After consuming the first num_steps of data, remove them and shift the remaining data in the buffer +- self.tasks[0: self.num_steps].copy_(self.tasks[self.num_steps: self.buffer_steps]) +- self.masks[0: self.num_steps].copy_(self.masks[self.num_steps: self.buffer_steps]) +- self.obs[0: self.num_steps][:] = self.obs[self.num_steps: self.buffer_steps][:] ++ self.tasks = self.tasks.roll(-self.num_steps, 0) ++ self.masks = self.masks.roll(-self.num_steps, 0) ++ self.obs[0:][env_index] = self.obs[self.num_steps: self.buffer_steps][env_index] + + if self._requires_value_buffers: +- self.returns[0: self.num_steps].copy_(self.returns[self.num_steps: self.buffer_steps]) +- self.rewards[0: self.num_steps].copy_(self.rewards[self.num_steps: self.buffer_steps]) +- self.value_preds[0: self.num_steps].copy_(self.value_preds[self.num_steps: self.buffer_steps]) ++ self.returns = self.returns.roll(-self.num_steps, 0) ++ self.rewards = self.rewards.roll(-self.num_steps, 0) ++ self.value_preds = self.value_preds.roll(-self.num_steps, 0) + else: +- self.action_log_dist[0: self.num_steps].copy_(self.action_log_dist[self.num_steps: self.buffer_steps]) ++ self.action_log_dist = self.action_log_dist.roll(-self.num_steps, 0) + +- self._fill[0: self.num_steps].copy_(self._fill[self.num_steps: self.buffer_steps]) +- self._fill[self.num_steps: self.buffer_steps].copy_(0) ++ self.env_steps[env_index] -= self.num_steps ++ self.ready_buffers.remove(env_index) + +- self.env_steps = [steps - self.num_steps for steps in self.env_steps] +- self.should_update = False +- self.step = self.step - self.num_steps +- +- def compute_returns(self, gamma, gae_lambda): ++ def compute_returns(self, gamma, gae_lambda, env_index): + assert self._requires_value_buffers, "Selected strategy does not use compute_rewards." +- self._get_values() ++ self._get_values(env_index) + gae = 0 + for step in reversed(range(self.rewards.size(0), self.num_steps)): + delta = ( +- self.rewards[step] +- + gamma * self.value_preds[step + 1] * self.masks[step + 1] +- - self.value_preds[step] ++ self.rewards[step, env_index] ++ + gamma * self.value_preds[step + 1, env_index] * self.masks[step + 1, env_index] ++ - self.value_preds[step, env_index] + ) +- gae = delta + gamma * gae_lambda * self.masks[step + 1] * gae +- self.returns[step] = gae + self.value_preds[step] ++ gae = delta + gamma * gae_lambda * self.masks[step + 1, env_index] * gae ++ self.returns[step, env_index] = gae + self.value_preds[step, env_index] + + + def null(x): +@@ -252,11 +220,15 @@ class PrioritizedLevelReplay(Curriculum): + else: + return [self._task_sampler.sample() for _ in range(k)] + +- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: + """ + Update the curriculum with the current step results from the environment. + """ + assert env_id is not None, "env_id must be provided for PLR updates." ++ if env_id >= self._num_processes: ++ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. 
Using index {env_id % self._num_processes} instead.") ++ env_id = env_id % self._num_processes ++ + # Update rollouts + self._rollouts.insert_at_index( + env_id, +@@ -266,14 +238,22 @@ class PrioritizedLevelReplay(Curriculum): + obs=np.array([obs]), + ) + ++ # Update task sampler ++ if env_id in self._rollouts.ready_buffers: ++ self._update_sampler(env_id) ++ + def update_on_step_batch( +- self, step_results: List[Tuple[Any, int, bool, bool, Dict]], env_id: int = None ++ self, step_results: List[Tuple[int, Any, int, bool, bool, Dict]], env_id: int = None + ) -> None: + """ + Update the curriculum with a batch of step results from the environment. + """ + assert env_id is not None, "env_id must be provided for PLR updates." +- obs, rews, terms, truncs, infos = step_results ++ if env_id >= self._num_processes: ++ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. Using index {env_id % self._num_processes} instead.") ++ env_id = env_id % self._num_processes ++ ++ tasks, obs, rews, terms, truncs, infos = step_results + self._rollouts.insert_at_index( + env_id, + mask=np.logical_not(np.logical_or(terms, truncs)), +@@ -281,25 +261,19 @@ class PrioritizedLevelReplay(Curriculum): + reward=rews, + obs=obs, + steps=len(rews), ++ task=tasks, + ) + +- def update_task_progress(self, task: Any, success_prob: float, env_id: int = None) -> None: +- """ +- Update the curriculum with a task and its success probability upon +- success or failure. +- """ +- assert env_id is not None, "env_id must be provided for PLR updates." +- self._rollouts.insert_at_index( +- env_id, +- task=task, +- ) + # Update task sampler +- if self._rollouts.should_update: +- if self._task_sampler.requires_value_buffers: +- self._rollouts.compute_returns(self._gamma, self._gae_lambda) +- self._task_sampler.update_with_rollouts(self._rollouts) +- self._rollouts.after_update() +- self._task_sampler.after_update() ++ if env_id in self._rollouts.ready_buffers: ++ self._update_sampler(env_id) ++ ++ def _update_sampler(self, env_id): ++ if self._task_sampler.requires_value_buffers: ++ self._rollouts.compute_returns(self._gamma, self._gae_lambda, env_id) ++ self._task_sampler.update_with_rollouts(self._rollouts, env_id) ++ self._rollouts.after_update(env_id) ++ self._task_sampler.after_update() + + def _enumerate_tasks(self, space): + assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete" +@@ -312,10 +286,10 @@ class PrioritizedLevelReplay(Curriculum): + """ + Log the task distribution to the provided tensorboard writer. 
+ """ +- super().log_metrics(writer, step) ++ # super().log_metrics(writer, step) + metrics = self._task_sampler.metrics() + writer.add_scalar("curriculum/proportion_seen", metrics["proportion_seen"], step) + writer.add_scalar("curriculum/score", metrics["score"], step) +- for task in list(self.task_space.tasks)[:10]: +- writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) +- writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) ++ # for task in list(self.task_space.tasks)[:10]: ++ # writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) ++ # writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) +diff --git a/syllabus/curricula/plr/task_sampler.py b/syllabus/curricula/plr/task_sampler.py +index 15ad485..c1e97a1 100644 +--- a/syllabus/curricula/plr/task_sampler.py ++++ b/syllabus/curricula/plr/task_sampler.py +@@ -73,7 +73,7 @@ class TaskSampler: + 'Must provide action space to PLR if using "policy_entropy", "least_confidence", or "min_margin" strategies' + ) + +- def update_with_rollouts(self, rollouts): ++ def update_with_rollouts(self, rollouts, actor_id=None): + if self.strategy == "random": + return + +@@ -93,7 +93,7 @@ class TaskSampler: + else: + raise ValueError(f"Unsupported strategy, {self.strategy}") + +- self._update_with_rollouts(rollouts, score_function) ++ self._update_with_rollouts(rollouts, score_function, actor_index=actor_id) + + def update_task_score(self, actor_index, task_idx, score, num_steps): + score = self._partial_update_task_score(actor_index, task_idx, score, num_steps, done=True) +@@ -165,14 +165,15 @@ class TaskSampler: + def requires_value_buffers(self): + return self.strategy in ["gae", "value_l1", "one_step_td_error"] + +- def _update_with_rollouts(self, rollouts, score_function): ++ def _update_with_rollouts(self, rollouts, score_function, actor_index=None): + tasks = rollouts.tasks + if not self.requires_value_buffers: + policy_logits = rollouts.action_log_dist + done = ~(rollouts.masks > 0) + total_steps, num_actors = rollouts.tasks.shape[:2] + +- for actor_index in range(num_actors): ++ actors = [actor_index] if actor_index is not None else range(num_actors) ++ for actor_index in actors: + done_steps = done[:, actor_index].nonzero()[:total_steps, 0] + start_t = 0 + +diff --git a/syllabus/curricula/sequential.py b/syllabus/curricula/sequential.py +index baa1263..ec3b8b0 100644 +--- a/syllabus/curricula/sequential.py ++++ b/syllabus/curricula/sequential.py +@@ -177,9 +177,9 @@ class SequentialCurriculum(Curriculum): + if self.current_curriculum.requires_episode_updates: + self.current_curriculum.update_on_episode(episode_return, episode_len, episode_task, env_id) + +- def update_on_step(self, obs, rew, term, trunc, info, env_id=None): ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id=None): + if self.current_curriculum.requires_step_updates: +- self.current_curriculum.update_on_step(obs, rew, term, trunc, info, env_id) ++ self.current_curriculum.update_on_step(task, obs, rew, term, trunc, info, env_id) + + def update_on_step_batch(self, step_results, env_id=None): + if self.current_curriculum.requires_step_updates: +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py +index a6d469e..b848d69 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py 
++++ b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py +@@ -14,6 +14,7 @@ import gym as openai_gym + import gymnasium as gym + import numpy as np + import procgen # noqa: F401 ++from procgen import ProcgenEnv + import torch + import torch.nn as nn + import torch.optim as optim +@@ -21,10 +22,10 @@ from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 + from torch.utils.tensorboard import SummaryWriter + + from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum +-from syllabus.curricula import DomainRandomization, LearningProgressCurriculum, CentralizedPrioritizedLevelReplay ++from syllabus.curricula import CentralizedPrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum + from syllabus.examples.models import ProcgenAgent + from syllabus.examples.task_wrappers import ProcgenTaskWrapper +-from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize ++from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + + def parse_args(): +@@ -46,6 +47,8 @@ def parse_args(): + help="the entity (team) of wandb's project") + parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="weather to capture videos of the agent performances (check out `videos` folder)") ++ parser.add_argument("--logging-dir", type=str, default=".", ++ help="the base directory for logging and wandb storage.") + + # Algorithm specific arguments + parser.add_argument("--env-id", type=str, default="starpilot", +@@ -124,15 +127,15 @@ PROCGEN_RETURN_BOUNDS = { + } + + +-def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): ++def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) +- env = ProcgenTaskWrapper(env, env_id, seed=seed) +- if curriculum_components is not None: ++ if curriculum is not None: ++ env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, +- curriculum_components, ++ curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, + ) +@@ -147,36 +150,38 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def level_replay_evaluate( ++def full_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, +- num_levels=0 ++ num_levels=1 # Not used + ): + policy.eval() +- eval_envs = gym.vector.SyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) +- for i in range(1) +- ] ++ ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False + ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + +- eval_episode_rewards = [] ++ # Seed environments ++ seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] ++ for i, seed in enumerate(seeds): ++ eval_envs.seed(seed, i) ++ + eval_obs, _ = eval_envs.reset() ++ eval_episode_rewards = [-1] * num_episodes + +- while len(eval_episode_rewards) < num_episodes: ++ while -1 in eval_episode_rewards: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + +- eval_obs, _, truncs, terms, infos = 
eval_envs.step(np.array([eval_action.cpu().numpy()])) +- +- for info in infos: +- if 'episode' in info.keys(): +- eval_episode_rewards.append(info['episode']['r']) ++ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) ++ for i, info in enumerate(infos): ++ if 'episode' in info.keys() and eval_episode_rewards[i] == -1: ++ eval_episode_rewards[i] = info['episode']['r'] + +- eval_envs.close() + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +@@ -185,8 +190,7 @@ def level_replay_evaluate( + return mean_returns, stddev_returns, normalized_mean_returns + + +-def fast_level_replay_evaluate( +- eval_envs, ++def level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -194,9 +198,13 @@ def fast_level_replay_evaluate( + num_levels=0 + ): + policy.eval() +- possible_seeds = np.arange(0, num_levels + 1) +- eval_obs, _ = eval_envs.reset(seed=list(np.random.choice(possible_seeds, size=num_episodes))) + ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False ++ ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") ++ eval_envs = wrap_vecenv(eval_envs) ++ eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: +@@ -231,10 +239,11 @@ if __name__ == "__main__": + name=run_name, + monitor_gym=True, + save_code=True, +- # dir="/fs/nexus-scratch/rsulli/" ++ dir=args.logging_dir + ) +- wandb.run.log_code("./syllabus/examples") +- writer = SummaryWriter(f"./runs/{run_name}") ++ # wandb.run.log_code("./syllabus/examples") ++ ++ writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), +@@ -250,7 +259,7 @@ if __name__ == "__main__": + print("Device:", device) + + # Curriculum setup +- task_queue = update_queue = None ++ curriculum = None + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) +@@ -273,6 +282,16 @@ if __name__ == "__main__": + elif args.curriculum_method == "lp": + print("Using learning progress.") + curriculum = LearningProgressCurriculum(sample_env.task_space) ++ elif args.curriculum_method == "sq": ++ print("Using sequential curriculum.") ++ curricula = [] ++ stopping = [] ++ for i in range(199): ++ curricula.append(i + 1) ++ stopping.append("steps>=50000") ++ curricula.append(list(range(i + 1))) ++ stopping.append("steps>=50000") ++ curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) + else: + raise ValueError(f"Unknown curriculum method {args.curriculum_method}") + curriculum = make_multiprocessing_curriculum(curriculum) +@@ -285,7 +304,7 @@ if __name__ == "__main__": + make_env( + args.env_id, + args.seed + i, +- curriculum_components=curriculum.get_components() if args.curriculum else None, ++ curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) +@@ -293,22 +312,6 @@ if __name__ == "__main__": + ) + envs = wrap_vecenv(envs) + +- test_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=0) +- for i in range(args.num_eval_episodes) +- ] +- ) +- test_eval_envs = wrap_vecenv(test_eval_envs) +- +- train_eval_envs = 
gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=200) +- for i in range(args.num_eval_episodes) +- ] +- ) +- train_eval_envs = wrap_vecenv(train_eval_envs) +- + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( +@@ -369,6 +372,8 @@ if __name__ == "__main__": + print(f"global_step={global_step}, episodic_return={item['episode']['r']}") + writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) ++ if curriculum is not None: ++ curriculum.log_metrics(writer, global_step) + break + + # Syllabus curriculum update +@@ -388,8 +393,6 @@ if __name__ == "__main__": + }, + } + curriculum.update(update) +- #if args.curriculum: +- # curriculum.log_metrics(writer, global_step) + + # bootstrap value if not done + with torch.no_grad(): +@@ -487,8 +490,18 @@ if __name__ == "__main__": + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent +- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) +- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) ++ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) ++ full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) +@@ -502,12 +515,21 @@ if __name__ == "__main__": + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) ++ + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) ++ + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) +- 
writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) ++ writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) ++ + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +index e13c22e..abf656b 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py ++++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +@@ -14,6 +14,7 @@ import gym as openai_gym + import gymnasium as gym + import numpy as np + import procgen # noqa: F401 ++from procgen import ProcgenEnv + import torch + import torch.nn as nn + import torch.optim as optim +@@ -24,7 +25,7 @@ from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curri + from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum + from syllabus.examples.models import ProcgenAgent + from syllabus.examples.task_wrappers import ProcgenTaskWrapper +-from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize ++from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + + def parse_args(): +@@ -126,18 +127,17 @@ PROCGEN_RETURN_BOUNDS = { + } + + +-def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): ++def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) +- env = ProcgenTaskWrapper(env, env_id, seed=seed) +- if curriculum_components is not None: ++ if curriculum is not None: ++ env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, +- curriculum_components, ++ curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, +- buffer_size=4, + ) + return env + return thunk +@@ -150,7 +150,7 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def level_replay_evaluate( ++def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -158,28 +158,24 @@ def level_replay_evaluate( + num_levels=0 + ): + policy.eval() +- eval_envs = gym.vector.SyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) +- for i in range(1) +- ] ++ ++ eval_envs = ProcgenEnv( ++ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) +- +- eval_episode_rewards = [] + eval_obs, _ = eval_envs.reset() ++ eval_episode_rewards = [] + + while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + +- eval_obs, _, truncs, terms, infos = eval_envs.step(np.array([eval_action.cpu().numpy()])) +- +- for info in infos: ++ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) ++ for i, 
info in enumerate(infos): + if 'episode' in info.keys(): + eval_episode_rewards.append(info['episode']['r']) + +- eval_envs.close() + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +@@ -188,8 +184,7 @@ def level_replay_evaluate( + return mean_returns, stddev_returns, normalized_mean_returns + + +-def fast_level_replay_evaluate( +- eval_envs, ++def level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -198,15 +193,12 @@ def fast_level_replay_evaluate( + ): + policy.eval() + +- # Choose evaluation seeds +- if num_levels == 0: +- seeds = np.random.randint(0, 2 ** 16 - 1, size=num_episodes) +- else: +- seeds = np.random.choice(np.arange(0, num_levels), size=num_episodes) +- +- seed_envs = [(int(seed), env) for seed, env in zip(seeds, range(num_episodes))] +- eval_obs, _ = eval_envs.reset(seed=seed_envs) +- ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False ++ ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") ++ eval_envs = wrap_vecenv(eval_envs) ++ eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: +@@ -251,9 +243,9 @@ if __name__ == "__main__": + save_code=True, + dir=args.logging_dir + ) +- wandb.run.log_code(os.path.join(args.logging_dir, "/syllabus/examples")) ++ # wandb.run.log_code("./syllabus/examples") + +- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) ++ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), +@@ -316,7 +308,7 @@ if __name__ == "__main__": + make_env( + args.env_id, + args.seed + i, +- curriculum_components=curriculum.get_components() if args.curriculum else None, ++ curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) +@@ -324,22 +316,6 @@ if __name__ == "__main__": + ) + envs = wrap_vecenv(envs) + +- test_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=0) +- for i in range(args.num_eval_episodes) +- ] +- ) +- test_eval_envs = wrap_vecenv(test_eval_envs) +- +- train_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=200) +- for i in range(args.num_eval_episodes) +- ] +- ) +- train_eval_envs = wrap_vecenv(train_eval_envs) +- + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( +@@ -500,8 +476,18 @@ if __name__ == "__main__": + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent +- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) +- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) ++ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ slow_mean_eval_returns, slow_stddev_eval_returns, 
slow_normalized_mean_eval_returns = slow_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) ++ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) +@@ -515,12 +501,21 @@ if __name__ == "__main__": + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) ++ + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) ++ + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) ++ + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() +diff --git a/syllabus/examples/utils/vecenv.py b/syllabus/examples/utils/vecenv.py +index 6e5a0a9..af3b187 100644 +--- a/syllabus/examples/utils/vecenv.py ++++ b/syllabus/examples/utils/vecenv.py +@@ -1,7 +1,6 @@ + import time + from collections import deque + +-import gym + import numpy as np + + +@@ -154,12 +153,20 @@ class VecEnvObservationWrapper(VecEnvWrapper): + pass + + def reset(self): +- obs, infos = self.venv.reset() ++ outputs = self.venv.reset() ++ if len(outputs) == 2: ++ obs, infos = outputs ++ else: ++ obs, infos = outputs, {} + return self.process(obs), infos + + def step_wait(self): +- print(self.venv) +- obs, rews, terms, truncs, infos = self.venv.step_wait() ++ env_outputs = self.venv.step_wait() ++ if len(env_outputs) == 4: ++ obs, rews, terms, infos = env_outputs ++ truncs = np.zeros_like(terms) ++ else: ++ obs, rews, terms, truncs, infos = env_outputs + return self.process(obs), rews, terms, truncs, infos + + +@@ -209,7 +216,10 @@ class VecNormalize(VecEnvWrapper): + + def reset(self, seed=None): + self.ret = np.zeros(self.num_envs) +- obs, infos = self.venv.reset(seed=seed) ++ if seed is not None: ++ obs, infos = self.venv.reset(seed=seed) ++ else: ++ obs, infos = self.venv.reset() + return self._obfilt(obs), infos + + +@@ -228,7 +238,10 @@ class VecMonitor(VecEnvWrapper): + self.eplen_buf = 
deque([], maxlen=keep_buf) + + def reset(self, seed=None): +- obs, infos = self.venv.reset(seed=seed) ++ if seed is not None: ++ obs, infos = self.venv.reset(seed=seed) ++ else: ++ obs, infos = self.venv.reset() + self.eprets = np.zeros(self.num_envs, 'f') + self.eplens = np.zeros(self.num_envs, 'i') + return obs, infos +@@ -239,7 +252,8 @@ class VecMonitor(VecEnvWrapper): + self.eprets += rews + self.eplens += 1 + # Convert dict of lists to list of dicts +- infos = [dict(zip(infos, t)) for t in zip(*infos.values())] ++ if isinstance(infos, dict): ++ infos = [dict(zip(infos, t)) for t in zip(*infos.values())] + newinfos = list(infos[:]) + for i in range(len(dones)): + if dones[i]: +diff --git a/syllabus/task_space/task_space.py b/syllabus/task_space/task_space.py +index 316e2f2..1ef674b 100644 +--- a/syllabus/task_space/task_space.py ++++ b/syllabus/task_space/task_space.py +@@ -7,20 +7,53 @@ from gymnasium.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Sp + + class TaskSpace(): + def __init__(self, gym_space: Union[Space, int], tasks=None): +- if isinstance(gym_space, int): +- # Syntactic sugar for discrete space +- gym_space = Discrete(gym_space) ++ ++ if not isinstance(gym_space, Space): ++ gym_space = self._create_gym_space(gym_space) + + self.gym_space = gym_space + +- # Autogenerate task names for discrete spaces +- if isinstance(gym_space, Discrete): +- if tasks is None: +- tasks = range(gym_space.n) ++ # Autogenerate task names ++ if tasks is None: ++ tasks = self._generate_task_names(gym_space) + + self._tasks = set(tasks) if tasks is not None else None + self._encoder, self._decoder = self._make_task_encoder(gym_space, tasks) + ++ def _create_gym_space(self, gym_space): ++ if isinstance(gym_space, int): ++ # Syntactic sugar for discrete space ++ gym_space = Discrete(gym_space) ++ elif isinstance(gym_space, tuple): ++ # Syntactic sugar for discrete space ++ gym_space = MultiDiscrete(gym_space) ++ elif isinstance(gym_space, list): ++ # Syntactic sugar for tuple space ++ spaces = [] ++ for i, value in enumerate(gym_space): ++ spaces[i] = self._create_gym_space(value) ++ gym_space = Tuple(spaces) ++ elif isinstance(gym_space, dict): ++ # Syntactic sugar for dict space ++ spaces = {} ++ for key, value in gym_space.items(): ++ spaces[key] = self._create_gym_space(value) ++ gym_space = Dict(spaces) ++ return gym_space ++ ++ def _generate_task_names(self, gym_space): ++ if isinstance(gym_space, Discrete): ++ tasks = tuple(range(gym_space.n)) ++ elif isinstance(gym_space, MultiDiscrete): ++ tasks = [tuple(range(dim)) for dim in gym_space.nvec] ++ elif isinstance(gym_space, Tuple): ++ tasks = [self._generate_task_names(value) for value in gym_space.spaces] ++ elif isinstance(gym_space, Dict): ++ tasks = {key: tuple(self._generate_task_names(value)) for key, value in gym_space.spaces.items()} ++ else: ++ tasks = None ++ return tasks ++ + def _make_task_encoder(self, space, tasks): + if isinstance(space, Discrete): + assert space.n == len(tasks), f"Number of tasks ({space.n}) must match number of discrete options ({len(tasks)})" +@@ -28,14 +61,46 @@ class TaskSpace(): + self._decode_map = {i: task for i, task in enumerate(tasks)} + encoder = lambda task: self._encode_map[task] if task in self._encode_map else None + decoder = lambda task: self._decode_map[task] if task in self._decode_map else None ++ ++ elif isinstance(space, Box): ++ encoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None ++ decoder = lambda task: task if 
space.contains(np.asarray(task, dtype=space.dtype)) else None + elif isinstance(space, Tuple): +- for i, task in enumerate(tasks): +- assert self.count_tasks(space.spaces[i]) == len(task), "Each task must have number of components equal to Tuple space length. Got {len(task)} components and space length {self.count_tasks(space.spaces[i])}." ++ ++ assert len(space.spaces) == len(tasks), f"Number of task ({len(space.spaces)})must match options in Tuple ({len(tasks)})" + results = [list(self._make_task_encoder(s, t)) for (s, t) in zip(space.spaces, tasks)] + encoders = [r[0] for r in results] + decoders = [r[1] for r in results] + encoder = lambda task: [e(t) for e, t in zip(encoders, task)] + decoder = lambda task: [d(t) for d, t in zip(decoders, task)] ++ ++ elif isinstance(space, MultiDiscrete): ++ assert len(space.nvec) == len(tasks), f"Number of steps in a tasks ({len(space.nvec)}) must match number of discrete options ({len(tasks)})" ++ ++ combinations = [p for p in itertools.product(*tasks)] ++ encode_map = {task: i for i, task in enumerate(combinations)} ++ decode_map = {i: task for i, task in enumerate(combinations)} ++ ++ encoder = lambda task: encode_map[task] if task in encode_map else None ++ decoder = lambda task: decode_map[task] if task in decode_map else None ++ ++ elif isinstance(space, Dict): ++ ++ def helper(task, spaces, tasks, action="encode"): ++ # Iteratively encodes or decodes each space in the dictionary ++ output = {} ++ if (isinstance(spaces, dict) or isinstance(spaces, Dict)): ++ for key, value in spaces.items(): ++ if (isinstance(value, dict) or isinstance(value, Dict)): ++ temp = helper(task[key], value, tasks[key], action) ++ output.update({key: temp}) ++ else: ++ encoder, decoder = self._make_task_encoder(value, tasks[key]) ++ output[key] = encoder(task[key]) if action == "encode" else decoder(task[key]) ++ return output ++ ++ encoder = lambda task: helper(task, space.spaces, tasks, "encode") ++ decoder = lambda task: helper(task, space.spaces, tasks, "decode") + else: + encoder = lambda task: task + decoder = lambda task: task +@@ -152,6 +217,7 @@ class TaskSpace(): + return Discrete(self.gym_space.n + amount) + + def sample(self): ++ assert isinstance(self.gym_space, Discrete) or isinstance(self.gym_space, Box) or isinstance(self.gym_space, Dict) or isinstance(self.gym_space, Tuple) + return self.decode(self.gym_space.sample()) + + def list_tasks(self): +diff --git a/syllabus/task_space/test_task_space.py b/syllabus/task_space/test_task_space.py +index 0ec6b4e..109d0a7 100644 +--- a/syllabus/task_space/test_task_space.py ++++ b/syllabus/task_space/test_task_space.py +@@ -2,33 +2,148 @@ import gymnasium as gym + from syllabus.task_space import TaskSpace + + if __name__ == "__main__": ++ # Discrete Tests + task_space = TaskSpace(gym.spaces.Discrete(3), ["a", "b", "c"]) ++ + assert task_space.encode("a") == 0, f"Expected 0, got {task_space.encode('a')}" + assert task_space.encode("b") == 1, f"Expected 1, got {task_space.encode('b')}" + assert task_space.encode("c") == 2, f"Expected 2, got {task_space.encode('c')}" +- assert task_space.encode("d") == None, f"Expected None, got {task_space.encode('d')}" ++ assert task_space.encode("d") is None, f"Expected None, got {task_space.encode('d')}" + + assert task_space.decode(0) == "a", f"Expected a, got {task_space.decode(0)}" + assert task_space.decode(1) == "b", f"Expected b, got {task_space.decode(1)}" + assert task_space.decode(2) == "c", f"Expected c, got {task_space.decode(2)}" +- assert task_space.decode(3) == 
None, f"Expected None, got {task_space.decode(3)}" ++ assert task_space.decode(3) is None, f"Expected None, got {task_space.decode(3)}" + print("Discrete tests passed!") + ++ # MultiDiscrete Tests ++ task_space = TaskSpace(gym.spaces.MultiDiscrete([3, 2]), [("a", "b", "c"), (1, 0)]) ++ ++ assert task_space.encode(('a', 1)) == 0, f"Expected 0, got {task_space.encode(('a', 1))}" ++ assert task_space.encode(('b', 0)) == 3, f"Expected 3, got {task_space.encode(('b', 0))}" ++ assert task_space.encode(('c', 1)) == 4, f"Expected 4, got {task_space.encode(('c', 1))}" ++ ++ assert task_space.decode(3) == ('b', 0), f"Expected ('b', 0), got {task_space.decode(3)}" ++ assert task_space.decode(5) == ('c', 0), f"Expected ('c', 0), got {task_space.decode(5)}" ++ print("MultiDiscrete tests passed!") ++ ++ # Box Tests + task_space = TaskSpace(gym.spaces.Box(low=0, high=1, shape=(2,)), [(0, 0), (0, 1), (1, 0), (1, 1)]) ++ + assert task_space.encode([0.0, 0.0]) == [0.0, 0.0], f"Expected [0.0, 0.0], got {task_space.encode([0.0, 0.0])}" + assert task_space.encode([0.0, 0.1]) == [0.0, 0.1], f"Expected [0.0, 0.1], got {task_space.encode([0.0, 0.1])}" + assert task_space.encode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.encode([0.1, 0.1])}" + assert task_space.encode([1.0, 0.1]) == [1.0, 0.1], f"Expected [1.0, 0.1], got {task_space.encode([1.0, 0.1])}" + assert task_space.encode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.encode([1.0, 1.0])}" +- assert task_space.encode([1.2, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" +- assert task_space.encode([1.0, 1.2]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" +- assert task_space.encode([-0.1, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([1.2, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([1.0, 1.2]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([-0.1, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" + + assert task_space.decode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.decode([1.0, 1.0])}" + assert task_space.decode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.decode([0.1, 0.1])}" +- assert task_space.decode([-0.1, 1.0]) == None, f"Expected None, got {task_space.decode([1.2, 1.0])}" ++ assert task_space.decode([-0.1, 1.0]) is None, f"Expected None, got {task_space.decode([1.2, 1.0])}" + print("Box tests passed!") + ++ # Tuple Tests ++ task_spaces = (gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3)) ++ task_names = ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")) ++ task_space = TaskSpace(gym.spaces.Tuple(task_spaces), task_names) ++ ++ assert task_space.encode((('a', 0), 'Y')) == [1, 1], f"Expected 0, got {task_space.encode((('a', 1),'Y'))}" ++ assert task_space.decode([0, 1]) == [('a', 1), 'Y'], f"Expected 0, got {task_space.decode([0, 1])}" ++ print("Tuple tests passed!") ++ ++ # Dictionary Tests ++ task_spaces = gym.spaces.Dict({ ++ "ext_controller": gym.spaces.MultiDiscrete([5, 2, 2]), ++ "inner_state": gym.spaces.Dict( ++ { ++ "charge": gym.spaces.Discrete(10), ++ "system_checks": gym.spaces.Tuple((gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3))), ++ "job_status": gym.spaces.Dict( ++ { ++ "task": gym.spaces.Discrete(5), ++ "progress": gym.spaces.Box(low=0, high=1, shape=(2,)), ++ } ++ ), ++ } ++ ), ++ }) ++ task_names = { ++ "ext_controller": [("a", "b", 
"c", "d", "e"), (1, 0), ("X", "Y")], ++ "inner_state": { ++ "charge": [0, 1, 13, 3, 94, 35, 6, 37, 8, 9], ++ "system_checks": ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")), ++ "job_status": { ++ "task": ["A", "B", "C", "D", "E"], ++ "progress": [(0, 0), (0, 1), (1, 0), (1, 1)], ++ } ++ } ++ } ++ task_space = TaskSpace(task_spaces, task_names) ++ ++ test_val = { ++ "ext_controller": ('b', 1, 'X'), ++ 'inner_state': { ++ 'charge': 1, ++ 'system_checks': [('a', 0), 'Y'], ++ 'job_status': {'task': 'C', 'progress': [0.0, 0.0]} ++ } ++ } ++ decode_val = { ++ "ext_controller": 4, ++ "inner_state": { ++ "charge": 1, ++ "system_checks": [1, 1], ++ "job_status": {"progress": [0.0, 0.0], "task": 2}, ++ }, ++ } ++ ++ assert task_space.encode(test_val) == decode_val, f"Expected {decode_val}, \n but got {task_space.encode(test_val)}" ++ assert task_space.decode(decode_val) == test_val, f"Expected {test_val}, \n but got {task_space.decode(decode_val)}" ++ ++ test_val_2 = { ++ "ext_controller": ("e", 1, "Y"), ++ "inner_state": { ++ "charge": 37, ++ "system_checks": [("b", 0), "Z"], ++ "job_status": {"progress": [0.0, 0.1], "task": "D"}, ++ }, ++ } ++ decode_val_2 = { ++ "ext_controller": 17, ++ "inner_state": { ++ "charge": 7, ++ "system_checks": [3, 2], ++ "job_status": {"progress": [0.0, 0.1], "task": 3}, ++ }, ++ } ++ ++ assert task_space.encode(test_val_2) == decode_val_2, f"Expected {decode_val_2}, \n but got {task_space.encode(test_val_2)}" ++ assert task_space.decode(decode_val_2) == test_val_2, f"Expected {test_val_2}, \n but got {task_space.decode(decode_val_2)}" ++ ++ test_val_3 = { ++ "ext_controller": ("e", 1, "X"), ++ "inner_state": { ++ "charge": 8, ++ "system_checks": [("c", 0), "X"], ++ "job_status": {"progress": [0.5, 0.1], "task": "E"}, ++ }, ++ } ++ decode_val_3 = { ++ "ext_controller": 16, ++ "inner_state": { ++ "charge": 8, ++ "system_checks": [5, 0], ++ "job_status": {"progress": [0.5, 0.1], "task": 4}, ++ }, ++ } ++ ++ assert task_space.encode(test_val_3) == decode_val_3, f"Expected {decode_val_3}, \n but got {task_space.encode(test_val_3)}" ++ assert task_space.decode(decode_val_3) == test_val_3, f"Expected {test_val_3}, \n but got {task_space.decode(decode_val_3)}" ++ ++ print("Dictionary tests passed!") ++ + # Test syntactic sugar + task_space = TaskSpace(3) + assert task_space.encode(0) == 0, f"Expected 0, got {task_space.encode(0)}" +@@ -36,4 +151,32 @@ if __name__ == "__main__": + assert task_space.encode(2) == 2, f"Expected 2, got {task_space.encode(2)}" + assert task_space.encode(3) is None, f"Expected None, got {task_space.encode(3)}" + ++ task_space = TaskSpace((2, 4)) ++ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" ++ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" ++ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" ++ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" ++ ++ task_space = TaskSpace((2, 4)) ++ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" ++ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" ++ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" ++ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" ++ ++ task_space = TaskSpace({"map": 5, "level": (4, 10), "difficulty": 3}) ++ ++ encoding = task_space.encode({"map": 0, "level": (0, 0), "difficulty": 0}) ++ 
expected = {"map": 0, "level": 0, "difficulty": 0} ++ ++ encoding = task_space.encode({"map": 4, "level": (3, 9), "difficulty": 2}) ++ expected = {"map": 4, "level": 39, "difficulty": 2} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" ++ ++ encoding = task_space.encode({"map": 2, "level": (2, 0), "difficulty": 1}) ++ expected = {"map": 2, "level": 20, "difficulty": 1} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" ++ ++ encoding = task_space.encode({"map": 5, "level": (2, 11), "difficulty": -1}) ++ expected = {"map": None, "level": None, "difficulty": None} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" + print("All tests passed!") +diff --git a/syllabus/tests/utils.py b/syllabus/tests/utils.py +index 314a29c..98bac82 100644 +--- a/syllabus/tests/utils.py ++++ b/syllabus/tests/utils.py +@@ -57,7 +57,7 @@ def run_episode(env, new_task=None, curriculum=None, env_id=0): + action = env.action_space.sample() + obs, rew, term, trunc, info = env.step(action) + if curriculum and curriculum.requires_step_updates: +- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) ++ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) + curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) + ep_rew += rew + ep_len += 1 +@@ -87,7 +87,7 @@ def run_set_length(env, curriculum=None, episodes=None, steps=None, env_id=0, en + action = env.action_space.sample() + obs, rew, term, trunc, info = env.step(action) + if curriculum and curriculum.requires_step_updates: +- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) ++ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) + curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) + ep_rew += rew + ep_len += 1 +diff --git a/tests/multiprocessing_smoke_tests.py b/tests/multiprocessing_smoke_tests.py +index 9db9f47..b788179 100644 +--- a/tests/multiprocessing_smoke_tests.py ++++ b/tests/multiprocessing_smoke_tests.py +@@ -21,23 +21,23 @@ nethack_env = create_nethack_env() + cartpole_env = create_cartpole_env() + + curricula = [ +- (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), +- (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), +- # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), +- (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), +- (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { +- "get_value": get_test_values, +- "device": "cpu", +- "num_processes": N_ENVS, +- "num_steps": 2048 +- }), +- (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), +- (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { +- 'start_values': [-0.02, 0.02], +- 'end_values': [-0.3, 0.3], +- 'total_steps': [10] +- }), +- (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], 
["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), ++ (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), ++ (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), ++ # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), ++ (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), ++ (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { ++ "get_value": get_test_values, ++ "device": "cpu", ++ "num_processes": N_ENVS, ++ "num_steps": 2048 ++ }), ++ (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), ++ (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { ++ 'start_values': [-0.02, 0.02], ++ 'end_values': [-0.3, 0.3], ++ 'total_steps': [10] ++ }), ++ (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), + ] + + test_names = [curriculum_args[0].__name__ for curriculum_args in curricula] diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-metadata.json b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-metadata.json new file mode 100644 index 00000000..97ae1d6e --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-metadata.json @@ -0,0 +1,167 @@ +{ + "os": "Linux-3.10.0-1160.11.1.el7.x86_64-x86_64-with-glibc2.10", + "python": "3.8.5", + "heartbeatAt": "2024-04-23T03:00:36.306872", + "startedAt": "2024-04-23T03:00:35.394736", + "docker": null, + "cuda": "10.1.243", + "args": [ + "--curriculum", + "True", + "--track", + "True", + "--env-id", + "bigfish" + ], + "state": "running", + "program": "cleanrl_procgen_plr.py", + "codePathLocal": "cleanrl_procgen_plr.py", + "codePath": "syllabus/examples/training_scripts/cleanrl_procgen_plr.py", + "git": { + "remote": "https://github.com/RoseyGreenBlue/Syllabus.git", + "commit": "63dc8f62e4d9d567eb92bb2f6c2bb186a0dc8ffb" + }, + "email": "djhaayusv04@gmail.com", + "root": "/data/averma/MARL/Syllabus", + "host": "f411843fc70b", + "username": "root", + "executable": "/home/user/miniconda/envs/test2_py/bin/python", + "cpu_count": 12, + "cpu_count_logical": 24, + "cpu_freq": { + "current": 1236.6083333333333, + "min": 1200.0, + "max": 3700.0 + }, + "cpu_freq_per_core": [ + { + "current": 1200.5, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1211.291, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.5, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1246.984, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1199.877, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1204.65, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.085, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1199.877, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1199.877, + "min": 1200.0, + "max": 
3700.0 + }, + { + "current": 1205.48, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1228.723, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1225.195, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1214.819, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1204.65, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1269.396, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1238.269, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1199.877, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1397.436, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1388.928, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.708, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1329.162, + "min": 1200.0, + "max": 3700.0 + } + ], + "disk": { + "/": { + "total": 5952.626953125, + "used": 988.7820014953613 + } + }, + "memory": { + "total": 251.63711166381836 + } +} diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-summary.json b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-summary.json new file mode 100644 index 00000000..eeaa9075 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-summary.json @@ -0,0 +1 @@ +{"global_step": 16320, "_timestamp": 1713841250.5553138, "_runtime": 15.14670991897583, "_step": 82, "charts/episodic_return": 1.0, "charts/episodic_length": 150.0, "curriculum/proportion_seen": 0.0, "curriculum/score": 0.0, "_wandb": {"runtime": 27}} \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/run-37l9hfvl.wandb b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/run-37l9hfvl.wandb new file mode 100644 index 0000000000000000000000000000000000000000..25b7561a5425038bd864a11b55d75be8d24ff88b GIT binary patch literal 51518 zcmd^|34Bvk+W$=`SOO>%*Q!{Hj)<5hcfYvnjEai;Hg1_TO-n+W)FfqbEk(2-OI5&v zC`A-1>QFW*2vQUj7eE17WGgCKMWik`Ad3IrlWzB9xwW_H`~S@Q>U>7%ZO*yh=XcI? 
4Q> wZ>3k$(=A>7?(on+aIiHL>8YKua~~&LttyX_o1S4C{*Yo*Dt*f}j)lno56g`XhyVZp literal 0 HcmV?d00001 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py new file mode 100644 index 00000000..b8073049 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py @@ -0,0 +1,524 @@ +""" An example applying Syllabus Prioritized Level Replay to Procgen. This code is based on https://github.com/facebookresearch/level-replay/blob/main/train.py + +NOTE: In order to efficiently change the seed of a procgen environment directly without reinitializing it, +we rely on Minqi Jiang's custom branch of procgen found here: https://github.com/minqi/procgen +""" +import argparse +import os +import random +import time +from collections import deque +from distutils.util import strtobool + +import gym as openai_gym +import gymnasium as gym +import numpy as np +import procgen # noqa: F401 +from procgen import ProcgenEnv +import torch +import torch.nn as nn +import torch.optim as optim +from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 +from torch.utils.tensorboard import SummaryWriter + +from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum +from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum +from syllabus.examples.models import ProcgenAgent +from syllabus.examples.task_wrappers import ProcgenTaskWrapper +from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + +def parse_args(): + # fmt: off + parser = argparse.ArgumentParser() + parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), + help="the name of this experiment") + parser.add_argument("--seed", type=int, default=1, + help="seed of the experiment") + parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="if toggled, `torch.backends.cudnn.deterministic=False`") + parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="if toggled, cuda will be enabled by default") + parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="if toggled, this experiment will be tracked with Weights and Biases") + parser.add_argument("--wandb-project-name", type=str, default="syllabus", + help="the wandb's project name") + parser.add_argument("--wandb-entity", type=str, default=None, + help="the entity (team) of wandb's project") + parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="weather to capture videos of the agent performances (check out `videos` folder)") + parser.add_argument("--logging-dir", type=str, default=".", + help="the base directory for logging and wandb storage.") + + # Algorithm specific arguments + parser.add_argument("--env-id", type=str, default="starpilot", + help="the id of the environment") + parser.add_argument("--total-timesteps", type=int, default=int(25e6), + help="total timesteps of the experiments") + parser.add_argument("--learning-rate", type=float, default=5e-4, + help="the learning rate 
of the optimizer") + parser.add_argument("--num-envs", type=int, default=64, + help="the number of parallel game environments") + parser.add_argument("--num-steps", type=int, default=256, + help="the number of steps to run in each environment per policy rollout") + parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="Toggle learning rate annealing for policy and value networks") + parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Use GAE for advantage computation") + parser.add_argument("--gamma", type=float, default=0.999, + help="the discount factor gamma") + parser.add_argument("--gae-lambda", type=float, default=0.95, + help="the lambda for the general advantage estimation") + parser.add_argument("--num-minibatches", type=int, default=8, + help="the number of mini-batches") + parser.add_argument("--update-epochs", type=int, default=3, + help="the K epochs to update the policy") + parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Toggles advantages normalization") + parser.add_argument("--clip-coef", type=float, default=0.2, + help="the surrogate clipping coefficient") + parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") + parser.add_argument("--ent-coef", type=float, default=0.01, + help="coefficient of the entropy") + parser.add_argument("--vf-coef", type=float, default=0.5, + help="coefficient of the value function") + parser.add_argument("--max-grad-norm", type=float, default=0.5, + help="the maximum norm for the gradient clipping") + parser.add_argument("--target-kl", type=float, default=None, + help="the target KL divergence threshold") + + # Procgen arguments + parser.add_argument("--full-dist", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Train on full distribution of levels.") + + # Curriculum arguments + parser.add_argument("--curriculum", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="if toggled, this experiment will use curriculum learning") + parser.add_argument("--curriculum-method", type=str, default="plr", + help="curriculum method to use") + parser.add_argument("--num-eval-episodes", type=int, default=10, + help="the number of episodes to evaluate the agent on after each policy update.") + + args = parser.parse_args() + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + # fmt: on + return args + + +PROCGEN_RETURN_BOUNDS = { + "coinrun": (5, 10), + "starpilot": (2.5, 64), + "caveflyer": (3.5, 12), + "dodgeball": (1.5, 19), + "fruitbot": (-1.5, 32.4), + "chaser": (0.5, 13), + "miner": (1.5, 13), + "jumper": (3, 10), + "leaper": (3, 10), + "maze": (5, 10), + "bigfish": (1, 40), + "heist": (3.5, 10), + "climber": (2, 12.6), + "plunder": (4.5, 30), + "ninja": (3.5, 10), + "bossfight": (0.5, 13), +} + + +def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) + if curriculum is not None: + env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, + curriculum.get_components(), + 
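+                # Editorial note (not in the original commit): the next line hard-codes
+                # update_on_step=False, whereas the upstream script passed
+                # update_on_step=curriculum.requires_step_updates (see the bundled
+                # diff.patch further below). PLR's value_l1 scoring depends on those
+                # per-step updates reaching the curriculum.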
update_on_step=False, + task_space=env.task_space, + ) + return env + return thunk + + +def wrap_vecenv(vecenv): + vecenv.is_vector_env = True + vecenv = VecMonitor(venv=vecenv, filename=None, keep_buf=100) + vecenv = VecNormalize(venv=vecenv, ob=False, ret=True) + return vecenv + + +def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, + num_levels=0 +): + policy.eval() + + eval_envs = ProcgenEnv( + num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [] + + while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): + if 'episode' in info.keys(): + eval_episode_rewards.append(info['episode']['r']) + + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) + policy.train() + return mean_returns, stddev_returns, normalized_mean_returns + + +def level_replay_evaluate( + env_name, + policy, + num_episodes, + device, + num_levels=0 +): + policy.eval() + + eval_envs = ProcgenEnv( + num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): + if 'episode' in info.keys() and eval_episode_rewards[i] == -1: + eval_episode_rewards[i] = info['episode']['r'] + + # print(eval_episode_rewards) + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) + policy.train() + return mean_returns, stddev_returns, normalized_mean_returns + + +def make_value_fn(): + def get_value(obs): + obs = np.array(obs) + with torch.no_grad(): + return agent.get_value(torch.Tensor(obs).to(device)) + return get_value + + +if __name__ == "__main__": + args = parse_args() + run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" + if args.track: + import wandb + + wandb.init( + project=args.wandb_project_name, + entity=args.wandb_entity, + sync_tensorboard=True, + config=vars(args), + name=run_name, + monitor_gym=True, + save_code=True, + dir=args.logging_dir + ) + # wandb.run.log_code("./syllabus/examples") + + writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), + ) + + # TRY NOT TO MODIFY: seeding + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + torch.backends.cudnn.deterministic 
= args.torch_deterministic + + device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") + print("Device:", device) + + # Curriculum setup + curriculum = None + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) + # code to edit + # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) + sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) + + # Intialize Curriculum Method + if args.curriculum_method == "plr": + print("Using prioritized level replay.") + curriculum = PrioritizedLevelReplay( + sample_env.task_space, + sample_env.observation_space, + num_steps=args.num_steps, + num_processes=args.num_envs, + gamma=args.gamma, + gae_lambda=args.gae_lambda, + task_sampler_kwargs_dict={"strategy": "value_l1"}, + get_value=make_value_fn(), + ) + elif args.curriculum_method == "dr": + print("Using domain randomization.") + curriculum = DomainRandomization(sample_env.task_space) + elif args.curriculum_method == "lp": + print("Using learning progress.") + curriculum = LearningProgressCurriculum(sample_env.task_space) + elif args.curriculum_method == "sq": + print("Using sequential curriculum.") + curricula = [] + stopping = [] + for i in range(199): + curricula.append(i + 1) + stopping.append("steps>=50000") + curricula.append(list(range(i + 1))) + stopping.append("steps>=50000") + curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) + else: + raise ValueError(f"Unknown curriculum method {args.curriculum_method}") + curriculum = make_multiprocessing_curriculum(curriculum) + del sample_env + + # env setup + print("Creating env") + envs = gym.vector.AsyncVectorEnv( + [ + make_env( + args.env_id, + args.seed + i, + curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) + ] + ) + envs = wrap_vecenv(envs) + + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( + envs.single_observation_space.shape, + envs.single_action_space.n, + arch="large", + base_kwargs={'recurrent': False, 'hidden_size': 256} + ).to(device) + optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) + + # ALGO Logic: Storage setup + obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) + actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) + logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) + rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) + dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + values = torch.zeros((args.num_steps, args.num_envs)).to(device) + + # TRY NOT TO MODIFY: start the game + global_step = 0 + start_time = time.time() + next_obs, _ = envs.reset() + next_obs = torch.Tensor(next_obs).to(device) + next_done = torch.zeros(args.num_envs).to(device) + num_updates = args.total_timesteps // args.batch_size + episode_rewards = deque(maxlen=10) + completed_episodes = 0 + + for update in range(1, num_updates + 1): + # Annealing the rate if instructed to do so. 
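+        # Editorial note (not in the original commit): with the defaults in this script
+        # (num_envs=64, num_steps=256, so batch_size=16384, total_timesteps=25e6),
+        # num_updates = 25_000_000 // 16_384 = 1525 and frac decays linearly from 1.0
+        # towards ~0. --anneal-lr defaults to False, so this branch only runs when
+        # annealing is enabled explicitly.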
+ if args.anneal_lr: + frac = 1.0 - (update - 1.0) / num_updates + lrnow = frac * args.learning_rate + optimizer.param_groups[0]["lr"] = lrnow + + for step in range(0, args.num_steps): + global_step += 1 * args.num_envs + obs[step] = next_obs + dones[step] = next_done + + # ALGO LOGIC: action logic + with torch.no_grad(): + action, logprob, _, value = agent.get_action_and_value(next_obs) + values[step] = value.flatten() + actions[step] = action + logprobs[step] = logprob + + # TRY NOT TO MODIFY: execute the game and log data. + next_obs, reward, term, trunc, info = envs.step(action.cpu().numpy()) + done = np.logical_or(term, trunc) + rewards[step] = torch.tensor(reward).to(device).view(-1) + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + completed_episodes += sum(done) + + for item in info: + if "episode" in item.keys(): + episode_rewards.append(item['episode']['r']) + print(f"global_step={global_step}, episodic_return={item['episode']['r']}") + writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) + if curriculum is not None: + curriculum.log_metrics(writer, global_step) + break + + # bootstrap value if not done + with torch.no_grad(): + next_value = agent.get_value(next_obs).reshape(1, -1) + if args.gae: + advantages = torch.zeros_like(rewards).to(device) + lastgaelam = 0 + for t in reversed(range(args.num_steps)): + if t == args.num_steps - 1: + nextnonterminal = 1.0 - next_done + nextvalues = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + nextvalues = values[t + 1] + delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] + advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam + returns = advantages + values + else: + returns = torch.zeros_like(rewards).to(device) + for t in reversed(range(args.num_steps)): + if t == args.num_steps - 1: + nextnonterminal = 1.0 - next_done + next_return = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + next_return = returns[t + 1] + returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return + advantages = returns - values + + # flatten the batch + b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) + b_logprobs = logprobs.reshape(-1) + b_actions = actions.reshape((-1,) + envs.single_action_space.shape) + b_advantages = advantages.reshape(-1) + b_returns = returns.reshape(-1) + b_values = values.reshape(-1) + + # Optimizing the policy and value network + b_inds = np.arange(args.batch_size) + clipfracs = [] + for epoch in range(args.update_epochs): + np.random.shuffle(b_inds) + for start in range(0, args.batch_size, args.minibatch_size): + end = start + args.minibatch_size + mb_inds = b_inds[start:end] + + _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds]) + logratio = newlogprob - b_logprobs[mb_inds] + ratio = logratio.exp() + + with torch.no_grad(): + # calculate approx_kl http://joschu.net/blog/kl-approx.html + old_approx_kl = (-logratio).mean() + approx_kl = ((ratio - 1) - logratio).mean() + clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()] + + mb_advantages = b_advantages[mb_inds] + if args.norm_adv: + mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) + + # Policy loss + pg_loss1 = -mb_advantages * ratio + pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + 
args.clip_coef) + pg_loss = torch.max(pg_loss1, pg_loss2).mean() + + # Value loss + newvalue = newvalue.view(-1) + if args.clip_vloss: + v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 + v_clipped = b_values[mb_inds] + torch.clamp( + newvalue - b_values[mb_inds], + -args.clip_coef, + args.clip_coef, + ) + v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 + v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) + v_loss = 0.5 * v_loss_max.mean() + else: + v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() + + entropy_loss = entropy.mean() + loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef + + optimizer.zero_grad() + loss.backward() + nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) + optimizer.step() + + if args.target_kl is not None: + if approx_kl > args.target_kl: + break + + y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() + var_y = np.var(y_true) + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent + mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) + writer.add_scalar("charts/episode_returns", np.mean(episode_rewards), global_step) + writer.add_scalar("losses/value_loss", v_loss.item(), global_step) + writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step) + writer.add_scalar("losses/entropy", entropy_loss.item(), global_step) + writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step) + writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step) + writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step) + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) + + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) + + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) + 
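+        # Editorial note (not in the original commit): the stddev_eval_return and
+        # stddev_train_return scalars above are passed the mean returns rather than the
+        # stddev values, and the slow_*_train metrics are computed with
+        # level_replay_evaluate instead of slow_level_replay_evaluate (compare with the
+        # bundled diff.patch); both look unintended. The curriculum/completed_episodes
+        # scalar below is also logged against the rollout `step` variable rather than
+        # global_step.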
writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) + + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() + writer.close() diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/conda-environment.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/conda-environment.yaml new file mode 100644 index 00000000..cd0b0b09 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/conda-environment.yaml @@ -0,0 +1,165 @@ +name: test2_py +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - ca-certificates=2024.3.11=h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.3=he6710b0_2 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - ncurses=6.4=h6a678d5_0 + - openssl=1.1.1w=h7f8727e_0 + - pip=23.3.1=py38h06a4308_0 + - python=3.8.5=h7579374_1 + - readline=8.2=h5eee18b_0 + - setuptools=68.2.2=py38h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - wheel=0.41.2=py38h06a4308_0 + - xz=5.4.6=h5eee18b_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - absl-py==2.1.0 + - aiosignal==1.3.1 + - alabaster==0.7.13 + - appdirs==1.4.4 + - attrs==23.2.0 + - babel==2.14.0 + - beautifulsoup4==4.12.3 + - cachetools==5.3.3 + - certifi==2024.2.2 + - cffi==1.16.0 + - charset-normalizer==3.3.2 + - click==8.1.7 + - cloudpickle==3.0.0 + - cmake==3.29.2 + - contourpy==1.1.1 + - cycler==0.12.1 + - dm-tree==0.1.8 + - docker-pycreds==0.4.0 + - docutils==0.20.1 + - exceptiongroup==1.2.0 + - farama-notifications==0.0.4 + - filelock==3.13.4 + - fonttools==4.51.0 + - frozenlist==1.4.1 + - fsspec==2024.3.1 + - furo==2024.1.29 + - future==1.0.0 + - gitdb==4.0.11 + - gitpython==3.1.43 + - glcontext==2.5.0 + - glfw==1.12.0 + - google-auth==2.29.0 + - google-auth-oauthlib==1.0.0 + - grpcio==1.62.1 + - gym==0.23.0 + - gym-notices==0.0.8 + - gymnasium==0.28.1 + - idna==3.7 + - imageio==2.34.0 + - imageio-ffmpeg==0.3.0 + - imagesize==1.4.1 + - importlib-metadata==7.1.0 + - importlib-resources==6.4.0 + - iniconfig==2.0.0 + - jax-jumpy==1.0.0 + - jinja2==3.1.3 + - jsonschema==4.21.1 + - jsonschema-specifications==2023.12.1 + - kiwisolver==1.4.5 + - lazy-loader==0.4 + - lz4==4.3.3 + - markdown==3.6 + - markdown-it-py==3.0.0 + - markupsafe==2.1.5 + - matplotlib==3.7.5 + - mdurl==0.1.2 + - moderngl==5.10.0 + - mpmath==1.3.0 + - msgpack==1.0.8 + - networkx==3.1 + - numpy==1.24.4 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==8.9.2.26 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - nvidia-cusparse-cu12==12.1.0.106 + - nvidia-nccl-cu12==2.19.3 + - nvidia-nvjitlink-cu12==12.4.127 + - nvidia-nvtx-cu12==12.1.105 + - oauthlib==3.2.2 + - packaging==24.0 + - pandas==2.0.3 + - pillow==10.3.0 + - pkgutil-resolve-name==1.3.10 + - pluggy==1.4.0 + - protobuf==4.25.3 + - psutil==5.9.8 + - py-cpuinfo==9.0.0 + - pyarrow==15.0.2 + - pyasn1==0.6.0 + - pyasn1-modules==0.4.0 + - pycparser==2.22 + - pyenchant==3.2.2 + - pyglet==1.4.11 + - pygments==2.17.2 + - pyparsing==3.1.2 + - 
pytest==8.1.1 + - pytest-benchmark==4.0.0 + - python-dateutil==2.9.0.post0 + - pytz==2024.1 + - pywavelets==1.4.1 + - pyyaml==6.0.1 + - ray==2.10.0 + - referencing==0.34.0 + - requests==2.31.0 + - requests-oauthlib==2.0.0 + - rich==13.7.1 + - rpds-py==0.18.0 + - rsa==4.9 + - scikit-image==0.21.0 + - scipy==1.10.0 + - sentry-sdk==1.45.0 + - setproctitle==1.3.3 + - shellingham==1.5.4 + - shimmy==1.3.0 + - six==1.16.0 + - smmap==5.0.1 + - snowballstemmer==2.2.0 + - soupsieve==2.5 + - sphinx==7.1.2 + - sphinx-basic-ng==1.0.0b2 + - sphinx-tabs==3.4.5 + - sphinxcontrib-applehelp==1.0.4 + - sphinxcontrib-devhelp==1.0.2 + - sphinxcontrib-htmlhelp==2.0.1 + - sphinxcontrib-jsmath==1.0.1 + - sphinxcontrib-qthelp==1.0.3 + - sphinxcontrib-serializinghtml==1.1.5 + - sphinxcontrib-spelling==8.0.0 + - syllabus-rl==0.5 + - sympy==1.12 + - tensorboard==2.14.0 + - tensorboard-data-server==0.7.2 + - tensorboardx==2.6.2.2 + - tifffile==2023.7.10 + - tomli==2.0.1 + - torch==2.2.2 + - triton==2.2.0 + - typer==0.12.3 + - typing-extensions==4.11.0 + - tzdata==2024.1 + - urllib3==2.2.1 + - wandb==0.16.6 + - werkzeug==3.0.2 + - zipp==3.18.1 +prefix: /home/user/miniconda/envs/test2_py + diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/config.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/config.yaml new file mode 100644 index 00000000..fc31f40a --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/config.yaml @@ -0,0 +1,130 @@ +wandb_version: 1 + +exp_name: + desc: null + value: cleanrl_procgen_plr +seed: + desc: null + value: 1 +torch_deterministic: + desc: null + value: true +cuda: + desc: null + value: true +track: + desc: null + value: true +wandb_project_name: + desc: null + value: syllabus +wandb_entity: + desc: null + value: null +capture_video: + desc: null + value: false +logging_dir: + desc: null + value: . 
+env_id: + desc: null + value: bigfish +total_timesteps: + desc: null + value: 25000000 +learning_rate: + desc: null + value: 0.0005 +num_envs: + desc: null + value: 64 +num_steps: + desc: null + value: 256 +anneal_lr: + desc: null + value: false +gae: + desc: null + value: true +gamma: + desc: null + value: 0.999 +gae_lambda: + desc: null + value: 0.95 +num_minibatches: + desc: null + value: 8 +update_epochs: + desc: null + value: 3 +norm_adv: + desc: null + value: true +clip_coef: + desc: null + value: 0.2 +clip_vloss: + desc: null + value: true +ent_coef: + desc: null + value: 0.01 +vf_coef: + desc: null + value: 0.5 +max_grad_norm: + desc: null + value: 0.5 +target_kl: + desc: null + value: null +full_dist: + desc: null + value: true +curriculum: + desc: null + value: true +curriculum_method: + desc: null + value: plr +num_eval_episodes: + desc: null + value: 10 +batch_size: + desc: null + value: 16384 +minibatch_size: + desc: null + value: 2048 +_wandb: + desc: null + value: + code_path: code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py + python_version: 3.8.5 + cli_version: 0.16.6 + framework: torch + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1713845396.0 + t: + 1: + - 1 + - 30 + - 55 + 2: + - 1 + - 30 + - 55 + 3: + - 13 + - 16 + - 23 + - 35 + 4: 3.8.5 + 5: 0.16.6 + 8: + - 5 + 13: linux-x86_64 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/diff.patch b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/diff.patch new file mode 100644 index 00000000..f683cbce --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/diff.patch @@ -0,0 +1,133 @@ +diff --git a/setup.py b/setup.py +index 31e09f2..22a94e8 100644 +--- a/setup.py ++++ b/setup.py +@@ -2,7 +2,7 @@ from setuptools import find_packages, setup + + + extras = dict() +-extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] ++extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] + extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] + extras['all'] = extras['test'] + extras['docs'] + +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +index dabcd50..b807304 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py ++++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +@@ -136,7 +136,7 @@ def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + env = MultiProcessingSyncWrapper( + env, + curriculum.get_components(), +- update_on_step=curriculum.requires_step_updates, ++ update_on_step=False, + task_space=env.task_space, + ) + return env +@@ -150,37 +150,31 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def full_level_replay_evaluate( ++def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, +- num_levels=1 # Not used ++ num_levels=0 + ): + policy.eval() + + eval_envs = ProcgenEnv( +- num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False ++ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) +- +- # Seed environments +- seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in 
range(num_episodes)] +- for i, seed in enumerate(seeds): +- eval_envs.seed(seed, i) +- + eval_obs, _ = eval_envs.reset() +- eval_episode_rewards = [-1] * num_episodes ++ eval_episode_rewards = [] + +- while -1 in eval_episode_rewards: ++ while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): +- if 'episode' in info.keys() and eval_episode_rewards[i] == -1: +- eval_episode_rewards[i] = info['episode']['r'] ++ if 'episode' in info.keys(): ++ eval_episode_rewards.append(info['episode']['r']) + + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) +@@ -251,7 +245,7 @@ if __name__ == "__main__": + ) + # wandb.run.log_code("./syllabus/examples") + +- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) ++ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), +@@ -271,7 +265,9 @@ if __name__ == "__main__": + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) +- sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) ++ # code to edit ++ # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) ++ sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) + + # Intialize Curriculum Method + if args.curriculum_method == "plr": +@@ -485,13 +481,13 @@ if __name__ == "__main__": + mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) +- full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( ++ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) +- full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( ++ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + +@@ -510,17 +506,17 @@ if __name__ == "__main__": + + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) +- writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) ++ writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, 
global_step) ++ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) + + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) +- writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) +- writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) +- writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) +- writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) ++ writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) + + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/events.out.tfevents.1713845400.f411843fc70b.2432.0 b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/events.out.tfevents.1713845400.f411843fc70b.2432.0 new file mode 120000 index 00000000..24fc08a3 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/events.out.tfevents.1713845400.f411843fc70b.2432.0 @@ -0,0 +1 @@ +/data/averma/MARL/Syllabus/syllabus/examples/training_scripts/runs/bigfish__cleanrl_procgen_plr__1__1713845394/events.out.tfevents.1713845400.f411843fc70b.2432.0 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/requirements.txt b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/requirements.txt new file mode 100644 index 00000000..7f33d240 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/requirements.txt @@ -0,0 +1,146 @@ +Babel==2.14.0 +Farama-Notifications==0.0.4 +GitPython==3.1.43 +Jinja2==3.1.3 +Markdown==3.6 +MarkupSafe==2.1.5 +PyWavelets==1.4.1 +PyYAML==6.0.1 +Pygments==2.17.2 +Shimmy==1.3.0 +Sphinx==7.1.2 +Syllabus-RL==0.5 +Werkzeug==3.0.2 +absl-py==2.1.0 +aiosignal==1.3.1 +alabaster==0.7.13 +appdirs==1.4.4 +attrs==23.2.0 +beautifulsoup4==4.12.3 +cachetools==5.3.3 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpickle==3.0.0 +cmake==3.29.2 +colorama==0.4.6 +contourpy==1.1.1 +cycler==0.12.1 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +docutils==0.20.1 +exceptiongroup==1.2.0 +filelock==3.13.4 +fonttools==4.51.0 +frozenlist==1.4.1 +fsspec==2024.3.1 +furo==2024.1.29 +future==1.0.0 +gitdb==4.0.11 +glcontext==2.5.0 +glfw==1.12.0 +google-auth-oauthlib==1.0.0 +google-auth==2.29.0 +grpcio==1.62.1 +gym-notices==0.0.8 +gym==0.23.0 +gymnasium==0.28.1 +idna==3.7 +imageio-ffmpeg==0.3.0 +imageio==2.34.0 +imagesize==1.4.1 +importlib_metadata==7.1.0 +importlib_resources==6.4.0 +iniconfig==2.0.0 +jax-jumpy==1.0.0 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +kiwisolver==1.4.5 +lazy_loader==0.4 +lz4==4.3.3 +markdown-it-py==3.0.0 +matplotlib==3.7.5 +mdurl==0.1.2 
+moderngl==5.10.0 +mpmath==1.3.0 +msgpack==1.0.8 +networkx==3.1 +numpy==1.24.4 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.19.3 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nvtx-cu12==12.1.105 +oauthlib==3.2.2 +packaging==24.0 +pandas==2.0.3 +pillow==10.3.0 +pip==23.3.1 +pkgutil_resolve_name==1.3.10 +pluggy==1.4.0 +procgen==0.9.5+ed4be81 +protobuf==4.25.3 +psutil==5.9.8 +psutil==5.9.8 +py-cpuinfo==9.0.0 +pyarrow==15.0.2 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pycparser==2.22 +pyenchant==3.2.2 +pyglet==1.4.11 +pyparsing==3.1.2 +pytest-benchmark==4.0.0 +pytest==8.1.1 +python-dateutil==2.9.0.post0 +pytz==2024.1 +ray==2.10.0 +referencing==0.34.0 +requests-oauthlib==2.0.0 +requests==2.31.0 +rich==13.7.1 +rpds-py==0.18.0 +rsa==4.9 +scikit-image==0.21.0 +scipy==1.10.0 +sentry-sdk==1.45.0 +setproctitle==1.2.2 +setproctitle==1.3.3 +setuptools==68.2.2 +shellingham==1.5.4 +six==1.16.0 +smmap==5.0.1 +snowballstemmer==2.2.0 +soupsieve==2.5 +sphinx-basic-ng==1.0.0b2 +sphinx-tabs==3.4.5 +sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-spelling==8.0.0 +sympy==1.12 +tensorboard-data-server==0.7.2 +tensorboard==2.14.0 +tensorboardX==2.6.2.2 +tifffile==2023.7.10 +tomli==2.0.1 +torch==2.2.2 +triton==2.2.0 +typer==0.12.3 +typing_extensions==4.11.0 +tzdata==2024.1 +urllib3==2.2.1 +wandb==0.16.6 +wheel==0.41.2 +zipp==3.18.1 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch new file mode 100644 index 00000000..66d57e37 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch @@ -0,0 +1,1419 @@ +diff --git a/setup.py b/setup.py +index 31e09f2..22a94e8 100644 +--- a/setup.py ++++ b/setup.py +@@ -2,7 +2,7 @@ from setuptools import find_packages, setup + + + extras = dict() +-extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] ++extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] + extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] + extras['all'] = extras['test'] + extras['docs'] + +diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py +index 03284da..4ca9aeb 100644 +--- a/syllabus/core/curriculum_base.py ++++ b/syllabus/core/curriculum_base.py +@@ -76,7 +76,7 @@ class Curriculum: + """ + self.completed_tasks += 1 + +- def update_on_step(self, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: ++ def update_on_step(self, task: typing.Any, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: + """ Update the curriculum with the current step results from the environment. 
+ + :param obs: Observation from teh environment +@@ -88,7 +88,7 @@ class Curriculum: + """ + raise NotImplementedError("This curriculum does not require step updates. Set update_on_step for the environment sync wrapper to False to improve performance and prevent this error.") + +- def update_on_step_batch(self, step_results: List[typing.Tuple[int, int, int, int, int]], env_id: int = None) -> None: ++ def update_on_step_batch(self, step_results: List[typing.Tuple[Any, Any, int, int, int, int]], env_id: int = None) -> None: + """Update the curriculum with a batch of step results from the environment. + + This method can be overridden to provide a more efficient implementation. It is used +@@ -96,9 +96,9 @@ class Curriculum: + + :param step_results: List of step results + """ +- obs, rews, terms, truncs, infos = tuple(step_results) ++ tasks, obs, rews, terms, truncs, infos = tuple(step_results) + for i in range(len(obs)): +- self.update_on_step(obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) ++ self.update_on_step(tasks[i], obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) + + def update_on_episode(self, episode_return: float, episode_length: int, episode_task: Any, env_id: int = None) -> None: + """Update the curriculum with episode results from the environment. +diff --git a/syllabus/core/curriculum_sync_wrapper.py b/syllabus/core/curriculum_sync_wrapper.py +index 6e069d8..f986643 100644 +--- a/syllabus/core/curriculum_sync_wrapper.py ++++ b/syllabus/core/curriculum_sync_wrapper.py +@@ -29,6 +29,14 @@ class CurriculumWrapper: + def tasks(self): + return self.task_space.tasks + ++ @property ++ def requires_step_updates(self): ++ return self.curriculum.requires_step_updates ++ ++ @property ++ def requires_episode_updates(self): ++ return self.curriculum.requires_episode_updates ++ + def get_tasks(self, task_space=None): + return self.task_space.get_tasks(gym_space=task_space) + +diff --git a/syllabus/core/environment_sync_wrapper.py b/syllabus/core/environment_sync_wrapper.py +index c995aa1..6edee7c 100644 +--- a/syllabus/core/environment_sync_wrapper.py ++++ b/syllabus/core/environment_sync_wrapper.py +@@ -19,7 +19,8 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + def __init__(self, + env, + components: MultiProcessingComponents, +- update_on_step: bool = True, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? ++ update_on_step: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? ++ update_on_progress: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? 
+ batch_size: int = 100, + buffer_size: int = 2, # Having an extra task in the buffer minimizes wait time at reset + task_space: TaskSpace = None, +@@ -34,6 +35,7 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + self.update_queue = components.update_queue + self.task_space = task_space + self.update_on_step = update_on_step ++ self.update_on_progress = update_on_progress + self.batch_size = batch_size + self.global_task_completion = global_task_completion + self.task_progress = 0.0 +@@ -125,17 +127,21 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + def _package_step_updates(self): + step_batch = { + "update_type": "step_batch", +- "metrics": ([self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), ++ "metrics": ([self._tasks[:self._batch_step], self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), + "env_id": self.instance_id, + "request_sample": False + } +- task_batch = { +- "update_type": "task_progress_batch", +- "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), +- "env_id": self.instance_id, +- "request_sample": False +- } +- return [step_batch, task_batch] ++ update = [step_batch] ++ ++ if self.update_on_progress: ++ task_batch = { ++ "update_type": "task_progress_batch", ++ "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), ++ "env_id": self.instance_id, ++ "request_sample": False ++ } ++ update.append(task_batch) ++ return update + + def add_task(self, task): + update = { +diff --git a/syllabus/curricula/annealing_box.py b/syllabus/curricula/annealing_box.py +index 6c565ec..101981c 100644 +--- a/syllabus/curricula/annealing_box.py ++++ b/syllabus/curricula/annealing_box.py +@@ -49,8 +49,8 @@ class AnnealingBoxCurriculum(Curriculum): + """ + # Linear annealing from start_values to end_values + annealed_values = ( +- self.start_values + (self.end_values - self.start_values) * +- np.minimum(self.current_step, self.total_steps) / self.total_steps ++ self.start_values + (self.end_values - self.start_values) * ++ np.minimum(self.current_step, self.total_steps) / self.total_steps + ) + +- return [annealed_values.copy() for _ in range(k)] +\ No newline at end of file ++ return [annealed_values.copy() for _ in range(k)] +diff --git a/syllabus/curricula/noop.py b/syllabus/curricula/noop.py +index f6bd5dc..fb5d8ae 100644 +--- a/syllabus/curricula/noop.py ++++ b/syllabus/curricula/noop.py +@@ -28,7 +28,7 @@ class NoopCurriculum(Curriculum): + """ + pass + +- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: + """ + Update the curriculum with the current step results from the environment. + """ +diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py +index 9515df4..9c808dd 100644 +--- a/syllabus/curricula/plr/plr_wrapper.py ++++ b/syllabus/curricula/plr/plr_wrapper.py +@@ -23,16 +23,15 @@ class RolloutStorage(object): + get_value=None, + ): + self.num_steps = num_steps +- self.buffer_steps = num_steps * 2 # Hack to prevent overflow from lagging updates. ++ self.buffer_steps = num_steps * 4 # Hack to prevent overflow from lagging updates. 
+ self.num_processes = num_processes + self._requires_value_buffers = requires_value_buffers + self._get_value = get_value + self.tasks = torch.zeros(self.buffer_steps, num_processes, 1, dtype=torch.int) + self.masks = torch.ones(self.buffer_steps + 1, num_processes, 1) + self.obs = [[[0] for _ in range(self.num_processes)]] * self.buffer_steps +- self._fill = torch.zeros(self.buffer_steps, num_processes, 1) + self.env_steps = [0] * num_processes +- self.should_update = False ++ self.ready_buffers = set() + + if requires_value_buffers: + self.returns = torch.zeros(self.buffer_steps + 1, num_processes, 1) +@@ -46,12 +45,10 @@ class RolloutStorage(object): + self.action_log_dist = torch.zeros(self.buffer_steps, num_processes, action_space.n) + + self.num_steps = num_steps +- self.step = 0 + + def to(self, device): + self.masks = self.masks.to(device) + self.tasks = self.tasks.to(device) +- self._fill = self._fill.to(device) + if self._requires_value_buffers: + self.rewards = self.rewards.to(device) + self.value_preds = self.value_preds.to(device) +@@ -59,108 +56,79 @@ class RolloutStorage(object): + else: + self.action_log_dist = self.action_log_dist.to(device) + +- def insert(self, masks, action_log_dist=None, value_preds=None, rewards=None, tasks=None): +- if self._requires_value_buffers: +- assert (value_preds is not None and rewards is not None), "Selected strategy requires value_preds and rewards" +- if len(rewards.shape) == 3: +- rewards = rewards.squeeze(2) +- self.value_preds[self.step].copy_(torch.as_tensor(value_preds)) +- self.rewards[self.step].copy_(torch.as_tensor(rewards)[:, None]) +- self.masks[self.step + 1].copy_(torch.as_tensor(masks)[:, None]) +- else: +- self.action_log_dist[self.step].copy_(action_log_dist) +- if tasks is not None: +- assert isinstance(tasks[0], int), "Provided task must be an integer" +- self.tasks[self.step].copy_(torch.as_tensor(tasks)[:, None]) +- self.step = (self.step + 1) % self.num_steps +- + def insert_at_index(self, env_index, mask=None, action_log_dist=None, obs=None, reward=None, task=None, steps=1): +- if env_index >= self.num_processes: +- warnings.warn(f"Env index {env_index} is greater than the number of processes {self.num_processes}. Using index {env_index % self.num_processes} instead.") +- env_index = env_index % self.num_processes +- + step = self.env_steps[env_index] + end_step = step + steps +- # Update buffer fill traacker, and check for common usage errors. +- try: +- if end_step > len(self._fill): +- raise IndexError +- self._fill[step:end_step, env_index] = 1 +- except IndexError as e: +- if any(self._fill[:][env_index] == 0): +- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. Your value for PLR's num_processes may be too high.") from e +- else: +- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. 
Your value for PLR's num_processes may be too low.") from e + + if mask is not None: + self.masks[step + 1:end_step + 1, env_index].copy_(torch.as_tensor(mask[:, None])) ++ + if obs is not None: + for s in range(step, end_step): + self.obs[s][env_index] = obs[s - step] ++ + if reward is not None: + self.rewards[step:end_step, env_index].copy_(torch.as_tensor(reward[:, None])) ++ + if action_log_dist is not None: + self.action_log_dist[step:end_step, env_index].copy_(torch.as_tensor(action_log_dist[:, None])) ++ + if task is not None: + try: +- task = int(task) ++ int(task[0]) + except TypeError: +- assert isinstance(task, int), f"Provided task must be an integer, got {task} with type {type(task)} instead." +- self.tasks[step:end_step, env_index].copy_(torch.as_tensor(task)) +- else: +- self.env_steps[env_index] += steps +- # Hack for now, we call insert_at_index twice +- while all(self._fill[self.step] == 1): +- self.step = (self.step + 1) % self.buffer_steps +- # Check if we have enough steps to compute a task sampler update +- if self.step == self.num_steps + 1: +- self.should_update = True +- +- def _get_values(self): ++ assert isinstance(task, int), f"Provided task must be an integer, got {task[0]} with type {type(task[0])} instead." ++ self.tasks[step:end_step, env_index].copy_(torch.as_tensor(np.array(task)[:, None])) ++ ++ self.env_steps[env_index] += steps ++ if env_index not in self.ready_buffers and self.env_steps[env_index] >= self.num_steps: ++ self.ready_buffers.add(env_index) ++ ++ def _get_values(self, env_index): + if self._get_value is None: + raise UsageError("Selected strategy requires value predictions. Please provide get_value function.") +- for step in range(self.num_steps): +- values = self._get_value(self.obs[step]) ++ for step in range(0, self.num_steps, self.num_processes): ++ obs = self.obs[step: step + self.num_processes][env_index] ++ values = self._get_value(obs) ++ ++ # Reshape values if necessary + if len(values.shape) == 3: + warnings.warn(f"Value function returned a 3D tensor of shape {values.shape}. Attempting to squeeze last dimension.") + values = torch.squeeze(values, -1) + if len(values.shape) == 1: + warnings.warn(f"Value function returned a 1D tensor of shape {values.shape}. 
Attempting to unsqueeze last dimension.") + values = torch.unsqueeze(values, -1) +- self.value_preds[step].copy_(values) + +- def after_update(self): ++ self.value_preds[step: step + self.num_processes, env_index].copy_(values) ++ ++ def after_update(self, env_index): + # After consuming the first num_steps of data, remove them and shift the remaining data in the buffer +- self.tasks[0: self.num_steps].copy_(self.tasks[self.num_steps: self.buffer_steps]) +- self.masks[0: self.num_steps].copy_(self.masks[self.num_steps: self.buffer_steps]) +- self.obs[0: self.num_steps][:] = self.obs[self.num_steps: self.buffer_steps][:] ++ self.tasks = self.tasks.roll(-self.num_steps, 0) ++ self.masks = self.masks.roll(-self.num_steps, 0) ++ self.obs[0:][env_index] = self.obs[self.num_steps: self.buffer_steps][env_index] + + if self._requires_value_buffers: +- self.returns[0: self.num_steps].copy_(self.returns[self.num_steps: self.buffer_steps]) +- self.rewards[0: self.num_steps].copy_(self.rewards[self.num_steps: self.buffer_steps]) +- self.value_preds[0: self.num_steps].copy_(self.value_preds[self.num_steps: self.buffer_steps]) ++ self.returns = self.returns.roll(-self.num_steps, 0) ++ self.rewards = self.rewards.roll(-self.num_steps, 0) ++ self.value_preds = self.value_preds.roll(-self.num_steps, 0) + else: +- self.action_log_dist[0: self.num_steps].copy_(self.action_log_dist[self.num_steps: self.buffer_steps]) ++ self.action_log_dist = self.action_log_dist.roll(-self.num_steps, 0) + +- self._fill[0: self.num_steps].copy_(self._fill[self.num_steps: self.buffer_steps]) +- self._fill[self.num_steps: self.buffer_steps].copy_(0) ++ self.env_steps[env_index] -= self.num_steps ++ self.ready_buffers.remove(env_index) + +- self.env_steps = [steps - self.num_steps for steps in self.env_steps] +- self.should_update = False +- self.step = self.step - self.num_steps +- +- def compute_returns(self, gamma, gae_lambda): ++ def compute_returns(self, gamma, gae_lambda, env_index): + assert self._requires_value_buffers, "Selected strategy does not use compute_rewards." +- self._get_values() ++ self._get_values(env_index) + gae = 0 + for step in reversed(range(self.rewards.size(0), self.num_steps)): + delta = ( +- self.rewards[step] +- + gamma * self.value_preds[step + 1] * self.masks[step + 1] +- - self.value_preds[step] ++ self.rewards[step, env_index] ++ + gamma * self.value_preds[step + 1, env_index] * self.masks[step + 1, env_index] ++ - self.value_preds[step, env_index] + ) +- gae = delta + gamma * gae_lambda * self.masks[step + 1] * gae +- self.returns[step] = gae + self.value_preds[step] ++ gae = delta + gamma * gae_lambda * self.masks[step + 1, env_index] * gae ++ self.returns[step, env_index] = gae + self.value_preds[step, env_index] + + + def null(x): +@@ -252,11 +220,15 @@ class PrioritizedLevelReplay(Curriculum): + else: + return [self._task_sampler.sample() for _ in range(k)] + +- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: + """ + Update the curriculum with the current step results from the environment. + """ + assert env_id is not None, "env_id must be provided for PLR updates." ++ if env_id >= self._num_processes: ++ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. 
Using index {env_id % self._num_processes} instead.") ++ env_id = env_id % self._num_processes ++ + # Update rollouts + self._rollouts.insert_at_index( + env_id, +@@ -266,14 +238,22 @@ class PrioritizedLevelReplay(Curriculum): + obs=np.array([obs]), + ) + ++ # Update task sampler ++ if env_id in self._rollouts.ready_buffers: ++ self._update_sampler(env_id) ++ + def update_on_step_batch( +- self, step_results: List[Tuple[Any, int, bool, bool, Dict]], env_id: int = None ++ self, step_results: List[Tuple[int, Any, int, bool, bool, Dict]], env_id: int = None + ) -> None: + """ + Update the curriculum with a batch of step results from the environment. + """ + assert env_id is not None, "env_id must be provided for PLR updates." +- obs, rews, terms, truncs, infos = step_results ++ if env_id >= self._num_processes: ++ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. Using index {env_id % self._num_processes} instead.") ++ env_id = env_id % self._num_processes ++ ++ tasks, obs, rews, terms, truncs, infos = step_results + self._rollouts.insert_at_index( + env_id, + mask=np.logical_not(np.logical_or(terms, truncs)), +@@ -281,25 +261,19 @@ class PrioritizedLevelReplay(Curriculum): + reward=rews, + obs=obs, + steps=len(rews), ++ task=tasks, + ) + +- def update_task_progress(self, task: Any, success_prob: float, env_id: int = None) -> None: +- """ +- Update the curriculum with a task and its success probability upon +- success or failure. +- """ +- assert env_id is not None, "env_id must be provided for PLR updates." +- self._rollouts.insert_at_index( +- env_id, +- task=task, +- ) + # Update task sampler +- if self._rollouts.should_update: +- if self._task_sampler.requires_value_buffers: +- self._rollouts.compute_returns(self._gamma, self._gae_lambda) +- self._task_sampler.update_with_rollouts(self._rollouts) +- self._rollouts.after_update() +- self._task_sampler.after_update() ++ if env_id in self._rollouts.ready_buffers: ++ self._update_sampler(env_id) ++ ++ def _update_sampler(self, env_id): ++ if self._task_sampler.requires_value_buffers: ++ self._rollouts.compute_returns(self._gamma, self._gae_lambda, env_id) ++ self._task_sampler.update_with_rollouts(self._rollouts, env_id) ++ self._rollouts.after_update(env_id) ++ self._task_sampler.after_update() + + def _enumerate_tasks(self, space): + assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete" +@@ -312,10 +286,10 @@ class PrioritizedLevelReplay(Curriculum): + """ + Log the task distribution to the provided tensorboard writer. 
+ """ +- super().log_metrics(writer, step) ++ # super().log_metrics(writer, step) + metrics = self._task_sampler.metrics() + writer.add_scalar("curriculum/proportion_seen", metrics["proportion_seen"], step) + writer.add_scalar("curriculum/score", metrics["score"], step) +- for task in list(self.task_space.tasks)[:10]: +- writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) +- writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) ++ # for task in list(self.task_space.tasks)[:10]: ++ # writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) ++ # writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) +diff --git a/syllabus/curricula/plr/task_sampler.py b/syllabus/curricula/plr/task_sampler.py +index 15ad485..c1e97a1 100644 +--- a/syllabus/curricula/plr/task_sampler.py ++++ b/syllabus/curricula/plr/task_sampler.py +@@ -73,7 +73,7 @@ class TaskSampler: + 'Must provide action space to PLR if using "policy_entropy", "least_confidence", or "min_margin" strategies' + ) + +- def update_with_rollouts(self, rollouts): ++ def update_with_rollouts(self, rollouts, actor_id=None): + if self.strategy == "random": + return + +@@ -93,7 +93,7 @@ class TaskSampler: + else: + raise ValueError(f"Unsupported strategy, {self.strategy}") + +- self._update_with_rollouts(rollouts, score_function) ++ self._update_with_rollouts(rollouts, score_function, actor_index=actor_id) + + def update_task_score(self, actor_index, task_idx, score, num_steps): + score = self._partial_update_task_score(actor_index, task_idx, score, num_steps, done=True) +@@ -165,14 +165,15 @@ class TaskSampler: + def requires_value_buffers(self): + return self.strategy in ["gae", "value_l1", "one_step_td_error"] + +- def _update_with_rollouts(self, rollouts, score_function): ++ def _update_with_rollouts(self, rollouts, score_function, actor_index=None): + tasks = rollouts.tasks + if not self.requires_value_buffers: + policy_logits = rollouts.action_log_dist + done = ~(rollouts.masks > 0) + total_steps, num_actors = rollouts.tasks.shape[:2] + +- for actor_index in range(num_actors): ++ actors = [actor_index] if actor_index is not None else range(num_actors) ++ for actor_index in actors: + done_steps = done[:, actor_index].nonzero()[:total_steps, 0] + start_t = 0 + +diff --git a/syllabus/curricula/sequential.py b/syllabus/curricula/sequential.py +index baa1263..ec3b8b0 100644 +--- a/syllabus/curricula/sequential.py ++++ b/syllabus/curricula/sequential.py +@@ -177,9 +177,9 @@ class SequentialCurriculum(Curriculum): + if self.current_curriculum.requires_episode_updates: + self.current_curriculum.update_on_episode(episode_return, episode_len, episode_task, env_id) + +- def update_on_step(self, obs, rew, term, trunc, info, env_id=None): ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id=None): + if self.current_curriculum.requires_step_updates: +- self.current_curriculum.update_on_step(obs, rew, term, trunc, info, env_id) ++ self.current_curriculum.update_on_step(task, obs, rew, term, trunc, info, env_id) + + def update_on_step_batch(self, step_results, env_id=None): + if self.current_curriculum.requires_step_updates: +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py +index a6d469e..b848d69 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py 
++++ b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py +@@ -14,6 +14,7 @@ import gym as openai_gym + import gymnasium as gym + import numpy as np + import procgen # noqa: F401 ++from procgen import ProcgenEnv + import torch + import torch.nn as nn + import torch.optim as optim +@@ -21,10 +22,10 @@ from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 + from torch.utils.tensorboard import SummaryWriter + + from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum +-from syllabus.curricula import DomainRandomization, LearningProgressCurriculum, CentralizedPrioritizedLevelReplay ++from syllabus.curricula import CentralizedPrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum + from syllabus.examples.models import ProcgenAgent + from syllabus.examples.task_wrappers import ProcgenTaskWrapper +-from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize ++from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + + def parse_args(): +@@ -46,6 +47,8 @@ def parse_args(): + help="the entity (team) of wandb's project") + parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="weather to capture videos of the agent performances (check out `videos` folder)") ++ parser.add_argument("--logging-dir", type=str, default=".", ++ help="the base directory for logging and wandb storage.") + + # Algorithm specific arguments + parser.add_argument("--env-id", type=str, default="starpilot", +@@ -124,15 +127,15 @@ PROCGEN_RETURN_BOUNDS = { + } + + +-def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): ++def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) +- env = ProcgenTaskWrapper(env, env_id, seed=seed) +- if curriculum_components is not None: ++ if curriculum is not None: ++ env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, +- curriculum_components, ++ curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, + ) +@@ -147,36 +150,38 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def level_replay_evaluate( ++def full_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, +- num_levels=0 ++ num_levels=1 # Not used + ): + policy.eval() +- eval_envs = gym.vector.SyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) +- for i in range(1) +- ] ++ ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False + ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + +- eval_episode_rewards = [] ++ # Seed environments ++ seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] ++ for i, seed in enumerate(seeds): ++ eval_envs.seed(seed, i) ++ + eval_obs, _ = eval_envs.reset() ++ eval_episode_rewards = [-1] * num_episodes + +- while len(eval_episode_rewards) < num_episodes: ++ while -1 in eval_episode_rewards: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + +- eval_obs, _, truncs, terms, infos = 
eval_envs.step(np.array([eval_action.cpu().numpy()])) +- +- for info in infos: +- if 'episode' in info.keys(): +- eval_episode_rewards.append(info['episode']['r']) ++ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) ++ for i, info in enumerate(infos): ++ if 'episode' in info.keys() and eval_episode_rewards[i] == -1: ++ eval_episode_rewards[i] = info['episode']['r'] + +- eval_envs.close() + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +@@ -185,8 +190,7 @@ def level_replay_evaluate( + return mean_returns, stddev_returns, normalized_mean_returns + + +-def fast_level_replay_evaluate( +- eval_envs, ++def level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -194,9 +198,13 @@ def fast_level_replay_evaluate( + num_levels=0 + ): + policy.eval() +- possible_seeds = np.arange(0, num_levels + 1) +- eval_obs, _ = eval_envs.reset(seed=list(np.random.choice(possible_seeds, size=num_episodes))) + ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False ++ ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") ++ eval_envs = wrap_vecenv(eval_envs) ++ eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: +@@ -231,10 +239,11 @@ if __name__ == "__main__": + name=run_name, + monitor_gym=True, + save_code=True, +- # dir="/fs/nexus-scratch/rsulli/" ++ dir=args.logging_dir + ) +- wandb.run.log_code("./syllabus/examples") +- writer = SummaryWriter(f"./runs/{run_name}") ++ # wandb.run.log_code("./syllabus/examples") ++ ++ writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), +@@ -250,7 +259,7 @@ if __name__ == "__main__": + print("Device:", device) + + # Curriculum setup +- task_queue = update_queue = None ++ curriculum = None + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) +@@ -273,6 +282,16 @@ if __name__ == "__main__": + elif args.curriculum_method == "lp": + print("Using learning progress.") + curriculum = LearningProgressCurriculum(sample_env.task_space) ++ elif args.curriculum_method == "sq": ++ print("Using sequential curriculum.") ++ curricula = [] ++ stopping = [] ++ for i in range(199): ++ curricula.append(i + 1) ++ stopping.append("steps>=50000") ++ curricula.append(list(range(i + 1))) ++ stopping.append("steps>=50000") ++ curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) + else: + raise ValueError(f"Unknown curriculum method {args.curriculum_method}") + curriculum = make_multiprocessing_curriculum(curriculum) +@@ -285,7 +304,7 @@ if __name__ == "__main__": + make_env( + args.env_id, + args.seed + i, +- curriculum_components=curriculum.get_components() if args.curriculum else None, ++ curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) +@@ -293,22 +312,6 @@ if __name__ == "__main__": + ) + envs = wrap_vecenv(envs) + +- test_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=0) +- for i in range(args.num_eval_episodes) +- ] +- ) +- test_eval_envs = wrap_vecenv(test_eval_envs) +- +- train_eval_envs = 
gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=200) +- for i in range(args.num_eval_episodes) +- ] +- ) +- train_eval_envs = wrap_vecenv(train_eval_envs) +- + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( +@@ -369,6 +372,8 @@ if __name__ == "__main__": + print(f"global_step={global_step}, episodic_return={item['episode']['r']}") + writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) ++ if curriculum is not None: ++ curriculum.log_metrics(writer, global_step) + break + + # Syllabus curriculum update +@@ -388,8 +393,6 @@ if __name__ == "__main__": + }, + } + curriculum.update(update) +- #if args.curriculum: +- # curriculum.log_metrics(writer, global_step) + + # bootstrap value if not done + with torch.no_grad(): +@@ -487,8 +490,18 @@ if __name__ == "__main__": + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent +- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) +- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) ++ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) ++ full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) +@@ -502,12 +515,21 @@ if __name__ == "__main__": + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) ++ + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) ++ + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) +- 
writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) ++ writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) ++ + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +index e13c22e..b807304 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py ++++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +@@ -14,6 +14,7 @@ import gym as openai_gym + import gymnasium as gym + import numpy as np + import procgen # noqa: F401 ++from procgen import ProcgenEnv + import torch + import torch.nn as nn + import torch.optim as optim +@@ -24,7 +25,7 @@ from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curri + from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum + from syllabus.examples.models import ProcgenAgent + from syllabus.examples.task_wrappers import ProcgenTaskWrapper +-from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize ++from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + + def parse_args(): +@@ -126,18 +127,17 @@ PROCGEN_RETURN_BOUNDS = { + } + + +-def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): ++def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) +- env = ProcgenTaskWrapper(env, env_id, seed=seed) +- if curriculum_components is not None: ++ if curriculum is not None: ++ env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, +- curriculum_components, ++ curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, +- buffer_size=4, + ) + return env + return thunk +@@ -150,7 +150,7 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def level_replay_evaluate( ++def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -158,28 +158,24 @@ def level_replay_evaluate( + num_levels=0 + ): + policy.eval() +- eval_envs = gym.vector.SyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) +- for i in range(1) +- ] ++ ++ eval_envs = ProcgenEnv( ++ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) +- +- eval_episode_rewards = [] + eval_obs, _ = eval_envs.reset() ++ eval_episode_rewards = [] + + while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + +- eval_obs, _, truncs, terms, infos = eval_envs.step(np.array([eval_action.cpu().numpy()])) +- +- for info in infos: ++ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) ++ for i, 
info in enumerate(infos): + if 'episode' in info.keys(): + eval_episode_rewards.append(info['episode']['r']) + +- eval_envs.close() + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +@@ -188,8 +184,7 @@ def level_replay_evaluate( + return mean_returns, stddev_returns, normalized_mean_returns + + +-def fast_level_replay_evaluate( +- eval_envs, ++def level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -198,15 +193,12 @@ def fast_level_replay_evaluate( + ): + policy.eval() + +- # Choose evaluation seeds +- if num_levels == 0: +- seeds = np.random.randint(0, 2 ** 16 - 1, size=num_episodes) +- else: +- seeds = np.random.choice(np.arange(0, num_levels), size=num_episodes) +- +- seed_envs = [(int(seed), env) for seed, env in zip(seeds, range(num_episodes))] +- eval_obs, _ = eval_envs.reset(seed=seed_envs) +- ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False ++ ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") ++ eval_envs = wrap_vecenv(eval_envs) ++ eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: +@@ -251,9 +243,9 @@ if __name__ == "__main__": + save_code=True, + dir=args.logging_dir + ) +- wandb.run.log_code(os.path.join(args.logging_dir, "/syllabus/examples")) ++ # wandb.run.log_code("./syllabus/examples") + +- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) ++ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), +@@ -273,7 +265,9 @@ if __name__ == "__main__": + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) +- sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) ++ # code to edit ++ # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) ++ sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) + + # Intialize Curriculum Method + if args.curriculum_method == "plr": +@@ -316,7 +310,7 @@ if __name__ == "__main__": + make_env( + args.env_id, + args.seed + i, +- curriculum_components=curriculum.get_components() if args.curriculum else None, ++ curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) +@@ -324,22 +318,6 @@ if __name__ == "__main__": + ) + envs = wrap_vecenv(envs) + +- test_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=0) +- for i in range(args.num_eval_episodes) +- ] +- ) +- test_eval_envs = wrap_vecenv(test_eval_envs) +- +- train_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=200) +- for i in range(args.num_eval_episodes) +- ] +- ) +- train_eval_envs = wrap_vecenv(train_eval_envs) +- + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( +@@ -500,8 +478,18 @@ if __name__ == "__main__": + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent +- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = 
fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) +- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) ++ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) ++ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) +@@ -515,12 +503,21 @@ if __name__ == "__main__": + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) ++ + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) ++ + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) ++ + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() +diff --git a/syllabus/examples/utils/vecenv.py b/syllabus/examples/utils/vecenv.py +index 6e5a0a9..af3b187 100644 +--- a/syllabus/examples/utils/vecenv.py ++++ b/syllabus/examples/utils/vecenv.py +@@ -1,7 +1,6 @@ + import time + from collections import deque + +-import gym + import numpy as np + + +@@ -154,12 +153,20 @@ class VecEnvObservationWrapper(VecEnvWrapper): + pass + + def reset(self): +- obs, infos = self.venv.reset() ++ outputs = self.venv.reset() ++ if len(outputs) == 2: ++ obs, infos = outputs ++ else: ++ obs, infos = outputs, {} + return self.process(obs), infos + + def step_wait(self): +- print(self.venv) +- obs, rews, terms, truncs, infos = self.venv.step_wait() ++ env_outputs = self.venv.step_wait() ++ if len(env_outputs) == 4: ++ obs, rews, terms, infos = env_outputs ++ truncs = 
np.zeros_like(terms) ++ else: ++ obs, rews, terms, truncs, infos = env_outputs + return self.process(obs), rews, terms, truncs, infos + + +@@ -209,7 +216,10 @@ class VecNormalize(VecEnvWrapper): + + def reset(self, seed=None): + self.ret = np.zeros(self.num_envs) +- obs, infos = self.venv.reset(seed=seed) ++ if seed is not None: ++ obs, infos = self.venv.reset(seed=seed) ++ else: ++ obs, infos = self.venv.reset() + return self._obfilt(obs), infos + + +@@ -228,7 +238,10 @@ class VecMonitor(VecEnvWrapper): + self.eplen_buf = deque([], maxlen=keep_buf) + + def reset(self, seed=None): +- obs, infos = self.venv.reset(seed=seed) ++ if seed is not None: ++ obs, infos = self.venv.reset(seed=seed) ++ else: ++ obs, infos = self.venv.reset() + self.eprets = np.zeros(self.num_envs, 'f') + self.eplens = np.zeros(self.num_envs, 'i') + return obs, infos +@@ -239,7 +252,8 @@ class VecMonitor(VecEnvWrapper): + self.eprets += rews + self.eplens += 1 + # Convert dict of lists to list of dicts +- infos = [dict(zip(infos, t)) for t in zip(*infos.values())] ++ if isinstance(infos, dict): ++ infos = [dict(zip(infos, t)) for t in zip(*infos.values())] + newinfos = list(infos[:]) + for i in range(len(dones)): + if dones[i]: +diff --git a/syllabus/task_space/task_space.py b/syllabus/task_space/task_space.py +index 316e2f2..1ef674b 100644 +--- a/syllabus/task_space/task_space.py ++++ b/syllabus/task_space/task_space.py +@@ -7,20 +7,53 @@ from gymnasium.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Sp + + class TaskSpace(): + def __init__(self, gym_space: Union[Space, int], tasks=None): +- if isinstance(gym_space, int): +- # Syntactic sugar for discrete space +- gym_space = Discrete(gym_space) ++ ++ if not isinstance(gym_space, Space): ++ gym_space = self._create_gym_space(gym_space) + + self.gym_space = gym_space + +- # Autogenerate task names for discrete spaces +- if isinstance(gym_space, Discrete): +- if tasks is None: +- tasks = range(gym_space.n) ++ # Autogenerate task names ++ if tasks is None: ++ tasks = self._generate_task_names(gym_space) + + self._tasks = set(tasks) if tasks is not None else None + self._encoder, self._decoder = self._make_task_encoder(gym_space, tasks) + ++ def _create_gym_space(self, gym_space): ++ if isinstance(gym_space, int): ++ # Syntactic sugar for discrete space ++ gym_space = Discrete(gym_space) ++ elif isinstance(gym_space, tuple): ++ # Syntactic sugar for discrete space ++ gym_space = MultiDiscrete(gym_space) ++ elif isinstance(gym_space, list): ++ # Syntactic sugar for tuple space ++ spaces = [] ++ for i, value in enumerate(gym_space): ++ spaces[i] = self._create_gym_space(value) ++ gym_space = Tuple(spaces) ++ elif isinstance(gym_space, dict): ++ # Syntactic sugar for dict space ++ spaces = {} ++ for key, value in gym_space.items(): ++ spaces[key] = self._create_gym_space(value) ++ gym_space = Dict(spaces) ++ return gym_space ++ ++ def _generate_task_names(self, gym_space): ++ if isinstance(gym_space, Discrete): ++ tasks = tuple(range(gym_space.n)) ++ elif isinstance(gym_space, MultiDiscrete): ++ tasks = [tuple(range(dim)) for dim in gym_space.nvec] ++ elif isinstance(gym_space, Tuple): ++ tasks = [self._generate_task_names(value) for value in gym_space.spaces] ++ elif isinstance(gym_space, Dict): ++ tasks = {key: tuple(self._generate_task_names(value)) for key, value in gym_space.spaces.items()} ++ else: ++ tasks = None ++ return tasks ++ + def _make_task_encoder(self, space, tasks): + if isinstance(space, Discrete): + assert space.n == len(tasks), f"Number 
of tasks ({space.n}) must match number of discrete options ({len(tasks)})" +@@ -28,14 +61,46 @@ class TaskSpace(): + self._decode_map = {i: task for i, task in enumerate(tasks)} + encoder = lambda task: self._encode_map[task] if task in self._encode_map else None + decoder = lambda task: self._decode_map[task] if task in self._decode_map else None ++ ++ elif isinstance(space, Box): ++ encoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None ++ decoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None + elif isinstance(space, Tuple): +- for i, task in enumerate(tasks): +- assert self.count_tasks(space.spaces[i]) == len(task), "Each task must have number of components equal to Tuple space length. Got {len(task)} components and space length {self.count_tasks(space.spaces[i])}." ++ ++ assert len(space.spaces) == len(tasks), f"Number of task ({len(space.spaces)})must match options in Tuple ({len(tasks)})" + results = [list(self._make_task_encoder(s, t)) for (s, t) in zip(space.spaces, tasks)] + encoders = [r[0] for r in results] + decoders = [r[1] for r in results] + encoder = lambda task: [e(t) for e, t in zip(encoders, task)] + decoder = lambda task: [d(t) for d, t in zip(decoders, task)] ++ ++ elif isinstance(space, MultiDiscrete): ++ assert len(space.nvec) == len(tasks), f"Number of steps in a tasks ({len(space.nvec)}) must match number of discrete options ({len(tasks)})" ++ ++ combinations = [p for p in itertools.product(*tasks)] ++ encode_map = {task: i for i, task in enumerate(combinations)} ++ decode_map = {i: task for i, task in enumerate(combinations)} ++ ++ encoder = lambda task: encode_map[task] if task in encode_map else None ++ decoder = lambda task: decode_map[task] if task in decode_map else None ++ ++ elif isinstance(space, Dict): ++ ++ def helper(task, spaces, tasks, action="encode"): ++ # Iteratively encodes or decodes each space in the dictionary ++ output = {} ++ if (isinstance(spaces, dict) or isinstance(spaces, Dict)): ++ for key, value in spaces.items(): ++ if (isinstance(value, dict) or isinstance(value, Dict)): ++ temp = helper(task[key], value, tasks[key], action) ++ output.update({key: temp}) ++ else: ++ encoder, decoder = self._make_task_encoder(value, tasks[key]) ++ output[key] = encoder(task[key]) if action == "encode" else decoder(task[key]) ++ return output ++ ++ encoder = lambda task: helper(task, space.spaces, tasks, "encode") ++ decoder = lambda task: helper(task, space.spaces, tasks, "decode") + else: + encoder = lambda task: task + decoder = lambda task: task +@@ -152,6 +217,7 @@ class TaskSpace(): + return Discrete(self.gym_space.n + amount) + + def sample(self): ++ assert isinstance(self.gym_space, Discrete) or isinstance(self.gym_space, Box) or isinstance(self.gym_space, Dict) or isinstance(self.gym_space, Tuple) + return self.decode(self.gym_space.sample()) + + def list_tasks(self): +diff --git a/syllabus/task_space/test_task_space.py b/syllabus/task_space/test_task_space.py +index 0ec6b4e..109d0a7 100644 +--- a/syllabus/task_space/test_task_space.py ++++ b/syllabus/task_space/test_task_space.py +@@ -2,33 +2,148 @@ import gymnasium as gym + from syllabus.task_space import TaskSpace + + if __name__ == "__main__": ++ # Discrete Tests + task_space = TaskSpace(gym.spaces.Discrete(3), ["a", "b", "c"]) ++ + assert task_space.encode("a") == 0, f"Expected 0, got {task_space.encode('a')}" + assert task_space.encode("b") == 1, f"Expected 1, got {task_space.encode('b')}" + assert 
task_space.encode("c") == 2, f"Expected 2, got {task_space.encode('c')}" +- assert task_space.encode("d") == None, f"Expected None, got {task_space.encode('d')}" ++ assert task_space.encode("d") is None, f"Expected None, got {task_space.encode('d')}" + + assert task_space.decode(0) == "a", f"Expected a, got {task_space.decode(0)}" + assert task_space.decode(1) == "b", f"Expected b, got {task_space.decode(1)}" + assert task_space.decode(2) == "c", f"Expected c, got {task_space.decode(2)}" +- assert task_space.decode(3) == None, f"Expected None, got {task_space.decode(3)}" ++ assert task_space.decode(3) is None, f"Expected None, got {task_space.decode(3)}" + print("Discrete tests passed!") + ++ # MultiDiscrete Tests ++ task_space = TaskSpace(gym.spaces.MultiDiscrete([3, 2]), [("a", "b", "c"), (1, 0)]) ++ ++ assert task_space.encode(('a', 1)) == 0, f"Expected 0, got {task_space.encode(('a', 1))}" ++ assert task_space.encode(('b', 0)) == 3, f"Expected 3, got {task_space.encode(('b', 0))}" ++ assert task_space.encode(('c', 1)) == 4, f"Expected 4, got {task_space.encode(('c', 1))}" ++ ++ assert task_space.decode(3) == ('b', 0), f"Expected ('b', 0), got {task_space.decode(3)}" ++ assert task_space.decode(5) == ('c', 0), f"Expected ('c', 0), got {task_space.decode(5)}" ++ print("MultiDiscrete tests passed!") ++ ++ # Box Tests + task_space = TaskSpace(gym.spaces.Box(low=0, high=1, shape=(2,)), [(0, 0), (0, 1), (1, 0), (1, 1)]) ++ + assert task_space.encode([0.0, 0.0]) == [0.0, 0.0], f"Expected [0.0, 0.0], got {task_space.encode([0.0, 0.0])}" + assert task_space.encode([0.0, 0.1]) == [0.0, 0.1], f"Expected [0.0, 0.1], got {task_space.encode([0.0, 0.1])}" + assert task_space.encode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.encode([0.1, 0.1])}" + assert task_space.encode([1.0, 0.1]) == [1.0, 0.1], f"Expected [1.0, 0.1], got {task_space.encode([1.0, 0.1])}" + assert task_space.encode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.encode([1.0, 1.0])}" +- assert task_space.encode([1.2, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" +- assert task_space.encode([1.0, 1.2]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" +- assert task_space.encode([-0.1, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([1.2, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([1.0, 1.2]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([-0.1, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" + + assert task_space.decode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.decode([1.0, 1.0])}" + assert task_space.decode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.decode([0.1, 0.1])}" +- assert task_space.decode([-0.1, 1.0]) == None, f"Expected None, got {task_space.decode([1.2, 1.0])}" ++ assert task_space.decode([-0.1, 1.0]) is None, f"Expected None, got {task_space.decode([1.2, 1.0])}" + print("Box tests passed!") + ++ # Tuple Tests ++ task_spaces = (gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3)) ++ task_names = ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")) ++ task_space = TaskSpace(gym.spaces.Tuple(task_spaces), task_names) ++ ++ assert task_space.encode((('a', 0), 'Y')) == [1, 1], f"Expected 0, got {task_space.encode((('a', 1),'Y'))}" ++ assert task_space.decode([0, 1]) == [('a', 1), 'Y'], f"Expected 0, got {task_space.decode([0, 1])}" ++ 
print("Tuple tests passed!") ++ ++ # Dictionary Tests ++ task_spaces = gym.spaces.Dict({ ++ "ext_controller": gym.spaces.MultiDiscrete([5, 2, 2]), ++ "inner_state": gym.spaces.Dict( ++ { ++ "charge": gym.spaces.Discrete(10), ++ "system_checks": gym.spaces.Tuple((gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3))), ++ "job_status": gym.spaces.Dict( ++ { ++ "task": gym.spaces.Discrete(5), ++ "progress": gym.spaces.Box(low=0, high=1, shape=(2,)), ++ } ++ ), ++ } ++ ), ++ }) ++ task_names = { ++ "ext_controller": [("a", "b", "c", "d", "e"), (1, 0), ("X", "Y")], ++ "inner_state": { ++ "charge": [0, 1, 13, 3, 94, 35, 6, 37, 8, 9], ++ "system_checks": ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")), ++ "job_status": { ++ "task": ["A", "B", "C", "D", "E"], ++ "progress": [(0, 0), (0, 1), (1, 0), (1, 1)], ++ } ++ } ++ } ++ task_space = TaskSpace(task_spaces, task_names) ++ ++ test_val = { ++ "ext_controller": ('b', 1, 'X'), ++ 'inner_state': { ++ 'charge': 1, ++ 'system_checks': [('a', 0), 'Y'], ++ 'job_status': {'task': 'C', 'progress': [0.0, 0.0]} ++ } ++ } ++ decode_val = { ++ "ext_controller": 4, ++ "inner_state": { ++ "charge": 1, ++ "system_checks": [1, 1], ++ "job_status": {"progress": [0.0, 0.0], "task": 2}, ++ }, ++ } ++ ++ assert task_space.encode(test_val) == decode_val, f"Expected {decode_val}, \n but got {task_space.encode(test_val)}" ++ assert task_space.decode(decode_val) == test_val, f"Expected {test_val}, \n but got {task_space.decode(decode_val)}" ++ ++ test_val_2 = { ++ "ext_controller": ("e", 1, "Y"), ++ "inner_state": { ++ "charge": 37, ++ "system_checks": [("b", 0), "Z"], ++ "job_status": {"progress": [0.0, 0.1], "task": "D"}, ++ }, ++ } ++ decode_val_2 = { ++ "ext_controller": 17, ++ "inner_state": { ++ "charge": 7, ++ "system_checks": [3, 2], ++ "job_status": {"progress": [0.0, 0.1], "task": 3}, ++ }, ++ } ++ ++ assert task_space.encode(test_val_2) == decode_val_2, f"Expected {decode_val_2}, \n but got {task_space.encode(test_val_2)}" ++ assert task_space.decode(decode_val_2) == test_val_2, f"Expected {test_val_2}, \n but got {task_space.decode(decode_val_2)}" ++ ++ test_val_3 = { ++ "ext_controller": ("e", 1, "X"), ++ "inner_state": { ++ "charge": 8, ++ "system_checks": [("c", 0), "X"], ++ "job_status": {"progress": [0.5, 0.1], "task": "E"}, ++ }, ++ } ++ decode_val_3 = { ++ "ext_controller": 16, ++ "inner_state": { ++ "charge": 8, ++ "system_checks": [5, 0], ++ "job_status": {"progress": [0.5, 0.1], "task": 4}, ++ }, ++ } ++ ++ assert task_space.encode(test_val_3) == decode_val_3, f"Expected {decode_val_3}, \n but got {task_space.encode(test_val_3)}" ++ assert task_space.decode(decode_val_3) == test_val_3, f"Expected {test_val_3}, \n but got {task_space.decode(decode_val_3)}" ++ ++ print("Dictionary tests passed!") ++ + # Test syntactic sugar + task_space = TaskSpace(3) + assert task_space.encode(0) == 0, f"Expected 0, got {task_space.encode(0)}" +@@ -36,4 +151,32 @@ if __name__ == "__main__": + assert task_space.encode(2) == 2, f"Expected 2, got {task_space.encode(2)}" + assert task_space.encode(3) is None, f"Expected None, got {task_space.encode(3)}" + ++ task_space = TaskSpace((2, 4)) ++ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" ++ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" ++ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" ++ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" ++ ++ task_space = 
TaskSpace((2, 4)) ++ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" ++ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" ++ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" ++ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" ++ ++ task_space = TaskSpace({"map": 5, "level": (4, 10), "difficulty": 3}) ++ ++ encoding = task_space.encode({"map": 0, "level": (0, 0), "difficulty": 0}) ++ expected = {"map": 0, "level": 0, "difficulty": 0} ++ ++ encoding = task_space.encode({"map": 4, "level": (3, 9), "difficulty": 2}) ++ expected = {"map": 4, "level": 39, "difficulty": 2} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" ++ ++ encoding = task_space.encode({"map": 2, "level": (2, 0), "difficulty": 1}) ++ expected = {"map": 2, "level": 20, "difficulty": 1} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" ++ ++ encoding = task_space.encode({"map": 5, "level": (2, 11), "difficulty": -1}) ++ expected = {"map": None, "level": None, "difficulty": None} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" + print("All tests passed!") +diff --git a/syllabus/tests/utils.py b/syllabus/tests/utils.py +index 314a29c..98bac82 100644 +--- a/syllabus/tests/utils.py ++++ b/syllabus/tests/utils.py +@@ -57,7 +57,7 @@ def run_episode(env, new_task=None, curriculum=None, env_id=0): + action = env.action_space.sample() + obs, rew, term, trunc, info = env.step(action) + if curriculum and curriculum.requires_step_updates: +- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) ++ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) + curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) + ep_rew += rew + ep_len += 1 +@@ -87,7 +87,7 @@ def run_set_length(env, curriculum=None, episodes=None, steps=None, env_id=0, en + action = env.action_space.sample() + obs, rew, term, trunc, info = env.step(action) + if curriculum and curriculum.requires_step_updates: +- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) ++ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) + curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) + ep_rew += rew + ep_len += 1 +diff --git a/tests/multiprocessing_smoke_tests.py b/tests/multiprocessing_smoke_tests.py +index 9db9f47..b788179 100644 +--- a/tests/multiprocessing_smoke_tests.py ++++ b/tests/multiprocessing_smoke_tests.py +@@ -21,23 +21,23 @@ nethack_env = create_nethack_env() + cartpole_env = create_cartpole_env() + + curricula = [ +- (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), +- (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), +- # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), +- (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), +- (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { +- "get_value": get_test_values, +- "device": "cpu", +- "num_processes": N_ENVS, +- "num_steps": 2048 +- }), +- (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), +- 
(AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { +- 'start_values': [-0.02, 0.02], +- 'end_values': [-0.3, 0.3], +- 'total_steps': [10] +- }), +- (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), ++ (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), ++ (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), ++ # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), ++ (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), ++ (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { ++ "get_value": get_test_values, ++ "device": "cpu", ++ "num_processes": N_ENVS, ++ "num_steps": 2048 ++ }), ++ (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), ++ (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { ++ 'start_values': [-0.02, 0.02], ++ 'end_values': [-0.3, 0.3], ++ 'total_steps': [10] ++ }), ++ (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), + ] + + test_names = [curriculum_args[0].__name__ for curriculum_args in curricula] diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-metadata.json b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-metadata.json new file mode 100644 index 00000000..8d950c03 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-metadata.json @@ -0,0 +1,167 @@ +{ + "os": "Linux-3.10.0-1160.11.1.el7.x86_64-x86_64-with-glibc2.10", + "python": "3.8.5", + "heartbeatAt": "2024-04-23T04:09:57.173591", + "startedAt": "2024-04-23T04:09:56.534155", + "docker": null, + "cuda": "10.1.243", + "args": [ + "--curriculum", + "True", + "--track", + "True", + "--env-id", + "bigfish" + ], + "state": "running", + "program": "cleanrl_procgen_plr.py", + "codePathLocal": "cleanrl_procgen_plr.py", + "codePath": "syllabus/examples/training_scripts/cleanrl_procgen_plr.py", + "git": { + "remote": "https://github.com/RoseyGreenBlue/Syllabus.git", + "commit": "63dc8f62e4d9d567eb92bb2f6c2bb186a0dc8ffb" + }, + "email": "djhaayusv04@gmail.com", + "root": "/data/averma/MARL/Syllabus", + "host": "f411843fc70b", + "username": "root", + "executable": "/home/user/miniconda/envs/test2_py/bin/python", + "cpu_count": 12, + "cpu_count_logical": 24, + "cpu_freq": { + "current": 1261.3982916666669, + "min": 1200.0, + "max": 3700.0 + }, + "cpu_freq_per_core": [ + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.085, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 
1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1281.64, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1260.473, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.5, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1316.503, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1301.354, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1271.057, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1244.287, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1199.877, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1398.474, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1242.834, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1258.605, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1210.668, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1446.826, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1216.687, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1211.083, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1312.976, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1207.971, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1266.699, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1577.355, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1244.494, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1237.023, + "min": 1200.0, + "max": 3700.0 + } + ], + "disk": { + "/": { + "total": 5952.626953125, + "used": 988.7801742553711 + } + }, + "memory": { + "total": 251.63711166381836 + } +} diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-summary.json b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-summary.json new file mode 100644 index 00000000..8488a97c --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 5}, "global_step": 0, "_timestamp": 1713845400.8411036, "_runtime": 4.293798446655273, "_step": 0} \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/run-maennc1u.wandb b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/run-maennc1u.wandb new file mode 100644 index 0000000000000000000000000000000000000000..3c6de985d34cce1e3733f2be400ec45f0ad6d2e3 GIT binary patch literal 5972 zcmcgwe~cV;74O{cUH86sz20qGdR=~WSrl4mXXp3MoNXxww59SZmA0WlC-dVwJ2N-G zIy1YsdxWGO5(q*d;Ew{57y|(be-xFZ3Q;5y5rURPp(dJGNFXF8f(k!(uzjwoUUMr zHdvmPSXR<8uQB{H#EO?k5z4s=_B@TQy;=99AWP&1<_YRQYnMjjz@*uL#JiwRvbCDTpYzXh~|kL!&B)8O`=s0d*~%dp%Mcd zLlg-j9<4?>uja~t2nquNhbGW)GSG&|FuVY63@e_80o(y6+P;mVOjQA(7B{QWU{!Hl zAZL&wB}oDu1Y+4yTs5T(MtO*Tb%LD`t17W(Vz5bw@b0Qwpo3d74t&i_23hbU=lP*4 zD|&sw2)L#>Rv>FWHh>I@WD+y!P~Gt(&}%sZsz7l2wJZcmLzpY;MprR$WtFQmv!F88l0GOkdH?Cs{tr zE3!<(Uxfzmcvj-qq(nr@U7>D#-IM z#3YWuFM>EaK$HeqQY87yApvQD6s89@5%#EKd9}vYhA7Aa_Y83ek!BnR(NwuywPLfT zlA7<9_xTZS?hY~brkxrtf1$n3Nq8l{NnmwNGz127x}@_$1*;OHs*E9M@Q)S+g-Yax zx@ga>t4r^XiUP980pW6Yk$mrhFYv*nPqeJ$F9FZT* z9Zl2h*ga1?lqsyd2G=bOPc1!^cju1XeI1~#wAb{Y-t+bE0V+{kI^Nn#Eug%M#hadZ7BB5uZho0jfzp)&8-n)IC z=D|mPy_{fP`r^P3lQdo1lcpcO|J(mrnrhm|C+`4z>+IcK@}B%X*sClvvzDIUzq$LD zBYVMz^X#=9A5Q%J&)`EgNzx?gIe0-oCX9_|Yc-cwIN8|I3_Tx>xUA{gdzR z2CpXViH=t zcT?#NG%yqPI%KIiqg^dK%g_|(r6qA%zn`pEG+!Rm5| zzRNoQs}}&;h4aLCX_T(olpe8Iq(XKR@eq;C5#xjRf9dU~-_H=ai>BUL*^TpoU!BYp zpf-g9WQM$rtVK!{-;BfLY=P1vxkVQ?ps`-*?rEs-BvB+QVgX8@RAhN+jYAcnJW2=P z94vhcjf9qUJcBqeW7v6ehbOIL@FJTgk9&g(j&2RDkKd1|Vp*P=Mkm 
zsF4&2-!km^ZSPo;?a1gp8AI|N8Pg}@61}9L0!((XY>=mWK)J=jEJKPNQ2q=V1hk|z?p>@DvLz`$yOF*;u$i4Y{xG3$)r{c`6LGVOk(H?1lSVxGsBWp5`9b9PYt-! z74~tHt@LW1$&Jv}Jd>Orhx*vbvLt90+79*c)26QFY#%?VY$1?-e#F)^rix4-KN&8a zkCv>Do?cLd7k-pjD;EAA*0!(s%fEQo$u6EgHq>p;b1fD0{CWU!(tw8T_2 zji;fF!D5!x3=Jy+XDF&v*+bk+92#<>mu{Hvgy0H zT#Im6CLJG2hH_*oAugvT%SsqqhN405CW@7$Ay|&xV5TlBbUZUtiqQHHAFf#;cGH^9 z2>m(ik#Qr=PkPdRn%h51AH4B^vftlz@G3{yf5lZ`^qqCfGOAYm0P~Dqx*3hN=zv*E zdnNhFnTmZYZ40=*%d}W$SJS+)vCp3cTW@blERZW2^N^4eQMQI!kRj0v;LcJ*-jzxhrsl#IE`?}Q#W541{%~)6M zhBl{BqSiRfiy^Oxid$s@lZsV(Mu%2aO)&-2f^Lh5O_Pi5xmkKv6(Yx=W-HCc>^yX; z*vJq$$y0Tcvv^UkLmpd{+f?;RgPWNNX2MyWW9rsnj;HBJnxSR`IuyJ*W<085@U!+@ z)o6nMwrWaZ;M!h8U@IauCsDIPv(5>Dpb4%!%gnkVZO^%N!7}t9bR+C=G*eeG{cl5; B89x93 literal 0 HcmV?d00001 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py new file mode 100644 index 00000000..70a27f6a --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py @@ -0,0 +1,524 @@ +""" An example applying Syllabus Prioritized Level Replay to Procgen. This code is based on https://github.com/facebookresearch/level-replay/blob/main/train.py + +NOTE: In order to efficiently change the seed of a procgen environment directly without reinitializing it, +we rely on Minqi Jiang's custom branch of procgen found here: https://github.com/minqi/procgen +""" +import argparse +import os +import random +import time +from collections import deque +from distutils.util import strtobool + +import gym as openai_gym +import gymnasium as gym +import numpy as np +import procgen # noqa: F401 +from procgen import ProcgenEnv +import torch +import torch.nn as nn +import torch.optim as optim +from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 +from torch.utils.tensorboard import SummaryWriter + +from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum +from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum +from syllabus.examples.models import ProcgenAgent +from syllabus.examples.task_wrappers import ProcgenTaskWrapper, MinigridTaskWrapper +from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + +def parse_args(): + # fmt: off + parser = argparse.ArgumentParser() + parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), + help="the name of this experiment") + parser.add_argument("--seed", type=int, default=1, + help="seed of the experiment") + parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="if toggled, `torch.backends.cudnn.deterministic=False`") + parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="if toggled, cuda will be enabled by default") + parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="if toggled, this experiment will be tracked with Weights and Biases") + parser.add_argument("--wandb-project-name", type=str, default="syllabus", + help="the wandb's project name") + parser.add_argument("--wandb-entity", type=str, default=None, + help="the entity (team) of wandb's 
project") + parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="weather to capture videos of the agent performances (check out `videos` folder)") + parser.add_argument("--logging-dir", type=str, default=".", + help="the base directory for logging and wandb storage.") + + # Algorithm specific arguments + parser.add_argument("--env-id", type=str, default="starpilot", + help="the id of the environment") + parser.add_argument("--total-timesteps", type=int, default=int(25e6), + help="total timesteps of the experiments") + parser.add_argument("--learning-rate", type=float, default=5e-4, + help="the learning rate of the optimizer") + parser.add_argument("--num-envs", type=int, default=64, + help="the number of parallel game environments") + parser.add_argument("--num-steps", type=int, default=256, + help="the number of steps to run in each environment per policy rollout") + parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="Toggle learning rate annealing for policy and value networks") + parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Use GAE for advantage computation") + parser.add_argument("--gamma", type=float, default=0.999, + help="the discount factor gamma") + parser.add_argument("--gae-lambda", type=float, default=0.95, + help="the lambda for the general advantage estimation") + parser.add_argument("--num-minibatches", type=int, default=8, + help="the number of mini-batches") + parser.add_argument("--update-epochs", type=int, default=3, + help="the K epochs to update the policy") + parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Toggles advantages normalization") + parser.add_argument("--clip-coef", type=float, default=0.2, + help="the surrogate clipping coefficient") + parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") + parser.add_argument("--ent-coef", type=float, default=0.01, + help="coefficient of the entropy") + parser.add_argument("--vf-coef", type=float, default=0.5, + help="coefficient of the value function") + parser.add_argument("--max-grad-norm", type=float, default=0.5, + help="the maximum norm for the gradient clipping") + parser.add_argument("--target-kl", type=float, default=None, + help="the target KL divergence threshold") + + # Procgen arguments + parser.add_argument("--full-dist", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, + help="Train on full distribution of levels.") + + # Curriculum arguments + parser.add_argument("--curriculum", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="if toggled, this experiment will use curriculum learning") + parser.add_argument("--curriculum-method", type=str, default="plr", + help="curriculum method to use") + parser.add_argument("--num-eval-episodes", type=int, default=10, + help="the number of episodes to evaluate the agent on after each policy update.") + + args = parser.parse_args() + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + # fmt: on + return args + + +PROCGEN_RETURN_BOUNDS = { + "coinrun": (5, 10), + "starpilot": (2.5, 64), + "caveflyer": (3.5, 12), + "dodgeball": (1.5, 19), + 
"fruitbot": (-1.5, 32.4), + "chaser": (0.5, 13), + "miner": (1.5, 13), + "jumper": (3, 10), + "leaper": (3, 10), + "maze": (5, 10), + "bigfish": (1, 40), + "heist": (3.5, 10), + "climber": (2, 12.6), + "plunder": (4.5, 30), + "ninja": (3.5, 10), + "bossfight": (0.5, 13), +} + + +def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) + if curriculum is not None: + env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, + curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, + ) + return env + return thunk + + +def wrap_vecenv(vecenv): + vecenv.is_vector_env = True + vecenv = VecMonitor(venv=vecenv, filename=None, keep_buf=100) + vecenv = VecNormalize(venv=vecenv, ob=False, ret=True) + return vecenv + + +def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, + num_levels=0 +): + policy.eval() + + eval_envs = ProcgenEnv( + num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [] + + while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): + if 'episode' in info.keys(): + eval_episode_rewards.append(info['episode']['r']) + + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) + policy.train() + return mean_returns, stddev_returns, normalized_mean_returns + + +def level_replay_evaluate( + env_name, + policy, + num_episodes, + device, + num_levels=0 +): + policy.eval() + + eval_envs = ProcgenEnv( + num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): + if 'episode' in info.keys() and eval_episode_rewards[i] == -1: + eval_episode_rewards[i] = info['episode']['r'] + + # print(eval_episode_rewards) + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) + policy.train() + return mean_returns, stddev_returns, normalized_mean_returns + + +def make_value_fn(): + def get_value(obs): + obs = np.array(obs) + with torch.no_grad(): + return agent.get_value(torch.Tensor(obs).to(device)) + return get_value + + +if __name__ == "__main__": + args = parse_args() + run_name = 
f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" + if args.track: + import wandb + + wandb.init( + project=args.wandb_project_name, + entity=args.wandb_entity, + sync_tensorboard=True, + config=vars(args), + name=run_name, + monitor_gym=True, + save_code=True, + dir=args.logging_dir + ) + # wandb.run.log_code("./syllabus/examples") + + writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), + ) + + # TRY NOT TO MODIFY: seeding + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + torch.backends.cudnn.deterministic = args.torch_deterministic + + device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") + print("Device:", device) + + # Curriculum setup + curriculum = None + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) + # code to edit + # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) + sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) + + # Intialize Curriculum Method + if args.curriculum_method == "plr": + print("Using prioritized level replay.") + curriculum = PrioritizedLevelReplay( + sample_env.task_space, + sample_env.observation_space, + num_steps=args.num_steps, + num_processes=args.num_envs, + gamma=args.gamma, + gae_lambda=args.gae_lambda, + task_sampler_kwargs_dict={"strategy": "value_l1"}, + get_value=make_value_fn(), + ) + elif args.curriculum_method == "dr": + print("Using domain randomization.") + curriculum = DomainRandomization(sample_env.task_space) + elif args.curriculum_method == "lp": + print("Using learning progress.") + curriculum = LearningProgressCurriculum(sample_env.task_space) + elif args.curriculum_method == "sq": + print("Using sequential curriculum.") + curricula = [] + stopping = [] + for i in range(199): + curricula.append(i + 1) + stopping.append("steps>=50000") + curricula.append(list(range(i + 1))) + stopping.append("steps>=50000") + curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) + else: + raise ValueError(f"Unknown curriculum method {args.curriculum_method}") + curriculum = make_multiprocessing_curriculum(curriculum) + del sample_env + + # env setup + print("Creating env") + envs = gym.vector.AsyncVectorEnv( + [ + make_env( + args.env_id, + args.seed + i, + curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) + ] + ) + envs = wrap_vecenv(envs) + + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( + envs.single_observation_space.shape, + envs.single_action_space.n, + arch="large", + base_kwargs={'recurrent': False, 'hidden_size': 256} + ).to(device) + optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) + + # ALGO Logic: Storage setup + obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) + actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) + logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) + rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) + dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + values = 
torch.zeros((args.num_steps, args.num_envs)).to(device) + + # TRY NOT TO MODIFY: start the game + global_step = 0 + start_time = time.time() + next_obs, _ = envs.reset() + next_obs = torch.Tensor(next_obs).to(device) + next_done = torch.zeros(args.num_envs).to(device) + num_updates = args.total_timesteps // args.batch_size + episode_rewards = deque(maxlen=10) + completed_episodes = 0 + + for update in range(1, num_updates + 1): + # Annealing the rate if instructed to do so. + if args.anneal_lr: + frac = 1.0 - (update - 1.0) / num_updates + lrnow = frac * args.learning_rate + optimizer.param_groups[0]["lr"] = lrnow + + for step in range(0, args.num_steps): + global_step += 1 * args.num_envs + obs[step] = next_obs + dones[step] = next_done + + # ALGO LOGIC: action logic + with torch.no_grad(): + action, logprob, _, value = agent.get_action_and_value(next_obs) + values[step] = value.flatten() + actions[step] = action + logprobs[step] = logprob + + # TRY NOT TO MODIFY: execute the game and log data. + next_obs, reward, term, trunc, info = envs.step(action.cpu().numpy()) + done = np.logical_or(term, trunc) + rewards[step] = torch.tensor(reward).to(device).view(-1) + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + completed_episodes += sum(done) + + for item in info: + if "episode" in item.keys(): + episode_rewards.append(item['episode']['r']) + print(f"global_step={global_step}, episodic_return={item['episode']['r']}") + writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) + if curriculum is not None: + curriculum.log_metrics(writer, global_step) + break + + # bootstrap value if not done + with torch.no_grad(): + next_value = agent.get_value(next_obs).reshape(1, -1) + if args.gae: + advantages = torch.zeros_like(rewards).to(device) + lastgaelam = 0 + for t in reversed(range(args.num_steps)): + if t == args.num_steps - 1: + nextnonterminal = 1.0 - next_done + nextvalues = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + nextvalues = values[t + 1] + delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] + advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam + returns = advantages + values + else: + returns = torch.zeros_like(rewards).to(device) + for t in reversed(range(args.num_steps)): + if t == args.num_steps - 1: + nextnonterminal = 1.0 - next_done + next_return = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + next_return = returns[t + 1] + returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return + advantages = returns - values + + # flatten the batch + b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) + b_logprobs = logprobs.reshape(-1) + b_actions = actions.reshape((-1,) + envs.single_action_space.shape) + b_advantages = advantages.reshape(-1) + b_returns = returns.reshape(-1) + b_values = values.reshape(-1) + + # Optimizing the policy and value network + b_inds = np.arange(args.batch_size) + clipfracs = [] + for epoch in range(args.update_epochs): + np.random.shuffle(b_inds) + for start in range(0, args.batch_size, args.minibatch_size): + end = start + args.minibatch_size + mb_inds = b_inds[start:end] + + _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds]) + logratio = newlogprob - b_logprobs[mb_inds] + ratio = logratio.exp() + + with torch.no_grad(): + # calculate approx_kl 
http://joschu.net/blog/kl-approx.html + old_approx_kl = (-logratio).mean() + approx_kl = ((ratio - 1) - logratio).mean() + clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()] + + mb_advantages = b_advantages[mb_inds] + if args.norm_adv: + mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) + + # Policy loss + pg_loss1 = -mb_advantages * ratio + pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + args.clip_coef) + pg_loss = torch.max(pg_loss1, pg_loss2).mean() + + # Value loss + newvalue = newvalue.view(-1) + if args.clip_vloss: + v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 + v_clipped = b_values[mb_inds] + torch.clamp( + newvalue - b_values[mb_inds], + -args.clip_coef, + args.clip_coef, + ) + v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 + v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) + v_loss = 0.5 * v_loss_max.mean() + else: + v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() + + entropy_loss = entropy.mean() + loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef + + optimizer.zero_grad() + loss.backward() + nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) + optimizer.step() + + if args.target_kl is not None: + if approx_kl > args.target_kl: + break + + y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() + var_y = np.var(y_true) + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent + mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) + writer.add_scalar("charts/episode_returns", np.mean(episode_rewards), global_step) + writer.add_scalar("losses/value_loss", v_loss.item(), global_step) + writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step) + writer.add_scalar("losses/entropy", entropy_loss.item(), global_step) + writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step) + writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step) + writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step) + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) + + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) + 
writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) + + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) + + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() + writer.close() diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/conda-environment.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/conda-environment.yaml new file mode 100644 index 00000000..cd0b0b09 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/conda-environment.yaml @@ -0,0 +1,165 @@ +name: test2_py +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - ca-certificates=2024.3.11=h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.3=he6710b0_2 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - ncurses=6.4=h6a678d5_0 + - openssl=1.1.1w=h7f8727e_0 + - pip=23.3.1=py38h06a4308_0 + - python=3.8.5=h7579374_1 + - readline=8.2=h5eee18b_0 + - setuptools=68.2.2=py38h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - wheel=0.41.2=py38h06a4308_0 + - xz=5.4.6=h5eee18b_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - absl-py==2.1.0 + - aiosignal==1.3.1 + - alabaster==0.7.13 + - appdirs==1.4.4 + - attrs==23.2.0 + - babel==2.14.0 + - beautifulsoup4==4.12.3 + - cachetools==5.3.3 + - certifi==2024.2.2 + - cffi==1.16.0 + - charset-normalizer==3.3.2 + - click==8.1.7 + - cloudpickle==3.0.0 + - cmake==3.29.2 + - contourpy==1.1.1 + - cycler==0.12.1 + - dm-tree==0.1.8 + - docker-pycreds==0.4.0 + - docutils==0.20.1 + - exceptiongroup==1.2.0 + - farama-notifications==0.0.4 + - filelock==3.13.4 + - fonttools==4.51.0 + - frozenlist==1.4.1 + - fsspec==2024.3.1 + - furo==2024.1.29 + - future==1.0.0 + - gitdb==4.0.11 + - gitpython==3.1.43 + - glcontext==2.5.0 + - glfw==1.12.0 + - google-auth==2.29.0 + - google-auth-oauthlib==1.0.0 + - grpcio==1.62.1 + - gym==0.23.0 + - gym-notices==0.0.8 + - gymnasium==0.28.1 + - idna==3.7 + - imageio==2.34.0 + - imageio-ffmpeg==0.3.0 + - imagesize==1.4.1 + - importlib-metadata==7.1.0 + - importlib-resources==6.4.0 + - iniconfig==2.0.0 + - jax-jumpy==1.0.0 + - jinja2==3.1.3 + - jsonschema==4.21.1 + - jsonschema-specifications==2023.12.1 + - kiwisolver==1.4.5 + - lazy-loader==0.4 + - lz4==4.3.3 + - markdown==3.6 + - markdown-it-py==3.0.0 + - markupsafe==2.1.5 + - matplotlib==3.7.5 + - mdurl==0.1.2 + - moderngl==5.10.0 + - mpmath==1.3.0 + - msgpack==1.0.8 + - networkx==3.1 + - numpy==1.24.4 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==8.9.2.26 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - 
nvidia-cusparse-cu12==12.1.0.106 + - nvidia-nccl-cu12==2.19.3 + - nvidia-nvjitlink-cu12==12.4.127 + - nvidia-nvtx-cu12==12.1.105 + - oauthlib==3.2.2 + - packaging==24.0 + - pandas==2.0.3 + - pillow==10.3.0 + - pkgutil-resolve-name==1.3.10 + - pluggy==1.4.0 + - protobuf==4.25.3 + - psutil==5.9.8 + - py-cpuinfo==9.0.0 + - pyarrow==15.0.2 + - pyasn1==0.6.0 + - pyasn1-modules==0.4.0 + - pycparser==2.22 + - pyenchant==3.2.2 + - pyglet==1.4.11 + - pygments==2.17.2 + - pyparsing==3.1.2 + - pytest==8.1.1 + - pytest-benchmark==4.0.0 + - python-dateutil==2.9.0.post0 + - pytz==2024.1 + - pywavelets==1.4.1 + - pyyaml==6.0.1 + - ray==2.10.0 + - referencing==0.34.0 + - requests==2.31.0 + - requests-oauthlib==2.0.0 + - rich==13.7.1 + - rpds-py==0.18.0 + - rsa==4.9 + - scikit-image==0.21.0 + - scipy==1.10.0 + - sentry-sdk==1.45.0 + - setproctitle==1.3.3 + - shellingham==1.5.4 + - shimmy==1.3.0 + - six==1.16.0 + - smmap==5.0.1 + - snowballstemmer==2.2.0 + - soupsieve==2.5 + - sphinx==7.1.2 + - sphinx-basic-ng==1.0.0b2 + - sphinx-tabs==3.4.5 + - sphinxcontrib-applehelp==1.0.4 + - sphinxcontrib-devhelp==1.0.2 + - sphinxcontrib-htmlhelp==2.0.1 + - sphinxcontrib-jsmath==1.0.1 + - sphinxcontrib-qthelp==1.0.3 + - sphinxcontrib-serializinghtml==1.1.5 + - sphinxcontrib-spelling==8.0.0 + - syllabus-rl==0.5 + - sympy==1.12 + - tensorboard==2.14.0 + - tensorboard-data-server==0.7.2 + - tensorboardx==2.6.2.2 + - tifffile==2023.7.10 + - tomli==2.0.1 + - torch==2.2.2 + - triton==2.2.0 + - typer==0.12.3 + - typing-extensions==4.11.0 + - tzdata==2024.1 + - urllib3==2.2.1 + - wandb==0.16.6 + - werkzeug==3.0.2 + - zipp==3.18.1 +prefix: /home/user/miniconda/envs/test2_py + diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/config.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/config.yaml new file mode 100644 index 00000000..19f16db4 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/config.yaml @@ -0,0 +1,126 @@ +wandb_version: 1 + +exp_name: + desc: null + value: cleanrl_procgen_plr +seed: + desc: null + value: 1 +torch_deterministic: + desc: null + value: true +cuda: + desc: null + value: true +track: + desc: null + value: true +wandb_project_name: + desc: null + value: syllabus +wandb_entity: + desc: null + value: null +capture_video: + desc: null + value: false +logging_dir: + desc: null + value: . 
+env_id: + desc: null + value: bigfish +total_timesteps: + desc: null + value: 25000000 +learning_rate: + desc: null + value: 0.0005 +num_envs: + desc: null + value: 64 +num_steps: + desc: null + value: 256 +anneal_lr: + desc: null + value: false +gae: + desc: null + value: true +gamma: + desc: null + value: 0.999 +gae_lambda: + desc: null + value: 0.95 +num_minibatches: + desc: null + value: 8 +update_epochs: + desc: null + value: 3 +norm_adv: + desc: null + value: true +clip_coef: + desc: null + value: 0.2 +clip_vloss: + desc: null + value: true +ent_coef: + desc: null + value: 0.01 +vf_coef: + desc: null + value: 0.5 +max_grad_norm: + desc: null + value: 0.5 +target_kl: + desc: null + value: null +full_dist: + desc: null + value: true +curriculum: + desc: null + value: true +curriculum_method: + desc: null + value: plr +num_eval_episodes: + desc: null + value: 10 +batch_size: + desc: null + value: 16384 +minibatch_size: + desc: null + value: 2048 +_wandb: + desc: null + value: + code_path: code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py + python_version: 3.8.5 + cli_version: 0.16.6 + framework: torch + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1713845639.0 + t: + 1: + - 1 + - 30 + - 55 + 3: + - 13 + - 16 + - 23 + - 35 + 4: 3.8.5 + 5: 0.16.6 + 8: + - 5 + 13: linux-x86_64 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/diff.patch b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/diff.patch new file mode 100644 index 00000000..40d0796c --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/diff.patch @@ -0,0 +1,142 @@ +diff --git a/setup.py b/setup.py +index 31e09f2..22a94e8 100644 +--- a/setup.py ++++ b/setup.py +@@ -2,7 +2,7 @@ from setuptools import find_packages, setup + + + extras = dict() +-extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] ++extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] + extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] + extras['all'] = extras['test'] + extras['docs'] + +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +index dabcd50..70a27f6 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py ++++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +@@ -24,7 +24,7 @@ from torch.utils.tensorboard import SummaryWriter + from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum + from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum + from syllabus.examples.models import ProcgenAgent +-from syllabus.examples.task_wrappers import ProcgenTaskWrapper ++from syllabus.examples.task_wrappers import ProcgenTaskWrapper, MinigridTaskWrapper + from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + +@@ -136,7 +136,7 @@ def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + env = MultiProcessingSyncWrapper( + env, + curriculum.get_components(), +- update_on_step=curriculum.requires_step_updates, ++ update_on_step=False, + task_space=env.task_space, + ) + return env +@@ -150,37 +150,31 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def full_level_replay_evaluate( ++def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, + 
device, +- num_levels=1 # Not used ++ num_levels=0 + ): + policy.eval() + + eval_envs = ProcgenEnv( +- num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False ++ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) + eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) +- +- # Seed environments +- seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] +- for i, seed in enumerate(seeds): +- eval_envs.seed(seed, i) +- + eval_obs, _ = eval_envs.reset() +- eval_episode_rewards = [-1] * num_episodes ++ eval_episode_rewards = [] + +- while -1 in eval_episode_rewards: ++ while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + + eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + for i, info in enumerate(infos): +- if 'episode' in info.keys() and eval_episode_rewards[i] == -1: +- eval_episode_rewards[i] = info['episode']['r'] ++ if 'episode' in info.keys(): ++ eval_episode_rewards.append(info['episode']['r']) + + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) +@@ -251,7 +245,7 @@ if __name__ == "__main__": + ) + # wandb.run.log_code("./syllabus/examples") + +- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) ++ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), +@@ -271,7 +265,9 @@ if __name__ == "__main__": + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) +- sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) ++ # code to edit ++ # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) ++ sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) + + # Intialize Curriculum Method + if args.curriculum_method == "plr": +@@ -485,13 +481,13 @@ if __name__ == "__main__": + mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) +- full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( ++ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) +- full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( ++ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + +@@ -510,17 +506,17 @@ if __name__ == "__main__": + + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) +- 
writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) +- writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) ++ writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) + + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) +- writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) +- writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) +- writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) +- writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) ++ writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) + + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch new file mode 100644 index 00000000..b3eac157 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch @@ -0,0 +1,1421 @@ +diff --git a/setup.py b/setup.py +index 31e09f2..22a94e8 100644 +--- a/setup.py ++++ b/setup.py +@@ -2,7 +2,7 @@ from setuptools import find_packages, setup + + + extras = dict() +-extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] ++extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] + extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] + extras['all'] = extras['test'] + extras['docs'] + +diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py +index 03284da..4ca9aeb 100644 +--- a/syllabus/core/curriculum_base.py ++++ b/syllabus/core/curriculum_base.py +@@ -76,7 +76,7 @@ class Curriculum: + """ + self.completed_tasks += 1 + +- def update_on_step(self, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: ++ def update_on_step(self, task: typing.Any, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: + """ Update the curriculum with the current step results from the environment. 
+ + :param obs: Observation from teh environment +@@ -88,7 +88,7 @@ class Curriculum: + """ + raise NotImplementedError("This curriculum does not require step updates. Set update_on_step for the environment sync wrapper to False to improve performance and prevent this error.") + +- def update_on_step_batch(self, step_results: List[typing.Tuple[int, int, int, int, int]], env_id: int = None) -> None: ++ def update_on_step_batch(self, step_results: List[typing.Tuple[Any, Any, int, int, int, int]], env_id: int = None) -> None: + """Update the curriculum with a batch of step results from the environment. + + This method can be overridden to provide a more efficient implementation. It is used +@@ -96,9 +96,9 @@ class Curriculum: + + :param step_results: List of step results + """ +- obs, rews, terms, truncs, infos = tuple(step_results) ++ tasks, obs, rews, terms, truncs, infos = tuple(step_results) + for i in range(len(obs)): +- self.update_on_step(obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) ++ self.update_on_step(tasks[i], obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) + + def update_on_episode(self, episode_return: float, episode_length: int, episode_task: Any, env_id: int = None) -> None: + """Update the curriculum with episode results from the environment. +diff --git a/syllabus/core/curriculum_sync_wrapper.py b/syllabus/core/curriculum_sync_wrapper.py +index 6e069d8..f986643 100644 +--- a/syllabus/core/curriculum_sync_wrapper.py ++++ b/syllabus/core/curriculum_sync_wrapper.py +@@ -29,6 +29,14 @@ class CurriculumWrapper: + def tasks(self): + return self.task_space.tasks + ++ @property ++ def requires_step_updates(self): ++ return self.curriculum.requires_step_updates ++ ++ @property ++ def requires_episode_updates(self): ++ return self.curriculum.requires_episode_updates ++ + def get_tasks(self, task_space=None): + return self.task_space.get_tasks(gym_space=task_space) + +diff --git a/syllabus/core/environment_sync_wrapper.py b/syllabus/core/environment_sync_wrapper.py +index c995aa1..6edee7c 100644 +--- a/syllabus/core/environment_sync_wrapper.py ++++ b/syllabus/core/environment_sync_wrapper.py +@@ -19,7 +19,8 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + def __init__(self, + env, + components: MultiProcessingComponents, +- update_on_step: bool = True, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? ++ update_on_step: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? ++ update_on_progress: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? 
+ batch_size: int = 100, + buffer_size: int = 2, # Having an extra task in the buffer minimizes wait time at reset + task_space: TaskSpace = None, +@@ -34,6 +35,7 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + self.update_queue = components.update_queue + self.task_space = task_space + self.update_on_step = update_on_step ++ self.update_on_progress = update_on_progress + self.batch_size = batch_size + self.global_task_completion = global_task_completion + self.task_progress = 0.0 +@@ -125,17 +127,21 @@ class MultiProcessingSyncWrapper(gym.Wrapper): + def _package_step_updates(self): + step_batch = { + "update_type": "step_batch", +- "metrics": ([self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), ++ "metrics": ([self._tasks[:self._batch_step], self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), + "env_id": self.instance_id, + "request_sample": False + } +- task_batch = { +- "update_type": "task_progress_batch", +- "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), +- "env_id": self.instance_id, +- "request_sample": False +- } +- return [step_batch, task_batch] ++ update = [step_batch] ++ ++ if self.update_on_progress: ++ task_batch = { ++ "update_type": "task_progress_batch", ++ "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), ++ "env_id": self.instance_id, ++ "request_sample": False ++ } ++ update.append(task_batch) ++ return update + + def add_task(self, task): + update = { +diff --git a/syllabus/curricula/annealing_box.py b/syllabus/curricula/annealing_box.py +index 6c565ec..101981c 100644 +--- a/syllabus/curricula/annealing_box.py ++++ b/syllabus/curricula/annealing_box.py +@@ -49,8 +49,8 @@ class AnnealingBoxCurriculum(Curriculum): + """ + # Linear annealing from start_values to end_values + annealed_values = ( +- self.start_values + (self.end_values - self.start_values) * +- np.minimum(self.current_step, self.total_steps) / self.total_steps ++ self.start_values + (self.end_values - self.start_values) * ++ np.minimum(self.current_step, self.total_steps) / self.total_steps + ) + +- return [annealed_values.copy() for _ in range(k)] +\ No newline at end of file ++ return [annealed_values.copy() for _ in range(k)] +diff --git a/syllabus/curricula/noop.py b/syllabus/curricula/noop.py +index f6bd5dc..fb5d8ae 100644 +--- a/syllabus/curricula/noop.py ++++ b/syllabus/curricula/noop.py +@@ -28,7 +28,7 @@ class NoopCurriculum(Curriculum): + """ + pass + +- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: + """ + Update the curriculum with the current step results from the environment. + """ +diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py +index 9515df4..9c808dd 100644 +--- a/syllabus/curricula/plr/plr_wrapper.py ++++ b/syllabus/curricula/plr/plr_wrapper.py +@@ -23,16 +23,15 @@ class RolloutStorage(object): + get_value=None, + ): + self.num_steps = num_steps +- self.buffer_steps = num_steps * 2 # Hack to prevent overflow from lagging updates. ++ self.buffer_steps = num_steps * 4 # Hack to prevent overflow from lagging updates. 
+ self.num_processes = num_processes + self._requires_value_buffers = requires_value_buffers + self._get_value = get_value + self.tasks = torch.zeros(self.buffer_steps, num_processes, 1, dtype=torch.int) + self.masks = torch.ones(self.buffer_steps + 1, num_processes, 1) + self.obs = [[[0] for _ in range(self.num_processes)]] * self.buffer_steps +- self._fill = torch.zeros(self.buffer_steps, num_processes, 1) + self.env_steps = [0] * num_processes +- self.should_update = False ++ self.ready_buffers = set() + + if requires_value_buffers: + self.returns = torch.zeros(self.buffer_steps + 1, num_processes, 1) +@@ -46,12 +45,10 @@ class RolloutStorage(object): + self.action_log_dist = torch.zeros(self.buffer_steps, num_processes, action_space.n) + + self.num_steps = num_steps +- self.step = 0 + + def to(self, device): + self.masks = self.masks.to(device) + self.tasks = self.tasks.to(device) +- self._fill = self._fill.to(device) + if self._requires_value_buffers: + self.rewards = self.rewards.to(device) + self.value_preds = self.value_preds.to(device) +@@ -59,108 +56,79 @@ class RolloutStorage(object): + else: + self.action_log_dist = self.action_log_dist.to(device) + +- def insert(self, masks, action_log_dist=None, value_preds=None, rewards=None, tasks=None): +- if self._requires_value_buffers: +- assert (value_preds is not None and rewards is not None), "Selected strategy requires value_preds and rewards" +- if len(rewards.shape) == 3: +- rewards = rewards.squeeze(2) +- self.value_preds[self.step].copy_(torch.as_tensor(value_preds)) +- self.rewards[self.step].copy_(torch.as_tensor(rewards)[:, None]) +- self.masks[self.step + 1].copy_(torch.as_tensor(masks)[:, None]) +- else: +- self.action_log_dist[self.step].copy_(action_log_dist) +- if tasks is not None: +- assert isinstance(tasks[0], int), "Provided task must be an integer" +- self.tasks[self.step].copy_(torch.as_tensor(tasks)[:, None]) +- self.step = (self.step + 1) % self.num_steps +- + def insert_at_index(self, env_index, mask=None, action_log_dist=None, obs=None, reward=None, task=None, steps=1): +- if env_index >= self.num_processes: +- warnings.warn(f"Env index {env_index} is greater than the number of processes {self.num_processes}. Using index {env_index % self.num_processes} instead.") +- env_index = env_index % self.num_processes +- + step = self.env_steps[env_index] + end_step = step + steps +- # Update buffer fill traacker, and check for common usage errors. +- try: +- if end_step > len(self._fill): +- raise IndexError +- self._fill[step:end_step, env_index] = 1 +- except IndexError as e: +- if any(self._fill[:][env_index] == 0): +- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. Your value for PLR's num_processes may be too high.") from e +- else: +- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. 
Your value for PLR's num_processes may be too low.") from e + + if mask is not None: + self.masks[step + 1:end_step + 1, env_index].copy_(torch.as_tensor(mask[:, None])) ++ + if obs is not None: + for s in range(step, end_step): + self.obs[s][env_index] = obs[s - step] ++ + if reward is not None: + self.rewards[step:end_step, env_index].copy_(torch.as_tensor(reward[:, None])) ++ + if action_log_dist is not None: + self.action_log_dist[step:end_step, env_index].copy_(torch.as_tensor(action_log_dist[:, None])) ++ + if task is not None: + try: +- task = int(task) ++ int(task[0]) + except TypeError: +- assert isinstance(task, int), f"Provided task must be an integer, got {task} with type {type(task)} instead." +- self.tasks[step:end_step, env_index].copy_(torch.as_tensor(task)) +- else: +- self.env_steps[env_index] += steps +- # Hack for now, we call insert_at_index twice +- while all(self._fill[self.step] == 1): +- self.step = (self.step + 1) % self.buffer_steps +- # Check if we have enough steps to compute a task sampler update +- if self.step == self.num_steps + 1: +- self.should_update = True +- +- def _get_values(self): ++ assert isinstance(task, int), f"Provided task must be an integer, got {task[0]} with type {type(task[0])} instead." ++ self.tasks[step:end_step, env_index].copy_(torch.as_tensor(np.array(task)[:, None])) ++ ++ self.env_steps[env_index] += steps ++ if env_index not in self.ready_buffers and self.env_steps[env_index] >= self.num_steps: ++ self.ready_buffers.add(env_index) ++ ++ def _get_values(self, env_index): + if self._get_value is None: + raise UsageError("Selected strategy requires value predictions. Please provide get_value function.") +- for step in range(self.num_steps): +- values = self._get_value(self.obs[step]) ++ for step in range(0, self.num_steps, self.num_processes): ++ obs = self.obs[step: step + self.num_processes][env_index] ++ values = self._get_value(obs) ++ ++ # Reshape values if necessary + if len(values.shape) == 3: + warnings.warn(f"Value function returned a 3D tensor of shape {values.shape}. Attempting to squeeze last dimension.") + values = torch.squeeze(values, -1) + if len(values.shape) == 1: + warnings.warn(f"Value function returned a 1D tensor of shape {values.shape}. 
Attempting to unsqueeze last dimension.") + values = torch.unsqueeze(values, -1) +- self.value_preds[step].copy_(values) + +- def after_update(self): ++ self.value_preds[step: step + self.num_processes, env_index].copy_(values) ++ ++ def after_update(self, env_index): + # After consuming the first num_steps of data, remove them and shift the remaining data in the buffer +- self.tasks[0: self.num_steps].copy_(self.tasks[self.num_steps: self.buffer_steps]) +- self.masks[0: self.num_steps].copy_(self.masks[self.num_steps: self.buffer_steps]) +- self.obs[0: self.num_steps][:] = self.obs[self.num_steps: self.buffer_steps][:] ++ self.tasks = self.tasks.roll(-self.num_steps, 0) ++ self.masks = self.masks.roll(-self.num_steps, 0) ++ self.obs[0:][env_index] = self.obs[self.num_steps: self.buffer_steps][env_index] + + if self._requires_value_buffers: +- self.returns[0: self.num_steps].copy_(self.returns[self.num_steps: self.buffer_steps]) +- self.rewards[0: self.num_steps].copy_(self.rewards[self.num_steps: self.buffer_steps]) +- self.value_preds[0: self.num_steps].copy_(self.value_preds[self.num_steps: self.buffer_steps]) ++ self.returns = self.returns.roll(-self.num_steps, 0) ++ self.rewards = self.rewards.roll(-self.num_steps, 0) ++ self.value_preds = self.value_preds.roll(-self.num_steps, 0) + else: +- self.action_log_dist[0: self.num_steps].copy_(self.action_log_dist[self.num_steps: self.buffer_steps]) ++ self.action_log_dist = self.action_log_dist.roll(-self.num_steps, 0) + +- self._fill[0: self.num_steps].copy_(self._fill[self.num_steps: self.buffer_steps]) +- self._fill[self.num_steps: self.buffer_steps].copy_(0) ++ self.env_steps[env_index] -= self.num_steps ++ self.ready_buffers.remove(env_index) + +- self.env_steps = [steps - self.num_steps for steps in self.env_steps] +- self.should_update = False +- self.step = self.step - self.num_steps +- +- def compute_returns(self, gamma, gae_lambda): ++ def compute_returns(self, gamma, gae_lambda, env_index): + assert self._requires_value_buffers, "Selected strategy does not use compute_rewards." +- self._get_values() ++ self._get_values(env_index) + gae = 0 + for step in reversed(range(self.rewards.size(0), self.num_steps)): + delta = ( +- self.rewards[step] +- + gamma * self.value_preds[step + 1] * self.masks[step + 1] +- - self.value_preds[step] ++ self.rewards[step, env_index] ++ + gamma * self.value_preds[step + 1, env_index] * self.masks[step + 1, env_index] ++ - self.value_preds[step, env_index] + ) +- gae = delta + gamma * gae_lambda * self.masks[step + 1] * gae +- self.returns[step] = gae + self.value_preds[step] ++ gae = delta + gamma * gae_lambda * self.masks[step + 1, env_index] * gae ++ self.returns[step, env_index] = gae + self.value_preds[step, env_index] + + + def null(x): +@@ -252,11 +220,15 @@ class PrioritizedLevelReplay(Curriculum): + else: + return [self._task_sampler.sample() for _ in range(k)] + +- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: + """ + Update the curriculum with the current step results from the environment. + """ + assert env_id is not None, "env_id must be provided for PLR updates." ++ if env_id >= self._num_processes: ++ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. 
Using index {env_id % self._num_processes} instead.") ++ env_id = env_id % self._num_processes ++ + # Update rollouts + self._rollouts.insert_at_index( + env_id, +@@ -266,14 +238,22 @@ class PrioritizedLevelReplay(Curriculum): + obs=np.array([obs]), + ) + ++ # Update task sampler ++ if env_id in self._rollouts.ready_buffers: ++ self._update_sampler(env_id) ++ + def update_on_step_batch( +- self, step_results: List[Tuple[Any, int, bool, bool, Dict]], env_id: int = None ++ self, step_results: List[Tuple[int, Any, int, bool, bool, Dict]], env_id: int = None + ) -> None: + """ + Update the curriculum with a batch of step results from the environment. + """ + assert env_id is not None, "env_id must be provided for PLR updates." +- obs, rews, terms, truncs, infos = step_results ++ if env_id >= self._num_processes: ++ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. Using index {env_id % self._num_processes} instead.") ++ env_id = env_id % self._num_processes ++ ++ tasks, obs, rews, terms, truncs, infos = step_results + self._rollouts.insert_at_index( + env_id, + mask=np.logical_not(np.logical_or(terms, truncs)), +@@ -281,25 +261,19 @@ class PrioritizedLevelReplay(Curriculum): + reward=rews, + obs=obs, + steps=len(rews), ++ task=tasks, + ) + +- def update_task_progress(self, task: Any, success_prob: float, env_id: int = None) -> None: +- """ +- Update the curriculum with a task and its success probability upon +- success or failure. +- """ +- assert env_id is not None, "env_id must be provided for PLR updates." +- self._rollouts.insert_at_index( +- env_id, +- task=task, +- ) + # Update task sampler +- if self._rollouts.should_update: +- if self._task_sampler.requires_value_buffers: +- self._rollouts.compute_returns(self._gamma, self._gae_lambda) +- self._task_sampler.update_with_rollouts(self._rollouts) +- self._rollouts.after_update() +- self._task_sampler.after_update() ++ if env_id in self._rollouts.ready_buffers: ++ self._update_sampler(env_id) ++ ++ def _update_sampler(self, env_id): ++ if self._task_sampler.requires_value_buffers: ++ self._rollouts.compute_returns(self._gamma, self._gae_lambda, env_id) ++ self._task_sampler.update_with_rollouts(self._rollouts, env_id) ++ self._rollouts.after_update(env_id) ++ self._task_sampler.after_update() + + def _enumerate_tasks(self, space): + assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete" +@@ -312,10 +286,10 @@ class PrioritizedLevelReplay(Curriculum): + """ + Log the task distribution to the provided tensorboard writer. 
+ """ +- super().log_metrics(writer, step) ++ # super().log_metrics(writer, step) + metrics = self._task_sampler.metrics() + writer.add_scalar("curriculum/proportion_seen", metrics["proportion_seen"], step) + writer.add_scalar("curriculum/score", metrics["score"], step) +- for task in list(self.task_space.tasks)[:10]: +- writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) +- writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) ++ # for task in list(self.task_space.tasks)[:10]: ++ # writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) ++ # writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) +diff --git a/syllabus/curricula/plr/task_sampler.py b/syllabus/curricula/plr/task_sampler.py +index 15ad485..c1e97a1 100644 +--- a/syllabus/curricula/plr/task_sampler.py ++++ b/syllabus/curricula/plr/task_sampler.py +@@ -73,7 +73,7 @@ class TaskSampler: + 'Must provide action space to PLR if using "policy_entropy", "least_confidence", or "min_margin" strategies' + ) + +- def update_with_rollouts(self, rollouts): ++ def update_with_rollouts(self, rollouts, actor_id=None): + if self.strategy == "random": + return + +@@ -93,7 +93,7 @@ class TaskSampler: + else: + raise ValueError(f"Unsupported strategy, {self.strategy}") + +- self._update_with_rollouts(rollouts, score_function) ++ self._update_with_rollouts(rollouts, score_function, actor_index=actor_id) + + def update_task_score(self, actor_index, task_idx, score, num_steps): + score = self._partial_update_task_score(actor_index, task_idx, score, num_steps, done=True) +@@ -165,14 +165,15 @@ class TaskSampler: + def requires_value_buffers(self): + return self.strategy in ["gae", "value_l1", "one_step_td_error"] + +- def _update_with_rollouts(self, rollouts, score_function): ++ def _update_with_rollouts(self, rollouts, score_function, actor_index=None): + tasks = rollouts.tasks + if not self.requires_value_buffers: + policy_logits = rollouts.action_log_dist + done = ~(rollouts.masks > 0) + total_steps, num_actors = rollouts.tasks.shape[:2] + +- for actor_index in range(num_actors): ++ actors = [actor_index] if actor_index is not None else range(num_actors) ++ for actor_index in actors: + done_steps = done[:, actor_index].nonzero()[:total_steps, 0] + start_t = 0 + +diff --git a/syllabus/curricula/sequential.py b/syllabus/curricula/sequential.py +index baa1263..ec3b8b0 100644 +--- a/syllabus/curricula/sequential.py ++++ b/syllabus/curricula/sequential.py +@@ -177,9 +177,9 @@ class SequentialCurriculum(Curriculum): + if self.current_curriculum.requires_episode_updates: + self.current_curriculum.update_on_episode(episode_return, episode_len, episode_task, env_id) + +- def update_on_step(self, obs, rew, term, trunc, info, env_id=None): ++ def update_on_step(self, task, obs, rew, term, trunc, info, env_id=None): + if self.current_curriculum.requires_step_updates: +- self.current_curriculum.update_on_step(obs, rew, term, trunc, info, env_id) ++ self.current_curriculum.update_on_step(task, obs, rew, term, trunc, info, env_id) + + def update_on_step_batch(self, step_results, env_id=None): + if self.current_curriculum.requires_step_updates: +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py +index a6d469e..b848d69 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py 
++++ b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py +@@ -14,6 +14,7 @@ import gym as openai_gym + import gymnasium as gym + import numpy as np + import procgen # noqa: F401 ++from procgen import ProcgenEnv + import torch + import torch.nn as nn + import torch.optim as optim +@@ -21,10 +22,10 @@ from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 + from torch.utils.tensorboard import SummaryWriter + + from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum +-from syllabus.curricula import DomainRandomization, LearningProgressCurriculum, CentralizedPrioritizedLevelReplay ++from syllabus.curricula import CentralizedPrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum + from syllabus.examples.models import ProcgenAgent + from syllabus.examples.task_wrappers import ProcgenTaskWrapper +-from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize ++from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + + def parse_args(): +@@ -46,6 +47,8 @@ def parse_args(): + help="the entity (team) of wandb's project") + parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + help="weather to capture videos of the agent performances (check out `videos` folder)") ++ parser.add_argument("--logging-dir", type=str, default=".", ++ help="the base directory for logging and wandb storage.") + + # Algorithm specific arguments + parser.add_argument("--env-id", type=str, default="starpilot", +@@ -124,15 +127,15 @@ PROCGEN_RETURN_BOUNDS = { + } + + +-def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): ++def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) +- env = ProcgenTaskWrapper(env, env_id, seed=seed) +- if curriculum_components is not None: ++ if curriculum is not None: ++ env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, +- curriculum_components, ++ curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, + ) +@@ -147,36 +150,38 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def level_replay_evaluate( ++def full_level_replay_evaluate( + env_name, + policy, + num_episodes, + device, +- num_levels=0 ++ num_levels=1 # Not used + ): + policy.eval() +- eval_envs = gym.vector.SyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) +- for i in range(1) +- ] ++ ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False + ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) + +- eval_episode_rewards = [] ++ # Seed environments ++ seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] ++ for i, seed in enumerate(seeds): ++ eval_envs.seed(seed, i) ++ + eval_obs, _ = eval_envs.reset() ++ eval_episode_rewards = [-1] * num_episodes + +- while len(eval_episode_rewards) < num_episodes: ++ while -1 in eval_episode_rewards: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + +- eval_obs, _, truncs, terms, infos = 
eval_envs.step(np.array([eval_action.cpu().numpy()])) +- +- for info in infos: +- if 'episode' in info.keys(): +- eval_episode_rewards.append(info['episode']['r']) ++ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) ++ for i, info in enumerate(infos): ++ if 'episode' in info.keys() and eval_episode_rewards[i] == -1: ++ eval_episode_rewards[i] = info['episode']['r'] + +- eval_envs.close() + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +@@ -185,8 +190,7 @@ def level_replay_evaluate( + return mean_returns, stddev_returns, normalized_mean_returns + + +-def fast_level_replay_evaluate( +- eval_envs, ++def level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -194,9 +198,13 @@ def fast_level_replay_evaluate( + num_levels=0 + ): + policy.eval() +- possible_seeds = np.arange(0, num_levels + 1) +- eval_obs, _ = eval_envs.reset(seed=list(np.random.choice(possible_seeds, size=num_episodes))) + ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False ++ ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") ++ eval_envs = wrap_vecenv(eval_envs) ++ eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: +@@ -231,10 +239,11 @@ if __name__ == "__main__": + name=run_name, + monitor_gym=True, + save_code=True, +- # dir="/fs/nexus-scratch/rsulli/" ++ dir=args.logging_dir + ) +- wandb.run.log_code("./syllabus/examples") +- writer = SummaryWriter(f"./runs/{run_name}") ++ # wandb.run.log_code("./syllabus/examples") ++ ++ writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), +@@ -250,7 +259,7 @@ if __name__ == "__main__": + print("Device:", device) + + # Curriculum setup +- task_queue = update_queue = None ++ curriculum = None + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) +@@ -273,6 +282,16 @@ if __name__ == "__main__": + elif args.curriculum_method == "lp": + print("Using learning progress.") + curriculum = LearningProgressCurriculum(sample_env.task_space) ++ elif args.curriculum_method == "sq": ++ print("Using sequential curriculum.") ++ curricula = [] ++ stopping = [] ++ for i in range(199): ++ curricula.append(i + 1) ++ stopping.append("steps>=50000") ++ curricula.append(list(range(i + 1))) ++ stopping.append("steps>=50000") ++ curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) + else: + raise ValueError(f"Unknown curriculum method {args.curriculum_method}") + curriculum = make_multiprocessing_curriculum(curriculum) +@@ -285,7 +304,7 @@ if __name__ == "__main__": + make_env( + args.env_id, + args.seed + i, +- curriculum_components=curriculum.get_components() if args.curriculum else None, ++ curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) +@@ -293,22 +312,6 @@ if __name__ == "__main__": + ) + envs = wrap_vecenv(envs) + +- test_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=0) +- for i in range(args.num_eval_episodes) +- ] +- ) +- test_eval_envs = wrap_vecenv(test_eval_envs) +- +- train_eval_envs = 
gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=200) +- for i in range(args.num_eval_episodes) +- ] +- ) +- train_eval_envs = wrap_vecenv(train_eval_envs) +- + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( +@@ -369,6 +372,8 @@ if __name__ == "__main__": + print(f"global_step={global_step}, episodic_return={item['episode']['r']}") + writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) ++ if curriculum is not None: ++ curriculum.log_metrics(writer, global_step) + break + + # Syllabus curriculum update +@@ -388,8 +393,6 @@ if __name__ == "__main__": + }, + } + curriculum.update(update) +- #if args.curriculum: +- # curriculum.log_metrics(writer, global_step) + + # bootstrap value if not done + with torch.no_grad(): +@@ -487,8 +490,18 @@ if __name__ == "__main__": + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent +- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) +- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) ++ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) ++ full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) +@@ -502,12 +515,21 @@ if __name__ == "__main__": + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) ++ + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) +- writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) ++ + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) +- 
writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) ++ writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) ++ + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() +diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +index e13c22e..70a27f6 100644 +--- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py ++++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +@@ -14,6 +14,7 @@ import gym as openai_gym + import gymnasium as gym + import numpy as np + import procgen # noqa: F401 ++from procgen import ProcgenEnv + import torch + import torch.nn as nn + import torch.optim as optim +@@ -23,8 +24,8 @@ from torch.utils.tensorboard import SummaryWriter + from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum + from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum + from syllabus.examples.models import ProcgenAgent +-from syllabus.examples.task_wrappers import ProcgenTaskWrapper +-from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize ++from syllabus.examples.task_wrappers import ProcgenTaskWrapper, MinigridTaskWrapper ++from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + + + def parse_args(): +@@ -126,18 +127,17 @@ PROCGEN_RETURN_BOUNDS = { + } + + +-def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): ++def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): + def thunk(): + env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) + env = GymV21CompatibilityV0(env=env) +- env = ProcgenTaskWrapper(env, env_id, seed=seed) +- if curriculum_components is not None: ++ if curriculum is not None: ++ env = ProcgenTaskWrapper(env, env_id, seed=seed) + env = MultiProcessingSyncWrapper( + env, +- curriculum_components, ++ curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, +- buffer_size=4, + ) + return env + return thunk +@@ -150,7 +150,7 @@ def wrap_vecenv(vecenv): + return vecenv + + +-def level_replay_evaluate( ++def slow_level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -158,28 +158,24 @@ def level_replay_evaluate( + num_levels=0 + ): + policy.eval() +- eval_envs = gym.vector.SyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) +- for i in range(1) +- ] ++ ++ eval_envs = ProcgenEnv( ++ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") + eval_envs = wrap_vecenv(eval_envs) +- +- eval_episode_rewards = [] + eval_obs, _ = eval_envs.reset() ++ eval_episode_rewards = [] + + while len(eval_episode_rewards) < num_episodes: + with torch.no_grad(): + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + +- eval_obs, _, truncs, terms, infos = 
eval_envs.step(np.array([eval_action.cpu().numpy()])) +- +- for info in infos: ++ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) ++ for i, info in enumerate(infos): + if 'episode' in info.keys(): + eval_episode_rewards.append(info['episode']['r']) + +- eval_envs.close() + mean_returns = np.mean(eval_episode_rewards) + stddev_returns = np.std(eval_episode_rewards) + env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +@@ -188,8 +184,7 @@ def level_replay_evaluate( + return mean_returns, stddev_returns, normalized_mean_returns + + +-def fast_level_replay_evaluate( +- eval_envs, ++def level_replay_evaluate( + env_name, + policy, + num_episodes, +@@ -198,15 +193,12 @@ def fast_level_replay_evaluate( + ): + policy.eval() + +- # Choose evaluation seeds +- if num_levels == 0: +- seeds = np.random.randint(0, 2 ** 16 - 1, size=num_episodes) +- else: +- seeds = np.random.choice(np.arange(0, num_levels), size=num_episodes) +- +- seed_envs = [(int(seed), env) for seed, env in zip(seeds, range(num_episodes))] +- eval_obs, _ = eval_envs.reset(seed=seed_envs) +- ++ eval_envs = ProcgenEnv( ++ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False ++ ) ++ eval_envs = VecExtractDictObs(eval_envs, "rgb") ++ eval_envs = wrap_vecenv(eval_envs) ++ eval_obs, _ = eval_envs.reset() + eval_episode_rewards = [-1] * num_episodes + + while -1 in eval_episode_rewards: +@@ -251,9 +243,9 @@ if __name__ == "__main__": + save_code=True, + dir=args.logging_dir + ) +- wandb.run.log_code(os.path.join(args.logging_dir, "/syllabus/examples")) ++ # wandb.run.log_code("./syllabus/examples") + +- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) ++ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), +@@ -273,7 +265,9 @@ if __name__ == "__main__": + if args.curriculum: + sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + sample_env = GymV21CompatibilityV0(env=sample_env) +- sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) ++ # code to edit ++ # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) ++ sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) + + # Intialize Curriculum Method + if args.curriculum_method == "plr": +@@ -316,7 +310,7 @@ if __name__ == "__main__": + make_env( + args.env_id, + args.seed + i, +- curriculum_components=curriculum.get_components() if args.curriculum else None, ++ curriculum=curriculum if args.curriculum else None, + num_levels=1 if args.curriculum else 0 + ) + for i in range(args.num_envs) +@@ -324,22 +318,6 @@ if __name__ == "__main__": + ) + envs = wrap_vecenv(envs) + +- test_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=0) +- for i in range(args.num_eval_episodes) +- ] +- ) +- test_eval_envs = wrap_vecenv(test_eval_envs) +- +- train_eval_envs = gym.vector.AsyncVectorEnv( +- [ +- make_env(args.env_id, args.seed + i, num_levels=200) +- for i in range(args.num_eval_episodes) +- ] +- ) +- train_eval_envs = wrap_vecenv(train_eval_envs) +- + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + print("Creating agent") + agent = ProcgenAgent( +@@ -500,8 +478,18 @@ if __name__ == "__main__": + explained_var = np.nan if var_y 
== 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent +- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) +- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) ++ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ++ ) ++ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) ++ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( ++ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ++ ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) +@@ -515,12 +503,21 @@ if __name__ == "__main__": + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) ++ + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) ++ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) ++ + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) ++ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) ++ + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() +diff --git a/syllabus/examples/utils/vecenv.py b/syllabus/examples/utils/vecenv.py +index 6e5a0a9..af3b187 100644 +--- a/syllabus/examples/utils/vecenv.py ++++ b/syllabus/examples/utils/vecenv.py +@@ -1,7 +1,6 @@ + import time + from collections import deque + +-import gym + import numpy as np + + +@@ -154,12 +153,20 @@ class VecEnvObservationWrapper(VecEnvWrapper): + pass + + def reset(self): +- obs, infos = self.venv.reset() ++ outputs = self.venv.reset() ++ if len(outputs) == 2: ++ obs, infos = outputs ++ else: ++ obs, infos = outputs, {} + return self.process(obs), infos + + def step_wait(self): +- print(self.venv) +- obs, rews, terms, truncs, infos = 
self.venv.step_wait() ++ env_outputs = self.venv.step_wait() ++ if len(env_outputs) == 4: ++ obs, rews, terms, infos = env_outputs ++ truncs = np.zeros_like(terms) ++ else: ++ obs, rews, terms, truncs, infos = env_outputs + return self.process(obs), rews, terms, truncs, infos + + +@@ -209,7 +216,10 @@ class VecNormalize(VecEnvWrapper): + + def reset(self, seed=None): + self.ret = np.zeros(self.num_envs) +- obs, infos = self.venv.reset(seed=seed) ++ if seed is not None: ++ obs, infos = self.venv.reset(seed=seed) ++ else: ++ obs, infos = self.venv.reset() + return self._obfilt(obs), infos + + +@@ -228,7 +238,10 @@ class VecMonitor(VecEnvWrapper): + self.eplen_buf = deque([], maxlen=keep_buf) + + def reset(self, seed=None): +- obs, infos = self.venv.reset(seed=seed) ++ if seed is not None: ++ obs, infos = self.venv.reset(seed=seed) ++ else: ++ obs, infos = self.venv.reset() + self.eprets = np.zeros(self.num_envs, 'f') + self.eplens = np.zeros(self.num_envs, 'i') + return obs, infos +@@ -239,7 +252,8 @@ class VecMonitor(VecEnvWrapper): + self.eprets += rews + self.eplens += 1 + # Convert dict of lists to list of dicts +- infos = [dict(zip(infos, t)) for t in zip(*infos.values())] ++ if isinstance(infos, dict): ++ infos = [dict(zip(infos, t)) for t in zip(*infos.values())] + newinfos = list(infos[:]) + for i in range(len(dones)): + if dones[i]: +diff --git a/syllabus/task_space/task_space.py b/syllabus/task_space/task_space.py +index 316e2f2..1ef674b 100644 +--- a/syllabus/task_space/task_space.py ++++ b/syllabus/task_space/task_space.py +@@ -7,20 +7,53 @@ from gymnasium.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Sp + + class TaskSpace(): + def __init__(self, gym_space: Union[Space, int], tasks=None): +- if isinstance(gym_space, int): +- # Syntactic sugar for discrete space +- gym_space = Discrete(gym_space) ++ ++ if not isinstance(gym_space, Space): ++ gym_space = self._create_gym_space(gym_space) + + self.gym_space = gym_space + +- # Autogenerate task names for discrete spaces +- if isinstance(gym_space, Discrete): +- if tasks is None: +- tasks = range(gym_space.n) ++ # Autogenerate task names ++ if tasks is None: ++ tasks = self._generate_task_names(gym_space) + + self._tasks = set(tasks) if tasks is not None else None + self._encoder, self._decoder = self._make_task_encoder(gym_space, tasks) + ++ def _create_gym_space(self, gym_space): ++ if isinstance(gym_space, int): ++ # Syntactic sugar for discrete space ++ gym_space = Discrete(gym_space) ++ elif isinstance(gym_space, tuple): ++ # Syntactic sugar for discrete space ++ gym_space = MultiDiscrete(gym_space) ++ elif isinstance(gym_space, list): ++ # Syntactic sugar for tuple space ++ spaces = [] ++ for i, value in enumerate(gym_space): ++ spaces[i] = self._create_gym_space(value) ++ gym_space = Tuple(spaces) ++ elif isinstance(gym_space, dict): ++ # Syntactic sugar for dict space ++ spaces = {} ++ for key, value in gym_space.items(): ++ spaces[key] = self._create_gym_space(value) ++ gym_space = Dict(spaces) ++ return gym_space ++ ++ def _generate_task_names(self, gym_space): ++ if isinstance(gym_space, Discrete): ++ tasks = tuple(range(gym_space.n)) ++ elif isinstance(gym_space, MultiDiscrete): ++ tasks = [tuple(range(dim)) for dim in gym_space.nvec] ++ elif isinstance(gym_space, Tuple): ++ tasks = [self._generate_task_names(value) for value in gym_space.spaces] ++ elif isinstance(gym_space, Dict): ++ tasks = {key: tuple(self._generate_task_names(value)) for key, value in gym_space.spaces.items()} ++ else: ++ tasks = 
None ++ return tasks ++ + def _make_task_encoder(self, space, tasks): + if isinstance(space, Discrete): + assert space.n == len(tasks), f"Number of tasks ({space.n}) must match number of discrete options ({len(tasks)})" +@@ -28,14 +61,46 @@ class TaskSpace(): + self._decode_map = {i: task for i, task in enumerate(tasks)} + encoder = lambda task: self._encode_map[task] if task in self._encode_map else None + decoder = lambda task: self._decode_map[task] if task in self._decode_map else None ++ ++ elif isinstance(space, Box): ++ encoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None ++ decoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None + elif isinstance(space, Tuple): +- for i, task in enumerate(tasks): +- assert self.count_tasks(space.spaces[i]) == len(task), "Each task must have number of components equal to Tuple space length. Got {len(task)} components and space length {self.count_tasks(space.spaces[i])}." ++ ++ assert len(space.spaces) == len(tasks), f"Number of task ({len(space.spaces)})must match options in Tuple ({len(tasks)})" + results = [list(self._make_task_encoder(s, t)) for (s, t) in zip(space.spaces, tasks)] + encoders = [r[0] for r in results] + decoders = [r[1] for r in results] + encoder = lambda task: [e(t) for e, t in zip(encoders, task)] + decoder = lambda task: [d(t) for d, t in zip(decoders, task)] ++ ++ elif isinstance(space, MultiDiscrete): ++ assert len(space.nvec) == len(tasks), f"Number of steps in a tasks ({len(space.nvec)}) must match number of discrete options ({len(tasks)})" ++ ++ combinations = [p for p in itertools.product(*tasks)] ++ encode_map = {task: i for i, task in enumerate(combinations)} ++ decode_map = {i: task for i, task in enumerate(combinations)} ++ ++ encoder = lambda task: encode_map[task] if task in encode_map else None ++ decoder = lambda task: decode_map[task] if task in decode_map else None ++ ++ elif isinstance(space, Dict): ++ ++ def helper(task, spaces, tasks, action="encode"): ++ # Iteratively encodes or decodes each space in the dictionary ++ output = {} ++ if (isinstance(spaces, dict) or isinstance(spaces, Dict)): ++ for key, value in spaces.items(): ++ if (isinstance(value, dict) or isinstance(value, Dict)): ++ temp = helper(task[key], value, tasks[key], action) ++ output.update({key: temp}) ++ else: ++ encoder, decoder = self._make_task_encoder(value, tasks[key]) ++ output[key] = encoder(task[key]) if action == "encode" else decoder(task[key]) ++ return output ++ ++ encoder = lambda task: helper(task, space.spaces, tasks, "encode") ++ decoder = lambda task: helper(task, space.spaces, tasks, "decode") + else: + encoder = lambda task: task + decoder = lambda task: task +@@ -152,6 +217,7 @@ class TaskSpace(): + return Discrete(self.gym_space.n + amount) + + def sample(self): ++ assert isinstance(self.gym_space, Discrete) or isinstance(self.gym_space, Box) or isinstance(self.gym_space, Dict) or isinstance(self.gym_space, Tuple) + return self.decode(self.gym_space.sample()) + + def list_tasks(self): +diff --git a/syllabus/task_space/test_task_space.py b/syllabus/task_space/test_task_space.py +index 0ec6b4e..109d0a7 100644 +--- a/syllabus/task_space/test_task_space.py ++++ b/syllabus/task_space/test_task_space.py +@@ -2,33 +2,148 @@ import gymnasium as gym + from syllabus.task_space import TaskSpace + + if __name__ == "__main__": ++ # Discrete Tests + task_space = TaskSpace(gym.spaces.Discrete(3), ["a", "b", "c"]) ++ + assert task_space.encode("a") == 0, 
f"Expected 0, got {task_space.encode('a')}" + assert task_space.encode("b") == 1, f"Expected 1, got {task_space.encode('b')}" + assert task_space.encode("c") == 2, f"Expected 2, got {task_space.encode('c')}" +- assert task_space.encode("d") == None, f"Expected None, got {task_space.encode('d')}" ++ assert task_space.encode("d") is None, f"Expected None, got {task_space.encode('d')}" + + assert task_space.decode(0) == "a", f"Expected a, got {task_space.decode(0)}" + assert task_space.decode(1) == "b", f"Expected b, got {task_space.decode(1)}" + assert task_space.decode(2) == "c", f"Expected c, got {task_space.decode(2)}" +- assert task_space.decode(3) == None, f"Expected None, got {task_space.decode(3)}" ++ assert task_space.decode(3) is None, f"Expected None, got {task_space.decode(3)}" + print("Discrete tests passed!") + ++ # MultiDiscrete Tests ++ task_space = TaskSpace(gym.spaces.MultiDiscrete([3, 2]), [("a", "b", "c"), (1, 0)]) ++ ++ assert task_space.encode(('a', 1)) == 0, f"Expected 0, got {task_space.encode(('a', 1))}" ++ assert task_space.encode(('b', 0)) == 3, f"Expected 3, got {task_space.encode(('b', 0))}" ++ assert task_space.encode(('c', 1)) == 4, f"Expected 4, got {task_space.encode(('c', 1))}" ++ ++ assert task_space.decode(3) == ('b', 0), f"Expected ('b', 0), got {task_space.decode(3)}" ++ assert task_space.decode(5) == ('c', 0), f"Expected ('c', 0), got {task_space.decode(5)}" ++ print("MultiDiscrete tests passed!") ++ ++ # Box Tests + task_space = TaskSpace(gym.spaces.Box(low=0, high=1, shape=(2,)), [(0, 0), (0, 1), (1, 0), (1, 1)]) ++ + assert task_space.encode([0.0, 0.0]) == [0.0, 0.0], f"Expected [0.0, 0.0], got {task_space.encode([0.0, 0.0])}" + assert task_space.encode([0.0, 0.1]) == [0.0, 0.1], f"Expected [0.0, 0.1], got {task_space.encode([0.0, 0.1])}" + assert task_space.encode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.encode([0.1, 0.1])}" + assert task_space.encode([1.0, 0.1]) == [1.0, 0.1], f"Expected [1.0, 0.1], got {task_space.encode([1.0, 0.1])}" + assert task_space.encode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.encode([1.0, 1.0])}" +- assert task_space.encode([1.2, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" +- assert task_space.encode([1.0, 1.2]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" +- assert task_space.encode([-0.1, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([1.2, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([1.0, 1.2]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" ++ assert task_space.encode([-0.1, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" + + assert task_space.decode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.decode([1.0, 1.0])}" + assert task_space.decode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.decode([0.1, 0.1])}" +- assert task_space.decode([-0.1, 1.0]) == None, f"Expected None, got {task_space.decode([1.2, 1.0])}" ++ assert task_space.decode([-0.1, 1.0]) is None, f"Expected None, got {task_space.decode([1.2, 1.0])}" + print("Box tests passed!") + ++ # Tuple Tests ++ task_spaces = (gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3)) ++ task_names = ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")) ++ task_space = TaskSpace(gym.spaces.Tuple(task_spaces), task_names) ++ ++ assert task_space.encode((('a', 0), 'Y')) == [1, 1], f"Expected 0, got 
{task_space.encode((('a', 1),'Y'))}" ++ assert task_space.decode([0, 1]) == [('a', 1), 'Y'], f"Expected 0, got {task_space.decode([0, 1])}" ++ print("Tuple tests passed!") ++ ++ # Dictionary Tests ++ task_spaces = gym.spaces.Dict({ ++ "ext_controller": gym.spaces.MultiDiscrete([5, 2, 2]), ++ "inner_state": gym.spaces.Dict( ++ { ++ "charge": gym.spaces.Discrete(10), ++ "system_checks": gym.spaces.Tuple((gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3))), ++ "job_status": gym.spaces.Dict( ++ { ++ "task": gym.spaces.Discrete(5), ++ "progress": gym.spaces.Box(low=0, high=1, shape=(2,)), ++ } ++ ), ++ } ++ ), ++ }) ++ task_names = { ++ "ext_controller": [("a", "b", "c", "d", "e"), (1, 0), ("X", "Y")], ++ "inner_state": { ++ "charge": [0, 1, 13, 3, 94, 35, 6, 37, 8, 9], ++ "system_checks": ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")), ++ "job_status": { ++ "task": ["A", "B", "C", "D", "E"], ++ "progress": [(0, 0), (0, 1), (1, 0), (1, 1)], ++ } ++ } ++ } ++ task_space = TaskSpace(task_spaces, task_names) ++ ++ test_val = { ++ "ext_controller": ('b', 1, 'X'), ++ 'inner_state': { ++ 'charge': 1, ++ 'system_checks': [('a', 0), 'Y'], ++ 'job_status': {'task': 'C', 'progress': [0.0, 0.0]} ++ } ++ } ++ decode_val = { ++ "ext_controller": 4, ++ "inner_state": { ++ "charge": 1, ++ "system_checks": [1, 1], ++ "job_status": {"progress": [0.0, 0.0], "task": 2}, ++ }, ++ } ++ ++ assert task_space.encode(test_val) == decode_val, f"Expected {decode_val}, \n but got {task_space.encode(test_val)}" ++ assert task_space.decode(decode_val) == test_val, f"Expected {test_val}, \n but got {task_space.decode(decode_val)}" ++ ++ test_val_2 = { ++ "ext_controller": ("e", 1, "Y"), ++ "inner_state": { ++ "charge": 37, ++ "system_checks": [("b", 0), "Z"], ++ "job_status": {"progress": [0.0, 0.1], "task": "D"}, ++ }, ++ } ++ decode_val_2 = { ++ "ext_controller": 17, ++ "inner_state": { ++ "charge": 7, ++ "system_checks": [3, 2], ++ "job_status": {"progress": [0.0, 0.1], "task": 3}, ++ }, ++ } ++ ++ assert task_space.encode(test_val_2) == decode_val_2, f"Expected {decode_val_2}, \n but got {task_space.encode(test_val_2)}" ++ assert task_space.decode(decode_val_2) == test_val_2, f"Expected {test_val_2}, \n but got {task_space.decode(decode_val_2)}" ++ ++ test_val_3 = { ++ "ext_controller": ("e", 1, "X"), ++ "inner_state": { ++ "charge": 8, ++ "system_checks": [("c", 0), "X"], ++ "job_status": {"progress": [0.5, 0.1], "task": "E"}, ++ }, ++ } ++ decode_val_3 = { ++ "ext_controller": 16, ++ "inner_state": { ++ "charge": 8, ++ "system_checks": [5, 0], ++ "job_status": {"progress": [0.5, 0.1], "task": 4}, ++ }, ++ } ++ ++ assert task_space.encode(test_val_3) == decode_val_3, f"Expected {decode_val_3}, \n but got {task_space.encode(test_val_3)}" ++ assert task_space.decode(decode_val_3) == test_val_3, f"Expected {test_val_3}, \n but got {task_space.decode(decode_val_3)}" ++ ++ print("Dictionary tests passed!") ++ + # Test syntactic sugar + task_space = TaskSpace(3) + assert task_space.encode(0) == 0, f"Expected 0, got {task_space.encode(0)}" +@@ -36,4 +151,32 @@ if __name__ == "__main__": + assert task_space.encode(2) == 2, f"Expected 2, got {task_space.encode(2)}" + assert task_space.encode(3) is None, f"Expected None, got {task_space.encode(3)}" + ++ task_space = TaskSpace((2, 4)) ++ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" ++ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" ++ assert task_space.encode((1, 0)) == 4, f"Expected 2, got 
{task_space.encode((1, 0))}" ++ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" ++ ++ task_space = TaskSpace((2, 4)) ++ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" ++ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" ++ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" ++ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" ++ ++ task_space = TaskSpace({"map": 5, "level": (4, 10), "difficulty": 3}) ++ ++ encoding = task_space.encode({"map": 0, "level": (0, 0), "difficulty": 0}) ++ expected = {"map": 0, "level": 0, "difficulty": 0} ++ ++ encoding = task_space.encode({"map": 4, "level": (3, 9), "difficulty": 2}) ++ expected = {"map": 4, "level": 39, "difficulty": 2} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" ++ ++ encoding = task_space.encode({"map": 2, "level": (2, 0), "difficulty": 1}) ++ expected = {"map": 2, "level": 20, "difficulty": 1} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" ++ ++ encoding = task_space.encode({"map": 5, "level": (2, 11), "difficulty": -1}) ++ expected = {"map": None, "level": None, "difficulty": None} ++ assert encoding == expected, f"Expected {expected}, got {encoding}" + print("All tests passed!") +diff --git a/syllabus/tests/utils.py b/syllabus/tests/utils.py +index 314a29c..98bac82 100644 +--- a/syllabus/tests/utils.py ++++ b/syllabus/tests/utils.py +@@ -57,7 +57,7 @@ def run_episode(env, new_task=None, curriculum=None, env_id=0): + action = env.action_space.sample() + obs, rew, term, trunc, info = env.step(action) + if curriculum and curriculum.requires_step_updates: +- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) ++ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) + curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) + ep_rew += rew + ep_len += 1 +@@ -87,7 +87,7 @@ def run_set_length(env, curriculum=None, episodes=None, steps=None, env_id=0, en + action = env.action_space.sample() + obs, rew, term, trunc, info = env.step(action) + if curriculum and curriculum.requires_step_updates: +- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) ++ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) + curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) + ep_rew += rew + ep_len += 1 +diff --git a/tests/multiprocessing_smoke_tests.py b/tests/multiprocessing_smoke_tests.py +index 9db9f47..b788179 100644 +--- a/tests/multiprocessing_smoke_tests.py ++++ b/tests/multiprocessing_smoke_tests.py +@@ -21,23 +21,23 @@ nethack_env = create_nethack_env() + cartpole_env = create_cartpole_env() + + curricula = [ +- (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), +- (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), +- # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), +- (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), +- (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { +- "get_value": get_test_values, +- "device": "cpu", +- 
"num_processes": N_ENVS, +- "num_steps": 2048 +- }), +- (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), +- (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { +- 'start_values': [-0.02, 0.02], +- 'end_values': [-0.3, 0.3], +- 'total_steps': [10] +- }), +- (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), ++ (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), ++ (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), ++ # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), ++ (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), ++ (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { ++ "get_value": get_test_values, ++ "device": "cpu", ++ "num_processes": N_ENVS, ++ "num_steps": 2048 ++ }), ++ (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), ++ (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { ++ 'start_values': [-0.02, 0.02], ++ 'end_values': [-0.3, 0.3], ++ 'total_steps': [10] ++ }), ++ (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), + ] + + test_names = [curriculum_args[0].__name__ for curriculum_args in curricula] diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-metadata.json b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-metadata.json new file mode 100644 index 00000000..40a4901d --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-metadata.json @@ -0,0 +1,167 @@ +{ + "os": "Linux-3.10.0-1160.11.1.el7.x86_64-x86_64-with-glibc2.10", + "python": "3.8.5", + "heartbeatAt": "2024-04-23T04:14:00.420002", + "startedAt": "2024-04-23T04:13:59.807444", + "docker": null, + "cuda": "10.1.243", + "args": [ + "--curriculum", + "True", + "--track", + "True", + "--env-id", + "bigfish" + ], + "state": "running", + "program": "cleanrl_procgen_plr.py", + "codePathLocal": "cleanrl_procgen_plr.py", + "codePath": "syllabus/examples/training_scripts/cleanrl_procgen_plr.py", + "git": { + "remote": "https://github.com/RoseyGreenBlue/Syllabus.git", + "commit": "63dc8f62e4d9d567eb92bb2f6c2bb186a0dc8ffb" + }, + "email": "djhaayusv04@gmail.com", + "root": "/data/averma/MARL/Syllabus", + "host": "f411843fc70b", + "username": "root", + "executable": "/home/user/miniconda/envs/test2_py/bin/python", + "cpu_count": 12, + "cpu_count_logical": 24, + "cpu_freq": { + "current": 1261.035125, + "min": 1200.0, + "max": 3700.0 + }, + "cpu_freq_per_core": [ + { + "current": 
1500.573, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1204.028, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1199.877, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1385.607, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1783.215, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1370.458, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.085, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1216.064, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.5, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1441.638, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1207.141, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1397.229, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1199.877, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1499.951, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1204.858, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.292, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1202.575, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1200.085, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1244.079, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1243.872, + "min": 1200.0, + "max": 3700.0 + }, + { + "current": 1210.253, + "min": 1200.0, + "max": 3700.0 + } + ], + "disk": { + "/": { + "total": 5952.626953125, + "used": 988.7802200317383 + } + }, + "memory": { + "total": 251.63711166381836 + } +} diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-summary.json b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-summary.json new file mode 100644 index 00000000..4ac1ba99 --- /dev/null +++ b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 3}} \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/run-4m0uhqaw.wandb b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/run-4m0uhqaw.wandb new file mode 100644 index 0000000000000000000000000000000000000000..574c4e238df8ee3afeed73d09d49399c09331206 GIT binary patch literal 2455 zcmbVOON$&;6z)6yNX<>pGv;AMAr>fuA-C(@)k6q6o2FK(52|9>gwrCJ0RE$JzeL!=k=X)?y2q9 zpWFHQ>&Cl{n=keoNI(-Li5E=YGhN3pOqn|Rpl@1^=Xk!R4F&_|yRvs5HNNd)Ea;J3 zjNYKPT0i~yecD38(gex>tu1vmYd9Jjw?prCYTZNkg}EhB$PdDhIz=u$=wJy8Y(KSQ z8nFpFEx941aTeNXmbgO}+i93d7~@vXm^VR4!B62+JjvXV?J>@>D2Ric2kr#5cvdjH zf?ICk(b?lFZu5+~d$Z?f@a8QVdroP7m$|%li>1Se@M+wg9$_&L_yK6fMHu3>0&?0ifii;*2G|a1uh!dZDh=0lmF1Ua9%DE$^$ z7Pv)NfbP`qdAu#=I}v3MseSnrj3&IugZ+%h%KwR}@$x;O9)v(KD? 
z>Nu)zy6{i2Oe&YS?;N>yKL5_opN31TIYH}ismO*x{XXp<1bhpM@WwHK-khTDn3svBvZT z3}*Pyn{gC_`96fqW1!k%|0o3GKDTHc#VU)n+TbWORTJ@M$%wK@Ng2ruJC2j{wcY|L zV6<qYQLrb%B?v2s6b6UXF(PZ)W5}qYKp3=}%KUo|-APz)e%1r>`zjqY`#$4DLZADQ){wzLQnTc4GWCw(fX#npBA$w^D_NS`Lnx>F}Dz4(r^aM Date: Mon, 29 Apr 2024 06:32:19 +0000 Subject: [PATCH 02/10] aayush changes --- syllabus/examples/training_scripts/.gitignore | 3 + .../examples/training_scripts/command.txt | 1 - .../training_scripts/requirements.txt | 131 -- .../training_scripts/wandb/latest-run | 1 - .../training_scripts/cleanrl_procgen_plr.py | 522 ------ .../files/conda-environment.yaml | 165 -- .../files/config.yaml | 130 -- .../files/diff.patch | 13 - ...out.tfevents.1713840453.f411843fc70b.297.0 | 1 - .../files/requirements.txt | 146 -- ...8eada0f5549603091d9236731d61fe0bce82.patch | 566 ------- .../files/wandb-metadata.json | 167 -- .../files/wandb-summary.json | 1 - .../run-rpqjdbky.wandb | Bin 258994 -> 0 bytes .../training_scripts/cleanrl_procgen_plr.py | 528 ------ .../files/conda-environment.yaml | 165 -- .../files/config.yaml | 130 -- .../files/diff.patch | 13 - ...ut.tfevents.1713840773.f411843fc70b.1109.0 | 1 - .../files/requirements.txt | 146 -- ...8eada0f5549603091d9236731d61fe0bce82.patch | 1417 ---------------- .../files/wandb-metadata.json | 167 -- .../files/wandb-summary.json | 1 - .../run-x38taylu.wandb | Bin 97707 -> 0 bytes .../training_scripts/cleanrl_procgen_plr.py | 522 ------ .../files/conda-environment.yaml | 165 -- .../files/config.yaml | 126 -- .../files/diff.patch | 122 -- ...ut.tfevents.1713841239.f411843fc70b.1794.0 | 1 - .../files/requirements.txt | 146 -- ...8eada0f5549603091d9236731d61fe0bce82.patch | 1408 ---------------- .../files/wandb-metadata.json | 167 -- .../files/wandb-summary.json | 1 - .../run-37l9hfvl.wandb | Bin 51518 -> 0 bytes .../training_scripts/cleanrl_procgen_plr.py | 524 ------ .../files/conda-environment.yaml | 165 -- .../files/config.yaml | 130 -- .../files/diff.patch | 133 -- ...ut.tfevents.1713845400.f411843fc70b.2432.0 | 1 - .../files/requirements.txt | 146 -- ...8eada0f5549603091d9236731d61fe0bce82.patch | 1419 ---------------- .../files/wandb-metadata.json | 167 -- .../files/wandb-summary.json | 1 - .../run-maennc1u.wandb | Bin 5972 -> 0 bytes .../training_scripts/cleanrl_procgen_plr.py | 524 ------ .../files/conda-environment.yaml | 165 -- .../files/config.yaml | 126 -- .../files/diff.patch | 142 -- ...8eada0f5549603091d9236731d61fe0bce82.patch | 1421 ----------------- .../files/wandb-metadata.json | 167 -- .../files/wandb-summary.json | 1 - .../run-4m0uhqaw.wandb | Bin 2455 -> 0 bytes 52 files changed, 3 insertions(+), 12302 deletions(-) create mode 100644 syllabus/examples/training_scripts/.gitignore delete mode 100644 syllabus/examples/training_scripts/command.txt delete mode 100644 syllabus/examples/training_scripts/requirements.txt delete mode 120000 syllabus/examples/training_scripts/wandb/latest-run delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/conda-environment.yaml delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/config.yaml delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/diff.patch delete mode 120000 
syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/events.out.tfevents.1713840453.f411843fc70b.297.0 delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/requirements.txt delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-metadata.json delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-summary.json delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/run-rpqjdbky.wandb delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/conda-environment.yaml delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/config.yaml delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/diff.patch delete mode 120000 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/events.out.tfevents.1713840773.f411843fc70b.1109.0 delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/requirements.txt delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-metadata.json delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-summary.json delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/run-x38taylu.wandb delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/conda-environment.yaml delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/config.yaml delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/diff.patch delete mode 120000 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/events.out.tfevents.1713841239.f411843fc70b.1794.0 delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/requirements.txt delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-metadata.json delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-summary.json delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/run-37l9hfvl.wandb delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/conda-environment.yaml delete mode 100644 
syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/config.yaml delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/diff.patch delete mode 120000 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/events.out.tfevents.1713845400.f411843fc70b.2432.0 delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/requirements.txt delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-metadata.json delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-summary.json delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/run-maennc1u.wandb delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/conda-environment.yaml delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/config.yaml delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/diff.patch delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-metadata.json delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-summary.json delete mode 100644 syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/run-4m0uhqaw.wandb diff --git a/syllabus/examples/training_scripts/.gitignore b/syllabus/examples/training_scripts/.gitignore new file mode 100644 index 00000000..49e0461d --- /dev/null +++ b/syllabus/examples/training_scripts/.gitignore @@ -0,0 +1,3 @@ +command.txt +wandb +requirements.txt diff --git a/syllabus/examples/training_scripts/command.txt b/syllabus/examples/training_scripts/command.txt deleted file mode 100644 index ab135b53..00000000 --- a/syllabus/examples/training_scripts/command.txt +++ /dev/null @@ -1 +0,0 @@ -python cleanrl_procgen_plr.py --curriculum True --track True --env-id "bigfish" diff --git a/syllabus/examples/training_scripts/requirements.txt b/syllabus/examples/training_scripts/requirements.txt deleted file mode 100644 index 505ff718..00000000 --- a/syllabus/examples/training_scripts/requirements.txt +++ /dev/null @@ -1,131 +0,0 @@ -absl-py==2.1.0 -aiosignal==1.3.1 -alabaster==0.7.13 -attrs==23.2.0 -Babel==2.14.0 -beautifulsoup4==4.12.3 -cachetools==5.3.3 -certifi==2024.2.2 -cffi==1.16.0 -charset-normalizer==3.3.2 -click==8.1.7 -cloudpickle==3.0.0 -cmake==3.29.2 -contourpy==1.1.1 -cycler==0.12.1 -dm-tree==0.1.8 -docutils==0.20.1 -exceptiongroup==1.2.0 -Farama-Notifications==0.0.4 -filelock==3.13.4 -fonttools==4.51.0 -frozenlist==1.4.1 -fsspec==2024.3.1 -furo==2024.1.29 -future==1.0.0 -glcontext==2.5.0 -glfw==1.12.0 -google-auth==2.29.0 -google-auth-oauthlib==1.0.0 -grpcio==1.62.1 -gym==0.23.0 -gym-notices==0.0.8 -gymnasium==0.28.1 -idna==3.7 -imageio==2.34.0 -imageio-ffmpeg==0.3.0 -imagesize==1.4.1 -importlib_metadata==7.1.0 -importlib_resources==6.4.0 -iniconfig==2.0.0 -jax-jumpy==1.0.0 
-Jinja2==3.1.3 -jsonschema==4.21.1 -jsonschema-specifications==2023.12.1 -kiwisolver==1.4.5 -lazy_loader==0.4 -lz4==4.3.3 -Markdown==3.6 -markdown-it-py==3.0.0 -MarkupSafe==2.1.5 -matplotlib==3.7.5 -mdurl==0.1.2 -moderngl==5.10.0 -mpmath==1.3.0 -msgpack==1.0.8 -networkx==3.1 -numpy==1.24.4 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-nccl-cu12==2.19.3 -nvidia-nvjitlink-cu12==12.4.127 -nvidia-nvtx-cu12==12.1.105 -oauthlib==3.2.2 -packaging==24.0 -pandas==2.0.3 -pillow==10.3.0 -pkgutil_resolve_name==1.3.10 -pluggy==1.4.0 --e git+https://github.com/RyanNavillus/procgen.git@ed4be818681701c52cdf5d4996d1ceca7083e368#egg=procgen -protobuf==5.26.1 -py-cpuinfo==9.0.0 -pyarrow==15.0.2 -pyasn1==0.6.0 -pyasn1_modules==0.4.0 -pycparser==2.22 -pyenchant==3.2.2 -pyglet==1.4.11 -Pygments==2.17.2 -pyparsing==3.1.2 -pytest==8.1.1 -pytest-benchmark==4.0.0 -python-dateutil==2.9.0.post0 -pytz==2024.1 -PyWavelets==1.4.1 -PyYAML==6.0.1 -ray==2.10.0 -referencing==0.34.0 -requests==2.31.0 -requests-oauthlib==2.0.0 -rich==13.7.1 -rpds-py==0.18.0 -rsa==4.9 -scikit-image==0.21.0 -scipy==1.10.0 -shellingham==1.5.4 -Shimmy==1.3.0 -six==1.16.0 -snowballstemmer==2.2.0 -soupsieve==2.5 -Sphinx==7.1.2 -sphinx-basic-ng==1.0.0b2 -sphinx-tabs==3.4.5 -sphinxcontrib-applehelp==1.0.4 -sphinxcontrib-devhelp==1.0.2 -sphinxcontrib-htmlhelp==2.0.1 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.3 -sphinxcontrib-serializinghtml==1.1.5 -sphinxcontrib-spelling==8.0.0 --e git+https://github.com/RoseyGreenBlue/Syllabus.git@6e36433fbb5c0e990358d7f895d976086dbfb65e#egg=Syllabus_RL -sympy==1.12 -tensorboard==2.14.0 -tensorboard-data-server==0.7.2 -tensorboardX==2.6.2.2 -tifffile==2023.7.10 -tomli==2.0.1 -torch==2.2.2 -triton==2.2.0 -typer==0.12.3 -typing_extensions==4.11.0 -tzdata==2024.1 -urllib3==2.2.1 -Werkzeug==3.0.2 -zipp==3.18.1 diff --git a/syllabus/examples/training_scripts/wandb/latest-run b/syllabus/examples/training_scripts/wandb/latest-run deleted file mode 120000 index 459ba0d0..00000000 --- a/syllabus/examples/training_scripts/wandb/latest-run +++ /dev/null @@ -1 +0,0 @@ -run-20240423_041359-4m0uhqaw \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py deleted file mode 100644 index d2d54b58..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +++ /dev/null @@ -1,522 +0,0 @@ -""" An example applying Syllabus Prioritized Level Replay to Procgen. 
This code is based on https://github.com/facebookresearch/level-replay/blob/main/train.py
-
-NOTE: In order to efficiently change the seed of a procgen environment directly without reinitializing it,
-we rely on Minqi Jiang's custom branch of procgen found here: https://github.com/minqi/procgen
-"""
-import argparse
-import os
-import random
-import time
-from collections import deque
-from distutils.util import strtobool
-
-import gym as openai_gym
-import gymnasium as gym
-import numpy as np
-import procgen  # noqa: F401
-from procgen import ProcgenEnv
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from shimmy.openai_gym_compatibility import GymV21CompatibilityV0
-from torch.utils.tensorboard import SummaryWriter
-
-from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum
-from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum
-from syllabus.examples.models import ProcgenAgent
-from syllabus.examples.task_wrappers import ProcgenTaskWrapper
-from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs
-
-
-def parse_args():
-    # fmt: off
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
-        help="the name of this experiment")
-    parser.add_argument("--seed", type=int, default=1,
-        help="seed of the experiment")
-    parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
-        help="if toggled, `torch.backends.cudnn.deterministic=False`")
-    parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
-        help="if toggled, cuda will be enabled by default")
-    parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
-        help="if toggled, this experiment will be tracked with Weights and Biases")
-    parser.add_argument("--wandb-project-name", type=str, default="syllabus",
-        help="the wandb's project name")
-    parser.add_argument("--wandb-entity", type=str, default=None,
-        help="the entity (team) of wandb's project")
-    parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
-        help="weather to capture videos of the agent performances (check out `videos` folder)")
-    parser.add_argument("--logging-dir", type=str, default=".",
-        help="the base directory for logging and wandb storage.")
-
-    # Algorithm specific arguments
-    parser.add_argument("--env-id", type=str, default="starpilot",
-        help="the id of the environment")
-    parser.add_argument("--total-timesteps", type=int, default=int(25e6),
-        help="total timesteps of the experiments")
-    parser.add_argument("--learning-rate", type=float, default=5e-4,
-        help="the learning rate of the optimizer")
-    parser.add_argument("--num-envs", type=int, default=64,
-        help="the number of parallel game environments")
-    parser.add_argument("--num-steps", type=int, default=256,
-        help="the number of steps to run in each environment per policy rollout")
-    parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
-        help="Toggle learning rate annealing for policy and value networks")
-    parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
-        help="Use GAE for advantage computation")
-    parser.add_argument("--gamma", type=float, default=0.999,
-        help="the discount factor gamma")
-    parser.add_argument("--gae-lambda", type=float, default=0.95,
-        help="the lambda for the general advantage estimation")
-    parser.add_argument("--num-minibatches", type=int, default=8,
-        help="the number of mini-batches")
-    parser.add_argument("--update-epochs", type=int, default=3,
-        help="the K epochs to update the policy")
-    parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
-        help="Toggles advantages normalization")
-    parser.add_argument("--clip-coef", type=float, default=0.2,
-        help="the surrogate clipping coefficient")
-    parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
-        help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
-    parser.add_argument("--ent-coef", type=float, default=0.01,
-        help="coefficient of the entropy")
-    parser.add_argument("--vf-coef", type=float, default=0.5,
-        help="coefficient of the value function")
-    parser.add_argument("--max-grad-norm", type=float, default=0.5,
-        help="the maximum norm for the gradient clipping")
-    parser.add_argument("--target-kl", type=float, default=None,
-        help="the target KL divergence threshold")
-
-    # Procgen arguments
-    parser.add_argument("--full-dist", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
-        help="Train on full distribution of levels.")
-
-    # Curriculum arguments
-    parser.add_argument("--curriculum", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
-        help="if toggled, this experiment will use curriculum learning")
-    parser.add_argument("--curriculum-method", type=str, default="plr",
-        help="curriculum method to use")
-    parser.add_argument("--num-eval-episodes", type=int, default=10,
-        help="the number of episodes to evaluate the agent on after each policy update.")
-
-    args = parser.parse_args()
-    args.batch_size = int(args.num_envs * args.num_steps)
-    args.minibatch_size = int(args.batch_size // args.num_minibatches)
-    # fmt: on
-    return args
-
-
-PROCGEN_RETURN_BOUNDS = {
-    "coinrun": (5, 10),
-    "starpilot": (2.5, 64),
-    "caveflyer": (3.5, 12),
-    "dodgeball": (1.5, 19),
-    "fruitbot": (-1.5, 32.4),
-    "chaser": (0.5, 13),
-    "miner": (1.5, 13),
-    "jumper": (3, 10),
-    "leaper": (3, 10),
-    "maze": (5, 10),
-    "bigfish": (1, 40),
-    "heist": (3.5, 10),
-    "climber": (2, 12.6),
-    "plunder": (4.5, 30),
-    "ninja": (3.5, 10),
-    "bossfight": (0.5, 13),
-}
-
-
-def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1):
-    def thunk():
-        env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels)
-        env = GymV21CompatibilityV0(env=env)
-        if curriculum is not None:
-            env = ProcgenTaskWrapper(env, env_id, seed=seed)
-            env = MultiProcessingSyncWrapper(
-                env,
-                curriculum.get_components(),
-                update_on_step=False,
-                task_space=env.task_space,
-            )
-        return env
-    return thunk
-
-
-def wrap_vecenv(vecenv):
-    vecenv.is_vector_env = True
-    vecenv = VecMonitor(venv=vecenv, filename=None, keep_buf=100)
-    vecenv = VecNormalize(venv=vecenv, ob=False, ret=True)
-    return vecenv
-
-
-def slow_level_replay_evaluate(
-    env_name,
-    policy,
-    num_episodes,
-    device,
-    num_levels=0
-):
-    policy.eval()
-
-    eval_envs = ProcgenEnv(
-        num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False
-    )
-    eval_envs = VecExtractDictObs(eval_envs, "rgb")
-    eval_envs = wrap_vecenv(eval_envs)
-    eval_obs, _ = eval_envs.reset()
-
eval_episode_rewards = [] - - while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): - if 'episode' in info.keys(): - eval_episode_rewards.append(info['episode']['r']) - - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] - normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) - policy.train() - return mean_returns, stddev_returns, normalized_mean_returns - - -def level_replay_evaluate( - env_name, - policy, - num_episodes, - device, - num_levels=0 -): - policy.eval() - - eval_envs = ProcgenEnv( - num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): - if 'episode' in info.keys() and eval_episode_rewards[i] == -1: - eval_episode_rewards[i] = info['episode']['r'] - - # print(eval_episode_rewards) - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] - normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) - policy.train() - return mean_returns, stddev_returns, normalized_mean_returns - - -def make_value_fn(): - def get_value(obs): - obs = np.array(obs) - with torch.no_grad(): - return agent.get_value(torch.Tensor(obs).to(device)) - return get_value - - -if __name__ == "__main__": - args = parse_args() - run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" - if args.track: - import wandb - - wandb.init( - project=args.wandb_project_name, - entity=args.wandb_entity, - sync_tensorboard=True, - config=vars(args), - name=run_name, - monitor_gym=True, - save_code=True, - dir=args.logging_dir - ) - # wandb.run.log_code("./syllabus/examples") - - writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), - ) - - # TRY NOT TO MODIFY: seeding - random.seed(args.seed) - np.random.seed(args.seed) - torch.manual_seed(args.seed) - torch.backends.cudnn.deterministic = args.torch_deterministic - - device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") - print("Device:", device) - - # Curriculum setup - curriculum = None - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) - sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) - - # Intialize Curriculum Method - if args.curriculum_method == "plr": - print("Using prioritized level replay.") - curriculum = PrioritizedLevelReplay( - sample_env.task_space, - sample_env.observation_space, - num_steps=args.num_steps, - num_processes=args.num_envs, - 
gamma=args.gamma, - gae_lambda=args.gae_lambda, - task_sampler_kwargs_dict={"strategy": "value_l1"}, - get_value=make_value_fn(), - ) - elif args.curriculum_method == "dr": - print("Using domain randomization.") - curriculum = DomainRandomization(sample_env.task_space) - elif args.curriculum_method == "lp": - print("Using learning progress.") - curriculum = LearningProgressCurriculum(sample_env.task_space) - elif args.curriculum_method == "sq": - print("Using sequential curriculum.") - curricula = [] - stopping = [] - for i in range(199): - curricula.append(i + 1) - stopping.append("steps>=50000") - curricula.append(list(range(i + 1))) - stopping.append("steps>=50000") - curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) - else: - raise ValueError(f"Unknown curriculum method {args.curriculum_method}") - curriculum = make_multiprocessing_curriculum(curriculum) - del sample_env - - # env setup - print("Creating env") - envs = gym.vector.AsyncVectorEnv( - [ - make_env( - args.env_id, - args.seed + i, - curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) - ] - ) - envs = wrap_vecenv(envs) - - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( - envs.single_observation_space.shape, - envs.single_action_space.n, - arch="large", - base_kwargs={'recurrent': False, 'hidden_size': 256} - ).to(device) - optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) - - # ALGO Logic: Storage setup - obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) - actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) - logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) - rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) - values = torch.zeros((args.num_steps, args.num_envs)).to(device) - - # TRY NOT TO MODIFY: start the game - global_step = 0 - start_time = time.time() - next_obs, _ = envs.reset() - next_obs = torch.Tensor(next_obs).to(device) - next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // args.batch_size - episode_rewards = deque(maxlen=10) - completed_episodes = 0 - - for update in range(1, num_updates + 1): - # Annealing the rate if instructed to do so. - if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates - lrnow = frac * args.learning_rate - optimizer.param_groups[0]["lr"] = lrnow - - for step in range(0, args.num_steps): - global_step += 1 * args.num_envs - obs[step] = next_obs - dones[step] = next_done - - # ALGO LOGIC: action logic - with torch.no_grad(): - action, logprob, _, value = agent.get_action_and_value(next_obs) - values[step] = value.flatten() - actions[step] = action - logprobs[step] = logprob - - # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, reward, term, trunc, info = envs.step(action.cpu().numpy()) - done = np.logical_or(term, trunc) - rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) - completed_episodes += sum(done) - - for item in info: - if "episode" in item.keys(): - episode_rewards.append(item['episode']['r']) - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) - if curriculum is not None: - curriculum.log_metrics(writer, global_step) - break - - # bootstrap value if not done - with torch.no_grad(): - next_value = agent.get_value(next_obs).reshape(1, -1) - if args.gae: - advantages = torch.zeros_like(rewards).to(device) - lastgaelam = 0 - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - nextvalues = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - nextvalues = values[t + 1] - delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] - advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam - returns = advantages + values - else: - returns = torch.zeros_like(rewards).to(device) - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - next_return = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - next_return = returns[t + 1] - returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return - advantages = returns - values - - # flatten the batch - b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) - b_logprobs = logprobs.reshape(-1) - b_actions = actions.reshape((-1,) + envs.single_action_space.shape) - b_advantages = advantages.reshape(-1) - b_returns = returns.reshape(-1) - b_values = values.reshape(-1) - - # Optimizing the policy and value network - b_inds = np.arange(args.batch_size) - clipfracs = [] - for epoch in range(args.update_epochs): - np.random.shuffle(b_inds) - for start in range(0, args.batch_size, args.minibatch_size): - end = start + args.minibatch_size - mb_inds = b_inds[start:end] - - _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds]) - logratio = newlogprob - b_logprobs[mb_inds] - ratio = logratio.exp() - - with torch.no_grad(): - # calculate approx_kl http://joschu.net/blog/kl-approx.html - old_approx_kl = (-logratio).mean() - approx_kl = ((ratio - 1) - logratio).mean() - clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()] - - mb_advantages = b_advantages[mb_inds] - if args.norm_adv: - mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) - - # Policy loss - pg_loss1 = -mb_advantages * ratio - pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + args.clip_coef) - pg_loss = torch.max(pg_loss1, pg_loss2).mean() - - # Value loss - newvalue = newvalue.view(-1) - if args.clip_vloss: - v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 - v_clipped = b_values[mb_inds] + torch.clamp( - newvalue - b_values[mb_inds], - -args.clip_coef, - args.clip_coef, - ) - v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 - v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) - v_loss = 0.5 * v_loss_max.mean() - else: - v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() - - 
entropy_loss = entropy.mean() - loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef - - optimizer.zero_grad() - loss.backward() - nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) - optimizer.step() - - if args.target_kl is not None: - if approx_kl > args.target_kl: - break - - y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() - var_y = np.var(y_true) - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent - mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) - writer.add_scalar("charts/episode_returns", np.mean(episode_rewards), global_step) - writer.add_scalar("losses/value_loss", v_loss.item(), global_step) - writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step) - writer.add_scalar("losses/entropy", entropy_loss.item(), global_step) - writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step) - writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step) - writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step) - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) - - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) - writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) - - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) - writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) - - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() - writer.close() diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/conda-environment.yaml 
b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/conda-environment.yaml deleted file mode 100644 index cd0b0b09..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/conda-environment.yaml +++ /dev/null @@ -1,165 +0,0 @@ -name: test2_py -channels: - - defaults -dependencies: - - _libgcc_mutex=0.1=main - - _openmp_mutex=5.1=1_gnu - - ca-certificates=2024.3.11=h06a4308_0 - - ld_impl_linux-64=2.38=h1181459_1 - - libffi=3.3=he6710b0_2 - - libgcc-ng=11.2.0=h1234567_1 - - libgomp=11.2.0=h1234567_1 - - libstdcxx-ng=11.2.0=h1234567_1 - - ncurses=6.4=h6a678d5_0 - - openssl=1.1.1w=h7f8727e_0 - - pip=23.3.1=py38h06a4308_0 - - python=3.8.5=h7579374_1 - - readline=8.2=h5eee18b_0 - - setuptools=68.2.2=py38h06a4308_0 - - sqlite=3.41.2=h5eee18b_0 - - tk=8.6.12=h1ccaba5_0 - - wheel=0.41.2=py38h06a4308_0 - - xz=5.4.6=h5eee18b_0 - - zlib=1.2.13=h5eee18b_0 - - pip: - - absl-py==2.1.0 - - aiosignal==1.3.1 - - alabaster==0.7.13 - - appdirs==1.4.4 - - attrs==23.2.0 - - babel==2.14.0 - - beautifulsoup4==4.12.3 - - cachetools==5.3.3 - - certifi==2024.2.2 - - cffi==1.16.0 - - charset-normalizer==3.3.2 - - click==8.1.7 - - cloudpickle==3.0.0 - - cmake==3.29.2 - - contourpy==1.1.1 - - cycler==0.12.1 - - dm-tree==0.1.8 - - docker-pycreds==0.4.0 - - docutils==0.20.1 - - exceptiongroup==1.2.0 - - farama-notifications==0.0.4 - - filelock==3.13.4 - - fonttools==4.51.0 - - frozenlist==1.4.1 - - fsspec==2024.3.1 - - furo==2024.1.29 - - future==1.0.0 - - gitdb==4.0.11 - - gitpython==3.1.43 - - glcontext==2.5.0 - - glfw==1.12.0 - - google-auth==2.29.0 - - google-auth-oauthlib==1.0.0 - - grpcio==1.62.1 - - gym==0.23.0 - - gym-notices==0.0.8 - - gymnasium==0.28.1 - - idna==3.7 - - imageio==2.34.0 - - imageio-ffmpeg==0.3.0 - - imagesize==1.4.1 - - importlib-metadata==7.1.0 - - importlib-resources==6.4.0 - - iniconfig==2.0.0 - - jax-jumpy==1.0.0 - - jinja2==3.1.3 - - jsonschema==4.21.1 - - jsonschema-specifications==2023.12.1 - - kiwisolver==1.4.5 - - lazy-loader==0.4 - - lz4==4.3.3 - - markdown==3.6 - - markdown-it-py==3.0.0 - - markupsafe==2.1.5 - - matplotlib==3.7.5 - - mdurl==0.1.2 - - moderngl==5.10.0 - - mpmath==1.3.0 - - msgpack==1.0.8 - - networkx==3.1 - - numpy==1.24.4 - - nvidia-cublas-cu12==12.1.3.1 - - nvidia-cuda-cupti-cu12==12.1.105 - - nvidia-cuda-nvrtc-cu12==12.1.105 - - nvidia-cuda-runtime-cu12==12.1.105 - - nvidia-cudnn-cu12==8.9.2.26 - - nvidia-cufft-cu12==11.0.2.54 - - nvidia-curand-cu12==10.3.2.106 - - nvidia-cusolver-cu12==11.4.5.107 - - nvidia-cusparse-cu12==12.1.0.106 - - nvidia-nccl-cu12==2.19.3 - - nvidia-nvjitlink-cu12==12.4.127 - - nvidia-nvtx-cu12==12.1.105 - - oauthlib==3.2.2 - - packaging==24.0 - - pandas==2.0.3 - - pillow==10.3.0 - - pkgutil-resolve-name==1.3.10 - - pluggy==1.4.0 - - protobuf==4.25.3 - - psutil==5.9.8 - - py-cpuinfo==9.0.0 - - pyarrow==15.0.2 - - pyasn1==0.6.0 - - pyasn1-modules==0.4.0 - - pycparser==2.22 - - pyenchant==3.2.2 - - pyglet==1.4.11 - - pygments==2.17.2 - - pyparsing==3.1.2 - - pytest==8.1.1 - - pytest-benchmark==4.0.0 - - python-dateutil==2.9.0.post0 - - pytz==2024.1 - - pywavelets==1.4.1 - - pyyaml==6.0.1 - - ray==2.10.0 - - referencing==0.34.0 - - requests==2.31.0 - - requests-oauthlib==2.0.0 - - rich==13.7.1 - - rpds-py==0.18.0 - - rsa==4.9 - - scikit-image==0.21.0 - - scipy==1.10.0 - - sentry-sdk==1.45.0 - - setproctitle==1.3.3 - - shellingham==1.5.4 - - shimmy==1.3.0 - - six==1.16.0 - - smmap==5.0.1 - - snowballstemmer==2.2.0 - - soupsieve==2.5 - - sphinx==7.1.2 - - sphinx-basic-ng==1.0.0b2 - - 
sphinx-tabs==3.4.5 - - sphinxcontrib-applehelp==1.0.4 - - sphinxcontrib-devhelp==1.0.2 - - sphinxcontrib-htmlhelp==2.0.1 - - sphinxcontrib-jsmath==1.0.1 - - sphinxcontrib-qthelp==1.0.3 - - sphinxcontrib-serializinghtml==1.1.5 - - sphinxcontrib-spelling==8.0.0 - - syllabus-rl==0.5 - - sympy==1.12 - - tensorboard==2.14.0 - - tensorboard-data-server==0.7.2 - - tensorboardx==2.6.2.2 - - tifffile==2023.7.10 - - tomli==2.0.1 - - torch==2.2.2 - - triton==2.2.0 - - typer==0.12.3 - - typing-extensions==4.11.0 - - tzdata==2024.1 - - urllib3==2.2.1 - - wandb==0.16.6 - - werkzeug==3.0.2 - - zipp==3.18.1 -prefix: /home/user/miniconda/envs/test2_py - diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/config.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/config.yaml deleted file mode 100644 index 60afaf32..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/config.yaml +++ /dev/null @@ -1,130 +0,0 @@ -wandb_version: 1 - -exp_name: - desc: null - value: cleanrl_procgen_plr -seed: - desc: null - value: 1 -torch_deterministic: - desc: null - value: true -cuda: - desc: null - value: true -track: - desc: null - value: true -wandb_project_name: - desc: null - value: syllabus -wandb_entity: - desc: null - value: null -capture_video: - desc: null - value: false -logging_dir: - desc: null - value: . -env_id: - desc: null - value: bigfish -total_timesteps: - desc: null - value: 25000000 -learning_rate: - desc: null - value: 0.0005 -num_envs: - desc: null - value: 64 -num_steps: - desc: null - value: 256 -anneal_lr: - desc: null - value: false -gae: - desc: null - value: true -gamma: - desc: null - value: 0.999 -gae_lambda: - desc: null - value: 0.95 -num_minibatches: - desc: null - value: 8 -update_epochs: - desc: null - value: 3 -norm_adv: - desc: null - value: true -clip_coef: - desc: null - value: 0.2 -clip_vloss: - desc: null - value: true -ent_coef: - desc: null - value: 0.01 -vf_coef: - desc: null - value: 0.5 -max_grad_norm: - desc: null - value: 0.5 -target_kl: - desc: null - value: null -full_dist: - desc: null - value: true -curriculum: - desc: null - value: true -curriculum_method: - desc: null - value: plr -num_eval_episodes: - desc: null - value: 10 -batch_size: - desc: null - value: 16384 -minibatch_size: - desc: null - value: 2048 -_wandb: - desc: null - value: - code_path: code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py - python_version: 3.8.5 - cli_version: 0.16.6 - framework: torch - is_jupyter_run: false - is_kaggle_kernel: false - start_time: 1713840447.0 - t: - 1: - - 1 - - 30 - - 55 - 2: - - 1 - - 30 - - 55 - 3: - - 13 - - 16 - - 23 - - 35 - 4: 3.8.5 - 5: 0.16.6 - 8: - - 5 - 13: linux-x86_64 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/diff.patch b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/diff.patch deleted file mode 100644 index 0a6b4640..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/diff.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/setup.py b/setup.py -index 31e09f2..22a94e8 100644 ---- a/setup.py -+++ b/setup.py -@@ -2,7 +2,7 @@ from setuptools import find_packages, setup - - - extras = dict() --extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] -+extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] - extras['docs'] = 
['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] - extras['all'] = extras['test'] + extras['docs'] - diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/events.out.tfevents.1713840453.f411843fc70b.297.0 b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/events.out.tfevents.1713840453.f411843fc70b.297.0 deleted file mode 120000 index b47732cd..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/events.out.tfevents.1713840453.f411843fc70b.297.0 +++ /dev/null @@ -1 +0,0 @@ -/data/averma/MARL/Syllabus/syllabus/examples/training_scripts/runs/{run_name}/events.out.tfevents.1713840453.f411843fc70b.297.0 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/requirements.txt b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/requirements.txt deleted file mode 100644 index 7f33d240..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/requirements.txt +++ /dev/null @@ -1,146 +0,0 @@ -Babel==2.14.0 -Farama-Notifications==0.0.4 -GitPython==3.1.43 -Jinja2==3.1.3 -Markdown==3.6 -MarkupSafe==2.1.5 -PyWavelets==1.4.1 -PyYAML==6.0.1 -Pygments==2.17.2 -Shimmy==1.3.0 -Sphinx==7.1.2 -Syllabus-RL==0.5 -Werkzeug==3.0.2 -absl-py==2.1.0 -aiosignal==1.3.1 -alabaster==0.7.13 -appdirs==1.4.4 -attrs==23.2.0 -beautifulsoup4==4.12.3 -cachetools==5.3.3 -certifi==2024.2.2 -cffi==1.16.0 -charset-normalizer==3.3.2 -click==8.1.7 -cloudpickle==3.0.0 -cmake==3.29.2 -colorama==0.4.6 -contourpy==1.1.1 -cycler==0.12.1 -dm-tree==0.1.8 -docker-pycreds==0.4.0 -docutils==0.20.1 -exceptiongroup==1.2.0 -filelock==3.13.4 -fonttools==4.51.0 -frozenlist==1.4.1 -fsspec==2024.3.1 -furo==2024.1.29 -future==1.0.0 -gitdb==4.0.11 -glcontext==2.5.0 -glfw==1.12.0 -google-auth-oauthlib==1.0.0 -google-auth==2.29.0 -grpcio==1.62.1 -gym-notices==0.0.8 -gym==0.23.0 -gymnasium==0.28.1 -idna==3.7 -imageio-ffmpeg==0.3.0 -imageio==2.34.0 -imagesize==1.4.1 -importlib_metadata==7.1.0 -importlib_resources==6.4.0 -iniconfig==2.0.0 -jax-jumpy==1.0.0 -jsonschema-specifications==2023.12.1 -jsonschema==4.21.1 -kiwisolver==1.4.5 -lazy_loader==0.4 -lz4==4.3.3 -markdown-it-py==3.0.0 -matplotlib==3.7.5 -mdurl==0.1.2 -moderngl==5.10.0 -mpmath==1.3.0 -msgpack==1.0.8 -networkx==3.1 -numpy==1.24.4 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-nccl-cu12==2.19.3 -nvidia-nvjitlink-cu12==12.4.127 -nvidia-nvtx-cu12==12.1.105 -oauthlib==3.2.2 -packaging==24.0 -pandas==2.0.3 -pillow==10.3.0 -pip==23.3.1 -pkgutil_resolve_name==1.3.10 -pluggy==1.4.0 -procgen==0.9.5+ed4be81 -protobuf==4.25.3 -psutil==5.9.8 -psutil==5.9.8 -py-cpuinfo==9.0.0 -pyarrow==15.0.2 -pyasn1==0.6.0 -pyasn1_modules==0.4.0 -pycparser==2.22 -pyenchant==3.2.2 -pyglet==1.4.11 -pyparsing==3.1.2 -pytest-benchmark==4.0.0 -pytest==8.1.1 -python-dateutil==2.9.0.post0 -pytz==2024.1 -ray==2.10.0 -referencing==0.34.0 -requests-oauthlib==2.0.0 -requests==2.31.0 -rich==13.7.1 -rpds-py==0.18.0 -rsa==4.9 -scikit-image==0.21.0 -scipy==1.10.0 -sentry-sdk==1.45.0 -setproctitle==1.2.2 -setproctitle==1.3.3 -setuptools==68.2.2 -shellingham==1.5.4 -six==1.16.0 -smmap==5.0.1 -snowballstemmer==2.2.0 -soupsieve==2.5 -sphinx-basic-ng==1.0.0b2 
-sphinx-tabs==3.4.5 -sphinxcontrib-applehelp==1.0.4 -sphinxcontrib-devhelp==1.0.2 -sphinxcontrib-htmlhelp==2.0.1 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.3 -sphinxcontrib-serializinghtml==1.1.5 -sphinxcontrib-spelling==8.0.0 -sympy==1.12 -tensorboard-data-server==0.7.2 -tensorboard==2.14.0 -tensorboardX==2.6.2.2 -tifffile==2023.7.10 -tomli==2.0.1 -torch==2.2.2 -triton==2.2.0 -typer==0.12.3 -typing_extensions==4.11.0 -tzdata==2024.1 -urllib3==2.2.1 -wandb==0.16.6 -wheel==0.41.2 -zipp==3.18.1 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch deleted file mode 100644 index 2da133fd..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch +++ /dev/null @@ -1,566 +0,0 @@ -diff --git a/setup.py b/setup.py -index 31e09f2..22a94e8 100644 ---- a/setup.py -+++ b/setup.py -@@ -2,7 +2,7 @@ from setuptools import find_packages, setup - - - extras = dict() --extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] -+extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] - extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] - extras['all'] = extras['test'] + extras['docs'] - -diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py -index 9515df4..f89828b 100644 ---- a/syllabus/curricula/plr/plr_wrapper.py -+++ b/syllabus/curricula/plr/plr_wrapper.py -@@ -312,10 +312,10 @@ class PrioritizedLevelReplay(Curriculum): - """ - Log the task distribution to the provided tensorboard writer. 
- """ -- super().log_metrics(writer, step) -+ # super().log_metrics(writer, step) - metrics = self._task_sampler.metrics() - writer.add_scalar("curriculum/proportion_seen", metrics["proportion_seen"], step) - writer.add_scalar("curriculum/score", metrics["score"], step) -- for task in list(self.task_space.tasks)[:10]: -- writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) -- writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) -+ # for task in list(self.task_space.tasks)[:10]: -+ # writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) -+ # writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py -index a6d469e..8f1cc34 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py -+++ b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py -@@ -14,6 +14,7 @@ import gym as openai_gym - import gymnasium as gym - import numpy as np - import procgen # noqa: F401 -+from procgen import ProcgenEnv - import torch - import torch.nn as nn - import torch.optim as optim -@@ -21,10 +22,10 @@ from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 - from torch.utils.tensorboard import SummaryWriter - - from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum --from syllabus.curricula import DomainRandomization, LearningProgressCurriculum, CentralizedPrioritizedLevelReplay -+from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum - from syllabus.examples.models import ProcgenAgent - from syllabus.examples.task_wrappers import ProcgenTaskWrapper --from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize -+from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - - def parse_args(): -@@ -46,6 +47,8 @@ def parse_args(): - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="weather to capture videos of the agent performances (check out `videos` folder)") -+ parser.add_argument("--logging-dir", type=str, default=".", -+ help="the base directory for logging and wandb storage.") - - # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="starpilot", -@@ -124,15 +127,15 @@ PROCGEN_RETURN_BOUNDS = { - } - - --def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): -+def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) -- env = ProcgenTaskWrapper(env, env_id, seed=seed) -- if curriculum_components is not None: -+ if curriculum is not None: -+ env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, -- curriculum_components, -+ curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, - ) -@@ -147,7 +150,7 @@ def wrap_vecenv(vecenv): - return vecenv - - --def level_replay_evaluate( -+def slow_level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -155,28 +158,24 @@ def level_replay_evaluate( 
- num_levels=0 - ): - policy.eval() -- eval_envs = gym.vector.SyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) -- for i in range(1) -- ] -+ -+ eval_envs = ProcgenEnv( -+ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) -- -- eval_episode_rewards = [] - eval_obs, _ = eval_envs.reset() -+ eval_episode_rewards = [] - - while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - -- eval_obs, _, truncs, terms, infos = eval_envs.step(np.array([eval_action.cpu().numpy()])) -- -- for info in infos: -+ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -+ for i, info in enumerate(infos): - if 'episode' in info.keys(): - eval_episode_rewards.append(info['episode']['r']) - -- eval_envs.close() - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -@@ -185,8 +184,7 @@ def level_replay_evaluate( - return mean_returns, stddev_returns, normalized_mean_returns - - --def fast_level_replay_evaluate( -- eval_envs, -+def level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -194,9 +192,13 @@ def fast_level_replay_evaluate( - num_levels=0 - ): - policy.eval() -- possible_seeds = np.arange(0, num_levels + 1) -- eval_obs, _ = eval_envs.reset(seed=list(np.random.choice(possible_seeds, size=num_episodes))) - -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -+ ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") -+ eval_envs = wrap_vecenv(eval_envs) -+ eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: -@@ -231,10 +233,11 @@ if __name__ == "__main__": - name=run_name, - monitor_gym=True, - save_code=True, -- # dir="/fs/nexus-scratch/rsulli/" -+ dir=args.logging_dir - ) -- wandb.run.log_code("./syllabus/examples") -- writer = SummaryWriter(f"./runs/{run_name}") -+ # wandb.run.log_code("./syllabus/examples") -+ -+ writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), -@@ -250,7 +253,7 @@ if __name__ == "__main__": - print("Device:", device) - - # Curriculum setup -- task_queue = update_queue = None -+ curriculum = None - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) -@@ -273,6 +276,16 @@ if __name__ == "__main__": - elif args.curriculum_method == "lp": - print("Using learning progress.") - curriculum = LearningProgressCurriculum(sample_env.task_space) -+ elif args.curriculum_method == "sq": -+ print("Using sequential curriculum.") -+ curricula = [] -+ stopping = [] -+ for i in range(199): -+ curricula.append(i + 1) -+ stopping.append("steps>=50000") -+ curricula.append(list(range(i + 1))) -+ stopping.append("steps>=50000") -+ curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) - else: - raise ValueError(f"Unknown curriculum method {args.curriculum_method}") - curriculum = 
make_multiprocessing_curriculum(curriculum) -@@ -285,7 +298,7 @@ if __name__ == "__main__": - make_env( - args.env_id, - args.seed + i, -- curriculum_components=curriculum.get_components() if args.curriculum else None, -+ curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) -@@ -293,22 +306,6 @@ if __name__ == "__main__": - ) - envs = wrap_vecenv(envs) - -- test_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=0) -- for i in range(args.num_eval_episodes) -- ] -- ) -- test_eval_envs = wrap_vecenv(test_eval_envs) -- -- train_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=200) -- for i in range(args.num_eval_episodes) -- ] -- ) -- train_eval_envs = wrap_vecenv(train_eval_envs) -- - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( -@@ -369,6 +366,8 @@ if __name__ == "__main__": - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) -+ if curriculum is not None: -+ curriculum.log_metrics(writer, global_step) - break - - # Syllabus curriculum update -@@ -388,8 +387,6 @@ if __name__ == "__main__": - }, - } - curriculum.update(update) -- #if args.curriculum: -- # curriculum.log_metrics(writer, global_step) - - # bootstrap value if not done - with torch.no_grad(): -@@ -487,8 +484,18 @@ if __name__ == "__main__": - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent -- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) -- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) -+ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) -+ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) -@@ -502,12 +509,21 @@ if __name__ == "__main__": - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) -+ - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) - 
writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) -+ - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) -+ - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -index e13c22e..d2d54b5 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -+++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -@@ -14,6 +14,7 @@ import gym as openai_gym - import gymnasium as gym - import numpy as np - import procgen # noqa: F401 -+from procgen import ProcgenEnv - import torch - import torch.nn as nn - import torch.optim as optim -@@ -24,7 +25,7 @@ from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curri - from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum - from syllabus.examples.models import ProcgenAgent - from syllabus.examples.task_wrappers import ProcgenTaskWrapper --from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize -+from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - - def parse_args(): -@@ -126,18 +127,17 @@ PROCGEN_RETURN_BOUNDS = { - } - - --def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): -+def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) -- env = ProcgenTaskWrapper(env, env_id, seed=seed) -- if curriculum_components is not None: -+ if curriculum is not None: -+ env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, -- curriculum_components, -+ curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, -- buffer_size=4, - ) - return env - return thunk -@@ -150,7 +150,7 @@ def wrap_vecenv(vecenv): - return vecenv - - --def level_replay_evaluate( -+def slow_level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -158,28 +158,24 @@ def level_replay_evaluate( - num_levels=0 - ): - policy.eval() -- eval_envs = gym.vector.SyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) -- for i in range(1) -- ] -+ -+ eval_envs = ProcgenEnv( -+ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = 
wrap_vecenv(eval_envs) -- -- eval_episode_rewards = [] - eval_obs, _ = eval_envs.reset() -+ eval_episode_rewards = [] - - while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - -- eval_obs, _, truncs, terms, infos = eval_envs.step(np.array([eval_action.cpu().numpy()])) -- -- for info in infos: -+ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -+ for i, info in enumerate(infos): - if 'episode' in info.keys(): - eval_episode_rewards.append(info['episode']['r']) - -- eval_envs.close() - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -@@ -188,8 +184,7 @@ def level_replay_evaluate( - return mean_returns, stddev_returns, normalized_mean_returns - - --def fast_level_replay_evaluate( -- eval_envs, -+def level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -198,15 +193,12 @@ def fast_level_replay_evaluate( - ): - policy.eval() - -- # Choose evaluation seeds -- if num_levels == 0: -- seeds = np.random.randint(0, 2 ** 16 - 1, size=num_episodes) -- else: -- seeds = np.random.choice(np.arange(0, num_levels), size=num_episodes) -- -- seed_envs = [(int(seed), env) for seed, env in zip(seeds, range(num_episodes))] -- eval_obs, _ = eval_envs.reset(seed=seed_envs) -- -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -+ ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") -+ eval_envs = wrap_vecenv(eval_envs) -+ eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: -@@ -251,7 +243,7 @@ if __name__ == "__main__": - save_code=True, - dir=args.logging_dir - ) -- wandb.run.log_code(os.path.join(args.logging_dir, "/syllabus/examples")) -+ # wandb.run.log_code("./syllabus/examples") - - writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) - writer.add_text( -@@ -316,7 +308,7 @@ if __name__ == "__main__": - make_env( - args.env_id, - args.seed + i, -- curriculum_components=curriculum.get_components() if args.curriculum else None, -+ curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) -@@ -324,22 +316,6 @@ if __name__ == "__main__": - ) - envs = wrap_vecenv(envs) - -- test_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=0) -- for i in range(args.num_eval_episodes) -- ] -- ) -- test_eval_envs = wrap_vecenv(test_eval_envs) -- -- train_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=200) -- for i in range(args.num_eval_episodes) -- ] -- ) -- train_eval_envs = wrap_vecenv(train_eval_envs) -- - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( -@@ -500,8 +476,18 @@ if __name__ == "__main__": - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent -- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) -- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = 
fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) -+ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) -+ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) -@@ -515,12 +501,21 @@ if __name__ == "__main__": - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) -+ - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) -+ - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) -+ - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() -diff --git a/syllabus/examples/utils/vecenv.py b/syllabus/examples/utils/vecenv.py -index 6e5a0a9..af3b187 100644 ---- a/syllabus/examples/utils/vecenv.py -+++ b/syllabus/examples/utils/vecenv.py -@@ -1,7 +1,6 @@ - import time - from collections import deque - --import gym - import numpy as np - - -@@ -154,12 +153,20 @@ class VecEnvObservationWrapper(VecEnvWrapper): - pass - - def reset(self): -- obs, infos = self.venv.reset() -+ outputs = self.venv.reset() -+ if len(outputs) == 2: -+ obs, infos = outputs -+ else: -+ obs, infos = outputs, {} - return self.process(obs), infos - - def step_wait(self): -- print(self.venv) -- obs, rews, terms, truncs, infos = self.venv.step_wait() -+ env_outputs = self.venv.step_wait() -+ if len(env_outputs) == 4: -+ obs, rews, terms, infos = env_outputs -+ truncs = np.zeros_like(terms) -+ else: -+ obs, rews, terms, truncs, infos = env_outputs - return self.process(obs), rews, terms, truncs, infos - - -@@ -209,7 +216,10 @@ class VecNormalize(VecEnvWrapper): 
- - def reset(self, seed=None): - self.ret = np.zeros(self.num_envs) -- obs, infos = self.venv.reset(seed=seed) -+ if seed is not None: -+ obs, infos = self.venv.reset(seed=seed) -+ else: -+ obs, infos = self.venv.reset() - return self._obfilt(obs), infos - - -@@ -228,7 +238,10 @@ class VecMonitor(VecEnvWrapper): - self.eplen_buf = deque([], maxlen=keep_buf) - - def reset(self, seed=None): -- obs, infos = self.venv.reset(seed=seed) -+ if seed is not None: -+ obs, infos = self.venv.reset(seed=seed) -+ else: -+ obs, infos = self.venv.reset() - self.eprets = np.zeros(self.num_envs, 'f') - self.eplens = np.zeros(self.num_envs, 'i') - return obs, infos -@@ -239,7 +252,8 @@ class VecMonitor(VecEnvWrapper): - self.eprets += rews - self.eplens += 1 - # Convert dict of lists to list of dicts -- infos = [dict(zip(infos, t)) for t in zip(*infos.values())] -+ if isinstance(infos, dict): -+ infos = [dict(zip(infos, t)) for t in zip(*infos.values())] - newinfos = list(infos[:]) - for i in range(len(dones)): - if dones[i]: diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-metadata.json b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-metadata.json deleted file mode 100644 index e72715df..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-metadata.json +++ /dev/null @@ -1,167 +0,0 @@ -{ - "os": "Linux-3.10.0-1160.11.1.el7.x86_64-x86_64-with-glibc2.10", - "python": "3.8.5", - "heartbeatAt": "2024-04-23T02:47:28.253643", - "startedAt": "2024-04-23T02:47:27.596549", - "docker": null, - "cuda": "10.1.243", - "args": [ - "--curriculum", - "True", - "--track", - "True", - "--env-id", - "bigfish" - ], - "state": "running", - "program": "cleanrl_procgen_plr.py", - "codePathLocal": "cleanrl_procgen_plr.py", - "codePath": "syllabus/examples/training_scripts/cleanrl_procgen_plr.py", - "git": { - "remote": "https://github.com/RoseyGreenBlue/Syllabus.git", - "commit": "6e36433fbb5c0e990358d7f895d976086dbfb65e" - }, - "email": "djhaayusv04@gmail.com", - "root": "/data/averma/MARL/Syllabus", - "host": "f411843fc70b", - "username": "root", - "executable": "/home/user/miniconda/envs/test2_py/bin/python", - "cpu_count": 12, - "cpu_count_logical": 24, - "cpu_freq": { - "current": 1281.8734583333332, - "min": 1200.0, - "max": 3700.0 - }, - "cpu_freq_per_core": [ - { - "current": 1288.073, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1204.858, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1270.434, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1199.877, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1294.714, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.085, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1295.751, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1202.575, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1202.575, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1233.288, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1325.427, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1274.377, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1481.066, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1259.436, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.5, - "min": 1200.0, - "max": 
3700.0 - }, - { - "current": 1208.801, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1227.062, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1449.108, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1313.598, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1253.002, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1763.293, - "min": 1200.0, - "max": 3700.0 - } - ], - "disk": { - "/": { - "total": 5952.626953125, - "used": 988.7798461914062 - } - }, - "memory": { - "total": 251.63711166381836 - } -} diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-summary.json b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-summary.json deleted file mode 100644 index 15bc4d38..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"global_step": 81856, "_timestamp": 1713840633.2564902, "_runtime": 185.6468482017517, "_step": 520, "charts/episodic_return": 0.0, "charts/episodic_length": 79.0, "curriculum/proportion_seen": 0.0, "curriculum/score": 0.0, "charts/learning_rate": 0.0005000000237487257, "charts/episode_returns": 1.2000000476837158, "losses/value_loss": 0.10687784850597382, "losses/policy_loss": -0.0007377793081104755, "losses/entropy": 2.6995317935943604, "losses/old_approx_kl": 0.0001328418729826808, "losses/approx_kl": 0.0014047222211956978, "losses/clipfrac": 0.0, "losses/explained_variance": 0.0861138105392456, "charts/SPS": 390.0, "test_eval/mean_episode_return": 1.399999976158142, "test_eval/normalized_mean_eval_return": 0.010256409645080566, "test_eval/stddev_eval_return": 1.399999976158142, "test_eval/slow_mean_episode_return": 0.5, "test_eval/slow_normalized_mean_eval_return": -0.012820512987673283, "test_eval/slow_stddev_eval_return": 0.5, "train_eval/mean_episode_return": 0.4000000059604645, "train_eval/normalized_mean_train_return": -0.015384615398943424, "train_eval/stddev_train_return": 0.4000000059604645, "train_eval/slow_mean_episode_return": 1.100000023841858, "train_eval/slow_normalized_mean_train_return": 0.0025641031097620726, "train_eval/slow_stddev_train_return": 1.100000023841858, "curriculum/completed_episodes": 500.0, "_wandb": {"runtime": 204}} \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/run-rpqjdbky.wandb b/syllabus/examples/training_scripts/wandb/run-20240423_024727-rpqjdbky/run-rpqjdbky.wandb deleted file mode 100644 index 3278c7155636059b0ec6c6e516aaf73d957ef6d9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 258994 zcmeFad0-UP^2ci+Yy--sqERH`3hFS^+jQf;UHAQR-R?ENkPHwANz8&EE@2NK5cU89 zkwwC$K|z8b1W*in*aM0NL5M6N>;Vzqw`LN^>7G7gdOGj@^Z5O|_u?dd=F?TD&N)?e zs@lJP=VSYpU)6GC&exu+!s>_h4Qm|{>5)ZE6WprONp?w^TZ$IsjvWO}^~8Eziq=t> z6L!_AMh-{)euQU<_iAXts5l?aqmjeDPAciuGf&TIrXnR zDWZN#ulV@rj%g{a7u0pMcGUOwOo~c~?(Xdyc3tb3cyDwHIbzJQTk4cM-uKepY zjykEy(J|dB|9-2Z*?Z9mu^s90UA-}>6~j7OU;ajp>l}^zKjKYDjZ5u?-%Lo0k9V|i zTpbggl$w_8jp`8>>rLz%R;N>Re2Ul6+|e*Tv2*9Rgw9d1amjtdYPWW_c3kVI>rLnp z6^F0fpmoQ%&Yj{?y0peuZ=9N#8XX^%8rR*MlIl%L=^IvGkX^n%jvE|TgFQKco|GJ& zinmeM<-|v_qp_oYLR$AIJQ2T8OOYH+9S!JTR6L@#AS?J94Wbhgym)v#zC^`$Yvici zIT{c0znjL6I-R4tcgM@A<8o@6hTn1EB=wCSGl@-bkC^H$-3ivZI-!L27hzXK!j$ zxA;o&;NNwEm>`0b)XE`f+_dE6xR|u~H2kiA+?yOtul&oX?%vcci4anq);>ME!O_(B zqI=*qdXwT(5@U(JmfHn^I(%;}#tkg!JeSFHMULER>q8+K2t zH4$R1Uq7rYEG(~fSVY4vVd?l^VPRRd!dll6ovKrA3k7s5PUW#$cZYRt6d#w6*7Lre 
[GIT binary patch payload omitted: 258994 bytes of base85-encoded data for the deleted run-rpqjdbky.wandb log file, not human-readable]
gT$>*@3Mj8cI6Q^H1b7ton|1mJ=> zb)V-G2wvw>H~~LN>Rv!1KEOphbkR&M;`^7%rlHYU4?a4JJ5URV?t{Ck!ZV&%?YS69 z_7^T$XXAN7$$E|z2O{(BwKHF1`USunw&q(d51xfo?ZZ{AZ)qb}^-*(gHZtFbxcQp* zw*#W%%s1bdXdOQNjZ1AYcM?k7{jFpnGT%pb=4;mX0^V>l-xCeBkh)({*!@#y%}#Q4gJ#z;ivTf!VyAH5OeMeU0z{9e zcFTn)Jx-;Vh2%`c3P{MXo37K;?l z#TD*p+D)!-$CTFVA%K{N`>!Q&4*X5sRlT=LC?2@XB%el$eac61r#=E zId8jJM=*6XMl*Lmy7EH&%A4bB39sC3bK1uf0j`MBid-j4WUJgS`Fayejz z&lIRG=50000") - curricula.append(list(range(i + 1))) - stopping.append("steps>=50000") - curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) - else: - raise ValueError(f"Unknown curriculum method {args.curriculum_method}") - curriculum = make_multiprocessing_curriculum(curriculum) - del sample_env - - # env setup - print("Creating env") - envs = gym.vector.AsyncVectorEnv( - [ - make_env( - args.env_id, - args.seed + i, - curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) - ] - ) - envs = wrap_vecenv(envs) - - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( - envs.single_observation_space.shape, - envs.single_action_space.n, - arch="large", - base_kwargs={'recurrent': False, 'hidden_size': 256} - ).to(device) - optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) - - # ALGO Logic: Storage setup - obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) - actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) - logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) - rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) - values = torch.zeros((args.num_steps, args.num_envs)).to(device) - - # TRY NOT TO MODIFY: start the game - global_step = 0 - start_time = time.time() - next_obs, _ = envs.reset() - next_obs = torch.Tensor(next_obs).to(device) - next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // args.batch_size - episode_rewards = deque(maxlen=10) - completed_episodes = 0 - - for update in range(1, num_updates + 1): - # Annealing the rate if instructed to do so. - if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates - lrnow = frac * args.learning_rate - optimizer.param_groups[0]["lr"] = lrnow - - for step in range(0, args.num_steps): - global_step += 1 * args.num_envs - obs[step] = next_obs - dones[step] = next_done - - # ALGO LOGIC: action logic - with torch.no_grad(): - action, logprob, _, value = agent.get_action_and_value(next_obs) - values[step] = value.flatten() - actions[step] = action - logprobs[step] = logprob - - # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, reward, term, trunc, info = envs.step(action.cpu().numpy()) - done = np.logical_or(term, trunc) - rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) - completed_episodes += sum(done) - - for item in info: - if "episode" in item.keys(): - episode_rewards.append(item['episode']['r']) - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) - if curriculum is not None: - curriculum.log_metrics(writer, global_step) - break - - # bootstrap value if not done - with torch.no_grad(): - next_value = agent.get_value(next_obs).reshape(1, -1) - if args.gae: - advantages = torch.zeros_like(rewards).to(device) - lastgaelam = 0 - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - nextvalues = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - nextvalues = values[t + 1] - delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] - advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam - returns = advantages + values - else: - returns = torch.zeros_like(rewards).to(device) - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - next_return = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - next_return = returns[t + 1] - returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return - advantages = returns - values - - # flatten the batch - b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) - b_logprobs = logprobs.reshape(-1) - b_actions = actions.reshape((-1,) + envs.single_action_space.shape) - b_advantages = advantages.reshape(-1) - b_returns = returns.reshape(-1) - b_values = values.reshape(-1) - - # Optimizing the policy and value network - b_inds = np.arange(args.batch_size) - clipfracs = [] - for epoch in range(args.update_epochs): - np.random.shuffle(b_inds) - for start in range(0, args.batch_size, args.minibatch_size): - end = start + args.minibatch_size - mb_inds = b_inds[start:end] - - _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds]) - logratio = newlogprob - b_logprobs[mb_inds] - ratio = logratio.exp() - - with torch.no_grad(): - # calculate approx_kl http://joschu.net/blog/kl-approx.html - old_approx_kl = (-logratio).mean() - approx_kl = ((ratio - 1) - logratio).mean() - clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()] - - mb_advantages = b_advantages[mb_inds] - if args.norm_adv: - mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) - - # Policy loss - pg_loss1 = -mb_advantages * ratio - pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + args.clip_coef) - pg_loss = torch.max(pg_loss1, pg_loss2).mean() - - # Value loss - newvalue = newvalue.view(-1) - if args.clip_vloss: - v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 - v_clipped = b_values[mb_inds] + torch.clamp( - newvalue - b_values[mb_inds], - -args.clip_coef, - args.clip_coef, - ) - v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 - v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) - v_loss = 0.5 * v_loss_max.mean() - else: - v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() - - 
entropy_loss = entropy.mean() - loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef - - optimizer.zero_grad() - loss.backward() - nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) - optimizer.step() - - if args.target_kl is not None: - if approx_kl > args.target_kl: - break - - y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() - var_y = np.var(y_true) - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent - mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) - writer.add_scalar("charts/episode_returns", np.mean(episode_rewards), global_step) - writer.add_scalar("losses/value_loss", v_loss.item(), global_step) - writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step) - writer.add_scalar("losses/entropy", entropy_loss.item(), global_step) - writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step) - writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step) - writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step) - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) - - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) - writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) - writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) - - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) - writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) - writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) - - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() - writer.close() diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/conda-environment.yaml 
b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/conda-environment.yaml deleted file mode 100644 index cd0b0b09..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/conda-environment.yaml +++ /dev/null @@ -1,165 +0,0 @@ -name: test2_py -channels: - - defaults -dependencies: - - _libgcc_mutex=0.1=main - - _openmp_mutex=5.1=1_gnu - - ca-certificates=2024.3.11=h06a4308_0 - - ld_impl_linux-64=2.38=h1181459_1 - - libffi=3.3=he6710b0_2 - - libgcc-ng=11.2.0=h1234567_1 - - libgomp=11.2.0=h1234567_1 - - libstdcxx-ng=11.2.0=h1234567_1 - - ncurses=6.4=h6a678d5_0 - - openssl=1.1.1w=h7f8727e_0 - - pip=23.3.1=py38h06a4308_0 - - python=3.8.5=h7579374_1 - - readline=8.2=h5eee18b_0 - - setuptools=68.2.2=py38h06a4308_0 - - sqlite=3.41.2=h5eee18b_0 - - tk=8.6.12=h1ccaba5_0 - - wheel=0.41.2=py38h06a4308_0 - - xz=5.4.6=h5eee18b_0 - - zlib=1.2.13=h5eee18b_0 - - pip: - - absl-py==2.1.0 - - aiosignal==1.3.1 - - alabaster==0.7.13 - - appdirs==1.4.4 - - attrs==23.2.0 - - babel==2.14.0 - - beautifulsoup4==4.12.3 - - cachetools==5.3.3 - - certifi==2024.2.2 - - cffi==1.16.0 - - charset-normalizer==3.3.2 - - click==8.1.7 - - cloudpickle==3.0.0 - - cmake==3.29.2 - - contourpy==1.1.1 - - cycler==0.12.1 - - dm-tree==0.1.8 - - docker-pycreds==0.4.0 - - docutils==0.20.1 - - exceptiongroup==1.2.0 - - farama-notifications==0.0.4 - - filelock==3.13.4 - - fonttools==4.51.0 - - frozenlist==1.4.1 - - fsspec==2024.3.1 - - furo==2024.1.29 - - future==1.0.0 - - gitdb==4.0.11 - - gitpython==3.1.43 - - glcontext==2.5.0 - - glfw==1.12.0 - - google-auth==2.29.0 - - google-auth-oauthlib==1.0.0 - - grpcio==1.62.1 - - gym==0.23.0 - - gym-notices==0.0.8 - - gymnasium==0.28.1 - - idna==3.7 - - imageio==2.34.0 - - imageio-ffmpeg==0.3.0 - - imagesize==1.4.1 - - importlib-metadata==7.1.0 - - importlib-resources==6.4.0 - - iniconfig==2.0.0 - - jax-jumpy==1.0.0 - - jinja2==3.1.3 - - jsonschema==4.21.1 - - jsonschema-specifications==2023.12.1 - - kiwisolver==1.4.5 - - lazy-loader==0.4 - - lz4==4.3.3 - - markdown==3.6 - - markdown-it-py==3.0.0 - - markupsafe==2.1.5 - - matplotlib==3.7.5 - - mdurl==0.1.2 - - moderngl==5.10.0 - - mpmath==1.3.0 - - msgpack==1.0.8 - - networkx==3.1 - - numpy==1.24.4 - - nvidia-cublas-cu12==12.1.3.1 - - nvidia-cuda-cupti-cu12==12.1.105 - - nvidia-cuda-nvrtc-cu12==12.1.105 - - nvidia-cuda-runtime-cu12==12.1.105 - - nvidia-cudnn-cu12==8.9.2.26 - - nvidia-cufft-cu12==11.0.2.54 - - nvidia-curand-cu12==10.3.2.106 - - nvidia-cusolver-cu12==11.4.5.107 - - nvidia-cusparse-cu12==12.1.0.106 - - nvidia-nccl-cu12==2.19.3 - - nvidia-nvjitlink-cu12==12.4.127 - - nvidia-nvtx-cu12==12.1.105 - - oauthlib==3.2.2 - - packaging==24.0 - - pandas==2.0.3 - - pillow==10.3.0 - - pkgutil-resolve-name==1.3.10 - - pluggy==1.4.0 - - protobuf==4.25.3 - - psutil==5.9.8 - - py-cpuinfo==9.0.0 - - pyarrow==15.0.2 - - pyasn1==0.6.0 - - pyasn1-modules==0.4.0 - - pycparser==2.22 - - pyenchant==3.2.2 - - pyglet==1.4.11 - - pygments==2.17.2 - - pyparsing==3.1.2 - - pytest==8.1.1 - - pytest-benchmark==4.0.0 - - python-dateutil==2.9.0.post0 - - pytz==2024.1 - - pywavelets==1.4.1 - - pyyaml==6.0.1 - - ray==2.10.0 - - referencing==0.34.0 - - requests==2.31.0 - - requests-oauthlib==2.0.0 - - rich==13.7.1 - - rpds-py==0.18.0 - - rsa==4.9 - - scikit-image==0.21.0 - - scipy==1.10.0 - - sentry-sdk==1.45.0 - - setproctitle==1.3.3 - - shellingham==1.5.4 - - shimmy==1.3.0 - - six==1.16.0 - - smmap==5.0.1 - - snowballstemmer==2.2.0 - - soupsieve==2.5 - - sphinx==7.1.2 - - sphinx-basic-ng==1.0.0b2 - - 
sphinx-tabs==3.4.5 - - sphinxcontrib-applehelp==1.0.4 - - sphinxcontrib-devhelp==1.0.2 - - sphinxcontrib-htmlhelp==2.0.1 - - sphinxcontrib-jsmath==1.0.1 - - sphinxcontrib-qthelp==1.0.3 - - sphinxcontrib-serializinghtml==1.1.5 - - sphinxcontrib-spelling==8.0.0 - - syllabus-rl==0.5 - - sympy==1.12 - - tensorboard==2.14.0 - - tensorboard-data-server==0.7.2 - - tensorboardx==2.6.2.2 - - tifffile==2023.7.10 - - tomli==2.0.1 - - torch==2.2.2 - - triton==2.2.0 - - typer==0.12.3 - - typing-extensions==4.11.0 - - tzdata==2024.1 - - urllib3==2.2.1 - - wandb==0.16.6 - - werkzeug==3.0.2 - - zipp==3.18.1 -prefix: /home/user/miniconda/envs/test2_py - diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/config.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/config.yaml deleted file mode 100644 index d65c04d2..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/config.yaml +++ /dev/null @@ -1,130 +0,0 @@ -wandb_version: 1 - -exp_name: - desc: null - value: cleanrl_procgen_plr -seed: - desc: null - value: 1 -torch_deterministic: - desc: null - value: true -cuda: - desc: null - value: true -track: - desc: null - value: true -wandb_project_name: - desc: null - value: syllabus -wandb_entity: - desc: null - value: null -capture_video: - desc: null - value: false -logging_dir: - desc: null - value: . -env_id: - desc: null - value: bigfish -total_timesteps: - desc: null - value: 25000000 -learning_rate: - desc: null - value: 0.0005 -num_envs: - desc: null - value: 64 -num_steps: - desc: null - value: 256 -anneal_lr: - desc: null - value: false -gae: - desc: null - value: true -gamma: - desc: null - value: 0.999 -gae_lambda: - desc: null - value: 0.95 -num_minibatches: - desc: null - value: 8 -update_epochs: - desc: null - value: 3 -norm_adv: - desc: null - value: true -clip_coef: - desc: null - value: 0.2 -clip_vloss: - desc: null - value: true -ent_coef: - desc: null - value: 0.01 -vf_coef: - desc: null - value: 0.5 -max_grad_norm: - desc: null - value: 0.5 -target_kl: - desc: null - value: null -full_dist: - desc: null - value: true -curriculum: - desc: null - value: true -curriculum_method: - desc: null - value: plr -num_eval_episodes: - desc: null - value: 10 -batch_size: - desc: null - value: 16384 -minibatch_size: - desc: null - value: 2048 -_wandb: - desc: null - value: - code_path: code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py - python_version: 3.8.5 - cli_version: 0.16.6 - framework: torch - is_jupyter_run: false - is_kaggle_kernel: false - start_time: 1713840768.0 - t: - 1: - - 1 - - 30 - - 55 - 2: - - 1 - - 30 - - 55 - 3: - - 13 - - 16 - - 23 - - 35 - 4: 3.8.5 - 5: 0.16.6 - 8: - - 5 - 13: linux-x86_64 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/diff.patch b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/diff.patch deleted file mode 100644 index 0a6b4640..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/diff.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/setup.py b/setup.py -index 31e09f2..22a94e8 100644 ---- a/setup.py -+++ b/setup.py -@@ -2,7 +2,7 @@ from setuptools import find_packages, setup - - - extras = dict() --extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] -+extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] - extras['docs'] = 
['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] - extras['all'] = extras['test'] + extras['docs'] - diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/events.out.tfevents.1713840773.f411843fc70b.1109.0 b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/events.out.tfevents.1713840773.f411843fc70b.1109.0 deleted file mode 120000 index c1b8de5c..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/events.out.tfevents.1713840773.f411843fc70b.1109.0 +++ /dev/null @@ -1 +0,0 @@ -/data/averma/MARL/Syllabus/syllabus/examples/training_scripts/runs/{run_name}/events.out.tfevents.1713840773.f411843fc70b.1109.0 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/requirements.txt b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/requirements.txt deleted file mode 100644 index 7f33d240..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/requirements.txt +++ /dev/null @@ -1,146 +0,0 @@ -Babel==2.14.0 -Farama-Notifications==0.0.4 -GitPython==3.1.43 -Jinja2==3.1.3 -Markdown==3.6 -MarkupSafe==2.1.5 -PyWavelets==1.4.1 -PyYAML==6.0.1 -Pygments==2.17.2 -Shimmy==1.3.0 -Sphinx==7.1.2 -Syllabus-RL==0.5 -Werkzeug==3.0.2 -absl-py==2.1.0 -aiosignal==1.3.1 -alabaster==0.7.13 -appdirs==1.4.4 -attrs==23.2.0 -beautifulsoup4==4.12.3 -cachetools==5.3.3 -certifi==2024.2.2 -cffi==1.16.0 -charset-normalizer==3.3.2 -click==8.1.7 -cloudpickle==3.0.0 -cmake==3.29.2 -colorama==0.4.6 -contourpy==1.1.1 -cycler==0.12.1 -dm-tree==0.1.8 -docker-pycreds==0.4.0 -docutils==0.20.1 -exceptiongroup==1.2.0 -filelock==3.13.4 -fonttools==4.51.0 -frozenlist==1.4.1 -fsspec==2024.3.1 -furo==2024.1.29 -future==1.0.0 -gitdb==4.0.11 -glcontext==2.5.0 -glfw==1.12.0 -google-auth-oauthlib==1.0.0 -google-auth==2.29.0 -grpcio==1.62.1 -gym-notices==0.0.8 -gym==0.23.0 -gymnasium==0.28.1 -idna==3.7 -imageio-ffmpeg==0.3.0 -imageio==2.34.0 -imagesize==1.4.1 -importlib_metadata==7.1.0 -importlib_resources==6.4.0 -iniconfig==2.0.0 -jax-jumpy==1.0.0 -jsonschema-specifications==2023.12.1 -jsonschema==4.21.1 -kiwisolver==1.4.5 -lazy_loader==0.4 -lz4==4.3.3 -markdown-it-py==3.0.0 -matplotlib==3.7.5 -mdurl==0.1.2 -moderngl==5.10.0 -mpmath==1.3.0 -msgpack==1.0.8 -networkx==3.1 -numpy==1.24.4 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-nccl-cu12==2.19.3 -nvidia-nvjitlink-cu12==12.4.127 -nvidia-nvtx-cu12==12.1.105 -oauthlib==3.2.2 -packaging==24.0 -pandas==2.0.3 -pillow==10.3.0 -pip==23.3.1 -pkgutil_resolve_name==1.3.10 -pluggy==1.4.0 -procgen==0.9.5+ed4be81 -protobuf==4.25.3 -psutil==5.9.8 -psutil==5.9.8 -py-cpuinfo==9.0.0 -pyarrow==15.0.2 -pyasn1==0.6.0 -pyasn1_modules==0.4.0 -pycparser==2.22 -pyenchant==3.2.2 -pyglet==1.4.11 -pyparsing==3.1.2 -pytest-benchmark==4.0.0 -pytest==8.1.1 -python-dateutil==2.9.0.post0 -pytz==2024.1 -ray==2.10.0 -referencing==0.34.0 -requests-oauthlib==2.0.0 -requests==2.31.0 -rich==13.7.1 -rpds-py==0.18.0 -rsa==4.9 -scikit-image==0.21.0 -scipy==1.10.0 -sentry-sdk==1.45.0 -setproctitle==1.2.2 -setproctitle==1.3.3 -setuptools==68.2.2 -shellingham==1.5.4 -six==1.16.0 -smmap==5.0.1 -snowballstemmer==2.2.0 -soupsieve==2.5 -sphinx-basic-ng==1.0.0b2 
-sphinx-tabs==3.4.5 -sphinxcontrib-applehelp==1.0.4 -sphinxcontrib-devhelp==1.0.2 -sphinxcontrib-htmlhelp==2.0.1 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.3 -sphinxcontrib-serializinghtml==1.1.5 -sphinxcontrib-spelling==8.0.0 -sympy==1.12 -tensorboard-data-server==0.7.2 -tensorboard==2.14.0 -tensorboardX==2.6.2.2 -tifffile==2023.7.10 -tomli==2.0.1 -torch==2.2.2 -triton==2.2.0 -typer==0.12.3 -typing_extensions==4.11.0 -tzdata==2024.1 -urllib3==2.2.1 -wandb==0.16.6 -wheel==0.41.2 -zipp==3.18.1 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch deleted file mode 100644 index 5b0b6409..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch +++ /dev/null @@ -1,1417 +0,0 @@ -diff --git a/setup.py b/setup.py -index 31e09f2..22a94e8 100644 ---- a/setup.py -+++ b/setup.py -@@ -2,7 +2,7 @@ from setuptools import find_packages, setup - - - extras = dict() --extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] -+extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] - extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] - extras['all'] = extras['test'] + extras['docs'] - -diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py -index 03284da..4ca9aeb 100644 ---- a/syllabus/core/curriculum_base.py -+++ b/syllabus/core/curriculum_base.py -@@ -76,7 +76,7 @@ class Curriculum: - """ - self.completed_tasks += 1 - -- def update_on_step(self, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: -+ def update_on_step(self, task: typing.Any, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: - """ Update the curriculum with the current step results from the environment. - - :param obs: Observation from teh environment -@@ -88,7 +88,7 @@ class Curriculum: - """ - raise NotImplementedError("This curriculum does not require step updates. Set update_on_step for the environment sync wrapper to False to improve performance and prevent this error.") - -- def update_on_step_batch(self, step_results: List[typing.Tuple[int, int, int, int, int]], env_id: int = None) -> None: -+ def update_on_step_batch(self, step_results: List[typing.Tuple[Any, Any, int, int, int, int]], env_id: int = None) -> None: - """Update the curriculum with a batch of step results from the environment. - - This method can be overridden to provide a more efficient implementation. It is used -@@ -96,9 +96,9 @@ class Curriculum: - - :param step_results: List of step results - """ -- obs, rews, terms, truncs, infos = tuple(step_results) -+ tasks, obs, rews, terms, truncs, infos = tuple(step_results) - for i in range(len(obs)): -- self.update_on_step(obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) -+ self.update_on_step(tasks[i], obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) - - def update_on_episode(self, episode_return: float, episode_length: int, episode_task: Any, env_id: int = None) -> None: - """Update the curriculum with episode results from the environment. 
-diff --git a/syllabus/core/curriculum_sync_wrapper.py b/syllabus/core/curriculum_sync_wrapper.py -index 6e069d8..f986643 100644 ---- a/syllabus/core/curriculum_sync_wrapper.py -+++ b/syllabus/core/curriculum_sync_wrapper.py -@@ -29,6 +29,14 @@ class CurriculumWrapper: - def tasks(self): - return self.task_space.tasks - -+ @property -+ def requires_step_updates(self): -+ return self.curriculum.requires_step_updates -+ -+ @property -+ def requires_episode_updates(self): -+ return self.curriculum.requires_episode_updates -+ - def get_tasks(self, task_space=None): - return self.task_space.get_tasks(gym_space=task_space) - -diff --git a/syllabus/core/environment_sync_wrapper.py b/syllabus/core/environment_sync_wrapper.py -index c995aa1..6edee7c 100644 ---- a/syllabus/core/environment_sync_wrapper.py -+++ b/syllabus/core/environment_sync_wrapper.py -@@ -19,7 +19,8 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - def __init__(self, - env, - components: MultiProcessingComponents, -- update_on_step: bool = True, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? -+ update_on_step: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? -+ update_on_progress: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? - batch_size: int = 100, - buffer_size: int = 2, # Having an extra task in the buffer minimizes wait time at reset - task_space: TaskSpace = None, -@@ -34,6 +35,7 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - self.update_queue = components.update_queue - self.task_space = task_space - self.update_on_step = update_on_step -+ self.update_on_progress = update_on_progress - self.batch_size = batch_size - self.global_task_completion = global_task_completion - self.task_progress = 0.0 -@@ -125,17 +127,21 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - def _package_step_updates(self): - step_batch = { - "update_type": "step_batch", -- "metrics": ([self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), -+ "metrics": ([self._tasks[:self._batch_step], self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), - "env_id": self.instance_id, - "request_sample": False - } -- task_batch = { -- "update_type": "task_progress_batch", -- "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), -- "env_id": self.instance_id, -- "request_sample": False -- } -- return [step_batch, task_batch] -+ update = [step_batch] -+ -+ if self.update_on_progress: -+ task_batch = { -+ "update_type": "task_progress_batch", -+ "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), -+ "env_id": self.instance_id, -+ "request_sample": False -+ } -+ update.append(task_batch) -+ return update - - def add_task(self, task): - update = { -diff --git a/syllabus/curricula/annealing_box.py b/syllabus/curricula/annealing_box.py -index 6c565ec..101981c 100644 ---- a/syllabus/curricula/annealing_box.py -+++ b/syllabus/curricula/annealing_box.py -@@ -49,8 +49,8 @@ class AnnealingBoxCurriculum(Curriculum): - """ - # Linear annealing from start_values to end_values - annealed_values = ( -- self.start_values + (self.end_values - self.start_values) * -- np.minimum(self.current_step, self.total_steps) / self.total_steps -+ 
self.start_values + (self.end_values - self.start_values) * -+ np.minimum(self.current_step, self.total_steps) / self.total_steps - ) - -- return [annealed_values.copy() for _ in range(k)] -\ No newline at end of file -+ return [annealed_values.copy() for _ in range(k)] -diff --git a/syllabus/curricula/noop.py b/syllabus/curricula/noop.py -index f6bd5dc..fb5d8ae 100644 ---- a/syllabus/curricula/noop.py -+++ b/syllabus/curricula/noop.py -@@ -28,7 +28,7 @@ class NoopCurriculum(Curriculum): - """ - pass - -- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: - """ - Update the curriculum with the current step results from the environment. - """ -diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py -index 9515df4..9c808dd 100644 ---- a/syllabus/curricula/plr/plr_wrapper.py -+++ b/syllabus/curricula/plr/plr_wrapper.py -@@ -23,16 +23,15 @@ class RolloutStorage(object): - get_value=None, - ): - self.num_steps = num_steps -- self.buffer_steps = num_steps * 2 # Hack to prevent overflow from lagging updates. -+ self.buffer_steps = num_steps * 4 # Hack to prevent overflow from lagging updates. - self.num_processes = num_processes - self._requires_value_buffers = requires_value_buffers - self._get_value = get_value - self.tasks = torch.zeros(self.buffer_steps, num_processes, 1, dtype=torch.int) - self.masks = torch.ones(self.buffer_steps + 1, num_processes, 1) - self.obs = [[[0] for _ in range(self.num_processes)]] * self.buffer_steps -- self._fill = torch.zeros(self.buffer_steps, num_processes, 1) - self.env_steps = [0] * num_processes -- self.should_update = False -+ self.ready_buffers = set() - - if requires_value_buffers: - self.returns = torch.zeros(self.buffer_steps + 1, num_processes, 1) -@@ -46,12 +45,10 @@ class RolloutStorage(object): - self.action_log_dist = torch.zeros(self.buffer_steps, num_processes, action_space.n) - - self.num_steps = num_steps -- self.step = 0 - - def to(self, device): - self.masks = self.masks.to(device) - self.tasks = self.tasks.to(device) -- self._fill = self._fill.to(device) - if self._requires_value_buffers: - self.rewards = self.rewards.to(device) - self.value_preds = self.value_preds.to(device) -@@ -59,108 +56,79 @@ class RolloutStorage(object): - else: - self.action_log_dist = self.action_log_dist.to(device) - -- def insert(self, masks, action_log_dist=None, value_preds=None, rewards=None, tasks=None): -- if self._requires_value_buffers: -- assert (value_preds is not None and rewards is not None), "Selected strategy requires value_preds and rewards" -- if len(rewards.shape) == 3: -- rewards = rewards.squeeze(2) -- self.value_preds[self.step].copy_(torch.as_tensor(value_preds)) -- self.rewards[self.step].copy_(torch.as_tensor(rewards)[:, None]) -- self.masks[self.step + 1].copy_(torch.as_tensor(masks)[:, None]) -- else: -- self.action_log_dist[self.step].copy_(action_log_dist) -- if tasks is not None: -- assert isinstance(tasks[0], int), "Provided task must be an integer" -- self.tasks[self.step].copy_(torch.as_tensor(tasks)[:, None]) -- self.step = (self.step + 1) % self.num_steps -- - def insert_at_index(self, env_index, mask=None, action_log_dist=None, obs=None, reward=None, task=None, steps=1): -- if env_index >= self.num_processes: -- warnings.warn(f"Env index {env_index} is greater than the number of processes {self.num_processes}. 
Using index {env_index % self.num_processes} instead.") -- env_index = env_index % self.num_processes -- - step = self.env_steps[env_index] - end_step = step + steps -- # Update buffer fill traacker, and check for common usage errors. -- try: -- if end_step > len(self._fill): -- raise IndexError -- self._fill[step:end_step, env_index] = 1 -- except IndexError as e: -- if any(self._fill[:][env_index] == 0): -- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. Your value for PLR's num_processes may be too high.") from e -- else: -- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. Your value for PLR's num_processes may be too low.") from e - - if mask is not None: - self.masks[step + 1:end_step + 1, env_index].copy_(torch.as_tensor(mask[:, None])) -+ - if obs is not None: - for s in range(step, end_step): - self.obs[s][env_index] = obs[s - step] -+ - if reward is not None: - self.rewards[step:end_step, env_index].copy_(torch.as_tensor(reward[:, None])) -+ - if action_log_dist is not None: - self.action_log_dist[step:end_step, env_index].copy_(torch.as_tensor(action_log_dist[:, None])) -+ - if task is not None: - try: -- task = int(task) -+ int(task[0]) - except TypeError: -- assert isinstance(task, int), f"Provided task must be an integer, got {task} with type {type(task)} instead." -- self.tasks[step:end_step, env_index].copy_(torch.as_tensor(task)) -- else: -- self.env_steps[env_index] += steps -- # Hack for now, we call insert_at_index twice -- while all(self._fill[self.step] == 1): -- self.step = (self.step + 1) % self.buffer_steps -- # Check if we have enough steps to compute a task sampler update -- if self.step == self.num_steps + 1: -- self.should_update = True -- -- def _get_values(self): -+ assert isinstance(task, int), f"Provided task must be an integer, got {task[0]} with type {type(task[0])} instead." -+ self.tasks[step:end_step, env_index].copy_(torch.as_tensor(np.array(task)[:, None])) -+ -+ self.env_steps[env_index] += steps -+ if env_index not in self.ready_buffers and self.env_steps[env_index] >= self.num_steps: -+ self.ready_buffers.add(env_index) -+ -+ def _get_values(self, env_index): - if self._get_value is None: - raise UsageError("Selected strategy requires value predictions. Please provide get_value function.") -- for step in range(self.num_steps): -- values = self._get_value(self.obs[step]) -+ for step in range(0, self.num_steps, self.num_processes): -+ obs = self.obs[step: step + self.num_processes][env_index] -+ values = self._get_value(obs) -+ -+ # Reshape values if necessary - if len(values.shape) == 3: - warnings.warn(f"Value function returned a 3D tensor of shape {values.shape}. Attempting to squeeze last dimension.") - values = torch.squeeze(values, -1) - if len(values.shape) == 1: - warnings.warn(f"Value function returned a 1D tensor of shape {values.shape}. 
Attempting to unsqueeze last dimension.") - values = torch.unsqueeze(values, -1) -- self.value_preds[step].copy_(values) - -- def after_update(self): -+ self.value_preds[step: step + self.num_processes, env_index].copy_(values) -+ -+ def after_update(self, env_index): - # After consuming the first num_steps of data, remove them and shift the remaining data in the buffer -- self.tasks[0: self.num_steps].copy_(self.tasks[self.num_steps: self.buffer_steps]) -- self.masks[0: self.num_steps].copy_(self.masks[self.num_steps: self.buffer_steps]) -- self.obs[0: self.num_steps][:] = self.obs[self.num_steps: self.buffer_steps][:] -+ self.tasks = self.tasks.roll(-self.num_steps, 0) -+ self.masks = self.masks.roll(-self.num_steps, 0) -+ self.obs[0:][env_index] = self.obs[self.num_steps: self.buffer_steps][env_index] - - if self._requires_value_buffers: -- self.returns[0: self.num_steps].copy_(self.returns[self.num_steps: self.buffer_steps]) -- self.rewards[0: self.num_steps].copy_(self.rewards[self.num_steps: self.buffer_steps]) -- self.value_preds[0: self.num_steps].copy_(self.value_preds[self.num_steps: self.buffer_steps]) -+ self.returns = self.returns.roll(-self.num_steps, 0) -+ self.rewards = self.rewards.roll(-self.num_steps, 0) -+ self.value_preds = self.value_preds.roll(-self.num_steps, 0) - else: -- self.action_log_dist[0: self.num_steps].copy_(self.action_log_dist[self.num_steps: self.buffer_steps]) -+ self.action_log_dist = self.action_log_dist.roll(-self.num_steps, 0) - -- self._fill[0: self.num_steps].copy_(self._fill[self.num_steps: self.buffer_steps]) -- self._fill[self.num_steps: self.buffer_steps].copy_(0) -+ self.env_steps[env_index] -= self.num_steps -+ self.ready_buffers.remove(env_index) - -- self.env_steps = [steps - self.num_steps for steps in self.env_steps] -- self.should_update = False -- self.step = self.step - self.num_steps -- -- def compute_returns(self, gamma, gae_lambda): -+ def compute_returns(self, gamma, gae_lambda, env_index): - assert self._requires_value_buffers, "Selected strategy does not use compute_rewards." -- self._get_values() -+ self._get_values(env_index) - gae = 0 - for step in reversed(range(self.rewards.size(0), self.num_steps)): - delta = ( -- self.rewards[step] -- + gamma * self.value_preds[step + 1] * self.masks[step + 1] -- - self.value_preds[step] -+ self.rewards[step, env_index] -+ + gamma * self.value_preds[step + 1, env_index] * self.masks[step + 1, env_index] -+ - self.value_preds[step, env_index] - ) -- gae = delta + gamma * gae_lambda * self.masks[step + 1] * gae -- self.returns[step] = gae + self.value_preds[step] -+ gae = delta + gamma * gae_lambda * self.masks[step + 1, env_index] * gae -+ self.returns[step, env_index] = gae + self.value_preds[step, env_index] - - - def null(x): -@@ -252,11 +220,15 @@ class PrioritizedLevelReplay(Curriculum): - else: - return [self._task_sampler.sample() for _ in range(k)] - -- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: - """ - Update the curriculum with the current step results from the environment. - """ - assert env_id is not None, "env_id must be provided for PLR updates." -+ if env_id >= self._num_processes: -+ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. 
Using index {env_id % self._num_processes} instead.") -+ env_id = env_id % self._num_processes -+ - # Update rollouts - self._rollouts.insert_at_index( - env_id, -@@ -266,14 +238,22 @@ class PrioritizedLevelReplay(Curriculum): - obs=np.array([obs]), - ) - -+ # Update task sampler -+ if env_id in self._rollouts.ready_buffers: -+ self._update_sampler(env_id) -+ - def update_on_step_batch( -- self, step_results: List[Tuple[Any, int, bool, bool, Dict]], env_id: int = None -+ self, step_results: List[Tuple[int, Any, int, bool, bool, Dict]], env_id: int = None - ) -> None: - """ - Update the curriculum with a batch of step results from the environment. - """ - assert env_id is not None, "env_id must be provided for PLR updates." -- obs, rews, terms, truncs, infos = step_results -+ if env_id >= self._num_processes: -+ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. Using index {env_id % self._num_processes} instead.") -+ env_id = env_id % self._num_processes -+ -+ tasks, obs, rews, terms, truncs, infos = step_results - self._rollouts.insert_at_index( - env_id, - mask=np.logical_not(np.logical_or(terms, truncs)), -@@ -281,25 +261,19 @@ class PrioritizedLevelReplay(Curriculum): - reward=rews, - obs=obs, - steps=len(rews), -+ task=tasks, - ) - -- def update_task_progress(self, task: Any, success_prob: float, env_id: int = None) -> None: -- """ -- Update the curriculum with a task and its success probability upon -- success or failure. -- """ -- assert env_id is not None, "env_id must be provided for PLR updates." -- self._rollouts.insert_at_index( -- env_id, -- task=task, -- ) - # Update task sampler -- if self._rollouts.should_update: -- if self._task_sampler.requires_value_buffers: -- self._rollouts.compute_returns(self._gamma, self._gae_lambda) -- self._task_sampler.update_with_rollouts(self._rollouts) -- self._rollouts.after_update() -- self._task_sampler.after_update() -+ if env_id in self._rollouts.ready_buffers: -+ self._update_sampler(env_id) -+ -+ def _update_sampler(self, env_id): -+ if self._task_sampler.requires_value_buffers: -+ self._rollouts.compute_returns(self._gamma, self._gae_lambda, env_id) -+ self._task_sampler.update_with_rollouts(self._rollouts, env_id) -+ self._rollouts.after_update(env_id) -+ self._task_sampler.after_update() - - def _enumerate_tasks(self, space): - assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete" -@@ -312,10 +286,10 @@ class PrioritizedLevelReplay(Curriculum): - """ - Log the task distribution to the provided tensorboard writer. 
- """ -- super().log_metrics(writer, step) -+ # super().log_metrics(writer, step) - metrics = self._task_sampler.metrics() - writer.add_scalar("curriculum/proportion_seen", metrics["proportion_seen"], step) - writer.add_scalar("curriculum/score", metrics["score"], step) -- for task in list(self.task_space.tasks)[:10]: -- writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) -- writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) -+ # for task in list(self.task_space.tasks)[:10]: -+ # writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) -+ # writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) -diff --git a/syllabus/curricula/plr/task_sampler.py b/syllabus/curricula/plr/task_sampler.py -index 15ad485..c1e97a1 100644 ---- a/syllabus/curricula/plr/task_sampler.py -+++ b/syllabus/curricula/plr/task_sampler.py -@@ -73,7 +73,7 @@ class TaskSampler: - 'Must provide action space to PLR if using "policy_entropy", "least_confidence", or "min_margin" strategies' - ) - -- def update_with_rollouts(self, rollouts): -+ def update_with_rollouts(self, rollouts, actor_id=None): - if self.strategy == "random": - return - -@@ -93,7 +93,7 @@ class TaskSampler: - else: - raise ValueError(f"Unsupported strategy, {self.strategy}") - -- self._update_with_rollouts(rollouts, score_function) -+ self._update_with_rollouts(rollouts, score_function, actor_index=actor_id) - - def update_task_score(self, actor_index, task_idx, score, num_steps): - score = self._partial_update_task_score(actor_index, task_idx, score, num_steps, done=True) -@@ -165,14 +165,15 @@ class TaskSampler: - def requires_value_buffers(self): - return self.strategy in ["gae", "value_l1", "one_step_td_error"] - -- def _update_with_rollouts(self, rollouts, score_function): -+ def _update_with_rollouts(self, rollouts, score_function, actor_index=None): - tasks = rollouts.tasks - if not self.requires_value_buffers: - policy_logits = rollouts.action_log_dist - done = ~(rollouts.masks > 0) - total_steps, num_actors = rollouts.tasks.shape[:2] - -- for actor_index in range(num_actors): -+ actors = [actor_index] if actor_index is not None else range(num_actors) -+ for actor_index in actors: - done_steps = done[:, actor_index].nonzero()[:total_steps, 0] - start_t = 0 - -diff --git a/syllabus/curricula/sequential.py b/syllabus/curricula/sequential.py -index baa1263..ec3b8b0 100644 ---- a/syllabus/curricula/sequential.py -+++ b/syllabus/curricula/sequential.py -@@ -177,9 +177,9 @@ class SequentialCurriculum(Curriculum): - if self.current_curriculum.requires_episode_updates: - self.current_curriculum.update_on_episode(episode_return, episode_len, episode_task, env_id) - -- def update_on_step(self, obs, rew, term, trunc, info, env_id=None): -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id=None): - if self.current_curriculum.requires_step_updates: -- self.current_curriculum.update_on_step(obs, rew, term, trunc, info, env_id) -+ self.current_curriculum.update_on_step(task, obs, rew, term, trunc, info, env_id) - - def update_on_step_batch(self, step_results, env_id=None): - if self.current_curriculum.requires_step_updates: -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py -index a6d469e..b848d69 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py 
-+++ b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py -@@ -14,6 +14,7 @@ import gym as openai_gym - import gymnasium as gym - import numpy as np - import procgen # noqa: F401 -+from procgen import ProcgenEnv - import torch - import torch.nn as nn - import torch.optim as optim -@@ -21,10 +22,10 @@ from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 - from torch.utils.tensorboard import SummaryWriter - - from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum --from syllabus.curricula import DomainRandomization, LearningProgressCurriculum, CentralizedPrioritizedLevelReplay -+from syllabus.curricula import CentralizedPrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum - from syllabus.examples.models import ProcgenAgent - from syllabus.examples.task_wrappers import ProcgenTaskWrapper --from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize -+from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - - def parse_args(): -@@ -46,6 +47,8 @@ def parse_args(): - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="weather to capture videos of the agent performances (check out `videos` folder)") -+ parser.add_argument("--logging-dir", type=str, default=".", -+ help="the base directory for logging and wandb storage.") - - # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="starpilot", -@@ -124,15 +127,15 @@ PROCGEN_RETURN_BOUNDS = { - } - - --def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): -+def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) -- env = ProcgenTaskWrapper(env, env_id, seed=seed) -- if curriculum_components is not None: -+ if curriculum is not None: -+ env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, -- curriculum_components, -+ curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, - ) -@@ -147,36 +150,38 @@ def wrap_vecenv(vecenv): - return vecenv - - --def level_replay_evaluate( -+def full_level_replay_evaluate( - env_name, - policy, - num_episodes, - device, -- num_levels=0 -+ num_levels=1 # Not used - ): - policy.eval() -- eval_envs = gym.vector.SyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) -- for i in range(1) -- ] -+ -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False - ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - -- eval_episode_rewards = [] -+ # Seed environments -+ seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] -+ for i, seed in enumerate(seeds): -+ eval_envs.seed(seed, i) -+ - eval_obs, _ = eval_envs.reset() -+ eval_episode_rewards = [-1] * num_episodes - -- while len(eval_episode_rewards) < num_episodes: -+ while -1 in eval_episode_rewards: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - -- eval_obs, _, truncs, terms, infos = 
eval_envs.step(np.array([eval_action.cpu().numpy()])) -- -- for info in infos: -- if 'episode' in info.keys(): -- eval_episode_rewards.append(info['episode']['r']) -+ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -+ for i, info in enumerate(infos): -+ if 'episode' in info.keys() and eval_episode_rewards[i] == -1: -+ eval_episode_rewards[i] = info['episode']['r'] - -- eval_envs.close() - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -@@ -185,8 +190,7 @@ def level_replay_evaluate( - return mean_returns, stddev_returns, normalized_mean_returns - - --def fast_level_replay_evaluate( -- eval_envs, -+def level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -194,9 +198,13 @@ def fast_level_replay_evaluate( - num_levels=0 - ): - policy.eval() -- possible_seeds = np.arange(0, num_levels + 1) -- eval_obs, _ = eval_envs.reset(seed=list(np.random.choice(possible_seeds, size=num_episodes))) - -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -+ ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") -+ eval_envs = wrap_vecenv(eval_envs) -+ eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: -@@ -231,10 +239,11 @@ if __name__ == "__main__": - name=run_name, - monitor_gym=True, - save_code=True, -- # dir="/fs/nexus-scratch/rsulli/" -+ dir=args.logging_dir - ) -- wandb.run.log_code("./syllabus/examples") -- writer = SummaryWriter(f"./runs/{run_name}") -+ # wandb.run.log_code("./syllabus/examples") -+ -+ writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), -@@ -250,7 +259,7 @@ if __name__ == "__main__": - print("Device:", device) - - # Curriculum setup -- task_queue = update_queue = None -+ curriculum = None - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) -@@ -273,6 +282,16 @@ if __name__ == "__main__": - elif args.curriculum_method == "lp": - print("Using learning progress.") - curriculum = LearningProgressCurriculum(sample_env.task_space) -+ elif args.curriculum_method == "sq": -+ print("Using sequential curriculum.") -+ curricula = [] -+ stopping = [] -+ for i in range(199): -+ curricula.append(i + 1) -+ stopping.append("steps>=50000") -+ curricula.append(list(range(i + 1))) -+ stopping.append("steps>=50000") -+ curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) - else: - raise ValueError(f"Unknown curriculum method {args.curriculum_method}") - curriculum = make_multiprocessing_curriculum(curriculum) -@@ -285,7 +304,7 @@ if __name__ == "__main__": - make_env( - args.env_id, - args.seed + i, -- curriculum_components=curriculum.get_components() if args.curriculum else None, -+ curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) -@@ -293,22 +312,6 @@ if __name__ == "__main__": - ) - envs = wrap_vecenv(envs) - -- test_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=0) -- for i in range(args.num_eval_episodes) -- ] -- ) -- test_eval_envs = wrap_vecenv(test_eval_envs) -- -- train_eval_envs = 
gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=200) -- for i in range(args.num_eval_episodes) -- ] -- ) -- train_eval_envs = wrap_vecenv(train_eval_envs) -- - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( -@@ -369,6 +372,8 @@ if __name__ == "__main__": - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) -+ if curriculum is not None: -+ curriculum.log_metrics(writer, global_step) - break - - # Syllabus curriculum update -@@ -388,8 +393,6 @@ if __name__ == "__main__": - }, - } - curriculum.update(update) -- #if args.curriculum: -- # curriculum.log_metrics(writer, global_step) - - # bootstrap value if not done - with torch.no_grad(): -@@ -487,8 +490,18 @@ if __name__ == "__main__": - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent -- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) -- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) -+ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) -+ full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) -@@ -502,12 +515,21 @@ if __name__ == "__main__": - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) -+ - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) -+ - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) -- 
writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) -+ writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) -+ - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -index e13c22e..dabcd50 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -+++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -@@ -14,6 +14,7 @@ import gym as openai_gym - import gymnasium as gym - import numpy as np - import procgen # noqa: F401 -+from procgen import ProcgenEnv - import torch - import torch.nn as nn - import torch.optim as optim -@@ -24,7 +25,7 @@ from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curri - from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum - from syllabus.examples.models import ProcgenAgent - from syllabus.examples.task_wrappers import ProcgenTaskWrapper --from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize -+from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - - def parse_args(): -@@ -126,18 +127,17 @@ PROCGEN_RETURN_BOUNDS = { - } - - --def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): -+def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) -- env = ProcgenTaskWrapper(env, env_id, seed=seed) -- if curriculum_components is not None: -+ if curriculum is not None: -+ env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, -- curriculum_components, -- update_on_step=False, -+ curriculum.get_components(), -+ update_on_step=curriculum.requires_step_updates, - task_space=env.task_space, -- buffer_size=4, - ) - return env - return thunk -@@ -150,36 +150,38 @@ def wrap_vecenv(vecenv): - return vecenv - - --def level_replay_evaluate( -+def full_level_replay_evaluate( - env_name, - policy, - num_episodes, - device, -- num_levels=0 -+ num_levels=1 # Not used - ): - policy.eval() -- eval_envs = gym.vector.SyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) -- for i in range(1) -- ] -+ -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False - ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - -- eval_episode_rewards = [] -+ # Seed environments -+ seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] -+ for i, seed in enumerate(seeds): -+ eval_envs.seed(seed, i) -+ - eval_obs, _ = eval_envs.reset() -+ eval_episode_rewards = [-1] * num_episodes - -- while len(eval_episode_rewards) < num_episodes: -+ while -1 in eval_episode_rewards: - with torch.no_grad(): - eval_action, _, _, _ = 
policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - -- eval_obs, _, truncs, terms, infos = eval_envs.step(np.array([eval_action.cpu().numpy()])) -- -- for info in infos: -- if 'episode' in info.keys(): -- eval_episode_rewards.append(info['episode']['r']) -+ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -+ for i, info in enumerate(infos): -+ if 'episode' in info.keys() and eval_episode_rewards[i] == -1: -+ eval_episode_rewards[i] = info['episode']['r'] - -- eval_envs.close() - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -@@ -188,8 +190,7 @@ def level_replay_evaluate( - return mean_returns, stddev_returns, normalized_mean_returns - - --def fast_level_replay_evaluate( -- eval_envs, -+def level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -198,15 +199,12 @@ def fast_level_replay_evaluate( - ): - policy.eval() - -- # Choose evaluation seeds -- if num_levels == 0: -- seeds = np.random.randint(0, 2 ** 16 - 1, size=num_episodes) -- else: -- seeds = np.random.choice(np.arange(0, num_levels), size=num_episodes) -- -- seed_envs = [(int(seed), env) for seed, env in zip(seeds, range(num_episodes))] -- eval_obs, _ = eval_envs.reset(seed=seed_envs) -- -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -+ ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") -+ eval_envs = wrap_vecenv(eval_envs) -+ eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: -@@ -251,7 +249,7 @@ if __name__ == "__main__": - save_code=True, - dir=args.logging_dir - ) -- wandb.run.log_code(os.path.join(args.logging_dir, "/syllabus/examples")) -+ # wandb.run.log_code("./syllabus/examples") - - writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) - writer.add_text( -@@ -316,7 +314,7 @@ if __name__ == "__main__": - make_env( - args.env_id, - args.seed + i, -- curriculum_components=curriculum.get_components() if args.curriculum else None, -+ curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) -@@ -324,22 +322,6 @@ if __name__ == "__main__": - ) - envs = wrap_vecenv(envs) - -- test_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=0) -- for i in range(args.num_eval_episodes) -- ] -- ) -- test_eval_envs = wrap_vecenv(test_eval_envs) -- -- train_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=200) -- for i in range(args.num_eval_episodes) -- ] -- ) -- train_eval_envs = wrap_vecenv(train_eval_envs) -- - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( -@@ -500,8 +482,18 @@ if __name__ == "__main__": - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent -- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) -- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) -+ mean_eval_returns, 
stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) -+ full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) -@@ -515,12 +507,21 @@ if __name__ == "__main__": - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) -+ - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) -+ - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) -- writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) -+ writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) -+ - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() -diff --git a/syllabus/examples/utils/vecenv.py b/syllabus/examples/utils/vecenv.py -index 6e5a0a9..af3b187 100644 ---- a/syllabus/examples/utils/vecenv.py -+++ b/syllabus/examples/utils/vecenv.py -@@ -1,7 +1,6 @@ - import time - from collections import deque - --import gym - import numpy as np - - -@@ -154,12 +153,20 @@ class VecEnvObservationWrapper(VecEnvWrapper): - pass - - def reset(self): -- obs, infos = self.venv.reset() -+ outputs = self.venv.reset() -+ if len(outputs) == 2: -+ obs, infos = outputs -+ else: -+ obs, infos = outputs, {} - return self.process(obs), infos - - def step_wait(self): -- print(self.venv) -- obs, rews, terms, truncs, infos = self.venv.step_wait() -+ env_outputs = self.venv.step_wait() -+ if len(env_outputs) == 4: -+ obs, rews, terms, infos = env_outputs -+ truncs = np.zeros_like(terms) -+ else: -+ obs, rews, terms, truncs, infos = env_outputs - return self.process(obs), rews, terms, truncs, infos - - 
-@@ -209,7 +216,10 @@ class VecNormalize(VecEnvWrapper): - - def reset(self, seed=None): - self.ret = np.zeros(self.num_envs) -- obs, infos = self.venv.reset(seed=seed) -+ if seed is not None: -+ obs, infos = self.venv.reset(seed=seed) -+ else: -+ obs, infos = self.venv.reset() - return self._obfilt(obs), infos - - -@@ -228,7 +238,10 @@ class VecMonitor(VecEnvWrapper): - self.eplen_buf = deque([], maxlen=keep_buf) - - def reset(self, seed=None): -- obs, infos = self.venv.reset(seed=seed) -+ if seed is not None: -+ obs, infos = self.venv.reset(seed=seed) -+ else: -+ obs, infos = self.venv.reset() - self.eprets = np.zeros(self.num_envs, 'f') - self.eplens = np.zeros(self.num_envs, 'i') - return obs, infos -@@ -239,7 +252,8 @@ class VecMonitor(VecEnvWrapper): - self.eprets += rews - self.eplens += 1 - # Convert dict of lists to list of dicts -- infos = [dict(zip(infos, t)) for t in zip(*infos.values())] -+ if isinstance(infos, dict): -+ infos = [dict(zip(infos, t)) for t in zip(*infos.values())] - newinfos = list(infos[:]) - for i in range(len(dones)): - if dones[i]: -diff --git a/syllabus/task_space/task_space.py b/syllabus/task_space/task_space.py -index 316e2f2..1ef674b 100644 ---- a/syllabus/task_space/task_space.py -+++ b/syllabus/task_space/task_space.py -@@ -7,20 +7,53 @@ from gymnasium.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Sp - - class TaskSpace(): - def __init__(self, gym_space: Union[Space, int], tasks=None): -- if isinstance(gym_space, int): -- # Syntactic sugar for discrete space -- gym_space = Discrete(gym_space) -+ -+ if not isinstance(gym_space, Space): -+ gym_space = self._create_gym_space(gym_space) - - self.gym_space = gym_space - -- # Autogenerate task names for discrete spaces -- if isinstance(gym_space, Discrete): -- if tasks is None: -- tasks = range(gym_space.n) -+ # Autogenerate task names -+ if tasks is None: -+ tasks = self._generate_task_names(gym_space) - - self._tasks = set(tasks) if tasks is not None else None - self._encoder, self._decoder = self._make_task_encoder(gym_space, tasks) - -+ def _create_gym_space(self, gym_space): -+ if isinstance(gym_space, int): -+ # Syntactic sugar for discrete space -+ gym_space = Discrete(gym_space) -+ elif isinstance(gym_space, tuple): -+ # Syntactic sugar for discrete space -+ gym_space = MultiDiscrete(gym_space) -+ elif isinstance(gym_space, list): -+ # Syntactic sugar for tuple space -+ spaces = [] -+ for i, value in enumerate(gym_space): -+ spaces[i] = self._create_gym_space(value) -+ gym_space = Tuple(spaces) -+ elif isinstance(gym_space, dict): -+ # Syntactic sugar for dict space -+ spaces = {} -+ for key, value in gym_space.items(): -+ spaces[key] = self._create_gym_space(value) -+ gym_space = Dict(spaces) -+ return gym_space -+ -+ def _generate_task_names(self, gym_space): -+ if isinstance(gym_space, Discrete): -+ tasks = tuple(range(gym_space.n)) -+ elif isinstance(gym_space, MultiDiscrete): -+ tasks = [tuple(range(dim)) for dim in gym_space.nvec] -+ elif isinstance(gym_space, Tuple): -+ tasks = [self._generate_task_names(value) for value in gym_space.spaces] -+ elif isinstance(gym_space, Dict): -+ tasks = {key: tuple(self._generate_task_names(value)) for key, value in gym_space.spaces.items()} -+ else: -+ tasks = None -+ return tasks -+ - def _make_task_encoder(self, space, tasks): - if isinstance(space, Discrete): - assert space.n == len(tasks), f"Number of tasks ({space.n}) must match number of discrete options ({len(tasks)})" -@@ -28,14 +61,46 @@ class TaskSpace(): - self._decode_map = {i: 
task for i, task in enumerate(tasks)} - encoder = lambda task: self._encode_map[task] if task in self._encode_map else None - decoder = lambda task: self._decode_map[task] if task in self._decode_map else None -+ -+ elif isinstance(space, Box): -+ encoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None -+ decoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None - elif isinstance(space, Tuple): -- for i, task in enumerate(tasks): -- assert self.count_tasks(space.spaces[i]) == len(task), "Each task must have number of components equal to Tuple space length. Got {len(task)} components and space length {self.count_tasks(space.spaces[i])}." -+ -+ assert len(space.spaces) == len(tasks), f"Number of task ({len(space.spaces)})must match options in Tuple ({len(tasks)})" - results = [list(self._make_task_encoder(s, t)) for (s, t) in zip(space.spaces, tasks)] - encoders = [r[0] for r in results] - decoders = [r[1] for r in results] - encoder = lambda task: [e(t) for e, t in zip(encoders, task)] - decoder = lambda task: [d(t) for d, t in zip(decoders, task)] -+ -+ elif isinstance(space, MultiDiscrete): -+ assert len(space.nvec) == len(tasks), f"Number of steps in a tasks ({len(space.nvec)}) must match number of discrete options ({len(tasks)})" -+ -+ combinations = [p for p in itertools.product(*tasks)] -+ encode_map = {task: i for i, task in enumerate(combinations)} -+ decode_map = {i: task for i, task in enumerate(combinations)} -+ -+ encoder = lambda task: encode_map[task] if task in encode_map else None -+ decoder = lambda task: decode_map[task] if task in decode_map else None -+ -+ elif isinstance(space, Dict): -+ -+ def helper(task, spaces, tasks, action="encode"): -+ # Iteratively encodes or decodes each space in the dictionary -+ output = {} -+ if (isinstance(spaces, dict) or isinstance(spaces, Dict)): -+ for key, value in spaces.items(): -+ if (isinstance(value, dict) or isinstance(value, Dict)): -+ temp = helper(task[key], value, tasks[key], action) -+ output.update({key: temp}) -+ else: -+ encoder, decoder = self._make_task_encoder(value, tasks[key]) -+ output[key] = encoder(task[key]) if action == "encode" else decoder(task[key]) -+ return output -+ -+ encoder = lambda task: helper(task, space.spaces, tasks, "encode") -+ decoder = lambda task: helper(task, space.spaces, tasks, "decode") - else: - encoder = lambda task: task - decoder = lambda task: task -@@ -152,6 +217,7 @@ class TaskSpace(): - return Discrete(self.gym_space.n + amount) - - def sample(self): -+ assert isinstance(self.gym_space, Discrete) or isinstance(self.gym_space, Box) or isinstance(self.gym_space, Dict) or isinstance(self.gym_space, Tuple) - return self.decode(self.gym_space.sample()) - - def list_tasks(self): -diff --git a/syllabus/task_space/test_task_space.py b/syllabus/task_space/test_task_space.py -index 0ec6b4e..109d0a7 100644 ---- a/syllabus/task_space/test_task_space.py -+++ b/syllabus/task_space/test_task_space.py -@@ -2,33 +2,148 @@ import gymnasium as gym - from syllabus.task_space import TaskSpace - - if __name__ == "__main__": -+ # Discrete Tests - task_space = TaskSpace(gym.spaces.Discrete(3), ["a", "b", "c"]) -+ - assert task_space.encode("a") == 0, f"Expected 0, got {task_space.encode('a')}" - assert task_space.encode("b") == 1, f"Expected 1, got {task_space.encode('b')}" - assert task_space.encode("c") == 2, f"Expected 2, got {task_space.encode('c')}" -- assert task_space.encode("d") == None, f"Expected None, got 
{task_space.encode('d')}" -+ assert task_space.encode("d") is None, f"Expected None, got {task_space.encode('d')}" - - assert task_space.decode(0) == "a", f"Expected a, got {task_space.decode(0)}" - assert task_space.decode(1) == "b", f"Expected b, got {task_space.decode(1)}" - assert task_space.decode(2) == "c", f"Expected c, got {task_space.decode(2)}" -- assert task_space.decode(3) == None, f"Expected None, got {task_space.decode(3)}" -+ assert task_space.decode(3) is None, f"Expected None, got {task_space.decode(3)}" - print("Discrete tests passed!") - -+ # MultiDiscrete Tests -+ task_space = TaskSpace(gym.spaces.MultiDiscrete([3, 2]), [("a", "b", "c"), (1, 0)]) -+ -+ assert task_space.encode(('a', 1)) == 0, f"Expected 0, got {task_space.encode(('a', 1))}" -+ assert task_space.encode(('b', 0)) == 3, f"Expected 3, got {task_space.encode(('b', 0))}" -+ assert task_space.encode(('c', 1)) == 4, f"Expected 4, got {task_space.encode(('c', 1))}" -+ -+ assert task_space.decode(3) == ('b', 0), f"Expected ('b', 0), got {task_space.decode(3)}" -+ assert task_space.decode(5) == ('c', 0), f"Expected ('c', 0), got {task_space.decode(5)}" -+ print("MultiDiscrete tests passed!") -+ -+ # Box Tests - task_space = TaskSpace(gym.spaces.Box(low=0, high=1, shape=(2,)), [(0, 0), (0, 1), (1, 0), (1, 1)]) -+ - assert task_space.encode([0.0, 0.0]) == [0.0, 0.0], f"Expected [0.0, 0.0], got {task_space.encode([0.0, 0.0])}" - assert task_space.encode([0.0, 0.1]) == [0.0, 0.1], f"Expected [0.0, 0.1], got {task_space.encode([0.0, 0.1])}" - assert task_space.encode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.encode([0.1, 0.1])}" - assert task_space.encode([1.0, 0.1]) == [1.0, 0.1], f"Expected [1.0, 0.1], got {task_space.encode([1.0, 0.1])}" - assert task_space.encode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.encode([1.0, 1.0])}" -- assert task_space.encode([1.2, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -- assert task_space.encode([1.0, 1.2]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -- assert task_space.encode([-0.1, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([1.2, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([1.0, 1.2]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([-0.1, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" - - assert task_space.decode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.decode([1.0, 1.0])}" - assert task_space.decode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.decode([0.1, 0.1])}" -- assert task_space.decode([-0.1, 1.0]) == None, f"Expected None, got {task_space.decode([1.2, 1.0])}" -+ assert task_space.decode([-0.1, 1.0]) is None, f"Expected None, got {task_space.decode([1.2, 1.0])}" - print("Box tests passed!") - -+ # Tuple Tests -+ task_spaces = (gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3)) -+ task_names = ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")) -+ task_space = TaskSpace(gym.spaces.Tuple(task_spaces), task_names) -+ -+ assert task_space.encode((('a', 0), 'Y')) == [1, 1], f"Expected 0, got {task_space.encode((('a', 1),'Y'))}" -+ assert task_space.decode([0, 1]) == [('a', 1), 'Y'], f"Expected 0, got {task_space.decode([0, 1])}" -+ print("Tuple tests passed!") -+ -+ # Dictionary Tests -+ task_spaces = gym.spaces.Dict({ -+ "ext_controller": gym.spaces.MultiDiscrete([5, 2, 
2]), -+ "inner_state": gym.spaces.Dict( -+ { -+ "charge": gym.spaces.Discrete(10), -+ "system_checks": gym.spaces.Tuple((gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3))), -+ "job_status": gym.spaces.Dict( -+ { -+ "task": gym.spaces.Discrete(5), -+ "progress": gym.spaces.Box(low=0, high=1, shape=(2,)), -+ } -+ ), -+ } -+ ), -+ }) -+ task_names = { -+ "ext_controller": [("a", "b", "c", "d", "e"), (1, 0), ("X", "Y")], -+ "inner_state": { -+ "charge": [0, 1, 13, 3, 94, 35, 6, 37, 8, 9], -+ "system_checks": ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")), -+ "job_status": { -+ "task": ["A", "B", "C", "D", "E"], -+ "progress": [(0, 0), (0, 1), (1, 0), (1, 1)], -+ } -+ } -+ } -+ task_space = TaskSpace(task_spaces, task_names) -+ -+ test_val = { -+ "ext_controller": ('b', 1, 'X'), -+ 'inner_state': { -+ 'charge': 1, -+ 'system_checks': [('a', 0), 'Y'], -+ 'job_status': {'task': 'C', 'progress': [0.0, 0.0]} -+ } -+ } -+ decode_val = { -+ "ext_controller": 4, -+ "inner_state": { -+ "charge": 1, -+ "system_checks": [1, 1], -+ "job_status": {"progress": [0.0, 0.0], "task": 2}, -+ }, -+ } -+ -+ assert task_space.encode(test_val) == decode_val, f"Expected {decode_val}, \n but got {task_space.encode(test_val)}" -+ assert task_space.decode(decode_val) == test_val, f"Expected {test_val}, \n but got {task_space.decode(decode_val)}" -+ -+ test_val_2 = { -+ "ext_controller": ("e", 1, "Y"), -+ "inner_state": { -+ "charge": 37, -+ "system_checks": [("b", 0), "Z"], -+ "job_status": {"progress": [0.0, 0.1], "task": "D"}, -+ }, -+ } -+ decode_val_2 = { -+ "ext_controller": 17, -+ "inner_state": { -+ "charge": 7, -+ "system_checks": [3, 2], -+ "job_status": {"progress": [0.0, 0.1], "task": 3}, -+ }, -+ } -+ -+ assert task_space.encode(test_val_2) == decode_val_2, f"Expected {decode_val_2}, \n but got {task_space.encode(test_val_2)}" -+ assert task_space.decode(decode_val_2) == test_val_2, f"Expected {test_val_2}, \n but got {task_space.decode(decode_val_2)}" -+ -+ test_val_3 = { -+ "ext_controller": ("e", 1, "X"), -+ "inner_state": { -+ "charge": 8, -+ "system_checks": [("c", 0), "X"], -+ "job_status": {"progress": [0.5, 0.1], "task": "E"}, -+ }, -+ } -+ decode_val_3 = { -+ "ext_controller": 16, -+ "inner_state": { -+ "charge": 8, -+ "system_checks": [5, 0], -+ "job_status": {"progress": [0.5, 0.1], "task": 4}, -+ }, -+ } -+ -+ assert task_space.encode(test_val_3) == decode_val_3, f"Expected {decode_val_3}, \n but got {task_space.encode(test_val_3)}" -+ assert task_space.decode(decode_val_3) == test_val_3, f"Expected {test_val_3}, \n but got {task_space.decode(decode_val_3)}" -+ -+ print("Dictionary tests passed!") -+ - # Test syntactic sugar - task_space = TaskSpace(3) - assert task_space.encode(0) == 0, f"Expected 0, got {task_space.encode(0)}" -@@ -36,4 +151,32 @@ if __name__ == "__main__": - assert task_space.encode(2) == 2, f"Expected 2, got {task_space.encode(2)}" - assert task_space.encode(3) is None, f"Expected None, got {task_space.encode(3)}" - -+ task_space = TaskSpace((2, 4)) -+ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" -+ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" -+ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" -+ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" -+ -+ task_space = TaskSpace((2, 4)) -+ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" -+ assert task_space.encode((0, 1)) == 1, 
f"Expected 1, got {task_space.encode((0, 1))}" -+ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" -+ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" -+ -+ task_space = TaskSpace({"map": 5, "level": (4, 10), "difficulty": 3}) -+ -+ encoding = task_space.encode({"map": 0, "level": (0, 0), "difficulty": 0}) -+ expected = {"map": 0, "level": 0, "difficulty": 0} -+ -+ encoding = task_space.encode({"map": 4, "level": (3, 9), "difficulty": 2}) -+ expected = {"map": 4, "level": 39, "difficulty": 2} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" -+ -+ encoding = task_space.encode({"map": 2, "level": (2, 0), "difficulty": 1}) -+ expected = {"map": 2, "level": 20, "difficulty": 1} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" -+ -+ encoding = task_space.encode({"map": 5, "level": (2, 11), "difficulty": -1}) -+ expected = {"map": None, "level": None, "difficulty": None} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" - print("All tests passed!") -diff --git a/syllabus/tests/utils.py b/syllabus/tests/utils.py -index 314a29c..98bac82 100644 ---- a/syllabus/tests/utils.py -+++ b/syllabus/tests/utils.py -@@ -57,7 +57,7 @@ def run_episode(env, new_task=None, curriculum=None, env_id=0): - action = env.action_space.sample() - obs, rew, term, trunc, info = env.step(action) - if curriculum and curriculum.requires_step_updates: -- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) -+ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) - curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) - ep_rew += rew - ep_len += 1 -@@ -87,7 +87,7 @@ def run_set_length(env, curriculum=None, episodes=None, steps=None, env_id=0, en - action = env.action_space.sample() - obs, rew, term, trunc, info = env.step(action) - if curriculum and curriculum.requires_step_updates: -- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) -+ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) - curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) - ep_rew += rew - ep_len += 1 -diff --git a/tests/multiprocessing_smoke_tests.py b/tests/multiprocessing_smoke_tests.py -index 9db9f47..b788179 100644 ---- a/tests/multiprocessing_smoke_tests.py -+++ b/tests/multiprocessing_smoke_tests.py -@@ -21,23 +21,23 @@ nethack_env = create_nethack_env() - cartpole_env = create_cartpole_env() - - curricula = [ -- (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), -- (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), -- # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), -- (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), -- (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { -- "get_value": get_test_values, -- "device": "cpu", -- "num_processes": N_ENVS, -- "num_steps": 2048 -- }), -- (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), -- (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { -- 'start_values': [-0.02, 0.02], -- 'end_values': [-0.3, 0.3], -- 'total_steps': [10] -- 
}), -- (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), -+ (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), -+ (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), -+ # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), -+ (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), -+ (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { -+ "get_value": get_test_values, -+ "device": "cpu", -+ "num_processes": N_ENVS, -+ "num_steps": 2048 -+ }), -+ (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), -+ (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { -+ 'start_values': [-0.02, 0.02], -+ 'end_values': [-0.3, 0.3], -+ 'total_steps': [10] -+ }), -+ (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), - ] - - test_names = [curriculum_args[0].__name__ for curriculum_args in curricula] diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-metadata.json b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-metadata.json deleted file mode 100644 index df99c501..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-metadata.json +++ /dev/null @@ -1,167 +0,0 @@ -{ - "os": "Linux-3.10.0-1160.11.1.el7.x86_64-x86_64-with-glibc2.10", - "python": "3.8.5", - "heartbeatAt": "2024-04-23T02:52:49.357411", - "startedAt": "2024-04-23T02:52:48.774085", - "docker": null, - "cuda": "10.1.243", - "args": [ - "--curriculum", - "True", - "--track", - "True", - "--env-id", - "bigfish" - ], - "state": "running", - "program": "cleanrl_procgen_plr.py", - "codePathLocal": "cleanrl_procgen_plr.py", - "codePath": "syllabus/examples/training_scripts/cleanrl_procgen_plr.py", - "git": { - "remote": "https://github.com/RoseyGreenBlue/Syllabus.git", - "commit": "63dc8f62e4d9d567eb92bb2f6c2bb186a0dc8ffb" - }, - "email": "djhaayusv04@gmail.com", - "root": "/data/averma/MARL/Syllabus", - "host": "f411843fc70b", - "username": "root", - "executable": "/home/user/miniconda/envs/test2_py/bin/python", - "cpu_count": 12, - "cpu_count_logical": 24, - "cpu_freq": { - "current": 1328.0466666666666, - "min": 1200.0, - "max": 3700.0 - }, - "cpu_freq_per_core": [ - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1223.12, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.5, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1202.368, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1493.518, - "min": 1200.0, - "max": 
3700.0 - }, - { - "current": 1231.213, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.5, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1237.023, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1261.096, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1385.815, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1214.819, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1288.073, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1245.947, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1352.197, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1891.748, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1248.229, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1375.024, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1279.772, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 2322.143, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1231.835, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1364.648, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1199.877, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.915, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1284.96, - "min": 1200.0, - "max": 3700.0 - } - ], - "disk": { - "/": { - "total": 5952.626953125, - "used": 988.7798233032227 - } - }, - "memory": { - "total": 251.63711166381836 - } -} diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-summary.json b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-summary.json deleted file mode 100644 index b7bc9960..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"global_step": 32704, "_timestamp": 1713840830.7411258, "_runtime": 61.95500087738037, "_step": 194, "charts/episodic_return": 0.0, "charts/episodic_length": 20.0, "curriculum/proportion_seen": 0.2800000011920929, "curriculum/score": 0.0, "charts/learning_rate": 0.0005000000237487257, "charts/episode_returns": 1.5, "losses/value_loss": 0.5320312976837158, "losses/policy_loss": -0.0004545035772025585, "losses/entropy": 2.707151412963867, "losses/old_approx_kl": 0.00045591534581035376, "losses/approx_kl": 0.0008768583065830171, "losses/clipfrac": 0.0, "losses/explained_variance": 0.004385650157928467, "charts/SPS": 383.0, "test_eval/mean_episode_return": 0.5, "test_eval/normalized_mean_eval_return": -0.012820512987673283, "test_eval/stddev_eval_return": 0.9219544529914856, "test_eval/full_mean_episode_return": 0.699999988079071, "test_eval/full_normalized_mean_eval_return": -0.007692308165132999, "test_eval/full_stddev_eval_return": 0.6403123736381531, "train_eval/mean_episode_return": 1.100000023841858, "train_eval/normalized_mean_train_return": 0.0025641031097620726, "train_eval/stddev_train_return": 2.662705183029175, "train_eval/full_mean_episode_return": 0.699999988079071, "train_eval/full_normalized_mean_train_return": -0.007692308165132999, "train_eval/full_stddev_train_return": 0.7810249924659729, "curriculum/completed_episodes": 103.0, "_wandb": {"runtime": 77}} \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/run-x38taylu.wandb b/syllabus/examples/training_scripts/wandb/run-20240423_025248-x38taylu/run-x38taylu.wandb deleted file mode 100644 index a0ecdb3ffbbb4ce7a0d34b7da4ae641a3f4942b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 97707 
diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py deleted file mode 100644 index abf656b5..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +++ /dev/null @@ -1,522 +0,0 @@ -""" An example applying Syllabus Prioritized Level Replay to Procgen.
This code is based on https://github.com/facebookresearch/level-replay/blob/main/train.py - -NOTE: In order to efficiently change the seed of a procgen environment directly without reinitializing it, -we rely on Minqi Jiang's custom branch of procgen found here: https://github.com/minqi/procgen -""" -import argparse -import os -import random -import time -from collections import deque -from distutils.util import strtobool - -import gym as openai_gym -import gymnasium as gym -import numpy as np -import procgen # noqa: F401 -from procgen import ProcgenEnv -import torch -import torch.nn as nn -import torch.optim as optim -from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 -from torch.utils.tensorboard import SummaryWriter - -from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum -from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum -from syllabus.examples.models import ProcgenAgent -from syllabus.examples.task_wrappers import ProcgenTaskWrapper -from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="syllabus", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="weather to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--logging-dir", type=str, default=".", - help="the base directory for logging and wandb storage.") - - # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="starpilot", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=int(25e6), - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=64, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=256, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Use GAE for advantage computation") - parser.add_argument("--gamma", type=float, default=0.999, - help="the discount factor 
gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=8, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=3, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.2, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - - # Procgen arguments - parser.add_argument("--full-dist", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Train on full distribution of levels.") - - # Curriculum arguments - parser.add_argument("--curriculum", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will use curriculum learning") - parser.add_argument("--curriculum-method", type=str, default="plr", - help="curriculum method to use") - parser.add_argument("--num-eval-episodes", type=int, default=10, - help="the number of episodes to evaluate the agent on after each policy update.") - - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args - - -PROCGEN_RETURN_BOUNDS = { - "coinrun": (5, 10), - "starpilot": (2.5, 64), - "caveflyer": (3.5, 12), - "dodgeball": (1.5, 19), - "fruitbot": (-1.5, 32.4), - "chaser": (0.5, 13), - "miner": (1.5, 13), - "jumper": (3, 10), - "leaper": (3, 10), - "maze": (5, 10), - "bigfish": (1, 40), - "heist": (3.5, 10), - "climber": (2, 12.6), - "plunder": (4.5, 30), - "ninja": (3.5, 10), - "bossfight": (0.5, 13), -} - - -def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) - if curriculum is not None: - env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, - curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, - ) - return env - return thunk - - -def wrap_vecenv(vecenv): - vecenv.is_vector_env = True - vecenv = VecMonitor(venv=vecenv, filename=None, keep_buf=100) - vecenv = VecNormalize(venv=vecenv, ob=False, ret=True) - return vecenv - - -def slow_level_replay_evaluate( - env_name, - policy, - num_episodes, - device, - num_levels=0 -): - policy.eval() - - eval_envs = ProcgenEnv( - num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - eval_obs, _ = eval_envs.reset() - 
eval_episode_rewards = [] - - while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): - if 'episode' in info.keys(): - eval_episode_rewards.append(info['episode']['r']) - - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] - normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) - policy.train() - return mean_returns, stddev_returns, normalized_mean_returns - - -def level_replay_evaluate( - env_name, - policy, - num_episodes, - device, - num_levels=0 -): - policy.eval() - - eval_envs = ProcgenEnv( - num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): - if 'episode' in info.keys() and eval_episode_rewards[i] == -1: - eval_episode_rewards[i] = info['episode']['r'] - - # print(eval_episode_rewards) - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] - normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) - policy.train() - return mean_returns, stddev_returns, normalized_mean_returns - - -def make_value_fn(): - def get_value(obs): - obs = np.array(obs) - with torch.no_grad(): - return agent.get_value(torch.Tensor(obs).to(device)) - return get_value - - -if __name__ == "__main__": - args = parse_args() - run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" - if args.track: - import wandb - - wandb.init( - project=args.wandb_project_name, - entity=args.wandb_entity, - sync_tensorboard=True, - config=vars(args), - name=run_name, - monitor_gym=True, - save_code=True, - dir=args.logging_dir - ) - # wandb.run.log_code("./syllabus/examples") - - writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), - ) - - # TRY NOT TO MODIFY: seeding - random.seed(args.seed) - np.random.seed(args.seed) - torch.manual_seed(args.seed) - torch.backends.cudnn.deterministic = args.torch_deterministic - - device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") - print("Device:", device) - - # Curriculum setup - curriculum = None - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) - sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) - - # Intialize Curriculum Method - if args.curriculum_method == "plr": - print("Using prioritized level replay.") - curriculum = PrioritizedLevelReplay( - sample_env.task_space, - sample_env.observation_space, - num_steps=args.num_steps, - num_processes=args.num_envs, - 
gamma=args.gamma, - gae_lambda=args.gae_lambda, - task_sampler_kwargs_dict={"strategy": "value_l1"}, - get_value=make_value_fn(), - ) - elif args.curriculum_method == "dr": - print("Using domain randomization.") - curriculum = DomainRandomization(sample_env.task_space) - elif args.curriculum_method == "lp": - print("Using learning progress.") - curriculum = LearningProgressCurriculum(sample_env.task_space) - elif args.curriculum_method == "sq": - print("Using sequential curriculum.") - curricula = [] - stopping = [] - for i in range(199): - curricula.append(i + 1) - stopping.append("steps>=50000") - curricula.append(list(range(i + 1))) - stopping.append("steps>=50000") - curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) - else: - raise ValueError(f"Unknown curriculum method {args.curriculum_method}") - curriculum = make_multiprocessing_curriculum(curriculum) - del sample_env - - # env setup - print("Creating env") - envs = gym.vector.AsyncVectorEnv( - [ - make_env( - args.env_id, - args.seed + i, - curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) - ] - ) - envs = wrap_vecenv(envs) - - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( - envs.single_observation_space.shape, - envs.single_action_space.n, - arch="large", - base_kwargs={'recurrent': False, 'hidden_size': 256} - ).to(device) - optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) - - # ALGO Logic: Storage setup - obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) - actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) - logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) - rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) - values = torch.zeros((args.num_steps, args.num_envs)).to(device) - - # TRY NOT TO MODIFY: start the game - global_step = 0 - start_time = time.time() - next_obs, _ = envs.reset() - next_obs = torch.Tensor(next_obs).to(device) - next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // args.batch_size - episode_rewards = deque(maxlen=10) - completed_episodes = 0 - - for update in range(1, num_updates + 1): - # Annealing the rate if instructed to do so. - if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates - lrnow = frac * args.learning_rate - optimizer.param_groups[0]["lr"] = lrnow - - for step in range(0, args.num_steps): - global_step += 1 * args.num_envs - obs[step] = next_obs - dones[step] = next_done - - # ALGO LOGIC: action logic - with torch.no_grad(): - action, logprob, _, value = agent.get_action_and_value(next_obs) - values[step] = value.flatten() - actions[step] = action - logprobs[step] = logprob - - # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, reward, term, trunc, info = envs.step(action.cpu().numpy()) - done = np.logical_or(term, trunc) - rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) - completed_episodes += sum(done) - - for item in info: - if "episode" in item.keys(): - episode_rewards.append(item['episode']['r']) - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) - if curriculum is not None: - curriculum.log_metrics(writer, global_step) - break - - # bootstrap value if not done - with torch.no_grad(): - next_value = agent.get_value(next_obs).reshape(1, -1) - if args.gae: - advantages = torch.zeros_like(rewards).to(device) - lastgaelam = 0 - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - nextvalues = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - nextvalues = values[t + 1] - delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] - advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam - returns = advantages + values - else: - returns = torch.zeros_like(rewards).to(device) - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - next_return = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - next_return = returns[t + 1] - returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return - advantages = returns - values - - # flatten the batch - b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) - b_logprobs = logprobs.reshape(-1) - b_actions = actions.reshape((-1,) + envs.single_action_space.shape) - b_advantages = advantages.reshape(-1) - b_returns = returns.reshape(-1) - b_values = values.reshape(-1) - - # Optimizing the policy and value network - b_inds = np.arange(args.batch_size) - clipfracs = [] - for epoch in range(args.update_epochs): - np.random.shuffle(b_inds) - for start in range(0, args.batch_size, args.minibatch_size): - end = start + args.minibatch_size - mb_inds = b_inds[start:end] - - _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds]) - logratio = newlogprob - b_logprobs[mb_inds] - ratio = logratio.exp() - - with torch.no_grad(): - # calculate approx_kl http://joschu.net/blog/kl-approx.html - old_approx_kl = (-logratio).mean() - approx_kl = ((ratio - 1) - logratio).mean() - clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()] - - mb_advantages = b_advantages[mb_inds] - if args.norm_adv: - mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) - - # Policy loss - pg_loss1 = -mb_advantages * ratio - pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + args.clip_coef) - pg_loss = torch.max(pg_loss1, pg_loss2).mean() - - # Value loss - newvalue = newvalue.view(-1) - if args.clip_vloss: - v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 - v_clipped = b_values[mb_inds] + torch.clamp( - newvalue - b_values[mb_inds], - -args.clip_coef, - args.clip_coef, - ) - v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 - v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) - v_loss = 0.5 * v_loss_max.mean() - else: - v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() - - 
entropy_loss = entropy.mean() - loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef - - optimizer.zero_grad() - loss.backward() - nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) - optimizer.step() - - if args.target_kl is not None: - if approx_kl > args.target_kl: - break - - y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() - var_y = np.var(y_true) - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent - mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) - writer.add_scalar("charts/episode_returns", np.mean(episode_rewards), global_step) - writer.add_scalar("losses/value_loss", v_loss.item(), global_step) - writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step) - writer.add_scalar("losses/entropy", entropy_loss.item(), global_step) - writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step) - writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step) - writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step) - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) - - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) - writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) - - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) - writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) - - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() - writer.close() diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/conda-environment.yaml 
b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/conda-environment.yaml deleted file mode 100644 index cd0b0b09..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/conda-environment.yaml +++ /dev/null @@ -1,165 +0,0 @@ -name: test2_py -channels: - - defaults -dependencies: - - _libgcc_mutex=0.1=main - - _openmp_mutex=5.1=1_gnu - - ca-certificates=2024.3.11=h06a4308_0 - - ld_impl_linux-64=2.38=h1181459_1 - - libffi=3.3=he6710b0_2 - - libgcc-ng=11.2.0=h1234567_1 - - libgomp=11.2.0=h1234567_1 - - libstdcxx-ng=11.2.0=h1234567_1 - - ncurses=6.4=h6a678d5_0 - - openssl=1.1.1w=h7f8727e_0 - - pip=23.3.1=py38h06a4308_0 - - python=3.8.5=h7579374_1 - - readline=8.2=h5eee18b_0 - - setuptools=68.2.2=py38h06a4308_0 - - sqlite=3.41.2=h5eee18b_0 - - tk=8.6.12=h1ccaba5_0 - - wheel=0.41.2=py38h06a4308_0 - - xz=5.4.6=h5eee18b_0 - - zlib=1.2.13=h5eee18b_0 - - pip: - - absl-py==2.1.0 - - aiosignal==1.3.1 - - alabaster==0.7.13 - - appdirs==1.4.4 - - attrs==23.2.0 - - babel==2.14.0 - - beautifulsoup4==4.12.3 - - cachetools==5.3.3 - - certifi==2024.2.2 - - cffi==1.16.0 - - charset-normalizer==3.3.2 - - click==8.1.7 - - cloudpickle==3.0.0 - - cmake==3.29.2 - - contourpy==1.1.1 - - cycler==0.12.1 - - dm-tree==0.1.8 - - docker-pycreds==0.4.0 - - docutils==0.20.1 - - exceptiongroup==1.2.0 - - farama-notifications==0.0.4 - - filelock==3.13.4 - - fonttools==4.51.0 - - frozenlist==1.4.1 - - fsspec==2024.3.1 - - furo==2024.1.29 - - future==1.0.0 - - gitdb==4.0.11 - - gitpython==3.1.43 - - glcontext==2.5.0 - - glfw==1.12.0 - - google-auth==2.29.0 - - google-auth-oauthlib==1.0.0 - - grpcio==1.62.1 - - gym==0.23.0 - - gym-notices==0.0.8 - - gymnasium==0.28.1 - - idna==3.7 - - imageio==2.34.0 - - imageio-ffmpeg==0.3.0 - - imagesize==1.4.1 - - importlib-metadata==7.1.0 - - importlib-resources==6.4.0 - - iniconfig==2.0.0 - - jax-jumpy==1.0.0 - - jinja2==3.1.3 - - jsonschema==4.21.1 - - jsonschema-specifications==2023.12.1 - - kiwisolver==1.4.5 - - lazy-loader==0.4 - - lz4==4.3.3 - - markdown==3.6 - - markdown-it-py==3.0.0 - - markupsafe==2.1.5 - - matplotlib==3.7.5 - - mdurl==0.1.2 - - moderngl==5.10.0 - - mpmath==1.3.0 - - msgpack==1.0.8 - - networkx==3.1 - - numpy==1.24.4 - - nvidia-cublas-cu12==12.1.3.1 - - nvidia-cuda-cupti-cu12==12.1.105 - - nvidia-cuda-nvrtc-cu12==12.1.105 - - nvidia-cuda-runtime-cu12==12.1.105 - - nvidia-cudnn-cu12==8.9.2.26 - - nvidia-cufft-cu12==11.0.2.54 - - nvidia-curand-cu12==10.3.2.106 - - nvidia-cusolver-cu12==11.4.5.107 - - nvidia-cusparse-cu12==12.1.0.106 - - nvidia-nccl-cu12==2.19.3 - - nvidia-nvjitlink-cu12==12.4.127 - - nvidia-nvtx-cu12==12.1.105 - - oauthlib==3.2.2 - - packaging==24.0 - - pandas==2.0.3 - - pillow==10.3.0 - - pkgutil-resolve-name==1.3.10 - - pluggy==1.4.0 - - protobuf==4.25.3 - - psutil==5.9.8 - - py-cpuinfo==9.0.0 - - pyarrow==15.0.2 - - pyasn1==0.6.0 - - pyasn1-modules==0.4.0 - - pycparser==2.22 - - pyenchant==3.2.2 - - pyglet==1.4.11 - - pygments==2.17.2 - - pyparsing==3.1.2 - - pytest==8.1.1 - - pytest-benchmark==4.0.0 - - python-dateutil==2.9.0.post0 - - pytz==2024.1 - - pywavelets==1.4.1 - - pyyaml==6.0.1 - - ray==2.10.0 - - referencing==0.34.0 - - requests==2.31.0 - - requests-oauthlib==2.0.0 - - rich==13.7.1 - - rpds-py==0.18.0 - - rsa==4.9 - - scikit-image==0.21.0 - - scipy==1.10.0 - - sentry-sdk==1.45.0 - - setproctitle==1.3.3 - - shellingham==1.5.4 - - shimmy==1.3.0 - - six==1.16.0 - - smmap==5.0.1 - - snowballstemmer==2.2.0 - - soupsieve==2.5 - - sphinx==7.1.2 - - sphinx-basic-ng==1.0.0b2 - - 
sphinx-tabs==3.4.5 - - sphinxcontrib-applehelp==1.0.4 - - sphinxcontrib-devhelp==1.0.2 - - sphinxcontrib-htmlhelp==2.0.1 - - sphinxcontrib-jsmath==1.0.1 - - sphinxcontrib-qthelp==1.0.3 - - sphinxcontrib-serializinghtml==1.1.5 - - sphinxcontrib-spelling==8.0.0 - - syllabus-rl==0.5 - - sympy==1.12 - - tensorboard==2.14.0 - - tensorboard-data-server==0.7.2 - - tensorboardx==2.6.2.2 - - tifffile==2023.7.10 - - tomli==2.0.1 - - torch==2.2.2 - - triton==2.2.0 - - typer==0.12.3 - - typing-extensions==4.11.0 - - tzdata==2024.1 - - urllib3==2.2.1 - - wandb==0.16.6 - - werkzeug==3.0.2 - - zipp==3.18.1 -prefix: /home/user/miniconda/envs/test2_py - diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/config.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/config.yaml deleted file mode 100644 index 2d9b2af7..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/config.yaml +++ /dev/null @@ -1,126 +0,0 @@ -wandb_version: 1 - -exp_name: - desc: null - value: cleanrl_procgen_plr -seed: - desc: null - value: 1 -torch_deterministic: - desc: null - value: true -cuda: - desc: null - value: true -track: - desc: null - value: true -wandb_project_name: - desc: null - value: syllabus -wandb_entity: - desc: null - value: null -capture_video: - desc: null - value: false -logging_dir: - desc: null - value: . -env_id: - desc: null - value: bigfish -total_timesteps: - desc: null - value: 25000000 -learning_rate: - desc: null - value: 0.0005 -num_envs: - desc: null - value: 64 -num_steps: - desc: null - value: 256 -anneal_lr: - desc: null - value: false -gae: - desc: null - value: true -gamma: - desc: null - value: 0.999 -gae_lambda: - desc: null - value: 0.95 -num_minibatches: - desc: null - value: 8 -update_epochs: - desc: null - value: 3 -norm_adv: - desc: null - value: true -clip_coef: - desc: null - value: 0.2 -clip_vloss: - desc: null - value: true -ent_coef: - desc: null - value: 0.01 -vf_coef: - desc: null - value: 0.5 -max_grad_norm: - desc: null - value: 0.5 -target_kl: - desc: null - value: null -full_dist: - desc: null - value: true -curriculum: - desc: null - value: true -curriculum_method: - desc: null - value: plr -num_eval_episodes: - desc: null - value: 10 -batch_size: - desc: null - value: 16384 -minibatch_size: - desc: null - value: 2048 -_wandb: - desc: null - value: - code_path: code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py - python_version: 3.8.5 - cli_version: 0.16.6 - framework: torch - is_jupyter_run: false - is_kaggle_kernel: false - start_time: 1713841235.0 - t: - 1: - - 1 - - 30 - - 55 - 3: - - 13 - - 16 - - 23 - - 35 - 4: 3.8.5 - 5: 0.16.6 - 8: - - 5 - 13: linux-x86_64 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/diff.patch b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/diff.patch deleted file mode 100644 index ab53c5d0..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/diff.patch +++ /dev/null @@ -1,122 +0,0 @@ -diff --git a/setup.py b/setup.py -index 31e09f2..22a94e8 100644 ---- a/setup.py -+++ b/setup.py -@@ -2,7 +2,7 @@ from setuptools import find_packages, setup - - - extras = dict() --extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] -+extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] - extras['docs'] = ['sphinx-tabs', 
'sphinxcontrib-spelling', 'furo'] - extras['all'] = extras['test'] + extras['docs'] - -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -index dabcd50..abf656b 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -+++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -@@ -136,7 +136,7 @@ def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - env = MultiProcessingSyncWrapper( - env, - curriculum.get_components(), -- update_on_step=curriculum.requires_step_updates, -+ update_on_step=False, - task_space=env.task_space, - ) - return env -@@ -150,37 +150,31 @@ def wrap_vecenv(vecenv): - return vecenv - - --def full_level_replay_evaluate( -+def slow_level_replay_evaluate( - env_name, - policy, - num_episodes, - device, -- num_levels=1 # Not used -+ num_levels=0 - ): - policy.eval() - - eval_envs = ProcgenEnv( -- num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False -+ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) -- -- # Seed environments -- seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] -- for i, seed in enumerate(seeds): -- eval_envs.seed(seed, i) -- - eval_obs, _ = eval_envs.reset() -- eval_episode_rewards = [-1] * num_episodes -+ eval_episode_rewards = [] - -- while -1 in eval_episode_rewards: -+ while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): -- if 'episode' in info.keys() and eval_episode_rewards[i] == -1: -- eval_episode_rewards[i] = info['episode']['r'] -+ if 'episode' in info.keys(): -+ eval_episode_rewards.append(info['episode']['r']) - - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) -@@ -251,7 +245,7 @@ if __name__ == "__main__": - ) - # wandb.run.log_code("./syllabus/examples") - -- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) -+ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), -@@ -485,13 +479,13 @@ if __name__ == "__main__": - mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) -- full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( -+ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) -- full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( -+ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( - 
args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - -@@ -510,17 +504,17 @@ if __name__ == "__main__": - - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) -- writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) -+ writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) - - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) -- writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) -- writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) -- writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) -- writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) -+ writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) - - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/events.out.tfevents.1713841239.f411843fc70b.1794.0 b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/events.out.tfevents.1713841239.f411843fc70b.1794.0 deleted file mode 120000 index f3066c3f..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/events.out.tfevents.1713841239.f411843fc70b.1794.0 +++ /dev/null @@ -1 +0,0 @@ -/data/averma/MARL/Syllabus/syllabus/examples/training_scripts/runs/bigfish__cleanrl_procgen_plr__1__1713841233/events.out.tfevents.1713841239.f411843fc70b.1794.0 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/requirements.txt b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/requirements.txt deleted file mode 100644 index 7f33d240..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/requirements.txt +++ /dev/null @@ -1,146 +0,0 @@ -Babel==2.14.0 -Farama-Notifications==0.0.4 -GitPython==3.1.43 -Jinja2==3.1.3 -Markdown==3.6 -MarkupSafe==2.1.5 -PyWavelets==1.4.1 -PyYAML==6.0.1 -Pygments==2.17.2 -Shimmy==1.3.0 -Sphinx==7.1.2 -Syllabus-RL==0.5 -Werkzeug==3.0.2 -absl-py==2.1.0 -aiosignal==1.3.1 -alabaster==0.7.13 -appdirs==1.4.4 -attrs==23.2.0 
-beautifulsoup4==4.12.3 -cachetools==5.3.3 -certifi==2024.2.2 -cffi==1.16.0 -charset-normalizer==3.3.2 -click==8.1.7 -cloudpickle==3.0.0 -cmake==3.29.2 -colorama==0.4.6 -contourpy==1.1.1 -cycler==0.12.1 -dm-tree==0.1.8 -docker-pycreds==0.4.0 -docutils==0.20.1 -exceptiongroup==1.2.0 -filelock==3.13.4 -fonttools==4.51.0 -frozenlist==1.4.1 -fsspec==2024.3.1 -furo==2024.1.29 -future==1.0.0 -gitdb==4.0.11 -glcontext==2.5.0 -glfw==1.12.0 -google-auth-oauthlib==1.0.0 -google-auth==2.29.0 -grpcio==1.62.1 -gym-notices==0.0.8 -gym==0.23.0 -gymnasium==0.28.1 -idna==3.7 -imageio-ffmpeg==0.3.0 -imageio==2.34.0 -imagesize==1.4.1 -importlib_metadata==7.1.0 -importlib_resources==6.4.0 -iniconfig==2.0.0 -jax-jumpy==1.0.0 -jsonschema-specifications==2023.12.1 -jsonschema==4.21.1 -kiwisolver==1.4.5 -lazy_loader==0.4 -lz4==4.3.3 -markdown-it-py==3.0.0 -matplotlib==3.7.5 -mdurl==0.1.2 -moderngl==5.10.0 -mpmath==1.3.0 -msgpack==1.0.8 -networkx==3.1 -numpy==1.24.4 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-nccl-cu12==2.19.3 -nvidia-nvjitlink-cu12==12.4.127 -nvidia-nvtx-cu12==12.1.105 -oauthlib==3.2.2 -packaging==24.0 -pandas==2.0.3 -pillow==10.3.0 -pip==23.3.1 -pkgutil_resolve_name==1.3.10 -pluggy==1.4.0 -procgen==0.9.5+ed4be81 -protobuf==4.25.3 -psutil==5.9.8 -psutil==5.9.8 -py-cpuinfo==9.0.0 -pyarrow==15.0.2 -pyasn1==0.6.0 -pyasn1_modules==0.4.0 -pycparser==2.22 -pyenchant==3.2.2 -pyglet==1.4.11 -pyparsing==3.1.2 -pytest-benchmark==4.0.0 -pytest==8.1.1 -python-dateutil==2.9.0.post0 -pytz==2024.1 -ray==2.10.0 -referencing==0.34.0 -requests-oauthlib==2.0.0 -requests==2.31.0 -rich==13.7.1 -rpds-py==0.18.0 -rsa==4.9 -scikit-image==0.21.0 -scipy==1.10.0 -sentry-sdk==1.45.0 -setproctitle==1.2.2 -setproctitle==1.3.3 -setuptools==68.2.2 -shellingham==1.5.4 -six==1.16.0 -smmap==5.0.1 -snowballstemmer==2.2.0 -soupsieve==2.5 -sphinx-basic-ng==1.0.0b2 -sphinx-tabs==3.4.5 -sphinxcontrib-applehelp==1.0.4 -sphinxcontrib-devhelp==1.0.2 -sphinxcontrib-htmlhelp==2.0.1 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.3 -sphinxcontrib-serializinghtml==1.1.5 -sphinxcontrib-spelling==8.0.0 -sympy==1.12 -tensorboard-data-server==0.7.2 -tensorboard==2.14.0 -tensorboardX==2.6.2.2 -tifffile==2023.7.10 -tomli==2.0.1 -torch==2.2.2 -triton==2.2.0 -typer==0.12.3 -typing_extensions==4.11.0 -tzdata==2024.1 -urllib3==2.2.1 -wandb==0.16.6 -wheel==0.41.2 -zipp==3.18.1 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch deleted file mode 100644 index 7be08932..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch +++ /dev/null @@ -1,1408 +0,0 @@ -diff --git a/setup.py b/setup.py -index 31e09f2..22a94e8 100644 ---- a/setup.py -+++ b/setup.py -@@ -2,7 +2,7 @@ from setuptools import find_packages, setup - - - extras = dict() --extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] -+extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 
'tensorboard>=2.13.0', 'shimmy'] - extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] - extras['all'] = extras['test'] + extras['docs'] - -diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py -index 03284da..4ca9aeb 100644 ---- a/syllabus/core/curriculum_base.py -+++ b/syllabus/core/curriculum_base.py -@@ -76,7 +76,7 @@ class Curriculum: - """ - self.completed_tasks += 1 - -- def update_on_step(self, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: -+ def update_on_step(self, task: typing.Any, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: - """ Update the curriculum with the current step results from the environment. - - :param obs: Observation from teh environment -@@ -88,7 +88,7 @@ class Curriculum: - """ - raise NotImplementedError("This curriculum does not require step updates. Set update_on_step for the environment sync wrapper to False to improve performance and prevent this error.") - -- def update_on_step_batch(self, step_results: List[typing.Tuple[int, int, int, int, int]], env_id: int = None) -> None: -+ def update_on_step_batch(self, step_results: List[typing.Tuple[Any, Any, int, int, int, int]], env_id: int = None) -> None: - """Update the curriculum with a batch of step results from the environment. - - This method can be overridden to provide a more efficient implementation. It is used -@@ -96,9 +96,9 @@ class Curriculum: - - :param step_results: List of step results - """ -- obs, rews, terms, truncs, infos = tuple(step_results) -+ tasks, obs, rews, terms, truncs, infos = tuple(step_results) - for i in range(len(obs)): -- self.update_on_step(obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) -+ self.update_on_step(tasks[i], obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) - - def update_on_episode(self, episode_return: float, episode_length: int, episode_task: Any, env_id: int = None) -> None: - """Update the curriculum with episode results from the environment. -diff --git a/syllabus/core/curriculum_sync_wrapper.py b/syllabus/core/curriculum_sync_wrapper.py -index 6e069d8..f986643 100644 ---- a/syllabus/core/curriculum_sync_wrapper.py -+++ b/syllabus/core/curriculum_sync_wrapper.py -@@ -29,6 +29,14 @@ class CurriculumWrapper: - def tasks(self): - return self.task_space.tasks - -+ @property -+ def requires_step_updates(self): -+ return self.curriculum.requires_step_updates -+ -+ @property -+ def requires_episode_updates(self): -+ return self.curriculum.requires_episode_updates -+ - def get_tasks(self, task_space=None): - return self.task_space.get_tasks(gym_space=task_space) - -diff --git a/syllabus/core/environment_sync_wrapper.py b/syllabus/core/environment_sync_wrapper.py -index c995aa1..6edee7c 100644 ---- a/syllabus/core/environment_sync_wrapper.py -+++ b/syllabus/core/environment_sync_wrapper.py -@@ -19,7 +19,8 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - def __init__(self, - env, - components: MultiProcessingComponents, -- update_on_step: bool = True, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? -+ update_on_step: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? -+ update_on_progress: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? 
- batch_size: int = 100, - buffer_size: int = 2, # Having an extra task in the buffer minimizes wait time at reset - task_space: TaskSpace = None, -@@ -34,6 +35,7 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - self.update_queue = components.update_queue - self.task_space = task_space - self.update_on_step = update_on_step -+ self.update_on_progress = update_on_progress - self.batch_size = batch_size - self.global_task_completion = global_task_completion - self.task_progress = 0.0 -@@ -125,17 +127,21 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - def _package_step_updates(self): - step_batch = { - "update_type": "step_batch", -- "metrics": ([self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), -+ "metrics": ([self._tasks[:self._batch_step], self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), - "env_id": self.instance_id, - "request_sample": False - } -- task_batch = { -- "update_type": "task_progress_batch", -- "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), -- "env_id": self.instance_id, -- "request_sample": False -- } -- return [step_batch, task_batch] -+ update = [step_batch] -+ -+ if self.update_on_progress: -+ task_batch = { -+ "update_type": "task_progress_batch", -+ "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), -+ "env_id": self.instance_id, -+ "request_sample": False -+ } -+ update.append(task_batch) -+ return update - - def add_task(self, task): - update = { -diff --git a/syllabus/curricula/annealing_box.py b/syllabus/curricula/annealing_box.py -index 6c565ec..101981c 100644 ---- a/syllabus/curricula/annealing_box.py -+++ b/syllabus/curricula/annealing_box.py -@@ -49,8 +49,8 @@ class AnnealingBoxCurriculum(Curriculum): - """ - # Linear annealing from start_values to end_values - annealed_values = ( -- self.start_values + (self.end_values - self.start_values) * -- np.minimum(self.current_step, self.total_steps) / self.total_steps -+ self.start_values + (self.end_values - self.start_values) * -+ np.minimum(self.current_step, self.total_steps) / self.total_steps - ) - -- return [annealed_values.copy() for _ in range(k)] -\ No newline at end of file -+ return [annealed_values.copy() for _ in range(k)] -diff --git a/syllabus/curricula/noop.py b/syllabus/curricula/noop.py -index f6bd5dc..fb5d8ae 100644 ---- a/syllabus/curricula/noop.py -+++ b/syllabus/curricula/noop.py -@@ -28,7 +28,7 @@ class NoopCurriculum(Curriculum): - """ - pass - -- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: - """ - Update the curriculum with the current step results from the environment. - """ -diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py -index 9515df4..9c808dd 100644 ---- a/syllabus/curricula/plr/plr_wrapper.py -+++ b/syllabus/curricula/plr/plr_wrapper.py -@@ -23,16 +23,15 @@ class RolloutStorage(object): - get_value=None, - ): - self.num_steps = num_steps -- self.buffer_steps = num_steps * 2 # Hack to prevent overflow from lagging updates. -+ self.buffer_steps = num_steps * 4 # Hack to prevent overflow from lagging updates. 
- self.num_processes = num_processes - self._requires_value_buffers = requires_value_buffers - self._get_value = get_value - self.tasks = torch.zeros(self.buffer_steps, num_processes, 1, dtype=torch.int) - self.masks = torch.ones(self.buffer_steps + 1, num_processes, 1) - self.obs = [[[0] for _ in range(self.num_processes)]] * self.buffer_steps -- self._fill = torch.zeros(self.buffer_steps, num_processes, 1) - self.env_steps = [0] * num_processes -- self.should_update = False -+ self.ready_buffers = set() - - if requires_value_buffers: - self.returns = torch.zeros(self.buffer_steps + 1, num_processes, 1) -@@ -46,12 +45,10 @@ class RolloutStorage(object): - self.action_log_dist = torch.zeros(self.buffer_steps, num_processes, action_space.n) - - self.num_steps = num_steps -- self.step = 0 - - def to(self, device): - self.masks = self.masks.to(device) - self.tasks = self.tasks.to(device) -- self._fill = self._fill.to(device) - if self._requires_value_buffers: - self.rewards = self.rewards.to(device) - self.value_preds = self.value_preds.to(device) -@@ -59,108 +56,79 @@ class RolloutStorage(object): - else: - self.action_log_dist = self.action_log_dist.to(device) - -- def insert(self, masks, action_log_dist=None, value_preds=None, rewards=None, tasks=None): -- if self._requires_value_buffers: -- assert (value_preds is not None and rewards is not None), "Selected strategy requires value_preds and rewards" -- if len(rewards.shape) == 3: -- rewards = rewards.squeeze(2) -- self.value_preds[self.step].copy_(torch.as_tensor(value_preds)) -- self.rewards[self.step].copy_(torch.as_tensor(rewards)[:, None]) -- self.masks[self.step + 1].copy_(torch.as_tensor(masks)[:, None]) -- else: -- self.action_log_dist[self.step].copy_(action_log_dist) -- if tasks is not None: -- assert isinstance(tasks[0], int), "Provided task must be an integer" -- self.tasks[self.step].copy_(torch.as_tensor(tasks)[:, None]) -- self.step = (self.step + 1) % self.num_steps -- - def insert_at_index(self, env_index, mask=None, action_log_dist=None, obs=None, reward=None, task=None, steps=1): -- if env_index >= self.num_processes: -- warnings.warn(f"Env index {env_index} is greater than the number of processes {self.num_processes}. Using index {env_index % self.num_processes} instead.") -- env_index = env_index % self.num_processes -- - step = self.env_steps[env_index] - end_step = step + steps -- # Update buffer fill traacker, and check for common usage errors. -- try: -- if end_step > len(self._fill): -- raise IndexError -- self._fill[step:end_step, env_index] = 1 -- except IndexError as e: -- if any(self._fill[:][env_index] == 0): -- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. Your value for PLR's num_processes may be too high.") from e -- else: -- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. 
Your value for PLR's num_processes may be too low.") from e - - if mask is not None: - self.masks[step + 1:end_step + 1, env_index].copy_(torch.as_tensor(mask[:, None])) -+ - if obs is not None: - for s in range(step, end_step): - self.obs[s][env_index] = obs[s - step] -+ - if reward is not None: - self.rewards[step:end_step, env_index].copy_(torch.as_tensor(reward[:, None])) -+ - if action_log_dist is not None: - self.action_log_dist[step:end_step, env_index].copy_(torch.as_tensor(action_log_dist[:, None])) -+ - if task is not None: - try: -- task = int(task) -+ int(task[0]) - except TypeError: -- assert isinstance(task, int), f"Provided task must be an integer, got {task} with type {type(task)} instead." -- self.tasks[step:end_step, env_index].copy_(torch.as_tensor(task)) -- else: -- self.env_steps[env_index] += steps -- # Hack for now, we call insert_at_index twice -- while all(self._fill[self.step] == 1): -- self.step = (self.step + 1) % self.buffer_steps -- # Check if we have enough steps to compute a task sampler update -- if self.step == self.num_steps + 1: -- self.should_update = True -- -- def _get_values(self): -+ assert isinstance(task, int), f"Provided task must be an integer, got {task[0]} with type {type(task[0])} instead." -+ self.tasks[step:end_step, env_index].copy_(torch.as_tensor(np.array(task)[:, None])) -+ -+ self.env_steps[env_index] += steps -+ if env_index not in self.ready_buffers and self.env_steps[env_index] >= self.num_steps: -+ self.ready_buffers.add(env_index) -+ -+ def _get_values(self, env_index): - if self._get_value is None: - raise UsageError("Selected strategy requires value predictions. Please provide get_value function.") -- for step in range(self.num_steps): -- values = self._get_value(self.obs[step]) -+ for step in range(0, self.num_steps, self.num_processes): -+ obs = self.obs[step: step + self.num_processes][env_index] -+ values = self._get_value(obs) -+ -+ # Reshape values if necessary - if len(values.shape) == 3: - warnings.warn(f"Value function returned a 3D tensor of shape {values.shape}. Attempting to squeeze last dimension.") - values = torch.squeeze(values, -1) - if len(values.shape) == 1: - warnings.warn(f"Value function returned a 1D tensor of shape {values.shape}. 
Attempting to unsqueeze last dimension.") - values = torch.unsqueeze(values, -1) -- self.value_preds[step].copy_(values) - -- def after_update(self): -+ self.value_preds[step: step + self.num_processes, env_index].copy_(values) -+ -+ def after_update(self, env_index): - # After consuming the first num_steps of data, remove them and shift the remaining data in the buffer -- self.tasks[0: self.num_steps].copy_(self.tasks[self.num_steps: self.buffer_steps]) -- self.masks[0: self.num_steps].copy_(self.masks[self.num_steps: self.buffer_steps]) -- self.obs[0: self.num_steps][:] = self.obs[self.num_steps: self.buffer_steps][:] -+ self.tasks = self.tasks.roll(-self.num_steps, 0) -+ self.masks = self.masks.roll(-self.num_steps, 0) -+ self.obs[0:][env_index] = self.obs[self.num_steps: self.buffer_steps][env_index] - - if self._requires_value_buffers: -- self.returns[0: self.num_steps].copy_(self.returns[self.num_steps: self.buffer_steps]) -- self.rewards[0: self.num_steps].copy_(self.rewards[self.num_steps: self.buffer_steps]) -- self.value_preds[0: self.num_steps].copy_(self.value_preds[self.num_steps: self.buffer_steps]) -+ self.returns = self.returns.roll(-self.num_steps, 0) -+ self.rewards = self.rewards.roll(-self.num_steps, 0) -+ self.value_preds = self.value_preds.roll(-self.num_steps, 0) - else: -- self.action_log_dist[0: self.num_steps].copy_(self.action_log_dist[self.num_steps: self.buffer_steps]) -+ self.action_log_dist = self.action_log_dist.roll(-self.num_steps, 0) - -- self._fill[0: self.num_steps].copy_(self._fill[self.num_steps: self.buffer_steps]) -- self._fill[self.num_steps: self.buffer_steps].copy_(0) -+ self.env_steps[env_index] -= self.num_steps -+ self.ready_buffers.remove(env_index) - -- self.env_steps = [steps - self.num_steps for steps in self.env_steps] -- self.should_update = False -- self.step = self.step - self.num_steps -- -- def compute_returns(self, gamma, gae_lambda): -+ def compute_returns(self, gamma, gae_lambda, env_index): - assert self._requires_value_buffers, "Selected strategy does not use compute_rewards." -- self._get_values() -+ self._get_values(env_index) - gae = 0 - for step in reversed(range(self.rewards.size(0), self.num_steps)): - delta = ( -- self.rewards[step] -- + gamma * self.value_preds[step + 1] * self.masks[step + 1] -- - self.value_preds[step] -+ self.rewards[step, env_index] -+ + gamma * self.value_preds[step + 1, env_index] * self.masks[step + 1, env_index] -+ - self.value_preds[step, env_index] - ) -- gae = delta + gamma * gae_lambda * self.masks[step + 1] * gae -- self.returns[step] = gae + self.value_preds[step] -+ gae = delta + gamma * gae_lambda * self.masks[step + 1, env_index] * gae -+ self.returns[step, env_index] = gae + self.value_preds[step, env_index] - - - def null(x): -@@ -252,11 +220,15 @@ class PrioritizedLevelReplay(Curriculum): - else: - return [self._task_sampler.sample() for _ in range(k)] - -- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: - """ - Update the curriculum with the current step results from the environment. - """ - assert env_id is not None, "env_id must be provided for PLR updates." -+ if env_id >= self._num_processes: -+ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. 
Using index {env_id % self._num_processes} instead.") -+ env_id = env_id % self._num_processes -+ - # Update rollouts - self._rollouts.insert_at_index( - env_id, -@@ -266,14 +238,22 @@ class PrioritizedLevelReplay(Curriculum): - obs=np.array([obs]), - ) - -+ # Update task sampler -+ if env_id in self._rollouts.ready_buffers: -+ self._update_sampler(env_id) -+ - def update_on_step_batch( -- self, step_results: List[Tuple[Any, int, bool, bool, Dict]], env_id: int = None -+ self, step_results: List[Tuple[int, Any, int, bool, bool, Dict]], env_id: int = None - ) -> None: - """ - Update the curriculum with a batch of step results from the environment. - """ - assert env_id is not None, "env_id must be provided for PLR updates." -- obs, rews, terms, truncs, infos = step_results -+ if env_id >= self._num_processes: -+ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. Using index {env_id % self._num_processes} instead.") -+ env_id = env_id % self._num_processes -+ -+ tasks, obs, rews, terms, truncs, infos = step_results - self._rollouts.insert_at_index( - env_id, - mask=np.logical_not(np.logical_or(terms, truncs)), -@@ -281,25 +261,19 @@ class PrioritizedLevelReplay(Curriculum): - reward=rews, - obs=obs, - steps=len(rews), -+ task=tasks, - ) - -- def update_task_progress(self, task: Any, success_prob: float, env_id: int = None) -> None: -- """ -- Update the curriculum with a task and its success probability upon -- success or failure. -- """ -- assert env_id is not None, "env_id must be provided for PLR updates." -- self._rollouts.insert_at_index( -- env_id, -- task=task, -- ) - # Update task sampler -- if self._rollouts.should_update: -- if self._task_sampler.requires_value_buffers: -- self._rollouts.compute_returns(self._gamma, self._gae_lambda) -- self._task_sampler.update_with_rollouts(self._rollouts) -- self._rollouts.after_update() -- self._task_sampler.after_update() -+ if env_id in self._rollouts.ready_buffers: -+ self._update_sampler(env_id) -+ -+ def _update_sampler(self, env_id): -+ if self._task_sampler.requires_value_buffers: -+ self._rollouts.compute_returns(self._gamma, self._gae_lambda, env_id) -+ self._task_sampler.update_with_rollouts(self._rollouts, env_id) -+ self._rollouts.after_update(env_id) -+ self._task_sampler.after_update() - - def _enumerate_tasks(self, space): - assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete" -@@ -312,10 +286,10 @@ class PrioritizedLevelReplay(Curriculum): - """ - Log the task distribution to the provided tensorboard writer. 
- """ -- super().log_metrics(writer, step) -+ # super().log_metrics(writer, step) - metrics = self._task_sampler.metrics() - writer.add_scalar("curriculum/proportion_seen", metrics["proportion_seen"], step) - writer.add_scalar("curriculum/score", metrics["score"], step) -- for task in list(self.task_space.tasks)[:10]: -- writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) -- writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) -+ # for task in list(self.task_space.tasks)[:10]: -+ # writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) -+ # writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) -diff --git a/syllabus/curricula/plr/task_sampler.py b/syllabus/curricula/plr/task_sampler.py -index 15ad485..c1e97a1 100644 ---- a/syllabus/curricula/plr/task_sampler.py -+++ b/syllabus/curricula/plr/task_sampler.py -@@ -73,7 +73,7 @@ class TaskSampler: - 'Must provide action space to PLR if using "policy_entropy", "least_confidence", or "min_margin" strategies' - ) - -- def update_with_rollouts(self, rollouts): -+ def update_with_rollouts(self, rollouts, actor_id=None): - if self.strategy == "random": - return - -@@ -93,7 +93,7 @@ class TaskSampler: - else: - raise ValueError(f"Unsupported strategy, {self.strategy}") - -- self._update_with_rollouts(rollouts, score_function) -+ self._update_with_rollouts(rollouts, score_function, actor_index=actor_id) - - def update_task_score(self, actor_index, task_idx, score, num_steps): - score = self._partial_update_task_score(actor_index, task_idx, score, num_steps, done=True) -@@ -165,14 +165,15 @@ class TaskSampler: - def requires_value_buffers(self): - return self.strategy in ["gae", "value_l1", "one_step_td_error"] - -- def _update_with_rollouts(self, rollouts, score_function): -+ def _update_with_rollouts(self, rollouts, score_function, actor_index=None): - tasks = rollouts.tasks - if not self.requires_value_buffers: - policy_logits = rollouts.action_log_dist - done = ~(rollouts.masks > 0) - total_steps, num_actors = rollouts.tasks.shape[:2] - -- for actor_index in range(num_actors): -+ actors = [actor_index] if actor_index is not None else range(num_actors) -+ for actor_index in actors: - done_steps = done[:, actor_index].nonzero()[:total_steps, 0] - start_t = 0 - -diff --git a/syllabus/curricula/sequential.py b/syllabus/curricula/sequential.py -index baa1263..ec3b8b0 100644 ---- a/syllabus/curricula/sequential.py -+++ b/syllabus/curricula/sequential.py -@@ -177,9 +177,9 @@ class SequentialCurriculum(Curriculum): - if self.current_curriculum.requires_episode_updates: - self.current_curriculum.update_on_episode(episode_return, episode_len, episode_task, env_id) - -- def update_on_step(self, obs, rew, term, trunc, info, env_id=None): -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id=None): - if self.current_curriculum.requires_step_updates: -- self.current_curriculum.update_on_step(obs, rew, term, trunc, info, env_id) -+ self.current_curriculum.update_on_step(task, obs, rew, term, trunc, info, env_id) - - def update_on_step_batch(self, step_results, env_id=None): - if self.current_curriculum.requires_step_updates: -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py -index a6d469e..b848d69 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py 
-+++ b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py -@@ -14,6 +14,7 @@ import gym as openai_gym - import gymnasium as gym - import numpy as np - import procgen # noqa: F401 -+from procgen import ProcgenEnv - import torch - import torch.nn as nn - import torch.optim as optim -@@ -21,10 +22,10 @@ from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 - from torch.utils.tensorboard import SummaryWriter - - from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum --from syllabus.curricula import DomainRandomization, LearningProgressCurriculum, CentralizedPrioritizedLevelReplay -+from syllabus.curricula import CentralizedPrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum - from syllabus.examples.models import ProcgenAgent - from syllabus.examples.task_wrappers import ProcgenTaskWrapper --from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize -+from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - - def parse_args(): -@@ -46,6 +47,8 @@ def parse_args(): - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="weather to capture videos of the agent performances (check out `videos` folder)") -+ parser.add_argument("--logging-dir", type=str, default=".", -+ help="the base directory for logging and wandb storage.") - - # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="starpilot", -@@ -124,15 +127,15 @@ PROCGEN_RETURN_BOUNDS = { - } - - --def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): -+def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) -- env = ProcgenTaskWrapper(env, env_id, seed=seed) -- if curriculum_components is not None: -+ if curriculum is not None: -+ env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, -- curriculum_components, -+ curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, - ) -@@ -147,36 +150,38 @@ def wrap_vecenv(vecenv): - return vecenv - - --def level_replay_evaluate( -+def full_level_replay_evaluate( - env_name, - policy, - num_episodes, - device, -- num_levels=0 -+ num_levels=1 # Not used - ): - policy.eval() -- eval_envs = gym.vector.SyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) -- for i in range(1) -- ] -+ -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False - ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - -- eval_episode_rewards = [] -+ # Seed environments -+ seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] -+ for i, seed in enumerate(seeds): -+ eval_envs.seed(seed, i) -+ - eval_obs, _ = eval_envs.reset() -+ eval_episode_rewards = [-1] * num_episodes - -- while len(eval_episode_rewards) < num_episodes: -+ while -1 in eval_episode_rewards: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - -- eval_obs, _, truncs, terms, infos = 
eval_envs.step(np.array([eval_action.cpu().numpy()])) -- -- for info in infos: -- if 'episode' in info.keys(): -- eval_episode_rewards.append(info['episode']['r']) -+ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -+ for i, info in enumerate(infos): -+ if 'episode' in info.keys() and eval_episode_rewards[i] == -1: -+ eval_episode_rewards[i] = info['episode']['r'] - -- eval_envs.close() - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -@@ -185,8 +190,7 @@ def level_replay_evaluate( - return mean_returns, stddev_returns, normalized_mean_returns - - --def fast_level_replay_evaluate( -- eval_envs, -+def level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -194,9 +198,13 @@ def fast_level_replay_evaluate( - num_levels=0 - ): - policy.eval() -- possible_seeds = np.arange(0, num_levels + 1) -- eval_obs, _ = eval_envs.reset(seed=list(np.random.choice(possible_seeds, size=num_episodes))) - -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -+ ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") -+ eval_envs = wrap_vecenv(eval_envs) -+ eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: -@@ -231,10 +239,11 @@ if __name__ == "__main__": - name=run_name, - monitor_gym=True, - save_code=True, -- # dir="/fs/nexus-scratch/rsulli/" -+ dir=args.logging_dir - ) -- wandb.run.log_code("./syllabus/examples") -- writer = SummaryWriter(f"./runs/{run_name}") -+ # wandb.run.log_code("./syllabus/examples") -+ -+ writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), -@@ -250,7 +259,7 @@ if __name__ == "__main__": - print("Device:", device) - - # Curriculum setup -- task_queue = update_queue = None -+ curriculum = None - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) -@@ -273,6 +282,16 @@ if __name__ == "__main__": - elif args.curriculum_method == "lp": - print("Using learning progress.") - curriculum = LearningProgressCurriculum(sample_env.task_space) -+ elif args.curriculum_method == "sq": -+ print("Using sequential curriculum.") -+ curricula = [] -+ stopping = [] -+ for i in range(199): -+ curricula.append(i + 1) -+ stopping.append("steps>=50000") -+ curricula.append(list(range(i + 1))) -+ stopping.append("steps>=50000") -+ curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) - else: - raise ValueError(f"Unknown curriculum method {args.curriculum_method}") - curriculum = make_multiprocessing_curriculum(curriculum) -@@ -285,7 +304,7 @@ if __name__ == "__main__": - make_env( - args.env_id, - args.seed + i, -- curriculum_components=curriculum.get_components() if args.curriculum else None, -+ curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) -@@ -293,22 +312,6 @@ if __name__ == "__main__": - ) - envs = wrap_vecenv(envs) - -- test_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=0) -- for i in range(args.num_eval_episodes) -- ] -- ) -- test_eval_envs = wrap_vecenv(test_eval_envs) -- -- train_eval_envs = 
gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=200) -- for i in range(args.num_eval_episodes) -- ] -- ) -- train_eval_envs = wrap_vecenv(train_eval_envs) -- - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( -@@ -369,6 +372,8 @@ if __name__ == "__main__": - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) -+ if curriculum is not None: -+ curriculum.log_metrics(writer, global_step) - break - - # Syllabus curriculum update -@@ -388,8 +393,6 @@ if __name__ == "__main__": - }, - } - curriculum.update(update) -- #if args.curriculum: -- # curriculum.log_metrics(writer, global_step) - - # bootstrap value if not done - with torch.no_grad(): -@@ -487,8 +490,18 @@ if __name__ == "__main__": - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent -- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) -- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) -+ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) -+ full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) -@@ -502,12 +515,21 @@ if __name__ == "__main__": - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) -+ - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) -+ - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) -- 
writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) -+ writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) -+ - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -index e13c22e..abf656b 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -+++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -@@ -14,6 +14,7 @@ import gym as openai_gym - import gymnasium as gym - import numpy as np - import procgen # noqa: F401 -+from procgen import ProcgenEnv - import torch - import torch.nn as nn - import torch.optim as optim -@@ -24,7 +25,7 @@ from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curri - from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum - from syllabus.examples.models import ProcgenAgent - from syllabus.examples.task_wrappers import ProcgenTaskWrapper --from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize -+from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - - def parse_args(): -@@ -126,18 +127,17 @@ PROCGEN_RETURN_BOUNDS = { - } - - --def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): -+def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) -- env = ProcgenTaskWrapper(env, env_id, seed=seed) -- if curriculum_components is not None: -+ if curriculum is not None: -+ env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, -- curriculum_components, -+ curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, -- buffer_size=4, - ) - return env - return thunk -@@ -150,7 +150,7 @@ def wrap_vecenv(vecenv): - return vecenv - - --def level_replay_evaluate( -+def slow_level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -158,28 +158,24 @@ def level_replay_evaluate( - num_levels=0 - ): - policy.eval() -- eval_envs = gym.vector.SyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) -- for i in range(1) -- ] -+ -+ eval_envs = ProcgenEnv( -+ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) -- -- eval_episode_rewards = [] - eval_obs, _ = eval_envs.reset() -+ eval_episode_rewards = [] - - while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - -- eval_obs, _, truncs, terms, infos = eval_envs.step(np.array([eval_action.cpu().numpy()])) -- -- for info in infos: -+ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -+ for i, 
info in enumerate(infos): - if 'episode' in info.keys(): - eval_episode_rewards.append(info['episode']['r']) - -- eval_envs.close() - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -@@ -188,8 +184,7 @@ def level_replay_evaluate( - return mean_returns, stddev_returns, normalized_mean_returns - - --def fast_level_replay_evaluate( -- eval_envs, -+def level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -198,15 +193,12 @@ def fast_level_replay_evaluate( - ): - policy.eval() - -- # Choose evaluation seeds -- if num_levels == 0: -- seeds = np.random.randint(0, 2 ** 16 - 1, size=num_episodes) -- else: -- seeds = np.random.choice(np.arange(0, num_levels), size=num_episodes) -- -- seed_envs = [(int(seed), env) for seed, env in zip(seeds, range(num_episodes))] -- eval_obs, _ = eval_envs.reset(seed=seed_envs) -- -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -+ ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") -+ eval_envs = wrap_vecenv(eval_envs) -+ eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: -@@ -251,9 +243,9 @@ if __name__ == "__main__": - save_code=True, - dir=args.logging_dir - ) -- wandb.run.log_code(os.path.join(args.logging_dir, "/syllabus/examples")) -+ # wandb.run.log_code("./syllabus/examples") - -- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) -+ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), -@@ -316,7 +308,7 @@ if __name__ == "__main__": - make_env( - args.env_id, - args.seed + i, -- curriculum_components=curriculum.get_components() if args.curriculum else None, -+ curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) -@@ -324,22 +316,6 @@ if __name__ == "__main__": - ) - envs = wrap_vecenv(envs) - -- test_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=0) -- for i in range(args.num_eval_episodes) -- ] -- ) -- test_eval_envs = wrap_vecenv(test_eval_envs) -- -- train_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=200) -- for i in range(args.num_eval_episodes) -- ] -- ) -- train_eval_envs = wrap_vecenv(train_eval_envs) -- - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( -@@ -500,8 +476,18 @@ if __name__ == "__main__": - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent -- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) -- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) -+ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ slow_mean_eval_returns, slow_stddev_eval_returns, 
slow_normalized_mean_eval_returns = slow_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) -+ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) -@@ -515,12 +501,21 @@ if __name__ == "__main__": - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) -+ - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) -+ - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) -+ - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() -diff --git a/syllabus/examples/utils/vecenv.py b/syllabus/examples/utils/vecenv.py -index 6e5a0a9..af3b187 100644 ---- a/syllabus/examples/utils/vecenv.py -+++ b/syllabus/examples/utils/vecenv.py -@@ -1,7 +1,6 @@ - import time - from collections import deque - --import gym - import numpy as np - - -@@ -154,12 +153,20 @@ class VecEnvObservationWrapper(VecEnvWrapper): - pass - - def reset(self): -- obs, infos = self.venv.reset() -+ outputs = self.venv.reset() -+ if len(outputs) == 2: -+ obs, infos = outputs -+ else: -+ obs, infos = outputs, {} - return self.process(obs), infos - - def step_wait(self): -- print(self.venv) -- obs, rews, terms, truncs, infos = self.venv.step_wait() -+ env_outputs = self.venv.step_wait() -+ if len(env_outputs) == 4: -+ obs, rews, terms, infos = env_outputs -+ truncs = np.zeros_like(terms) -+ else: -+ obs, rews, terms, truncs, infos = env_outputs - return self.process(obs), rews, terms, truncs, infos - - -@@ -209,7 +216,10 @@ class VecNormalize(VecEnvWrapper): - - def reset(self, seed=None): - self.ret = np.zeros(self.num_envs) -- obs, infos = self.venv.reset(seed=seed) -+ if seed is not None: -+ obs, infos = self.venv.reset(seed=seed) -+ else: -+ obs, infos = self.venv.reset() - return self._obfilt(obs), infos - - -@@ -228,7 +238,10 @@ class VecMonitor(VecEnvWrapper): - self.eplen_buf = 
deque([], maxlen=keep_buf) - - def reset(self, seed=None): -- obs, infos = self.venv.reset(seed=seed) -+ if seed is not None: -+ obs, infos = self.venv.reset(seed=seed) -+ else: -+ obs, infos = self.venv.reset() - self.eprets = np.zeros(self.num_envs, 'f') - self.eplens = np.zeros(self.num_envs, 'i') - return obs, infos -@@ -239,7 +252,8 @@ class VecMonitor(VecEnvWrapper): - self.eprets += rews - self.eplens += 1 - # Convert dict of lists to list of dicts -- infos = [dict(zip(infos, t)) for t in zip(*infos.values())] -+ if isinstance(infos, dict): -+ infos = [dict(zip(infos, t)) for t in zip(*infos.values())] - newinfos = list(infos[:]) - for i in range(len(dones)): - if dones[i]: -diff --git a/syllabus/task_space/task_space.py b/syllabus/task_space/task_space.py -index 316e2f2..1ef674b 100644 ---- a/syllabus/task_space/task_space.py -+++ b/syllabus/task_space/task_space.py -@@ -7,20 +7,53 @@ from gymnasium.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Sp - - class TaskSpace(): - def __init__(self, gym_space: Union[Space, int], tasks=None): -- if isinstance(gym_space, int): -- # Syntactic sugar for discrete space -- gym_space = Discrete(gym_space) -+ -+ if not isinstance(gym_space, Space): -+ gym_space = self._create_gym_space(gym_space) - - self.gym_space = gym_space - -- # Autogenerate task names for discrete spaces -- if isinstance(gym_space, Discrete): -- if tasks is None: -- tasks = range(gym_space.n) -+ # Autogenerate task names -+ if tasks is None: -+ tasks = self._generate_task_names(gym_space) - - self._tasks = set(tasks) if tasks is not None else None - self._encoder, self._decoder = self._make_task_encoder(gym_space, tasks) - -+ def _create_gym_space(self, gym_space): -+ if isinstance(gym_space, int): -+ # Syntactic sugar for discrete space -+ gym_space = Discrete(gym_space) -+ elif isinstance(gym_space, tuple): -+ # Syntactic sugar for discrete space -+ gym_space = MultiDiscrete(gym_space) -+ elif isinstance(gym_space, list): -+ # Syntactic sugar for tuple space -+ spaces = [] -+ for i, value in enumerate(gym_space): -+ spaces[i] = self._create_gym_space(value) -+ gym_space = Tuple(spaces) -+ elif isinstance(gym_space, dict): -+ # Syntactic sugar for dict space -+ spaces = {} -+ for key, value in gym_space.items(): -+ spaces[key] = self._create_gym_space(value) -+ gym_space = Dict(spaces) -+ return gym_space -+ -+ def _generate_task_names(self, gym_space): -+ if isinstance(gym_space, Discrete): -+ tasks = tuple(range(gym_space.n)) -+ elif isinstance(gym_space, MultiDiscrete): -+ tasks = [tuple(range(dim)) for dim in gym_space.nvec] -+ elif isinstance(gym_space, Tuple): -+ tasks = [self._generate_task_names(value) for value in gym_space.spaces] -+ elif isinstance(gym_space, Dict): -+ tasks = {key: tuple(self._generate_task_names(value)) for key, value in gym_space.spaces.items()} -+ else: -+ tasks = None -+ return tasks -+ - def _make_task_encoder(self, space, tasks): - if isinstance(space, Discrete): - assert space.n == len(tasks), f"Number of tasks ({space.n}) must match number of discrete options ({len(tasks)})" -@@ -28,14 +61,46 @@ class TaskSpace(): - self._decode_map = {i: task for i, task in enumerate(tasks)} - encoder = lambda task: self._encode_map[task] if task in self._encode_map else None - decoder = lambda task: self._decode_map[task] if task in self._decode_map else None -+ -+ elif isinstance(space, Box): -+ encoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None -+ decoder = lambda task: task if 
space.contains(np.asarray(task, dtype=space.dtype)) else None - elif isinstance(space, Tuple): -- for i, task in enumerate(tasks): -- assert self.count_tasks(space.spaces[i]) == len(task), "Each task must have number of components equal to Tuple space length. Got {len(task)} components and space length {self.count_tasks(space.spaces[i])}." -+ -+ assert len(space.spaces) == len(tasks), f"Number of task ({len(space.spaces)})must match options in Tuple ({len(tasks)})" - results = [list(self._make_task_encoder(s, t)) for (s, t) in zip(space.spaces, tasks)] - encoders = [r[0] for r in results] - decoders = [r[1] for r in results] - encoder = lambda task: [e(t) for e, t in zip(encoders, task)] - decoder = lambda task: [d(t) for d, t in zip(decoders, task)] -+ -+ elif isinstance(space, MultiDiscrete): -+ assert len(space.nvec) == len(tasks), f"Number of steps in a tasks ({len(space.nvec)}) must match number of discrete options ({len(tasks)})" -+ -+ combinations = [p for p in itertools.product(*tasks)] -+ encode_map = {task: i for i, task in enumerate(combinations)} -+ decode_map = {i: task for i, task in enumerate(combinations)} -+ -+ encoder = lambda task: encode_map[task] if task in encode_map else None -+ decoder = lambda task: decode_map[task] if task in decode_map else None -+ -+ elif isinstance(space, Dict): -+ -+ def helper(task, spaces, tasks, action="encode"): -+ # Iteratively encodes or decodes each space in the dictionary -+ output = {} -+ if (isinstance(spaces, dict) or isinstance(spaces, Dict)): -+ for key, value in spaces.items(): -+ if (isinstance(value, dict) or isinstance(value, Dict)): -+ temp = helper(task[key], value, tasks[key], action) -+ output.update({key: temp}) -+ else: -+ encoder, decoder = self._make_task_encoder(value, tasks[key]) -+ output[key] = encoder(task[key]) if action == "encode" else decoder(task[key]) -+ return output -+ -+ encoder = lambda task: helper(task, space.spaces, tasks, "encode") -+ decoder = lambda task: helper(task, space.spaces, tasks, "decode") - else: - encoder = lambda task: task - decoder = lambda task: task -@@ -152,6 +217,7 @@ class TaskSpace(): - return Discrete(self.gym_space.n + amount) - - def sample(self): -+ assert isinstance(self.gym_space, Discrete) or isinstance(self.gym_space, Box) or isinstance(self.gym_space, Dict) or isinstance(self.gym_space, Tuple) - return self.decode(self.gym_space.sample()) - - def list_tasks(self): -diff --git a/syllabus/task_space/test_task_space.py b/syllabus/task_space/test_task_space.py -index 0ec6b4e..109d0a7 100644 ---- a/syllabus/task_space/test_task_space.py -+++ b/syllabus/task_space/test_task_space.py -@@ -2,33 +2,148 @@ import gymnasium as gym - from syllabus.task_space import TaskSpace - - if __name__ == "__main__": -+ # Discrete Tests - task_space = TaskSpace(gym.spaces.Discrete(3), ["a", "b", "c"]) -+ - assert task_space.encode("a") == 0, f"Expected 0, got {task_space.encode('a')}" - assert task_space.encode("b") == 1, f"Expected 1, got {task_space.encode('b')}" - assert task_space.encode("c") == 2, f"Expected 2, got {task_space.encode('c')}" -- assert task_space.encode("d") == None, f"Expected None, got {task_space.encode('d')}" -+ assert task_space.encode("d") is None, f"Expected None, got {task_space.encode('d')}" - - assert task_space.decode(0) == "a", f"Expected a, got {task_space.decode(0)}" - assert task_space.decode(1) == "b", f"Expected b, got {task_space.decode(1)}" - assert task_space.decode(2) == "c", f"Expected c, got {task_space.decode(2)}" -- assert task_space.decode(3) == 
None, f"Expected None, got {task_space.decode(3)}" -+ assert task_space.decode(3) is None, f"Expected None, got {task_space.decode(3)}" - print("Discrete tests passed!") - -+ # MultiDiscrete Tests -+ task_space = TaskSpace(gym.spaces.MultiDiscrete([3, 2]), [("a", "b", "c"), (1, 0)]) -+ -+ assert task_space.encode(('a', 1)) == 0, f"Expected 0, got {task_space.encode(('a', 1))}" -+ assert task_space.encode(('b', 0)) == 3, f"Expected 3, got {task_space.encode(('b', 0))}" -+ assert task_space.encode(('c', 1)) == 4, f"Expected 4, got {task_space.encode(('c', 1))}" -+ -+ assert task_space.decode(3) == ('b', 0), f"Expected ('b', 0), got {task_space.decode(3)}" -+ assert task_space.decode(5) == ('c', 0), f"Expected ('c', 0), got {task_space.decode(5)}" -+ print("MultiDiscrete tests passed!") -+ -+ # Box Tests - task_space = TaskSpace(gym.spaces.Box(low=0, high=1, shape=(2,)), [(0, 0), (0, 1), (1, 0), (1, 1)]) -+ - assert task_space.encode([0.0, 0.0]) == [0.0, 0.0], f"Expected [0.0, 0.0], got {task_space.encode([0.0, 0.0])}" - assert task_space.encode([0.0, 0.1]) == [0.0, 0.1], f"Expected [0.0, 0.1], got {task_space.encode([0.0, 0.1])}" - assert task_space.encode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.encode([0.1, 0.1])}" - assert task_space.encode([1.0, 0.1]) == [1.0, 0.1], f"Expected [1.0, 0.1], got {task_space.encode([1.0, 0.1])}" - assert task_space.encode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.encode([1.0, 1.0])}" -- assert task_space.encode([1.2, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -- assert task_space.encode([1.0, 1.2]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -- assert task_space.encode([-0.1, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([1.2, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([1.0, 1.2]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([-0.1, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" - - assert task_space.decode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.decode([1.0, 1.0])}" - assert task_space.decode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.decode([0.1, 0.1])}" -- assert task_space.decode([-0.1, 1.0]) == None, f"Expected None, got {task_space.decode([1.2, 1.0])}" -+ assert task_space.decode([-0.1, 1.0]) is None, f"Expected None, got {task_space.decode([1.2, 1.0])}" - print("Box tests passed!") - -+ # Tuple Tests -+ task_spaces = (gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3)) -+ task_names = ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")) -+ task_space = TaskSpace(gym.spaces.Tuple(task_spaces), task_names) -+ -+ assert task_space.encode((('a', 0), 'Y')) == [1, 1], f"Expected 0, got {task_space.encode((('a', 1),'Y'))}" -+ assert task_space.decode([0, 1]) == [('a', 1), 'Y'], f"Expected 0, got {task_space.decode([0, 1])}" -+ print("Tuple tests passed!") -+ -+ # Dictionary Tests -+ task_spaces = gym.spaces.Dict({ -+ "ext_controller": gym.spaces.MultiDiscrete([5, 2, 2]), -+ "inner_state": gym.spaces.Dict( -+ { -+ "charge": gym.spaces.Discrete(10), -+ "system_checks": gym.spaces.Tuple((gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3))), -+ "job_status": gym.spaces.Dict( -+ { -+ "task": gym.spaces.Discrete(5), -+ "progress": gym.spaces.Box(low=0, high=1, shape=(2,)), -+ } -+ ), -+ } -+ ), -+ }) -+ task_names = { -+ "ext_controller": [("a", "b", 
"c", "d", "e"), (1, 0), ("X", "Y")], -+ "inner_state": { -+ "charge": [0, 1, 13, 3, 94, 35, 6, 37, 8, 9], -+ "system_checks": ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")), -+ "job_status": { -+ "task": ["A", "B", "C", "D", "E"], -+ "progress": [(0, 0), (0, 1), (1, 0), (1, 1)], -+ } -+ } -+ } -+ task_space = TaskSpace(task_spaces, task_names) -+ -+ test_val = { -+ "ext_controller": ('b', 1, 'X'), -+ 'inner_state': { -+ 'charge': 1, -+ 'system_checks': [('a', 0), 'Y'], -+ 'job_status': {'task': 'C', 'progress': [0.0, 0.0]} -+ } -+ } -+ decode_val = { -+ "ext_controller": 4, -+ "inner_state": { -+ "charge": 1, -+ "system_checks": [1, 1], -+ "job_status": {"progress": [0.0, 0.0], "task": 2}, -+ }, -+ } -+ -+ assert task_space.encode(test_val) == decode_val, f"Expected {decode_val}, \n but got {task_space.encode(test_val)}" -+ assert task_space.decode(decode_val) == test_val, f"Expected {test_val}, \n but got {task_space.decode(decode_val)}" -+ -+ test_val_2 = { -+ "ext_controller": ("e", 1, "Y"), -+ "inner_state": { -+ "charge": 37, -+ "system_checks": [("b", 0), "Z"], -+ "job_status": {"progress": [0.0, 0.1], "task": "D"}, -+ }, -+ } -+ decode_val_2 = { -+ "ext_controller": 17, -+ "inner_state": { -+ "charge": 7, -+ "system_checks": [3, 2], -+ "job_status": {"progress": [0.0, 0.1], "task": 3}, -+ }, -+ } -+ -+ assert task_space.encode(test_val_2) == decode_val_2, f"Expected {decode_val_2}, \n but got {task_space.encode(test_val_2)}" -+ assert task_space.decode(decode_val_2) == test_val_2, f"Expected {test_val_2}, \n but got {task_space.decode(decode_val_2)}" -+ -+ test_val_3 = { -+ "ext_controller": ("e", 1, "X"), -+ "inner_state": { -+ "charge": 8, -+ "system_checks": [("c", 0), "X"], -+ "job_status": {"progress": [0.5, 0.1], "task": "E"}, -+ }, -+ } -+ decode_val_3 = { -+ "ext_controller": 16, -+ "inner_state": { -+ "charge": 8, -+ "system_checks": [5, 0], -+ "job_status": {"progress": [0.5, 0.1], "task": 4}, -+ }, -+ } -+ -+ assert task_space.encode(test_val_3) == decode_val_3, f"Expected {decode_val_3}, \n but got {task_space.encode(test_val_3)}" -+ assert task_space.decode(decode_val_3) == test_val_3, f"Expected {test_val_3}, \n but got {task_space.decode(decode_val_3)}" -+ -+ print("Dictionary tests passed!") -+ - # Test syntactic sugar - task_space = TaskSpace(3) - assert task_space.encode(0) == 0, f"Expected 0, got {task_space.encode(0)}" -@@ -36,4 +151,32 @@ if __name__ == "__main__": - assert task_space.encode(2) == 2, f"Expected 2, got {task_space.encode(2)}" - assert task_space.encode(3) is None, f"Expected None, got {task_space.encode(3)}" - -+ task_space = TaskSpace((2, 4)) -+ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" -+ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" -+ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" -+ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" -+ -+ task_space = TaskSpace((2, 4)) -+ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" -+ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" -+ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" -+ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" -+ -+ task_space = TaskSpace({"map": 5, "level": (4, 10), "difficulty": 3}) -+ -+ encoding = task_space.encode({"map": 0, "level": (0, 0), "difficulty": 0}) -+ 
expected = {"map": 0, "level": 0, "difficulty": 0} -+ -+ encoding = task_space.encode({"map": 4, "level": (3, 9), "difficulty": 2}) -+ expected = {"map": 4, "level": 39, "difficulty": 2} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" -+ -+ encoding = task_space.encode({"map": 2, "level": (2, 0), "difficulty": 1}) -+ expected = {"map": 2, "level": 20, "difficulty": 1} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" -+ -+ encoding = task_space.encode({"map": 5, "level": (2, 11), "difficulty": -1}) -+ expected = {"map": None, "level": None, "difficulty": None} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" - print("All tests passed!") -diff --git a/syllabus/tests/utils.py b/syllabus/tests/utils.py -index 314a29c..98bac82 100644 ---- a/syllabus/tests/utils.py -+++ b/syllabus/tests/utils.py -@@ -57,7 +57,7 @@ def run_episode(env, new_task=None, curriculum=None, env_id=0): - action = env.action_space.sample() - obs, rew, term, trunc, info = env.step(action) - if curriculum and curriculum.requires_step_updates: -- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) -+ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) - curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) - ep_rew += rew - ep_len += 1 -@@ -87,7 +87,7 @@ def run_set_length(env, curriculum=None, episodes=None, steps=None, env_id=0, en - action = env.action_space.sample() - obs, rew, term, trunc, info = env.step(action) - if curriculum and curriculum.requires_step_updates: -- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) -+ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) - curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) - ep_rew += rew - ep_len += 1 -diff --git a/tests/multiprocessing_smoke_tests.py b/tests/multiprocessing_smoke_tests.py -index 9db9f47..b788179 100644 ---- a/tests/multiprocessing_smoke_tests.py -+++ b/tests/multiprocessing_smoke_tests.py -@@ -21,23 +21,23 @@ nethack_env = create_nethack_env() - cartpole_env = create_cartpole_env() - - curricula = [ -- (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), -- (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), -- # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), -- (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), -- (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { -- "get_value": get_test_values, -- "device": "cpu", -- "num_processes": N_ENVS, -- "num_steps": 2048 -- }), -- (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), -- (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { -- 'start_values': [-0.02, 0.02], -- 'end_values': [-0.3, 0.3], -- 'total_steps': [10] -- }), -- (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], 
["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), -+ (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), -+ (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), -+ # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), -+ (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), -+ (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { -+ "get_value": get_test_values, -+ "device": "cpu", -+ "num_processes": N_ENVS, -+ "num_steps": 2048 -+ }), -+ (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), -+ (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { -+ 'start_values': [-0.02, 0.02], -+ 'end_values': [-0.3, 0.3], -+ 'total_steps': [10] -+ }), -+ (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), - ] - - test_names = [curriculum_args[0].__name__ for curriculum_args in curricula] diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-metadata.json b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-metadata.json deleted file mode 100644 index 97ae1d6e..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-metadata.json +++ /dev/null @@ -1,167 +0,0 @@ -{ - "os": "Linux-3.10.0-1160.11.1.el7.x86_64-x86_64-with-glibc2.10", - "python": "3.8.5", - "heartbeatAt": "2024-04-23T03:00:36.306872", - "startedAt": "2024-04-23T03:00:35.394736", - "docker": null, - "cuda": "10.1.243", - "args": [ - "--curriculum", - "True", - "--track", - "True", - "--env-id", - "bigfish" - ], - "state": "running", - "program": "cleanrl_procgen_plr.py", - "codePathLocal": "cleanrl_procgen_plr.py", - "codePath": "syllabus/examples/training_scripts/cleanrl_procgen_plr.py", - "git": { - "remote": "https://github.com/RoseyGreenBlue/Syllabus.git", - "commit": "63dc8f62e4d9d567eb92bb2f6c2bb186a0dc8ffb" - }, - "email": "djhaayusv04@gmail.com", - "root": "/data/averma/MARL/Syllabus", - "host": "f411843fc70b", - "username": "root", - "executable": "/home/user/miniconda/envs/test2_py/bin/python", - "cpu_count": 12, - "cpu_count_logical": 24, - "cpu_freq": { - "current": 1236.6083333333333, - "min": 1200.0, - "max": 3700.0 - }, - "cpu_freq_per_core": [ - { - "current": 1200.5, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1211.291, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.5, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1246.984, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1199.877, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1204.65, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.085, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1199.877, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1199.877, - "min": 1200.0, - 
"max": 3700.0 - }, - { - "current": 1205.48, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1228.723, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1225.195, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1214.819, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1204.65, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1269.396, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1238.269, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1199.877, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1397.436, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1388.928, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.708, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1329.162, - "min": 1200.0, - "max": 3700.0 - } - ], - "disk": { - "/": { - "total": 5952.626953125, - "used": 988.7820014953613 - } - }, - "memory": { - "total": 251.63711166381836 - } -} diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-summary.json b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-summary.json deleted file mode 100644 index eeaa9075..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"global_step": 16320, "_timestamp": 1713841250.5553138, "_runtime": 15.14670991897583, "_step": 82, "charts/episodic_return": 1.0, "charts/episodic_length": 150.0, "curriculum/proportion_seen": 0.0, "curriculum/score": 0.0, "_wandb": {"runtime": 27}} \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/run-37l9hfvl.wandb b/syllabus/examples/training_scripts/wandb/run-20240423_030035-37l9hfvl/run-37l9hfvl.wandb deleted file mode 100644 index 25b7561a5425038bd864a11b55d75be8d24ff88b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 51518 zcmd^|34Bvk+W$=`SOO>%*Q!{Hj)<5hcfYvnjEai;Hg1_TO-n+W)FfqbEk(2-OI5&v zC`A-1>QFW*2vQUj7eE17WGgCKMWik`Ad3IrlWzB9xwW_H`~S@Q>U>7%ZO*yh=XcI? 
4Q> wZ>3k$(=A>7?(on+aIiHL>8YKua~~&LttyX_o1S4C{*Yo*Dt*f}j)lno56g`XhyVZp diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py deleted file mode 100644 index b8073049..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +++ /dev/null @@ -1,524 +0,0 @@ -""" An example applying Syllabus Prioritized Level Replay to Procgen. This code is based on https://github.com/facebookresearch/level-replay/blob/main/train.py - -NOTE: In order to efficiently change the seed of a procgen environment directly without reinitializing it, -we rely on Minqi Jiang's custom branch of procgen found here: https://github.com/minqi/procgen -""" -import argparse -import os -import random -import time -from collections import deque -from distutils.util import strtobool - -import gym as openai_gym -import gymnasium as gym -import numpy as np -import procgen # noqa: F401 -from procgen import ProcgenEnv -import torch -import torch.nn as nn -import torch.optim as optim -from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 -from torch.utils.tensorboard import SummaryWriter - -from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum -from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum -from syllabus.examples.models import ProcgenAgent -from syllabus.examples.task_wrappers import ProcgenTaskWrapper -from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="syllabus", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="weather to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--logging-dir", type=str, default=".", - help="the base directory for logging and wandb storage.") - - # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="starpilot", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=int(25e6), - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=5e-4, - help="the learning rate of the 
optimizer") - parser.add_argument("--num-envs", type=int, default=64, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=256, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Use GAE for advantage computation") - parser.add_argument("--gamma", type=float, default=0.999, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=8, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=3, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.2, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - - # Procgen arguments - parser.add_argument("--full-dist", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Train on full distribution of levels.") - - # Curriculum arguments - parser.add_argument("--curriculum", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will use curriculum learning") - parser.add_argument("--curriculum-method", type=str, default="plr", - help="curriculum method to use") - parser.add_argument("--num-eval-episodes", type=int, default=10, - help="the number of episodes to evaluate the agent on after each policy update.") - - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args - - -PROCGEN_RETURN_BOUNDS = { - "coinrun": (5, 10), - "starpilot": (2.5, 64), - "caveflyer": (3.5, 12), - "dodgeball": (1.5, 19), - "fruitbot": (-1.5, 32.4), - "chaser": (0.5, 13), - "miner": (1.5, 13), - "jumper": (3, 10), - "leaper": (3, 10), - "maze": (5, 10), - "bigfish": (1, 40), - "heist": (3.5, 10), - "climber": (2, 12.6), - "plunder": (4.5, 30), - "ninja": (3.5, 10), - "bossfight": (0.5, 13), -} - - -def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) - if curriculum is not None: - env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, - curriculum.get_components(), - 
update_on_step=False, - task_space=env.task_space, - ) - return env - return thunk - - -def wrap_vecenv(vecenv): - vecenv.is_vector_env = True - vecenv = VecMonitor(venv=vecenv, filename=None, keep_buf=100) - vecenv = VecNormalize(venv=vecenv, ob=False, ret=True) - return vecenv - - -def slow_level_replay_evaluate( - env_name, - policy, - num_episodes, - device, - num_levels=0 -): - policy.eval() - - eval_envs = ProcgenEnv( - num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [] - - while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): - if 'episode' in info.keys(): - eval_episode_rewards.append(info['episode']['r']) - - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] - normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) - policy.train() - return mean_returns, stddev_returns, normalized_mean_returns - - -def level_replay_evaluate( - env_name, - policy, - num_episodes, - device, - num_levels=0 -): - policy.eval() - - eval_envs = ProcgenEnv( - num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): - if 'episode' in info.keys() and eval_episode_rewards[i] == -1: - eval_episode_rewards[i] = info['episode']['r'] - - # print(eval_episode_rewards) - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] - normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) - policy.train() - return mean_returns, stddev_returns, normalized_mean_returns - - -def make_value_fn(): - def get_value(obs): - obs = np.array(obs) - with torch.no_grad(): - return agent.get_value(torch.Tensor(obs).to(device)) - return get_value - - -if __name__ == "__main__": - args = parse_args() - run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" - if args.track: - import wandb - - wandb.init( - project=args.wandb_project_name, - entity=args.wandb_entity, - sync_tensorboard=True, - config=vars(args), - name=run_name, - monitor_gym=True, - save_code=True, - dir=args.logging_dir - ) - # wandb.run.log_code("./syllabus/examples") - - writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), - ) - - # TRY NOT TO MODIFY: seeding - random.seed(args.seed) - np.random.seed(args.seed) - torch.manual_seed(args.seed) - torch.backends.cudnn.deterministic 
= args.torch_deterministic - - device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") - print("Device:", device) - - # Curriculum setup - curriculum = None - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) - # code to edit - # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) - sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) - - # Intialize Curriculum Method - if args.curriculum_method == "plr": - print("Using prioritized level replay.") - curriculum = PrioritizedLevelReplay( - sample_env.task_space, - sample_env.observation_space, - num_steps=args.num_steps, - num_processes=args.num_envs, - gamma=args.gamma, - gae_lambda=args.gae_lambda, - task_sampler_kwargs_dict={"strategy": "value_l1"}, - get_value=make_value_fn(), - ) - elif args.curriculum_method == "dr": - print("Using domain randomization.") - curriculum = DomainRandomization(sample_env.task_space) - elif args.curriculum_method == "lp": - print("Using learning progress.") - curriculum = LearningProgressCurriculum(sample_env.task_space) - elif args.curriculum_method == "sq": - print("Using sequential curriculum.") - curricula = [] - stopping = [] - for i in range(199): - curricula.append(i + 1) - stopping.append("steps>=50000") - curricula.append(list(range(i + 1))) - stopping.append("steps>=50000") - curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) - else: - raise ValueError(f"Unknown curriculum method {args.curriculum_method}") - curriculum = make_multiprocessing_curriculum(curriculum) - del sample_env - - # env setup - print("Creating env") - envs = gym.vector.AsyncVectorEnv( - [ - make_env( - args.env_id, - args.seed + i, - curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) - ] - ) - envs = wrap_vecenv(envs) - - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( - envs.single_observation_space.shape, - envs.single_action_space.n, - arch="large", - base_kwargs={'recurrent': False, 'hidden_size': 256} - ).to(device) - optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) - - # ALGO Logic: Storage setup - obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) - actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) - logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) - rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) - values = torch.zeros((args.num_steps, args.num_envs)).to(device) - - # TRY NOT TO MODIFY: start the game - global_step = 0 - start_time = time.time() - next_obs, _ = envs.reset() - next_obs = torch.Tensor(next_obs).to(device) - next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // args.batch_size - episode_rewards = deque(maxlen=10) - completed_episodes = 0 - - for update in range(1, num_updates + 1): - # Annealing the rate if instructed to do so. 
- if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates - lrnow = frac * args.learning_rate - optimizer.param_groups[0]["lr"] = lrnow - - for step in range(0, args.num_steps): - global_step += 1 * args.num_envs - obs[step] = next_obs - dones[step] = next_done - - # ALGO LOGIC: action logic - with torch.no_grad(): - action, logprob, _, value = agent.get_action_and_value(next_obs) - values[step] = value.flatten() - actions[step] = action - logprobs[step] = logprob - - # TRY NOT TO MODIFY: execute the game and log data. - next_obs, reward, term, trunc, info = envs.step(action.cpu().numpy()) - done = np.logical_or(term, trunc) - rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) - completed_episodes += sum(done) - - for item in info: - if "episode" in item.keys(): - episode_rewards.append(item['episode']['r']) - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) - if curriculum is not None: - curriculum.log_metrics(writer, global_step) - break - - # bootstrap value if not done - with torch.no_grad(): - next_value = agent.get_value(next_obs).reshape(1, -1) - if args.gae: - advantages = torch.zeros_like(rewards).to(device) - lastgaelam = 0 - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - nextvalues = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - nextvalues = values[t + 1] - delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] - advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam - returns = advantages + values - else: - returns = torch.zeros_like(rewards).to(device) - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - next_return = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - next_return = returns[t + 1] - returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return - advantages = returns - values - - # flatten the batch - b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) - b_logprobs = logprobs.reshape(-1) - b_actions = actions.reshape((-1,) + envs.single_action_space.shape) - b_advantages = advantages.reshape(-1) - b_returns = returns.reshape(-1) - b_values = values.reshape(-1) - - # Optimizing the policy and value network - b_inds = np.arange(args.batch_size) - clipfracs = [] - for epoch in range(args.update_epochs): - np.random.shuffle(b_inds) - for start in range(0, args.batch_size, args.minibatch_size): - end = start + args.minibatch_size - mb_inds = b_inds[start:end] - - _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds]) - logratio = newlogprob - b_logprobs[mb_inds] - ratio = logratio.exp() - - with torch.no_grad(): - # calculate approx_kl http://joschu.net/blog/kl-approx.html - old_approx_kl = (-logratio).mean() - approx_kl = ((ratio - 1) - logratio).mean() - clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()] - - mb_advantages = b_advantages[mb_inds] - if args.norm_adv: - mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) - - # Policy loss - pg_loss1 = -mb_advantages * ratio - pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + 
args.clip_coef) - pg_loss = torch.max(pg_loss1, pg_loss2).mean() - - # Value loss - newvalue = newvalue.view(-1) - if args.clip_vloss: - v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 - v_clipped = b_values[mb_inds] + torch.clamp( - newvalue - b_values[mb_inds], - -args.clip_coef, - args.clip_coef, - ) - v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 - v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) - v_loss = 0.5 * v_loss_max.mean() - else: - v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() - - entropy_loss = entropy.mean() - loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef - - optimizer.zero_grad() - loss.backward() - nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) - optimizer.step() - - if args.target_kl is not None: - if approx_kl > args.target_kl: - break - - y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() - var_y = np.var(y_true) - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent - mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) - writer.add_scalar("charts/episode_returns", np.mean(episode_rewards), global_step) - writer.add_scalar("losses/value_loss", v_loss.item(), global_step) - writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step) - writer.add_scalar("losses/entropy", entropy_loss.item(), global_step) - writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step) - writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step) - writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step) - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) - - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) - writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) - - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) - 
writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) - writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) - - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() - writer.close() diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/conda-environment.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/conda-environment.yaml deleted file mode 100644 index cd0b0b09..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/conda-environment.yaml +++ /dev/null @@ -1,165 +0,0 @@ -name: test2_py -channels: - - defaults -dependencies: - - _libgcc_mutex=0.1=main - - _openmp_mutex=5.1=1_gnu - - ca-certificates=2024.3.11=h06a4308_0 - - ld_impl_linux-64=2.38=h1181459_1 - - libffi=3.3=he6710b0_2 - - libgcc-ng=11.2.0=h1234567_1 - - libgomp=11.2.0=h1234567_1 - - libstdcxx-ng=11.2.0=h1234567_1 - - ncurses=6.4=h6a678d5_0 - - openssl=1.1.1w=h7f8727e_0 - - pip=23.3.1=py38h06a4308_0 - - python=3.8.5=h7579374_1 - - readline=8.2=h5eee18b_0 - - setuptools=68.2.2=py38h06a4308_0 - - sqlite=3.41.2=h5eee18b_0 - - tk=8.6.12=h1ccaba5_0 - - wheel=0.41.2=py38h06a4308_0 - - xz=5.4.6=h5eee18b_0 - - zlib=1.2.13=h5eee18b_0 - - pip: - - absl-py==2.1.0 - - aiosignal==1.3.1 - - alabaster==0.7.13 - - appdirs==1.4.4 - - attrs==23.2.0 - - babel==2.14.0 - - beautifulsoup4==4.12.3 - - cachetools==5.3.3 - - certifi==2024.2.2 - - cffi==1.16.0 - - charset-normalizer==3.3.2 - - click==8.1.7 - - cloudpickle==3.0.0 - - cmake==3.29.2 - - contourpy==1.1.1 - - cycler==0.12.1 - - dm-tree==0.1.8 - - docker-pycreds==0.4.0 - - docutils==0.20.1 - - exceptiongroup==1.2.0 - - farama-notifications==0.0.4 - - filelock==3.13.4 - - fonttools==4.51.0 - - frozenlist==1.4.1 - - fsspec==2024.3.1 - - furo==2024.1.29 - - future==1.0.0 - - gitdb==4.0.11 - - gitpython==3.1.43 - - glcontext==2.5.0 - - glfw==1.12.0 - - google-auth==2.29.0 - - google-auth-oauthlib==1.0.0 - - grpcio==1.62.1 - - gym==0.23.0 - - gym-notices==0.0.8 - - gymnasium==0.28.1 - - idna==3.7 - - imageio==2.34.0 - - imageio-ffmpeg==0.3.0 - - imagesize==1.4.1 - - importlib-metadata==7.1.0 - - importlib-resources==6.4.0 - - iniconfig==2.0.0 - - jax-jumpy==1.0.0 - - jinja2==3.1.3 - - jsonschema==4.21.1 - - jsonschema-specifications==2023.12.1 - - kiwisolver==1.4.5 - - lazy-loader==0.4 - - lz4==4.3.3 - - markdown==3.6 - - markdown-it-py==3.0.0 - - markupsafe==2.1.5 - - matplotlib==3.7.5 - - mdurl==0.1.2 - - moderngl==5.10.0 - - mpmath==1.3.0 - - msgpack==1.0.8 - - networkx==3.1 - - numpy==1.24.4 - - nvidia-cublas-cu12==12.1.3.1 - - nvidia-cuda-cupti-cu12==12.1.105 - - nvidia-cuda-nvrtc-cu12==12.1.105 - - nvidia-cuda-runtime-cu12==12.1.105 - - nvidia-cudnn-cu12==8.9.2.26 - - nvidia-cufft-cu12==11.0.2.54 - - nvidia-curand-cu12==10.3.2.106 - - nvidia-cusolver-cu12==11.4.5.107 - - nvidia-cusparse-cu12==12.1.0.106 - - nvidia-nccl-cu12==2.19.3 - - nvidia-nvjitlink-cu12==12.4.127 - - nvidia-nvtx-cu12==12.1.105 - - oauthlib==3.2.2 - - packaging==24.0 - - pandas==2.0.3 - - pillow==10.3.0 - - pkgutil-resolve-name==1.3.10 - - pluggy==1.4.0 - - protobuf==4.25.3 - - psutil==5.9.8 - - py-cpuinfo==9.0.0 - - pyarrow==15.0.2 - - pyasn1==0.6.0 - - pyasn1-modules==0.4.0 - - pycparser==2.22 - - pyenchant==3.2.2 - - pyglet==1.4.11 - - pygments==2.17.2 - - pyparsing==3.1.2 - - 
pytest==8.1.1 - - pytest-benchmark==4.0.0 - - python-dateutil==2.9.0.post0 - - pytz==2024.1 - - pywavelets==1.4.1 - - pyyaml==6.0.1 - - ray==2.10.0 - - referencing==0.34.0 - - requests==2.31.0 - - requests-oauthlib==2.0.0 - - rich==13.7.1 - - rpds-py==0.18.0 - - rsa==4.9 - - scikit-image==0.21.0 - - scipy==1.10.0 - - sentry-sdk==1.45.0 - - setproctitle==1.3.3 - - shellingham==1.5.4 - - shimmy==1.3.0 - - six==1.16.0 - - smmap==5.0.1 - - snowballstemmer==2.2.0 - - soupsieve==2.5 - - sphinx==7.1.2 - - sphinx-basic-ng==1.0.0b2 - - sphinx-tabs==3.4.5 - - sphinxcontrib-applehelp==1.0.4 - - sphinxcontrib-devhelp==1.0.2 - - sphinxcontrib-htmlhelp==2.0.1 - - sphinxcontrib-jsmath==1.0.1 - - sphinxcontrib-qthelp==1.0.3 - - sphinxcontrib-serializinghtml==1.1.5 - - sphinxcontrib-spelling==8.0.0 - - syllabus-rl==0.5 - - sympy==1.12 - - tensorboard==2.14.0 - - tensorboard-data-server==0.7.2 - - tensorboardx==2.6.2.2 - - tifffile==2023.7.10 - - tomli==2.0.1 - - torch==2.2.2 - - triton==2.2.0 - - typer==0.12.3 - - typing-extensions==4.11.0 - - tzdata==2024.1 - - urllib3==2.2.1 - - wandb==0.16.6 - - werkzeug==3.0.2 - - zipp==3.18.1 -prefix: /home/user/miniconda/envs/test2_py - diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/config.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/config.yaml deleted file mode 100644 index fc31f40a..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/config.yaml +++ /dev/null @@ -1,130 +0,0 @@ -wandb_version: 1 - -exp_name: - desc: null - value: cleanrl_procgen_plr -seed: - desc: null - value: 1 -torch_deterministic: - desc: null - value: true -cuda: - desc: null - value: true -track: - desc: null - value: true -wandb_project_name: - desc: null - value: syllabus -wandb_entity: - desc: null - value: null -capture_video: - desc: null - value: false -logging_dir: - desc: null - value: . 
-env_id: - desc: null - value: bigfish -total_timesteps: - desc: null - value: 25000000 -learning_rate: - desc: null - value: 0.0005 -num_envs: - desc: null - value: 64 -num_steps: - desc: null - value: 256 -anneal_lr: - desc: null - value: false -gae: - desc: null - value: true -gamma: - desc: null - value: 0.999 -gae_lambda: - desc: null - value: 0.95 -num_minibatches: - desc: null - value: 8 -update_epochs: - desc: null - value: 3 -norm_adv: - desc: null - value: true -clip_coef: - desc: null - value: 0.2 -clip_vloss: - desc: null - value: true -ent_coef: - desc: null - value: 0.01 -vf_coef: - desc: null - value: 0.5 -max_grad_norm: - desc: null - value: 0.5 -target_kl: - desc: null - value: null -full_dist: - desc: null - value: true -curriculum: - desc: null - value: true -curriculum_method: - desc: null - value: plr -num_eval_episodes: - desc: null - value: 10 -batch_size: - desc: null - value: 16384 -minibatch_size: - desc: null - value: 2048 -_wandb: - desc: null - value: - code_path: code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py - python_version: 3.8.5 - cli_version: 0.16.6 - framework: torch - is_jupyter_run: false - is_kaggle_kernel: false - start_time: 1713845396.0 - t: - 1: - - 1 - - 30 - - 55 - 2: - - 1 - - 30 - - 55 - 3: - - 13 - - 16 - - 23 - - 35 - 4: 3.8.5 - 5: 0.16.6 - 8: - - 5 - 13: linux-x86_64 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/diff.patch b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/diff.patch deleted file mode 100644 index f683cbce..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/diff.patch +++ /dev/null @@ -1,133 +0,0 @@ -diff --git a/setup.py b/setup.py -index 31e09f2..22a94e8 100644 ---- a/setup.py -+++ b/setup.py -@@ -2,7 +2,7 @@ from setuptools import find_packages, setup - - - extras = dict() --extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] -+extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] - extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] - extras['all'] = extras['test'] + extras['docs'] - -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -index dabcd50..b807304 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -+++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -@@ -136,7 +136,7 @@ def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - env = MultiProcessingSyncWrapper( - env, - curriculum.get_components(), -- update_on_step=curriculum.requires_step_updates, -+ update_on_step=False, - task_space=env.task_space, - ) - return env -@@ -150,37 +150,31 @@ def wrap_vecenv(vecenv): - return vecenv - - --def full_level_replay_evaluate( -+def slow_level_replay_evaluate( - env_name, - policy, - num_episodes, - device, -- num_levels=1 # Not used -+ num_levels=0 - ): - policy.eval() - - eval_envs = ProcgenEnv( -- num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False -+ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) -- -- # Seed environments -- seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in 
range(num_episodes)] -- for i, seed in enumerate(seeds): -- eval_envs.seed(seed, i) -- - eval_obs, _ = eval_envs.reset() -- eval_episode_rewards = [-1] * num_episodes -+ eval_episode_rewards = [] - -- while -1 in eval_episode_rewards: -+ while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): -- if 'episode' in info.keys() and eval_episode_rewards[i] == -1: -- eval_episode_rewards[i] = info['episode']['r'] -+ if 'episode' in info.keys(): -+ eval_episode_rewards.append(info['episode']['r']) - - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) -@@ -251,7 +245,7 @@ if __name__ == "__main__": - ) - # wandb.run.log_code("./syllabus/examples") - -- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) -+ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), -@@ -271,7 +265,9 @@ if __name__ == "__main__": - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) -- sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) -+ # code to edit -+ # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) -+ sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) - - # Intialize Curriculum Method - if args.curriculum_method == "plr": -@@ -485,13 +481,13 @@ if __name__ == "__main__": - mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) -- full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( -+ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) -- full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( -+ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - -@@ -510,17 +506,17 @@ if __name__ == "__main__": - - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) -- writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) -+ writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, 
global_step) -+ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) - - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) -- writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) -- writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) -- writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) -- writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) -+ writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) - - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/events.out.tfevents.1713845400.f411843fc70b.2432.0 b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/events.out.tfevents.1713845400.f411843fc70b.2432.0 deleted file mode 120000 index 24fc08a3..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/events.out.tfevents.1713845400.f411843fc70b.2432.0 +++ /dev/null @@ -1 +0,0 @@ -/data/averma/MARL/Syllabus/syllabus/examples/training_scripts/runs/bigfish__cleanrl_procgen_plr__1__1713845394/events.out.tfevents.1713845400.f411843fc70b.2432.0 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/requirements.txt b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/requirements.txt deleted file mode 100644 index 7f33d240..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/requirements.txt +++ /dev/null @@ -1,146 +0,0 @@ -Babel==2.14.0 -Farama-Notifications==0.0.4 -GitPython==3.1.43 -Jinja2==3.1.3 -Markdown==3.6 -MarkupSafe==2.1.5 -PyWavelets==1.4.1 -PyYAML==6.0.1 -Pygments==2.17.2 -Shimmy==1.3.0 -Sphinx==7.1.2 -Syllabus-RL==0.5 -Werkzeug==3.0.2 -absl-py==2.1.0 -aiosignal==1.3.1 -alabaster==0.7.13 -appdirs==1.4.4 -attrs==23.2.0 -beautifulsoup4==4.12.3 -cachetools==5.3.3 -certifi==2024.2.2 -cffi==1.16.0 -charset-normalizer==3.3.2 -click==8.1.7 -cloudpickle==3.0.0 -cmake==3.29.2 -colorama==0.4.6 -contourpy==1.1.1 -cycler==0.12.1 -dm-tree==0.1.8 -docker-pycreds==0.4.0 -docutils==0.20.1 -exceptiongroup==1.2.0 -filelock==3.13.4 -fonttools==4.51.0 -frozenlist==1.4.1 -fsspec==2024.3.1 -furo==2024.1.29 -future==1.0.0 -gitdb==4.0.11 -glcontext==2.5.0 -glfw==1.12.0 -google-auth-oauthlib==1.0.0 -google-auth==2.29.0 -grpcio==1.62.1 -gym-notices==0.0.8 -gym==0.23.0 -gymnasium==0.28.1 -idna==3.7 -imageio-ffmpeg==0.3.0 -imageio==2.34.0 -imagesize==1.4.1 -importlib_metadata==7.1.0 -importlib_resources==6.4.0 -iniconfig==2.0.0 -jax-jumpy==1.0.0 -jsonschema-specifications==2023.12.1 -jsonschema==4.21.1 -kiwisolver==1.4.5 -lazy_loader==0.4 -lz4==4.3.3 -markdown-it-py==3.0.0 -matplotlib==3.7.5 -mdurl==0.1.2 
-moderngl==5.10.0 -mpmath==1.3.0 -msgpack==1.0.8 -networkx==3.1 -numpy==1.24.4 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-nccl-cu12==2.19.3 -nvidia-nvjitlink-cu12==12.4.127 -nvidia-nvtx-cu12==12.1.105 -oauthlib==3.2.2 -packaging==24.0 -pandas==2.0.3 -pillow==10.3.0 -pip==23.3.1 -pkgutil_resolve_name==1.3.10 -pluggy==1.4.0 -procgen==0.9.5+ed4be81 -protobuf==4.25.3 -psutil==5.9.8 -psutil==5.9.8 -py-cpuinfo==9.0.0 -pyarrow==15.0.2 -pyasn1==0.6.0 -pyasn1_modules==0.4.0 -pycparser==2.22 -pyenchant==3.2.2 -pyglet==1.4.11 -pyparsing==3.1.2 -pytest-benchmark==4.0.0 -pytest==8.1.1 -python-dateutil==2.9.0.post0 -pytz==2024.1 -ray==2.10.0 -referencing==0.34.0 -requests-oauthlib==2.0.0 -requests==2.31.0 -rich==13.7.1 -rpds-py==0.18.0 -rsa==4.9 -scikit-image==0.21.0 -scipy==1.10.0 -sentry-sdk==1.45.0 -setproctitle==1.2.2 -setproctitle==1.3.3 -setuptools==68.2.2 -shellingham==1.5.4 -six==1.16.0 -smmap==5.0.1 -snowballstemmer==2.2.0 -soupsieve==2.5 -sphinx-basic-ng==1.0.0b2 -sphinx-tabs==3.4.5 -sphinxcontrib-applehelp==1.0.4 -sphinxcontrib-devhelp==1.0.2 -sphinxcontrib-htmlhelp==2.0.1 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.3 -sphinxcontrib-serializinghtml==1.1.5 -sphinxcontrib-spelling==8.0.0 -sympy==1.12 -tensorboard-data-server==0.7.2 -tensorboard==2.14.0 -tensorboardX==2.6.2.2 -tifffile==2023.7.10 -tomli==2.0.1 -torch==2.2.2 -triton==2.2.0 -typer==0.12.3 -typing_extensions==4.11.0 -tzdata==2024.1 -urllib3==2.2.1 -wandb==0.16.6 -wheel==0.41.2 -zipp==3.18.1 \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch deleted file mode 100644 index 66d57e37..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch +++ /dev/null @@ -1,1419 +0,0 @@ -diff --git a/setup.py b/setup.py -index 31e09f2..22a94e8 100644 ---- a/setup.py -+++ b/setup.py -@@ -2,7 +2,7 @@ from setuptools import find_packages, setup - - - extras = dict() --extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] -+extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] - extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] - extras['all'] = extras['test'] + extras['docs'] - -diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py -index 03284da..4ca9aeb 100644 ---- a/syllabus/core/curriculum_base.py -+++ b/syllabus/core/curriculum_base.py -@@ -76,7 +76,7 @@ class Curriculum: - """ - self.completed_tasks += 1 - -- def update_on_step(self, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: -+ def update_on_step(self, task: typing.Any, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: - """ Update the curriculum with the current step results from the environment. 
- - :param obs: Observation from teh environment -@@ -88,7 +88,7 @@ class Curriculum: - """ - raise NotImplementedError("This curriculum does not require step updates. Set update_on_step for the environment sync wrapper to False to improve performance and prevent this error.") - -- def update_on_step_batch(self, step_results: List[typing.Tuple[int, int, int, int, int]], env_id: int = None) -> None: -+ def update_on_step_batch(self, step_results: List[typing.Tuple[Any, Any, int, int, int, int]], env_id: int = None) -> None: - """Update the curriculum with a batch of step results from the environment. - - This method can be overridden to provide a more efficient implementation. It is used -@@ -96,9 +96,9 @@ class Curriculum: - - :param step_results: List of step results - """ -- obs, rews, terms, truncs, infos = tuple(step_results) -+ tasks, obs, rews, terms, truncs, infos = tuple(step_results) - for i in range(len(obs)): -- self.update_on_step(obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) -+ self.update_on_step(tasks[i], obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) - - def update_on_episode(self, episode_return: float, episode_length: int, episode_task: Any, env_id: int = None) -> None: - """Update the curriculum with episode results from the environment. -diff --git a/syllabus/core/curriculum_sync_wrapper.py b/syllabus/core/curriculum_sync_wrapper.py -index 6e069d8..f986643 100644 ---- a/syllabus/core/curriculum_sync_wrapper.py -+++ b/syllabus/core/curriculum_sync_wrapper.py -@@ -29,6 +29,14 @@ class CurriculumWrapper: - def tasks(self): - return self.task_space.tasks - -+ @property -+ def requires_step_updates(self): -+ return self.curriculum.requires_step_updates -+ -+ @property -+ def requires_episode_updates(self): -+ return self.curriculum.requires_episode_updates -+ - def get_tasks(self, task_space=None): - return self.task_space.get_tasks(gym_space=task_space) - -diff --git a/syllabus/core/environment_sync_wrapper.py b/syllabus/core/environment_sync_wrapper.py -index c995aa1..6edee7c 100644 ---- a/syllabus/core/environment_sync_wrapper.py -+++ b/syllabus/core/environment_sync_wrapper.py -@@ -19,7 +19,8 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - def __init__(self, - env, - components: MultiProcessingComponents, -- update_on_step: bool = True, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? -+ update_on_step: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? -+ update_on_progress: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? 
- batch_size: int = 100, - buffer_size: int = 2, # Having an extra task in the buffer minimizes wait time at reset - task_space: TaskSpace = None, -@@ -34,6 +35,7 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - self.update_queue = components.update_queue - self.task_space = task_space - self.update_on_step = update_on_step -+ self.update_on_progress = update_on_progress - self.batch_size = batch_size - self.global_task_completion = global_task_completion - self.task_progress = 0.0 -@@ -125,17 +127,21 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - def _package_step_updates(self): - step_batch = { - "update_type": "step_batch", -- "metrics": ([self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), -+ "metrics": ([self._tasks[:self._batch_step], self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), - "env_id": self.instance_id, - "request_sample": False - } -- task_batch = { -- "update_type": "task_progress_batch", -- "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), -- "env_id": self.instance_id, -- "request_sample": False -- } -- return [step_batch, task_batch] -+ update = [step_batch] -+ -+ if self.update_on_progress: -+ task_batch = { -+ "update_type": "task_progress_batch", -+ "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), -+ "env_id": self.instance_id, -+ "request_sample": False -+ } -+ update.append(task_batch) -+ return update - - def add_task(self, task): - update = { -diff --git a/syllabus/curricula/annealing_box.py b/syllabus/curricula/annealing_box.py -index 6c565ec..101981c 100644 ---- a/syllabus/curricula/annealing_box.py -+++ b/syllabus/curricula/annealing_box.py -@@ -49,8 +49,8 @@ class AnnealingBoxCurriculum(Curriculum): - """ - # Linear annealing from start_values to end_values - annealed_values = ( -- self.start_values + (self.end_values - self.start_values) * -- np.minimum(self.current_step, self.total_steps) / self.total_steps -+ self.start_values + (self.end_values - self.start_values) * -+ np.minimum(self.current_step, self.total_steps) / self.total_steps - ) - -- return [annealed_values.copy() for _ in range(k)] -\ No newline at end of file -+ return [annealed_values.copy() for _ in range(k)] -diff --git a/syllabus/curricula/noop.py b/syllabus/curricula/noop.py -index f6bd5dc..fb5d8ae 100644 ---- a/syllabus/curricula/noop.py -+++ b/syllabus/curricula/noop.py -@@ -28,7 +28,7 @@ class NoopCurriculum(Curriculum): - """ - pass - -- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: - """ - Update the curriculum with the current step results from the environment. - """ -diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py -index 9515df4..9c808dd 100644 ---- a/syllabus/curricula/plr/plr_wrapper.py -+++ b/syllabus/curricula/plr/plr_wrapper.py -@@ -23,16 +23,15 @@ class RolloutStorage(object): - get_value=None, - ): - self.num_steps = num_steps -- self.buffer_steps = num_steps * 2 # Hack to prevent overflow from lagging updates. -+ self.buffer_steps = num_steps * 4 # Hack to prevent overflow from lagging updates. 
- self.num_processes = num_processes - self._requires_value_buffers = requires_value_buffers - self._get_value = get_value - self.tasks = torch.zeros(self.buffer_steps, num_processes, 1, dtype=torch.int) - self.masks = torch.ones(self.buffer_steps + 1, num_processes, 1) - self.obs = [[[0] for _ in range(self.num_processes)]] * self.buffer_steps -- self._fill = torch.zeros(self.buffer_steps, num_processes, 1) - self.env_steps = [0] * num_processes -- self.should_update = False -+ self.ready_buffers = set() - - if requires_value_buffers: - self.returns = torch.zeros(self.buffer_steps + 1, num_processes, 1) -@@ -46,12 +45,10 @@ class RolloutStorage(object): - self.action_log_dist = torch.zeros(self.buffer_steps, num_processes, action_space.n) - - self.num_steps = num_steps -- self.step = 0 - - def to(self, device): - self.masks = self.masks.to(device) - self.tasks = self.tasks.to(device) -- self._fill = self._fill.to(device) - if self._requires_value_buffers: - self.rewards = self.rewards.to(device) - self.value_preds = self.value_preds.to(device) -@@ -59,108 +56,79 @@ class RolloutStorage(object): - else: - self.action_log_dist = self.action_log_dist.to(device) - -- def insert(self, masks, action_log_dist=None, value_preds=None, rewards=None, tasks=None): -- if self._requires_value_buffers: -- assert (value_preds is not None and rewards is not None), "Selected strategy requires value_preds and rewards" -- if len(rewards.shape) == 3: -- rewards = rewards.squeeze(2) -- self.value_preds[self.step].copy_(torch.as_tensor(value_preds)) -- self.rewards[self.step].copy_(torch.as_tensor(rewards)[:, None]) -- self.masks[self.step + 1].copy_(torch.as_tensor(masks)[:, None]) -- else: -- self.action_log_dist[self.step].copy_(action_log_dist) -- if tasks is not None: -- assert isinstance(tasks[0], int), "Provided task must be an integer" -- self.tasks[self.step].copy_(torch.as_tensor(tasks)[:, None]) -- self.step = (self.step + 1) % self.num_steps -- - def insert_at_index(self, env_index, mask=None, action_log_dist=None, obs=None, reward=None, task=None, steps=1): -- if env_index >= self.num_processes: -- warnings.warn(f"Env index {env_index} is greater than the number of processes {self.num_processes}. Using index {env_index % self.num_processes} instead.") -- env_index = env_index % self.num_processes -- - step = self.env_steps[env_index] - end_step = step + steps -- # Update buffer fill traacker, and check for common usage errors. -- try: -- if end_step > len(self._fill): -- raise IndexError -- self._fill[step:end_step, env_index] = 1 -- except IndexError as e: -- if any(self._fill[:][env_index] == 0): -- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. Your value for PLR's num_processes may be too high.") from e -- else: -- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. 
Your value for PLR's num_processes may be too low.") from e - - if mask is not None: - self.masks[step + 1:end_step + 1, env_index].copy_(torch.as_tensor(mask[:, None])) -+ - if obs is not None: - for s in range(step, end_step): - self.obs[s][env_index] = obs[s - step] -+ - if reward is not None: - self.rewards[step:end_step, env_index].copy_(torch.as_tensor(reward[:, None])) -+ - if action_log_dist is not None: - self.action_log_dist[step:end_step, env_index].copy_(torch.as_tensor(action_log_dist[:, None])) -+ - if task is not None: - try: -- task = int(task) -+ int(task[0]) - except TypeError: -- assert isinstance(task, int), f"Provided task must be an integer, got {task} with type {type(task)} instead." -- self.tasks[step:end_step, env_index].copy_(torch.as_tensor(task)) -- else: -- self.env_steps[env_index] += steps -- # Hack for now, we call insert_at_index twice -- while all(self._fill[self.step] == 1): -- self.step = (self.step + 1) % self.buffer_steps -- # Check if we have enough steps to compute a task sampler update -- if self.step == self.num_steps + 1: -- self.should_update = True -- -- def _get_values(self): -+ assert isinstance(task, int), f"Provided task must be an integer, got {task[0]} with type {type(task[0])} instead." -+ self.tasks[step:end_step, env_index].copy_(torch.as_tensor(np.array(task)[:, None])) -+ -+ self.env_steps[env_index] += steps -+ if env_index not in self.ready_buffers and self.env_steps[env_index] >= self.num_steps: -+ self.ready_buffers.add(env_index) -+ -+ def _get_values(self, env_index): - if self._get_value is None: - raise UsageError("Selected strategy requires value predictions. Please provide get_value function.") -- for step in range(self.num_steps): -- values = self._get_value(self.obs[step]) -+ for step in range(0, self.num_steps, self.num_processes): -+ obs = self.obs[step: step + self.num_processes][env_index] -+ values = self._get_value(obs) -+ -+ # Reshape values if necessary - if len(values.shape) == 3: - warnings.warn(f"Value function returned a 3D tensor of shape {values.shape}. Attempting to squeeze last dimension.") - values = torch.squeeze(values, -1) - if len(values.shape) == 1: - warnings.warn(f"Value function returned a 1D tensor of shape {values.shape}. 
Attempting to unsqueeze last dimension.") - values = torch.unsqueeze(values, -1) -- self.value_preds[step].copy_(values) - -- def after_update(self): -+ self.value_preds[step: step + self.num_processes, env_index].copy_(values) -+ -+ def after_update(self, env_index): - # After consuming the first num_steps of data, remove them and shift the remaining data in the buffer -- self.tasks[0: self.num_steps].copy_(self.tasks[self.num_steps: self.buffer_steps]) -- self.masks[0: self.num_steps].copy_(self.masks[self.num_steps: self.buffer_steps]) -- self.obs[0: self.num_steps][:] = self.obs[self.num_steps: self.buffer_steps][:] -+ self.tasks = self.tasks.roll(-self.num_steps, 0) -+ self.masks = self.masks.roll(-self.num_steps, 0) -+ self.obs[0:][env_index] = self.obs[self.num_steps: self.buffer_steps][env_index] - - if self._requires_value_buffers: -- self.returns[0: self.num_steps].copy_(self.returns[self.num_steps: self.buffer_steps]) -- self.rewards[0: self.num_steps].copy_(self.rewards[self.num_steps: self.buffer_steps]) -- self.value_preds[0: self.num_steps].copy_(self.value_preds[self.num_steps: self.buffer_steps]) -+ self.returns = self.returns.roll(-self.num_steps, 0) -+ self.rewards = self.rewards.roll(-self.num_steps, 0) -+ self.value_preds = self.value_preds.roll(-self.num_steps, 0) - else: -- self.action_log_dist[0: self.num_steps].copy_(self.action_log_dist[self.num_steps: self.buffer_steps]) -+ self.action_log_dist = self.action_log_dist.roll(-self.num_steps, 0) - -- self._fill[0: self.num_steps].copy_(self._fill[self.num_steps: self.buffer_steps]) -- self._fill[self.num_steps: self.buffer_steps].copy_(0) -+ self.env_steps[env_index] -= self.num_steps -+ self.ready_buffers.remove(env_index) - -- self.env_steps = [steps - self.num_steps for steps in self.env_steps] -- self.should_update = False -- self.step = self.step - self.num_steps -- -- def compute_returns(self, gamma, gae_lambda): -+ def compute_returns(self, gamma, gae_lambda, env_index): - assert self._requires_value_buffers, "Selected strategy does not use compute_rewards." -- self._get_values() -+ self._get_values(env_index) - gae = 0 - for step in reversed(range(self.rewards.size(0), self.num_steps)): - delta = ( -- self.rewards[step] -- + gamma * self.value_preds[step + 1] * self.masks[step + 1] -- - self.value_preds[step] -+ self.rewards[step, env_index] -+ + gamma * self.value_preds[step + 1, env_index] * self.masks[step + 1, env_index] -+ - self.value_preds[step, env_index] - ) -- gae = delta + gamma * gae_lambda * self.masks[step + 1] * gae -- self.returns[step] = gae + self.value_preds[step] -+ gae = delta + gamma * gae_lambda * self.masks[step + 1, env_index] * gae -+ self.returns[step, env_index] = gae + self.value_preds[step, env_index] - - - def null(x): -@@ -252,11 +220,15 @@ class PrioritizedLevelReplay(Curriculum): - else: - return [self._task_sampler.sample() for _ in range(k)] - -- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: - """ - Update the curriculum with the current step results from the environment. - """ - assert env_id is not None, "env_id must be provided for PLR updates." -+ if env_id >= self._num_processes: -+ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. 
Using index {env_id % self._num_processes} instead.") -+ env_id = env_id % self._num_processes -+ - # Update rollouts - self._rollouts.insert_at_index( - env_id, -@@ -266,14 +238,22 @@ class PrioritizedLevelReplay(Curriculum): - obs=np.array([obs]), - ) - -+ # Update task sampler -+ if env_id in self._rollouts.ready_buffers: -+ self._update_sampler(env_id) -+ - def update_on_step_batch( -- self, step_results: List[Tuple[Any, int, bool, bool, Dict]], env_id: int = None -+ self, step_results: List[Tuple[int, Any, int, bool, bool, Dict]], env_id: int = None - ) -> None: - """ - Update the curriculum with a batch of step results from the environment. - """ - assert env_id is not None, "env_id must be provided for PLR updates." -- obs, rews, terms, truncs, infos = step_results -+ if env_id >= self._num_processes: -+ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. Using index {env_id % self._num_processes} instead.") -+ env_id = env_id % self._num_processes -+ -+ tasks, obs, rews, terms, truncs, infos = step_results - self._rollouts.insert_at_index( - env_id, - mask=np.logical_not(np.logical_or(terms, truncs)), -@@ -281,25 +261,19 @@ class PrioritizedLevelReplay(Curriculum): - reward=rews, - obs=obs, - steps=len(rews), -+ task=tasks, - ) - -- def update_task_progress(self, task: Any, success_prob: float, env_id: int = None) -> None: -- """ -- Update the curriculum with a task and its success probability upon -- success or failure. -- """ -- assert env_id is not None, "env_id must be provided for PLR updates." -- self._rollouts.insert_at_index( -- env_id, -- task=task, -- ) - # Update task sampler -- if self._rollouts.should_update: -- if self._task_sampler.requires_value_buffers: -- self._rollouts.compute_returns(self._gamma, self._gae_lambda) -- self._task_sampler.update_with_rollouts(self._rollouts) -- self._rollouts.after_update() -- self._task_sampler.after_update() -+ if env_id in self._rollouts.ready_buffers: -+ self._update_sampler(env_id) -+ -+ def _update_sampler(self, env_id): -+ if self._task_sampler.requires_value_buffers: -+ self._rollouts.compute_returns(self._gamma, self._gae_lambda, env_id) -+ self._task_sampler.update_with_rollouts(self._rollouts, env_id) -+ self._rollouts.after_update(env_id) -+ self._task_sampler.after_update() - - def _enumerate_tasks(self, space): - assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete" -@@ -312,10 +286,10 @@ class PrioritizedLevelReplay(Curriculum): - """ - Log the task distribution to the provided tensorboard writer. 
- """ -- super().log_metrics(writer, step) -+ # super().log_metrics(writer, step) - metrics = self._task_sampler.metrics() - writer.add_scalar("curriculum/proportion_seen", metrics["proportion_seen"], step) - writer.add_scalar("curriculum/score", metrics["score"], step) -- for task in list(self.task_space.tasks)[:10]: -- writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) -- writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) -+ # for task in list(self.task_space.tasks)[:10]: -+ # writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) -+ # writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) -diff --git a/syllabus/curricula/plr/task_sampler.py b/syllabus/curricula/plr/task_sampler.py -index 15ad485..c1e97a1 100644 ---- a/syllabus/curricula/plr/task_sampler.py -+++ b/syllabus/curricula/plr/task_sampler.py -@@ -73,7 +73,7 @@ class TaskSampler: - 'Must provide action space to PLR if using "policy_entropy", "least_confidence", or "min_margin" strategies' - ) - -- def update_with_rollouts(self, rollouts): -+ def update_with_rollouts(self, rollouts, actor_id=None): - if self.strategy == "random": - return - -@@ -93,7 +93,7 @@ class TaskSampler: - else: - raise ValueError(f"Unsupported strategy, {self.strategy}") - -- self._update_with_rollouts(rollouts, score_function) -+ self._update_with_rollouts(rollouts, score_function, actor_index=actor_id) - - def update_task_score(self, actor_index, task_idx, score, num_steps): - score = self._partial_update_task_score(actor_index, task_idx, score, num_steps, done=True) -@@ -165,14 +165,15 @@ class TaskSampler: - def requires_value_buffers(self): - return self.strategy in ["gae", "value_l1", "one_step_td_error"] - -- def _update_with_rollouts(self, rollouts, score_function): -+ def _update_with_rollouts(self, rollouts, score_function, actor_index=None): - tasks = rollouts.tasks - if not self.requires_value_buffers: - policy_logits = rollouts.action_log_dist - done = ~(rollouts.masks > 0) - total_steps, num_actors = rollouts.tasks.shape[:2] - -- for actor_index in range(num_actors): -+ actors = [actor_index] if actor_index is not None else range(num_actors) -+ for actor_index in actors: - done_steps = done[:, actor_index].nonzero()[:total_steps, 0] - start_t = 0 - -diff --git a/syllabus/curricula/sequential.py b/syllabus/curricula/sequential.py -index baa1263..ec3b8b0 100644 ---- a/syllabus/curricula/sequential.py -+++ b/syllabus/curricula/sequential.py -@@ -177,9 +177,9 @@ class SequentialCurriculum(Curriculum): - if self.current_curriculum.requires_episode_updates: - self.current_curriculum.update_on_episode(episode_return, episode_len, episode_task, env_id) - -- def update_on_step(self, obs, rew, term, trunc, info, env_id=None): -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id=None): - if self.current_curriculum.requires_step_updates: -- self.current_curriculum.update_on_step(obs, rew, term, trunc, info, env_id) -+ self.current_curriculum.update_on_step(task, obs, rew, term, trunc, info, env_id) - - def update_on_step_batch(self, step_results, env_id=None): - if self.current_curriculum.requires_step_updates: -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py -index a6d469e..b848d69 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py 
-+++ b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py -@@ -14,6 +14,7 @@ import gym as openai_gym - import gymnasium as gym - import numpy as np - import procgen # noqa: F401 -+from procgen import ProcgenEnv - import torch - import torch.nn as nn - import torch.optim as optim -@@ -21,10 +22,10 @@ from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 - from torch.utils.tensorboard import SummaryWriter - - from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum --from syllabus.curricula import DomainRandomization, LearningProgressCurriculum, CentralizedPrioritizedLevelReplay -+from syllabus.curricula import CentralizedPrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum - from syllabus.examples.models import ProcgenAgent - from syllabus.examples.task_wrappers import ProcgenTaskWrapper --from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize -+from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - - def parse_args(): -@@ -46,6 +47,8 @@ def parse_args(): - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="weather to capture videos of the agent performances (check out `videos` folder)") -+ parser.add_argument("--logging-dir", type=str, default=".", -+ help="the base directory for logging and wandb storage.") - - # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="starpilot", -@@ -124,15 +127,15 @@ PROCGEN_RETURN_BOUNDS = { - } - - --def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): -+def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) -- env = ProcgenTaskWrapper(env, env_id, seed=seed) -- if curriculum_components is not None: -+ if curriculum is not None: -+ env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, -- curriculum_components, -+ curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, - ) -@@ -147,36 +150,38 @@ def wrap_vecenv(vecenv): - return vecenv - - --def level_replay_evaluate( -+def full_level_replay_evaluate( - env_name, - policy, - num_episodes, - device, -- num_levels=0 -+ num_levels=1 # Not used - ): - policy.eval() -- eval_envs = gym.vector.SyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) -- for i in range(1) -- ] -+ -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False - ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - -- eval_episode_rewards = [] -+ # Seed environments -+ seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] -+ for i, seed in enumerate(seeds): -+ eval_envs.seed(seed, i) -+ - eval_obs, _ = eval_envs.reset() -+ eval_episode_rewards = [-1] * num_episodes - -- while len(eval_episode_rewards) < num_episodes: -+ while -1 in eval_episode_rewards: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - -- eval_obs, _, truncs, terms, infos = 
eval_envs.step(np.array([eval_action.cpu().numpy()])) -- -- for info in infos: -- if 'episode' in info.keys(): -- eval_episode_rewards.append(info['episode']['r']) -+ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -+ for i, info in enumerate(infos): -+ if 'episode' in info.keys() and eval_episode_rewards[i] == -1: -+ eval_episode_rewards[i] = info['episode']['r'] - -- eval_envs.close() - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -@@ -185,8 +190,7 @@ def level_replay_evaluate( - return mean_returns, stddev_returns, normalized_mean_returns - - --def fast_level_replay_evaluate( -- eval_envs, -+def level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -194,9 +198,13 @@ def fast_level_replay_evaluate( - num_levels=0 - ): - policy.eval() -- possible_seeds = np.arange(0, num_levels + 1) -- eval_obs, _ = eval_envs.reset(seed=list(np.random.choice(possible_seeds, size=num_episodes))) - -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -+ ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") -+ eval_envs = wrap_vecenv(eval_envs) -+ eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: -@@ -231,10 +239,11 @@ if __name__ == "__main__": - name=run_name, - monitor_gym=True, - save_code=True, -- # dir="/fs/nexus-scratch/rsulli/" -+ dir=args.logging_dir - ) -- wandb.run.log_code("./syllabus/examples") -- writer = SummaryWriter(f"./runs/{run_name}") -+ # wandb.run.log_code("./syllabus/examples") -+ -+ writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), -@@ -250,7 +259,7 @@ if __name__ == "__main__": - print("Device:", device) - - # Curriculum setup -- task_queue = update_queue = None -+ curriculum = None - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) -@@ -273,6 +282,16 @@ if __name__ == "__main__": - elif args.curriculum_method == "lp": - print("Using learning progress.") - curriculum = LearningProgressCurriculum(sample_env.task_space) -+ elif args.curriculum_method == "sq": -+ print("Using sequential curriculum.") -+ curricula = [] -+ stopping = [] -+ for i in range(199): -+ curricula.append(i + 1) -+ stopping.append("steps>=50000") -+ curricula.append(list(range(i + 1))) -+ stopping.append("steps>=50000") -+ curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) - else: - raise ValueError(f"Unknown curriculum method {args.curriculum_method}") - curriculum = make_multiprocessing_curriculum(curriculum) -@@ -285,7 +304,7 @@ if __name__ == "__main__": - make_env( - args.env_id, - args.seed + i, -- curriculum_components=curriculum.get_components() if args.curriculum else None, -+ curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) -@@ -293,22 +312,6 @@ if __name__ == "__main__": - ) - envs = wrap_vecenv(envs) - -- test_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=0) -- for i in range(args.num_eval_episodes) -- ] -- ) -- test_eval_envs = wrap_vecenv(test_eval_envs) -- -- train_eval_envs = 
gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=200) -- for i in range(args.num_eval_episodes) -- ] -- ) -- train_eval_envs = wrap_vecenv(train_eval_envs) -- - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( -@@ -369,6 +372,8 @@ if __name__ == "__main__": - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) -+ if curriculum is not None: -+ curriculum.log_metrics(writer, global_step) - break - - # Syllabus curriculum update -@@ -388,8 +393,6 @@ if __name__ == "__main__": - }, - } - curriculum.update(update) -- #if args.curriculum: -- # curriculum.log_metrics(writer, global_step) - - # bootstrap value if not done - with torch.no_grad(): -@@ -487,8 +490,18 @@ if __name__ == "__main__": - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent -- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) -- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) -+ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) -+ full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) -@@ -502,12 +515,21 @@ if __name__ == "__main__": - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) -+ - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) -+ - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) -- 
writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) -+ writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) -+ - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -index e13c22e..b807304 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -+++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -@@ -14,6 +14,7 @@ import gym as openai_gym - import gymnasium as gym - import numpy as np - import procgen # noqa: F401 -+from procgen import ProcgenEnv - import torch - import torch.nn as nn - import torch.optim as optim -@@ -24,7 +25,7 @@ from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curri - from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum - from syllabus.examples.models import ProcgenAgent - from syllabus.examples.task_wrappers import ProcgenTaskWrapper --from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize -+from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - - def parse_args(): -@@ -126,18 +127,17 @@ PROCGEN_RETURN_BOUNDS = { - } - - --def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): -+def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) -- env = ProcgenTaskWrapper(env, env_id, seed=seed) -- if curriculum_components is not None: -+ if curriculum is not None: -+ env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, -- curriculum_components, -+ curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, -- buffer_size=4, - ) - return env - return thunk -@@ -150,7 +150,7 @@ def wrap_vecenv(vecenv): - return vecenv - - --def level_replay_evaluate( -+def slow_level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -158,28 +158,24 @@ def level_replay_evaluate( - num_levels=0 - ): - policy.eval() -- eval_envs = gym.vector.SyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) -- for i in range(1) -- ] -+ -+ eval_envs = ProcgenEnv( -+ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) -- -- eval_episode_rewards = [] - eval_obs, _ = eval_envs.reset() -+ eval_episode_rewards = [] - - while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - -- eval_obs, _, truncs, terms, infos = eval_envs.step(np.array([eval_action.cpu().numpy()])) -- -- for info in infos: -+ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -+ for i, 
info in enumerate(infos): - if 'episode' in info.keys(): - eval_episode_rewards.append(info['episode']['r']) - -- eval_envs.close() - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -@@ -188,8 +184,7 @@ def level_replay_evaluate( - return mean_returns, stddev_returns, normalized_mean_returns - - --def fast_level_replay_evaluate( -- eval_envs, -+def level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -198,15 +193,12 @@ def fast_level_replay_evaluate( - ): - policy.eval() - -- # Choose evaluation seeds -- if num_levels == 0: -- seeds = np.random.randint(0, 2 ** 16 - 1, size=num_episodes) -- else: -- seeds = np.random.choice(np.arange(0, num_levels), size=num_episodes) -- -- seed_envs = [(int(seed), env) for seed, env in zip(seeds, range(num_episodes))] -- eval_obs, _ = eval_envs.reset(seed=seed_envs) -- -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -+ ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") -+ eval_envs = wrap_vecenv(eval_envs) -+ eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: -@@ -251,9 +243,9 @@ if __name__ == "__main__": - save_code=True, - dir=args.logging_dir - ) -- wandb.run.log_code(os.path.join(args.logging_dir, "/syllabus/examples")) -+ # wandb.run.log_code("./syllabus/examples") - -- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) -+ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), -@@ -273,7 +265,9 @@ if __name__ == "__main__": - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) -- sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) -+ # code to edit -+ # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) -+ sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) - - # Intialize Curriculum Method - if args.curriculum_method == "plr": -@@ -316,7 +310,7 @@ if __name__ == "__main__": - make_env( - args.env_id, - args.seed + i, -- curriculum_components=curriculum.get_components() if args.curriculum else None, -+ curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) -@@ -324,22 +318,6 @@ if __name__ == "__main__": - ) - envs = wrap_vecenv(envs) - -- test_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=0) -- for i in range(args.num_eval_episodes) -- ] -- ) -- test_eval_envs = wrap_vecenv(test_eval_envs) -- -- train_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=200) -- for i in range(args.num_eval_episodes) -- ] -- ) -- train_eval_envs = wrap_vecenv(train_eval_envs) -- - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( -@@ -500,8 +478,18 @@ if __name__ == "__main__": - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent -- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = 
fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) -- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) -+ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) -+ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) -@@ -515,12 +503,21 @@ if __name__ == "__main__": - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) -+ - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) -+ - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) -+ - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() -diff --git a/syllabus/examples/utils/vecenv.py b/syllabus/examples/utils/vecenv.py -index 6e5a0a9..af3b187 100644 ---- a/syllabus/examples/utils/vecenv.py -+++ b/syllabus/examples/utils/vecenv.py -@@ -1,7 +1,6 @@ - import time - from collections import deque - --import gym - import numpy as np - - -@@ -154,12 +153,20 @@ class VecEnvObservationWrapper(VecEnvWrapper): - pass - - def reset(self): -- obs, infos = self.venv.reset() -+ outputs = self.venv.reset() -+ if len(outputs) == 2: -+ obs, infos = outputs -+ else: -+ obs, infos = outputs, {} - return self.process(obs), infos - - def step_wait(self): -- print(self.venv) -- obs, rews, terms, truncs, infos = self.venv.step_wait() -+ env_outputs = self.venv.step_wait() -+ if len(env_outputs) == 4: -+ obs, rews, terms, infos = env_outputs -+ truncs = 
np.zeros_like(terms) -+ else: -+ obs, rews, terms, truncs, infos = env_outputs - return self.process(obs), rews, terms, truncs, infos - - -@@ -209,7 +216,10 @@ class VecNormalize(VecEnvWrapper): - - def reset(self, seed=None): - self.ret = np.zeros(self.num_envs) -- obs, infos = self.venv.reset(seed=seed) -+ if seed is not None: -+ obs, infos = self.venv.reset(seed=seed) -+ else: -+ obs, infos = self.venv.reset() - return self._obfilt(obs), infos - - -@@ -228,7 +238,10 @@ class VecMonitor(VecEnvWrapper): - self.eplen_buf = deque([], maxlen=keep_buf) - - def reset(self, seed=None): -- obs, infos = self.venv.reset(seed=seed) -+ if seed is not None: -+ obs, infos = self.venv.reset(seed=seed) -+ else: -+ obs, infos = self.venv.reset() - self.eprets = np.zeros(self.num_envs, 'f') - self.eplens = np.zeros(self.num_envs, 'i') - return obs, infos -@@ -239,7 +252,8 @@ class VecMonitor(VecEnvWrapper): - self.eprets += rews - self.eplens += 1 - # Convert dict of lists to list of dicts -- infos = [dict(zip(infos, t)) for t in zip(*infos.values())] -+ if isinstance(infos, dict): -+ infos = [dict(zip(infos, t)) for t in zip(*infos.values())] - newinfos = list(infos[:]) - for i in range(len(dones)): - if dones[i]: -diff --git a/syllabus/task_space/task_space.py b/syllabus/task_space/task_space.py -index 316e2f2..1ef674b 100644 ---- a/syllabus/task_space/task_space.py -+++ b/syllabus/task_space/task_space.py -@@ -7,20 +7,53 @@ from gymnasium.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Sp - - class TaskSpace(): - def __init__(self, gym_space: Union[Space, int], tasks=None): -- if isinstance(gym_space, int): -- # Syntactic sugar for discrete space -- gym_space = Discrete(gym_space) -+ -+ if not isinstance(gym_space, Space): -+ gym_space = self._create_gym_space(gym_space) - - self.gym_space = gym_space - -- # Autogenerate task names for discrete spaces -- if isinstance(gym_space, Discrete): -- if tasks is None: -- tasks = range(gym_space.n) -+ # Autogenerate task names -+ if tasks is None: -+ tasks = self._generate_task_names(gym_space) - - self._tasks = set(tasks) if tasks is not None else None - self._encoder, self._decoder = self._make_task_encoder(gym_space, tasks) - -+ def _create_gym_space(self, gym_space): -+ if isinstance(gym_space, int): -+ # Syntactic sugar for discrete space -+ gym_space = Discrete(gym_space) -+ elif isinstance(gym_space, tuple): -+ # Syntactic sugar for discrete space -+ gym_space = MultiDiscrete(gym_space) -+ elif isinstance(gym_space, list): -+ # Syntactic sugar for tuple space -+ spaces = [] -+ for i, value in enumerate(gym_space): -+ spaces[i] = self._create_gym_space(value) -+ gym_space = Tuple(spaces) -+ elif isinstance(gym_space, dict): -+ # Syntactic sugar for dict space -+ spaces = {} -+ for key, value in gym_space.items(): -+ spaces[key] = self._create_gym_space(value) -+ gym_space = Dict(spaces) -+ return gym_space -+ -+ def _generate_task_names(self, gym_space): -+ if isinstance(gym_space, Discrete): -+ tasks = tuple(range(gym_space.n)) -+ elif isinstance(gym_space, MultiDiscrete): -+ tasks = [tuple(range(dim)) for dim in gym_space.nvec] -+ elif isinstance(gym_space, Tuple): -+ tasks = [self._generate_task_names(value) for value in gym_space.spaces] -+ elif isinstance(gym_space, Dict): -+ tasks = {key: tuple(self._generate_task_names(value)) for key, value in gym_space.spaces.items()} -+ else: -+ tasks = None -+ return tasks -+ - def _make_task_encoder(self, space, tasks): - if isinstance(space, Discrete): - assert space.n == len(tasks), f"Number 
of tasks ({space.n}) must match number of discrete options ({len(tasks)})" -@@ -28,14 +61,46 @@ class TaskSpace(): - self._decode_map = {i: task for i, task in enumerate(tasks)} - encoder = lambda task: self._encode_map[task] if task in self._encode_map else None - decoder = lambda task: self._decode_map[task] if task in self._decode_map else None -+ -+ elif isinstance(space, Box): -+ encoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None -+ decoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None - elif isinstance(space, Tuple): -- for i, task in enumerate(tasks): -- assert self.count_tasks(space.spaces[i]) == len(task), "Each task must have number of components equal to Tuple space length. Got {len(task)} components and space length {self.count_tasks(space.spaces[i])}." -+ -+ assert len(space.spaces) == len(tasks), f"Number of task ({len(space.spaces)})must match options in Tuple ({len(tasks)})" - results = [list(self._make_task_encoder(s, t)) for (s, t) in zip(space.spaces, tasks)] - encoders = [r[0] for r in results] - decoders = [r[1] for r in results] - encoder = lambda task: [e(t) for e, t in zip(encoders, task)] - decoder = lambda task: [d(t) for d, t in zip(decoders, task)] -+ -+ elif isinstance(space, MultiDiscrete): -+ assert len(space.nvec) == len(tasks), f"Number of steps in a tasks ({len(space.nvec)}) must match number of discrete options ({len(tasks)})" -+ -+ combinations = [p for p in itertools.product(*tasks)] -+ encode_map = {task: i for i, task in enumerate(combinations)} -+ decode_map = {i: task for i, task in enumerate(combinations)} -+ -+ encoder = lambda task: encode_map[task] if task in encode_map else None -+ decoder = lambda task: decode_map[task] if task in decode_map else None -+ -+ elif isinstance(space, Dict): -+ -+ def helper(task, spaces, tasks, action="encode"): -+ # Iteratively encodes or decodes each space in the dictionary -+ output = {} -+ if (isinstance(spaces, dict) or isinstance(spaces, Dict)): -+ for key, value in spaces.items(): -+ if (isinstance(value, dict) or isinstance(value, Dict)): -+ temp = helper(task[key], value, tasks[key], action) -+ output.update({key: temp}) -+ else: -+ encoder, decoder = self._make_task_encoder(value, tasks[key]) -+ output[key] = encoder(task[key]) if action == "encode" else decoder(task[key]) -+ return output -+ -+ encoder = lambda task: helper(task, space.spaces, tasks, "encode") -+ decoder = lambda task: helper(task, space.spaces, tasks, "decode") - else: - encoder = lambda task: task - decoder = lambda task: task -@@ -152,6 +217,7 @@ class TaskSpace(): - return Discrete(self.gym_space.n + amount) - - def sample(self): -+ assert isinstance(self.gym_space, Discrete) or isinstance(self.gym_space, Box) or isinstance(self.gym_space, Dict) or isinstance(self.gym_space, Tuple) - return self.decode(self.gym_space.sample()) - - def list_tasks(self): -diff --git a/syllabus/task_space/test_task_space.py b/syllabus/task_space/test_task_space.py -index 0ec6b4e..109d0a7 100644 ---- a/syllabus/task_space/test_task_space.py -+++ b/syllabus/task_space/test_task_space.py -@@ -2,33 +2,148 @@ import gymnasium as gym - from syllabus.task_space import TaskSpace - - if __name__ == "__main__": -+ # Discrete Tests - task_space = TaskSpace(gym.spaces.Discrete(3), ["a", "b", "c"]) -+ - assert task_space.encode("a") == 0, f"Expected 0, got {task_space.encode('a')}" - assert task_space.encode("b") == 1, f"Expected 1, got {task_space.encode('b')}" - assert 
task_space.encode("c") == 2, f"Expected 2, got {task_space.encode('c')}" -- assert task_space.encode("d") == None, f"Expected None, got {task_space.encode('d')}" -+ assert task_space.encode("d") is None, f"Expected None, got {task_space.encode('d')}" - - assert task_space.decode(0) == "a", f"Expected a, got {task_space.decode(0)}" - assert task_space.decode(1) == "b", f"Expected b, got {task_space.decode(1)}" - assert task_space.decode(2) == "c", f"Expected c, got {task_space.decode(2)}" -- assert task_space.decode(3) == None, f"Expected None, got {task_space.decode(3)}" -+ assert task_space.decode(3) is None, f"Expected None, got {task_space.decode(3)}" - print("Discrete tests passed!") - -+ # MultiDiscrete Tests -+ task_space = TaskSpace(gym.spaces.MultiDiscrete([3, 2]), [("a", "b", "c"), (1, 0)]) -+ -+ assert task_space.encode(('a', 1)) == 0, f"Expected 0, got {task_space.encode(('a', 1))}" -+ assert task_space.encode(('b', 0)) == 3, f"Expected 3, got {task_space.encode(('b', 0))}" -+ assert task_space.encode(('c', 1)) == 4, f"Expected 4, got {task_space.encode(('c', 1))}" -+ -+ assert task_space.decode(3) == ('b', 0), f"Expected ('b', 0), got {task_space.decode(3)}" -+ assert task_space.decode(5) == ('c', 0), f"Expected ('c', 0), got {task_space.decode(5)}" -+ print("MultiDiscrete tests passed!") -+ -+ # Box Tests - task_space = TaskSpace(gym.spaces.Box(low=0, high=1, shape=(2,)), [(0, 0), (0, 1), (1, 0), (1, 1)]) -+ - assert task_space.encode([0.0, 0.0]) == [0.0, 0.0], f"Expected [0.0, 0.0], got {task_space.encode([0.0, 0.0])}" - assert task_space.encode([0.0, 0.1]) == [0.0, 0.1], f"Expected [0.0, 0.1], got {task_space.encode([0.0, 0.1])}" - assert task_space.encode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.encode([0.1, 0.1])}" - assert task_space.encode([1.0, 0.1]) == [1.0, 0.1], f"Expected [1.0, 0.1], got {task_space.encode([1.0, 0.1])}" - assert task_space.encode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.encode([1.0, 1.0])}" -- assert task_space.encode([1.2, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -- assert task_space.encode([1.0, 1.2]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -- assert task_space.encode([-0.1, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([1.2, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([1.0, 1.2]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([-0.1, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" - - assert task_space.decode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.decode([1.0, 1.0])}" - assert task_space.decode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.decode([0.1, 0.1])}" -- assert task_space.decode([-0.1, 1.0]) == None, f"Expected None, got {task_space.decode([1.2, 1.0])}" -+ assert task_space.decode([-0.1, 1.0]) is None, f"Expected None, got {task_space.decode([1.2, 1.0])}" - print("Box tests passed!") - -+ # Tuple Tests -+ task_spaces = (gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3)) -+ task_names = ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")) -+ task_space = TaskSpace(gym.spaces.Tuple(task_spaces), task_names) -+ -+ assert task_space.encode((('a', 0), 'Y')) == [1, 1], f"Expected 0, got {task_space.encode((('a', 1),'Y'))}" -+ assert task_space.decode([0, 1]) == [('a', 1), 'Y'], f"Expected 0, got {task_space.decode([0, 1])}" -+ 
print("Tuple tests passed!") -+ -+ # Dictionary Tests -+ task_spaces = gym.spaces.Dict({ -+ "ext_controller": gym.spaces.MultiDiscrete([5, 2, 2]), -+ "inner_state": gym.spaces.Dict( -+ { -+ "charge": gym.spaces.Discrete(10), -+ "system_checks": gym.spaces.Tuple((gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3))), -+ "job_status": gym.spaces.Dict( -+ { -+ "task": gym.spaces.Discrete(5), -+ "progress": gym.spaces.Box(low=0, high=1, shape=(2,)), -+ } -+ ), -+ } -+ ), -+ }) -+ task_names = { -+ "ext_controller": [("a", "b", "c", "d", "e"), (1, 0), ("X", "Y")], -+ "inner_state": { -+ "charge": [0, 1, 13, 3, 94, 35, 6, 37, 8, 9], -+ "system_checks": ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")), -+ "job_status": { -+ "task": ["A", "B", "C", "D", "E"], -+ "progress": [(0, 0), (0, 1), (1, 0), (1, 1)], -+ } -+ } -+ } -+ task_space = TaskSpace(task_spaces, task_names) -+ -+ test_val = { -+ "ext_controller": ('b', 1, 'X'), -+ 'inner_state': { -+ 'charge': 1, -+ 'system_checks': [('a', 0), 'Y'], -+ 'job_status': {'task': 'C', 'progress': [0.0, 0.0]} -+ } -+ } -+ decode_val = { -+ "ext_controller": 4, -+ "inner_state": { -+ "charge": 1, -+ "system_checks": [1, 1], -+ "job_status": {"progress": [0.0, 0.0], "task": 2}, -+ }, -+ } -+ -+ assert task_space.encode(test_val) == decode_val, f"Expected {decode_val}, \n but got {task_space.encode(test_val)}" -+ assert task_space.decode(decode_val) == test_val, f"Expected {test_val}, \n but got {task_space.decode(decode_val)}" -+ -+ test_val_2 = { -+ "ext_controller": ("e", 1, "Y"), -+ "inner_state": { -+ "charge": 37, -+ "system_checks": [("b", 0), "Z"], -+ "job_status": {"progress": [0.0, 0.1], "task": "D"}, -+ }, -+ } -+ decode_val_2 = { -+ "ext_controller": 17, -+ "inner_state": { -+ "charge": 7, -+ "system_checks": [3, 2], -+ "job_status": {"progress": [0.0, 0.1], "task": 3}, -+ }, -+ } -+ -+ assert task_space.encode(test_val_2) == decode_val_2, f"Expected {decode_val_2}, \n but got {task_space.encode(test_val_2)}" -+ assert task_space.decode(decode_val_2) == test_val_2, f"Expected {test_val_2}, \n but got {task_space.decode(decode_val_2)}" -+ -+ test_val_3 = { -+ "ext_controller": ("e", 1, "X"), -+ "inner_state": { -+ "charge": 8, -+ "system_checks": [("c", 0), "X"], -+ "job_status": {"progress": [0.5, 0.1], "task": "E"}, -+ }, -+ } -+ decode_val_3 = { -+ "ext_controller": 16, -+ "inner_state": { -+ "charge": 8, -+ "system_checks": [5, 0], -+ "job_status": {"progress": [0.5, 0.1], "task": 4}, -+ }, -+ } -+ -+ assert task_space.encode(test_val_3) == decode_val_3, f"Expected {decode_val_3}, \n but got {task_space.encode(test_val_3)}" -+ assert task_space.decode(decode_val_3) == test_val_3, f"Expected {test_val_3}, \n but got {task_space.decode(decode_val_3)}" -+ -+ print("Dictionary tests passed!") -+ - # Test syntactic sugar - task_space = TaskSpace(3) - assert task_space.encode(0) == 0, f"Expected 0, got {task_space.encode(0)}" -@@ -36,4 +151,32 @@ if __name__ == "__main__": - assert task_space.encode(2) == 2, f"Expected 2, got {task_space.encode(2)}" - assert task_space.encode(3) is None, f"Expected None, got {task_space.encode(3)}" - -+ task_space = TaskSpace((2, 4)) -+ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" -+ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" -+ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" -+ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" -+ -+ task_space = 
TaskSpace((2, 4)) -+ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" -+ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" -+ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" -+ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" -+ -+ task_space = TaskSpace({"map": 5, "level": (4, 10), "difficulty": 3}) -+ -+ encoding = task_space.encode({"map": 0, "level": (0, 0), "difficulty": 0}) -+ expected = {"map": 0, "level": 0, "difficulty": 0} -+ -+ encoding = task_space.encode({"map": 4, "level": (3, 9), "difficulty": 2}) -+ expected = {"map": 4, "level": 39, "difficulty": 2} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" -+ -+ encoding = task_space.encode({"map": 2, "level": (2, 0), "difficulty": 1}) -+ expected = {"map": 2, "level": 20, "difficulty": 1} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" -+ -+ encoding = task_space.encode({"map": 5, "level": (2, 11), "difficulty": -1}) -+ expected = {"map": None, "level": None, "difficulty": None} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" - print("All tests passed!") -diff --git a/syllabus/tests/utils.py b/syllabus/tests/utils.py -index 314a29c..98bac82 100644 ---- a/syllabus/tests/utils.py -+++ b/syllabus/tests/utils.py -@@ -57,7 +57,7 @@ def run_episode(env, new_task=None, curriculum=None, env_id=0): - action = env.action_space.sample() - obs, rew, term, trunc, info = env.step(action) - if curriculum and curriculum.requires_step_updates: -- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) -+ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) - curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) - ep_rew += rew - ep_len += 1 -@@ -87,7 +87,7 @@ def run_set_length(env, curriculum=None, episodes=None, steps=None, env_id=0, en - action = env.action_space.sample() - obs, rew, term, trunc, info = env.step(action) - if curriculum and curriculum.requires_step_updates: -- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) -+ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) - curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) - ep_rew += rew - ep_len += 1 -diff --git a/tests/multiprocessing_smoke_tests.py b/tests/multiprocessing_smoke_tests.py -index 9db9f47..b788179 100644 ---- a/tests/multiprocessing_smoke_tests.py -+++ b/tests/multiprocessing_smoke_tests.py -@@ -21,23 +21,23 @@ nethack_env = create_nethack_env() - cartpole_env = create_cartpole_env() - - curricula = [ -- (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), -- (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), -- # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), -- (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), -- (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { -- "get_value": get_test_values, -- "device": "cpu", -- "num_processes": N_ENVS, -- "num_steps": 2048 -- }), -- (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), -- 
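Aside on the TaskSpace hunks above: they add syntactic sugar (int -> Discrete, tuple -> MultiDiscrete, dict -> Dict) and encode MultiDiscrete tasks by enumerating every combination of per-dimension task names with itertools.product. The following is a minimal standalone sketch of that encode/decode mapping, not the Syllabus implementation itself; the helper name make_multidiscrete_codec is made up for illustration.

import itertools

def make_multidiscrete_codec(task_names):
    # Every combination of per-dimension task names gets a flat integer
    # index, in itertools.product order, mirroring the MultiDiscrete branch
    # of _make_task_encoder above. Unknown tasks/indices map to None.
    combinations = list(itertools.product(*task_names))
    encode_map = {task: i for i, task in enumerate(combinations)}
    decode_map = {i: task for i, task in enumerate(combinations)}
    return encode_map.get, decode_map.get

encode, decode = make_multidiscrete_codec([("a", "b", "c"), (1, 0)])
assert encode(("a", 1)) == 0 and encode(("b", 0)) == 3 and encode(("c", 1)) == 4
assert decode(3) == ("b", 0) and decode(5) == ("c", 0)

This reproduces the values asserted in the MultiDiscrete tests above (encode(('b', 0)) == 3, decode(5) == ('c', 0)).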
(AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { -- 'start_values': [-0.02, 0.02], -- 'end_values': [-0.3, 0.3], -- 'total_steps': [10] -- }), -- (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), -+ (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), -+ (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), -+ # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), -+ (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), -+ (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { -+ "get_value": get_test_values, -+ "device": "cpu", -+ "num_processes": N_ENVS, -+ "num_steps": 2048 -+ }), -+ (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), -+ (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { -+ 'start_values': [-0.02, 0.02], -+ 'end_values': [-0.3, 0.3], -+ 'total_steps': [10] -+ }), -+ (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), - ] - - test_names = [curriculum_args[0].__name__ for curriculum_args in curricula] diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-metadata.json b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-metadata.json deleted file mode 100644 index 8d950c03..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-metadata.json +++ /dev/null @@ -1,167 +0,0 @@ -{ - "os": "Linux-3.10.0-1160.11.1.el7.x86_64-x86_64-with-glibc2.10", - "python": "3.8.5", - "heartbeatAt": "2024-04-23T04:09:57.173591", - "startedAt": "2024-04-23T04:09:56.534155", - "docker": null, - "cuda": "10.1.243", - "args": [ - "--curriculum", - "True", - "--track", - "True", - "--env-id", - "bigfish" - ], - "state": "running", - "program": "cleanrl_procgen_plr.py", - "codePathLocal": "cleanrl_procgen_plr.py", - "codePath": "syllabus/examples/training_scripts/cleanrl_procgen_plr.py", - "git": { - "remote": "https://github.com/RoseyGreenBlue/Syllabus.git", - "commit": "63dc8f62e4d9d567eb92bb2f6c2bb186a0dc8ffb" - }, - "email": "djhaayusv04@gmail.com", - "root": "/data/averma/MARL/Syllabus", - "host": "f411843fc70b", - "username": "root", - "executable": "/home/user/miniconda/envs/test2_py/bin/python", - "cpu_count": 12, - "cpu_count_logical": 24, - "cpu_freq": { - "current": 1261.3982916666669, - "min": 1200.0, - "max": 3700.0 - }, - "cpu_freq_per_core": [ - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.085, - "min": 1200.0, - "max": 3700.0 - }, - { - 
"current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1281.64, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1260.473, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.5, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1316.503, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1301.354, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1271.057, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1244.287, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1199.877, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1398.474, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1242.834, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1258.605, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1210.668, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1446.826, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1216.687, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1211.083, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1312.976, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1207.971, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1266.699, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1577.355, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1244.494, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1237.023, - "min": 1200.0, - "max": 3700.0 - } - ], - "disk": { - "/": { - "total": 5952.626953125, - "used": 988.7801742553711 - } - }, - "memory": { - "total": 251.63711166381836 - } -} diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-summary.json b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-summary.json deleted file mode 100644 index 8488a97c..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb": {"runtime": 5}, "global_step": 0, "_timestamp": 1713845400.8411036, "_runtime": 4.293798446655273, "_step": 0} \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/run-maennc1u.wandb b/syllabus/examples/training_scripts/wandb/run-20240423_040956-maennc1u/run-maennc1u.wandb deleted file mode 100644 index 3c6de985d34cce1e3733f2be400ec45f0ad6d2e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5972 zcmcgwe~cV;74O{cUH86sz20qGdR=~WSrl4mXXp3MoNXxww59SZmA0WlC-dVwJ2N-G zIy1YsdxWGO5(q*d;Ew{57y|(be-xFZ3Q;5y5rURPp(dJGNFXF8f(k!(uzjwoUUMr zHdvmPSXR<8uQB{H#EO?k5z4s=_B@TQy;=99AWP&1<_YRQYnMjjz@*uL#JiwRvbCDTpYzXh~|kL!&B)8O`=s0d*~%dp%Mcd zLlg-j9<4?>uja~t2nquNhbGW)GSG&|FuVY63@e_80o(y6+P;mVOjQA(7B{QWU{!Hl zAZL&wB}oDu1Y+4yTs5T(MtO*Tb%LD`t17W(Vz5bw@b0Qwpo3d74t&i_23hbU=lP*4 zD|&sw2)L#>Rv>FWHh>I@WD+y!P~Gt(&}%sZsz7l2wJZcmLzpY;MprR$WtFQmv!F88l0GOkdH?Cs{tr zE3!<(Uxfzmcvj-qq(nr@U7>D#-IM z#3YWuFM>EaK$HeqQY87yApvQD6s89@5%#EKd9}vYhA7Aa_Y83ek!BnR(NwuywPLfT zlA7<9_xTZS?hY~brkxrtf1$n3Nq8l{NnmwNGz127x}@_$1*;OHs*E9M@Q)S+g-Yax zx@ga>t4r^XiUP980pW6Yk$mrhFYv*nPqeJ$F9FZT* z9Zl2h*ga1?lqsyd2G=bOPc1!^cju1XeI1~#wAb{Y-t+bE0V+{kI^Nn#Eug%M#hadZ7BB5uZho0jfzp)&8-n)IC z=D|mPy_{fP`r^P3lQdo1lcpcO|J(mrnrhm|C+`4z>+IcK@}B%X*sClvvzDIUzq$LD zBYVMz^X#=9A5Q%J&)`EgNzx?gIe0-oCX9_|Yc-cwIN8|I3_Tx>xUA{gdzR z2CpXViH=t zcT?#NG%yqPI%KIiqg^dK%g_|(r6qA%zn`pEG+!Rm5| zzRNoQs}}&;h4aLCX_T(olpe8Iq(XKR@eq;C5#xjRf9dU~-_H=ai>BUL*^TpoU!BYp zpf-g9WQM$rtVK!{-;BfLY=P1vxkVQ?ps`-*?rEs-BvB+QVgX8@RAhN+jYAcnJW2=P 
z94vhcjf9qUJcBqeW7v6ehbOIL@FJTgk9&g(j&2RDkKd1|Vp*P=Mkm zsF4&2-!km^ZSPo;?a1gp8AI|N8Pg}@61}9L0!((XY>=mWK)J=jEJKPNQ2q=V1hk|z?p>@DvLz`$yOF*;u$i4Y{xG3$)r{c`6LGVOk(H?1lSVxGsBWp5`9b9PYt-! z74~tHt@LW1$&Jv}Jd>Orhx*vbvLt90+79*c)26QFY#%?VY$1?-e#F)^rix4-KN&8a zkCv>Do?cLd7k-pjD;EAA*0!(s%fEQo$u6EgHq>p;b1fD0{CWU!(tw8T_2 zji;fF!D5!x3=Jy+XDF&v*+bk+92#<>mu{Hvgy0H zT#Im6CLJG2hH_*oAugvT%SsqqhN405CW@7$Ay|&xV5TlBbUZUtiqQHHAFf#;cGH^9 z2>m(ik#Qr=PkPdRn%h51AH4B^vftlz@G3{yf5lZ`^qqCfGOAYm0P~Dqx*3hN=zv*E zdnNhFnTmZYZ40=*%d}W$SJS+)vCp3cTW@blERZW2^N^4eQMQI!kRj0v;LcJ*-jzxhrsl#IE`?}Q#W541{%~)6M zhBl{BqSiRfiy^Oxid$s@lZsV(Mu%2aO)&-2f^Lh5O_Pi5xmkKv6(Yx=W-HCc>^yX; z*vJq$$y0Tcvv^UkLmpd{+f?;RgPWNNX2MyWW9rsnj;HBJnxSR`IuyJ*W<085@U!+@ z)o6nMwrWaZ;M!h8U@IauCsDIPv(5>Dpb4%!%gnkVZO^%N!7}t9bR+C=G*eeG{cl5; B89x93 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py deleted file mode 100644 index 70a27f6a..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py +++ /dev/null @@ -1,524 +0,0 @@ -""" An example applying Syllabus Prioritized Level Replay to Procgen. This code is based on https://github.com/facebookresearch/level-replay/blob/main/train.py - -NOTE: In order to efficiently change the seed of a procgen environment directly without reinitializing it, -we rely on Minqi Jiang's custom branch of procgen found here: https://github.com/minqi/procgen -""" -import argparse -import os -import random -import time -from collections import deque -from distutils.util import strtobool - -import gym as openai_gym -import gymnasium as gym -import numpy as np -import procgen # noqa: F401 -from procgen import ProcgenEnv -import torch -import torch.nn as nn -import torch.optim as optim -from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 -from torch.utils.tensorboard import SummaryWriter - -from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum -from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum -from syllabus.examples.models import ProcgenAgent -from syllabus.examples.task_wrappers import ProcgenTaskWrapper, MinigridTaskWrapper -from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="syllabus", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - 
help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="weather to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--logging-dir", type=str, default=".", - help="the base directory for logging and wandb storage.") - - # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="starpilot", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=int(25e6), - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=64, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=256, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Use GAE for advantage computation") - parser.add_argument("--gamma", type=float, default=0.999, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=8, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=3, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.2, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - - # Procgen arguments - parser.add_argument("--full-dist", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Train on full distribution of levels.") - - # Curriculum arguments - parser.add_argument("--curriculum", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will use curriculum learning") - parser.add_argument("--curriculum-method", type=str, default="plr", - help="curriculum method to use") - parser.add_argument("--num-eval-episodes", type=int, default=10, - help="the number of episodes to evaluate the agent on after each policy update.") - - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args - - -PROCGEN_RETURN_BOUNDS = { - "coinrun": (5, 10), - "starpilot": (2.5, 64), - "caveflyer": (3.5, 
12), - "dodgeball": (1.5, 19), - "fruitbot": (-1.5, 32.4), - "chaser": (0.5, 13), - "miner": (1.5, 13), - "jumper": (3, 10), - "leaper": (3, 10), - "maze": (5, 10), - "bigfish": (1, 40), - "heist": (3.5, 10), - "climber": (2, 12.6), - "plunder": (4.5, 30), - "ninja": (3.5, 10), - "bossfight": (0.5, 13), -} - - -def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) - if curriculum is not None: - env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, - curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, - ) - return env - return thunk - - -def wrap_vecenv(vecenv): - vecenv.is_vector_env = True - vecenv = VecMonitor(venv=vecenv, filename=None, keep_buf=100) - vecenv = VecNormalize(venv=vecenv, ob=False, ret=True) - return vecenv - - -def slow_level_replay_evaluate( - env_name, - policy, - num_episodes, - device, - num_levels=0 -): - policy.eval() - - eval_envs = ProcgenEnv( - num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [] - - while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): - if 'episode' in info.keys(): - eval_episode_rewards.append(info['episode']['r']) - - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] - normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) - policy.train() - return mean_returns, stddev_returns, normalized_mean_returns - - -def level_replay_evaluate( - env_name, - policy, - num_episodes, - device, - num_levels=0 -): - policy.eval() - - eval_envs = ProcgenEnv( - num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): - if 'episode' in info.keys() and eval_episode_rewards[i] == -1: - eval_episode_rewards[i] = info['episode']['r'] - - # print(eval_episode_rewards) - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] - normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) - policy.train() - return mean_returns, stddev_returns, normalized_mean_returns - - -def make_value_fn(): - def get_value(obs): - obs = np.array(obs) - with torch.no_grad(): - return agent.get_value(torch.Tensor(obs).to(device)) - return get_value - - -if __name__ == "__main__": - args = parse_args() - run_name = 
f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" - if args.track: - import wandb - - wandb.init( - project=args.wandb_project_name, - entity=args.wandb_entity, - sync_tensorboard=True, - config=vars(args), - name=run_name, - monitor_gym=True, - save_code=True, - dir=args.logging_dir - ) - # wandb.run.log_code("./syllabus/examples") - - writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), - ) - - # TRY NOT TO MODIFY: seeding - random.seed(args.seed) - np.random.seed(args.seed) - torch.manual_seed(args.seed) - torch.backends.cudnn.deterministic = args.torch_deterministic - - device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") - print("Device:", device) - - # Curriculum setup - curriculum = None - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) - # code to edit - # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) - sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) - - # Intialize Curriculum Method - if args.curriculum_method == "plr": - print("Using prioritized level replay.") - curriculum = PrioritizedLevelReplay( - sample_env.task_space, - sample_env.observation_space, - num_steps=args.num_steps, - num_processes=args.num_envs, - gamma=args.gamma, - gae_lambda=args.gae_lambda, - task_sampler_kwargs_dict={"strategy": "value_l1"}, - get_value=make_value_fn(), - ) - elif args.curriculum_method == "dr": - print("Using domain randomization.") - curriculum = DomainRandomization(sample_env.task_space) - elif args.curriculum_method == "lp": - print("Using learning progress.") - curriculum = LearningProgressCurriculum(sample_env.task_space) - elif args.curriculum_method == "sq": - print("Using sequential curriculum.") - curricula = [] - stopping = [] - for i in range(199): - curricula.append(i + 1) - stopping.append("steps>=50000") - curricula.append(list(range(i + 1))) - stopping.append("steps>=50000") - curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) - else: - raise ValueError(f"Unknown curriculum method {args.curriculum_method}") - curriculum = make_multiprocessing_curriculum(curriculum) - del sample_env - - # env setup - print("Creating env") - envs = gym.vector.AsyncVectorEnv( - [ - make_env( - args.env_id, - args.seed + i, - curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) - ] - ) - envs = wrap_vecenv(envs) - - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( - envs.single_observation_space.shape, - envs.single_action_space.n, - arch="large", - base_kwargs={'recurrent': False, 'hidden_size': 256} - ).to(device) - optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) - - # ALGO Logic: Storage setup - obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) - actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) - logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) - rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) - values = 
torch.zeros((args.num_steps, args.num_envs)).to(device) - - # TRY NOT TO MODIFY: start the game - global_step = 0 - start_time = time.time() - next_obs, _ = envs.reset() - next_obs = torch.Tensor(next_obs).to(device) - next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // args.batch_size - episode_rewards = deque(maxlen=10) - completed_episodes = 0 - - for update in range(1, num_updates + 1): - # Annealing the rate if instructed to do so. - if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates - lrnow = frac * args.learning_rate - optimizer.param_groups[0]["lr"] = lrnow - - for step in range(0, args.num_steps): - global_step += 1 * args.num_envs - obs[step] = next_obs - dones[step] = next_done - - # ALGO LOGIC: action logic - with torch.no_grad(): - action, logprob, _, value = agent.get_action_and_value(next_obs) - values[step] = value.flatten() - actions[step] = action - logprobs[step] = logprob - - # TRY NOT TO MODIFY: execute the game and log data. - next_obs, reward, term, trunc, info = envs.step(action.cpu().numpy()) - done = np.logical_or(term, trunc) - rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) - completed_episodes += sum(done) - - for item in info: - if "episode" in item.keys(): - episode_rewards.append(item['episode']['r']) - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) - if curriculum is not None: - curriculum.log_metrics(writer, global_step) - break - - # bootstrap value if not done - with torch.no_grad(): - next_value = agent.get_value(next_obs).reshape(1, -1) - if args.gae: - advantages = torch.zeros_like(rewards).to(device) - lastgaelam = 0 - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - nextvalues = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - nextvalues = values[t + 1] - delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] - advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam - returns = advantages + values - else: - returns = torch.zeros_like(rewards).to(device) - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - next_return = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - next_return = returns[t + 1] - returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return - advantages = returns - values - - # flatten the batch - b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) - b_logprobs = logprobs.reshape(-1) - b_actions = actions.reshape((-1,) + envs.single_action_space.shape) - b_advantages = advantages.reshape(-1) - b_returns = returns.reshape(-1) - b_values = values.reshape(-1) - - # Optimizing the policy and value network - b_inds = np.arange(args.batch_size) - clipfracs = [] - for epoch in range(args.update_epochs): - np.random.shuffle(b_inds) - for start in range(0, args.batch_size, args.minibatch_size): - end = start + args.minibatch_size - mb_inds = b_inds[start:end] - - _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds]) - logratio = newlogprob - b_logprobs[mb_inds] - ratio = logratio.exp() - - with torch.no_grad(): - # calculate approx_kl 
http://joschu.net/blog/kl-approx.html - old_approx_kl = (-logratio).mean() - approx_kl = ((ratio - 1) - logratio).mean() - clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()] - - mb_advantages = b_advantages[mb_inds] - if args.norm_adv: - mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) - - # Policy loss - pg_loss1 = -mb_advantages * ratio - pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + args.clip_coef) - pg_loss = torch.max(pg_loss1, pg_loss2).mean() - - # Value loss - newvalue = newvalue.view(-1) - if args.clip_vloss: - v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 - v_clipped = b_values[mb_inds] + torch.clamp( - newvalue - b_values[mb_inds], - -args.clip_coef, - args.clip_coef, - ) - v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 - v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) - v_loss = 0.5 * v_loss_max.mean() - else: - v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() - - entropy_loss = entropy.mean() - loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef - - optimizer.zero_grad() - loss.backward() - nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) - optimizer.step() - - if args.target_kl is not None: - if approx_kl > args.target_kl: - break - - y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() - var_y = np.var(y_true) - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent - mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) - writer.add_scalar("charts/episode_returns", np.mean(episode_rewards), global_step) - writer.add_scalar("losses/value_loss", v_loss.item(), global_step) - writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step) - writer.add_scalar("losses/entropy", entropy_loss.item(), global_step) - writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step) - writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step) - writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step) - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) - - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) - 
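Aside on the deleted training script above: level_replay_evaluate and slow_level_replay_evaluate normalize raw returns against PROCGEN_RETURN_BOUNDS. A quick worked example, assuming a hypothetical mean return of 20.5 on bigfish, whose bounds (1, 40) appear in the table above:

env_min, env_max = 1, 40     # PROCGEN_RETURN_BOUNDS["bigfish"], from the table above
mean_returns = 20.5          # hypothetical evaluation result, for illustration only
normalized = (mean_returns - env_min) / (env_max - env_min)
print(round(normalized, 3))  # 0.5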
writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) - - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) - writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) - - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() - writer.close() diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/conda-environment.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/conda-environment.yaml deleted file mode 100644 index cd0b0b09..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/conda-environment.yaml +++ /dev/null @@ -1,165 +0,0 @@ -name: test2_py -channels: - - defaults -dependencies: - - _libgcc_mutex=0.1=main - - _openmp_mutex=5.1=1_gnu - - ca-certificates=2024.3.11=h06a4308_0 - - ld_impl_linux-64=2.38=h1181459_1 - - libffi=3.3=he6710b0_2 - - libgcc-ng=11.2.0=h1234567_1 - - libgomp=11.2.0=h1234567_1 - - libstdcxx-ng=11.2.0=h1234567_1 - - ncurses=6.4=h6a678d5_0 - - openssl=1.1.1w=h7f8727e_0 - - pip=23.3.1=py38h06a4308_0 - - python=3.8.5=h7579374_1 - - readline=8.2=h5eee18b_0 - - setuptools=68.2.2=py38h06a4308_0 - - sqlite=3.41.2=h5eee18b_0 - - tk=8.6.12=h1ccaba5_0 - - wheel=0.41.2=py38h06a4308_0 - - xz=5.4.6=h5eee18b_0 - - zlib=1.2.13=h5eee18b_0 - - pip: - - absl-py==2.1.0 - - aiosignal==1.3.1 - - alabaster==0.7.13 - - appdirs==1.4.4 - - attrs==23.2.0 - - babel==2.14.0 - - beautifulsoup4==4.12.3 - - cachetools==5.3.3 - - certifi==2024.2.2 - - cffi==1.16.0 - - charset-normalizer==3.3.2 - - click==8.1.7 - - cloudpickle==3.0.0 - - cmake==3.29.2 - - contourpy==1.1.1 - - cycler==0.12.1 - - dm-tree==0.1.8 - - docker-pycreds==0.4.0 - - docutils==0.20.1 - - exceptiongroup==1.2.0 - - farama-notifications==0.0.4 - - filelock==3.13.4 - - fonttools==4.51.0 - - frozenlist==1.4.1 - - fsspec==2024.3.1 - - furo==2024.1.29 - - future==1.0.0 - - gitdb==4.0.11 - - gitpython==3.1.43 - - glcontext==2.5.0 - - glfw==1.12.0 - - google-auth==2.29.0 - - google-auth-oauthlib==1.0.0 - - grpcio==1.62.1 - - gym==0.23.0 - - gym-notices==0.0.8 - - gymnasium==0.28.1 - - idna==3.7 - - imageio==2.34.0 - - imageio-ffmpeg==0.3.0 - - imagesize==1.4.1 - - importlib-metadata==7.1.0 - - importlib-resources==6.4.0 - - iniconfig==2.0.0 - - jax-jumpy==1.0.0 - - jinja2==3.1.3 - - jsonschema==4.21.1 - - jsonschema-specifications==2023.12.1 - - kiwisolver==1.4.5 - - lazy-loader==0.4 - - lz4==4.3.3 - - markdown==3.6 - - markdown-it-py==3.0.0 - - markupsafe==2.1.5 - - matplotlib==3.7.5 - - mdurl==0.1.2 - - moderngl==5.10.0 - - mpmath==1.3.0 - - msgpack==1.0.8 - - networkx==3.1 - - numpy==1.24.4 - - nvidia-cublas-cu12==12.1.3.1 - - nvidia-cuda-cupti-cu12==12.1.105 - - nvidia-cuda-nvrtc-cu12==12.1.105 - - nvidia-cuda-runtime-cu12==12.1.105 - - nvidia-cudnn-cu12==8.9.2.26 - - nvidia-cufft-cu12==11.0.2.54 - - nvidia-curand-cu12==10.3.2.106 - - nvidia-cusolver-cu12==11.4.5.107 - 
- nvidia-cusparse-cu12==12.1.0.106 - - nvidia-nccl-cu12==2.19.3 - - nvidia-nvjitlink-cu12==12.4.127 - - nvidia-nvtx-cu12==12.1.105 - - oauthlib==3.2.2 - - packaging==24.0 - - pandas==2.0.3 - - pillow==10.3.0 - - pkgutil-resolve-name==1.3.10 - - pluggy==1.4.0 - - protobuf==4.25.3 - - psutil==5.9.8 - - py-cpuinfo==9.0.0 - - pyarrow==15.0.2 - - pyasn1==0.6.0 - - pyasn1-modules==0.4.0 - - pycparser==2.22 - - pyenchant==3.2.2 - - pyglet==1.4.11 - - pygments==2.17.2 - - pyparsing==3.1.2 - - pytest==8.1.1 - - pytest-benchmark==4.0.0 - - python-dateutil==2.9.0.post0 - - pytz==2024.1 - - pywavelets==1.4.1 - - pyyaml==6.0.1 - - ray==2.10.0 - - referencing==0.34.0 - - requests==2.31.0 - - requests-oauthlib==2.0.0 - - rich==13.7.1 - - rpds-py==0.18.0 - - rsa==4.9 - - scikit-image==0.21.0 - - scipy==1.10.0 - - sentry-sdk==1.45.0 - - setproctitle==1.3.3 - - shellingham==1.5.4 - - shimmy==1.3.0 - - six==1.16.0 - - smmap==5.0.1 - - snowballstemmer==2.2.0 - - soupsieve==2.5 - - sphinx==7.1.2 - - sphinx-basic-ng==1.0.0b2 - - sphinx-tabs==3.4.5 - - sphinxcontrib-applehelp==1.0.4 - - sphinxcontrib-devhelp==1.0.2 - - sphinxcontrib-htmlhelp==2.0.1 - - sphinxcontrib-jsmath==1.0.1 - - sphinxcontrib-qthelp==1.0.3 - - sphinxcontrib-serializinghtml==1.1.5 - - sphinxcontrib-spelling==8.0.0 - - syllabus-rl==0.5 - - sympy==1.12 - - tensorboard==2.14.0 - - tensorboard-data-server==0.7.2 - - tensorboardx==2.6.2.2 - - tifffile==2023.7.10 - - tomli==2.0.1 - - torch==2.2.2 - - triton==2.2.0 - - typer==0.12.3 - - typing-extensions==4.11.0 - - tzdata==2024.1 - - urllib3==2.2.1 - - wandb==0.16.6 - - werkzeug==3.0.2 - - zipp==3.18.1 -prefix: /home/user/miniconda/envs/test2_py - diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/config.yaml b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/config.yaml deleted file mode 100644 index 19f16db4..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/config.yaml +++ /dev/null @@ -1,126 +0,0 @@ -wandb_version: 1 - -exp_name: - desc: null - value: cleanrl_procgen_plr -seed: - desc: null - value: 1 -torch_deterministic: - desc: null - value: true -cuda: - desc: null - value: true -track: - desc: null - value: true -wandb_project_name: - desc: null - value: syllabus -wandb_entity: - desc: null - value: null -capture_video: - desc: null - value: false -logging_dir: - desc: null - value: . 
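For context on the values recorded in this run's config below: batch_size and minibatch_size are not set directly but derived in parse_args from num_envs, num_steps and num_minibatches, which is why they appear as 16384 and 2048. A one-line check using the values from this config.yaml:

num_envs, num_steps, num_minibatches = 64, 256, 8  # values from this config.yaml
batch_size = num_envs * num_steps                  # 16384, as recorded below
minibatch_size = batch_size // num_minibatches     # 2048, as recorded below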
-env_id: - desc: null - value: bigfish -total_timesteps: - desc: null - value: 25000000 -learning_rate: - desc: null - value: 0.0005 -num_envs: - desc: null - value: 64 -num_steps: - desc: null - value: 256 -anneal_lr: - desc: null - value: false -gae: - desc: null - value: true -gamma: - desc: null - value: 0.999 -gae_lambda: - desc: null - value: 0.95 -num_minibatches: - desc: null - value: 8 -update_epochs: - desc: null - value: 3 -norm_adv: - desc: null - value: true -clip_coef: - desc: null - value: 0.2 -clip_vloss: - desc: null - value: true -ent_coef: - desc: null - value: 0.01 -vf_coef: - desc: null - value: 0.5 -max_grad_norm: - desc: null - value: 0.5 -target_kl: - desc: null - value: null -full_dist: - desc: null - value: true -curriculum: - desc: null - value: true -curriculum_method: - desc: null - value: plr -num_eval_episodes: - desc: null - value: 10 -batch_size: - desc: null - value: 16384 -minibatch_size: - desc: null - value: 2048 -_wandb: - desc: null - value: - code_path: code/syllabus/examples/training_scripts/cleanrl_procgen_plr.py - python_version: 3.8.5 - cli_version: 0.16.6 - framework: torch - is_jupyter_run: false - is_kaggle_kernel: false - start_time: 1713845639.0 - t: - 1: - - 1 - - 30 - - 55 - 3: - - 13 - - 16 - - 23 - - 35 - 4: 3.8.5 - 5: 0.16.6 - 8: - - 5 - 13: linux-x86_64 diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/diff.patch b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/diff.patch deleted file mode 100644 index 40d0796c..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/diff.patch +++ /dev/null @@ -1,142 +0,0 @@ -diff --git a/setup.py b/setup.py -index 31e09f2..22a94e8 100644 ---- a/setup.py -+++ b/setup.py -@@ -2,7 +2,7 @@ from setuptools import find_packages, setup - - - extras = dict() --extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] -+extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] - extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] - extras['all'] = extras['test'] + extras['docs'] - -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -index dabcd50..70a27f6 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -+++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -@@ -24,7 +24,7 @@ from torch.utils.tensorboard import SummaryWriter - from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum - from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum - from syllabus.examples.models import ProcgenAgent --from syllabus.examples.task_wrappers import ProcgenTaskWrapper -+from syllabus.examples.task_wrappers import ProcgenTaskWrapper, MinigridTaskWrapper - from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - -@@ -136,7 +136,7 @@ def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - env = MultiProcessingSyncWrapper( - env, - curriculum.get_components(), -- update_on_step=curriculum.requires_step_updates, -+ update_on_step=False, - task_space=env.task_space, - ) - return env -@@ -150,37 +150,31 @@ def wrap_vecenv(vecenv): - return vecenv - - --def full_level_replay_evaluate( -+def slow_level_replay_evaluate( - env_name, - policy, - 
num_episodes, - device, -- num_levels=1 # Not used -+ num_levels=0 - ): - policy.eval() - - eval_envs = ProcgenEnv( -- num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False -+ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) -- -- # Seed environments -- seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] -- for i, seed in enumerate(seeds): -- eval_envs.seed(seed, i) -- - eval_obs, _ = eval_envs.reset() -- eval_episode_rewards = [-1] * num_episodes -+ eval_episode_rewards = [] - -- while -1 in eval_episode_rewards: -+ while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): -- if 'episode' in info.keys() and eval_episode_rewards[i] == -1: -- eval_episode_rewards[i] = info['episode']['r'] -+ if 'episode' in info.keys(): -+ eval_episode_rewards.append(info['episode']['r']) - - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) -@@ -251,7 +245,7 @@ if __name__ == "__main__": - ) - # wandb.run.log_code("./syllabus/examples") - -- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) -+ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), -@@ -271,7 +265,9 @@ if __name__ == "__main__": - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) -- sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) -+ # code to edit -+ # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) -+ sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) - - # Intialize Curriculum Method - if args.curriculum_method == "plr": -@@ -485,13 +481,13 @@ if __name__ == "__main__": - mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) -- full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( -+ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=0 - ) - mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) -- full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( -+ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( - args.env_id, agent, args.num_eval_episodes, device, num_levels=200 - ) - -@@ -510,17 +506,17 @@ if __name__ == "__main__": - - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) -- 
writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) -- writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) -+ writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) - - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) -- writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) -- writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) -- writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) -- writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) -+ writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) - - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch deleted file mode 100644 index b3eac157..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/upstream_diff_64a98eada0f5549603091d9236731d61fe0bce82.patch +++ /dev/null @@ -1,1421 +0,0 @@ -diff --git a/setup.py b/setup.py -index 31e09f2..22a94e8 100644 ---- a/setup.py -+++ b/setup.py -@@ -2,7 +2,7 @@ from setuptools import find_packages, setup - - - extras = dict() --extras['test'] = ['cmake', 'ninja', 'nle>=0.9.0', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] -+extras['test'] = ['cmake', 'matplotlib>=3.7.1', 'scipy==1.10.0', 'tensorboard>=2.13.0', 'shimmy'] - extras['docs'] = ['sphinx-tabs', 'sphinxcontrib-spelling', 'furo'] - extras['all'] = extras['test'] + extras['docs'] - -diff --git a/syllabus/core/curriculum_base.py b/syllabus/core/curriculum_base.py -index 03284da..4ca9aeb 100644 ---- a/syllabus/core/curriculum_base.py -+++ b/syllabus/core/curriculum_base.py -@@ -76,7 +76,7 @@ class Curriculum: - """ - self.completed_tasks += 1 - -- def update_on_step(self, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: -+ def update_on_step(self, task: typing.Any, obs: typing.Any, rew: float, term: bool, trunc: bool, info: dict, env_id: int = None) -> None: - """ Update the curriculum with the current step results from the environment. 
- - :param obs: Observation from teh environment -@@ -88,7 +88,7 @@ class Curriculum: - """ - raise NotImplementedError("This curriculum does not require step updates. Set update_on_step for the environment sync wrapper to False to improve performance and prevent this error.") - -- def update_on_step_batch(self, step_results: List[typing.Tuple[int, int, int, int, int]], env_id: int = None) -> None: -+ def update_on_step_batch(self, step_results: List[typing.Tuple[Any, Any, int, int, int, int]], env_id: int = None) -> None: - """Update the curriculum with a batch of step results from the environment. - - This method can be overridden to provide a more efficient implementation. It is used -@@ -96,9 +96,9 @@ class Curriculum: - - :param step_results: List of step results - """ -- obs, rews, terms, truncs, infos = tuple(step_results) -+ tasks, obs, rews, terms, truncs, infos = tuple(step_results) - for i in range(len(obs)): -- self.update_on_step(obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) -+ self.update_on_step(tasks[i], obs[i], rews[i], terms[i], truncs[i], infos[i], env_id=env_id) - - def update_on_episode(self, episode_return: float, episode_length: int, episode_task: Any, env_id: int = None) -> None: - """Update the curriculum with episode results from the environment. -diff --git a/syllabus/core/curriculum_sync_wrapper.py b/syllabus/core/curriculum_sync_wrapper.py -index 6e069d8..f986643 100644 ---- a/syllabus/core/curriculum_sync_wrapper.py -+++ b/syllabus/core/curriculum_sync_wrapper.py -@@ -29,6 +29,14 @@ class CurriculumWrapper: - def tasks(self): - return self.task_space.tasks - -+ @property -+ def requires_step_updates(self): -+ return self.curriculum.requires_step_updates -+ -+ @property -+ def requires_episode_updates(self): -+ return self.curriculum.requires_episode_updates -+ - def get_tasks(self, task_space=None): - return self.task_space.get_tasks(gym_space=task_space) - -diff --git a/syllabus/core/environment_sync_wrapper.py b/syllabus/core/environment_sync_wrapper.py -index c995aa1..6edee7c 100644 ---- a/syllabus/core/environment_sync_wrapper.py -+++ b/syllabus/core/environment_sync_wrapper.py -@@ -19,7 +19,8 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - def __init__(self, - env, - components: MultiProcessingComponents, -- update_on_step: bool = True, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? -+ update_on_step: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? -+ update_on_progress: bool = False, # TODO: Fine grained control over which step elements are used. Controlled by curriculum? 
- batch_size: int = 100, - buffer_size: int = 2, # Having an extra task in the buffer minimizes wait time at reset - task_space: TaskSpace = None, -@@ -34,6 +35,7 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - self.update_queue = components.update_queue - self.task_space = task_space - self.update_on_step = update_on_step -+ self.update_on_progress = update_on_progress - self.batch_size = batch_size - self.global_task_completion = global_task_completion - self.task_progress = 0.0 -@@ -125,17 +127,21 @@ class MultiProcessingSyncWrapper(gym.Wrapper): - def _package_step_updates(self): - step_batch = { - "update_type": "step_batch", -- "metrics": ([self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), -+ "metrics": ([self._tasks[:self._batch_step], self._obs[:self._batch_step], self._rews[:self._batch_step], self._terms[:self._batch_step], self._truncs[:self._batch_step], self._infos[:self._batch_step]],), - "env_id": self.instance_id, - "request_sample": False - } -- task_batch = { -- "update_type": "task_progress_batch", -- "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), -- "env_id": self.instance_id, -- "request_sample": False -- } -- return [step_batch, task_batch] -+ update = [step_batch] -+ -+ if self.update_on_progress: -+ task_batch = { -+ "update_type": "task_progress_batch", -+ "metrics": (self._tasks[:self._batch_step], self._task_progresses[:self._batch_step],), -+ "env_id": self.instance_id, -+ "request_sample": False -+ } -+ update.append(task_batch) -+ return update - - def add_task(self, task): - update = { -diff --git a/syllabus/curricula/annealing_box.py b/syllabus/curricula/annealing_box.py -index 6c565ec..101981c 100644 ---- a/syllabus/curricula/annealing_box.py -+++ b/syllabus/curricula/annealing_box.py -@@ -49,8 +49,8 @@ class AnnealingBoxCurriculum(Curriculum): - """ - # Linear annealing from start_values to end_values - annealed_values = ( -- self.start_values + (self.end_values - self.start_values) * -- np.minimum(self.current_step, self.total_steps) / self.total_steps -+ self.start_values + (self.end_values - self.start_values) * -+ np.minimum(self.current_step, self.total_steps) / self.total_steps - ) - -- return [annealed_values.copy() for _ in range(k)] -\ No newline at end of file -+ return [annealed_values.copy() for _ in range(k)] -diff --git a/syllabus/curricula/noop.py b/syllabus/curricula/noop.py -index f6bd5dc..fb5d8ae 100644 ---- a/syllabus/curricula/noop.py -+++ b/syllabus/curricula/noop.py -@@ -28,7 +28,7 @@ class NoopCurriculum(Curriculum): - """ - pass - -- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: - """ - Update the curriculum with the current step results from the environment. - """ -diff --git a/syllabus/curricula/plr/plr_wrapper.py b/syllabus/curricula/plr/plr_wrapper.py -index 9515df4..9c808dd 100644 ---- a/syllabus/curricula/plr/plr_wrapper.py -+++ b/syllabus/curricula/plr/plr_wrapper.py -@@ -23,16 +23,15 @@ class RolloutStorage(object): - get_value=None, - ): - self.num_steps = num_steps -- self.buffer_steps = num_steps * 2 # Hack to prevent overflow from lagging updates. -+ self.buffer_steps = num_steps * 4 # Hack to prevent overflow from lagging updates. 
- self.num_processes = num_processes - self._requires_value_buffers = requires_value_buffers - self._get_value = get_value - self.tasks = torch.zeros(self.buffer_steps, num_processes, 1, dtype=torch.int) - self.masks = torch.ones(self.buffer_steps + 1, num_processes, 1) - self.obs = [[[0] for _ in range(self.num_processes)]] * self.buffer_steps -- self._fill = torch.zeros(self.buffer_steps, num_processes, 1) - self.env_steps = [0] * num_processes -- self.should_update = False -+ self.ready_buffers = set() - - if requires_value_buffers: - self.returns = torch.zeros(self.buffer_steps + 1, num_processes, 1) -@@ -46,12 +45,10 @@ class RolloutStorage(object): - self.action_log_dist = torch.zeros(self.buffer_steps, num_processes, action_space.n) - - self.num_steps = num_steps -- self.step = 0 - - def to(self, device): - self.masks = self.masks.to(device) - self.tasks = self.tasks.to(device) -- self._fill = self._fill.to(device) - if self._requires_value_buffers: - self.rewards = self.rewards.to(device) - self.value_preds = self.value_preds.to(device) -@@ -59,108 +56,79 @@ class RolloutStorage(object): - else: - self.action_log_dist = self.action_log_dist.to(device) - -- def insert(self, masks, action_log_dist=None, value_preds=None, rewards=None, tasks=None): -- if self._requires_value_buffers: -- assert (value_preds is not None and rewards is not None), "Selected strategy requires value_preds and rewards" -- if len(rewards.shape) == 3: -- rewards = rewards.squeeze(2) -- self.value_preds[self.step].copy_(torch.as_tensor(value_preds)) -- self.rewards[self.step].copy_(torch.as_tensor(rewards)[:, None]) -- self.masks[self.step + 1].copy_(torch.as_tensor(masks)[:, None]) -- else: -- self.action_log_dist[self.step].copy_(action_log_dist) -- if tasks is not None: -- assert isinstance(tasks[0], int), "Provided task must be an integer" -- self.tasks[self.step].copy_(torch.as_tensor(tasks)[:, None]) -- self.step = (self.step + 1) % self.num_steps -- - def insert_at_index(self, env_index, mask=None, action_log_dist=None, obs=None, reward=None, task=None, steps=1): -- if env_index >= self.num_processes: -- warnings.warn(f"Env index {env_index} is greater than the number of processes {self.num_processes}. Using index {env_index % self.num_processes} instead.") -- env_index = env_index % self.num_processes -- - step = self.env_steps[env_index] - end_step = step + steps -- # Update buffer fill traacker, and check for common usage errors. -- try: -- if end_step > len(self._fill): -- raise IndexError -- self._fill[step:end_step, env_index] = 1 -- except IndexError as e: -- if any(self._fill[:][env_index] == 0): -- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. Your value for PLR's num_processes may be too high.") from e -- else: -- raise UsageError(f"Step {step} + {steps} = {end_step} is out of range for env index {env_index}. 
Your value for PLR's num_processes may be too low.") from e - - if mask is not None: - self.masks[step + 1:end_step + 1, env_index].copy_(torch.as_tensor(mask[:, None])) -+ - if obs is not None: - for s in range(step, end_step): - self.obs[s][env_index] = obs[s - step] -+ - if reward is not None: - self.rewards[step:end_step, env_index].copy_(torch.as_tensor(reward[:, None])) -+ - if action_log_dist is not None: - self.action_log_dist[step:end_step, env_index].copy_(torch.as_tensor(action_log_dist[:, None])) -+ - if task is not None: - try: -- task = int(task) -+ int(task[0]) - except TypeError: -- assert isinstance(task, int), f"Provided task must be an integer, got {task} with type {type(task)} instead." -- self.tasks[step:end_step, env_index].copy_(torch.as_tensor(task)) -- else: -- self.env_steps[env_index] += steps -- # Hack for now, we call insert_at_index twice -- while all(self._fill[self.step] == 1): -- self.step = (self.step + 1) % self.buffer_steps -- # Check if we have enough steps to compute a task sampler update -- if self.step == self.num_steps + 1: -- self.should_update = True -- -- def _get_values(self): -+ assert isinstance(task, int), f"Provided task must be an integer, got {task[0]} with type {type(task[0])} instead." -+ self.tasks[step:end_step, env_index].copy_(torch.as_tensor(np.array(task)[:, None])) -+ -+ self.env_steps[env_index] += steps -+ if env_index not in self.ready_buffers and self.env_steps[env_index] >= self.num_steps: -+ self.ready_buffers.add(env_index) -+ -+ def _get_values(self, env_index): - if self._get_value is None: - raise UsageError("Selected strategy requires value predictions. Please provide get_value function.") -- for step in range(self.num_steps): -- values = self._get_value(self.obs[step]) -+ for step in range(0, self.num_steps, self.num_processes): -+ obs = self.obs[step: step + self.num_processes][env_index] -+ values = self._get_value(obs) -+ -+ # Reshape values if necessary - if len(values.shape) == 3: - warnings.warn(f"Value function returned a 3D tensor of shape {values.shape}. Attempting to squeeze last dimension.") - values = torch.squeeze(values, -1) - if len(values.shape) == 1: - warnings.warn(f"Value function returned a 1D tensor of shape {values.shape}. 
Attempting to unsqueeze last dimension.") - values = torch.unsqueeze(values, -1) -- self.value_preds[step].copy_(values) - -- def after_update(self): -+ self.value_preds[step: step + self.num_processes, env_index].copy_(values) -+ -+ def after_update(self, env_index): - # After consuming the first num_steps of data, remove them and shift the remaining data in the buffer -- self.tasks[0: self.num_steps].copy_(self.tasks[self.num_steps: self.buffer_steps]) -- self.masks[0: self.num_steps].copy_(self.masks[self.num_steps: self.buffer_steps]) -- self.obs[0: self.num_steps][:] = self.obs[self.num_steps: self.buffer_steps][:] -+ self.tasks = self.tasks.roll(-self.num_steps, 0) -+ self.masks = self.masks.roll(-self.num_steps, 0) -+ self.obs[0:][env_index] = self.obs[self.num_steps: self.buffer_steps][env_index] - - if self._requires_value_buffers: -- self.returns[0: self.num_steps].copy_(self.returns[self.num_steps: self.buffer_steps]) -- self.rewards[0: self.num_steps].copy_(self.rewards[self.num_steps: self.buffer_steps]) -- self.value_preds[0: self.num_steps].copy_(self.value_preds[self.num_steps: self.buffer_steps]) -+ self.returns = self.returns.roll(-self.num_steps, 0) -+ self.rewards = self.rewards.roll(-self.num_steps, 0) -+ self.value_preds = self.value_preds.roll(-self.num_steps, 0) - else: -- self.action_log_dist[0: self.num_steps].copy_(self.action_log_dist[self.num_steps: self.buffer_steps]) -+ self.action_log_dist = self.action_log_dist.roll(-self.num_steps, 0) - -- self._fill[0: self.num_steps].copy_(self._fill[self.num_steps: self.buffer_steps]) -- self._fill[self.num_steps: self.buffer_steps].copy_(0) -+ self.env_steps[env_index] -= self.num_steps -+ self.ready_buffers.remove(env_index) - -- self.env_steps = [steps - self.num_steps for steps in self.env_steps] -- self.should_update = False -- self.step = self.step - self.num_steps -- -- def compute_returns(self, gamma, gae_lambda): -+ def compute_returns(self, gamma, gae_lambda, env_index): - assert self._requires_value_buffers, "Selected strategy does not use compute_rewards." -- self._get_values() -+ self._get_values(env_index) - gae = 0 - for step in reversed(range(self.rewards.size(0), self.num_steps)): - delta = ( -- self.rewards[step] -- + gamma * self.value_preds[step + 1] * self.masks[step + 1] -- - self.value_preds[step] -+ self.rewards[step, env_index] -+ + gamma * self.value_preds[step + 1, env_index] * self.masks[step + 1, env_index] -+ - self.value_preds[step, env_index] - ) -- gae = delta + gamma * gae_lambda * self.masks[step + 1] * gae -- self.returns[step] = gae + self.value_preds[step] -+ gae = delta + gamma * gae_lambda * self.masks[step + 1, env_index] * gae -+ self.returns[step, env_index] = gae + self.value_preds[step, env_index] - - - def null(x): -@@ -252,11 +220,15 @@ class PrioritizedLevelReplay(Curriculum): - else: - return [self._task_sampler.sample() for _ in range(k)] - -- def update_on_step(self, obs, rew, term, trunc, info, env_id: int = None) -> None: -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None: - """ - Update the curriculum with the current step results from the environment. - """ - assert env_id is not None, "env_id must be provided for PLR updates." -+ if env_id >= self._num_processes: -+ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. 
Using index {env_id % self._num_processes} instead.") -+ env_id = env_id % self._num_processes -+ - # Update rollouts - self._rollouts.insert_at_index( - env_id, -@@ -266,14 +238,22 @@ class PrioritizedLevelReplay(Curriculum): - obs=np.array([obs]), - ) - -+ # Update task sampler -+ if env_id in self._rollouts.ready_buffers: -+ self._update_sampler(env_id) -+ - def update_on_step_batch( -- self, step_results: List[Tuple[Any, int, bool, bool, Dict]], env_id: int = None -+ self, step_results: List[Tuple[int, Any, int, bool, bool, Dict]], env_id: int = None - ) -> None: - """ - Update the curriculum with a batch of step results from the environment. - """ - assert env_id is not None, "env_id must be provided for PLR updates." -- obs, rews, terms, truncs, infos = step_results -+ if env_id >= self._num_processes: -+ warnings.warn(f"Env index {env_id} is greater than the number of processes {self._num_processes}. Using index {env_id % self._num_processes} instead.") -+ env_id = env_id % self._num_processes -+ -+ tasks, obs, rews, terms, truncs, infos = step_results - self._rollouts.insert_at_index( - env_id, - mask=np.logical_not(np.logical_or(terms, truncs)), -@@ -281,25 +261,19 @@ class PrioritizedLevelReplay(Curriculum): - reward=rews, - obs=obs, - steps=len(rews), -+ task=tasks, - ) - -- def update_task_progress(self, task: Any, success_prob: float, env_id: int = None) -> None: -- """ -- Update the curriculum with a task and its success probability upon -- success or failure. -- """ -- assert env_id is not None, "env_id must be provided for PLR updates." -- self._rollouts.insert_at_index( -- env_id, -- task=task, -- ) - # Update task sampler -- if self._rollouts.should_update: -- if self._task_sampler.requires_value_buffers: -- self._rollouts.compute_returns(self._gamma, self._gae_lambda) -- self._task_sampler.update_with_rollouts(self._rollouts) -- self._rollouts.after_update() -- self._task_sampler.after_update() -+ if env_id in self._rollouts.ready_buffers: -+ self._update_sampler(env_id) -+ -+ def _update_sampler(self, env_id): -+ if self._task_sampler.requires_value_buffers: -+ self._rollouts.compute_returns(self._gamma, self._gae_lambda, env_id) -+ self._task_sampler.update_with_rollouts(self._rollouts, env_id) -+ self._rollouts.after_update(env_id) -+ self._task_sampler.after_update() - - def _enumerate_tasks(self, space): - assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete" -@@ -312,10 +286,10 @@ class PrioritizedLevelReplay(Curriculum): - """ - Log the task distribution to the provided tensorboard writer. 
- """ -- super().log_metrics(writer, step) -+ # super().log_metrics(writer, step) - metrics = self._task_sampler.metrics() - writer.add_scalar("curriculum/proportion_seen", metrics["proportion_seen"], step) - writer.add_scalar("curriculum/score", metrics["score"], step) -- for task in list(self.task_space.tasks)[:10]: -- writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) -- writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) -+ # for task in list(self.task_space.tasks)[:10]: -+ # writer.add_scalar(f"curriculum/task_{task - 1}_score", metrics["task_scores"][task - 1], step) -+ # writer.add_scalar(f"curriculum/task_{task - 1}_staleness", metrics["task_staleness"][task - 1], step) -diff --git a/syllabus/curricula/plr/task_sampler.py b/syllabus/curricula/plr/task_sampler.py -index 15ad485..c1e97a1 100644 ---- a/syllabus/curricula/plr/task_sampler.py -+++ b/syllabus/curricula/plr/task_sampler.py -@@ -73,7 +73,7 @@ class TaskSampler: - 'Must provide action space to PLR if using "policy_entropy", "least_confidence", or "min_margin" strategies' - ) - -- def update_with_rollouts(self, rollouts): -+ def update_with_rollouts(self, rollouts, actor_id=None): - if self.strategy == "random": - return - -@@ -93,7 +93,7 @@ class TaskSampler: - else: - raise ValueError(f"Unsupported strategy, {self.strategy}") - -- self._update_with_rollouts(rollouts, score_function) -+ self._update_with_rollouts(rollouts, score_function, actor_index=actor_id) - - def update_task_score(self, actor_index, task_idx, score, num_steps): - score = self._partial_update_task_score(actor_index, task_idx, score, num_steps, done=True) -@@ -165,14 +165,15 @@ class TaskSampler: - def requires_value_buffers(self): - return self.strategy in ["gae", "value_l1", "one_step_td_error"] - -- def _update_with_rollouts(self, rollouts, score_function): -+ def _update_with_rollouts(self, rollouts, score_function, actor_index=None): - tasks = rollouts.tasks - if not self.requires_value_buffers: - policy_logits = rollouts.action_log_dist - done = ~(rollouts.masks > 0) - total_steps, num_actors = rollouts.tasks.shape[:2] - -- for actor_index in range(num_actors): -+ actors = [actor_index] if actor_index is not None else range(num_actors) -+ for actor_index in actors: - done_steps = done[:, actor_index].nonzero()[:total_steps, 0] - start_t = 0 - -diff --git a/syllabus/curricula/sequential.py b/syllabus/curricula/sequential.py -index baa1263..ec3b8b0 100644 ---- a/syllabus/curricula/sequential.py -+++ b/syllabus/curricula/sequential.py -@@ -177,9 +177,9 @@ class SequentialCurriculum(Curriculum): - if self.current_curriculum.requires_episode_updates: - self.current_curriculum.update_on_episode(episode_return, episode_len, episode_task, env_id) - -- def update_on_step(self, obs, rew, term, trunc, info, env_id=None): -+ def update_on_step(self, task, obs, rew, term, trunc, info, env_id=None): - if self.current_curriculum.requires_step_updates: -- self.current_curriculum.update_on_step(obs, rew, term, trunc, info, env_id) -+ self.current_curriculum.update_on_step(task, obs, rew, term, trunc, info, env_id) - - def update_on_step_batch(self, step_results, env_id=None): - if self.current_curriculum.requires_step_updates: -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py -index a6d469e..b848d69 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py 
-+++ b/syllabus/examples/training_scripts/cleanrl_procgen_centralplr.py -@@ -14,6 +14,7 @@ import gym as openai_gym - import gymnasium as gym - import numpy as np - import procgen # noqa: F401 -+from procgen import ProcgenEnv - import torch - import torch.nn as nn - import torch.optim as optim -@@ -21,10 +22,10 @@ from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 - from torch.utils.tensorboard import SummaryWriter - - from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum --from syllabus.curricula import DomainRandomization, LearningProgressCurriculum, CentralizedPrioritizedLevelReplay -+from syllabus.curricula import CentralizedPrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum - from syllabus.examples.models import ProcgenAgent - from syllabus.examples.task_wrappers import ProcgenTaskWrapper --from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize -+from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - - def parse_args(): -@@ -46,6 +47,8 @@ def parse_args(): - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="weather to capture videos of the agent performances (check out `videos` folder)") -+ parser.add_argument("--logging-dir", type=str, default=".", -+ help="the base directory for logging and wandb storage.") - - # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="starpilot", -@@ -124,15 +127,15 @@ PROCGEN_RETURN_BOUNDS = { - } - - --def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): -+def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) -- env = ProcgenTaskWrapper(env, env_id, seed=seed) -- if curriculum_components is not None: -+ if curriculum is not None: -+ env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, -- curriculum_components, -+ curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, - ) -@@ -147,36 +150,38 @@ def wrap_vecenv(vecenv): - return vecenv - - --def level_replay_evaluate( -+def full_level_replay_evaluate( - env_name, - policy, - num_episodes, - device, -- num_levels=0 -+ num_levels=1 # Not used - ): - policy.eval() -- eval_envs = gym.vector.SyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) -- for i in range(1) -- ] -+ -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=1, start_level=0, distribution_mode="easy", paint_vel_info=False - ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - -- eval_episode_rewards = [] -+ # Seed environments -+ seeds = [int.from_bytes(os.urandom(3), byteorder="little") for _ in range(num_episodes)] -+ for i, seed in enumerate(seeds): -+ eval_envs.seed(seed, i) -+ - eval_obs, _ = eval_envs.reset() -+ eval_episode_rewards = [-1] * num_episodes - -- while len(eval_episode_rewards) < num_episodes: -+ while -1 in eval_episode_rewards: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - -- eval_obs, _, truncs, terms, infos = 
eval_envs.step(np.array([eval_action.cpu().numpy()])) -- -- for info in infos: -- if 'episode' in info.keys(): -- eval_episode_rewards.append(info['episode']['r']) -+ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -+ for i, info in enumerate(infos): -+ if 'episode' in info.keys() and eval_episode_rewards[i] == -1: -+ eval_episode_rewards[i] = info['episode']['r'] - -- eval_envs.close() - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -@@ -185,8 +190,7 @@ def level_replay_evaluate( - return mean_returns, stddev_returns, normalized_mean_returns - - --def fast_level_replay_evaluate( -- eval_envs, -+def level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -194,9 +198,13 @@ def fast_level_replay_evaluate( - num_levels=0 - ): - policy.eval() -- possible_seeds = np.arange(0, num_levels + 1) -- eval_obs, _ = eval_envs.reset(seed=list(np.random.choice(possible_seeds, size=num_episodes))) - -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -+ ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") -+ eval_envs = wrap_vecenv(eval_envs) -+ eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: -@@ -231,10 +239,11 @@ if __name__ == "__main__": - name=run_name, - monitor_gym=True, - save_code=True, -- # dir="/fs/nexus-scratch/rsulli/" -+ dir=args.logging_dir - ) -- wandb.run.log_code("./syllabus/examples") -- writer = SummaryWriter(f"./runs/{run_name}") -+ # wandb.run.log_code("./syllabus/examples") -+ -+ writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), -@@ -250,7 +259,7 @@ if __name__ == "__main__": - print("Device:", device) - - # Curriculum setup -- task_queue = update_queue = None -+ curriculum = None - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) -@@ -273,6 +282,16 @@ if __name__ == "__main__": - elif args.curriculum_method == "lp": - print("Using learning progress.") - curriculum = LearningProgressCurriculum(sample_env.task_space) -+ elif args.curriculum_method == "sq": -+ print("Using sequential curriculum.") -+ curricula = [] -+ stopping = [] -+ for i in range(199): -+ curricula.append(i + 1) -+ stopping.append("steps>=50000") -+ curricula.append(list(range(i + 1))) -+ stopping.append("steps>=50000") -+ curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) - else: - raise ValueError(f"Unknown curriculum method {args.curriculum_method}") - curriculum = make_multiprocessing_curriculum(curriculum) -@@ -285,7 +304,7 @@ if __name__ == "__main__": - make_env( - args.env_id, - args.seed + i, -- curriculum_components=curriculum.get_components() if args.curriculum else None, -+ curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) -@@ -293,22 +312,6 @@ if __name__ == "__main__": - ) - envs = wrap_vecenv(envs) - -- test_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=0) -- for i in range(args.num_eval_episodes) -- ] -- ) -- test_eval_envs = wrap_vecenv(test_eval_envs) -- -- train_eval_envs = 
gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=200) -- for i in range(args.num_eval_episodes) -- ] -- ) -- train_eval_envs = wrap_vecenv(train_eval_envs) -- - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( -@@ -369,6 +372,8 @@ if __name__ == "__main__": - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) -+ if curriculum is not None: -+ curriculum.log_metrics(writer, global_step) - break - - # Syllabus curriculum update -@@ -388,8 +393,6 @@ if __name__ == "__main__": - }, - } - curriculum.update(update) -- #if args.curriculum: -- # curriculum.log_metrics(writer, global_step) - - # bootstrap value if not done - with torch.no_grad(): -@@ -487,8 +490,18 @@ if __name__ == "__main__": - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent -- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) -- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) -+ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ full_mean_eval_returns, full_stddev_eval_returns, full_normalized_mean_eval_returns = full_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) -+ full_mean_train_returns, full_stddev_train_returns, full_normalized_mean_train_returns = full_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) -@@ -502,12 +515,21 @@ if __name__ == "__main__": - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) -+ - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) -- writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_mean_episode_return", full_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_normalized_mean_eval_return", full_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/full_stddev_eval_return", full_stddev_eval_returns, global_step) -+ - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) -- 
writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) -+ writer.add_scalar("train_eval/full_mean_episode_return", full_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/full_normalized_mean_train_return", full_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/full_stddev_train_return", full_stddev_train_returns, global_step) -+ - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() -diff --git a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -index e13c22e..70a27f6 100644 ---- a/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -+++ b/syllabus/examples/training_scripts/cleanrl_procgen_plr.py -@@ -14,6 +14,7 @@ import gym as openai_gym - import gymnasium as gym - import numpy as np - import procgen # noqa: F401 -+from procgen import ProcgenEnv - import torch - import torch.nn as nn - import torch.optim as optim -@@ -23,8 +24,8 @@ from torch.utils.tensorboard import SummaryWriter - from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum - from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum - from syllabus.examples.models import ProcgenAgent --from syllabus.examples.task_wrappers import ProcgenTaskWrapper --from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize -+from syllabus.examples.task_wrappers import ProcgenTaskWrapper, MinigridTaskWrapper -+from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs - - - def parse_args(): -@@ -126,18 +127,17 @@ PROCGEN_RETURN_BOUNDS = { - } - - --def make_env(env_id, seed, curriculum_components=None, start_level=0, num_levels=1): -+def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) -- env = ProcgenTaskWrapper(env, env_id, seed=seed) -- if curriculum_components is not None: -+ if curriculum is not None: -+ env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, -- curriculum_components, -+ curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, -- buffer_size=4, - ) - return env - return thunk -@@ -150,7 +150,7 @@ def wrap_vecenv(vecenv): - return vecenv - - --def level_replay_evaluate( -+def slow_level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -158,28 +158,24 @@ def level_replay_evaluate( - num_levels=0 - ): - policy.eval() -- eval_envs = gym.vector.SyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, task_queue, update_queue, num_levels=num_levels) -- for i in range(1) -- ] -+ -+ eval_envs = ProcgenEnv( -+ num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) -- -- eval_episode_rewards = [] - eval_obs, _ = eval_envs.reset() -+ eval_episode_rewards = [] - - while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - -- eval_obs, _, truncs, terms, infos = 
eval_envs.step(np.array([eval_action.cpu().numpy()])) -- -- for info in infos: -+ eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -+ for i, info in enumerate(infos): - if 'episode' in info.keys(): - eval_episode_rewards.append(info['episode']['r']) - -- eval_envs.close() - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -@@ -188,8 +184,7 @@ def level_replay_evaluate( - return mean_returns, stddev_returns, normalized_mean_returns - - --def fast_level_replay_evaluate( -- eval_envs, -+def level_replay_evaluate( - env_name, - policy, - num_episodes, -@@ -198,15 +193,12 @@ def fast_level_replay_evaluate( - ): - policy.eval() - -- # Choose evaluation seeds -- if num_levels == 0: -- seeds = np.random.randint(0, 2 ** 16 - 1, size=num_episodes) -- else: -- seeds = np.random.choice(np.arange(0, num_levels), size=num_episodes) -- -- seed_envs = [(int(seed), env) for seed, env in zip(seeds, range(num_episodes))] -- eval_obs, _ = eval_envs.reset(seed=seed_envs) -- -+ eval_envs = ProcgenEnv( -+ num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -+ ) -+ eval_envs = VecExtractDictObs(eval_envs, "rgb") -+ eval_envs = wrap_vecenv(eval_envs) -+ eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [-1] * num_episodes - - while -1 in eval_episode_rewards: -@@ -251,9 +243,9 @@ if __name__ == "__main__": - save_code=True, - dir=args.logging_dir - ) -- wandb.run.log_code(os.path.join(args.logging_dir, "/syllabus/examples")) -+ # wandb.run.log_code("./syllabus/examples") - -- writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) -+ writer = SummaryWriter(os.path.join(args.logging_dir, f"./runs/{run_name}")) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), -@@ -273,7 +265,9 @@ if __name__ == "__main__": - if args.curriculum: - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") - sample_env = GymV21CompatibilityV0(env=sample_env) -- sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) -+ # code to edit -+ # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) -+ sample_env = MinigridTaskWrapper(sample_env, args.env_id, seed=args.seed) - - # Intialize Curriculum Method - if args.curriculum_method == "plr": -@@ -316,7 +310,7 @@ if __name__ == "__main__": - make_env( - args.env_id, - args.seed + i, -- curriculum_components=curriculum.get_components() if args.curriculum else None, -+ curriculum=curriculum if args.curriculum else None, - num_levels=1 if args.curriculum else 0 - ) - for i in range(args.num_envs) -@@ -324,22 +318,6 @@ if __name__ == "__main__": - ) - envs = wrap_vecenv(envs) - -- test_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=0) -- for i in range(args.num_eval_episodes) -- ] -- ) -- test_eval_envs = wrap_vecenv(test_eval_envs) -- -- train_eval_envs = gym.vector.AsyncVectorEnv( -- [ -- make_env(args.env_id, args.seed + i, num_levels=200) -- for i in range(args.num_eval_episodes) -- ] -- ) -- train_eval_envs = wrap_vecenv(train_eval_envs) -- - assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" - print("Creating agent") - agent = ProcgenAgent( -@@ -500,8 +478,18 @@ if __name__ == "__main__": - explained_var = np.nan if var_y 
== 0 else 1 - np.var(y_true - y_pred) / var_y - - # Evaluate agent -- mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = fast_level_replay_evaluate(test_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=0) -- mean_train_returns, stddev_train_returns, normalized_mean_train_returns = fast_level_replay_evaluate(train_eval_envs, args.env_id, agent, args.num_eval_episodes, device, num_levels=200) -+ mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ slow_mean_eval_returns, slow_stddev_eval_returns, slow_normalized_mean_eval_returns = slow_level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=0 -+ ) -+ mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) -+ slow_mean_train_returns, slow_stddev_train_returns, slow_normalized_mean_train_returns = level_replay_evaluate( -+ args.env_id, agent, args.num_eval_episodes, device, num_levels=200 -+ ) - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) -@@ -515,12 +503,21 @@ if __name__ == "__main__": - writer.add_scalar("losses/explained_variance", explained_var, global_step) - print("SPS:", int(global_step / (time.time() - start_time))) - writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) -+ - writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) - writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) - writer.add_scalar("test_eval/stddev_eval_return", mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_mean_episode_return", slow_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_normalized_mean_eval_return", slow_normalized_mean_eval_returns, global_step) -+ writer.add_scalar("test_eval/slow_stddev_eval_return", slow_mean_eval_returns, global_step) -+ - writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) - writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) - writer.add_scalar("train_eval/stddev_train_return", mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_mean_episode_return", slow_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_normalized_mean_train_return", slow_normalized_mean_train_returns, global_step) -+ writer.add_scalar("train_eval/slow_stddev_train_return", slow_mean_train_returns, global_step) -+ - writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) - - envs.close() -diff --git a/syllabus/examples/utils/vecenv.py b/syllabus/examples/utils/vecenv.py -index 6e5a0a9..af3b187 100644 ---- a/syllabus/examples/utils/vecenv.py -+++ b/syllabus/examples/utils/vecenv.py -@@ -1,7 +1,6 @@ - import time - from collections import deque - --import gym - import numpy as np - - -@@ -154,12 +153,20 @@ class VecEnvObservationWrapper(VecEnvWrapper): - pass - - def reset(self): -- obs, infos = self.venv.reset() -+ outputs = self.venv.reset() -+ if len(outputs) == 2: -+ obs, infos = outputs -+ else: -+ obs, infos = outputs, {} - return self.process(obs), infos - - def step_wait(self): -- print(self.venv) -- obs, rews, terms, truncs, infos = 
self.venv.step_wait() -+ env_outputs = self.venv.step_wait() -+ if len(env_outputs) == 4: -+ obs, rews, terms, infos = env_outputs -+ truncs = np.zeros_like(terms) -+ else: -+ obs, rews, terms, truncs, infos = env_outputs - return self.process(obs), rews, terms, truncs, infos - - -@@ -209,7 +216,10 @@ class VecNormalize(VecEnvWrapper): - - def reset(self, seed=None): - self.ret = np.zeros(self.num_envs) -- obs, infos = self.venv.reset(seed=seed) -+ if seed is not None: -+ obs, infos = self.venv.reset(seed=seed) -+ else: -+ obs, infos = self.venv.reset() - return self._obfilt(obs), infos - - -@@ -228,7 +238,10 @@ class VecMonitor(VecEnvWrapper): - self.eplen_buf = deque([], maxlen=keep_buf) - - def reset(self, seed=None): -- obs, infos = self.venv.reset(seed=seed) -+ if seed is not None: -+ obs, infos = self.venv.reset(seed=seed) -+ else: -+ obs, infos = self.venv.reset() - self.eprets = np.zeros(self.num_envs, 'f') - self.eplens = np.zeros(self.num_envs, 'i') - return obs, infos -@@ -239,7 +252,8 @@ class VecMonitor(VecEnvWrapper): - self.eprets += rews - self.eplens += 1 - # Convert dict of lists to list of dicts -- infos = [dict(zip(infos, t)) for t in zip(*infos.values())] -+ if isinstance(infos, dict): -+ infos = [dict(zip(infos, t)) for t in zip(*infos.values())] - newinfos = list(infos[:]) - for i in range(len(dones)): - if dones[i]: -diff --git a/syllabus/task_space/task_space.py b/syllabus/task_space/task_space.py -index 316e2f2..1ef674b 100644 ---- a/syllabus/task_space/task_space.py -+++ b/syllabus/task_space/task_space.py -@@ -7,20 +7,53 @@ from gymnasium.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Sp - - class TaskSpace(): - def __init__(self, gym_space: Union[Space, int], tasks=None): -- if isinstance(gym_space, int): -- # Syntactic sugar for discrete space -- gym_space = Discrete(gym_space) -+ -+ if not isinstance(gym_space, Space): -+ gym_space = self._create_gym_space(gym_space) - - self.gym_space = gym_space - -- # Autogenerate task names for discrete spaces -- if isinstance(gym_space, Discrete): -- if tasks is None: -- tasks = range(gym_space.n) -+ # Autogenerate task names -+ if tasks is None: -+ tasks = self._generate_task_names(gym_space) - - self._tasks = set(tasks) if tasks is not None else None - self._encoder, self._decoder = self._make_task_encoder(gym_space, tasks) - -+ def _create_gym_space(self, gym_space): -+ if isinstance(gym_space, int): -+ # Syntactic sugar for discrete space -+ gym_space = Discrete(gym_space) -+ elif isinstance(gym_space, tuple): -+ # Syntactic sugar for discrete space -+ gym_space = MultiDiscrete(gym_space) -+ elif isinstance(gym_space, list): -+ # Syntactic sugar for tuple space -+ spaces = [] -+ for i, value in enumerate(gym_space): -+ spaces[i] = self._create_gym_space(value) -+ gym_space = Tuple(spaces) -+ elif isinstance(gym_space, dict): -+ # Syntactic sugar for dict space -+ spaces = {} -+ for key, value in gym_space.items(): -+ spaces[key] = self._create_gym_space(value) -+ gym_space = Dict(spaces) -+ return gym_space -+ -+ def _generate_task_names(self, gym_space): -+ if isinstance(gym_space, Discrete): -+ tasks = tuple(range(gym_space.n)) -+ elif isinstance(gym_space, MultiDiscrete): -+ tasks = [tuple(range(dim)) for dim in gym_space.nvec] -+ elif isinstance(gym_space, Tuple): -+ tasks = [self._generate_task_names(value) for value in gym_space.spaces] -+ elif isinstance(gym_space, Dict): -+ tasks = {key: tuple(self._generate_task_names(value)) for key, value in gym_space.spaces.items()} -+ else: -+ tasks = 
None -+ return tasks -+ - def _make_task_encoder(self, space, tasks): - if isinstance(space, Discrete): - assert space.n == len(tasks), f"Number of tasks ({space.n}) must match number of discrete options ({len(tasks)})" -@@ -28,14 +61,46 @@ class TaskSpace(): - self._decode_map = {i: task for i, task in enumerate(tasks)} - encoder = lambda task: self._encode_map[task] if task in self._encode_map else None - decoder = lambda task: self._decode_map[task] if task in self._decode_map else None -+ -+ elif isinstance(space, Box): -+ encoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None -+ decoder = lambda task: task if space.contains(np.asarray(task, dtype=space.dtype)) else None - elif isinstance(space, Tuple): -- for i, task in enumerate(tasks): -- assert self.count_tasks(space.spaces[i]) == len(task), "Each task must have number of components equal to Tuple space length. Got {len(task)} components and space length {self.count_tasks(space.spaces[i])}." -+ -+ assert len(space.spaces) == len(tasks), f"Number of task ({len(space.spaces)})must match options in Tuple ({len(tasks)})" - results = [list(self._make_task_encoder(s, t)) for (s, t) in zip(space.spaces, tasks)] - encoders = [r[0] for r in results] - decoders = [r[1] for r in results] - encoder = lambda task: [e(t) for e, t in zip(encoders, task)] - decoder = lambda task: [d(t) for d, t in zip(decoders, task)] -+ -+ elif isinstance(space, MultiDiscrete): -+ assert len(space.nvec) == len(tasks), f"Number of steps in a tasks ({len(space.nvec)}) must match number of discrete options ({len(tasks)})" -+ -+ combinations = [p for p in itertools.product(*tasks)] -+ encode_map = {task: i for i, task in enumerate(combinations)} -+ decode_map = {i: task for i, task in enumerate(combinations)} -+ -+ encoder = lambda task: encode_map[task] if task in encode_map else None -+ decoder = lambda task: decode_map[task] if task in decode_map else None -+ -+ elif isinstance(space, Dict): -+ -+ def helper(task, spaces, tasks, action="encode"): -+ # Iteratively encodes or decodes each space in the dictionary -+ output = {} -+ if (isinstance(spaces, dict) or isinstance(spaces, Dict)): -+ for key, value in spaces.items(): -+ if (isinstance(value, dict) or isinstance(value, Dict)): -+ temp = helper(task[key], value, tasks[key], action) -+ output.update({key: temp}) -+ else: -+ encoder, decoder = self._make_task_encoder(value, tasks[key]) -+ output[key] = encoder(task[key]) if action == "encode" else decoder(task[key]) -+ return output -+ -+ encoder = lambda task: helper(task, space.spaces, tasks, "encode") -+ decoder = lambda task: helper(task, space.spaces, tasks, "decode") - else: - encoder = lambda task: task - decoder = lambda task: task -@@ -152,6 +217,7 @@ class TaskSpace(): - return Discrete(self.gym_space.n + amount) - - def sample(self): -+ assert isinstance(self.gym_space, Discrete) or isinstance(self.gym_space, Box) or isinstance(self.gym_space, Dict) or isinstance(self.gym_space, Tuple) - return self.decode(self.gym_space.sample()) - - def list_tasks(self): -diff --git a/syllabus/task_space/test_task_space.py b/syllabus/task_space/test_task_space.py -index 0ec6b4e..109d0a7 100644 ---- a/syllabus/task_space/test_task_space.py -+++ b/syllabus/task_space/test_task_space.py -@@ -2,33 +2,148 @@ import gymnasium as gym - from syllabus.task_space import TaskSpace - - if __name__ == "__main__": -+ # Discrete Tests - task_space = TaskSpace(gym.spaces.Discrete(3), ["a", "b", "c"]) -+ - assert task_space.encode("a") == 0, 
f"Expected 0, got {task_space.encode('a')}" - assert task_space.encode("b") == 1, f"Expected 1, got {task_space.encode('b')}" - assert task_space.encode("c") == 2, f"Expected 2, got {task_space.encode('c')}" -- assert task_space.encode("d") == None, f"Expected None, got {task_space.encode('d')}" -+ assert task_space.encode("d") is None, f"Expected None, got {task_space.encode('d')}" - - assert task_space.decode(0) == "a", f"Expected a, got {task_space.decode(0)}" - assert task_space.decode(1) == "b", f"Expected b, got {task_space.decode(1)}" - assert task_space.decode(2) == "c", f"Expected c, got {task_space.decode(2)}" -- assert task_space.decode(3) == None, f"Expected None, got {task_space.decode(3)}" -+ assert task_space.decode(3) is None, f"Expected None, got {task_space.decode(3)}" - print("Discrete tests passed!") - -+ # MultiDiscrete Tests -+ task_space = TaskSpace(gym.spaces.MultiDiscrete([3, 2]), [("a", "b", "c"), (1, 0)]) -+ -+ assert task_space.encode(('a', 1)) == 0, f"Expected 0, got {task_space.encode(('a', 1))}" -+ assert task_space.encode(('b', 0)) == 3, f"Expected 3, got {task_space.encode(('b', 0))}" -+ assert task_space.encode(('c', 1)) == 4, f"Expected 4, got {task_space.encode(('c', 1))}" -+ -+ assert task_space.decode(3) == ('b', 0), f"Expected ('b', 0), got {task_space.decode(3)}" -+ assert task_space.decode(5) == ('c', 0), f"Expected ('c', 0), got {task_space.decode(5)}" -+ print("MultiDiscrete tests passed!") -+ -+ # Box Tests - task_space = TaskSpace(gym.spaces.Box(low=0, high=1, shape=(2,)), [(0, 0), (0, 1), (1, 0), (1, 1)]) -+ - assert task_space.encode([0.0, 0.0]) == [0.0, 0.0], f"Expected [0.0, 0.0], got {task_space.encode([0.0, 0.0])}" - assert task_space.encode([0.0, 0.1]) == [0.0, 0.1], f"Expected [0.0, 0.1], got {task_space.encode([0.0, 0.1])}" - assert task_space.encode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.encode([0.1, 0.1])}" - assert task_space.encode([1.0, 0.1]) == [1.0, 0.1], f"Expected [1.0, 0.1], got {task_space.encode([1.0, 0.1])}" - assert task_space.encode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.encode([1.0, 1.0])}" -- assert task_space.encode([1.2, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -- assert task_space.encode([1.0, 1.2]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -- assert task_space.encode([-0.1, 1.0]) == None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([1.2, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([1.0, 1.2]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" -+ assert task_space.encode([-0.1, 1.0]) is None, f"Expected None, got {task_space.encode([1.2, 1.0])}" - - assert task_space.decode([1.0, 1.0]) == [1.0, 1.0], f"Expected [1.0, 1.0], got {task_space.decode([1.0, 1.0])}" - assert task_space.decode([0.1, 0.1]) == [0.1, 0.1], f"Expected [0.1, 0.1], got {task_space.decode([0.1, 0.1])}" -- assert task_space.decode([-0.1, 1.0]) == None, f"Expected None, got {task_space.decode([1.2, 1.0])}" -+ assert task_space.decode([-0.1, 1.0]) is None, f"Expected None, got {task_space.decode([1.2, 1.0])}" - print("Box tests passed!") - -+ # Tuple Tests -+ task_spaces = (gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3)) -+ task_names = ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")) -+ task_space = TaskSpace(gym.spaces.Tuple(task_spaces), task_names) -+ -+ assert task_space.encode((('a', 0), 'Y')) == [1, 1], f"Expected 0, got 
{task_space.encode((('a', 1),'Y'))}" -+ assert task_space.decode([0, 1]) == [('a', 1), 'Y'], f"Expected 0, got {task_space.decode([0, 1])}" -+ print("Tuple tests passed!") -+ -+ # Dictionary Tests -+ task_spaces = gym.spaces.Dict({ -+ "ext_controller": gym.spaces.MultiDiscrete([5, 2, 2]), -+ "inner_state": gym.spaces.Dict( -+ { -+ "charge": gym.spaces.Discrete(10), -+ "system_checks": gym.spaces.Tuple((gym.spaces.MultiDiscrete([3, 2]), gym.spaces.Discrete(3))), -+ "job_status": gym.spaces.Dict( -+ { -+ "task": gym.spaces.Discrete(5), -+ "progress": gym.spaces.Box(low=0, high=1, shape=(2,)), -+ } -+ ), -+ } -+ ), -+ }) -+ task_names = { -+ "ext_controller": [("a", "b", "c", "d", "e"), (1, 0), ("X", "Y")], -+ "inner_state": { -+ "charge": [0, 1, 13, 3, 94, 35, 6, 37, 8, 9], -+ "system_checks": ((("a", "b", "c"), (1, 0)), ("X", "Y", "Z")), -+ "job_status": { -+ "task": ["A", "B", "C", "D", "E"], -+ "progress": [(0, 0), (0, 1), (1, 0), (1, 1)], -+ } -+ } -+ } -+ task_space = TaskSpace(task_spaces, task_names) -+ -+ test_val = { -+ "ext_controller": ('b', 1, 'X'), -+ 'inner_state': { -+ 'charge': 1, -+ 'system_checks': [('a', 0), 'Y'], -+ 'job_status': {'task': 'C', 'progress': [0.0, 0.0]} -+ } -+ } -+ decode_val = { -+ "ext_controller": 4, -+ "inner_state": { -+ "charge": 1, -+ "system_checks": [1, 1], -+ "job_status": {"progress": [0.0, 0.0], "task": 2}, -+ }, -+ } -+ -+ assert task_space.encode(test_val) == decode_val, f"Expected {decode_val}, \n but got {task_space.encode(test_val)}" -+ assert task_space.decode(decode_val) == test_val, f"Expected {test_val}, \n but got {task_space.decode(decode_val)}" -+ -+ test_val_2 = { -+ "ext_controller": ("e", 1, "Y"), -+ "inner_state": { -+ "charge": 37, -+ "system_checks": [("b", 0), "Z"], -+ "job_status": {"progress": [0.0, 0.1], "task": "D"}, -+ }, -+ } -+ decode_val_2 = { -+ "ext_controller": 17, -+ "inner_state": { -+ "charge": 7, -+ "system_checks": [3, 2], -+ "job_status": {"progress": [0.0, 0.1], "task": 3}, -+ }, -+ } -+ -+ assert task_space.encode(test_val_2) == decode_val_2, f"Expected {decode_val_2}, \n but got {task_space.encode(test_val_2)}" -+ assert task_space.decode(decode_val_2) == test_val_2, f"Expected {test_val_2}, \n but got {task_space.decode(decode_val_2)}" -+ -+ test_val_3 = { -+ "ext_controller": ("e", 1, "X"), -+ "inner_state": { -+ "charge": 8, -+ "system_checks": [("c", 0), "X"], -+ "job_status": {"progress": [0.5, 0.1], "task": "E"}, -+ }, -+ } -+ decode_val_3 = { -+ "ext_controller": 16, -+ "inner_state": { -+ "charge": 8, -+ "system_checks": [5, 0], -+ "job_status": {"progress": [0.5, 0.1], "task": 4}, -+ }, -+ } -+ -+ assert task_space.encode(test_val_3) == decode_val_3, f"Expected {decode_val_3}, \n but got {task_space.encode(test_val_3)}" -+ assert task_space.decode(decode_val_3) == test_val_3, f"Expected {test_val_3}, \n but got {task_space.decode(decode_val_3)}" -+ -+ print("Dictionary tests passed!") -+ - # Test syntactic sugar - task_space = TaskSpace(3) - assert task_space.encode(0) == 0, f"Expected 0, got {task_space.encode(0)}" -@@ -36,4 +151,32 @@ if __name__ == "__main__": - assert task_space.encode(2) == 2, f"Expected 2, got {task_space.encode(2)}" - assert task_space.encode(3) is None, f"Expected None, got {task_space.encode(3)}" - -+ task_space = TaskSpace((2, 4)) -+ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" -+ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" -+ assert task_space.encode((1, 0)) == 4, f"Expected 2, got 
{task_space.encode((1, 0))}" -+ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" -+ -+ task_space = TaskSpace((2, 4)) -+ assert task_space.encode((0, 0)) == 0, f"Expected 0, got {task_space.encode((0, 0))}" -+ assert task_space.encode((0, 1)) == 1, f"Expected 1, got {task_space.encode((0, 1))}" -+ assert task_space.encode((1, 0)) == 4, f"Expected 2, got {task_space.encode((1, 0))}" -+ assert task_space.encode((3, 3)) is None, f"Expected None, got {task_space.encode((3, 3))}" -+ -+ task_space = TaskSpace({"map": 5, "level": (4, 10), "difficulty": 3}) -+ -+ encoding = task_space.encode({"map": 0, "level": (0, 0), "difficulty": 0}) -+ expected = {"map": 0, "level": 0, "difficulty": 0} -+ -+ encoding = task_space.encode({"map": 4, "level": (3, 9), "difficulty": 2}) -+ expected = {"map": 4, "level": 39, "difficulty": 2} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" -+ -+ encoding = task_space.encode({"map": 2, "level": (2, 0), "difficulty": 1}) -+ expected = {"map": 2, "level": 20, "difficulty": 1} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" -+ -+ encoding = task_space.encode({"map": 5, "level": (2, 11), "difficulty": -1}) -+ expected = {"map": None, "level": None, "difficulty": None} -+ assert encoding == expected, f"Expected {expected}, got {encoding}" - print("All tests passed!") -diff --git a/syllabus/tests/utils.py b/syllabus/tests/utils.py -index 314a29c..98bac82 100644 ---- a/syllabus/tests/utils.py -+++ b/syllabus/tests/utils.py -@@ -57,7 +57,7 @@ def run_episode(env, new_task=None, curriculum=None, env_id=0): - action = env.action_space.sample() - obs, rew, term, trunc, info = env.step(action) - if curriculum and curriculum.requires_step_updates: -- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) -+ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) - curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) - ep_rew += rew - ep_len += 1 -@@ -87,7 +87,7 @@ def run_set_length(env, curriculum=None, episodes=None, steps=None, env_id=0, en - action = env.action_space.sample() - obs, rew, term, trunc, info = env.step(action) - if curriculum and curriculum.requires_step_updates: -- curriculum.update_on_step(obs, rew, term, trunc, info, env_id=env_id) -+ curriculum.update_on_step(env.task_space.encode(env.task), obs, rew, term, trunc, info, env_id=env_id) - curriculum.update_task_progress(env.task_space.encode(env.task), info["task_completion"], env_id=env_id) - ep_rew += rew - ep_len += 1 -diff --git a/tests/multiprocessing_smoke_tests.py b/tests/multiprocessing_smoke_tests.py -index 9db9f47..b788179 100644 ---- a/tests/multiprocessing_smoke_tests.py -+++ b/tests/multiprocessing_smoke_tests.py -@@ -21,23 +21,23 @@ nethack_env = create_nethack_env() - cartpole_env = create_cartpole_env() - - curricula = [ -- (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), -- (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), -- # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), -- (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), -- (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { -- "get_value": get_test_values, -- "device": "cpu", -- 
"num_processes": N_ENVS, -- "num_steps": 2048 -- }), -- (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), -- (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { -- 'start_values': [-0.02, 0.02], -- 'end_values': [-0.3, 0.3], -- 'total_steps': [10] -- }), -- (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), -+ (NoopCurriculum, create_nethack_env, (NetHackScore, nethack_env.task_space), {}), -+ (DomainRandomization, create_nethack_env, (nethack_env.task_space,), {}), -+ # (LearningProgressCurriculum, create_nethack_env, (nethack_env.task_space,), {}), -+ (CentralizedPrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space,), {"device": "cpu", "suppress_usage_warnings": True, "num_processes": N_ENVS}), -+ (PrioritizedLevelReplay, create_nethack_env, (nethack_env.task_space, nethack_env.observation_space), { -+ "get_value": get_test_values, -+ "device": "cpu", -+ "num_processes": N_ENVS, -+ "num_steps": 2048 -+ }), -+ (SimpleBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), {}), -+ (AnnealingBoxCurriculum, create_cartpole_env, (cartpole_env.task_space,), { -+ 'start_values': [-0.02, 0.02], -+ 'end_values': [-0.3, 0.3], -+ 'total_steps': [10] -+ }), -+ (SequentialCurriculum, create_nethack_env, ([CentralizedPrioritizedLevelReplay(nethack_env.task_space, device="cpu", suppress_usage_warnings=True, num_processes=N_ENVS), PrioritizedLevelReplay(nethack_env.task_space, nethack_env.observation_space, get_value=get_test_values, device="cpu", num_processes=N_ENVS, num_steps=2048), NetHackScore, [NetHackScout, NetHackStaircase]], ["steps>1000", "episodes>=50", "tasks>20"], nethack_env.task_space), {}), - ] - - test_names = [curriculum_args[0].__name__ for curriculum_args in curricula] diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-metadata.json b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-metadata.json deleted file mode 100644 index 40a4901d..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-metadata.json +++ /dev/null @@ -1,167 +0,0 @@ -{ - "os": "Linux-3.10.0-1160.11.1.el7.x86_64-x86_64-with-glibc2.10", - "python": "3.8.5", - "heartbeatAt": "2024-04-23T04:14:00.420002", - "startedAt": "2024-04-23T04:13:59.807444", - "docker": null, - "cuda": "10.1.243", - "args": [ - "--curriculum", - "True", - "--track", - "True", - "--env-id", - "bigfish" - ], - "state": "running", - "program": "cleanrl_procgen_plr.py", - "codePathLocal": "cleanrl_procgen_plr.py", - "codePath": "syllabus/examples/training_scripts/cleanrl_procgen_plr.py", - "git": { - "remote": "https://github.com/RoseyGreenBlue/Syllabus.git", - "commit": "63dc8f62e4d9d567eb92bb2f6c2bb186a0dc8ffb" - }, - "email": "djhaayusv04@gmail.com", - "root": "/data/averma/MARL/Syllabus", - "host": "f411843fc70b", - "username": "root", - "executable": "/home/user/miniconda/envs/test2_py/bin/python", - "cpu_count": 12, - "cpu_count_logical": 24, - "cpu_freq": { - "current": 1261.035125, - "min": 1200.0, - "max": 3700.0 - }, - "cpu_freq_per_core": [ - { - 
"current": 1500.573, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1204.028, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1199.877, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1385.607, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1783.215, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1370.458, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.085, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1216.064, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.5, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1441.638, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1207.141, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1397.229, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1199.877, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1499.951, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1204.858, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.292, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1202.575, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1200.085, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1244.079, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1243.872, - "min": 1200.0, - "max": 3700.0 - }, - { - "current": 1210.253, - "min": 1200.0, - "max": 3700.0 - } - ], - "disk": { - "/": { - "total": 5952.626953125, - "used": 988.7802200317383 - } - }, - "memory": { - "total": 251.63711166381836 - } -} diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-summary.json b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-summary.json deleted file mode 100644 index 4ac1ba99..00000000 --- a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/files/wandb-summary.json +++ /dev/null @@ -1 +0,0 @@ -{"_wandb": {"runtime": 3}} \ No newline at end of file diff --git a/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/run-4m0uhqaw.wandb b/syllabus/examples/training_scripts/wandb/run-20240423_041359-4m0uhqaw/run-4m0uhqaw.wandb deleted file mode 100644 index 574c4e238df8ee3afeed73d09d49399c09331206..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2455 zcmbVOON$&;6z)6yNX<>pGv;AMAr>fuA-C(@)k6q6o2FK(52|9>gwrCJ0RE$JzeL!=k=X)?y2q9 zpWFHQ>&Cl{n=keoNI(-Li5E=YGhN3pOqn|Rpl@1^=Xk!R4F&_|yRvs5HNNd)Ea;J3 zjNYKPT0i~yecD38(gex>tu1vmYd9Jjw?prCYTZNkg}EhB$PdDhIz=u$=wJy8Y(KSQ z8nFpFEx941aTeNXmbgO}+i93d7~@vXm^VR4!B62+JjvXV?J>@>D2Ric2kr#5cvdjH zf?ICk(b?lFZu5+~d$Z?f@a8QVdroP7m$|%li>1Se@M+wg9$_&L_yK6fMHu3>0&?0ifii;*2G|a1uh!dZDh=0lmF1Ua9%DE$^$ z7Pv)NfbP`qdAu#=I}v3MseSnrj3&IugZ+%h%KwR}@$x;O9)v(KD? 
z>Nu)zy6{i2Oe&YS?;N>yKL5_opN31TIYH}ismO*x{XXp<1bhpM@WwHK-khTDn3svBvZT z3}*Pyn{gC_`96fqW1!k%|0o3GKDTHc#VU)n+TbWORTJ@M$%wK@Ng2ruJC2j{wcY|L zV6<qYQLrb%B?v2s6b6UXF(PZ)W5}qYKp3=}%KUo|-APz)e%1r>`zjqY`#$4DLZADQ){wzLQnTc4GWCw(fX#npBA$w^D_NS`Lnx>F}Dz4(r^aM Date: Fri, 31 May 2024 22:42:37 +0000 Subject: [PATCH 03/10] minigrid wrapper nearly done - see syllabus/examples/training_scripts/test_minigrid_wrapper.py --- .../minigrid_task_wrapper_verma.py | 45 +- syllabus/examples/training_scripts/core.10201 | 0 syllabus/examples/training_scripts/core.10374 | 0 .../training_scripts/test_minigrid_wrapper.py | 450 +++++++++++++++--- 4 files changed, 422 insertions(+), 73 deletions(-) create mode 100644 syllabus/examples/training_scripts/core.10201 create mode 100644 syllabus/examples/training_scripts/core.10374 diff --git a/syllabus/examples/task_wrappers/minigrid_task_wrapper_verma.py b/syllabus/examples/task_wrappers/minigrid_task_wrapper_verma.py index 3e36b8e7..cf440903 100644 --- a/syllabus/examples/task_wrappers/minigrid_task_wrapper_verma.py +++ b/syllabus/examples/task_wrappers/minigrid_task_wrapper_verma.py @@ -2,27 +2,50 @@ import numpy as np from syllabus.core import TaskWrapper from syllabus.task_space import TaskSpace -from minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper - +from gym_minigrid.wrappers import FullyObsWrapper, ImgObsWrapper +from shimmy.openai_gym_compatibility import GymV21CompatibilityV0 +from gymnasium.spaces import Box class MinigridTaskWrapperVerma(TaskWrapper): def __init__(self, env: gym.Env, env_id, seed=0): super().__init__(env) + self.env.unwrapped.seed(seed) self.task_space = TaskSpace(gym.spaces.Discrete(200), list(np.arange(0, 200))) self.env_id = env_id self.task = seed self.episode_return = 0 - - env_fn = [partial(self._make_minigrid_env, env_name, seeds[i]) for i in range(num_envs)] + m, n, c = self.env.observation_space.shape + self.observation_space = Box( + self.observation_space.low[0, 0, 0], + self.observation_space.high[0, 0, 0], + [c, m, n], + dtype=self.observation_space.dtype) + + def observation(self, obs): + obs = obs.transpose(2, 0, 1) + return obs + + def reset(self, new_task=None, **kwargs): + self.episode_return = 0.0 + if new_task is not None: + self.change_task(new_task) + obs, info = self.env.reset(**kwargs) + return self.observation(obs), info - self.observation_space = self.env.observation_space + def change_task(self, new_task: int): + """ + Change task by directly editing environment class. - @staticmethod - def _make_minigrid_env(env_name, seed): + Ignores requests for unknown tasks or task changes outside of a reset. 
+ """ + seed = int(new_task) + self.task = seed self.seed(seed) - env = FullyObsWrapper(env) - env = ImgObsWrapper(env) - return env def seed(self, seed): - self.env.gym_env.unwrapped._venv.seed(int(seed), 0) + self.env.unwrapped.seed(int(seed)) + + def step(self, action): + obs, rew, term, trunc, info = self.env.step(action) + self.episode_return += rew + return self.observation(obs), rew, term, trunc, info diff --git a/syllabus/examples/training_scripts/core.10201 b/syllabus/examples/training_scripts/core.10201 new file mode 100644 index 00000000..e69de29b diff --git a/syllabus/examples/training_scripts/core.10374 b/syllabus/examples/training_scripts/core.10374 new file mode 100644 index 00000000..e69de29b diff --git a/syllabus/examples/training_scripts/test_minigrid_wrapper.py b/syllabus/examples/training_scripts/test_minigrid_wrapper.py index 092d8848..fd9645c7 100644 --- a/syllabus/examples/training_scripts/test_minigrid_wrapper.py +++ b/syllabus/examples/training_scripts/test_minigrid_wrapper.py @@ -17,12 +17,17 @@ from torch.utils.tensorboard import SummaryWriter from syllabus.core import MultiProcessingSyncWrapper, make_multiprocessing_curriculum -from syllabus.curricula import PrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum -from syllabus.examples.models import ProcgenAgent -from syllabus.examples.task_wrappers import ProcgenTaskWrapper, MinigridTaskWrapper +from syllabus.curricula import CentralizedPrioritizedLevelReplay, DomainRandomization, LearningProgressCurriculum, SequentialCurriculum +from syllabus.examples.models import ProcgenAgent, MinigridAgent +from syllabus.examples.task_wrappers import ProcgenTaskWrapper from syllabus.examples.utils.vecenv import VecMonitor, VecNormalize, VecExtractDictObs + +from gym_minigrid.wrappers import FullyObsWrapper, ImgObsWrapper sys.path.append("/data/averma/MARL/Syllabus/syllabus/examples/task_wrappers") +sys.path.append("/data/averma/MARL/Syllabus/syllabus/examples/models") +from minigrid_model_verma import * from minigrid_task_wrapper_verma import * +import torch.nn as nn def parse_args(): @@ -124,6 +129,26 @@ def parse_args(): } +def make_env_minigrid(env_name, seed, curriculum=None): + def thunk(): + env = openai_gym.make(env_name) + if curriculum is not None: + env = FullyObsWrapper(env) + env = ImgObsWrapper(env) + env = GymV21CompatibilityV0(env=env) + env = MinigridTaskWrapperVerma(env=env, env_id=env_name, seed=seed) + env = MultiProcessingSyncWrapper( + env, + curriculum.get_components(), + update_on_step=False, + task_space=env.task_space, + ) + else: + env = GymV21CompatibilityV0(env=env) + return env + + return thunk + def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): def thunk(): env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) @@ -147,41 +172,40 @@ def wrap_vecenv(vecenv): return vecenv -def slow_level_replay_evaluate( - env_name, - policy, - num_episodes, - device, - num_levels=0 -): - policy.eval() - - eval_envs = ProcgenEnv( - num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False - ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") - eval_envs = wrap_vecenv(eval_envs) - eval_obs, _ = eval_envs.reset() - eval_episode_rewards = [] - - while len(eval_episode_rewards) < num_episodes: - with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) - - 
eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) - for i, info in enumerate(infos): - if 'episode' in info.keys(): - eval_episode_rewards.append(info['episode']['r']) - - mean_returns = np.mean(eval_episode_rewards) - stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] - normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) - policy.train() - return mean_returns, stddev_returns, normalized_mean_returns - - -def level_replay_evaluate( +# def slow_level_replay_evaluate( +# env_name, +# policy, +# num_episodes, +# device, +# num_levels=0 +# ): +# policy.eval() +# +# eval_envs = ProcgenEnv( +# num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False +# ) +# eval_envs = VecExtractDictObs(eval_envs, "rgb") +# eval_envs = wrap_vecenv(eval_envs) +# eval_obs, _ = eval_envs.reset() +# eval_episode_rewards = [] +# +# while len(eval_episode_rewards) < num_episodes: +# with torch.no_grad(): +# eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) +# +# eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) +# for i, info in enumerate(infos): +# if 'episode' in info.keys(): +# eval_episode_rewards.append(info['episode']['r']) +# +# mean_returns = np.mean(eval_episode_rewards) +# stddev_returns = np.std(eval_episode_rewards) +# env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +# normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) +# policy.train() +# return mean_returns, stddev_returns, normalized_mean_returns + +def level_replay_evaluate_minidgrid( env_name, policy, num_episodes, @@ -189,20 +213,27 @@ def level_replay_evaluate( num_levels=0 ): policy.eval() - - eval_envs = ProcgenEnv( - num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False + eval_envs = gym.vector.AsyncVectorEnv( + [ + make_env_minigrid( + env_name, + args.seed + i, + curriculum=curriculum if args.curriculum else None + ) + for i in range(args.num_envs) + ] ) - eval_envs = VecExtractDictObs(eval_envs, "rgb") eval_envs = wrap_vecenv(eval_envs) eval_obs, _ = eval_envs.reset() eval_episode_rewards = [-1] * num_episodes while -1 in eval_episode_rewards: with torch.no_grad(): - eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) + eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device)) eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + # len(infos) = 64 + # num_episodes = 10 for i, info in enumerate(infos): if 'episode' in info.keys() and eval_episode_rewards[i] == -1: eval_episode_rewards[i] = info['episode']['r'] @@ -210,12 +241,49 @@ def level_replay_evaluate( # print(eval_episode_rewards) mean_returns = np.mean(eval_episode_rewards) stddev_returns = np.std(eval_episode_rewards) - env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + # env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] + env_min = 0 + env_max = 1 normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) policy.train() return mean_returns, stddev_returns, normalized_mean_returns +# def level_replay_evaluate( +# env_name, +# policy, +# num_episodes, +# device, +# num_levels=0 +# ): +# policy.eval() +# +# eval_envs = ProcgenEnv( +# num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, 
start_level=0, distribution_mode="easy", paint_vel_info=False +# ) +# eval_envs = VecExtractDictObs(eval_envs, "rgb") +# eval_envs = wrap_vecenv(eval_envs) +# eval_obs, _ = eval_envs.reset() +# eval_episode_rewards = [-1] * num_episodes +# +# while -1 in eval_episode_rewards: +# with torch.no_grad(): +# eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) +# +# eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) +# for i, info in enumerate(infos): +# if 'episode' in info.keys() and eval_episode_rewards[i] == -1: +# eval_episode_rewards[i] = info['episode']['r'] +# +# # print(eval_episode_rewards) +# mean_returns = np.mean(eval_episode_rewards) +# stddev_returns = np.std(eval_episode_rewards) +# env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] +# normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) +# policy.train() +# return mean_returns, stddev_returns, normalized_mean_returns + + def make_value_fn(): def get_value(obs): obs = np.array(obs) @@ -231,6 +299,7 @@ def print_values(obj): if __name__ == "__main__": + args = parse_args() run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" random.seed(args.seed) @@ -246,22 +315,279 @@ def print_values(obj): if args.curriculum: print("args:\n--------------") print(f"{args}\n-------------\n") - sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + + # sample_env = openai_gym.make(f"procgen-{args.env_id}-v0") + # sample_env = GymV21CompatibilityV0(env=sample_env) + # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) + + env_name = "MiniGrid-ObstructedMaze-Full-v0" + args.env_id = env_name + + + sample_env = openai_gym.make(env_name) + sample_env = FullyObsWrapper(sample_env) + sample_env = ImgObsWrapper(sample_env) sample_env = GymV21CompatibilityV0(env=sample_env) - procgen_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) - minigrid_env = MinigridTaskWrapperVerma(sample_env, args.env_id, seed=args.seed) - # print() - # print("procgen_env attr") - print_values(procgen_env.env) - - # seeds = [int.from_bytes(os.urandom(4), byteorder="little") for _ in range(args.num_envs)] - seeds = [int(s) for s in np.random.choice(10, args.num_envs)] - print(seeds) - - # print("procgen_env.env attr:") - # print_values(procgen_env.env) - # - # print("procgen_env.env.gym_env attr:") - # print_values(procgen_env.env.gym_env) - - + sample_env = MinigridTaskWrapperVerma(sample_env, args.env_id, seed=args.seed) + + print(f"has curriculum: {args.curriculum}") + + if args.curriculum_method == "plr": + print("Using prioritized level replay.") + curriculum = CentralizedPrioritizedLevelReplay( + sample_env.task_space, + num_steps=args.num_steps, + num_processes=args.num_envs, + gamma=args.gamma, + gae_lambda=args.gae_lambda, + task_sampler_kwargs_dict={"strategy": "value_l1"} + ) + elif args.curriculum_method == "dr": + print("Using domain randomization.") + curriculum = DomainRandomization(sample_env.task_space) + elif args.curriculum_method == "lp": + print("Using learning progress.") + curriculum = LearningProgressCurriculum(sample_env.task_space) + elif args.curriculum_method == "sq": + print("Using sequential curriculum.") + curricula = [] + stopping = [] + for i in range(199): + curricula.append(i + 1) + stopping.append("steps>=50000") + curricula.append(list(range(i + 1))) + stopping.append("steps>=50000") + curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) + 
else: + raise ValueError(f"Unknown curriculum method {args.curriculum_method}") + curriculum = make_multiprocessing_curriculum(curriculum) + del sample_env + + # env setup + print("Creating env") + + # dummy_env = env_test[0]() + + envs = gym.vector.AsyncVectorEnv( + [ + make_env_minigrid( + env_name, + args.seed + i, + curriculum=curriculum if args.curriculum else None + ) + for i in range(args.num_envs) + ] + ) + next_obs, _ = envs.reset() + envs = wrap_vecenv(envs) + assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" + + agent = MinigridAgentVerma( + envs.single_observation_space.shape, + envs.single_action_space.n, + arch="large", + base_kwargs={'recurrent': False, 'hidden_size': 256} + ).to(device) + optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) + + + # ALGO Logic: Storage setup + obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) + actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) + logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) + rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) + dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + values = torch.zeros((args.num_steps, args.num_envs)).to(device) + + # TRY NOT TO MODIFY: start the game + global_step = 0 + start_time = time.time() + next_obs, _ = envs.reset() + next_obs = torch.Tensor(next_obs).to(device) + next_done = torch.zeros(args.num_envs).to(device) + num_updates = args.total_timesteps // args.batch_size + episode_rewards = deque(maxlen=10) + completed_episodes = 0 + + for update in range(1, num_updates + 1): + # Annealing the rate if instructed to do so. + if args.anneal_lr: + frac = 1.0 - (update - 1.0) / num_updates + lrnow = frac * args.learning_rate + optimizer.param_groups[0]["lr"] = lrnow + + for step in range(0, args.num_steps): + global_step += 1 * args.num_envs + obs[step] = next_obs + dones[step] = next_done + + # ALGO LOGIC: action logic + with torch.no_grad(): + action, logprob, _, value = agent.get_action_and_value(next_obs) + values[step] = value.flatten() + actions[step] = action + logprobs[step] = logprob + + # TRY NOT TO MODIFY: execute the game and log data. 
+ next_obs, reward, term, trunc, info = envs.step(action.cpu().numpy()) + done = np.logical_or(term, trunc) + rewards[step] = torch.tensor(reward).to(device).view(-1) + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + completed_episodes += sum(done) + + for item in info: + if "episode" in item.keys(): + episode_rewards.append(item['episode']['r']) + print(f"global_step={global_step}, episodic_return={item['episode']['r']}") + writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) + if curriculum is not None: + curriculum.log_metrics(writer, global_step) + break + + # Syllabus curriculum update + if args.curriculum and args.curriculum_method == "plr": + with torch.no_grad(): + next_value = agent.get_value(next_obs) + tasks = envs.get_attr("task") + + update = { + "update_type": "on_demand", + "metrics": { + "value": value, + "next_value": next_value, + "rew": reward, + "dones": done, + "tasks": tasks, + }, + } + curriculum.update(update) + + # bootstrap value if not done + with torch.no_grad(): + next_value = agent.get_value(next_obs).reshape(1, -1) + if args.gae: + advantages = torch.zeros_like(rewards).to(device) + lastgaelam = 0 + for t in reversed(range(args.num_steps)): + if t == args.num_steps - 1: + nextnonterminal = 1.0 - next_done + nextvalues = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + nextvalues = values[t + 1] + delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] + advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam + returns = advantages + values + else: + returns = torch.zeros_like(rewards).to(device) + for t in reversed(range(args.num_steps)): + if t == args.num_steps - 1: + nextnonterminal = 1.0 - next_done + next_return = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + next_return = returns[t + 1] + returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return + advantages = returns - values + + # flatten the batch + b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) + b_logprobs = logprobs.reshape(-1) + b_actions = actions.reshape((-1,) + envs.single_action_space.shape) + b_advantages = advantages.reshape(-1) + b_returns = returns.reshape(-1) + b_values = values.reshape(-1) + + # Optimizing the policy and value network + b_inds = np.arange(args.batch_size) + clipfracs = [] + for epoch in range(args.update_epochs): + np.random.shuffle(b_inds) + for start in range(0, args.batch_size, args.minibatch_size): + end = start + args.minibatch_size + mb_inds = b_inds[start:end] + + _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds]) + logratio = newlogprob - b_logprobs[mb_inds] + ratio = logratio.exp() + + with torch.no_grad(): + # calculate approx_kl http://joschu.net/blog/kl-approx.html + old_approx_kl = (-logratio).mean() + approx_kl = ((ratio - 1) - logratio).mean() + clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()] + + mb_advantages = b_advantages[mb_inds] + if args.norm_adv: + mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) + + # Policy loss + pg_loss1 = -mb_advantages * ratio + pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + args.clip_coef) + pg_loss = torch.max(pg_loss1, pg_loss2).mean() + + # Value loss + newvalue = newvalue.view(-1) + if args.clip_vloss: + v_loss_unclipped = 
(newvalue - b_returns[mb_inds]) ** 2 + v_clipped = b_values[mb_inds] + torch.clamp( + newvalue - b_values[mb_inds], + -args.clip_coef, + args.clip_coef, + ) + v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 + v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) + v_loss = 0.5 * v_loss_max.mean() + else: + v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() + + entropy_loss = entropy.mean() + loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef + + optimizer.zero_grad() + loss.backward() + nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) + optimizer.step() + + if args.target_kl is not None: + if approx_kl > args.target_kl: + break + + y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() + var_y = np.var(y_true) + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y + + # Evaluate agent + mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate_minidgrid( + args.env_id, agent, args.num_eval_episodes, device, num_levels=0 + ) + mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate_minidgrid( + args.env_id, agent, args.num_eval_episodes, device, num_levels=200 + ) + + # TRY NOT TO MODIFY: record rewards for plotting purposes + writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) + writer.add_scalar("charts/episode_returns", np.mean(episode_rewards), global_step) + writer.add_scalar("losses/value_loss", v_loss.item(), global_step) + writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step) + writer.add_scalar("losses/entropy", entropy_loss.item(), global_step) + writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step) + writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step) + writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step) + writer.add_scalar("losses/explained_variance", explained_var, global_step) + print("SPS:", int(global_step / (time.time() - start_time))) + writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) + + writer.add_scalar("test_eval/mean_episode_return", mean_eval_returns, global_step) + writer.add_scalar("test_eval/normalized_mean_eval_return", normalized_mean_eval_returns, global_step) + writer.add_scalar("test_eval/stddev_eval_return", stddev_eval_returns, global_step) + + writer.add_scalar("train_eval/mean_episode_return", mean_train_returns, global_step) + writer.add_scalar("train_eval/normalized_mean_train_return", normalized_mean_train_returns, global_step) + writer.add_scalar("train_eval/stddev_train_return", stddev_train_returns, global_step) + + writer.add_scalar("curriculum/completed_episodes", completed_episodes, step) + + envs.close() + writer.close() From 824bb56a402a5055e2b4e8cec01e1b24416571e4 Mon Sep 17 00:00:00 2001 From: Aayush Date: Fri, 31 May 2024 22:43:33 +0000 Subject: [PATCH 04/10] minigrid wrapper nearly done - see syllabus/examples/training_scripts/test_minigrid_wrapper.py --- syllabus/examples/training_scripts/core.10201 | 0 syllabus/examples/training_scripts/core.10374 | 0 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 syllabus/examples/training_scripts/core.10201 delete mode 100644 syllabus/examples/training_scripts/core.10374 diff --git a/syllabus/examples/training_scripts/core.10201 b/syllabus/examples/training_scripts/core.10201 deleted file mode 100644 index e69de29b..00000000 diff --git 
a/syllabus/examples/training_scripts/core.10374 b/syllabus/examples/training_scripts/core.10374 deleted file mode 100644 index e69de29b..00000000 From d89537d184f01994313ee12e67dd981268ba7d77 Mon Sep 17 00:00:00 2001 From: RoseyGreenBlue Date: Thu, 20 Jun 2024 06:08:47 +0000 Subject: [PATCH 05/10] changed level_replay_evaluate_minigrid so that the dimensions of infos is correct --- .../.cleanrl_procgen_centralplr.py.swp | Bin 0 -> 16384 bytes .../.test_minigrid_wrapper.py.swp | Bin 0 -> 57344 bytes .../training_scripts/test_minigrid_wrapper.py | 28 ++++++++++++++++-- 3 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 syllabus/examples/training_scripts/.cleanrl_procgen_centralplr.py.swp create mode 100644 syllabus/examples/training_scripts/.test_minigrid_wrapper.py.swp diff --git a/syllabus/examples/training_scripts/.cleanrl_procgen_centralplr.py.swp b/syllabus/examples/training_scripts/.cleanrl_procgen_centralplr.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..9c53cdaef195ffc2a766d364710d3690f56147e0 GIT binary patch literal 16384 zcmeHOO>87b6)u*a5J&)p3%^n^7Gf;T&)Q@+izRF2-F2cw>&>!jyiz1IuI{dxDO_FE zuBx^@E1`%BClCUO1LB5+!~rgF;l>ehM!CQZ1d1XMmq_KtK7^*CH>Gfyh8)ATkgchzvvq zA_I|u$iRDu0arXmUWUDwg3TM@_kDA}{|VRKkbYw>|E-Xpg!CWG<^Mb6?}YT%=kos* z^1m9=UkC~`db}C(p9}dvp38qD;qSTw;m_t zx4<3XpC2aVyTAtU%3~-4ECH{5h>)KFDe#XE67n)|9r*hP2>BcEJK)>EZQv5{oA(p) z5O@Z76E8o02{_;>;3we1^Kr{_a*>OkagDhw)?3?pG?JX+_=52V8mM88Q>GMW(mxKh zp@o(rogguz2TYcnD(()o*|V)>;@d8mvE1}nmJOI0l{t<+v|HV7-AGrOR)@Mt!Ph0r zlZ?@$HJWHG+twMUtGlD+$zm>0uq(1gG)DBvaA;Y_Od+Ipty=gJ-H;<~gd6A77CaS0klMoPl8de4IA;^V#PzfE?PTX`(DucG^J{d4mkr?#yeQb z3RfCV4@AbbZCHdBwUc2I$(c&IrR{Xg(>)4Lqq}wCU6>&=Zk8A88zgkAN{zxT|lY(R*f`+|lYn^V*zLOprOhi0GhYw<;fI z;u!^6&GEK#dtAUfs^ua>J7Zz#iLcT_Atg;X#i_S+k7F=moYMzfjK$@?Wg4uhS?vGvTjXWTN-3P)nQ{ z!;3{?te@>e+V%>}hUf^3v6J)mU@#Pt4+hIU17{OOyM=S0t*{K$0TW&KvwwBp+X!*f z-71u7*KySn9p-KgiVg{aqdLs;5teN)uOaXo8jXb{98K*tM^k&H6Vmn$I8LA*P>~nf zIJ(2rEv*E4x#REe`1x33rLUegkG#*iTOxHYCibjm9kY9bL&FNJ&bDdyZhhKy8+i}v zth1`;I^%M_-iBWw=4zy+F(NG`Yh>I+8N^A@HoI$tUqAirZGOPzJzhvQ>Cjs`$8p?y zSpGEP7v_Xk9eRI)6(_sF9G0By)DpKt%nK_DaP%Y5fHjr#zyxi{4iYM!)h z?ixQOtyYU1a%#AoP_5{eQ2T-R@kAIqeNF6(2a-WUB)nA~}BdvlFGuc$T%i+ps&hlmsfMk9o#W5~iPiy*a`H`%eA zsjxMKiKyWhVjpiAjs?LFXz|bojKXz@H-~OuV^kk&r*LjL@tMuZcRJ;9v~oFG`H1N;&D`ab~Q1fBqXfpR|w9soPQCgAI?0P%_pL6Hlc7}L47!pYHPCpd%AMR|*Bs1;S$$N>u zf14ojCNUfELRf6?{0FDECw8juY|AI#FHUI{FK~n!aC|-NTbE`0U{I$Rj?V@P&7P8K zJ9LB{A(I3Dd8mFPHf7zk(o@Z==G9g<$nhW9^KuqtKbpWwN+4)(Y|q{rtP?-h0k_-t(T1d*1iF=bYN~zDu?{=T@c*d|p&2yzf(Cv-Cu# z_1EhQg((DjiSgh z|8MpGZ@;im_$%-^@F2JoTo2v~t^~Wm1)vC?1HS(=g~FG?pMdMYF0d2)1B(AuU>bY} zCI3C(-QZ2&MDPHHgZF_YZ~#=nFN1Ah7Mud^#rSXtYyu~Ox1)@21b>RseLZ+NxEBS# z3CiH>DCr*p>%kvm7`P5hgLU8}@M(+*Zv$Jwao`gu=U0RC!S~S1{}Aj2e}ha!E;m>u z|0hxcny9nFXTfWAXN!A+`T3UTv^=*bJ`qmO?R$>f>3D8SE}KEm=>{#ozRb;rv*fl0 zp40LA2ZP>XSez^f?XKGk@t1HLjjG$5AGEzrzf>%jHIZ_wSDbSC%Uy4_<+f`L*EzJ& zsRco+6!v@lK>a#2IXUGtyrw&7^=B_~TcJ1QbRZSZ7B3)jJ?Mn}**(31H>J{(G>1Pr zGyMga8?}{ zim1(z-|5GUuRr^oBjXRbxviGha^~H(mt<@e!3rB>Bh2LDoU_)#M7p?V%ZiiMk<{wiYVOZ;*a-*S>F z6D~^Y&-TulT#HcrM#Mv0Nyf{9Vkr9!n=~~#t`5bp@AkTWE9e(>;uFrM)_lDL|GV8TLSxJujWxN6tj&~qAo&#U)?-m+?&Et;>iQ)!$B`B$UX2R(P*tBhyd z#4AHBoBN94+H#Tn)*K!5Txv~E8d%-!_6I%BS@IiR5S1ic=tf2-(sny_FLX-v1+Tv7 z(9)a(I`9Fq(Q0_T$?^P}6v`@+<@lNDP{@SeUv^4;&uvdi4N-DFJ#@Og;7W>OJmV@b z%~fbvzEkyNN6=YQzc!RA9c0^EOVWnRvI=i{;uHf#>mj#liTm^u>UGh8&VnC0aa(l` z`mL5z^BkP(>Wdzgk$Uc2=K1pr{ZQ4;3w>(uYE4=qTPgkIOE;OH`hZuH&4kx+Yb`QS zTeh#I@sgIvN5-~#n;qRtF3I%)8hd@AQWKeX8es)SI-N?x>*ENCKV#VU>-vRyV0;D? 
znQ|)LGnSvwL((Ii|9E~Sj%6^_;m@q4qnD??2Taba^#1C1Ws?~X>ub&JHOoxdVN}?)PPRMaYLumDKvXQ+lhQx`);^s9a^i(JJvDL&KL^W4jNu- zI7ql`@+BUzokpp6M1Ho4q_p^_M<$sEp{8;;zub;Ib}PnXH?(=XXwYgc@2rKFX;0oM z=aTk(e935DWMq52iuDVw4Ela6tSotT-s+arKCiw#==cbE%Hj78d6TsmBdM#chorab zZ}IDW$ge~;#lNMys~6OHJ?^1=43{fK*HRpcw@F^DQ_gndv#c(M2-F9?o?jodTpM*W zQ5bN-d}CM7mw|}<+~zHLt=(R?NVZdWMk&zujK01XN(xuS&hC2W=XwU-Mh? zUN@{=yxiWmer9v>(Z1U;j7g{htMQf$Mt*axg2lT;qupZ2S1-rn{fu9B6#9!ck za67mLTn#P-GvLek4tx~c41BNvO5pSO5qucbK><95QRQ>sK5!$r82l|h1=oYO0b5qd zZ&PkET_4unmfI^8>kDp=eGt5^9|jG-UhR2I=XEf!V@7Mw74?0uxPPwLEAF3i=37C{ zZB=F3R$Ho*{k z4`TcaE57~={c!&DcpoXPP{gMEf|Fto5k1}JHY>>%HKPWZ4CBN?B6_E;}=q4Vg z*fFx0lTz6q5!W`-l75&ZU0L$IgQfCJnoq-6r(0oy*r#?>gB}x~JxslQyeR7F(3a&X zN7E-z9F)p~%wR-y3c^(hTRHZ{O1(QMO;(g)vox8eIN|KseYvw^=N@OzPG|ehEtgz$ z`9@|Lz4`#J6!B3|t`aiY4S$Yo`cX+23b!$**1|{(=0-|XnvXb)zD@;^f>zEhaVT|J zX|~)xLNy(p1hXGaWlA^`7N(r)l<}8gqDSoYm3gmU)uF0np{fYRcZ~? z)%l*=kV&6p2;2#_UA)uTw)5gkHg9w^SxMWkrzIjq8fPM^ekoc>f!RV$1xU1AspK=x z88UYjN+DLIe51h9W~8j8&0r>;_e#@KaZq^@%H)-k_(X0eOriDMHkl0u-EeN2-Ui)lg>JE7^>=faW?29 zRQ|LNQ?=ELWslq=?JCk!fz3lWJ)GZM-S9YLI|bEY$6 zrIOLB%sSJG&`_^O_LYUc)OHWGyw2>*v~EjD!fJ{!q|3lnrW(1*IP2Ew?A6?UeW4mM z52O-^DhH%>-NP9u2urD=PIe-3ENV~`X-tQfia-k?j_MwmSMvI%O^M%3Uok5bnRcki zKl3nI5v^P@I}Jr8>N0eV(+xV}Q)e#DovNLQ+A5q)j$~K0r>9B7^wq0VapW_h{>wkp zSq|TbuO(1CXEq!EbX1L&$8lgyOuW+#<>R|u`;qmxL6jQKnwi6lv= zK2*87oay+ZPAc>@RdF0iKT&hy^2j=LlnR#SocbkQ6uk(Oz~Z++pIzA$GO3QI0>8I* zmP)$gn{XOTj4#Wpn$SW}wmjn%ORsu@BBV#`#mzmO`oy1*FlOvMA-^5E=Rr4qZ{sWC zglaoAp&2n1%ZX%Gnb0rAfztwKcQ&fkKJhf(+Uqg?Y!>%+78wC_5;fCld;Nu=;asES z8k4H(`$@$?YH9rX7Wu`&VKd8xA>z$tYP%nFyNG0NV|fPMC(d<6Id+6ymw|dD0U<9R z8_TLDc!M(ZIe+%-X+9ZRjb+KT5n4>zm-GnASL1`Bk@7J~L{lyZE&EYQPds{?Iq$ro zai`50qMO@au+ii@%7cC+`v2XislR|uD*C@&KmU1j{fEG(z#ZTw@CtAm_y_d>524#%0RAt!{O^Mfm;@z2D=B<~wE&NT-ve(3mw*?7zeC4=7(4`S2Umif;4~oX z1^y6x4BQCjz(#Nicp3P6bp3~b=>OZmBsd;C2Rwkje-+paevGbvD+oXb>;e`jo$v3-|H> z#QKi;YUI8r6yZ2}WN#>gwX*UvO(BMSz7!D(QH9Y`6+^zd)Pl{j=4rW#nfs z9X?v?iERu+DpOo`2{IB-K1n+IPOLW_wY^0MW!M*D4kZAYN>~`;XDiqkbO3E-NQ-MV zV?$)mN3`m9WmCg#mvp%+6Td7^Vz!N`lHRQ1J7v0;xxsLD#w(w#g_W!T1<8qgWT8!U zk=Z{M314$MD2F!jfU4z#%=DhIQMWt#pIvMikaCWGoSOUUybfc zsb_W?o$N3xw0a#_?R%q;_QMn~;MR7y=jK1U3OJS;Jab#(w^qFtPyrcS+u(HKx z(|Y8G@QOjF#riaJ!_EMyaFAspvMcL@SSO`&DAb4RdQ?B{z{<245k#`N=yDM0r4|11 z6<_rK5FK|jda&sKWuufnjGo^yy8dU;?f)~l8oVC72z(x0{)6Dx!47a55Pkj~;O$@l zroeIFW9aQS0$Ja`5u68}2OdX{{}XTv=zv#)@1ny$2tEb2gBOFZp}+rc@Bj$G>%foT z_r2g&-~q+|F;b{=GPwkD3FH#UCGf12fV?7qk{_%4cUXp6bsJ0RX$?uaU6cE6r_TUH z-uKDNIUf3~65k)s<0P4_xBRYnN+>s`xe}8PcIH|Yi!InnP@g0T)J2t=rBkqRlHY6-QBBb(uPNFD zyHm7e9jxqY6S1(+?{_z@TX$s;))xj9Hndq+gW9^qR#}IsEcDy05jL{*j*(zma`>&7 zY*ko0Zu}RL8M2R&N5?zJmADNdjat?G4&xF_#*^PDQ*j(d%q;B*l82{>DTe zofg*w_5U<4>Pm^FkPMXky%HZPsxXrolNVawnKfQ&;<%=!$>pUJw>_JRv{@8p{0iB! 
zfE~vgW_nz;xG3&qqSe}%rqmE>!Wda?`HP-$j14nw3=%zCN-@Di6*_ZV#fih|y3=yQ z{=Dn4#|>QJP0@FU>yXM8L8@jSoUB?EQz_WICcib^)hep4?isb#wgfoNBri!x@frfQ zld?5d^#A{aj(r)rx9I;9X2^G&(f`lj`$gb|;H&8U9{|4sHi4Iblfe(r{T~D$0@s3V z;MG8U{XY*r31of2TfueU3UEI75jKFo2JZ*gfj0p0>;DqGd<2MHK=}Fs{M`Uf246w< z|8ww8a5YZ30|z|C{QVcf zJ>Uj#J~#<{nfd#_1Wj-Tcn&xYJi$EvZvfdR@MYjW=JW3b*Mcj+yMc&4`N;={(a4U$ zWj-)8FWG!xn0}4q14BE6OD_CK>WI9J;#XBP1RDx%wuvFA(Hbn(tf86?lbRPR*sd!d z7$ygOkyxyF?6(?2%dUzWjujsYpHy%qy;3jk!0{=`FjmXXAfhu zhxnGY)BsmC#DBedP^>P{&FeKqHk(B$P3Ere5d;?-}_ zyN0Dv$=6M`$<4AoAzMlJS$O4k&00Iy=mdS%U!idpY%u$6J(LISinjEa#bB}Bz=k|L z5?7uaj%R{xK>J-3Z}70V(NW)H!l|Y}IQbqE4C4ZU%(}{gO=bAI7}rQb#}{J=E4dR` zeP@R&;zZ9H56R6nnMoIoolLg%CJ-~x)rQ9wzwF6565cF*n}yWKGbJl(g_`rKG*2*RTwmJjEo$CqP<;tk+4iZIfPu_ce}1 z^ZkXah2gD{h2f(AzZG5eR&-d=|Lys8|AfB(B)AW}9c%^1gGbQ!KL`$kdGK@KQFQ(H zfa}2qupS%_K7x)fdjAbz9-IPXZU0l?)8IYeE#S@I9B?Z5@96vYgFC>v;Opr8e+2^I zfK$NBz~|8W#TP*K{ujUgw-~+ui|F{j2`&WRLa)CITm-g)uc6QX3UI(LfImi$|0VE$ z$oqG|dx4U}vvS(&*k#HoJVSq`&SzEmXnWIjqib=g`J*>wjgahwP#DvYctiP}s`#d@ z3;Q@Gu>woo(W83EX57X9F0tG-en(|6oggoBgL;=&It;fnoX)t%N0*>VC0?6sc8(ZH zdzkoGX|XyjB`oFCH!Q2f&0*r>N+O~?EQ$m@3S`XFqe^Xzm=XyOlO87}vunqtyDpUu znGH7*wS{%XNGZldGHNF~#IW`II%Yvzv;sEF zSaN&5_`H!y>gEc3txVF+%Sm~4^(3^t6|R)am8nJ3!;p)9n>pc4Z+m)(GoTD zydpO}^qX5M*y~ z)f*4+p~zT@d`z=4l#z7TX!4Q1Ih>8;(<8{mgtJwuuyPX-B{hDXr@AO8QW-lg`vx`9 zp?cM2Q3j^k3v)iXNxcih@nZe#h-g z#z+}XFdSE*_a;|PMY7RNRSLA4=`XID3EQ2h4RKVsSYS3q)gj7(7}7&zBp7ofOFhii zI-}AOGL*IH6e&m3MElC|iP_R*2h7~9?(w*jB8lFs+;)uj)F=~@CqtMz zRyiB;B}^v9#Ya4aOxec{JI)zrIO|vRqgvV4HI$6LtRid6oRx3#I}U}Y^xQfzO6E5E zMd2vc_2|;f9dkWeXmLiXCJY3jV=q?DY9Yyw{hghFJXNJ5R8{$mS}QDt@>&C?N<+8M zs4%k0$GM6dbrgk~q~%y=+P@ob_h9*w?_u$Zs8VcXSBgTChWxf|S&3swDtx-FOPGkR z@=;Uy%U+%G(yy-;$Uqpw$9f4ER~_8FYKm`ELbV!4~lI;K%6r_kiC3zYY$78E`5%1;{>r z9|3Oymx1%aBk26U3J!wf!F}ldcZ0jY0{Cfg4;iTeS-<}vx_=M68vIZ6{_DX7;Bj>S zFMv0L7X#U!?_O{j_$oFLIrs0^zzmoMuK@)Sp?oq1sIeI*S4y>zvZR#Ig2!lEsbS90 z0Iy((XHt(8$I)R3BgxU5!@As>Xpb4q$T)aNxlfLDhO)Az)W(Qu4C!%F%9YY}(w~YW zbwY2kw{oS_RLhJ$PTm?=338=0S4!0(Rioz1@k(i9zUMdM5mR~$Xh!cxt6)EaTIoi8OhaB)pO=*DQ}5fEnQWAjr2CroS$uKY1;S5nrM<12y+i#67$QNMC_<|^!td& zq+c#V%%+1fUQREWM?4WEcO0g}1duAC)bZ^CEX0nn|qzl72n{amN^V_vn zKKxj&)BIvhNZ%-@0~mvlSTxfCE2uy$5@flGS3Y-A>HnhE?w5b)|GSj-&%S)<{CmNv z;EU+_w}Ll<6Tlv%75H0p`1gT#fa}1og13Ms*a6mqQ@{_= z@1Fp&Ki}Qp7H}BI9{sz(x!~u)@!%Wi{(lM{0S|+Rz%AgdU=ExM#6Iv1>;fMLi(nSW zUVi_G-2MXG1>Os821=G9$7C@wZ$NdpIO_mQ*;HZ{#1(1O0vJEDM8*F`JM2o_#6Fj! z1})ZK0ZC;=I3UUNO>U6CmCp{1$9yKbgf=Im z?uidr)w^#%Ofn@CXfha--+EgV_Tljwv+^%35CW`Sx-kQ(BWI-;vZxLnRGSWlc`lY) z+RQG=$Vc-QA8lHZ&0K_v%92h|jt=fG3_6RlfxKLdZunv&V8kH%xHv>E5VPVfL_BJE zK@u$1c>OlzP;cD6N?4@n$u^iWEOz|Pm9E_1pzcrG``y-{!wK*5!Gy1a*mC&;IaQ{2CbSu-}J);c|zxYRz#`oUM-$zC1EJK$z)$wQ{RxheI-Y#X540} zp1wPi&K4F8wrUaLL-}p?27bR5C^pL~(uVbwvuq9;L1W(In2eUh7(3_U_(My>RE=9a}D? 
z!s2bGN=(~=mEBui+e4*0>Po3a>9w@P6<-4NBwoiFZ5c1Kj!^{9Y?Cw`I6*?fQ@=tx zGZHV@N2oDzqn zBc6ccv70Q%bUc?ezh>pF?z<|+)6lRW$D+W%piTEwDwfM6Q5J$_n*vrHaeJIiA3Hvs z?y{0xYj)!{Ww$Pd(?RHsPd$!R)*Bn!)Ny&0nc$|&fyuMV6>&8N=uY~cc>+Lx_p%q zmzZUVd1N_KoI7JPw9A|Xz2@9V1=cgI&KtLp8`ixtHC>mZP-MvsE+ykjN5?HMwrp0^ zb`5bUNbkN{y%8Rm)brc8vWofHQR{HIDRZ?xt;CVxs_c+Go)>78B}!R{tiX?Su!Z4* zwb!N{vF$|Dud9S9QQ=OhHxzhBtwtKp#Zt2@8&@3D^vx{p_=E{GKIQ3|)v~@vHQ&%V zxWL}$((Uz>L$B^f99NeE$B!r*fi0!9aPTS#9bSG)Ze4jhuF@E!Y|CnH(28tr?R(YM zU`sXm9e3;@O+T0yUkqEh=pUkjA1`Ue^4LONzg9^nRz%pD7q!ddcm^bBnW)dG#NMd) zvk2+r?P)X8HuS4*Jiq!q$O}$3y|7HDUT+PP3wzl>#kP*!6HFC3q=yj>HciOj^$ zT}$GoR@z!mFX{Ezcp)l4hsis?y@tYL`AS(6v&;nE1E+%@VheZ-$oYTb4{#$m2$sMA$Qc0Q zAMitL1a?0FIsfl&@bAGgSOk}WOTalmd4RPb%=4{~0>ZQ#9N3A_n>A31*y+z$R7kTU~c559+O;bY)F z@J>(z)8HxW3ts_pe&7wD4>o}HUXDDQztwS%tjE=H zj;PO_aEvge6zoU06Ls9KEOSVoVw=&gP7cu+)3t6)1YNc3)cFyv6i3Ib8yv~BB7cm# zLa}4$E!HsB_;o$`kd#z*52^F4`$%~yWR=CF^SV@f5}_KoW(AjCbAR%=7G!2Osgu`K z#4c((%8~u&M+0)K+tx#jYwGH8s2QtbHioSg)Rm+*7tiY_7-ghx)740AW6xjv3+fo zGwC8(raQR_hi*>uA9FH0ZEo4FE)HPd^mdqOyE`MesKe8P&af36)cF}+(lRvi)F%NH zM0ENm+#~rN*7dTqM3!dtBNhi<$^pf;+Nd9?Iu<=jR5H>3 zPeG~sebF7!{}1pbAJPB+4m<|_4EzCbz{|mt==*XWz&k+=RKaV&so;0e@kRgN3|b-ne?PbiTmeo4vhSbh^v8j(8J+(3_zu9o1($;hft(R=JdiyB9|4~R z{{!3uJ`8>n$QpqO@LVAK1%3d$A6x>az?tBs;K!oRgHM9r0=Ix&;3eRG^!QJLo4_Iv zUEc?CF5n5^3+VH=gExSS!Aro&;H&8K9|Z%j1N{$J2ff9^TitP7ZVB4F z>UOisNznGMR_3%LW{;>cKc#mkHbjnu!^ZHIBP;dZ>TK4YIU%LmV7M@Q4QG&w-aNm| z{Bcpu3shS(#nGK^;=QHx=`8W}sQpoyENw70za0%BeTejVl&cr&kaCOOFU$_Cwt$TT zs>{)knH3`)ELF!g%PfuF7hzY=4|;x|Q_maQWc}=JwZb*+MUtviT-XTOcE!6?7R4q@ zZhZaMd&Lx_smMoDZx-<^NHi#R+Zk#?a*S<3#o%9@W17tNmHb=cP-X?Oe$ALyo}!=j zafu^9cI9_l94xMK@jarZ9bV!`D+C{QeOWca@S9ZfCF3U~6p1KdPlF>HB6@6Z@T@2T zycmb^GQ()BK6MVt*^UoMS@@Dya!hn0EqOXAd4DH|7BTN@l=%?JBa`}2ck zs9{E;GT9oaYTV%~I_2c&ijJA5kg8`|g3#JH6s~N7jn;iVR!B8KnQK-hk($^~oGWb- z+M<@X%LWvB#BWD5vgn-^N~oMFKbiu&UkxzsNZZoBGjS`!hO0aL zO4ieHj|Lr8#o?e?)O~|)&4;U1b9tz%LPR$t1-TFxp2YqilZT6nSIG#}Qhqj+u~R^V zRYj@F8ErI=Dn8b1SX1SNs3NL=x`g5e9aW-5Z}~92SHt6QDj$!WId1Hi&I{7YlE<~! 
z@}dC746Lh3bQjKo+Le{6dS6wmO3P7c7!T$WPesQOs%D@mMpX4GQ#a*-e9Xh)&E|AT z8RIG{dUDtAI9J(L15MbT6-be~y`H-aN5%z8G_r^pbvxm0r(d>BPRI~fI<21?9+S2t z8Goz2f%G%%&?^|q42j&frb=b}jK+jD+}IM7?<^zEm{|>7VwM1H<>}08#Kdy*+=A)2 zJ9Z-mYXDk(3L`4ltg@sh$(qK{qE``UxGS9ynFvL9`%GHs)O$vaj_EPtN!4g1^RLQm zC;I;j(7EpcqW{0jsG8qL&;K^~H25G`0t2uU%z#JH^<^)>L*SReh2S(GYXjZ}WPLyj zycV1Q9!Br~1d#mz4}k69B=CH2BKRihe-S(iJ`NN=hL`*+mq0FoTmrcSatY)T$R&_V zAeX@ZmlDuA&(ob_&mBAU`nx%$$Z1>M{({&I(uN(>)7-IByZGgfoyN?)76at!RuNU% Yj=&-!b#pa)NEBwgHAI5Nnb=?We`jhlU;qFB literal 0 HcmV?d00001 diff --git a/syllabus/examples/training_scripts/test_minigrid_wrapper.py b/syllabus/examples/training_scripts/test_minigrid_wrapper.py index fd9645c7..5b263e75 100644 --- a/syllabus/examples/training_scripts/test_minigrid_wrapper.py +++ b/syllabus/examples/training_scripts/test_minigrid_wrapper.py @@ -205,7 +205,7 @@ def wrap_vecenv(vecenv): # policy.train() # return mean_returns, stddev_returns, normalized_mean_returns -def level_replay_evaluate_minidgrid( +def level_replay_evaluate_minigrid( env_name, policy, num_episodes, @@ -220,6 +220,7 @@ def level_replay_evaluate_minidgrid( args.seed + i, curriculum=curriculum if args.curriculum else None ) + # for i in range(args.num_envs) for i in range(args.num_envs) ] ) @@ -390,6 +391,27 @@ def print_values(obj): ).to(device) optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) + # eval_envs = gym.vector.AsyncVectorEnv( + # [ + # make_env_minigrid( + # env_name, + # args.seed + i, + # curriculum=curriculum if args.curriculum else None + # ) + # for i in range(args.num_envs) + # ] + # ) + # + # eval_envs = wrap_vecenv(eval_envs) + # eval_obs, _ = eval_envs.reset() + # with torch.no_grad(): + # eval_action, _, _, _ = agent.get_action_and_value(torch.Tensor(eval_obs).to(device)) + # eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) + # print(len(eval_obs)) + # print(len(infos)) + # print(args.num_envs) + # print(args.num_eval_episodes) + # ALGO Logic: Storage setup obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) @@ -559,10 +581,10 @@ def print_values(obj): explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y # Evaluate agent - mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate_minidgrid( + mean_eval_returns, stddev_eval_returns, normalized_mean_eval_returns = level_replay_evaluate_minigrid( args.env_id, agent, args.num_eval_episodes, device, num_levels=0 ) - mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate_minidgrid( + mean_train_returns, stddev_train_returns, normalized_mean_train_returns = level_replay_evaluate_minigrid( args.env_id, agent, args.num_eval_episodes, device, num_levels=200 ) From ce8530d002cc4eda9c64950a2b00cc544446db04 Mon Sep 17 00:00:00 2001 From: RoseyGreenBlue Date: Thu, 20 Jun 2024 06:15:23 +0000 Subject: [PATCH 06/10] changed level_replay_evaluate_minigrid so that the dimensions of infos is correct --- .../.cleanrl_procgen_centralplr.py.swp | Bin 16384 -> 0 bytes .../.test_minigrid_wrapper.py.swp | Bin 57344 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 syllabus/examples/training_scripts/.cleanrl_procgen_centralplr.py.swp delete mode 100644 syllabus/examples/training_scripts/.test_minigrid_wrapper.py.swp diff --git a/syllabus/examples/training_scripts/.cleanrl_procgen_centralplr.py.swp b/syllabus/examples/training_scripts/.cleanrl_procgen_centralplr.py.swp 
deleted file mode 100644 index 9c53cdaef195ffc2a766d364710d3690f56147e0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384
z@}dC746Lh3bQjKo+Le{6dS6wmO3P7c7!T$WPesQOs%D@mMpX4GQ#a*-e9Xh)&E|AT z8RIG{dUDtAI9J(L15MbT6-be~y`H-aN5%z8G_r^pbvxm0r(d>BPRI~fI<21?9+S2t z8Goz2f%G%%&?^|q42j&frb=b}jK+jD+}IM7?<^zEm{|>7VwM1H<>}08#Kdy*+=A)2 zJ9Z-mYXDk(3L`4ltg@sh$(qK{qE``UxGS9ynFvL9`%GHs)O$vaj_EPtN!4g1^RLQm zC;I;j(7EpcqW{0jsG8qL&;K^~H25G`0t2uU%z#JH^<^)>L*SReh2S(GYXjZ}WPLyj zycV1Q9!Br~1d#mz4}k69B=CH2BKRihe-S(iJ`NN=hL`*+mq0FoTmrcSatY)T$R&_V zAeX@ZmlDuA&(ob_&mBAU`nx%$$Z1>M{({&I(uN(>)7-IByZGgfoyN?)76at!RuNU% Yj=&-!b#pa)NEBwgHAI5Nnb=?We`jhlU;qFB From 0f39c8a71a70697b842f8249ce50b6248ec09e6a Mon Sep 17 00:00:00 2001 From: RoseyGreenBlue Date: Sun, 14 Jul 2024 03:07:05 +0000 Subject: [PATCH 07/10] final changes to minigrid code - ready for testing --- syllabus/core/task_interface/task_wrapper.py | 6 +- syllabus/examples/custom_envs/__init__.py | 7 + .../custom_envs/obstructedmaze_fixedgrid.py | 232 ++++++++++++++++++ .../custom_envs/obstructedmaze_gamut.py | 185 ++++++++++++++ .../training_scripts/test_minigrid_wrapper.py | 160 ++++-------- 5 files changed, 476 insertions(+), 114 deletions(-) create mode 100644 syllabus/examples/custom_envs/__init__.py create mode 100644 syllabus/examples/custom_envs/obstructedmaze_fixedgrid.py create mode 100644 syllabus/examples/custom_envs/obstructedmaze_gamut.py diff --git a/syllabus/core/task_interface/task_wrapper.py b/syllabus/core/task_interface/task_wrapper.py index de938e37..e114a2b3 100644 --- a/syllabus/core/task_interface/task_wrapper.py +++ b/syllabus/core/task_interface/task_wrapper.py @@ -85,7 +85,7 @@ def __getattr__(self, attr): return env_attr -<<<<<<< HEAD +# <<<<<<< HEAD # class PettingZooTaskWrapper(TaskWrapper, BaseParallelWraper): # def __init__(self, env: pettingzoo.ParallelEnv): # super().__init__(env) @@ -102,7 +102,7 @@ def __getattr__(self, attr): # def get_current_task(self): # return self.current_task -======= +# ======= class PettingZooTaskWrapper(BaseParallelWrapper): def __init__(self, env: pettingzoo.ParallelEnv): super().__init__(env) @@ -177,4 +177,4 @@ def _task_completion(self, obs, rew, term, trunc, info) -> float: """ # return 1.0 if term or trunc else 0.0 return info ->>>>>>> b88c2fcba4658545e156188c85f48f0b1e54aab2 +# >>>>>>> b88c2fcba4658545e156188c85f48f0b1e54aab2 diff --git a/syllabus/examples/custom_envs/__init__.py b/syllabus/examples/custom_envs/__init__.py new file mode 100644 index 00000000..d80c350e --- /dev/null +++ b/syllabus/examples/custom_envs/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from custom_envs.obstructedmaze_gamut import ObstructedMazeGamut diff --git a/syllabus/examples/custom_envs/obstructedmaze_fixedgrid.py b/syllabus/examples/custom_envs/obstructedmaze_fixedgrid.py new file mode 100644 index 00000000..89f9e53e --- /dev/null +++ b/syllabus/examples/custom_envs/obstructedmaze_fixedgrid.py @@ -0,0 +1,232 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from gym_minigrid.minigrid import * +from gym_minigrid.roomgrid import RoomGrid +from gym_minigrid.register import register + +class ObstructedMazeEnvFixedGrid(RoomGrid): + """ + A blue ball is hidden in the maze. Doors may be locked, + doors may be obstructed by a ball and keys may be hidden in boxes. 
+ """ + + def __init__(self, + num_rows, + num_cols, + num_rooms_visited, + seed=None + ): + room_size = 7 + max_steps = 4*num_rooms_visited*room_size**2 + + super().__init__( + room_size=room_size, + num_rows=num_rows, + num_cols=num_cols, + frame_rows=3, + frame_cols=3, + max_steps=max_steps, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + # Define all possible colors for doors + self.door_colors = self._rand_subset(COLOR_NAMES, len(COLOR_NAMES)) + # Define the color of the ball to pick up + self.ball_to_find_color = COLOR_NAMES[0] + # Define the color of the balls that obstruct doors + self.blocking_ball_color = COLOR_NAMES[1] + # Define the color of boxes in which keys are hidden + self.box_color = COLOR_NAMES[2] + + self.mission = "pick up the %s ball" % self.ball_to_find_color + + def step(self, action): + obs, reward, done, info = super().step(action) + + if action == self.actions.pickup: + if self.carrying and self.carrying == self.obj: + reward = self._reward() + done = True + + return obs, reward, done, info + + def add_door(self, i, j, door_idx=0, color=None, locked=False, key_in_box=False, blocked=False): + """ + Add a door. If the door must be locked, it also adds the key. + If the key must be hidden, it is put in a box. If the door must + be obstructed, it adds a ball in front of the door. + """ + + door, door_pos = super().add_door(i, j, door_idx, color, locked=locked) + + if blocked: + vec = DIR_TO_VEC[door_idx] + blocking_ball = Ball(self.blocking_ball_color) if blocked else None + self.grid.set(door_pos[0]-vec[0], door_pos[1]-vec[1], blocking_ball) + + if locked: + obj = Key(door.color) + if key_in_box: + box = Box(self.box_color) if key_in_box else None + box.contains = obj + obj = box + self.place_in_room(i, j, obj) + + return door, door_pos + +class ObstructedMaze_1Dlhb(ObstructedMazeEnvFixedGrid): + """ + A blue ball is hidden in a 2x1 maze. A locked door separates + rooms. Doors are obstructed by a ball and keys are hidden in boxes. + """ + + def __init__(self, key_in_box=True, blocked=True, seed=None): + self.key_in_box = key_in_box + self.blocked = blocked + + super().__init__( + num_rows=1, + num_cols=2, + num_rooms_visited=2, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + self.add_door(0, 0, door_idx=0, color=self.door_colors[0], + locked=True, + key_in_box=self.key_in_box, + blocked=self.blocked) + + self.obj, _ = self.add_object(1, 0, "ball", color=self.ball_to_find_color) + self.place_agent(0, 0) + +class ObstructedMaze_1Dl(ObstructedMaze_1Dlhb): + def __init__(self, seed=None): + super().__init__(False, False, seed) + +class ObstructedMaze_1Dlh(ObstructedMaze_1Dlhb): + def __init__(self, seed=None): + super().__init__(True, False, seed) + +class ObstructedMaze_Full(ObstructedMazeEnvFixedGrid): + """ + A blue ball is hidden in one of the 4 corners of a 3x3 maze. Doors + are locked, doors are obstructed by a ball and keys are hidden in + boxes. + """ + + def __init__(self, agent_room=(1, 1), key_in_box=True, blocked=True, + num_quarters=4, num_rooms_visited=25, seed=None): + self.agent_room = agent_room + self.key_in_box = key_in_box + self.blocked = blocked + self.num_quarters = num_quarters + + super().__init__( + num_rows=3, + num_cols=3, + num_rooms_visited=num_rooms_visited, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + middle_room = (1, 1) + # Define positions of "side rooms" i.e. 
rooms that are neither + # corners nor the center. + side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][:self.num_quarters] + for i in range(len(side_rooms)): + side_room = side_rooms[i] + + # Add a door between the center room and the side room + self.add_door(*middle_room, door_idx=i, color=self.door_colors[i], locked=False) + + for k in [-1, 1]: + # Add a door to each side of the side room + self.add_door(*side_room, locked=True, + door_idx=(i+k)%4, + color=self.door_colors[(i+k)%len(self.door_colors)], + key_in_box=self.key_in_box, + blocked=self.blocked) + + corners = [(2, 0), (2, 2), (0, 2), (0, 0)][:self.num_quarters] + ball_room = self._rand_elem(corners) + + self.obj, _ = self.add_object(*ball_room, "ball", color=self.ball_to_find_color) + self.place_agent(*self.agent_room) + +class ObstructedMaze_2Dl(ObstructedMaze_Full): + def __init__(self, seed=None): + super().__init__((2, 1), False, False, 1, 4, seed) + +class ObstructedMaze_2Dlh(ObstructedMaze_Full): + def __init__(self, seed=None): + super().__init__((2, 1), True, False, 1, 4, seed) + + +class ObstructedMaze_2Dlhb(ObstructedMaze_Full): + def __init__(self, seed=None): + super().__init__((2, 1), True, True, 1, 4, seed) + +class ObstructedMaze_1Q(ObstructedMaze_Full): + def __init__(self, seed=None): + super().__init__((1, 1), True, True, 1, 5, seed) + +class ObstructedMaze_2Q(ObstructedMaze_Full): + def __init__(self, seed=None): + super().__init__((1, 1), True, True, 2, 11, seed) + +register( + id="MiniGrid-ObstructedMaze-1Dl-fixed_grid-v0", + entry_point=f"{__name__}:ObstructedMaze_1Dl" +) + +register( + id="MiniGrid-ObstructedMaze-1Dlh-fixed_grid-v0", + entry_point=f"{__name__}:ObstructedMaze_1Dlh" +) + +register( + id="MiniGrid-ObstructedMaze-1Dlhb-fixed_grid-v0", + entry_point=f"{__name__}:ObstructedMaze_1Dlhb" +) + +register( + id="MiniGrid-ObstructedMaze-2Dl-fixed_grid-v0", + entry_point=f"{__name__}:ObstructedMaze_2Dl" +) + +register( + id="MiniGrid-ObstructedMaze-2Dlh-fixed_grid-v0", + entry_point=f"{__name__}:ObstructedMaze_2Dlh" +) + +register( + id="MiniGrid-ObstructedMaze-2Dlhb-fixed_grid-v0", + entry_point=f"{__name__}:ObstructedMaze_2Dlhb" +) + +register( + id="MiniGrid-ObstructedMaze-1Q-fixed_grid-v0", + entry_point=f"{__name__}:ObstructedMaze_1Q" +) + +register( + id="MiniGrid-ObstructedMaze-2Q-fixed_grid-v0", + entry_point=f"{__name__}:ObstructedMaze_2Q" +) + +register( + id="MiniGrid-ObstructedMaze-Full-fixed_grid-v0", + entry_point=f"{__name__}:ObstructedMaze_Full" +) \ No newline at end of file diff --git a/syllabus/examples/custom_envs/obstructedmaze_gamut.py b/syllabus/examples/custom_envs/obstructedmaze_gamut.py new file mode 100644 index 00000000..56efa0d2 --- /dev/null +++ b/syllabus/examples/custom_envs/obstructedmaze_gamut.py @@ -0,0 +1,185 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ +import gym +from gym_minigrid.register import register + +from custom_envs.obstructedmaze_fixedgrid import ObstructedMazeEnvFixedGrid + + +ALL_SUBENVS = [ + 'MiniGrid-ObstructedMaze-1Dl-fixed_grid-v0', + 'MiniGrid-ObstructedMaze-1Dlh-fixed_grid-v0', + 'MiniGrid-ObstructedMaze-1Dlhb-fixed_grid-v0', + 'MiniGrid-ObstructedMaze-2Dl-fixed_grid-v0', + 'MiniGrid-ObstructedMaze-2Dlh-fixed_grid-v0', + 'MiniGrid-ObstructedMaze-2Dlhb-fixed_grid-v0', + 'MiniGrid-ObstructedMaze-1Q-fixed_grid-v0', + 'MiniGrid-ObstructedMaze-2Q-fixed_grid-v0', + 'MiniGrid-ObstructedMaze-Full-fixed_grid-v0' +] + +TILE_PIXELS = 32 + + +class ObstructedMazeGamut(gym.Env): + def __init__(self, distribution='easy', max_difficulty=None, seed=1337): + + self.distribution = distribution + if distribution == 'easy': + self.max_difficulty = 3 + elif distribution == 'medium': + self.max_difficulty = 6 + elif distribution == 'hard': + self.max_difficulty = 9 + else: + raise ValueError(f'Unsupported distribution {distribution}.') + + if max_difficulty is not None: + self.max_difficulty = max_difficulty + + self.subenvs = [] + for env_name in ALL_SUBENVS[:self.max_difficulty]: + self.subenvs.append(gym.make(env_name)) + + self.num_subenvs = len(self.subenvs) + + self.seed(seed) + self.reset() + + @property + def actions(self): + return self.env.actions + + @property + def agent_view_size(self): + return self.env.agent_view_size + + @property + def reward_range(self): + return self.env.reward_range + + @property + def window(self): + return self.env.window + + @property + def width(self): + return self.env.width + + @property + def height(self): + return self.env.height + + @property + def grid(self): + return self.env.grid + + @property + def max_steps(self): + return self.env.max_steps + + @property + def see_through_walls(self): + return self.env.see_through_walls + + @property + def agent_pos(self): + return self.env.agent_pos + + @property + def agent_dir(self): + return self.env.agent_dir + + @property + def step_count(self): + return self.env.step_count + + @property + def carrying(self): + return self.env.carrying + + @property + def observation_space(self): + return self.env.observation_space + + @property + def action_space(self): + return self.env.action_space + + @property + def steps_remaining(self): + return self.env.steps_remaining + + def __str__(self): + return self.env.__str__() + + def reset(self): + return self.env.reset() + + def seed(self, seed=1337): + env_index = seed % self.num_subenvs + self.env = self.subenvs[env_index] + self.env.seed(seed) + + def hash(self, size=16): + return self.env.hash(size) + + def relative_coords(self, x, y): + return self.env.relative_coords(x, y) + + def in_view(self, x, y): + return self.env.in_view(x, y) + + def agent_sees(self, x, y): + return self.env.agent_sees(x, y) + + def step(self, action): + return self.env.step(action) + + def gen_obs_grid(self): + return self.env.gen_obs_grid() + + def gen_obs(self): + return self.env.gen_obs() + + def get_obs_render(self, obs, tile_size=TILE_PIXELS//2): + return self.env.get_obs_render(obs, tile_size) + + def render(self, mode='human', close=False, highlight=True, tile_size=TILE_PIXELS): + return self.env.render(mode, close, highlight, tile_size) + + def close(self): + return self.env.close() + + +class ObstructedMazeGamut_Easy(ObstructedMazeGamut): + def __init__(self, seed=1337): + super().__init__(distribution='easy', seed=seed) + +class ObstructedMazeGamut_Medium(ObstructedMazeGamut): + def __init__(self, seed=1337): + 
super().__init__(distribution='medium', seed=seed) + +class ObstructedMazeGamut_Hard(ObstructedMazeGamut): + def __init__(self, seed=1337): + super().__init__(distribution='hard', seed=seed) + + +register( + id="MiniGrid-ObstructedMazeGamut-Easy-v0", + entry_point=f"{__name__}:ObstructedMazeGamut_Easy" +) + +register( + id="MiniGrid-ObstructedMazeGamut-Medium-v0", + entry_point=f"{__name__}:ObstructedMazeGamut_Medium" +) + +register( + id="MiniGrid-ObstructedMazeGamut-Hard-v0", + entry_point=f"{__name__}:ObstructedMazeGamut_Hard" +) diff --git a/syllabus/examples/training_scripts/test_minigrid_wrapper.py b/syllabus/examples/training_scripts/test_minigrid_wrapper.py index 5b263e75..80c19e78 100644 --- a/syllabus/examples/training_scripts/test_minigrid_wrapper.py +++ b/syllabus/examples/training_scripts/test_minigrid_wrapper.py @@ -149,62 +149,12 @@ def thunk(): return thunk -def make_env(env_id, seed, curriculum=None, start_level=0, num_levels=1): - def thunk(): - env = openai_gym.make(f"procgen-{env_id}-v0", distribution_mode="easy", start_level=start_level, num_levels=num_levels) - env = GymV21CompatibilityV0(env=env) - if curriculum is not None: - env = ProcgenTaskWrapper(env, env_id, seed=seed) - env = MultiProcessingSyncWrapper( - env, - curriculum.get_components(), - update_on_step=False, - task_space=env.task_space, - ) - return env - return thunk - - def wrap_vecenv(vecenv): vecenv.is_vector_env = True vecenv = VecMonitor(venv=vecenv, filename=None, keep_buf=100) vecenv = VecNormalize(venv=vecenv, ob=False, ret=True) return vecenv - -# def slow_level_replay_evaluate( -# env_name, -# policy, -# num_episodes, -# device, -# num_levels=0 -# ): -# policy.eval() -# -# eval_envs = ProcgenEnv( -# num_envs=1, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -# ) -# eval_envs = VecExtractDictObs(eval_envs, "rgb") -# eval_envs = wrap_vecenv(eval_envs) -# eval_obs, _ = eval_envs.reset() -# eval_episode_rewards = [] -# -# while len(eval_episode_rewards) < num_episodes: -# with torch.no_grad(): -# eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) -# -# eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -# for i, info in enumerate(infos): -# if 'episode' in info.keys(): -# eval_episode_rewards.append(info['episode']['r']) -# -# mean_returns = np.mean(eval_episode_rewards) -# stddev_returns = np.std(eval_episode_rewards) -# env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -# normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) -# policy.train() -# return mean_returns, stddev_returns, normalized_mean_returns - def level_replay_evaluate_minigrid( env_name, policy, @@ -221,7 +171,7 @@ def level_replay_evaluate_minigrid( curriculum=curriculum if args.curriculum else None ) # for i in range(args.num_envs) - for i in range(args.num_envs) + for i in range(num_episodes) ] ) eval_envs = wrap_vecenv(eval_envs) @@ -235,9 +185,13 @@ def level_replay_evaluate_minigrid( eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) # len(infos) = 64 # num_episodes = 10 + # print("info length: %d"%len(infos)) + # print("num_episode length: %d"%num_episodes) + sys.stdout.flush() for i, info in enumerate(infos): if 'episode' in info.keys() and eval_episode_rewards[i] == -1: eval_episode_rewards[i] = info['episode']['r'] + print(f"level replay eval works! 
{eval_episode_rewards[i]}") # print(eval_episode_rewards) mean_returns = np.mean(eval_episode_rewards) @@ -250,41 +204,6 @@ def level_replay_evaluate_minigrid( return mean_returns, stddev_returns, normalized_mean_returns -# def level_replay_evaluate( -# env_name, -# policy, -# num_episodes, -# device, -# num_levels=0 -# ): -# policy.eval() -# -# eval_envs = ProcgenEnv( -# num_envs=args.num_eval_episodes, env_name=env_name, num_levels=num_levels, start_level=0, distribution_mode="easy", paint_vel_info=False -# ) -# eval_envs = VecExtractDictObs(eval_envs, "rgb") -# eval_envs = wrap_vecenv(eval_envs) -# eval_obs, _ = eval_envs.reset() -# eval_episode_rewards = [-1] * num_episodes -# -# while -1 in eval_episode_rewards: -# with torch.no_grad(): -# eval_action, _, _, _ = policy.get_action_and_value(torch.Tensor(eval_obs).to(device), deterministic=False) -# -# eval_obs, _, truncs, terms, infos = eval_envs.step(eval_action.cpu().numpy()) -# for i, info in enumerate(infos): -# if 'episode' in info.keys() and eval_episode_rewards[i] == -1: -# eval_episode_rewards[i] = info['episode']['r'] -# -# # print(eval_episode_rewards) -# mean_returns = np.mean(eval_episode_rewards) -# stddev_returns = np.std(eval_episode_rewards) -# env_min, env_max = PROCGEN_RETURN_BOUNDS[args.env_id] -# normalized_mean_returns = (mean_returns - env_min) / (env_max - env_min) -# policy.train() -# return mean_returns, stddev_returns, normalized_mean_returns - - def make_value_fn(): def get_value(obs): obs = np.array(obs) @@ -301,18 +220,42 @@ def print_values(obj): if __name__ == "__main__": + args = parse_args() + env_name = "MiniGrid-MultiRoom-N4-Random-v0" + args.env_id = env_name run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.backends.cudnn.deterministic = args.torch_deterministic - device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") + print("Device:", device) + if args.track: + import wandb + + wandb.init( + project=args.wandb_project_name, + entity=args.wandb_entity, + sync_tensorboard=True, + config=vars(args), + name=run_name, + monitor_gym=True, + save_code=True, + dir=args.logging_dir + ) + # Curriculum setup curriculum = None + + writer = SummaryWriter(os.path.join(args.logging_dir, "./runs/{run_name}")) + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), + ) + if args.curriculum: print("args:\n--------------") print(f"{args}\n-------------\n") @@ -321,10 +264,6 @@ def print_values(obj): # sample_env = GymV21CompatibilityV0(env=sample_env) # sample_env = ProcgenTaskWrapper(sample_env, args.env_id, seed=args.seed) - env_name = "MiniGrid-ObstructedMaze-Full-v0" - args.env_id = env_name - - sample_env = openai_gym.make(env_name) sample_env = FullyObsWrapper(sample_env) sample_env = ImgObsWrapper(sample_env) @@ -335,30 +274,31 @@ def print_values(obj): if args.curriculum_method == "plr": print("Using prioritized level replay.") + task_sampler_kwargs_dict = {"strategy": "value_l1", "temperature":0.1, "staleness_coef":0.3} curriculum = CentralizedPrioritizedLevelReplay( sample_env.task_space, num_steps=args.num_steps, num_processes=args.num_envs, gamma=args.gamma, gae_lambda=args.gae_lambda, - task_sampler_kwargs_dict={"strategy": "value_l1"} + task_sampler_kwargs_dict=task_sampler_kwargs_dict ) - elif args.curriculum_method == "dr": - print("Using domain randomization.") - curriculum = 
DomainRandomization(sample_env.task_space) - elif args.curriculum_method == "lp": - print("Using learning progress.") - curriculum = LearningProgressCurriculum(sample_env.task_space) - elif args.curriculum_method == "sq": - print("Using sequential curriculum.") - curricula = [] - stopping = [] - for i in range(199): - curricula.append(i + 1) - stopping.append("steps>=50000") - curricula.append(list(range(i + 1))) - stopping.append("steps>=50000") - curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) + # elif args.curriculum_method == "dr": + # print("Using domain randomization.") + # curriculum = DomainRandomization(sample_env.task_space) + # elif args.curriculum_method == "lp": + # print("Using learning progress.") + # curriculum = LearningProgressCurriculum(sample_env.task_space) + # elif args.curriculum_method == "sq": + # print("Using sequential curriculum.") + # curricula = [] + # stopping = [] + # for i in range(199): + # curricula.append(i + 1) + # stopping.append("steps>=50000") + # curricula.append(list(range(i + 1))) + # stopping.append("steps>=50000") + # curriculum = SequentialCurriculum(curricula, stopping[:-1], sample_env.task_space) else: raise ValueError(f"Unknown curriculum method {args.curriculum_method}") curriculum = make_multiprocessing_curriculum(curriculum) @@ -367,8 +307,6 @@ def print_values(obj): # env setup print("Creating env") - # dummy_env = env_test[0]() - envs = gym.vector.AsyncVectorEnv( [ make_env_minigrid( @@ -379,8 +317,8 @@ def print_values(obj): for i in range(args.num_envs) ] ) - next_obs, _ = envs.reset() envs = wrap_vecenv(envs) + next_obs, _ = envs.reset() assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" agent = MinigridAgentVerma( From 7ba1e0677616531fb09c725b46e2a650c5db33d7 Mon Sep 17 00:00:00 2001 From: RoseyGreenBlue Date: Wed, 7 Aug 2024 17:24:52 -0700 Subject: [PATCH 08/10] ready for testing --- tests/cleanrl_cartpole_test.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tests/cleanrl_cartpole_test.sh diff --git a/tests/cleanrl_cartpole_test.sh b/tests/cleanrl_cartpole_test.sh old mode 100755 new mode 100644 From 196959b22c289800b33eb3327d0a54c4c8755116 Mon Sep 17 00:00:00 2001 From: RoseyGreenBlue Date: Wed, 7 Aug 2024 17:25:55 -0700 Subject: [PATCH 09/10] model files --- .../__pycache__/__init__.cpython-38.pyc | Bin 0 -> 256 bytes .../__pycache__/__init__.cpython-39.pyc | Bin 0 -> 256 bytes .../__pycache__/minigrid_model.cpython-38.pyc | Bin 0 -> 6373 bytes .../__pycache__/minigrid_model.cpython-39.pyc | Bin 0 -> 6359 bytes .../minigrid_model_verma.cpython-38.pyc | Bin 0 -> 6538 bytes .../minigrid_model_verma.cpython-39.pyc | Bin 0 -> 6536 bytes .../__pycache__/procgen_model.cpython-38.pyc | Bin 0 -> 12396 bytes .../__pycache__/procgen_model.cpython-39.pyc | Bin 0 -> 12385 bytes .../examples/models/minigrid_model_verma.py | 185 ++++++++++++++++++ 9 files changed, 185 insertions(+) create mode 100644 syllabus/examples/models/__pycache__/__init__.cpython-38.pyc create mode 100644 syllabus/examples/models/__pycache__/__init__.cpython-39.pyc create mode 100644 syllabus/examples/models/__pycache__/minigrid_model.cpython-38.pyc create mode 100644 syllabus/examples/models/__pycache__/minigrid_model.cpython-39.pyc create mode 100644 syllabus/examples/models/__pycache__/minigrid_model_verma.cpython-38.pyc create mode 100644 syllabus/examples/models/__pycache__/minigrid_model_verma.cpython-39.pyc create mode 100644 
syllabus/examples/models/__pycache__/procgen_model.cpython-38.pyc
 create mode 100644 syllabus/examples/models/__pycache__/procgen_model.cpython-39.pyc
 create mode 100644 syllabus/examples/models/minigrid_model_verma.py

[GIT binary patch literals for the eight new __pycache__/*.pyc files listed in the diffstat above are omitted; the base85-encoded payloads are not human-readable.]
z28nQo)?CE#Uy&JXyk;vxYk)TP%S3Tn;`1W${2YN2uRli^O1zg80sF1St2h|GlL|hz z60HEIGRxRd2=Gg#2)@A4nCnJ{=W+2K(OM1Vp0dX65%;tVDIprcZ4V?LM}-*3W4$$+ zZsGB>03Q;Jg(~f1X9&$1kzK@$Re(3iU|^~RBAMWnymTj2ov^JPZixlIgo?MYm(IhX zZPy!NCiGut4-yf$gj|wiB>6?H0M47?^|#fz0r`@Wo+wi4$C&&MlI*Od*zp_d^$uPd z>1IdBHcmJPo}sk>?{EPdAow5iL*5@P(p*+@_8P9}<46V~&3J(pX*%}I;glrJiEuNF z)VF@x;61ZgwoFR>0XkCOK$1C>l<&mD!3dN-ewf&R@ey$pNo0Uvb~p`@X!!MTadB=m zGL1~MO}#9~N;cX5KCYo;mlbIZTw}k1WM2k(49g)>YewLF(`?!z{7EU(Jo2j>}2eq2Xbc3p%nurfB!85p zI3Qbu2o{m6h(-JfqDC#)+q>D6lWVMejE|POpW*Q{&Z5AGIZ*i1k z!_^<#sZ!9%Gl8L-$P8eJ_Ec{Svm6crQyM`DJlZEzu?@>OIvkW;=0{@@dG$^7t-i(N z+e|1?>Q9*b857#&1AWA;_rS!hqk!sV&U6-)sp*A$%$Ybqt&Gfd9H@>tgb@2c-syea z8{_XA+G%Rg5?@NGqK|s!WU(Y*(*R-Ym&xI@rT*W1Q=a;yUwuqs*9de?d8IBM;{rxs zLUI&R#q#9=?K8xi-X?;c5Oo*jd`ij1uHX>@nR^Anqhee_9#A{LqnUV?6yQ-I#iJAP z9G3j#0FQu7{92Ox%~UbyC?Aq%hh&R7U_!Xnr}=x#(XdzNHBgZQLz_LcV**K9Pzc*Y zI8uMgA{|#PL3C7)0wVRhtT+N~-r%BEP^rL_Gjg!d{cj%c@-aM z#yUtdW}3ML8wmXKDD+K4(-^)&&D4C+GJ&i$)H(ct|#XG#BAWGfgepBB}Qu*Z}_zUqM@for*X9C5O1lr*KkG4 zNcgc0519k&oH4}QG(rj&@4#M#KZh3?L+@DhhznsD|JJ?2FS{_lO?+;mn{*T$mCxn;?7m0e1 zhZbOQSz$a|m{`?h))btrEX(ML(z(a03~ zMFfXS&D<^QvGMGHCb7!D!Jw)o^|!bk6*IaKW5sQ!-0Hj{fy+Dv|x$*(bye#cp!ERdsV*t6aVbXfN;*I*Hk zFv`q0Gd?qc_IlVam*Elho}OBh$sQor)XPOxNbK;)UjRN8;>^I1KaZ=lSFe z69H}k^T#YcizIPx$Y+Ea{GTTI63H{8ADu+{ Date: Sat, 9 Nov 2024 20:18:24 -0500 Subject: [PATCH 10/10] Created using Colab --- cmsc320_hw4.ipynb | 770 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 770 insertions(+) create mode 100644 cmsc320_hw4.ipynb diff --git a/cmsc320_hw4.ipynb b/cmsc320_hw4.ipynb new file mode 100644 index 00000000..8ab9fbfc --- /dev/null +++ b/cmsc320_hw4.ipynb @@ -0,0 +1,770 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyNlOgZTGUUqhQby0su1ySpw", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4n8qy4J_Tb7B", + "outputId": "c16ec0f2-dbb4-4d6d-f491-e25edf3dcff8" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "1fERlpkyTSF4" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import pprint\n", + "import os\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from sklearn.neural_network import MLPClassifier\n", + "from sklearn.svm import LinearSVC\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.neural_network import MLPClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.decomposition import PCA" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Loading data" + ], + "metadata": { + "id": "Pd-palSEoc54" + } + }, + { + "cell_type": "code", + "source": [ + "# 60/20/20 train/test/valid split\n", + "\n", + "path = \"./drive/MyDrive/cmsc320/HW4/homework4.csv\"\n", + "df = 
pd.read_csv(path)\n", + "\n", + "index = [\"%d\"%i for i in range(12)] + [\"Results\"]\n", + "data = np.array(df[index])\n", + "\n", + "#regularize\n", + "means = np.average(data[:,:12], axis=0)\n", + "stds = np.std(data[:,:12], axis=0)\n", + "data[:,:12] = np.subtract(np.divide(data[:,:12], stds), means)\n", + "\n", + "#shuffle data\n", + "np.random.shuffle(data)" + ], + "metadata": { + "id": "vyu85xCKTtWt" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "This is model 1 trained on 10-fold CV: A SVM. The model achieves a peak accuracy of 75.1% accuracy and a peak precision of 32.0% across all runs" + ], + "metadata": { + "id": "xn_67dMSoKkM" + } + }, + { + "cell_type": "code", + "source": [ + "# 10-fold Cross validation\n", + "kfold = KFold(n_splits=10)\n", + "\n", + "for i, (train_index, test_index) in enumerate(kfold.split(data)):\n", + " train = data[train_index, :]\n", + " test = data[test_index, :]\n", + "\n", + " svm_classifier = LinearSVC(class_weight='balanced')\n", + " svm_classifier.fit(train[:, :12], train[:,12])\n", + "\n", + " output = svm_classifier.predict(test[:,:12])\n", + " #C_ij = i actual, j predicted\n", + " c_matrix = confusion_matrix(test[:,12], output)\n", + " print(\"confusion matrix:\")\n", + " print(c_matrix)\n", + " print(\"true positive:\\t%d\\ntrue negative:\\t%d\\nfalse positive:\\t%d\\nfalse negative:\\t%d\"%(c_matrix[1,1], c_matrix[0,0], c_matrix[0,1], c_matrix[1,0]))\n", + " precision = c_matrix[1,1] / (c_matrix[1,1] + c_matrix[0,1])\n", + " recall = c_matrix[1,1] / (c_matrix[1,1] + c_matrix[1,0])\n", + " print(\"\\naccuracy:\\t%f\"%(np.sum(np.equal(output, test[:,12])) / 1000))\n", + " print(\"precision:\\t%f\\nrecall:\\t\\t%f\"%(precision, recall))\n", + " print(\"\\n\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ov6GhBNMlQ-E", + "outputId": "ccc87c4e-d257-4625-9f91-b697776a5a6b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "confusion matrix:\n", + "[[612 239]\n", + " [ 52 97]]\n", + "true positive:\t97\n", + "true negative:\t612\n", + "false positive:\t239\n", + "false negative:\t52\n", + "\n", + "accuracy:\t0.709000\n", + "precision:\t0.288690\n", + "recall:\t\t0.651007\n", + "\n", + "\n", + "confusion matrix:\n", + "[[637 234]\n", + " [ 45 84]]\n", + "true positive:\t84\n", + "true negative:\t637\n", + "false positive:\t234\n", + "false negative:\t45\n", + "\n", + "accuracy:\t0.721000\n", + "precision:\t0.264151\n", + "recall:\t\t0.651163\n", + "\n", + "\n", + "confusion matrix:\n", + "[[653 193]\n", + " [ 63 91]]\n", + "true positive:\t91\n", + "true negative:\t653\n", + "false positive:\t193\n", + "false negative:\t63\n", + "\n", + "accuracy:\t0.744000\n", + "precision:\t0.320423\n", + "recall:\t\t0.590909\n", + "\n", + "\n", + "confusion matrix:\n", + "[[650 196]\n", + " [ 58 96]]\n", + "true positive:\t96\n", + "true negative:\t650\n", + "false positive:\t196\n", + "false negative:\t58\n", + "\n", + "accuracy:\t0.746000\n", + "precision:\t0.328767\n", + "recall:\t\t0.623377\n", + "\n", + "\n", + "confusion matrix:\n", + "[[644 221]\n", + " [ 55 80]]\n", + "true positive:\t80\n", + "true negative:\t644\n", + "false positive:\t221\n", + "false negative:\t55\n", + "\n", + "accuracy:\t0.724000\n", + "precision:\t0.265781\n", + "recall:\t\t0.592593\n", + "\n", + "\n", + "confusion matrix:\n", + "[[633 223]\n", + " [ 45 99]]\n", + "true positive:\t99\n", + "true negative:\t633\n", + "false 
positive:\t223\n", + "false negative:\t45\n", + "\n", + "accuracy:\t0.732000\n", + "precision:\t0.307453\n", + "recall:\t\t0.687500\n", + "\n", + "\n", + "confusion matrix:\n", + "[[648 220]\n", + " [ 47 85]]\n", + "true positive:\t85\n", + "true negative:\t648\n", + "false positive:\t220\n", + "false negative:\t47\n", + "\n", + "accuracy:\t0.733000\n", + "precision:\t0.278689\n", + "recall:\t\t0.643939\n", + "\n", + "\n", + "confusion matrix:\n", + "[[666 196]\n", + " [ 53 85]]\n", + "true positive:\t85\n", + "true negative:\t666\n", + "false positive:\t196\n", + "false negative:\t53\n", + "\n", + "accuracy:\t0.751000\n", + "precision:\t0.302491\n", + "recall:\t\t0.615942\n", + "\n", + "\n", + "confusion matrix:\n", + "[[662 208]\n", + " [ 44 86]]\n", + "true positive:\t86\n", + "true negative:\t662\n", + "false positive:\t208\n", + "false negative:\t44\n", + "\n", + "accuracy:\t0.748000\n", + "precision:\t0.292517\n", + "recall:\t\t0.661538\n", + "\n", + "\n", + "confusion matrix:\n", + "[[634 222]\n", + " [ 57 87]]\n", + "true positive:\t87\n", + "true negative:\t634\n", + "false positive:\t222\n", + "false negative:\t57\n", + "\n", + "accuracy:\t0.721000\n", + "precision:\t0.281553\n", + "recall:\t\t0.604167\n", + "\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "This is model 2: A neural network. The model achieves above a 90% accuracy on almost every iteration of the 10-fold CV and has above 50% precision for every iteration" + ], + "metadata": { + "id": "air1sHHBn3zr" + } + }, + { + "cell_type": "code", + "source": [ + "# 10-fold Cross validation\n", + "kfold = KFold(n_splits=10)\n", + "\n", + "for i, (train_index, test_index) in enumerate(kfold.split(data)):\n", + " train = data[train_index, :]\n", + " test = data[test_index, :]\n", + "\n", + " nn_classifier = MLPClassifier()\n", + " nn_classifier.fit(train[:, :12], train[:,12])\n", + "\n", + " output = nn_classifier.predict(test[:,:12])\n", + " #C_ij = i actual, j predicted\n", + " c_matrix = confusion_matrix(test[:,12], output)\n", + " print(\"confusion matrix:\")\n", + " print(c_matrix)\n", + " print(\"true positive:\\t%d\\ntrue negative:\\t%d\\nfalse positive:\\t%d\\nfalse negative:\\t%d\"%(c_matrix[1,1], c_matrix[0,0], c_matrix[0,1], c_matrix[1,0]))\n", + " precision = c_matrix[1,1] / (c_matrix[1,1] + c_matrix[0,1])\n", + " recall = c_matrix[1,1] / (c_matrix[1,1] + c_matrix[1,0])\n", + " print(\"\\naccuracy:\\t%f\"%(np.sum(np.equal(output, test[:,12])) / 1000))\n", + " print(\"precision:\\t%f\\nrecall:\\t\\t%f\"%(precision, recall))\n", + " print(\"\\n\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oTjOYzBub9qx", + "outputId": "e75ae2af-9651-4f98-dc3e-fa7720e61ad8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "confusion matrix:\n", + "[[846 5]\n", + " [ 83 66]]\n", + "true positive:\t66\n", + "true negative:\t846\n", + "false positive:\t5\n", + "false negative:\t83\n", + "\n", + "accuracy:\t0.912000\n", + "precision:\t0.929577\n", + "recall:\t\t0.442953\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:690: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "confusion matrix:\n", + "[[861 
10]\n", + " [ 60 69]]\n", + "true positive:\t69\n", + "true negative:\t861\n", + "false positive:\t10\n", + "false negative:\t60\n", + "\n", + "accuracy:\t0.930000\n", + "precision:\t0.873418\n", + "recall:\t\t0.534884\n", + "\n", + "\n", + "confusion matrix:\n", + "[[830 16]\n", + " [ 91 63]]\n", + "true positive:\t63\n", + "true negative:\t830\n", + "false positive:\t16\n", + "false negative:\t91\n", + "\n", + "accuracy:\t0.893000\n", + "precision:\t0.797468\n", + "recall:\t\t0.409091\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:690: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "confusion matrix:\n", + "[[837 9]\n", + " [ 86 68]]\n", + "true positive:\t68\n", + "true negative:\t837\n", + "false positive:\t9\n", + "false negative:\t86\n", + "\n", + "accuracy:\t0.905000\n", + "precision:\t0.883117\n", + "recall:\t\t0.441558\n", + "\n", + "\n", + "confusion matrix:\n", + "[[855 10]\n", + " [ 83 52]]\n", + "true positive:\t52\n", + "true negative:\t855\n", + "false positive:\t10\n", + "false negative:\t83\n", + "\n", + "accuracy:\t0.907000\n", + "precision:\t0.838710\n", + "recall:\t\t0.385185\n", + "\n", + "\n", + "confusion matrix:\n", + "[[849 7]\n", + " [ 73 71]]\n", + "true positive:\t71\n", + "true negative:\t849\n", + "false positive:\t7\n", + "false negative:\t73\n", + "\n", + "accuracy:\t0.920000\n", + "precision:\t0.910256\n", + "recall:\t\t0.493056\n", + "\n", + "\n", + "confusion matrix:\n", + "[[861 7]\n", + " [ 68 64]]\n", + "true positive:\t64\n", + "true negative:\t861\n", + "false positive:\t7\n", + "false negative:\t68\n", + "\n", + "accuracy:\t0.925000\n", + "precision:\t0.901408\n", + "recall:\t\t0.484848\n", + "\n", + "\n", + "confusion matrix:\n", + "[[857 5]\n", + " [ 86 52]]\n", + "true positive:\t52\n", + "true negative:\t857\n", + "false positive:\t5\n", + "false negative:\t86\n", + "\n", + "accuracy:\t0.909000\n", + "precision:\t0.912281\n", + "recall:\t\t0.376812\n", + "\n", + "\n", + "confusion matrix:\n", + "[[862 8]\n", + " [ 72 58]]\n", + "true positive:\t58\n", + "true negative:\t862\n", + "false positive:\t8\n", + "false negative:\t72\n", + "\n", + "accuracy:\t0.920000\n", + "precision:\t0.878788\n", + "recall:\t\t0.446154\n", + "\n", + "\n", + "confusion matrix:\n", + "[[847 9]\n", + " [ 76 68]]\n", + "true positive:\t68\n", + "true negative:\t847\n", + "false positive:\t9\n", + "false negative:\t76\n", + "\n", + "accuracy:\t0.915000\n", + "precision:\t0.883117\n", + "recall:\t\t0.472222\n", + "\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "This is model 3: A Decision Tree. 
The maximum accuracy and Recall achieved over 10-fold CV is 86.7% accuracy and 49.6% precision" + ], + "metadata": { + "id": "DrJ21i6rnmS8" + } + }, + { + "cell_type": "code", + "source": [ + "# 10-fold Cross validation\n", + "kfold = KFold(n_splits=10)\n", + "\n", + "for i, (train_index, test_index) in enumerate(kfold.split(data)):\n", + " train = data[train_index, :]\n", + " test = data[test_index, :]\n", + "\n", + " dtree_classifier = DecisionTreeClassifier()\n", + " dtree_classifier.fit(train[:, :12], train[:,12])\n", + "\n", + " output = dtree_classifier.predict(test[:,:12])\n", + " print(\"iter %d:\\n------------------\"%(i+1))\n", + "\n", + " #C_ij = i actual, j predicted\n", + " c_matrix = confusion_matrix(test[:,12], output)\n", + " print(\"confusion matrix:\")\n", + " print(c_matrix)\n", + " print(\"true positive:\\t%d\\ntrue negative:\\t%d\\nfalse positive:\\t%d\\nfalse negative:\\t%d\"%(c_matrix[1,1], c_matrix[0,0], c_matrix[0,1], c_matrix[1,0]))\n", + " precision = c_matrix[1,1] / (c_matrix[1,1] + c_matrix[0,1])\n", + " recall = c_matrix[1,1] / (c_matrix[1,1] + c_matrix[1,0])\n", + " print(\"\\naccuracy:\\t%f\"%(np.sum(np.equal(output, test[:,12])) / 1000))\n", + " print(\"precision:\\t%f\\nrecall:\\t\\t%f\"%(precision, recall))\n", + " print(\"\\n\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_NHWq08yBroB", + "outputId": "6dab9505-0938-4676-e910-1d24747b1759" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "iter 1:\n", + "------------------\n", + "confusion matrix:\n", + "[[779 72]\n", + " [ 80 69]]\n", + "true positive:\t69\n", + "true negative:\t779\n", + "false positive:\t72\n", + "false negative:\t80\n", + "\n", + "accuracy:\t0.848000\n", + "precision:\t0.489362\n", + "recall:\t\t0.463087\n", + "\n", + "\n", + "iter 2:\n", + "------------------\n", + "confusion matrix:\n", + "[[767 104]\n", + " [ 59 70]]\n", + "true positive:\t70\n", + "true negative:\t767\n", + "false positive:\t104\n", + "false negative:\t59\n", + "\n", + "accuracy:\t0.837000\n", + "precision:\t0.402299\n", + "recall:\t\t0.542636\n", + "\n", + "\n", + "iter 3:\n", + "------------------\n", + "confusion matrix:\n", + "[[769 77]\n", + " [ 85 69]]\n", + "true positive:\t69\n", + "true negative:\t769\n", + "false positive:\t77\n", + "false negative:\t85\n", + "\n", + "accuracy:\t0.838000\n", + "precision:\t0.472603\n", + "recall:\t\t0.448052\n", + "\n", + "\n", + "iter 4:\n", + "------------------\n", + "confusion matrix:\n", + "[[762 84]\n", + " [ 90 64]]\n", + "true positive:\t64\n", + "true negative:\t762\n", + "false positive:\t84\n", + "false negative:\t90\n", + "\n", + "accuracy:\t0.826000\n", + "precision:\t0.432432\n", + "recall:\t\t0.415584\n", + "\n", + "\n", + "iter 5:\n", + "------------------\n", + "confusion matrix:\n", + "[[776 89]\n", + " [ 81 54]]\n", + "true positive:\t54\n", + "true negative:\t776\n", + "false positive:\t89\n", + "false negative:\t81\n", + "\n", + "accuracy:\t0.830000\n", + "precision:\t0.377622\n", + "recall:\t\t0.400000\n", + "\n", + "\n", + "iter 6:\n", + "------------------\n", + "confusion matrix:\n", + "[[768 88]\n", + " [ 76 68]]\n", + "true positive:\t68\n", + "true negative:\t768\n", + "false positive:\t88\n", + "false negative:\t76\n", + "\n", + "accuracy:\t0.836000\n", + "precision:\t0.435897\n", + "recall:\t\t0.472222\n", + "\n", + "\n", + "iter 7:\n", + "------------------\n", + "confusion matrix:\n", + "[[804 64]\n", + " [ 69 63]]\n", + "true 
positive:\t63\n", + "true negative:\t804\n", + "false positive:\t64\n", + "false negative:\t69\n", + "\n", + "accuracy:\t0.867000\n", + "precision:\t0.496063\n", + "recall:\t\t0.477273\n", + "\n", + "\n", + "iter 8:\n", + "------------------\n", + "confusion matrix:\n", + "[[780 82]\n", + " [ 86 52]]\n", + "true positive:\t52\n", + "true negative:\t780\n", + "false positive:\t82\n", + "false negative:\t86\n", + "\n", + "accuracy:\t0.832000\n", + "precision:\t0.388060\n", + "recall:\t\t0.376812\n", + "\n", + "\n", + "iter 9:\n", + "------------------\n", + "confusion matrix:\n", + "[[787 83]\n", + " [ 80 50]]\n", + "true positive:\t50\n", + "true negative:\t787\n", + "false positive:\t83\n", + "false negative:\t80\n", + "\n", + "accuracy:\t0.837000\n", + "precision:\t0.375940\n", + "recall:\t\t0.384615\n", + "\n", + "\n", + "iter 10:\n", + "------------------\n", + "confusion matrix:\n", + "[[767 89]\n", + " [ 64 80]]\n", + "true positive:\t80\n", + "true negative:\t767\n", + "false positive:\t89\n", + "false negative:\t64\n", + "\n", + "accuracy:\t0.847000\n", + "precision:\t0.473373\n", + "recall:\t\t0.555556\n", + "\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "plot_data = np.zeros((11,2))\n", + "\n", + "for i in range(12,1,-1):\n", + " pca = PCA(n_components = i)\n", + " data_transformed = np.ascontiguousarray(pca.fit_transform(data[:,:i])).astype(float)\n", + "\n", + " train = data_transformed[:9000,:]\n", + " test = data_transformed[9000:,:]\n", + "\n", + " nn_classifier = MLPClassifier()\n", + " nn_classifier.fit(train[:,:-1], data[:9000,-1])\n", + " output = nn_classifier.predict(test[:,:-1])\n", + "\n", + " accuracy = np.sum(np.equal(output, data[9000:,-1])) / 1000\n", + " plot_data[12 - i] = [12-i,accuracy]\n", + "print()\n", + "\n", + "plt.plot(plot_data[:,0], plot_data[:,1])\n", + "plt.title(\"neural network accuracy as increased dimensions are reduced via PCA\")\n", + "plt.xlabel(\"dimensions reduced\")\n", + "plt.ylabel(\"accuracy\")\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 472 + }, + "id": "W1TtAYJqlk2C", + "outputId": "6a8f0005-862b-4ccc-c924-faa5f2ca4fe7" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "

" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAm4AAAHHCAYAAAAGU9SoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABwGklEQVR4nO3deVxUVf8H8M/MwAw7CIwgiICIomnumguuJGpZlqlpPxdMK5fcnieT3Eozs73csh41S03NrUWzDPd9wV1EWVRE2WWXbeb8/kAmRwYFRC4z83m/XvNS7tzlexfu/XLOuefIhBACRERERFTjyaUOgIiIiIjKh4kbERERkZFg4kZERERkJJi4ERERERkJJm5ERERERoKJGxEREZGRYOJGREREZCSYuBEREREZCSZuREREREaCiVs5XLt2DTKZDD/88IPUodQI77//PmQyGVJSUqQOhaoYr/WKedzj9cMPP0Amk+HatWu6ad26dUO3bt2qJD4pGdo3evKM4bjLZDK8//77VbKuvXv3QiaTYe/evVWyPmPAxK2G27FjR5Vd4ERERFS2kkSw5GNpaYn69etj+PDhiImJKTV/ZmYmPvjgAzRv3hx2dnawtrZG06ZN8e677+LWrVsGtzFo0CDIZDK8++67lYrRolJLUbXZsWMHlixZwuSNqoW3tzfu3r0LS0tLqUMxW3///bfUIVSJYcOG4dVXX4VKpZI6FDJhXbp0wd27d6FUKqt0vRMnTkTbtm1RWFiI8PBwfPfdd9i+fTvOnz8PDw8PAEBMTAyCgoJw48YNDBw4EG+88QaUSiXOnTuHFStWYOvWrbhy5YreejMzM/H777/Dx8cHP//8Mz7++GPIZLIKxWaWiVteXh6USiXkchY4VkROTg5sbW2lDqNamds+y2QyWFlZVes2tVotCgoKqn27NVVVP4CkolAooFAopA6jyhUVFUGr1Vb5eXpS6zV1crn8idw7AgMD8corrwAAQkJC0LBhQ0ycOBGrV69GaGgoioqK8PLLLyMxMRF79+5F586d9ZafP38+Fi5cWGq9mzdvhkajwcqVK9GjRw/s378fXbt2rVBs1ZK5lLSJioqKwsiRI+Hk5ARHR0eEhIQgNze31Pxr1qxB69atYW1tDWdnZ7z66quIi4vTm8fHxwcjR44steyD7UNKij3Xr1+PmTNnwtPTEzY2NsjMzERaWhr++9//olmzZrCzs4ODgwP69OmDs2fPVmo/S9oWHDp0CFOnToVarYatrS1eeuklJCcnl5r/zz//RGBgIGxtbWFvb4/nnnsOFy9e1H0/cuRILFmyBAD0im4BoFWrVnj55Zf11tesWTPIZDKcO3dON23Dhg2QyWSIiIjQTTt9+jT69OkDBwcH2NnZoWfPnjh69KjBfdm3bx/GjRuH2rVro27dumXu+/Xr19GgQQM0bdoUiYmJD51v3LhxaNSoEaytreHi4oKBAwcabI+Rnp6OKVOmwMfHByqVCnXr1sXw4cP12tbl5eXh/fffR8OGDWFlZYU6derg5ZdfRnR0NICy2z8Yaps0cuRI2NnZITo6Gn379oW9vT1ee+01AMCBAwcwcOBA1KtXDyqVCl5eXpgyZQru3r1bKu7Lly9j0KBBUKvVsLa2RqNGjTBjxgwAwJ49eyCTybB169ZSy61btw4ymQxHjhwp8/hV5JpdtGgRnnrqKdjY2KBWrVpo06YN1q1bV+a6H3Vc4uPj0b9/f9jZ2UGtVuO///0vNBqN3vJarRZff/01mjVrBisrK6jVavTu3RsnT57UzSOTyTBhwgSsXbsWTz31FFQqFXbu3AkAiI+Px6hRo+Dm5gaVSoWnnnoKK1eu1NtGQUEBZs+ejdatW8PR0RG2trYIDAzEnj17Su3P+vXr0bp1a9jb28PBwQHNmjXD119/rTdPeno6Jk+eDC8vL6hUKjRo0AALFy6EVqstNd/IkSPh6OgIJycnjBgxAunp6Q89nve7ePEievToAWtra9StWxcffvhhqW0AZd/DNm7ciA8++ACenp6wt7fHK6+8goyMDOTn52Py5MmoXbs27OzsEBISgvz8/FLrLc99tVu3bmjatCkuXbqE7t27w8bGBp6envjkk09Kre9R11dZba2WLl2qO+8eHh4YP358qeNYlXEYUt5rqOT34bPPPsNXX30FPz8/qFQqXLp0CUDx7/orr7wCZ2dnWFlZoU2bNvjtt98euu2qXG95r6my2pQZeo6W576bn5+POXPmoEGDBrr74bRp00pdd/n5+ZgyZQrUajXs7e3xwgsv4ObNm488PomJibCwsMAHH3xQ6rvIyEjIZDIsXrwYgOF7fEXu1+XVo0cPAEBsbCyA4gTs7NmzmDFjRqmkDQAcHBwwf/78UtPXrl2LZ599Ft27d0fjxo2xdu3aCsdSrSVugwYNgq+vLxYsWIDw8HD873//Q+3atfWy0vnz52PWrFkYNGgQRo8ejeTkZCxatAhdunTB6dOn4eTkVKltz5s3D0qlEv/973+Rn58PpVKJS5cuYdu2bRg4cCB8fX2RmJiI5cuXo2vXrrh06ZKuOLSi3n77bdSqVQtz5szBtWvX8NVXX2HChAnYsGGDbp6ffvoJI0aMQHBwMBYuXIjc3FwsW7YMnTt3xunTp+Hj44M333wTt27dwq5du/DTTz/pbSMwMBA///yz7ue0tDRcvHgRcrkcBw4cwNNPPw2g+AJWq9Vo3LgxgOJf9MDAQDg4OGDatGmwtLTE8uXL0a1bN+zbtw/t27fX2864ceOgVqsxe/Zs5OTkGNzf6Oho9OjRA87Ozti1axdcXV3LPDYnTpzA4cOH8eqrr6Ju3bq4du0ali1bhm7duuHSpUuwsbEBAGRnZyMwMBAREREYNWoUWrVqhZSUFPz222+4efMmXF1dodFo8PzzzyMsLAyvvvoqJk2ahKysLOzatQsXLlyAn59fBc5asaKiIgQHB6Nz58747LPPdPH88ssvyM3NxdixY+Hi4oLjx49j0aJFuHnzJn755Rfd8ufOnUNgYCAsLS3xxhtvwMfHB9HR0fj9998xf/58dOvWDV5eXli7di1eeuklvW2vXbsWfn5+6NChQ5nxxcTElOua/f777zFx4kS88sormDRpEvLy8nDu3DkcO3YMQ4cOrfBx0Wg0CA4ORvv27fHZZ5/hn3/+weeffw4/Pz+MHTtWN9/rr7+OH374AX369MHo0aNRVFSEAwcO4OjRo2jTpo1uvt27d2Pjxo2YMGECXF1d4ePjg8TERDzzzDO6xE6tVuPPP//E66+/jszMTEyePBlAcVXD//73PwwZMgRjxoxBVlYWVqxYgeDgYBw/fhwtWrQAAOzatQtDhgxBz549dfeYiIgIHDp0CJMmTQIA5ObmomvXroiPj8ebb76JevXq4fDhwwgNDcXt27fx1VdfAQCEEHjxxRdx8OBBvPXWW2jcuDG2bt2KESNGlOv4JSQkoHv37igqKsL06dNha2uL7777DtbW1uU+BwsWLIC1tTWmT5+OqKgoLFq0CJaWlpDL5bhz5w7e
f/99HD16FD/88AN8fX0xe/Zs3bIVua/euXMHvXv3xssvv4xBgwZh06ZNePfdd9GsWTP06dMHQOWvr/fffx8ffPABgoKCMHbsWERGRmLZsmU4ceIEDh06pFdF/yTjKO81VGLVqlXIy8vDG2+8AZVKBWdnZ1y8eBGdOnWCp6en7pxu3LgR/fv3x+bNm0v9fhvyOOutimvqQeW572q1Wrzwwgs4ePAg3njjDTRu3Bjnz5/Hl19+iStXrmDbtm269Y0ePRpr1qzB0KFD0bFjR+zevRvPPffcI+Nwc3ND165dsXHjRsyZM0fvuw0bNkChUGDgwIFlLl/e+3VFlBQGuLi4AIAukR42bFi513Hr1i3s2bMHq1evBgAMGTIEX375JRYvXlyxklZRDebMmSMAiFGjRulNf+mll4SLi4vu52vXrgmFQiHmz5+vN9/58+eFhYWF3nRvb28xYsSIUtvq2rWr6Nq1q+7nPXv2CACifv36Ijc3V2/evLw8odFo9KbFxsYKlUol5s6dqzcNgFi1atVD93PVqlUCgAgKChJarVY3fcqUKUKhUIj09HQhhBBZWVnCyclJjBkzRm/5hIQE4ejoqDd9/PjxwtBp+uWXXwQAcenSJSGEEL/99ptQqVTihRdeEIMHD9bN9/TTT4uXXnpJ93P//v2FUqkU0dHRumm3bt0S9vb2okuXLqX2pXPnzqKoqEhv2yXnMzk5WURERAgPDw/Rtm1bkZaW9tDjI4QodQ6EEOLIkSMCgPjxxx9102bPni0AiC1btpSav+TYrly5UgAQX3zxRZnzlJz/PXv26H1v6JyOGDFCABDTp08vV9wLFiwQMplMXL9+XTetS5cuwt7eXm/a/fEIIURoaKhQqVS660EIIZKSkoSFhYWYM2dOqe3cr7zX7Isvviieeuqph67LkIcdl/vXL4QQLVu2FK1bt9b9vHv3bgFATJw4sdR6799/AEIul4uLFy/qzfP666+LOnXqiJSUFL3pr776qnB0dNSdg6KiIpGfn683z507d4Sbm5vePWbSpEnCwcGh1PV7v3nz5glbW1tx5coVvenTp08XCoVC3LhxQwghxLZt2wQA8cknn+jmKSoqEoGBgeW6N0yePFkAEMeOHdNNS0pKEo6OjgKAiI2N1U0v6x7WtGlTUVBQoJs+ZMgQIZPJRJ8+ffS21aFDB+Ht7a37uSL31a5du5b6XczPzxfu7u5iwIABumnlub5K7iEl+5aUlCSUSqXo1auX3jW8ePFiAUCsXLnyicRhSHmvoZLfBwcHB5GUlKQ3f8+ePUWzZs1EXl6ebppWqxUdO3YU/v7+D91+Vay3ItcUAIP3lgefo+W57/70009CLpeLAwcO6H3/7bffCgDi0KFDQgghzpw5IwCIcePG6c03dOjQMuO53/LlywUAcf78eb3pTZo0ET169ND9bOgeX977tSEl61u5cqVITk4Wt27dEtu3bxc+Pj5CJpOJEydOCCGK73+Ojo4PXdeDPvvsM2FtbS0yMzOFEEJcuXJFABBbt26t0HqqtZHXW2+9pfdzYGAgUlNTkZmZCQDYsmULtFotBg0ahJSUFN3H3d0d/v7+BqtCymvEiBGl/hJRqVS6dm4ajQapqamws7NDo0aNEB4eXultvfHGG3qNDQMDA6HRaHD9+nUAxSUB6enpGDJkiN5+KhQKtG/fvlz7GRgYCADYv38/gOKStbZt2+LZZ5/FgQMHABQXeV+4cEE3r0ajwd9//43+/fujfv36unXVqVMHQ4cOxcGDB3XnosSYMWPKbKdy4cIFdO3aFT4+Pvjnn39Qq1atR8Z9/zkoLCxEamoqGjRoACcnJ71jvnnzZjRv3tzgX60lx3bz5s1wdXXF22+/XeY8lXF/CZKhuHNycpCSkoKOHTtCCIHTp08DAJKTk7F//36MGjUK9erVKzOe4cOHIz8/H5s2bdJN27BhA4qKivB///d/D42tvNesk5MTbt68iRMnTlRgzx/O0O/v/W9Zbd68GTKZrNRfyEDp89G1a1c0adJE97MQAps3b0a/fv0ghND7vQgODkZGRoZu/xQKhe6vU61Wi7S0NBQVFaFNmzaljkFOTg527dpV5j798ssvCAwMRK1atfS2GRQUBI1Go/v92rFjBywsLPSuDYVCYfDaM2THjh145pln0K5dO900tVqtq4ovj+HDh+uVSLVv3x5CCIwaNUpvvvbt2yMuLg5FRUUAKn5ftbOz07sOlUol2rVrp3euK3N9/fPPPygoKMDkyZP12hePGTMGDg4O2L59e7XEAZT/GioxYMAAqNVq3c9paWnYvXs3Bg0ahKysLN0xTU1NRXBwMK5evYr4+PhHxvE4662Ka+pB5bnv/vLLL2jcuDECAgL0rqeSqsSS62nHjh0Aihv536+k5PxRXn75ZVhYWOjVVF24cAGXLl3C4MGDH7psee7XjzJq1Cio1Wp4eHjgueeeQ05ODlavXq2rOcjMzIS9vX251lVi7dq1eO6553TL+fv7o3Xr1hWuLq3WxO3Bh1nJg/7OnTsAgKtXr0IIAX9/f6jVar1PREQEkpKSKr1tX1/fUtO0Wi2+/PJL+Pv7Q6VSwdXVFWq1GufOnUNGRkalt1We/QSK68wf3M+///67XPvp5uYGf39/XZJ24MABBAYGokuXLrh16xZiYmJw6NAhaLVaXeKWnJyM3NxcNGrUqNT6GjduDK1WW6rNi6HjVqJfv36wt7fHX3/9BQcHh0fGDAB3797F7Nmzde2JSo55enq63jGPjo5G06ZNH7qu6OhoNGrUCBYWVVfjb2FhYbAt340bNzBy5Eg4Ozvr2niVNCgtibvkgfKouAMCAtC2bVu9X9a1a9fimWeeQYMGDR66bHmv2XfffRd2dnZo164d/P39MX78eBw6dKh8B8GAkvZq96tVq5bumgaKz4eHhwecnZ0fub4Hr6vk5GSkp6fju+++K/U7ERISAgB6vxerV6/G008/DSsrK7i4uECtVmP79u16x2DcuHFo2LAh+vTpg7p162LUqFG6tnQlrl69ip07d5baZlBQkN42r1+/jjp16sDOzk5veUO/S4Zcv34d/v7+paaXd3mg9H3F0dERAODl5VVqular1R2Lit5X69atWyrRfvBcV+b6KvnD9cF9ViqVqF+/vu77Jx1HifJcQyUevF6joqIghMCsWbNKHdOSP1zKcx9/nPVWxTX1oPLcd69evYqLFy+Wiq9hw4al4pPL5aWarJQ3PldXV/Ts2RMbN27UTduwYQMsLCxKte9+UHnu148ye/Zs7Nq1C7t378a5c+dw69YtvWpRBwcHZGVllWtdQHEzjdOnT6NTp06IiorSfbp164Y//vijVKHJw1RrG7eySm6EEACKH0oymQx//vmnwXnvv2mWVaKi0WgMLmuo3v+jjz7CrFmzMGrUKMybNw/Ozs6Qy+WYPHmywQae5VWe/QSK27m5u7uXmq+8iUjnzp0RFhaGu3fv4tSpU5g9ezaaNm0KJycnHDhwABEREbCzs0PLli0ruSeGj1uJAQMGYPXq1Vi7di3efPPNcq3v7bffxqpVqzB
58mR06NABjo6OkMlkePXVVx/rmJflYdeJIfeXaN0/77PPPou0tDS8++67CAgIgK2tLeLj4zFy5MhKxT18+HBMmjQJN2/eRH5+Po4ePaprbPsw5b1mGzdujMjISPzxxx/YuXMnNm/ejKVLl2L27NkGG/w+SlW/HfjgdVUS+//93/+V2W6spN3mmjVrMHLkSPTv3x/vvPMOateuDYVCgQULFujaoQBA7dq1cebMGfz111/4888/8eeff2LVqlUYPny4ro2JVqvFs88+i2nTphncZsnDqCYo6xxU5X21POsDqv76MuRJxlHea6hEWdfrf//7XwQHBxvcxqP+CHuS6y2vsu6DD6PVatGsWTN88cUXBr9/8A+Jx/Hqq68iJCQEZ86cQYsWLbBx40b07Nnzoe2oq+p+3axZM90fcIYEBATg9OnTiIuLK9c+r1mzBgAwZcoUTJkypdT3mzdv1v2R+ig1qjsQPz8/CCHg6+v7yBtmrVq1DL7Rdf36db1qwIfZtGkTunfvjhUrVuhNT09Pf+iF8bhK/gKpXbv2Qy8M4OFVfoGBgVi1ahXWr18PjUaDjh07Qi6Xo3PnzrrErWPHjroboFqtho2NDSIjI0ut6/Lly5DL5RX6pfv0009hYWGBcePGwd7evlyN3jdt2oQRI0bg888/103Ly8srdS79/Pxw4cKFh67Lz88Px44dQ2FhYZn9jpWUdj64/gf/un+Y8+fP48qVK1i9ejWGDx+um/5gFVzJdfeouIHiG9LUqVPx888/6/pNe1TxP1Cxa9bW1haDBw/G4MGDUVBQgJdffhnz589HaGjoE3l93s/PD3/99RfS0tLKVep2v5K3zjQazSN/JzZt2oT69etjy5Yter8fhqpolUol+vXrh379+kGr1WLcuHFYvnw5Zs2ahQYNGsDPzw/Z2dmP3Ka3tzfCwsKQnZ2tl+gY+l0qa/mSkvb7lXf5x1GR+2pFVPT68vb2BlC8z/ffowsKChAbG/vIc1BVcQAVu4YMKYnf0tKy0nE/7norck0Zel4WFBTg9u3betPKe989e/Ysevbs+dDnk7e3N7Rara5m5GHxlaV///548803ddWlV65cQWho6EOXKe/9+nH169cPP//8M9asWfPImIQQWLduHbp3745x48aV+n7evHlYu3ZtuRO3GtWR2csvvwyFQoEPPvhA768qoHjHU1NTdT/7+fnh6NGjKCgo0E37448/SlX1PYxCoSi1nV9++aVcbRMeR3BwMBwcHPDRRx+hsLCw1Pf3dx1S0oeYoSS1pAp04cKFePrpp3VVJ4GBgQgLC8PJkyd18wDF+9urVy/8+uuveq/oJyYmYt26dejcuXO5qzyB4qTyu+++wyuvvIIRI0aU6zV4Q8d80aJFpf7yGzBgAM6ePWuw24yS5QcMGICUlBSDJVUl83h7e0OhUOjaKpVYunTpI2O9P+b711ny/we7lVCr1ejSpQtWrlyJGzduGIynhKurK/r06YM1a9Zg7dq16N27d7n+WCjvNXv/7wpQnMA0adIEQgiD11xVGDBgAIQQBks6Hoz5QQqFAgMGDMDmzZsNPjju/50wdD6OHTtWqhuVB4+BXC7XldqVdFswaNAgHDlyBH/99Vepbaanp+vaifXt2xdFRUVYtmyZ7nuNRoNFixY9dL9K9O3bF0ePHsXx48f19qkyXQFUVEXuq+VVmesrKCgISqUS33zzjV4cK1asQEZGRrneNqyKOIDyX0NlqV27Nrp164bly5eXSn4AGOz+qarXW5Frys/Pr9Q98LvvvqvUfXfQoEGIj4/H999/X2qeu3fv6nofKHnz95tvvtGbp+RN7fJwcnJCcHAwNm7ciPXr10OpVKJ///4PXaa89+vH9corr6BZs2aYP3++wesmKytL1w3UoUOHcO3aNYSEhOCVV14p9Rk8eDD27NlT5kgLD6pxJW4ffvghQkNDce3aNfTv3x/29vaIjY3F1q1b8cYbb+C///0vgOLXjDdt2oTevXtj0KBBiI6Oxpo1ayrUBcTzzz+PuXPnIiQkBB07dsT58+exdu3acpfYVZaDgwOWLVuGYcOGoVWrVnj11VehVqtx48YNbN++HZ06ddIlI61btwZQ3MAzODgYCoUCr776KoDiInN3d3dERkbqNZLu0qWLbiiN+xM3APjwww+xa9cudO7cGePGjYOFhQWWL1+O/Px8g30kPYpcLseaNWvQv39/DBo0CDt27NA1UjXk+eefx08//QRHR0c0adIER44cwT///KN7xbrEO++8g02bNmHgwIEYNWoUWrdujbS0NPz222/49ttv0bx5cwwfPhw//vgjpk6diuPHjyMwMBA5OTn4559/MG7cOLz44otwdHTEwIEDsWjRIshkMvj5+eGPP/6oUHvJgIAA+Pn54b///S/i4+Ph4OCAzZs367W1KfHNN9+gc+fOaNWqFd544w34+vri2rVr2L59O86cOaM37/Dhw3UdPM6bN69csZT3mu3Vqxfc3d3RqVMnuLm5ISIiAosXL9ZrGFvVunfvjmHDhuGbb77B1atX0bt3b2i1Whw4cADdu3fHhAkTHrr8xx9/jD179qB9+/YYM2YMmjRpgrS0NISHh+Off/5BWlqa7hhs2bIFL730Ep577jnExsbi22+/RZMmTZCdna1b3+jRo5GWloYePXqgbt26uH79OhYtWoQWLVrousd555138Ntvv+H555/HyJEj0bp1a+Tk5OD8+fPYtGkTrl27BldXV/Tr1w+dOnXC9OnTce3aNTRp0gRbtmwpd3uZadOm4aeffkLv3r0xadIkXdcN3t7eev0uPgkVua+WV2WuL7VajdDQUHzwwQfo3bs3XnjhBURGRmLp0qVo27btI1/Mqao4gPJfQw+zZMkSdO7cGc2aNcOYMWNQv359JCYm4siRI7h582al+wMt73orck2NHj0ab731FgYMGIBnn30WZ8+exV9//VXqj8Xy3HeHDRuGjRs34q233sKePXvQqVMnaDQaXL58GRs3bsRff/2FNm3aoEWLFhgyZAiWLl2KjIwMdOzYEWFhYYiKiqrQ8Rg8eDD+7//+D0uXLkVwcPAjuwSryP36cVhaWmLLli0ICgpCly5dMGjQIHTq1AmWlpa4ePEi1q1bh1q1amH+/PlYu3YtFApFmX+cvPDCC5gxYwbWr1+PqVOnPnrjFXoHtZLu7z7ifg++Ll5i8+bNonPnzsLW1lbY2tqKgIAAMX78eBEZGak33+effy48PT2FSqUSnTp1EidPnizzVfpffvmlVFx5eXniP//5j6hTp46wtrYWnTp1EkeOHCm1jop2B1LyuvCDMTzYJcWePXtEcHCwcHR0FFZWVsLPz0+MHDlSnDx5UjdPUVGRePvtt4VarRYymaxU1yADBw4UAMSGDRt00woKCoSNjY1QKpXi7t27peIMDw8XwcHBws7OTtjY2Iju3buLw4cPl2tfhDB8PnNzc0XXrl2FnZ2dOHr0aJnH6M6dOyIkJES4uroKOzs7ERwcLC5fvmywe5fU1FQxYcIE4enpKZRKpahbt64YMWKEXncRubm5Ys
aMGcLX11dYWloKd3d38corr+h1d5KcnCwGDBggbGxsRK1atcSbb74pLly4YLDbC1tbW4NxX7p0SQQFBQk7Ozvh6uoqxowZI86ePWvwurhw4YJ46aWXhJOTk7CyshKNGjUSs2bNKrXO/Px8UatWLeHo6GjwPBlS3mt2+fLlokuXLsLFxUWoVCrh5+cn3nnnHZGRkfHQ9ZfVHYih41JyHdyvqKhIfPrppyIgIEAolUqhVqtFnz59xKlTp3TzABDjx483uP3ExEQxfvx44eXlpTufPXv2FN99951uHq1WKz766CPh7e0tVCqVaNmypfjjjz/EiBEj9LrB2LRpk+jVq5eoXbu2UCqVol69euLNN98Ut2/f1ttmVlaWCA0NFQ0aNBBKpVK4urqKjh07is8++0yv+43U1FQxbNgw4eDgIBwdHcWwYcPE6dOny3VvEEKIc+fOia5duworKyvh6ekp5s2bJ1asWFHu7kAevIeV9Tta1v22PPfVrl27Guxe48FjW57rq6z7++LFi0VAQICwtLQUbm5uYuzYseLOnTt681RlHIaU9xoq+X349NNPDa4nOjpaDB8+XLi7uwtLS0vh6ekpnn/+ebFp06aHbr+q1lvea0qj0Yh3331XuLq6ChsbGxEcHCyioqIqfd8tKCgQCxcuFE899ZRQqVSiVq1aonXr1uKDDz7QO/Z3794VEydOFC4uLsLW1lb069dPxMXFlas7kBKZmZnC2tpaABBr1qwp9b2h52tF7tdlrc9QzmDInTt3xOzZs0WzZs2EjY2NsLKyEk2bNhWhoaHi9u3boqCgQLi4uIjAwMCHrsfX11e0bNmyXNuUCfGIOgwieiKKiorg4eGBfv36lWqzRkREZEiNauNGZE62bduG5ORkvQa0RERED8MSN6JqduzYMZw7dw7z5s2Dq6vrY3X2TERE5oUlbkTVbNmyZRg7dixq166NH3/8UepwiIjIiLDEjYiIiMhIsMSNiIiIyEgwcSMiIiIyEjWiA94lS5bg008/RUJCApo3b45FixahXbt2BuctLCzEggULsHr1asTHx6NRo0ZYuHAhevfurZtnwYIF2LJlCy5fvgxra2t07NgRCxcuLPfgtlqtFrdu3YK9vf1Dh/QgIiKimkMIgaysLHh4eJQad9pklKu3tydo/fr1QqlUipUrV4qLFy+KMWPGCCcnJ5GYmGhw/mnTpgkPDw+xfft2ER0dLZYuXSqsrKxEeHi4bp7g4GCxatUqceHCBXHmzBnRt29fUa9ePZGdnV2umEo6COSHH3744YcffozvExcXVyU5Sk0k+csJ7du3R9u2bXVDPGm1Wnh5eeHtt9/G9OnTS83v4eGBGTNmYPz48bppAwYMgLW1NdasWWNwG8nJyahduzb27duHLl26PDKmjIwMODk5IS4urkJjdxIREZF0MjMz4eXlhfT0dN343aZG0qrSgoICnDp1CqGhobppcrkcQUFBZQ72m5+fDysrK71p1tbWOHjwYJnbKRlP0NnZuVxxlVSPOjg4MHEjIiIyMqbczEnSCuCUlBRoNBq4ubnpTXdzc0NCQoLBZYKDg/HFF1/g6tWr0Gq12LVrF7Zs2YLbt28bnF+r1WLy5Mno1KkTmjZtanCe/Px8ZGZm6n2IiIiIahqja7n39ddfw9/fHwEBAVAqlZgwYQJCQkLKbIQ4fvx4XLhwAevXry9znQsWLICjo6Pu4+Xl9aTCJyIiIqo0SRM3V1dXKBQKJCYm6k1PTEyEu7u7wWXUajW2bduGnJwcXL9+HZcvX4adnR3q169fat4JEybgjz/+wJ49e1C3bt0y4wgNDUVGRobuExcX93g7RkRERPQESJq4KZVKtG7dGmFhYbppWq0WYWFh6NChw0OXtbKygqenJ4qKirB582a8+OKLuu+EEJgwYQK2bt2K3bt3w9fX96HrUqlUuvZsbNdGRERENZXk/bhNnToVI0aMQJs2bdCuXTt89dVXyMnJQUhICABg+PDh8PT0xIIFCwAUD9AdHx+PFi1aID4+Hu+//z60Wi2mTZumW+f48eOxbt06/Prrr7C3t9e1l3N0dIS1tXX17yQRERFRFZA8cRs8eDCSk5Mxe/ZsJCQkoEWLFti5c6fuhYUbN27otV/Ly8vDzJkzERMTAzs7O/Tt2xc//fQTnJycdPMsW7YMANCtWze9ba1atQojR4580rtERERE9ERI3o9bTZSZmQlHR0dkZGSw2pSIiMhImMPz2+jeKiUiIiIyV0zciIiIiIwEEzciIiIiI8HEjYiIiMhIMHEjIiIiMhJM3Iio0vIKNbhboJE6DCIisyF5P25EZFzi0nKxJzIJuy8n4Uh0KjRagTY+tdAjoDa6N6qNBrXtIJPJpA6TiMgksR83A8yhHxii8irUaHHy2h1dshaVlP3Q+evWsi5O4gJqo0N9F1hZKqopUiIyd+bw/GbiZoA5nHiih0nOysfeyCTsjUzG/qvJyMor0n2nkMvQ2rsWujeqjR4BtWFlKceey0nYHZmMo9GpKNBodfNaWcrRyc8V3QKK5/V04pBzRPTkmMPzm4mbAeZw4onup9UKnI/PwJ7IJOy5nISzNzP0vne2VaJbQzW6B9RGF381HG0sDa4nt6AIh6JSsftyEvZGJuF2Rp7e943c7NH9XhLXqp4TLBRsZktEVcccnt9M3AwwhxNPlJlXiINXU+4lWclIyc7X+76ppwN6NCqu8ny6rhMU8oq1WxNC4HJCFnZfLk4Gw2/cgfa+u42DlQW6NFSjR0BtdG2ohoudqip2i4jMmDk8v5m4GWAOJ57MjxAC0cnZ2H25uK3ayWt3UHRfJmWrVCDQX43uAWp0a1Qbbg5WVbr9OzkF2H81GXsuJ2HvlWSk5xbqvpPJgBZeTrrq16c8HPiCAxFVmDk8v5m4GWAOJ57MQ16hBkdiUrH3chJ2RyYhLu2u3vf1XW11VZdtfZyhtKieqkuNVuBM3J17SWQyIm5n6n1f216F7vdK+zr7u8JOxRfgiejRzOH5zcTNAHM48WS64tPvYs+96slD0SnIK/z3ZQGlQo729Z11XXf4uNpKGOm/bmfcxd7IZOy+nIRDUSnIva9vOEuFDO18nXWlcfXVdhJGSkQ1mTk8v5m4GWAOJ55MR5FGi/Ab6boXAi4nZOl97+5ghe4BanRvVBudGrjCtoaXXuUXaXAsJq24bVxkEq6n5up97+Nig+73Es/29Z2hsmB3I0RUzBye30zcDDCHE0/GLS2nAHsjk7AnMhn7ryQj4+6/7cXkMqBlvX87xG1cx96o24vF3GuXtycyCcdj01Co+feWZaNUoFMDV92+ujtWbbs8IjIu5vD8ZuJmgDmceDIuQghcvJV5r7+0JJyJS8f9v7lONpbo2rC4VK1rQzVq2SqlC/YJys4vwsGrKcVVwZFJSMrSfxO2cR0H9AgoflO1hVetCr8JS0TGzRye30zcDDCHE08136OSlAB3e/S492JBCy/z6xPtUclsrZJk9l53I042ppnMEtG/zOH5zcTNAHM48VQzlVQL7o1MxrHYVL1qQWvL+6oFA9So48hRCO6Xm
p2PfVeKX3DYfyUZmfeN9iCXAa3q1dK9QRvgbtzVx0RkmDk8v5m4GWAOJ96Q3IIixKbk4FpKLq6l5iAmOQfXUnNwIy0Xnk7Wen1syVkFVSXyizQ4Hpum66T22gMN8b1dbHTdYrT3dea4n+V0/wsbey4nITJR/4UNF1sl6qtt4etqCx9XW9S/96+Piy2PMZERM4fnNxM3A0z5xOcXaXAjNRexKTnFSVpqju7/iZn5j14BALW9Ct0bFbcj6tTAFfZWhoc/IsMSMvJ0A7Y/2PWFhby464uSQdrru9qyZKgK3LyTiz2RydhroIuUB3k4WsHHtTip872XzPmqbeFVy6ba+rkjosox5ed3CSZuBhj7iS/SaHHzzl3EpuYgNlk/ObuVfldv2KEHOdlYFj+wXGx1D6+6tax1QxcZ6mOrrQ8TjYcp6Wx2z+XiarxLD3Q2W5IId29U3NksE+EnK69QgyuJWbrS5diUbMSm5iI2OVuvevVBCrkMdWtZFydyrvqldR5O1nwRgqgGMPbnd3kwcTPAGE68VitwOzMP11JyEJOSg2v3PrEpxVWbRQ/JzuxUFvBxtYGPy79VRCUPokc14C5v1V6PgNpoZ8ZVe+m5Bdh3pXh4p31XknHngeGdmtd1YtVzDSOEwJ3cwn9Lo1Ny9Eqm7/+D5UFKhRxeztbwdbWDr6sNfF3t4ONqA19XW7g7WPGPGaJqYgzP78fFxM2AmnLihRBIzs7/t1QgJVf3MLmWmoP8orKre1QWcl3JQHFi9u/DRG2nqrIHSUxyNvZEFico5tyYvmRA9T2RxQntqev6A6rblwyo3qg2ujZSw5UDqhsVIQSSsvINJnXX03JR8JDfRWtLBbxdbFBfXVzten+bOhdbJZM6oipUU57fTxITNwOq+8Sn5xboPxBSixO1aym5yM4vu+rGQi5DPRcbvWrNko+7g1W1l+KUdF9R3DFsUqk2c43rOOjaxrWsZ/x9bOUWFOFwVCp2RyZh7+Uk3MrI0/u+oZtd8VuMjWqjlXctWJpZdx3mQqMVuJV+F9dSc/RKwGNTchB35y40Dyn9trey0G9Ld18VrKM1q8yJKoqJm5l6Uif+dsZdnLx2515y9m+idn812oNkMuja1ejefLv3F7unk3WN7burvB3G9giojS7+xtNh7I3UXOy+nIjdkck4GpOqV9KispCjUwPXe8MxqVG3lo2EkVJNUFjS3vReiXnJH2SxKTm4lXEXD7v7utgqdW+61lfbolW9Wujg51J9wRMZISZuZupJnfgNJ27g3c3nDX7n7mB1r03Mv21kfF1t4OVsYxJjMablFGDflSTsvpyMfZFJpfrYqqlDNBUUaXHyelpxAno5CdHJOXrfezpZ6zrB7eDnYrZt+qji8go1uJGWq+t2Jza5+A+6ayk5pTpbLjHvxacwrINP9QZKZESYuJmpJ3XiT9+4g/nbI0p1NeDjagMbZc0e+LsqPWpQ9DqOVujWqLjUSopB0ZOy8rD3Xru9A1dT9KqrFXIZ2njX0iVrDWrb1Zgkk0xHdn7Rv+1ZU3Jw9mYG/olIhFIhx6axHfB0XSepQySqkZi4mSlzOPE1SXz63eJhnQz0saVUyNG+vrMuUfJ2sa3y7Wu1AufiM3SJ5LmbGXrfu9op0bVh8fY7+7uy7RFVOyEE3vjpFHZdSkTdWtbY/nYgHG14HRI9yBye30zcDDCHE19T5RVqcDQmVdc2Li7trt739dW26HFvJIG2Ps6V7hA1M68QB66kYPflJOy7koSU7AK975+u66gbseBpT0d210GSy7hbiOcXHUBc2l0ENXbD98Nbs7SX6AHm8Pxm4maAOZx4YyCEQHRytq7j2hPX0vT6p7NTWaDzve5GujVSo7aD1UPXFZVUPA7onsgknLx2p9S6Av2LXyzo1kiN2vZlr4tIKudvZmDAssMo0GjxXt8AvNHFT+qQiGoUc3h+M3EzwBxOvDHKzCvEwaspukHYU7L1G3A39XTQlcY1r+uEAo0WR0pK7y4n4eYd/dI7P7WtbsSHNt6VL70jqk4/Hb2OWdsuQCGXYf0bz6Ctj7PUIRHVGObw/GbiZoA5nHhjp9UKXLiVoRvB4ewD7dKcbZXILSjSby9nIUeH+i66t1frubC7DjI+QghMWn8Gv529BTcHFbZPDGSHzkT3mMPzm4mbAeZw4k1Ncla+boip/VeSkXXvTdA6jla6TnA7NnAxq7d3yXTl5BfhhcUHEZ2cg84NXLF6VDuj79CaqCqYw/ObiZsB5nDiTVmhRotzN9Nhp7JEQzd210GmKTIhCy8uOYi8Qi0mB/ljclBDqUMikpw5PL/ZqIdMjqVCjtbezmjkXnM68iWqao3c7TG/fzMAwNdhV3HgarLEERFRdWDiRkRkpAa0rotX23pBCGDy+jNIeGC8XCIyPUzciIiM2PsvPIXGdRyQmlOAt38OR6FG++iFiMhoMXEjIjJiVpYKLH2tFexUFjhx7Q4++ytS6pCI6Ali4kZEZOR8XW3xyStPAwCW74/BrkuJEkdERE8KEzciIhPQt1kdjOzoAwD4z8YziEvLlTYgInoimLgREZmI9/o2RgsvJ2TmFWH8unDkF2mkDomIqhgTNyIiE6G0kGPJa63gZGOJczczMH97hNQhEVEVY+JGRGRCPJ2s8cWg5gCAH49cx+9nb0kcERFVJSZuREQmpkeAG8Z18wMATN98DtHJ2RJHRERVhYkbEZEJmvpsQ7T3dUZOgQbj1oTjbgHbuxGZAiZuREQmyEIhx6IhLeFqp0JkYhZm/XpB6pCIqAowcSMiMlG1HazwzZAWkMuATaduYuOJOKlDIqLHxMSNiMiEdfRzxdRnGwIAZv16ARG3MyWOiIgeBxM3IiITN65bA3RtqEZ+kRbj1oYjK69Q6pCIqJKYuBERmTi5XIYvB7dAHUcrxKbkYPrm8xBCSB0WEVUCEzciIjPgbKvE4qGtYCGXYfv52/jxyHWpQyKiSmDiRkRkJlp718L0PgEAgA+3X8KZuHRpAyKiCmPiRkRkRl7v7IveT7mjUCMwfm040nMLpA6JiCqAiRsRkRmRyWT4ZODTqOdsg/j0u/jPxrPQatnejchYSJ64LVmyBD4+PrCyskL79u1x/PjxMuctLCzE3Llz4efnBysrKzRv3hw7d+7Um2f//v3o168fPDw8IJPJsG3btie8B0RExsXByhJLX2sFpYUcYZeTsHx/jNQhEVE5SZq4bdiwAVOnTsWcOXMQHh6O5s2bIzg4GElJSQbnnzlzJpYvX45Fixbh0qVLeOutt/DSSy/h9OnTunlycnLQvHlzLFmypLp2g4jI6DT1dMT7/Z4CAHz2dySOxaRKHBERlYdMSPhOePv27dG2bVssXrwYAKDVauHl5YW3334b06dPLzW/h4cHZsyYgfHjx+umDRgwANbW1lizZk2p+WUyGbZu3Yr+/ftXKK7MzEw4OjoiIyMDDg4OFdspIiIjIYTA1I1nsfV0PGrbq7B9YiDU9iqpwyKqNHN4fktW4lZQUIBTp04hKCjo32DkcgQFBeHIkSMGl8nPz4eVlZXeNGtraxw8ePCJ
xkpEZIpkMhk+7N8UDWrbISkrH5PWn4aG7d2IajTJEreUlBRoNBq4ubnpTXdzc0NCQoLBZYKDg/HFF1/g6tWr0Gq12LVrF7Zs2YLbt28/Viz5+fnIzMzU+xARmQNblQWWvdYK1pYKHI5Oxdf/XJE6JCJ6CMlfTqiIr7/+Gv7+/ggICIBSqcSECRMQEhICufzxdmPBggVwdHTUfby8vKooYiKims/fzR4fvdwUALBoTxT2XUmWOCIiKotkiZurqysUCgUSExP1picmJsLd3d3gMmq1Gtu2bUNOTg6uX7+Oy5cvw87ODvXr13+sWEJDQ5GRkaH7xMXFPdb6iIiMzUst62JIu3oQApiy4QxuZ9yVOiQiMkCyxE2pVKJ169YICwvTTdNqtQgLC0OHDh0euqyVlRU8PT1RVFSEzZs348UXX3ysWFQqFRwcHPQ+RETmZk6/JnjKwwFpOQWYsO40CjVaqUMiogdIWlU6depUfP/991i9ejUiIiIwduxY5OTkICQkBAAwfPhwhIaG6uY/duwYtmzZgpiYGBw4cAC9e/eGVqvFtGnTdPNkZ2fjzJkzOHPmDAAgNjYWZ86cwY0bN6p134iIjI2VpQJLX2sFe5UFTl2/g092XpY6JCJ6gIWUGx88eDCSk5Mxe/ZsJCQkoEWLFti5c6fuhYUbN27otV/Ly8vDzJkzERMTAzs7O/Tt2xc//fQTnJycdPOcPHkS3bt31/08depUAMCIESPwww8/VMt+EREZK28XW3w68Gm8tSYc3x+IRRsfZwQ/Zbj5ChFVP0n7caupzKEfGCKih5n3xyWsOBgLeysLbH87EPVcbKQOieiRzOH5bVRvlRIRUfWY3icAreo5ISuvCGPXnkJeoUbqkIgITNyIiMgAS4Uci4e2Qi0bS1y8lYl5f1ySOiQiAhM3IiIqg4eTNb4c3AIyGbD22A38eiZe6pCIzB4TNyIiKlO3RrUxoXsDAEDolvOISsqSOCIi88bEjYiIHmpyUEN0qO+C3AINxq4JR25BkdQhEZktJm5ERPRQCrkMXw9pAbW9CleTsjFz6wWwQwIiaTBxIyKiR6ptb4VFQ1pCLgO2nI7H+hMcGpBICkzciIioXJ6p74L/9GoEAJjz20VcvJUhcURE5oeJGxERldvYrn7o3kiNgiItxq0NR2ZeodQhEZkVJm5ERFRucrkMXwxqAU8na1xPzcW0X86xvRtRNWLiRkREFVLLVonFQ1vCUiHDzosJWHXomtQhEZkNJm5ERFRhLevVwnt9GwMAPtoRgfAbdySOiMg8MHEjIqJKGdnRB32buaNIKzBhbTju5BRIHRKRyWPiRkRElSKTyfDxgKfh42KDWxl5mLLxDLRatncjepKYuBERUaU5WFli6WutobKQY29kMpbti5Y6JCKTxsSNiIgeSxMPB8x98SkAwOd/R+JwdIrEERGZLiZuRET02Aa18cLLrTyhFcDEn88gKStP6pCITBITNyIiemwymQwf9m+Khm52SMnOx8SfT6NIo5U6LCKTYyF1AEREZBpslBZY+lprvLD4II7GpOHzXVfwVlc/qcOqEJWFHFaWCqnDICqTTLDL61IyMzPh6OiIjIwMODg4SB0OEZFR+fVMPCatPyN1GJViqZBhxYi26NJQLXUoVAnm8PxmVSkREVWpF1t44u0eDWAhl0kdSoUVagQ2h9+UOgyiMrGqlIiIqtx/ejXCxJ7+MKY6nZPX0jD0f8dw8GoKtFoBuREmnmT6mLgREdETYakwrkqdNj7OsFUqkJpTgEu3M9HU01HqkIhKMa7fKiIioidEaSFHBz8XAMCBq+yLjmomJm5ERET3BPoXv5Rw4GqyxJEQGcbEjYiI6J5Af1cAwMlrd5BbUCRxNESlMXEjIiK6x9fVFp5O1ijQaHEsJk3qcIhKYeJGRER0j0wmQ5eGxaVu+1ldSjUQEzciIqL7/NvOjS8oUM3DxI2IiOg+nfxcIZcBUUnZuJV+V+pwiPQwcSMiIrqPo40lmns5AQAOstSNahgmbkRERA8oqS5lOzeqaZi4ERERPaDLvW5BDkalQKM1onG7yOQxcSMiInpAcy8n2KsskJ5biIu3MqQOh0iHiRsREdEDLBUc/opqJiZuREREBgQ2vNfO7QrbuVHNwcSNiIjIgJJ2bqeu30F2Poe/opqBiRsREZEB3i62qOdsgyKtwNHoVKnDIQLAxI2IiKhMJYPOH2C3IFRDMHEjIiIqA4e/opqGiRsREVEZOjZwgUIuQ0xKDuLScqUOh4iJGxERUVkcrCzRsmT4qyiWupH0mLgRERE9xL/VpWznRtJj4kZERPQQgQ3vDX91lcNfkfSYuBERET3E056OcLCyQGZeEc7dTJc6HDJzTNyIiIgewkIhR6cGJd2CsJ0bSYuJGxER0SOwnRvVFEzciIiIHqGkI97wG+nIyiuUOBoyZ0zciIiIHsHL2Qa+rrbQaAUOc/grkhATNyIionLg8FdUEzBxIyIiKgcOf0U1ARM3IiKicnimvjMs5DJcT83F9dQcqcMhM8XEjYiIqBzsrSzRyrsWAJa6kXSYuBEREZVTF7ZzI4kxcSMiIiqnknZuh6NSUaTRShwNmaMakbgtWbIEPj4+sLKyQvv27XH8+PEy5y0sLMTcuXPh5+cHKysrNG/eHDt37nysdRIREZVHU09HONlYIiu/CGc5/BVJQPLEbcOGDZg6dSrmzJmD8PBwNG/eHMHBwUhKSjI4/8yZM7F8+XIsWrQIly5dwltvvYWXXnoJp0+frvQ6iYiIykMhl+mGv9p/he3cqPrJhBBCygDat2+Ptm3bYvHixQAArVYLLy8vvP3225g+fXqp+T08PDBjxgyMHz9eN23AgAGwtrbGmjVrKrXOB2VmZsLR0REZGRlwcHCoit0kIiITseHEDby7+Txa1XPClnGdpA6H7mMOz29JS9wKCgpw6tQpBAUF6abJ5XIEBQXhyJEjBpfJz8+HlZWV3jRra2scPHiw0uskIiIqr8732rmdiUtHxl0Of0XVS9LELSUlBRqNBm5ubnrT3dzckJCQYHCZ4OBgfPHFF7h69Sq0Wi127dqFLVu24Pbt25VeZ35+PjIzM/U+REREhng6WcNPbQutAA5HsbqUqpfkbdwq6uuvv4a/vz8CAgKgVCoxYcIEhISEQC6v/K4sWLAAjo6Ouo+Xl1cVRkxERKam5O3S/ezPjaqZpImbq6srFAoFEhMT9aYnJibC3d3d4DJqtRrbtm1DTk4Orl+/jsuXL8POzg7169ev9DpDQ0ORkZGh+8TFxVXB3hERkanq0rDkBYVkSNxUnMyMpImbUqlE69atERYWppum1WoRFhaGDh06PHRZKysreHp6oqioCJs3b8aLL75Y6XWqVCo4ODjofYiIiMrS3tcFlgoZ4tPv4lpqrtThkBmRvKp06tSp+P7777F69WpERERg7NixyMnJQUhICABg+PDhCA0N1c1/7NgxbNmyBTExMThw4AB69+4NrVaLadOmlXudREREj8NWZYE23s4AOIoCVS8LqQMYPHgwkpOTMXv2bCQkJKBFixbYuXOn7uW
CGzdu6LVfy8vLw8yZMxETEwM7Ozv07dsXP/30E5ycnMq9TiIioscV2NAVR2JSsf9KCoZ38JE6HDITkvfjVhOZQz8wRET0eC7EZ+D5RQdhq1TgzJxesFRIXoll9szh+c2rjIiIqBKa1HGAs60SOQUanL6RLnU4ZCaYuBEREVWCXC5D53vDX7GdG1UXJm5ERESVFOh/r1sQ9udG1YSJGxERUSWVdMR77mY60nMLJI6GzAETNyIiokpyd7RCQzc7CAEcikqVOhwyA0zciIiIHoNu+KsrbOdGTx4TNyIiosdQ0s7twFUOf0VPHhM3IiKix9De1wVKCzluZeQhOjlH6nDIxDFxIyIiegzWSgXa+XD4K6oeTNyIiIge07/VpewWhJ4sJm5ERESPqeQFhSPRqcgv0kgcDZkyJm5ERESPKcDdHq52Ktwt1CD8errU4ZAJY+JGRET0mORymd7bpURPChM3IiKiKsB2blQdKpW47dmzp6rjICIiMmolA85fuJWB1Ox8iaMhU1WpxK13797w8/PDhx9+iLi4uKqOiYiIyOjUdrBCgLt98fBX0Rz+ip6MSiVu8fHxmDBhAjZt2oT69esjODgYGzduREEBB9glIiLz1aUhh7+iJ6tSiZurqyumTJmCM2fO4NixY2jYsCHGjRsHDw8PTJw4EWfPnq3qOImIiGo8Dn9FT9pjv5zQqlUrhIaGYsKECcjOzsbKlSvRunVrBAYG4uLFi1URIxERkVFo6+MMlYUciZn5uJqULXU4ZIIqnbgVFhZi06ZN6Nu3L7y9vfHXX39h8eLFSExMRFRUFLy9vTFw4MCqjJWIiKhGs7JUoJ1v8fBXrC6lJ6FSidvbb7+NOnXq4M0330TDhg1x+vRpHDlyBKNHj4atrS18fHzw2Wef4fLly1UdLxERUY3W9V47N3YLQk+CRWUWunTpEhYtWoSXX34ZKpXK4Dyurq7sNoSIiMxO8fBXETgWm4q8Qg2sLBVSh0QmpFKJW1hY2KNXbGGBrl27Vmb1RERERquhmx1q26uQlJWPU9fvoNO9/t2IqkKlqkoXLFiAlStXlpq+cuVKLFy48LGDIiIiMlYymUw36Px+Dn9FVaxSidvy5csREBBQavpTTz2Fb7/99rGDIiIiMmZdGt7rFuQK27lR1apU4paQkIA6deqUmq5Wq3H79u3HDoqIiMiYlVSPXrqdieQsDn9FVadSiZuXlxcOHTpUavqhQ4fg4eHx2EEREREZM1c7FZ7ycAAAHIpiqRtVnUq9nDBmzBhMnjwZhYWF6NGjB4DiFxamTZuG//znP1UaIBERkTEK9Ffj4q1M7L+ajP4tPaUOh0xEpRK3d955B6mpqRg3bpxufFIrKyu8++67CA0NrdIAiYiIjFEXf1d8uy8aB66mQAgBmUwmdUhkAmTiMQZTy87ORkREBKytreHv719mn27GJjMzE46OjsjIyICDg4PU4RARkRHKL9Kg+Qd/I69Qiz8nBaJxHT5PnjRzeH4/1lildnZ2aNu2LZo2bWoySRsREVFVUFko8Ex9FwDFg84TVYVKVZUCwMmTJ7Fx40bcuHFDV11aYsuWLY8dGBERkbHr4q/G3shkHLiagje6+EkdDpmASpW4rV+/Hh07dkRERAS2bt2KwsJCXLx4Ebt374ajo2NVx0hERGSUSvpzOxabhrxCjcTRkCmoVOL20Ucf4csvv8Tvv/8OpVKJr7/+GpcvX8agQYNQr169qo6RiIjIKPmp7VDH0QoFRVocj02TOhwyAZVK3KKjo/Hcc88BAJRKJXJyciCTyTBlyhR89913VRogERGRsSoe/ureKAps50ZVoFKJW61atZCVlQUA8PT0xIULFwAA6enpyM3NrbroiIiIjFzJuKUHrrIjXnp8lUrcunTpgl27dgEABg4ciEmTJmHMmDEYMmQIevbsWaUBEhERGbNODVwhkwGXE7KQlJkndThk5Cr1VunixYuRl1d88c2YMQOWlpY4fPgwBgwYgJkzZ1ZpgERERMbM2VaJZp6OOHczAweupmBA67pSh0RGrMKJW1FREf744w8EBwcDAORyOaZPn17lgREREZmKQH/Xe4lbMhM3eiwVriq1sLDAW2+9pStxIyIiooe7v52bVlvpAYuIKtfGrV27djhz5kwVh0JERGSaWtWrBRulAqk5Bbh0O1PqcMiIVaqN27hx4zB16lTExcWhdevWsLW11fv+6aefrpLgiIiITIHSQo4O9V0QdjkJB66moKknO6unyqnUIPNyeemCOplMBiEEZDIZNBrj7h3aHAapJSKi6vXDoVi8//sldPRzwboxz0gdjkkyh+d3pUrcYmNjqzoOIiIik9alYXE7t5PX7iC3oAg2ykoPF05mrFJXjbe3d1XHQUREZNJ8XW3h6WSN+PS7OBabhu6NaksdEhmhSiVuP/7440O/Hz58eKWCISIiMlUymQxdGrri5+NxOHAlhYkbVUqlErdJkybp/VxYWIjc3FwolUrY2NgwcSMiIjIg0F9dnLhx3FKqpEp1B3Lnzh29T3Z2NiIjI9G5c2f8/PPPVR0jERGRSejo5wK5DLialI3bGXelDoeMUKUSN0P8/f3x8ccflyqNIyIiomJONko8XdcJAAedp8qpssQNKB5V4datW1W5SiIiIpPSxd8VABM3qpxKtXH77bff9H4WQuD27dtYvHgxOnXqVCWBERERmaLAhmp8szsKB68mQ6sVkMtlUodERqRSiVv//v31fpbJZFCr1ejRowc+//zzqoiLiIjIJLXwcoKdygJ3cgtx4VaGruqUqDwqlbhptdqqjoOIiMgsWCrk6ODngl2XEnHgagoTN6qQKm3jRkRERI9W0s5t/xV2C0IVU6nEbcCAAVi4cGGp6Z988gkGDhz42EERERGZspLhr8Jv3EF2fpHE0ZAxqVTitn//fvTt27fU9D59+mD//v0VWteSJUvg4+MDKysrtG/fHsePH3/o/F999RUaNWoEa2treHl5YcqUKcjLy9N9n5WVhcmTJ8Pb2xvW1tbo2LEjTpw4UaGYiIiIniRvF1vUc7ZBoUbgWEyq1OGQEalU4padnQ2lUllquqWlJTIzM8u9ng0bNmDq1KmYM2cOwsPD0bx5cwQHByMpKcng/OvWrcP06dMxZ84cREREYMWKFdiwYQPee+893TyjR4/Grl278NNPP+H8+fPo1asXgoKCEB8fX/EdJSIiekIC2S0IVUKlErdmzZphw4YNpaavX78eTZo0Kfd6vvjiC4wZMwYhISFo0qQJvv32W9jY2GDlypUG5z98+DA6deqEoUOHwsfHB7169cKQIUN0pXR3797F5s2b8cknn6BLly5o0KAB3n//fTRo0ADLli2rzK4SERE9EYH+xdWl+zn8FVVApd4qnTVrFl5++WVER0ejR48eAICwsDD8/PPP+OWXX8q1joKCApw6dQqhoaG6aXK5HEFBQThy5IjBZTp27Ig1a9bg+PHjaNeuHWJiYrBjxw4MGzYMAFBUVASNRgMrKyu95aytrXHw4MEyY8nPz0d+fr7u54qUGhIREVVGBz8XKOQyxCTn4O
adXNStZSN1SGQEKlXi1q9fP2zbtg1RUVEYN24c/vOf/+DmzZv4559/SvXxVpaUlBRoNBq4ubnpTXdzc0NCQoLBZYYOHYq5c+eic+fOsLS0hJ+fH7p166arKrW3t0eHDh0wb9483Lp1CxqNBmvWrMGRI0dw+/btMmNZsGABHB0ddR8vL6/yHQgiIqJKcrS2RAsvJwDAQVaXUjlVujuQ5557DocOHUJOTg5SUlKwe/dudO3atSpjK2Xv3r346KOPsHTpUoSHh2PLli3Yvn075s2bp5vnp59+ghACnp6eUKlU+OabbzBkyBDI5WXvamhoKDIyMnSfuLi4J7ofREREANu5UcVVqqr0xIkT0Gq1aN++vd70Y8eOQaFQoE2bNo9ch6urKxQKBRITE/WmJyYmwt3d3eAys2bNwrBhwzB69GgAxW3tcnJy8MYbb2DGjBmQy+Xw8/PDvn37kJOTg8zMTNSpUweDBw9G/fr1y4xFpVJBpVI9MmYiIqKqFOivxlf/XMXBqBRotAIKDn9Fj1CpErfx48cbLJWKj4/H+PHjy7UOpVKJ1q1bIywsTDdNq9UiLCwMHTp0MLhMbm5uqZIzhUIBoHi81PvZ2tqiTp06uHPnDv766y+8+OKL5YqLiIioujSv6wh7Kwtk3C3EuZvpUodDRqBSJW6XLl1Cq1atSk1v2bIlLl26VO71TJ06FSNGjECbNm3Qrl07fPXVV8jJyUFISAgAYPjw4fD09MSCBQsAFLet++KLL9CyZUu0b98eUVFRmDVrFvr166dL4P766y8IIdCoUSNERUXhnXfeQUBAgG6dRERENYWFQo5Ofq7YeTEBB66moGW9WlKHRDVcpRI3lUqFxMTEUtWPt2/fhoVF+Vc5ePBgJCcnY/bs2UhISECLFi2wc+dO3QsLN27c0CthmzlzJmQyGWbOnIn4+Hio1Wr069cP8+fP182TkZGB0NBQ3Lx5E87OzhgwYADmz58PS0vLyuwqERHRExXYsCRxS8bEnv5Sh0M1nEw8WMdYDkOGDMHt27fx66+/wtHREQCQnp6O/v37o3bt2ti4cWOVB1qdMjMz4ejoiIyMDDg4OEgdDhERmbC4tFwEfrIHCrkMZ2Y/C3srFjRUljk8vyvVxu2zzz5DXFwcvL290b17d3Tv3h2+vr5ISEjA559/XtUxEhERmSwvZxv4utpCoxU4Es3hr+jhKpW4eXp64ty5c/jkk0/QpEkTtG7dGl9//TXOnz/PPtCIiIgqiN2CUHlVqo0bUPzWZufOnVGvXj0UFBQAAP78808AwAsvvFA10REREZmBQH81fjxyHQc4/BU9QqUSt5iYGLz00ks4f/48ZDIZhBCQyf7te0aj0VRZgERERKbumfrOsJDLcC01FzdSc1HPhcNfkWGVqiqdNGkSfH19kZSUBBsbG1y4cAH79u1DmzZtsHfv3ioOkYiIyLTZW1mi1b2uQA5EsdSNylapxO3IkSOYO3cuXF1dIZfLoVAo0LlzZyxYsAATJ06s6hiJiIhMnq6d2xW2c6OyVSpx02g0sLe3B1A8dNWtW7cAAN7e3oiMjKy66IiIiMxEYEM1AOBQdAqKNFqJo6GaqlJt3Jo2bYqzZ8/C19cX7du3xyeffAKlUonvvvvuoWOCEhERkWHNPB3haG2JjLuFOHszA629OYoClVapEreZM2dCqy3+a2Du3LmIjY1FYGAgduzYgW+++aZKAyQiIjIHCrkMnRsUV5fuv8J2bmRYpUrcgoODdf9v0KABLl++jLS0NNSqVUvv7VIiIiIqv0B/V2w/fxsHriZjyrMNpQ6HaqBKlbgZ4uzszKSNiIjoMZS0czsTl46Mu4USR0M1UZUlbkRERPR4PJ2s4ae2hVYAR6L5dimVxsSNiIioBgn0Ly5128/hr8gAJm5EREQ1SJeG/76gIISQOBqqaZi4ERER1SDtfV1gqZDh5p27uJ6aK3U4VMMwcSMiIqpBbFUWuj7cOOg8PYiJGxERUQ3Ddm5UFiZuRERENUyXe4nbkehUFHL4K7oPEzciIqIa5ikPB9SysUR2fhHOxKVLHQ7VIEzciIiIahi5XIbOJdWlHP6K7sPEjYiIqAYK9L/XLQjbudF9mLgRERHVQCWJ27mb6UjPLZA4GqopmLgRERHVQHUcrdHQzQ5CAIeiUqUOh2oIJm5EREQ1VEm3IOzPjUowcSMiIqqhSqpLD1xN4fBXBICJGxERUY3V3tcFSoUc8el3EZOSI3U4VAMwcSMiIqqhrJUKtPW9N/wVuwUhMHEjIiKq0f5t58ZuQYiJGxERUY1W0s7tSEwqCoo4/JW5Y+JGRERUgzV2d4CrnRK5BRqE37gjdTgkMSZuRERENZhcLkPnBiVvl7Kdm7lj4kZERFTDBerGLWU7N3PHxI2IiKiGK2nnduFWBlKz8yWOhqTExI2IiKiGq+1ghQB3++Lhr6I5/JU5Y+JGRERkBLo0vNctCPtzM2tM3IiIiIwAh78igIkbERGRUWjr4wyVhRwJmXmISsqWOhySCBM3IiIiI2BlqUA7X2cAwH6OomC2mLgREREZiS664a/Yzs1cMXEjIiIyEoENi9u5HY1JRX6RRuJoSApM3IiIiIxEIzd7qO1VyCvU4tQ1Dn9ljpi4ERERGQmZTKZ7u5Tt3MwTEzciIiIj0kU3/BXbuZkjJm5ERERGpNO9Aecv3c5EchaHvzI3TNyIiIiMiNpehSZ1HAAAh6JYXWpumLgREREZmZLhr/azWxCzw8SNiIjIyHTh8Fdmi4kbERGRkWntUwtWlnIkZ+UjMjFL6nCoGjFxIyIiMjIqCwWeqe8CADhwhe3czAkTNyIiIiMU6M92buaIiRsREZERKmnndjw2DXmFHP7KXDBxIyIiMkINatvB3cEK+UVanLiWJnU4VE2YuBERERmh+4e/2nE+QeJoqLowcSMiIjJSA1rXBQBsPnUTCRl5EkdD1YGJGxERkZF6pr4L2vk4o0CjxfL90VKHQ9VA8sRtyZIl8PHxgZWVFdq3b4/jx48/dP6vvvoKjRo1grW1Nby8vDBlyhTk5f37V4ZGo8GsWbPg6+sLa2tr+Pn5Yd68eeygkIiITNLbPRsAANYdu8GxS82ApInbhg0bMHXqVMyZMwfh4eFo3rw5goODkZSUZHD+devWYfr06ZgzZw4iIiKwYsUKbNiwAe+9955unoULF2LZsmVYvHgxIiIisHDhQnzyySdYtGhRde0WERFRtencwBUtvJyQX6TF/w7ESB0OPWGSJm5ffPEFxowZg5CQEDRp0gTffvstbGxssHLlSoPzHz58GJ06dcLQoUPh4+ODXr16YciQIXqldIcPH8aLL76I5557Dj4+PnjllVfQq1evR5bkERERGSOZTIaJ90rdfjp6HWk5BRJHRE+SZIlbQUEBTp06haCgoH+DkcsRFBSEI0eOGFymY8eOOHXqlC4Ji4mJwY4dO9C3b1+9ecLCwnDlyhUAwNmzZ3Hw4EH06dOnzFjy8/ORmZmp9yEiIjIW3RvVRlNPB+QWaLDiIEvdTJlkiVtKSgo0Gg3c3Nz0pru5uSEhwfBrz
UOHDsXcuXPRuXNnWFpaws/PD926ddOrKp0+fTpeffVVBAQEwNLSEi1btsTkyZPx2muvlRnLggUL4OjoqPt4eXlVzU4SERFVA5lMhgnd/QEAqw9fR0ZuocQR0ZMi+csJFbF371589NFHWLp0KcLDw7FlyxZs374d8+bN082zceNGrF27FuvWrUN4eDhWr16Nzz77DKtXry5zvaGhocjIyNB94uLiqmN3iIiIqkyvJm4IcLdHdn4RVh2OlTocekIspNqwq6srFAoFEhMT9aYnJibC3d3d4DKzZs3CsGHDMHr0aABAs2bNkJOTgzfeeAMzZsyAXC7HO++8oyt1K5nn+vXrWLBgAUaMGGFwvSqVCiqVqgr3joiIqHrJ5TJM6NEAE9adxsqDsXi9sy/srSylDouqmGQlbkqlEq1bt0ZYWJhumlarRVhYGDp06GBwmdzcXMjl+iErFAoA0HX3UdY8Wq22KsMnIiKqcfo0rQM/tS0y84rw45HrUodDT4CkVaVTp07F999/j9WrVyMiIgJjx45FTk4OQkJCAADDhw9HaGiobv5+/fph2bJlWL9+PWJjY7Fr1y7MmjUL/fr10yVw/fr1w/z587F9+3Zcu3YNW7duxRdffIGXXnpJkn0kIiKqLop7pW4AsOJgLHLyiySOiKqaZFWlADB48GAkJydj9uzZSEhIQIsWLbBz507dCws3btzQKz2bOXMmZDIZZs6cifj4eKjVal2iVmLRokWYNWsWxo0bh6SkJHh4eODNN9/E7Nmzq33/iIiIqlu/pz3w1T9XcT01F2uPXccbXfykDomqkExwSIFSMjMz4ejoiIyMDDg4OEgdDhERUYVsPBGHaZvPwdVOhYPvdoeVpULqkKqFOTy/jeqtUiIiInq0l1p5wtPJGinZ+fj5+A2pw6EqxMSNiIjIxFgq5BjXvbiKdPm+GOQXaSSOiKoKEzciIiIT9ErruqjjaIWEzDz8cvKm1OFQFWHiRkREZIJUFgq82aU+AGDZ3mgUatgtlilg4kZERGSiXm1XD652KsSn38XW8Hipw6EqwMSNiIjIRFlZ/lvqtnhPFIpY6mb0mLgRERGZsNeeqQdnWyVupOXit7O3pA6HHhMTNyIiIhNmo7TA6519ARSXumm07L7VmDFxIyIiMnHDO3jD0doSMck52HH+ttTh0GNg4kZERGTi7K0sMarTvVK33VHQstTNaDFxIyIiMgMjO/nAXmWByMQs/H0pUepwqJKYuBEREZkBR2tLjOjoAwBYtPsqOFS5cWLiRkREZCZGdfaFjVKBi7cysftyktThUCUwcSMiIjITzrZKDHvGGwDwze4olroZISZuREREZmR0YH1YWcpxNi4dB66mSB0OVRATNyIiIjOitldhaLviUje2dTM+TNyIiIjMzJtd60NpIceJa3dwNCZN6nCoApi4ERERmRk3BysMbuMFoLjUjYwHEzciIiIz9FY3P1gqZDgcnYqT11jqZiyYuBEREZkhTydrDGhVF0DxG6ZkHJi4ERERmalx3RpAIZdh/5VknIlLlzocKgcmbkRERGaqnosNXmzhAQBYzLZuRoGJGxERkRkb370B5DLgn4gkXLyVIXU49AhM3IiIiMyYn9oOzz9dUurGtm41HRM3IiIiMzehRwMAwJ8XEnAlMUviaOhhmLgRERGZuYZu9ujT1B0AS91qOiZuREREpCt1+/3cLUQnZ0scDZWFiRsRERHhKQ9HBDWuDSGAJXtY6lZTMXEjIiIiAMDbPfwBAL+euYUbqbkSR0OGMHEjIiIiAEBzLyd0baiGRiuwdC9L3WoiJm5ERESkM7FncVu3zeE3EZ9+V+Jo6EFM3IiIiEintbczOvq5oFAj8O3eaKnDoQcwcSMiIiI9JW3dNpyIQ0JGnsTR0P2YuBEREZGeZ+o7o61PLRRotFi+n6VuNQkTNyIiItIjk8l0pW7rjt1Acla+xBFRCSZuREREVEqgvytaeDkhv0iL/x2IkTocuoeJGxEREZUik8l0b5j+dPQ60nIKJI6IACZuREREVIbujWqjqacDcgs0WHkwVupwCEzciIiIqAwymQwTuhe3dVt9+Boy7hZKHBExcSMiIqIy9WrihkZu9sjKL8IPh65JHY7ZY+JGREREZZLLZZjQo7it24qDMcjKY6mblJi4ERER0UP1bVYH9dW2yMwrwo9Hrksdjllj4kZEREQPpZDL8Lau1C0WuQVFEkdkvpi4ERER0SP1e9oD3i42SMspwNqjN6QOx2wxcSMiIqJHslDIMb5bcanb8v0xyCvUSByReWLiRkREROXyUitPeDpZIyU7H+uPs9RNCkzciIiIqFwsFXKM7eYHAPh2Xwzyi1jqVt2YuBEREVG5DWxTF+4OVkjIzMMvJ29KHY7ZYeJGRERE5aayUODNrvUBAMv2RqNQo5U4IvPCxI2IiIgqZEi7enC1UyE+/S62hsdLHY5ZYeJGREREFWJlqcCbXYpL3ZbsjUIRS92qDRM3IiIiqrDXnqkHZ1slrqfm4vdzt6QOx2wwcSMiIqIKs1Fa4PXOvgCAxbujoNEKiSMyD0zciIiIqFKGd/CGo7UlopNz8OeF21KHYxaYuBEREVGl2FtZIqSTDwBgUVgUtCx1e+JqROK2ZMkS+Pj4wMrKCu3bt8fx48cfOv9XX32FRo0awdraGl5eXpgyZQry8vJ03/v4+EAmk5X6jB8//knvChERkVkJ6egLO5UFIhOz8PelRKnDMXmSJ24bNmzA1KlTMWfOHISHh6N58+YIDg5GUlKSwfnXrVuH6dOnY86cOYiIiMCKFSuwYcMGvPfee7p5Tpw4gdu3b+s+u3btAgAMHDiwWvaJiIjIXDjaWGJkRx8AwKLdVyEES92eJMkTty+++AJjxoxBSEgImjRpgm+//RY2NjZYuXKlwfkPHz6MTp06YejQofDx8UGvXr0wZMgQvVI6tVoNd3d33eePP/6An58funbtWl27RUREZDZGdfaFjVKBi7cysSfScMELVQ1JE7eCggKcOnUKQUFBumlyuRxBQUE4cuSIwWU6duyIU6dO6RK1mJgY7NixA3379i1zG2vWrMGoUaMgk8kMzpOfn4/MzEy9DxEREZWPs60Sw57xBgB8ExbFUrcnSNLELSUlBRqNBm5ubnrT3dzckJCQYHCZoUOHYu7cuejcuTMsLS3h5+eHbt266VWV3m/btm1IT0/HyJEjy4xjwYIFcHR01H28vLwqvU9ERETmaHRgfVhZynEmLh0Ho1KkDsdkSV5VWlF79+7FRx99hKVLlyI8PBxbtmzB9u3bMW/ePIPzr1ixAn369IGHh0eZ6wwNDUVGRobuExcX96TCJyIiMklqexWGtKsHAPgmjG3dnhQLKTfu6uoKhUKBxET9t1ASExPh7u5ucJlZs2Zh2LBhGD16NACgWbNmyMnJwRtvvIEZM2ZALv83F71+/Tr++ecfbNmy5aFxqFQqqFSqx9wbIiIi8/ZmFz+sPXoDJ67dwdGYNHTwc5E6JJMjaYmbUqlE69atERYWppum1WoRFhaGDh06GFwmNzdXLzkDAIVCAQCl
svtVq1ahdu3aeO6556o4ciIiInqQu6MVBrWtC6D4DVOqepJXlU6dOhXff/89Vq9ejYiICIwdOxY5OTkICQkBAAwfPhyhoaG6+fv164dly5Zh/fr1iI2Nxa5duzBr1iz069dPl8ABxQngqlWrMGLECFhYSFqwSEREZDbGdmsAS4UMh6NTcep6mtThmBzJM5rBgwcjOTkZs2fPRkJCAlq0aIGdO3fqXli4ceOGXgnbzJkzIZPJMHPmTMTHx0OtVqNfv36YP3++3nr/+ecf3LhxA6NGjarW/SEiIjJnnk7WGNCqLtafiMM3YVFYPaqd1CGZFJlg68FSMjMz4ejoiIyMDDg4OEgdDhERkVG5kZqL7p/vhUYr8Ov4Tmju5VQt2zWH57fkVaVERERkWuq52ODFFsW9OSzaHSVxNKaFiRsRERFVufHdG0AmA/6JSMTFWxlSh2MymLgRERFRlfNT2+H5p4tL3Raz1K3KMHEjIiKiJ2JC9wYAgD8vJOBKYpbE0ZgGJm5ERET0RDRyt0efpsUd6rPUrWowcSMiIqInZkKP4lK3P87dQkxytsTRGD8mbkRERPTEPOXhiKDGtaEVwJI90VKHY/SYuBEREdET9XYPfwDAtjPxuJGaK3E0xo2JGxERET1Rzb2c0KWhGhqtwLJ9bOv2OJi4ERER0RM38V5bt02nbiI+/a7E0RgvJm5ERET0xLXxcUaH+i4o1Ah8u5dt3SqLiRsRERFVi4k9i9u6bTgZh8TMPImjMU5M3IiIiKhaPFPfGW19aqGgSIvl+2KkDscoMXEjIiKiaiGTyfB2D3/IZUBuQZHU4RglC6kDICIiIvMR6O+Kfe90h5ezjdShGCWWuBEREVG1kclkTNoeAxM3IiIiIiPBxI2IiIjISDBxIyIiIjISTNyIiIiIjAQTNyIiIiIjwcSNiIiIyEgwcSMiIiIyEkzciIiIiIwEEzciIiIiI8HEjYiIiMhIMHEjIiIiMhJM3IiIiIiMBBM3IiIiIiNhIXUANZEQAgCQmZkpcSRERERUXiXP7ZLnuCli4mZAVlYWAMDLy0viSIiIiKiisrKy4OjoKHUYT4RMmHJaWklarRa3bt2Cvb09ZDJZla47MzMTXl5eiIuLg4ODQ5Wum/7F41w9eJyrB49z9eBxrj5P6lgLIZCVlQUPDw/I5abZGowlbgbI5XLUrVv3iW7DwcGBN4ZqwONcPXicqwePc/Xgca4+T+JYm2pJWwnTTEeJiIiITBATNyIiIiIjwcStmqlUKsyZMwcqlUrqUEwaj3P14HGuHjzO1YPHufrwWFceX04gIiIiMhIscSMiIiIyEkzciIiIiIwEEzciIiIiI8HEjYiIiMhIMHGrRkuWLIGPjw+srKzQvn17HD9+XOqQTM6CBQvQtm1b2Nvbo3bt2ujfvz8iIyOlDsukffzxx5DJZJg8ebLUoZik+Ph4/N///R9cXFxgbW2NZs2a4eTJk1KHZVI0Gg1mzZoFX19fWFtbw8/PD/PmzTPp8S6rw/79+9GvXz94eHhAJpNh27Ztet8LITB79mzUqVMH1tbWCAoKwtWrV6UJ1ogwcasmGzZswNSpUzFnzhyEh4ejefPmCA4ORlJSktShmZR9+/Zh/PjxOHr0KHbt2oXCwkL06tULOTk5Uodmkk6cOIHly5fj6aefljoUk3Tnzh106tQJlpaW+PPPP3Hp0iV8/vnnqFWrltShmZSFCxdi2bJlWLx4MSIiIrBw4UJ88sknWLRokdShGbWcnBw0b94cS5YsMfj9J598gm+++Qbffvstjh07BltbWwQHByMvL6+aIzUygqpFu3btxPjx43U/azQa4eHhIRYsWCBhVKYvKSlJABD79u2TOhSTk5WVJfz9/cWuXbtE165dxaRJk6QOyeS8++67onPnzlKHYfKee+45MWrUKL1pL7/8snjttdckisj0ABBbt27V/azVaoW7u7v49NNPddPS09OFSqUSP//8swQRGg+WuFWDgoICnDp1CkFBQbppcrkcQUFBOHLkiISRmb6MjAwAgLOzs8SRmJ7x48fjueee07uuqWr99ttvaNOmDQYOHIjatWujZcuW+P7776UOy+R07NgRYWFhuHLlCgDg7NmzOHjwIPr06SNxZKYrNjYWCQkJevcPR0dHtG/fns/FR+Ag89UgJSUFGo0Gbm5uetPd3Nxw+fJliaIyfVqtFpMnT0anTp3QtGlTqcMxKevXr0d4eDhOnDghdSgmLSYmBsuWLcPUqVPx3nvv4cSJE5g4cSKUSiVGjBghdXgmY/r06cjMzERAQAAUCgU0Gg3mz5+P1157TerQTFZCQgIAGHwulnxHhjFxI5M1fvx4XLhwAQcPHpQ6FJMSFxeHSZMmYdeuXbCyspI6HJOm1WrRpk0bfPTRRwCAli1b4sKFC/j222+ZuFWhjRs3Yu3atVi3bh2eeuopnDlzBpMnT4aHhwePM9U4rCqtBq6urlAoFEhMTNSbnpiYCHd3d4miMm0TJkzAH3/8gT179qBu3bpSh2NSTp06haSkJLRq1QoWFhawsLDAvn378M0338DCwgIajUbqEE1GnTp10KRJE71pjRs3xo0bNySKyDS98847mD59Ol599VU0a9YMw4YNw5QpU7BgwQKpQzNZJc8+PhcrjolbNVAqlWjdujXCwsJ007RaLcLCwtChQwcJIzM9QghMmDABW7duxe7du+Hr6yt1SCanZ8+eOH/+PM6cOaP7tGnTBq+99hrOnDkDhUIhdYgmo1OnTqW6s7ly5Qq8vb0lisg05ebmQi7XfxwqFApotVqJIjJ9vr6+cHd313suZmZm4tixY3wuPgKrSqvJ1KlTMWLECLRp0wbt2rXDV199hZycHISEhEgdmkkZP3481q1bh19//RX29va6thKOjo6wtraWODrTYG9vX6rNoK2tLVxcXNiWsIpNmTIFHTt2xEcffYRBgwbh+PHj+O677/Ddd99JHZpJ6devH+bPn4969erhqaeewunTp/HFF19g1KhRUodm1LKzsxEVFaX7OTY2FmfOnIGzszPq1auHyZMn48MPP4S/vz98fX0xa9YseHh4oH///tIFbQykfq3VnCxatEjUq1dPKJVK0a5dO3H06FGpQzI5AAx+Vq1aJXVoJo3dgTw5v//+u2jatKlQqVQiICBAfPfdd1KHZHIyMzPFpEmTRL169YSVlZWoX7++mDFjhsjPz5c6NKO2Z88eg/fjESNGCCGKuwSZNWuWcHNzEyqVSvTs2VNERkZKG7QRkAnBrqGJiIiIjAHbuBEREREZCSZuREREREaCiRsRERGRkWDiRkRERGQkmLgRERERGQkmbkRERERGgokbERERkZFg4kZkhrp164bJkyfrfvbx8cFXX30lWTzlce3aNchkMpw5c0bqUKrE+++/jxYtWkgdRqlrgYhqNg55RUQ4ceIEbG1tpQ7joby8vHD79m24urpKHQoRkWSYuBER1Gq11CE8kkKhgLu7u9Rh6CkoKIBSqZQ6DCIyI6wqJTJxOTk5GD58OOzs7FCnTh18/vnnpeZ5sKpUJpNh+fLleP7
552FjY4PGjRvjyJEjiIqKQrdu3WBra4uOHTsiOjpabz2//vorWrVqBSsrK9SvXx8ffPABioqK9Nb7v//9Dy+99BJsbGzg7++P3377Tff9nTt38Nprr0GtVsPa2hr+/v5YtWoVAMNVpfv27UO7du2gUqlQp04dTJ8+XW973bp1w8SJEzFt2jQ4OzvD3d0d77//vu57IQTef/991KtXDyqVCh4eHpg4cWKZx7KkevN///sffH19YWVlBQBIT0/H6NGjoVar4eDggB49euDs2bN6y3788cdwc3ODvb09Xn/9deTl5el9b6jKsn///hg5cqTu5/z8fLz77rvw8vKCSqVCgwYNsGLFCt33Fy5cQJ8+fWBnZwc3NzcMGzYMKSkpuu/Lcy0QUc3GxI3IxL3zzjvYt28ffv31V/z999/Yu3cvwsPDH7ncvHnzMHz4cJw5cwYBAQEYOnQo3nzzTYSGhuLkyZMQQmDChAm6+Q8cOIDhw4dj0qRJuHTpEpYvX44ffvgB8+fP11vvBx98gEGDBuHcuXPo27cvXnvtNaSlpQEAZs2ahUuXLuHPP/9EREQEli1bVmbVaHx8PPr27Yu2bdvi7NmzWLZsGVasWIEPP/xQb77Vq1fD1tYWx44dwyeffIK5c+di165dAIDNmzfjyy+/xPLly3H16lVs27YNzZo1e+hxiYqKwubNm7FlyxZdEjlw4EAkJSXhzz//xKlTp9CqVSv07NlTt18bN27E+++/j48++ggnT55EnTp1sHTp0keegwcNHz4cP//8M7755htERERg+fLlsLOzA1CcPPbo0QMtW7bEyZMnsXPnTiQmJmLQoEG65St7LRBRDSLtGPdE9CRlZWUJpVIpNm7cqJuWmpoqrK2txaRJk3TTvL29xZdffqn7GYCYOXOm7ucjR44IAGLFihW6aT///LOwsrLS/dyzZ0/x0Ucf6W3/p59+EnXq1ClzvdnZ2QKA+PPPP4UQQvTr10+EhIQY3JfY2FgBQJw+fVoIIcR7770nGjVqJLRarW6eJUuWCDs7O6HRaIQQQnTt2lV07txZbz1t27YV7777rhBCiM8//1w0bNhQFBQUGNzmg+bMmSMsLS1FUlKSbtqBAweEg4ODyMvL05vXz89PLF++XAghRIcOHcS4ceP0vm/fvr1o3ry57ueuXbvqnRMhhHjxxRfFiBEjhBBCREZGCgBi165dBmObN2+e6NWrl960uLg4AUBERkaW+1ogopqNJW5EJiw6OhoFBQVo3769bpqzszMaNWr0yGWffvpp3f/d3NwAQK80ys3NDXl5ecjMzAQAnD17FnPnzoWdnZ3uM2bMGNy+fRu5ubkG12trawsHBwckJSUBAMaOHYv169ejRYsWmDZtGg4fPlxmfBEREejQoQNkMpluWqdOnZCdnY2bN28a3B4A1KlTR7e9gQMH4u7du6hfvz7GjBmDrVu36lW1GuLt7a3XJvDs2bPIzs6Gi4uL3r7HxsbqqpIjIiL0zgEAdOjQ4aHbedCZM2egUCjQtWtXg9+fPXsWe/bs0YshICAAQPF18DjXAhHVHHw5gYgMsrS01P2/JDkyNE2r1QIAsrOz8cEHH+Dll18uta6StmAPrqNkPSXr6NOnD65fv44dO3Zg165d6NmzJ8aPH4/PPvusSvbjwe15eXkhMjIS//zzD3bt2oVx48bh008/xb59+0otV+LBt2+zs7NRp04d7N27t9S8Tk5O5Y5TLpdDCKE3rbCwUPd/a2vrhy6fnZ2Nfv36YeHChaW+q1OnDqKiosodCxHVXCxxIzJhfn5+sLS0xLFjx3TT7ty5gytXrlT5tlq1aoXIyEg0aNCg1EcuL/+tRq1WY8SIEVizZg2++uorfPfddwbnK3lh4v5k59ChQ7C3t0fdunXLvT1ra2v069cP33zzDfbu3YsjR47g/Pnz5V6+VatWSEhIgIWFRan9Lmmf17hxY71zAABHjx7V+1mtVuP27du6nzUaDS5cuKD7uVmzZtBqtdi3b1+ZcVy8eBE+Pj6l4rC1ta3Wa4GInhwmbkQmzM7ODq+//jreeecd7N69GxcuXMDIkSMrlEiV1+zZs/Hjjz/igw8+wMWLFxEREYH169dj5syZFVrHr7/+iqioKFy8eBF//PEHGjdubHDecePGIS4uDm+//TYuX76MX3/9FXPmzMHUqVPLvX8//PADVqxYgQsXLiAmJgZr1qyBtbU1vL29yx1zUFAQOnTogP79++Pvv//GtWvXcPjwYcyYMQMnT54EAEyaNAkrV67EqlWrcOXKFcyZMwcXL17UW0+PHj2wfft2bN++HZcvX8bYsWORnp6u+97HxwcjRozAqFGjsG3bNsTGxmLv3r3YuHEjAGD8+PFIS0vDkCFDcOLECURHR+Ovv/5CSEgINBpNtV4LRPTksKqUyMR9+umnumo0e3t7/Oc//0FGRkaVbyc4OBh//PEH5s6di4ULF8LS0hIBAQEYPXp0udehVCoRGhqKa9euwdraGoGBgVi/fr3BeT09PbFjxw688847aN68OZydnfH6669XKFF0cnLCxx9/jKlTp0Kj0aBZs2b4/fff4eLiUu51yGQy7NixAzNmzEBISAiSk5Ph7u6OLl266NoGDh48GNHR0Zg2bRry8vIwYMAAjB07Fn/99ZduPaNGjcLZs2cxfPhwWFhYYMqUKejevbvetpYtW4b33nsP48aNQ2pqKurVq4f33nsPAODh4YFDhw7h3XffRa9evZCfnw9vb2/07t1bl5xV17VARE+OTDzYqIKIiIiIaiSWkRMREREZCSZuREREREaCiRsRERGRkWDiRkRERGQkmLgRERERGQkmbkRERERGgokbERERkZFg4kZERERkJJi4ERERERkJJm5ERERERoKJGxEREZGRYOJGREREZCT+HxAw0pgsenzMAAAAAElFTkSuQmCC\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "As can be seen, the accuracy of the model drops off once PCA drops 9 dimensions. i.e. the accuracy drops off when PCA projects the data to 3 dimensions or less" + ], + "metadata": { + "id": "eDLdI9APnJzQ" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "0GqOmTCkonxi" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file