diff --git a/setup.py b/setup.py
index 1c2c85af6..1aa407456 100644
--- a/setup.py
+++ b/setup.py
@@ -203,7 +203,7 @@ def get_local_version(version: "ScmVersion", time_format="%Y%m%d") -> str:
     "rich",
     "scikit-learn>=0.21.2",
     "seals~=0.2.1",
-    "stable-baselines3~=2.0",
+    "stable-baselines3~=2.2.1",
     "sacred>=0.8.4",
     "tensorboard>=1.14",
     "huggingface_sb3~=3.0",
diff --git a/tests/algorithms/conftest.py b/tests/algorithms/conftest.py
index a453f047d..4201a26ed 100644
--- a/tests/algorithms/conftest.py
+++ b/tests/algorithms/conftest.py
@@ -1,7 +1,6 @@
 """Fixtures common across algorithm tests."""
 from typing import Sequence
 
-import gymnasium as gym
 import pytest
 from stable_baselines3.common import envs
 from stable_baselines3.common.policies import BasePolicy
@@ -113,20 +112,10 @@ def pendulum_single_venv(rng) -> VecEnv:
     )
 
 
-# TODO(GH#794): Remove after https://github.com/DLR-RM/stable-baselines3/pull/1676
-# merged and released.
-class FloatReward(gym.RewardWrapper):
-    """Typecasts reward to a float."""
-
-    def reward(self, reward):
-        return float(reward)
-
-
 @pytest.fixture
 def multi_obs_venv() -> VecEnv:
     def make_env():
         env = envs.SimpleMultiObsEnv(channel_last=False)
-        env = FloatReward(env)
         return RolloutInfoWrapper(env)
 
     return DummyVecEnv([make_env, make_env])