diff --git a/docs/_static/envs/img_push_box_env_1_mover.png b/docs/_static/envs/img_push_box_env_1_mover.png index f5ba988..95f95a6 100644 Binary files a/docs/_static/envs/img_push_box_env_1_mover.png and b/docs/_static/envs/img_push_box_env_1_mover.png differ diff --git a/docs/_static/envs/img_push_l_env_1_mover.png b/docs/_static/envs/img_push_l_env_1_mover.png new file mode 100644 index 0000000..623e4a3 Binary files /dev/null and b/docs/_static/envs/img_push_l_env_1_mover.png differ diff --git a/docs/_static/envs/img_push_t_env_1_mover.png b/docs/_static/envs/img_push_t_env_1_mover.png index dd68834..5f53a8f 100644 Binary files a/docs/_static/envs/img_push_t_env_1_mover.png and b/docs/_static/envs/img_push_t_env_1_mover.png differ diff --git a/docs/_static/envs/img_push_x_env_1_mover.png b/docs/_static/envs/img_push_x_env_1_mover.png new file mode 100644 index 0000000..f7faeab Binary files /dev/null and b/docs/_static/envs/img_push_x_env_1_mover.png differ diff --git a/docs/_static/envs/img_state_based_pusing_env_with_static_obstacles.png b/docs/_static/envs/img_state_based_pusing_env_with_static_obstacles.png index 2115932..9a5c129 100644 Binary files a/docs/_static/envs/img_state_based_pusing_env_with_static_obstacles.png and b/docs/_static/envs/img_state_based_pusing_env_with_static_obstacles.png differ diff --git a/docs/environments.rst b/docs/environments.rst index 84b072a..ca5e684 100644 --- a/docs/environments.rst +++ b/docs/environments.rst @@ -17,6 +17,8 @@ A detailed documentation of all environments can be found in the following subse environments/state_based_global_pushing_env environments/state_based_push_t_env + environments/state_based_push_x_env + environments/state_based_push_l_env environments/state_based_push_box_env environments/state_based_static_obstacle_pushing_env diff --git a/docs/environments/long_horizon_global_trajectory_planning_env.rst b/docs/environments/long_horizon_global_trajectory_planning_env.rst index a76d2ef..c6b50e8 100644 
--- a/docs/environments/long_horizon_global_trajectory_planning_env.rst +++ b/docs/environments/long_horizon_global_trajectory_planning_env.rst @@ -136,6 +136,8 @@ To use the example, please install Stable-Baselines3 as described in the from stable_baselines3 import SAC, HerReplayBuffer import magbotsim + gym.register_envs(magbotsim) + render_mode = None mover_params = {'size': np.array([0.113 / 2, 0.113 / 2, 0.012 / 2]), 'mass': 0.628} collision_params = {'shape': 'box', 'size': np.array([0.113 / 2 + 1e-6, 0.113 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0} diff --git a/docs/environments/state_based_global_pushing_env.rst b/docs/environments/state_based_global_pushing_env.rst index e82271d..7f00cff 100644 --- a/docs/environments/state_based_global_pushing_env.rst +++ b/docs/environments/state_based_global_pushing_env.rst @@ -177,6 +177,8 @@ described in the `documentation <https://stable-baselines3.readthedocs.io/en/master/guide/install.html>`_. To use the example, please install Stable-Baselines3 as +described in the `documentation <https://stable-baselines3.readthedocs.io/en/master/guide/install.html>`_. + +.. note:: + This is a simplified example that is not guaranteed to converge, as the default parameters are used. However, it is important to note that + the parameter ``copy_info_dict`` is set to ``True``. This way, it is not necessary to check for collision again to compute the reward when a + transition is relabeled by HER, since the information is already available in the ``info``-dict. + + +.. 
code-block:: python + + import numpy as np + import gymnasium as gym + from stable_baselines3 import SAC, HerReplayBuffer + import magbotsim + + gym.register_envs(magbotsim) + + render_mode = None + mover_params = {'size': np.array([0.155 / 2, 0.155 / 2, 0.012 / 2]), 'mass': 1.24} + collision_params = {'shape': 'box', 'size': np.array([0.155 / 2 + 1e-6, 0.155 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0} + env_params = {'mover_params': mover_params, 'collision_params': collision_params, 'render_mode': render_mode} + + env = gym.make('StateBasedPushLEnv-v0', **env_params) + # copy_info_dict=True, as information about collisions is stored in the info dictionary to avoid + # computationally expensive collision checking calculations when the data is relabeled (HER) + model = SAC( + policy='MultiInputPolicy', + env=env, + replay_buffer_class=HerReplayBuffer, + replay_buffer_kwargs={'copy_info_dict': True}, + verbose=1 + ) + model.learn(total_timesteps=int(1e6)) + +Version History +--------------- +- v0: initial version of the environment + +Parameters +---------- +.. automodule:: magbotsim.rl_envs.object_manipulation.pushing.state_based_push_l_env + :members: + :no-index: + :show-inheritance: diff --git a/docs/environments/state_based_push_t_env.rst b/docs/environments/state_based_push_t_env.rst index 05681cb..b5441cc 100644 --- a/docs/environments/state_based_push_t_env.rst +++ b/docs/environments/state_based_push_t_env.rst @@ -37,6 +37,8 @@ described in the `documentation <https://stable-baselines3.readthedocs.io/en/master/guide/install.html>`_. To use the example, please install Stable-Baselines3 as +described in the `documentation <https://stable-baselines3.readthedocs.io/en/master/guide/install.html>`_. + +.. note:: + This is a simplified example that is not guaranteed to converge, as the default parameters are used. However, it is important to note that + the parameter ``copy_info_dict`` is set to ``True``. This way, it is not necessary to check for collision again to compute the reward when a + transition is relabeled by HER, since the information is already available in the ``info``-dict. + + +.. 
code-block:: python + + import numpy as np + import gymnasium as gym + from stable_baselines3 import SAC, HerReplayBuffer + import magbotsim + + gym.register_envs(magbotsim) + + render_mode = None + mover_params = {'size': np.array([0.155 / 2, 0.155 / 2, 0.012 / 2]), 'mass': 1.24} + collision_params = {'shape': 'box', 'size': np.array([0.155 / 2 + 1e-6, 0.155 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0} + env_params = {'mover_params': mover_params, 'collision_params': collision_params, 'render_mode': render_mode} + + env = gym.make('StateBasedPushXEnv-v0', **env_params) + # copy_info_dict=True, as information about collisions is stored in the info dictionary to avoid + # computationally expensive collision checking calculations when the data is relabeled (HER) + model = SAC( + policy='MultiInputPolicy', + env=env, + replay_buffer_class=HerReplayBuffer, + replay_buffer_kwargs={'copy_info_dict': True}, + verbose=1 + ) + model.learn(total_timesteps=int(1e6)) + +Version History +--------------- +- v0: initial version of the environment + +Parameters +---------- +.. automodule:: magbotsim.rl_envs.object_manipulation.pushing.state_based_push_x_env + :members: + :no-index: + :show-inheritance: diff --git a/docs/environments/state_based_static_obstacle_pushing_env.rst b/docs/environments/state_based_static_obstacle_pushing_env.rst index 4aa89f6..80346f4 100644 --- a/docs/environments/state_based_static_obstacle_pushing_env.rst +++ b/docs/environments/state_based_static_obstacle_pushing_env.rst @@ -129,6 +129,8 @@ described in the `documentation