diff --git a/docs/_static/envs/img_push_box_env_1_mover.png b/docs/_static/envs/img_push_box_env_1_mover.png
index f5ba988..95f95a6 100644
Binary files a/docs/_static/envs/img_push_box_env_1_mover.png and b/docs/_static/envs/img_push_box_env_1_mover.png differ
diff --git a/docs/_static/envs/img_push_l_env_1_mover.png b/docs/_static/envs/img_push_l_env_1_mover.png
new file mode 100644
index 0000000..623e4a3
Binary files /dev/null and b/docs/_static/envs/img_push_l_env_1_mover.png differ
diff --git a/docs/_static/envs/img_push_t_env_1_mover.png b/docs/_static/envs/img_push_t_env_1_mover.png
index dd68834..5f53a8f 100644
Binary files a/docs/_static/envs/img_push_t_env_1_mover.png and b/docs/_static/envs/img_push_t_env_1_mover.png differ
diff --git a/docs/_static/envs/img_push_x_env_1_mover.png b/docs/_static/envs/img_push_x_env_1_mover.png
new file mode 100644
index 0000000..f7faeab
Binary files /dev/null and b/docs/_static/envs/img_push_x_env_1_mover.png differ
diff --git a/docs/_static/envs/img_state_based_pusing_env_with_static_obstacles.png b/docs/_static/envs/img_state_based_pusing_env_with_static_obstacles.png
index 2115932..9a5c129 100644
Binary files a/docs/_static/envs/img_state_based_pusing_env_with_static_obstacles.png and b/docs/_static/envs/img_state_based_pusing_env_with_static_obstacles.png differ
diff --git a/docs/environments.rst b/docs/environments.rst
index 84b072a..ca5e684 100644
--- a/docs/environments.rst
+++ b/docs/environments.rst
@@ -17,6 +17,8 @@ A detailed documentation of all environments can be found in the following subse
environments/state_based_global_pushing_env
environments/state_based_push_t_env
+ environments/state_based_push_x_env
+ environments/state_based_push_l_env
environments/state_based_push_box_env
environments/state_based_static_obstacle_pushing_env
diff --git a/docs/environments/long_horizon_global_trajectory_planning_env.rst b/docs/environments/long_horizon_global_trajectory_planning_env.rst
index a76d2ef..c6b50e8 100644
--- a/docs/environments/long_horizon_global_trajectory_planning_env.rst
+++ b/docs/environments/long_horizon_global_trajectory_planning_env.rst
@@ -136,6 +136,8 @@ To use the example, please install Stable-Baselines3 as described in the
from stable_baselines3 import SAC, HerReplayBuffer
import magbotsim
+ gym.register_envs(magbotsim)
+
render_mode = None
mover_params = {'size': np.array([0.113 / 2, 0.113 / 2, 0.012 / 2]), 'mass': 0.628}
collision_params = {'shape': 'box', 'size': np.array([0.113 / 2 + 1e-6, 0.113 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0}
diff --git a/docs/environments/state_based_global_pushing_env.rst b/docs/environments/state_based_global_pushing_env.rst
index e82271d..7f00cff 100644
--- a/docs/environments/state_based_global_pushing_env.rst
+++ b/docs/environments/state_based_global_pushing_env.rst
@@ -177,6 +177,8 @@ described in the `documentation `_. To use the example, please install Stable-Baselines3 as
+described in the `documentation <https://stable-baselines3.readthedocs.io/en/master/guide/install.html>`_.
+
+.. note::
+ This is a simplified example that is not guaranteed to converge, as the default parameters are used. Note, however, that
+ the parameter ``copy_info_dict`` is set to ``True``. This way, it is not necessary to check for collisions again to compute the reward when a
+ transition is relabeled by HER, since the collision information is already available in the ``info`` dict.
+
+
+.. code-block:: python
+
+ import numpy as np
+ import gymnasium as gym
+ from stable_baselines3 import SAC, HerReplayBuffer
+ import magbotsim
+
+ gym.register_envs(magbotsim)
+
+ render_mode = None
+ mover_params = {'size': np.array([0.155 / 2, 0.155 / 2, 0.012 / 2]), 'mass': 1.24}
+ collision_params = {'shape': 'box', 'size': np.array([0.155 / 2 + 1e-6, 0.155 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0}
+ env_params = {'mover_params': mover_params, 'collision_params': collision_params, 'render_mode': render_mode}
+
+ env = gym.make('StateBasedPushLEnv-v0', **env_params)
+ # copy_info_dict=True, as information about collisions is stored in the info dictionary to avoid
+ # computationally expensive collision checking calculations when the data is relabeled (HER)
+ model = SAC(
+ policy='MultiInputPolicy',
+ env=env,
+ replay_buffer_class=HerReplayBuffer,
+ replay_buffer_kwargs={'copy_info_dict': True},
+ verbose=1
+ )
+ model.learn(total_timesteps=int(1e6))
+
+Version History
+---------------
+- v0: initial version of the environment
+
+Parameters
+----------
+.. automodule:: magbotsim.rl_envs.object_manipulation.pushing.state_based_push_l_env
+ :members:
+ :no-index:
+ :show-inheritance:
diff --git a/docs/environments/state_based_push_t_env.rst b/docs/environments/state_based_push_t_env.rst
index 05681cb..b5441cc 100644
--- a/docs/environments/state_based_push_t_env.rst
+++ b/docs/environments/state_based_push_t_env.rst
@@ -37,6 +37,8 @@ described in the `documentation `_. To use the example, please install Stable-Baselines3 as
+described in the `documentation <https://stable-baselines3.readthedocs.io/en/master/guide/install.html>`_.
+
+.. note::
+ This is a simplified example that is not guaranteed to converge, as the default parameters are used. Note, however, that
+ the parameter ``copy_info_dict`` is set to ``True``. This way, it is not necessary to check for collisions again to compute the reward when a
+ transition is relabeled by HER, since the collision information is already available in the ``info`` dict.
+
+
+.. code-block:: python
+
+ import numpy as np
+ import gymnasium as gym
+ from stable_baselines3 import SAC, HerReplayBuffer
+ import magbotsim
+
+ gym.register_envs(magbotsim)
+
+ render_mode = None
+ mover_params = {'size': np.array([0.155 / 2, 0.155 / 2, 0.012 / 2]), 'mass': 1.24}
+ collision_params = {'shape': 'box', 'size': np.array([0.155 / 2 + 1e-6, 0.155 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0}
+ env_params = {'mover_params': mover_params, 'collision_params': collision_params, 'render_mode': render_mode}
+
+ env = gym.make('StateBasedPushXEnv-v0', **env_params)
+ # copy_info_dict=True, as information about collisions is stored in the info dictionary to avoid
+ # computationally expensive collision checking calculations when the data is relabeled (HER)
+ model = SAC(
+ policy='MultiInputPolicy',
+ env=env,
+ replay_buffer_class=HerReplayBuffer,
+ replay_buffer_kwargs={'copy_info_dict': True},
+ verbose=1
+ )
+ model.learn(total_timesteps=int(1e6))
+
+Version History
+---------------
+- v0: initial version of the environment
+
+Parameters
+----------
+.. automodule:: magbotsim.rl_envs.object_manipulation.pushing.state_based_push_x_env
+ :members:
+ :no-index:
+ :show-inheritance:
diff --git a/docs/environments/state_based_static_obstacle_pushing_env.rst b/docs/environments/state_based_static_obstacle_pushing_env.rst
index 4aa89f6..80346f4 100644
--- a/docs/environments/state_based_static_obstacle_pushing_env.rst
+++ b/docs/environments/state_based_static_obstacle_pushing_env.rst
@@ -129,6 +129,8 @@ described in the `documentation