
Commit 7752cb3

environment changed to accommodate robot, modularised training and running the model
1 parent 16d3218 commit 7752cb3
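
For orientation, the new envs/simple_biped_env.py added below follows the standard Gymnasium API. A minimal smoke test might look like the following sketch; it assumes the repo layout from this commit (the envs package on the import path and the walkers/simple_biped.urdf asset) and is not part of the commit itself.

    # Sketch: random-action rollout of the new environment (not part of this commit).
    from envs.simple_biped_env import simpleBipedEnv

    env = simpleBipedEnv(render=False, max_steps=200)
    obs, info = env.reset()
    done = truncated = False
    while not (done or truncated):
        action = env.action_space.sample()  # random hip torques in [-1, 1]
        obs, reward, done, truncated, info = env.step(action)
    env.close()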

File tree

6 files changed (+126 / -79 lines)

150 Bytes
Binary file not shown.
6.69 KB
Binary file not shown.

envs/simple_biped_env.py

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
import gymnasium as gym
from gymnasium import spaces
import pybullet as p
import pybullet_data
import numpy as np

class simpleBipedEnv(gym.Env):
    def __init__(self, render=False, max_steps=1000):
        super(simpleBipedEnv, self).__init__()
        self.render_mode = render
        self.physicsClient = p.connect(p.GUI if render else p.DIRECT)
        p.setAdditionalSearchPath(pybullet_data.getDataPath())

        num_joints = 2  # left_hip, right_hip
        self.action_space = spaces.Box(low=-1, high=1, shape=(num_joints,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2*num_joints + 6,), dtype=np.float32)
        self.max_steps = max_steps
        self.current_step = 0
        self.last_action = None

    def reset(self, seed=None, options=None):
        p.resetSimulation()
        p.setGravity(0, 0, -9.8)
        plane_id = p.loadURDF("plane.urdf")
        self.robot_id = p.loadURDF("walkers/simple_biped.urdf", [0, 0, 1.0])
        base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
        print(f"Robot spawned at position: {base_pos}")
        self.current_step = 0
        return self._get_obs(), {}

    def step(self, action):
        num_joints = p.getNumJoints(self.robot_id)
        if np.isscalar(action) or (isinstance(action, np.ndarray) and action.shape == (1,)):
            action = np.full((num_joints,), action if np.isscalar(action) else action[0], dtype=np.float32)
        else:
            action = np.asarray(action, dtype=np.float32)
            if action.shape[0] != num_joints:
                raise ValueError(f"Action shape {action.shape} does not match number of joints {num_joints}")

        self.last_action = action

        p.setJointMotorControlArray(
            bodyUniqueId=self.robot_id,
            jointIndices=list(range(num_joints)),
            controlMode=p.TORQUE_CONTROL,
            forces=action.tolist()
        )
        p.stepSimulation()
        obs = self._get_obs()
        reward = self._compute_reward()
        done = self._check_termination()

        self.current_step += 1
        truncated = self.current_step >= self.max_steps

        return obs, reward, done, truncated, {}

    def _get_obs(self):
        joint_states = p.getJointStates(self.robot_id, range(p.getNumJoints(self.robot_id)))
        joint_positions = [state[0] for state in joint_states]
        joint_velocities = [state[1] for state in joint_states]
        base_pos, base_ori = p.getBasePositionAndOrientation(self.robot_id)
        base_vel, base_ang_vel = p.getBaseVelocity(self.robot_id)
        return np.array(joint_positions + joint_velocities + list(base_pos) + list(base_vel), dtype=np.float32)

    def _check_termination(self):
        base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
        return base_pos[2] < 0.5  # fallen

    def _compute_reward(self):
        base_pos, _ = p.getBasePositionAndOrientation(self.robot_id)
        base_vel, _ = p.getBaseVelocity(self.robot_id)

        forward_reward = base_vel[0]  # reward x velocity
        alive_bonus = 0.5  # encourage staying up
        torque_penalty = 0.001 * np.sum(np.square(self.last_action))

        return forward_reward + alive_bonus - torque_penalty

    def render(self):
        if self.render_mode:
            if hasattr(self, 'robot_id'):
                base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
                p.resetDebugVisualizerCamera(
                    cameraDistance=2.0,
                    cameraYaw=45,
                    cameraPitch=-30,
                    cameraTargetPosition=base_pos
                )
        else:
            pass

    def close(self):
        p.disconnect()
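
A general note on the torque-control call in step() above: PyBullet enables a velocity motor on every joint by default, so TORQUE_CONTROL commands typically only take effect after those default motors are released. The usual idiom is shown here as a sketch that would sit in reset() right after loading the URDF; this is standard PyBullet usage, not something included in this commit.

    # Sketch (assumed placement inside reset()): release the default velocity motors
    # so that later TORQUE_CONTROL forces actually drive the joints.
    for j in range(p.getNumJoints(self.robot_id)):
        p.setJointMotorControl2(self.robot_id, j, controlMode=p.VELOCITY_CONTROL, force=0)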

envs/walker_env.py

Lines changed: 0 additions & 68 deletions
@@ -1,68 +0,0 @@
import gymnasium as gym
from gymnasium import spaces
import pybullet as p
import pybullet_data
import numpy as np

class WalkerEnv(gym.Env):
    def __init__(self, render=False):
        super(WalkerEnv, self).__init__()
        self.render_mode = render
        self.physicsClient = p.connect(p.GUI if render else p.DIRECT)
        p.setAdditionalSearchPath(pybullet_data.getDataPath())

        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        p.resetSimulation()
        p.setGravity(0, 0, -9.8)
        plane_id = p.loadURDF("plane.urdf")
        self.robot_id = p.loadURDF("walkers/simple_biped.urdf", [0, 0, 1.0])
        base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
        print(f"Robot spawned at position: {base_pos}")
        return self._get_obs(), {}

    def step(self, action):
        # apply motor torques or joint control here
        # p.setJointMotorControlArray(...)
        p.stepSimulation()
        obs = self._get_obs()
        reward = self._compute_reward()
        done = self._check_termination()
        return obs, reward, done, False, {}

    def _get_obs(self):
        joint_states = p.getJointStates(self.robot_id, range(p.getNumJoints(self.robot_id)))
        joint_positions = [state[0] for state in joint_states]
        joint_velocities = [state[1] for state in joint_states]
        # For simple_biped.urdf, only one joint: hip_joint
        # So obs = [position, velocity], shape=(2,)
        return np.array(joint_positions + joint_velocities, dtype=np.float32)

    def _compute_reward(self):
        # reward forward movement, penalize falls
        base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
        return base_pos[0]  # reward forward x-motion

    def _check_termination(self):
        base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
        return base_pos[2] < 0.5  # fallen

    def render(self):
        if self.render_mode:
            # Focus camera on robot's current position
            if hasattr(self, 'robot_id'):
                base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
                p.resetDebugVisualizerCamera(
                    cameraDistance=2.0,
                    cameraYaw=45,
                    cameraPitch=-30,
                    cameraTargetPosition=base_pos
                )
            # Rendering handled by PyBullet GUI
        else:
            pass  # No rendering in DIRECT mode

    def close(self):
        p.disconnect()

models/humanoid_ppo.zip

2.33 KB
Binary file not shown.

models/test.py

Lines changed: 28 additions & 11 deletions
@@ -2,20 +2,37 @@
 import os
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 from stable_baselines3 import PPO
-from envs.walker_env import WalkerEnv
+from envs.simple_biped_env import simpleBipedEnv
 from stable_baselines3.common.vec_env import DummyVecEnv
 
-env = DummyVecEnv([lambda: WalkerEnv(render=False)])
-model = PPO("MlpPolicy", env, verbose=1)
-model.learn(total_timesteps=5000)
-model.save("models/humanoid_ppo")
+def train():
+    env = DummyVecEnv([lambda: simpleBipedEnv(render=False)])
+    model = PPO("MlpPolicy", env, verbose=1)
+    model.learn(total_timesteps=5000)
+    model.save("models/humanoid_ppo")
+    env.close()
 
+def run():
+    env = DummyVecEnv([lambda: simpleBipedEnv(render=True)])
+    model = PPO.load("models/humanoid_ppo", env=env)
 
-env = WalkerEnv(render=True)
-obs, _ = env.reset()
-done = False
-while not done:
-    action, _ = model.predict(obs)
-    obs, reward, done, _, _ = env.step(action)
+    result = env.reset()
+    if isinstance(result, tuple):
+        obs, info = result
+    else:
+        obs = result
+        info = {}
+
+    done = False
+    total_reward = 0.0
+    while not done:
+        action, _ = model.predict(obs)
+        obs, reward, done, _ = env.step(action)
+        total_reward += reward
+    print(f"Episode finished. Total reward: {total_reward}")
+    env.close()
+
+train()
+#run()
 
 input("Press Enter to exit and close the visualization window...")
