Binary file modified envs/__pycache__/simple_biped_env.cpython-312.pyc
Binary file added envs/__pycache__/simple_biped_env.cpython-313.pyc
52 changes: 33 additions & 19 deletions envs/simple_biped_env.py
@@ -9,20 +9,50 @@ def __init__(self, render=False, max_steps=10000):
        super(simpleBipedEnv, self).__init__()
        self.render_mode = render
        self.physicsClient = p.connect(p.GUI if render else p.DIRECT)
        self.robot_id = p.loadURDF("walkers/simple_biped.urdf", [0, 0, 1.0])
        p.setAdditionalSearchPath(pybullet_data.getDataPath())
        self.robot_id = p.loadURDF("walkers/simple_biped.urdf", [0, 0, 1.0])
        self.plane_id = p.loadURDF("plane.urdf")

        # Find foot link indices (adjust names to match your URDF!)
        self.left_foot = 1   # replace with actual index
        self.right_foot = 3  # replace with actual index

        num_joints = 4  # left_hip, right_hip, left_knee, right_knee
        num_joints = 4
        self.action_space = spaces.Box(low=-1, high=1, shape=(num_joints,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2*num_joints + 6,), dtype=np.float32)
        self.max_steps = max_steps
        self.current_step = 0
        self.last_action = None
        self.off_ground_counter = 0  # track consecutive steps with no contacts

    def _feet_in_contact(self):
        left_contacts = p.getContactPoints(self.robot_id, self.plane_id, linkIndexA=self.left_foot)
        right_contacts = p.getContactPoints(self.robot_id, self.plane_id, linkIndexA=self.right_foot)
        return len(left_contacts) > 0, len(right_contacts) > 0

    def _compute_reward(self):
        base_pos, _ = p.getBasePositionAndOrientation(self.robot_id)
        forward_reward = base_pos[0]
        alive_bonus = 1 if not self._check_termination() else -500

        # --- Penalize if both feet off ground for too long ---
        left_contact, right_contact = self._feet_in_contact()
        if not left_contact and not right_contact:
            self.off_ground_counter += 1
        else:
            self.off_ground_counter = 0

        airborne_penalty = -10 if self.off_ground_counter > 500 else 0  # tune threshold & penalty

        return forward_reward + alive_bonus + airborne_penalty

    def reset(self, seed=None, options=None):
        p.resetBasePositionAndOrientation(self.robot_id, [0, 0, 1], [0, 0, 0, 1])
        num_joints = p.getNumJoints(self.robot_id)
        for j in range(num_joints):
            p.resetJointState(self.robot_id, j, targetValue=0.0, targetVelocity=0.0)
        p.setGravity(0, 0, -9.8)
        plane_id = p.loadURDF("plane.urdf")
        base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
@@ -45,7 +75,7 @@ def step(self, action):
        current_positions = np.array([state[0] for state in joint_states], dtype=np.float32)

        # Apply relative change to joint angles
        delta = action * 0.05  # scale step size as needed
        delta = action * 0.2   # scale step size as needed
        target_positions = current_positions + delta

        p.setJointMotorControlArray(
@@ -78,22 +108,6 @@ def _check_termination(self):
        base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
        return base_pos[2] < 0.2 or base_pos[2] > 1.5  # fallen

    def _compute_reward(self):
        base_pos, _ = p.getBasePositionAndOrientation(self.robot_id)
        base_vel, _ = p.getBaseVelocity(self.robot_id)

        forward_reward = base_vel[0]  # reward x velocity
        if not self._check_termination():
            alive_bonus = 1
        else:
            alive_bonus = -500
        torque_penalty = 0.001 * np.sum(np.square(self.last_action))

        # print(forward_reward, alive_bonus, torque_penalty)

        return forward_reward + alive_bonus - torque_penalty

    def render(self):
        if self.render_mode:
            if hasattr(self, 'robot_id'):
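Aside (not part of this diff): the new contact-based penalty hard-codes the foot link indices as 1 and 3, and the comment in __init__ asks for them to be adjusted to the URDF. A minimal sketch of resolving them by link name at load time instead, assuming the URDF names the foot links "left_foot" and "right_foot" (hypothetical names, not confirmed by this change):

    import pybullet as p

    def find_link_indices(body_id, wanted=("left_foot", "right_foot")):
        # In PyBullet a link index equals its parent joint index; field 12 of
        # getJointInfo is the link name, returned as bytes.
        indices = {}
        for j in range(p.getNumJoints(body_id)):
            name = p.getJointInfo(body_id, j)[12].decode("utf-8")
            if name in wanted:
                indices[name] = j
        return indices

With such a lookup, __init__ could set self.left_foot and self.right_foot from the returned dict rather than the hard-coded indices.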
17 changes: 12 additions & 5 deletions envs/test.py
@@ -15,15 +15,21 @@
    max_episode_steps=10000,
)

def train():
def train_PPO():
    LOG_DIR = "./logs/"
    env = gym.make('simpleBiped-v0', render=False)
    model = DDPG("MlpPolicy", env, verbose=1, device="auto", tensorboard_log=LOG_DIR)
    model.learn(total_timesteps=10000000)
    model = PPO("MlpPolicy", env, verbose=1, learning_rate=0.001, device="auto", tensorboard_log=LOG_DIR)
    model.learn(total_timesteps=500000)
    model.save("../models/humanoid_ppo")
    env.close()


def train_DDPG():
    LOG_DIR = "./logs/"
    env = gym.make('simpleBiped-v0', render_mode=True)
    model = DDPG("MlpPolicy", env, verbose=1, device="auto", tensorboard_log=LOG_DIR)
    model.learn(total_timesteps=10000000)
    model.save("../models/humanoid_ddpg")
    env.close()

def run():
    env = DummyVecEnv([lambda: simpleBipedEnv(render=True)])
@@ -58,6 +64,7 @@ def test():
print(f"Test episode finished. Total reward: {total_reward}")
env.close()

train()
train_PPO()
#train_DDPG()
#run()
#test()
3 changes: 2 additions & 1 deletion requirements.txt
@@ -2,4 +2,5 @@ gymnasium==0.29.1
pybullet==3.2.5
stable-baselines3==2.2.1
numpy>=1.24.0
matplotlib>=3.7.0
matplotlib>=3.7.0
tensorboard==2.20.0
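Aside (not part of this diff): the new tensorboard pin presumably pairs with the tensorboard_log=LOG_DIR argument used in the training functions; if so, the logged PPO training curves could be viewed locally with tensorboard --logdir ./logs/.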