Binary file added envs/__pycache__/__init__.cpython-312.pyc
Binary file added envs/__pycache__/simple_biped_env.cpython-312.pyc
111 changes: 111 additions & 0 deletions envs/simple_biped_env.py
@@ -0,0 +1,111 @@
import gymnasium as gym
from gymnasium import spaces
import pybullet as p
import pybullet_data
import numpy as np

class simpleBipedEnv(gym.Env):
    def __init__(self, render=False, max_steps=10000):
        super().__init__()
        self.render_mode = render
        self.physicsClient = p.connect(p.GUI if render else p.DIRECT)
        p.setAdditionalSearchPath(pybullet_data.getDataPath())  # so plane.urdf can be found
        p.setGravity(0, 0, -9.8)
        self.plane_id = p.loadURDF("plane.urdf")  # load the ground once, not on every reset
        self.robot_id = p.loadURDF("walkers/simple_biped.urdf", [0, 0, 1.0])

num_joints = 4 # left_hip, right_hip, left_knee, right_knee
self.action_space = spaces.Box(low=-1, high=1, shape=(num_joints,), dtype=np.float32)
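        # obs = 4 joint positions + 4 joint velocities + base position (3) + base linear velocity (3) = 14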
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2*num_joints + 6,), dtype=np.float32)
self.max_steps = max_steps
self.current_step = 0
self.last_action = None



    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        p.resetBasePositionAndOrientation(self.robot_id, [0, 0, 1], [0, 0, 0, 1])
        p.resetBaseVelocity(self.robot_id, [0, 0, 0], [0, 0, 0])
        # Zero out all joint positions/velocities so every episode starts identically
        for j in range(p.getNumJoints(self.robot_id)):
            p.resetJointState(self.robot_id, j, targetValue=0.0, targetVelocity=0.0)
        self.current_step = 0
        return self._get_obs(), {}

def step(self, action):
num_joints = p.getNumJoints(self.robot_id)
if np.isscalar(action) or (isinstance(action, np.ndarray) and action.shape == (1,)):
action = np.full((num_joints,), action if np.isscalar(action) else action[0], dtype=np.float32)
else:
action = np.asarray(action, dtype=np.float32)
if action.shape[0] != num_joints:
raise ValueError(f"Action shape {action.shape} does not match number of joints {num_joints}")

self.last_action = action

# Get current joint positions
joint_states = p.getJointStates(self.robot_id, range(num_joints))
current_positions = np.array([state[0] for state in joint_states], dtype=np.float32)

# Apply relative change to joint angles
delta = action * 0.05 # scale step size as needed
target_positions = current_positions + delta

p.setJointMotorControlArray(
bodyUniqueId=self.robot_id,
jointIndices=list(range(num_joints)),
controlMode=p.POSITION_CONTROL,
targetPositions=target_positions,
forces=[200] * num_joints # max force
)
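        # POSITION_CONTROL drives each joint toward its target with PyBullet's
        # built-in PD servo, with motor torque clamped to the `forces` limit above.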
p.stepSimulation()
obs = self._get_obs()
reward = self._compute_reward()
        terminated = self._check_termination()

        self.current_step += 1
        truncated = self.current_step >= self.max_steps

        return obs, reward, terminated, truncated, {}

    def _get_obs(self):
        joint_states = p.getJointStates(self.robot_id, range(p.getNumJoints(self.robot_id)))
        joint_positions = [state[0] for state in joint_states]
        joint_velocities = [state[1] for state in joint_states]
        base_pos, _ = p.getBasePositionAndOrientation(self.robot_id)
        base_vel, _ = p.getBaseVelocity(self.robot_id)
        # Base orientation and angular velocity are not included, keeping shape at 2*num_joints + 6
        return np.array(joint_positions + joint_velocities + list(base_pos) + list(base_vel), dtype=np.float32)


def _check_termination(self):
base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
        return base_pos[2] < 0.2 or base_pos[2] > 1.5  # fallen over or launched too high

def _compute_reward(self):
base_pos, _ = p.getBasePositionAndOrientation(self.robot_id)
base_vel, _ = p.getBaseVelocity(self.robot_id)

        forward_reward = base_vel[0]  # reward forward (+x) velocity
        if not self._check_termination():
            alive_bonus = 1
        else:
            alive_bonus = -500  # large penalty for falling
        # Penalize large actions (a rough effort proxy; actions here are position deltas, not torques)
        action_penalty = 0.001 * np.sum(np.square(self.last_action))

        return forward_reward + alive_bonus - action_penalty


    def render(self):
        # Rendering itself is handled by the PyBullet GUI; just keep the camera on the robot
        if self.render_mode and hasattr(self, 'robot_id'):
            base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
            p.resetDebugVisualizerCamera(
                cameraDistance=2.0,
                cameraYaw=45,
                cameraPitch=-30,
                cameraTargetPosition=base_pos
            )

def close(self):
p.disconnect()
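
# A minimal sanity-check sketch for this env, assuming stable_baselines3 is
# installed and the URDF paths above resolve from the working directory:
#
#     from stable_baselines3.common.env_checker import check_env
#     check_env(simpleBipedEnv(render=False))  # validates spaces and the step/reset API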
63 changes: 63 additions & 0 deletions envs/test.py
@@ -0,0 +1,63 @@
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))  # make the repo root importable
from stable_baselines3 import DDPG, PPO
from envs.simple_biped_env import simpleBipedEnv
from stable_baselines3.common.vec_env import DummyVecEnv
import gymnasium as gym
from gymnasium.envs.registration import register


register(
id='simpleBiped-v0',
entry_point='envs.simple_biped_env:simpleBipedEnv',
max_episode_steps=10000,
)
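# Registration lets gym.make('simpleBiped-v0') build the env below; max_episode_steps
# additionally wraps it in a TimeLimit that truncates overly long episodes.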

def train():
    LOG_DIR = "./logs/"
    env = gym.make('simpleBiped-v0', render=False)
    model = DDPG("MlpPolicy", env, verbose=1, device="auto", tensorboard_log=LOG_DIR)
    model.learn(total_timesteps=10000000)
    # NOTE: the filename says "ppo" but this checkpoint is a DDPG model;
    # it must be loaded with DDPG.load (see run() below).
    model.save("../models/humanoid_ppo")  # path is relative to envs/
    env.close()
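
# Training curves can be monitored with: tensorboard --logdir ./logs/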



def run():
    env = DummyVecEnv([lambda: simpleBipedEnv(render=True)])
    # Load with the same algorithm used in train(); the "ppo" in the filename is historical
    model = DDPG.load("../models/humanoid_ppo", env=env)
    for _ in range(5):  # run 5 episodes
        obs = env.reset()  # VecEnv.reset returns only the observations
        done = False
        total_reward = 0.0
        while not done:
            action, _ = model.predict(obs)
            # VecEnvs return a 4-tuple of batched arrays (one entry per env)
            obs, reward, done, _ = env.step(action)
            done = bool(done[0])
            total_reward += float(reward[0])
        print(f"Episode finished. Total reward: {total_reward}")
    env.close()

def test():
    env = simpleBipedEnv(render=True)
    for _ in range(10):  # run 10 random-action episodes
        obs, _ = env.reset()
        done = False
        total_reward = 0.0
        while not done:
            action = env.action_space.sample()  # random action for testing
            obs, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated  # stop on a fall or on the step limit
            total_reward += reward
        print(f"Test episode finished. Total reward: {total_reward}")
    env.close()

if __name__ == "__main__":
    train()
    # run()
    # test()
68 changes: 0 additions & 68 deletions envs/walker_env.py
@@ -1,68 +0,0 @@
import gymnasium as gym
from gymnasium import spaces
import pybullet as p
import pybullet_data
import numpy as np

class WalkerEnv(gym.Env):
def __init__(self, render=False):
super(WalkerEnv, self).__init__()
self.render_mode = render
self.physicsClient = p.connect(p.GUI if render else p.DIRECT)
p.setAdditionalSearchPath(pybullet_data.getDataPath())

self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)

def reset(self, seed=None, options=None):
p.resetSimulation()
p.setGravity(0, 0, -9.8)
plane_id = p.loadURDF("plane.urdf")
self.robot_id = p.loadURDF("walkers/simple_biped.urdf", [0, 0, 1.0])
base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
print(f"Robot spawned at position: {base_pos}")
return self._get_obs(), {}

def step(self, action):
# apply motor torques or joint control here
# p.setJointMotorControlArray(...)
p.stepSimulation()
obs = self._get_obs()
reward = self._compute_reward()
done = self._check_termination()
return obs, reward, done, False, {}

def _get_obs(self):
joint_states = p.getJointStates(self.robot_id, range(p.getNumJoints(self.robot_id)))
joint_positions = [state[0] for state in joint_states]
joint_velocities = [state[1] for state in joint_states]
# For simple_biped.urdf, only one joint: hip_joint
# So obs = [position, velocity], shape=(2,)
return np.array(joint_positions + joint_velocities, dtype=np.float32)

def _compute_reward(self):
# reward forward movement, penalize falls
base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
return base_pos[0] # reward forward x-motion

def _check_termination(self):
base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
return base_pos[2] < 0.5 # fallen

def render(self):
if self.render_mode:
# Focus camera on robot's current position
if hasattr(self, 'robot_id'):
base_pos = p.getBasePositionAndOrientation(self.robot_id)[0]
p.resetDebugVisualizerCamera(
cameraDistance=2.0,
cameraYaw=45,
cameraPitch=-30,
cameraTargetPosition=base_pos
)
# Rendering handled by PyBullet GUI
else:
pass # No rendering in DIRECT mode

def close(self):
p.disconnect()
Binary file modified models/humanoid_ppo.zip
21 changes: 0 additions & 21 deletions models/test.py

This file was deleted.
