This repository was archived by the owner on Apr 14, 2026. It is now read-only.

Biped #118

Binary file added Biped_AlexKumar.pdf
Binary file not shown.
49 changes: 49 additions & 0 deletions README.md
@@ -1,3 +1,52 @@
# Biped Branch

## About this repository

This is the README for the biped branch of OmniIsaacGymEnvs. To replicate the work, convert the biped URDF to a USD using the built-in URDF converter. Make sure to mark the asset as instanceable (so many environments can run efficiently in parallel) and to remove the fixed base link (so the robot can walk rather than float in the air).

[To view the report for this project](https://github.com/ACK101101/OmniIsaacGymEnvs/blob/biped/Biped_AlexKumar.pdf)

## Important files
**Experiment Runner Bash Script:** ./omniisaacgymenvs/auto_runner.sh

**Analyze Experiment Python Notebook:** ./analysis.ipynb

**Plots Directory:** ./plots/
Folder for storing tensorboard plots, reward plots (including component-wise breakdowns of the reward function), and metrics plots (for comparing performance across experiments)

**Utils:** ./omniisaacgymenvs/utils/task_util.py; ./omniisaacgymenvs/utils/config_utils/sim_config.py

Added Biped to task_map, which maps each task name to its environment class
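The registration described above can be sketched as follows (a minimal sketch: `BipedLocomotionTask` and `create_task` are hypothetical stand-ins for the real imports and helpers in task_util.py):

```python
# Hypothetical sketch of how task_util.py maps task names to environment
# classes. BipedLocomotionTask stands in for the class the repo imports.

class BipedLocomotionTask:
    def __init__(self, name, sim_config=None, env=None):
        self.name = name
        self.sim_config = sim_config
        self.env = env

# task_map: task name (as given on the command line, e.g. task=Biped)
# -> environment class to instantiate
task_map = {
    "Biped": BipedLocomotionTask,
}

def create_task(name, sim_config=None, env=None):
    # Look up the class registered for the requested task and instantiate it
    task_cls = task_map[name]
    return task_cls(name=name, sim_config=sim_config, env=env)
```

With this mapping in place, `task=Biped` on the training command line resolves to the biped environment.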

**Configs:** ./omniisaacgymenvs/cfg/task/Biped.yaml; ./omniisaacgymenvs/cfg/train/BipedPPO.yaml

Set up environment, simulation, and experiment variables (reward strengths, etc)

Added speedWeight, terminationHeading, terminationUp, progress_reward

--

Set up learning variables (learning rate, epochs, etc)

Experimented with a lower minibatch_size and fewer environments; reverted to the defaults after subpar performance

**Articulations:** ./omniisaacgymenvs/robots/articulations/biped.py

Loads in the USD model

**Tasks:** ./omniisaacgymenvs/tasks/base/rl_task.py; ./omniisaacgymenvs/tasks/biped.py; ./omniisaacgymenvs/tasks/shared/locomotion.py

Added logger to save reward components to a text file during training
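A minimal sketch of such a logger (the `Name: value` lines separated by `Step: N` markers are inferred from the format analysis.py parses; the function name and call pattern are assumptions, not the repo's exact code):

```python
import os
import tempfile

def log_reward_components(path, step, components):
    """Append one block of averaged reward components to a text log,
    in the "Step: N" / "Name: value" format that analysis.py expects."""
    with open(path, "a") as f:
        f.write(f"Step: {step}\n")
        for name, value in components.items():
            f.write(f"{name}: {value:.4f}\n")

# Example: log one block every 100 episodes (temp file used for illustration)
log_path = os.path.join(tempfile.mkdtemp(), "Biped_08.txt")
log_reward_components(log_path, 100, {
    "Ave Reward": 1.23,
    "Progress Reward": 0.80,
})
```

Each `Step:` marker starts a new block, which is how the parser in analysis.py knows where one row of components ends and the next begins.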

--

Set up crucial functions used during training, such as initializing the environment from the config, populating it with the robot models, resetting a particular environment when its termination conditions are met, and reading joint information

--

Perhaps the most important file: it uses the observations and config variables to compute the reward buffer. Added steps for sending reward information to be logged for post-experiment analysis, added extra reset conditions and their corresponding death costs, added the speed and progress rewards, and modified the alive reward into an alive/dead reward to be more interpretable
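The reward composition described above can be sketched like this (a minimal NumPy sketch: the weights mirror parameter names in Biped.yaml, but the exact computation in locomotion.py differs):

```python
import numpy as np

# Minimal sketch of compiling a reward buffer from weighted components,
# with a death cost applied on termination. Default weights echo
# Biped.yaml (progress_reward, headingWeight, upWeight,
# alive_reward_scale, deathCost); not the repo's exact implementation.

def compute_reward(progress, heading, up, alive,
                   action_cost, energy_cost, dof_limit_cost,
                   terminated,
                   progress_w=5.0, heading_w=0.75, up_w=0.1,
                   alive_scale=2.0, death_cost=-5.0):
    reward = (progress_w * progress
              + heading_w * heading
              + up_w * up
              + alive_scale * alive
              - action_cost - energy_cost - dof_limit_cost)
    # Environments that hit a reset condition receive the death cost
    # instead, so terminations are explicitly penalized.
    return np.where(terminated, death_cost, reward)
```

Vectorizing over all parallel environments this way keeps the per-step reward computation a handful of array operations.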

---
# Omniverse Isaac Gym Reinforcement Learning Environments for Isaac Sim

## About this repository
354 changes: 354 additions & 0 deletions analysis.ipynb

Large diffs are not rendered by default.

51 changes: 51 additions & 0 deletions analysis.py
@@ -0,0 +1,51 @@
import matplotlib.pyplot as plt
import pandas as pd

# basic params
experiment = "Biped_08"
file = f"omniisaacgymenvs/runs/{experiment}/{experiment}.txt"
col_names = ["Ave Reward", "Progress Reward", "Alive Reward",
"Up Reward", "Heading Reward", "Action Cost",
"Energy Cost", "DOF Limit Cost"]

# parse log and build a pandas DataFrame
rows = []
with open(file, 'r') as f:
    row = []
    for line in f:
        rew_type, val = line.split(":", 1)
        if rew_type.startswith("Step"):
            if row:  # flush the previous block of component values
                rows.append(row)
            row = []
        else:
            row.append(float(val.strip()))
    if row:  # keep the final block, which has no trailing "Step" marker
        rows.append(row)

df = pd.DataFrame(rows, columns=col_names)
df = df.reset_index()

# plot
# Create a bar chart for stacked categories
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
# fig, ax1 = plt.subplots(figsize=(10, 6))
stacked_cols = col_names[1:]
stacked_cols.append('index')
line_cols = ['Ave Reward', 'index']

df[stacked_cols].plot(x='index', kind='bar', stacked=True, ax=ax1)
ax1.set_xlabel('Episode Num * 100')
ax1.tick_params(axis='x', labelsize=5)
ax1.set_ylabel('Reward Components')
ax1.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Create a line plot for 'Ave Reward'
df[line_cols].plot(x='index', kind='line', color='black', marker='o', ax=ax2)
ax2.set_xlabel('Episode Num * 100')
ax2.set_ylabel('Ave Reward')

plt.tight_layout()
plt.savefig(f"{experiment}.png")
plt.show()

23 changes: 23 additions & 0 deletions omniisaacgymenvs/auto_runner.sh
@@ -0,0 +1,23 @@
#!/bin/bash

export PYTHON_PATH=~/.local/share/ov/pkg/isaac_sim-2023.1.0/python.sh

$PYTHON_PATH scripts/rlgames_train.py task=Biped experiment=Biped_final_0 max_iterations=2000 headless=True task.env.deathCost=-1.0 task.env.terminationHeight=0.0 task.env.terminationHeading=0.0 task.env.terminationUp=0.0 task.env.progress_reward=1.0 train.params.config.learning_rate=5e-4
$PYTHON_PATH scripts/rlgames_train.py task=Biped experiment=Biped_final_1 max_iterations=2000 headless=True task.env.deathCost=-1.0 task.env.terminationHeight=0.0 task.env.terminationHeading=0.0 task.env.terminationUp=0.0 task.env.progress_reward=1.0 train.params.config.learning_rate=5e-5
$PYTHON_PATH scripts/rlgames_train.py task=Biped experiment=Biped_final_2 max_iterations=2000 headless=True task.env.deathCost=-5.0 task.env.terminationHeight=0.0 task.env.terminationHeading=0.0 task.env.terminationUp=0.0 task.env.progress_reward=1.0 train.params.config.learning_rate=5e-5
$PYTHON_PATH scripts/rlgames_train.py task=Biped experiment=Biped_final_3 max_iterations=2000 headless=True task.env.deathCost=-5.0 task.env.terminationHeight=0.7 task.env.terminationHeading=0.0 task.env.terminationUp=0.0 task.env.progress_reward=1.0 train.params.config.learning_rate=5e-5
$PYTHON_PATH scripts/rlgames_train.py task=Biped experiment=Biped_final_4 max_iterations=2000 headless=True task.env.deathCost=-5.0 task.env.terminationHeight=0.7 task.env.terminationHeading=0.0 task.env.terminationUp=0.0 task.env.progress_reward=5.0 train.params.config.learning_rate=5e-5
$PYTHON_PATH scripts/rlgames_train.py task=Biped experiment=Biped_final_5 max_iterations=2000 headless=True task.env.deathCost=-5.0 task.env.terminationHeight=0.7 task.env.terminationHeading=0.0 task.env.terminationUp=0.7 task.env.progress_reward=5.0 train.params.config.learning_rate=5e-5
$PYTHON_PATH scripts/rlgames_train.py task=Biped experiment=Biped_final_6 max_iterations=2000 headless=True task.env.deathCost=-5.0 task.env.terminationHeight=0.7 task.env.terminationHeading=0.5 task.env.terminationUp=0.7 task.env.progress_reward=5.0 train.params.config.learning_rate=5e-5
$PYTHON_PATH scripts/rlgames_train.py task=Biped experiment=Biped_final_7 max_iterations=2000 headless=True task.env.deathCost=-5.0 task.env.terminationHeight=0.7 task.env.terminationHeading=0.5 task.env.terminationUp=0.7 task.env.progress_reward=5.0 train.params.config.learning_rate=5e-5 task.env.actionsCost=0.03 task.env.energyCost=0.1

# run without resets
# run with lower lr
# run with higher death cost
# run with height reset
# increase progress reward
# run with up reset
# run with heading reset
# run with higher action and energy cost

# $PYTHON_PATH scripts/rlgames_train.py task=Biped test=True checkpoint=runs/Biped_15_4/nn/Biped_15_4.pth num_envs=2
94 changes: 94 additions & 0 deletions omniisaacgymenvs/cfg/task/Biped.yaml
@@ -0,0 +1,94 @@
# used to create the object
name: Biped

physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
# numEnvs: ${...num_envs}
numEnvs: ${resolve_default:4096,${...num_envs}}
envSpacing: 5
episodeLength: 1000
enableDebugVis: False

clipActions: 1.0

powerScale: 1.0
controlFrequencyInv: 2 # 60 Hz

# reward parameters
headingWeight: 0.75 # direction? changed from 0.75
upWeight: 0.1 # changed from 0.1
speedWeight: 3.0

# cost parameters
actionsCost: 0.01
energyCost: 0.05
dofVelocityScale: 0.1
angularVelocityScale: 0.25
contactForceScale: 0.01
jointsAtLimitCost: 0.25 # changed from 0.25
deathCost: -5.0 # changed from -1.0
terminationHeight: 0.7 # changed!
terminationHeading: 0.5 # added
terminationUp: 0.7 # added
alive_reward_scale: 2.0 # changed from 2.0
progress_reward: 5.0 # added

sim:
dt: 0.0083 # 1/120 s
use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
gravity: [0.0, 0.0, -9.81]
add_ground_plane: True
add_distant_light: False
use_fabric: True
enable_scene_query_support: False
disable_contact_processing: False

# set to True if you use camera sensors in the environment
enable_cameras: False

default_physics_material:
static_friction: 1.0
dynamic_friction: 1.0
restitution: 0.0

physx:
worker_thread_count: ${....num_threads}
solver_type: ${....solver_type}
use_gpu: ${eq:${....sim_device},"gpu"} # set to False to run on CPU
solver_position_iteration_count: 4
solver_velocity_iteration_count: 0
bounce_threshold_velocity: 0.2
friction_offset_threshold: 0.04
friction_correlation_distance: 0.025
enable_sleeping: True
enable_stabilization: True
max_depenetration_velocity: 10.0

# GPU buffers
gpu_max_rigid_contact_count: 524288
gpu_max_rigid_patch_count: 81920
gpu_found_lost_pairs_capacity: 8192
gpu_found_lost_aggregate_pairs_capacity: 262144
gpu_total_aggregate_pairs_capacity: 8192
gpu_max_soft_body_contacts: 1048576
gpu_max_particle_contacts: 1048576
gpu_heap_capacity: 67108864
gpu_temp_buffer_capacity: 16777216
gpu_max_num_partitions: 8

Biped:
# -1 to use default values
override_usd_defaults: False
enable_self_collisions: True
enable_gyroscopic_forces: True
# also in stage params
# per-actor
solver_position_iteration_count: 4
solver_velocity_iteration_count: 0
sleep_threshold: 0.005
stabilization_threshold: 0.001
# per-body
density: -1
max_depenetration_velocity: 10.0
8 changes: 8 additions & 0 deletions omniisaacgymenvs/cfg/task/BipedSAC.yaml
@@ -0,0 +1,8 @@
# used to create the object
defaults:
- Biped
- _self_

# if given, will override the device setting in gym.
env:
numEnvs: ${resolve_default:64,${...num_envs}}
105 changes: 105 additions & 0 deletions omniisaacgymenvs/cfg/task/MyCartpole.yaml
@@ -0,0 +1,105 @@
# name of the task - this should match the name used in the task mapping dictionary in task_util.py
name: MyCartpole

# physics engine - only physx is currently supported. This value does not need to be modified.
physics_engine: ${..physics_engine}

# task-related parameters
env:
# number of environments to create
numEnvs: ${resolve_default:512,${...num_envs}}
# spacing between each environment (in meters)
envSpacing: 4.0

# Cartpole reset distance limit
resetDist: 3.0
# Cartpole effort scaling
maxEffort: 400.0

# clip values in observation buffer to be within this range (-5.0 to +5.0)
clipObservations: 5.0
# clip values in actions to be within this range (-1.0 to +1.0)
clipActions: 1.0
# perform 2 simulation steps for every action (applies actions every 2 simulation steps)
controlFrequencyInv: 2 # 60 Hz

# simulation related parameters
sim:
# simulation dt (dt between each simulation step)
dt: 0.0083 # 1/120 s
# whether to use the GPU pipeline - data returned from Isaac Sim APIs will be on the GPU if set to True
use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
# gravity vector for the simulation scene
gravity: [0.0, 0.0, -9.81]

# whether to add a ground plane to the world
add_ground_plane: True
# whether to add lighting to the world
add_distant_light: False

# enable flatcache - this is required for rendering
use_flatcache: True
# disable scene query - this will disable interaction with the scene to improve performance
# this must be set to True for ray casting
enable_scene_query_support: False
# disable additional contact processing to improve performance. This should be set to True when using RigidContactView
disable_contact_processing: False

# set to True if you use camera sensors in the environment
enable_cameras: False

# default parameters if no additional physics materials are specified
default_physics_material:
static_friction: 1.0
dynamic_friction: 1.0
restitution: 0.0

# PhysX related parameters
# Additional USD physics schema documentation can be found here: https://docs.omniverse.nvidia.com/kit/docs/omni_usd_schema_physics/104.2/class_physx_schema_physx_scene_a_p_i.html
physx:
worker_thread_count: ${....num_threads}
solver_type: ${....solver_type}
use_gpu: ${eq:${....sim_device},"gpu"} # set to False to run on CPU
solver_position_iteration_count: 4
solver_velocity_iteration_count: 0
contact_offset: 0.02
rest_offset: 0.001
bounce_threshold_velocity: 0.2
friction_offset_threshold: 0.04
friction_correlation_distance: 0.025
enable_sleeping: True
enable_stabilization: True
max_depenetration_velocity: 100.0

# GPU buffers
gpu_max_rigid_contact_count: 524288
gpu_max_rigid_patch_count: 81920
gpu_found_lost_pairs_capacity: 1024
gpu_found_lost_aggregate_pairs_capacity: 262144
gpu_total_aggregate_pairs_capacity: 1024
gpu_max_soft_body_contacts: 1048576
gpu_max_particle_contacts: 1048576
gpu_heap_capacity: 67108864
gpu_temp_buffer_capacity: 16777216
gpu_max_num_partitions: 8

# each asset in the task can override physics parameters defined in the scene
# the name of the asset must match the name of the ArticulationView for the asset in the task
# additional Articulation and rigid body documentation can be found at https://docs.omniverse.nvidia.com/kit/docs/omni_usd_schema_physics/104.2/class_physx_schema_physx_articulation_a_p_i.html and https://docs.omniverse.nvidia.com/kit/docs/omni_usd_schema_physics/104.2/class_physx_schema_physx_rigid_body_a_p_i.html
Cartpole:
# a value of -1 means to use the same values defined in the physics scene
override_usd_defaults: False
enable_self_collisions: False
enable_gyroscopic_forces: True
# also in stage params
# per-actor
solver_position_iteration_count: 4
solver_velocity_iteration_count: 0
sleep_threshold: 0.005
stabilization_threshold: 0.001
# per-body
density: -1
max_depenetration_velocity: 100.0
# per-shape
contact_offset: 0.02
rest_offset: 0.001