Originally a video game, Lunar Lander provides an interesting control problem for engineers and a benchmark for AI algorithms. The goal is to optimize the rocket's trajectory and find the best way to navigate a rocket from its starting point to a landing pad.
The full flight mission succeeds when you land the craft safely between the flags, using as little fuel as possible. Instead of trying to learn autonomy for an entire complex task all at once, we divide the flight task into three separate skills.
A complete agent should be able to stabilize, orient over the landing zone, and land. Let's train our first agent to learn the stabilize skill only. As shown in the video above, a competent stabilize skill will keep the craft hovering steadily.
pip install composabl
import math

from composabl.agent import Agent, Skill, Scenario
# Agent configuration: simulator connection, license key, and training options.
config = {
    "env": {
        "name": "lunar_lander_sim",
        "compute": "local",  # run the simulator on this machine (comma was missing)
        "config": {
            "address": "localhost:1337",  # endpoint of the local simulator
        },
    },
    # Placeholder must be a string — a bare <LICENSE_KEY> is a syntax error.
    # Replace with your actual Composabl license key.
    "license": "<LICENSE_KEY>",
    "training": {},
}
# Instantiate the agent from the configuration above.
agent = Agent(config)
# Three starting-condition scenarios for practicing the stabilize skill.
# A scalar pins a variable to one value; a two-element list is a
# [low, high] range the simulator samples from.
stabilize_scenarios = [
    dict(
        angle=0,
        horizontal_position=[-0.2, 0.2],
        vertical_position=[-0.5, -0.5],
        velocity=[-0.2, 0.2],
    ),
    dict(
        angle=-0.17,
        horizontal_position=[-0.5, 0.5],
        vertical_position=[-0.5, -0.25],
        velocity=0,
    ),
    dict(
        angle=0.12,
        horizontal_position=[-0.7, 0.7],
        vertical_position=[-0.65, -0.1],
        velocity=0,
    ),
]
def stabilize_reward(state=None, prev_state=None):
    """Shaping reward for the stabilize skill.

    Compares the current observation against the previous step's and awards
    points for reducing the craft's tilt and keeping its position steady.

    Args:
        state: current observation; indices used are 0 (x position),
            1 (y position), and 4 (angle) — the standard Lunar Lander
            layout (assumed; confirm against the simulator's state spec).
        prev_state: observation from the previous step, same layout.
            ``None`` on the first step of an episode.

    Returns:
        int: accumulated shaping reward (0 when there is no previous state).
    """
    # First step of an episode: nothing to compare against yet.
    # (The original set reward = 0 here but fell through without returning.)
    if prev_state is None:
        return 0

    # Viewport constants from the classic Lunar Lander environment; they
    # scale the position-stability tolerances below.
    VIEWPORT_W = 600
    VIEWPORT_H = 400
    SCALE = 30.0

    reward = 0
    # Has the angle moved measurably closer to 0 (craft more upright)?
    if abs(prev_state[4]) >= abs(state[4]) + 0.1 * 180 / math.pi:
        reward += 1
    # Has the x position remained stable?
    if abs(prev_state[0] - state[0]) <= 0.01 * (VIEWPORT_W / SCALE / 2):
        reward += 1
    else:
        reward -= 1
    # Has the y position remained stable? Uses the viewport HEIGHT here —
    # the original used VIEWPORT_W, an apparent copy-paste bug.
    if abs(prev_state[1] - state[1]) <= 0.01 * (VIEWPORT_H / SCALE / 2):
        reward += 1
    else:
        reward -= 1
    # The original never returned the accumulated reward.
    return reward
# Build the stabilize skill. Pass the reward FUNCTION itself — the original
# wrote stabilize_reward(), which would call it immediately and hand
# Skill.remote the call's return value instead of a callable the trainer
# can invoke on every step.
stabilize_skill = Skill.remote("stabilize", stabilize_reward, trainable=True)

# Attach every starting-condition scenario to the skill.
for scenario_dict in stabilize_scenarios:
    stabilize_skill.add_scenario(Scenario(scenario_dict))

# Register the skill with the agent and start training.
agent.add_skill(stabilize_skill)
agent.train(train_iters=5000)