CartPole

This example shows how OptFlow solves the CartPole problem simulated by Gym.

# Author: Anonymized for paper review

import optflow as flow
from optflow.contrib.gym_op import Gym, GymStep, GymReset

# Operator objects from optflow.contrib.gym_op: one constructed with the
# environment id 'CartPole-v1', plus one each for reset and step.
gym_init = Gym('CartPole-v1')
gym_reset = GymReset()
gym_step = GymStep()

# Calling the operators yields an environment handle and, after the reset,
# a state value alongside the (rebound) handle.
env = gym_init()
env, state = gym_reset(env)
# Accumulator used as the objective below, starting from a constant zero.
total_reward = flow.Constant(0.)
# Variable indexed by the loop step `t` below; declared categorical with
# size=2 and a shape whose only dimension is flow.infinite.
# NOTE(review): size=2 presumably matches CartPole's two discrete actions
# (push left / push right) -- confirm against the environment spec.
x = flow.Variable(cat=flow.categorical, size=2, shape=(flow.infinite, ))
# Loop declared with flow.infinite as its bound; the only exit written here
# is break_loop() under the `done` condition.
with flow.ForLoop(flow.infinite) as t:
    # Pass x[t] to the step operator together with the current env; the
    # returned env/state replace the previous ones for the next iteration.
    env, state, reward, done = gym_step(env, x[t])
    total_reward += reward
    with flow.IfCond(done):
        flow.break_loop()

# Problem definition: maximize the accumulated reward.
prob = flow.Problem(objective=total_reward, sense=flow.maximize)
# Train using flow.rl, passing num_episodes=100 through `params`.
prob.train(flow.rl, params={'num_episodes': 100})
# Set the `recording` attribute on the Gym operator after training and
# before the final solve.
# NOTE(review): presumably this records only the solve-time episode; `env`
# was already created from gym_init above, so confirm the flag still takes
# effect at this point.
gym_init.recording = True
opt_obj = prob.solve(flow.rl)
# Print the value returned by solve() and the optimized value of x.
print(opt_obj, x.optimized_value)


[Run Online Demo] (password: )