CartPole
This example shows how OptFlow solves the CartPole problem simulated by Gym.
# Author: Anonymized for paper review
import optflow as flow
from optflow.contrib.gym_op import Gym, GymStep, GymReset
# Build the three Gym operators used below: one that creates the
# 'CartPole-v1' environment, one that resets it, and one that steps it.
gym_init = Gym('CartPole-v1')
gym_reset = GymReset()
gym_step = GymStep()

# Create the environment and reset it to obtain the initial state.
env = gym_init()
env, state = gym_reset(env)

# Running sum of per-step rewards; this is the objective handed to Problem.
total_reward = flow.Constant(0.)

# Decision variable indexed by time step: a categorical variable of size 2
# with an unbounded (flow.infinite) leading dimension.
# NOTE(review): size=2 presumably matches CartPole's two discrete actions
# (push left / push right) -- confirm against the Gym environment spec.
x = flow.Variable(cat=flow.categorical, size=2, shape=(flow.infinite, ))

# Step the environment with the action chosen for step t, accumulating the
# reward, until the environment reports the episode is done.
with flow.ForLoop(flow.infinite) as t:
    env, state, reward, done = gym_step(env, x[t])
    total_reward += reward
    with flow.IfCond(done):
        flow.break_loop()

# Maximize the accumulated reward using the reinforcement-learning solver.
prob = flow.Problem(objective=total_reward, sense=flow.maximize)
prob.train(flow.rl, params={'num_episodes': 100})

# NOTE(review): presumably turns on recording of the environment for the
# final solve after training -- confirm against the gym_op implementation.
gym_init.recording = True

opt_obj = prob.solve(flow.rl)
print(opt_obj, x.optimized_value)
[Run Online Demo] (password: )