I can run the default agent without issues, but I ran into trouble when running TRPO and DQN.
I tried removing both the save and config directories and rebuilding with TRPO using `python run.py -b TRPO`. The config directory seemed to be created correctly, and the agent.json file inside it had the correct property "type": "trpo_agent".
When I tried running the agent (`python run.py`), a save directory containing a PPO_0 directory was created instead of a TRPO one. I also tried changing the default agent in run.py to TRPO, but got the same result. The model could not evaluate and produced the following error:
Traceback (most recent call last):
File "run.py", line 110, in <module>
session.loadSession()
File "/Users/Oscar/Documents/Cryptotrading/Tradz/TradzQAI/core/session/local.py", line 46, in loadSession
self.initAgent()
File "/Users/Oscar/Documents/Cryptotrading/Tradz/TradzQAI/core/session/local.py", line 57, in initAgent
self.agent = self.agent(env=self.env, device=self.device)._get()
File "/Users/Oscar/Documents/Cryptotrading/Tradz/TradzQAI/agents/PPO.py", line 6, in __init__
Agent.__init__(self, env=env, device=device)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/TradzQAI/agents/agent.py", line 27, in __init__
device=device
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/agents/agent.py", line 283, in from_spec
kwargs=kwargs
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/util.py", line 192, in get_object
return obj(*args, **kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/agents/trpo_agent.py", line 161, in __init__
entropy_regularization=entropy_regularization
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/agents/learning_agent.py", line 141, in __init__
batching_capacity=batching_capacity
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/agents/agent.py", line 80, in __init__
self.model = self.initialize_model()
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/agents/trpo_agent.py", line 189, in initialize_model
likelihood_ratio_clipping=self.likelihood_ratio_clipping
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/pg_prob_ratio_model.py", line 88, in __init__
gae_lambda=gae_lambda
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/pg_model.py", line 98, in __init__
requires_deterministic=False
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/distribution_model.py", line 90, in __init__
discount=discount
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/memory_model.py", line 114, in __init__
reward_preprocessing=reward_preprocessing
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/model.py", line 217, in __init__
self.setup()
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/model.py", line 290, in setup
independent=independent
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/memory_model.py", line 605, in create_operations
independent=independent
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/model.py", line 1195, in create_operations
self.create_observe_operations(reward=reward, terminal=terminal)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/model.py", line 1129, in create_observe_operations
reward=reward
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 368, in __call__
return self._call_func(args, kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 311, in _call_func
result = self._func(*args, **kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/memory_model.py", line 566, in tf_observe_timestep
return tf.cond(pred=optimize, true_fn=true_fn, false_fn=tf.no_op)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2076, in cond
orig_res_t, res_t = context_t.BuildCondBranch(true_fn)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1920, in BuildCondBranch
original_result = fn()
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/memory_model.py", line 562, in true_fn
optimize = self.fn_optimization(**batch)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 368, in __call__
return self._call_func(args, kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 311, in _call_func
result = self._func(*args, **kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/pg_model.py", line 305, in tf_optimization
next_internals=next_internals
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/models/memory_model.py", line 491, in tf_optimization
return self.optimizer.minimize(**arguments)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/core/optimizers/optimizer.py", line 139, in minimize
deltas = self.step(time=time, variables=variables, **kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 368, in __call__
return self._call_func(args, kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 311, in _call_func
result = self._func(*args, **kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/core/optimizers/optimized_step.py", line 102, in tf_step
**kwargs
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 368, in __call__
return self._call_func(args, kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 311, in _call_func
result = self._func(*args, **kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/core/optimizers/natural_gradient.py", line 123, in tf_step
deltas = self.solver.solve(fn_x=fisher_matrix_product, x_init=None, b=[-grad for grad in loss_gradients])
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 368, in __call__
return self._call_func(args, kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 311, in _call_func
result = self._func(*args, **kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/core/optimizers/solvers/conjugate_gradient.py", line 80, in tf_solve
return super(ConjugateGradient, self).tf_solve(fn_x, x_init, b)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/core/optimizers/solvers/iterative.py", line 78, in tf_solve
args = tf.while_loop(cond=self.next_step, body=self.step, loop_vars=args)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 3274, in while_loop
return_same_structure)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2994, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2929, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 368, in __call__
return self._call_func(args, kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 311, in _call_func
result = self._func(*args, **kwargs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/core/optimizers/solvers/conjugate_gradient.py", line 128, in tf_step
A_conjugate = self.fn_x(conjugate)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorforce/core/optimizers/natural_gradient.py", line 112, in fisher_matrix_product
return tf.gradients(ys=delta_kldiv_gradients, xs=variables)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 596, in gradients
gate_gradients, aggregation_method, stop_gradients)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 749, in _GradientsHelper
_RaiseNoGradWrtInitialLoopValError(op, from_ops, xs)
File "/Users/Oscar/Documents/Cryptotrading/Tradz/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 441, in _RaiseNoGradWrtInitialLoopValError
% target_op.name)
ValueError: Cannot compute gradient inside while loop with respect to op 'trpo/actions-and-internals/layered-network/apply/dense0/apply/linear/apply/W'. We do not support taking the gradient wrt or through the initial value of a loop variable. Gradients can be computed through loop invariants or wrt the input parameters to the loop body.
Switching to DQN produced the same problem with the file structure, although in that case the model could evaluate.
bug