Thank you for your great videos, insights, code, etc. Much appreciated and inspiring.
After Pong, I want to try other Atari games like Space Invaders, Pac-Man, etc. and explore the boundaries of the policy gradient approach. Unfortunately, I got an error when I simply replaced `inner_env = gym.make('Pong-v0')` with `inner_env = gym.make('SpaceInvaders-v0')`. The code seemed generic enough to handle any Atari game, and the training did start, but it then failed with an error in the "Writing summary" step. Any pointers as to what might have caused it?
`args: {'n_epoch': 6000, 'batch_size': 10000, 'output_dir': '/tmp/pong_output', 'job_dir': '/tmp/pong_output', 'restore': False, 'render': False, 'save_checkpoint_steps': 1, 'learning_rate': 0.005, 'decay': 0.99, 'gamma': 0.99, 'laziness': 0.01, 'hidden_dim': 200, 'max_to_keep': 6000}
epoch 1
Rollout phase
D:/OneDrive/Robotics/AINN/pong_google_cloud/tensorflow-without-a-phd/tensorflow-rl-spaceinvaders/trainer/task.py:217: RuntimeWarning: invalid value encountered in true_divide
prwd /= np.std(prwd)
Train phase
rollout reward: -4.323713773987632
Writing summary
Traceback (most recent call last):
File "", line 1, in
runfile('D:/OneDrive/Robotics/AINN/pong_google_cloud/tensorflow-without-a-phd/tensorflow-rl-spaceinvaders/trainer/task.py', wdir='D:/OneDrive/Robotics/AINN/pong_google_cloud/tensorflow-without-a-phd/tensorflow-rl-spaceinvaders/trainer')
File "D:\anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 668, in runfile
execfile(filename, namespace)
File "D:\anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 108, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "D:/OneDrive/Robotics/AINN/pong_google_cloud/tensorflow-without-a-phd/tensorflow-rl-spaceinvaders/trainer/task.py", line 316, in
main(args)
File "D:/OneDrive/Robotics/AINN/pong_google_cloud/tensorflow-without-a-phd/tensorflow-rl-spaceinvaders/trainer/task.py", line 249, in main
summary = sess.run(merged, feed_dict=feed_dict)
File "D:\anaconda\lib\site-packages\tensorflow\python\client\session.py", line 877, in run
run_metadata_ptr)
File "D:\anaconda\lib\site-packages\tensorflow\python\client\session.py", line 1100, in _run
feed_dict_tensor, options, run_metadata)
File "D:\anaconda\lib\site-packages\tensorflow\python\client\session.py", line 1272, in _do_run
run_metadata)
File "D:\anaconda\lib\site-packages\tensorflow\python\client\session.py", line 1291, in _do_call
raise type(e)(node_def, op, message)
InvalidArgumentError: Nan in summary histogram for: summaries/train/model/dense_2/kernel
[[Node: summaries/train/model/dense_2/kernel = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](summaries/train/model/dense_2/kernel/tag, summaries/train/model/dense_2/kernel/ReadVariableOp)]]
Caused by op 'summaries/train/model/dense_2/kernel', defined at:
File "D:\anaconda\lib\runpy.py", line 193, in _run_module_as_main
"main", mod_spec)
File "D:\anaconda\lib\runpy.py", line 85, in run_code
exec(code, run_globals)
File "D:\anaconda\lib\site-packages\spyder_kernels\console_main.py", line 11, in
start.main()
File "D:\anaconda\lib\site-packages\spyder_kernels\console\start.py", line 296, in main
kernel.start()
File "D:\anaconda\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
self.io_loop.start()
File "D:\anaconda\lib\site-packages\tornado\platform\asyncio.py", line 127, in start
self.asyncio_loop.run_forever()
File "D:\anaconda\lib\asyncio\base_events.py", line 422, in run_forever
self._run_once()
File "D:\anaconda\lib\asyncio\base_events.py", line 1434, in _run_once
handle._run()
File "D:\anaconda\lib\asyncio\events.py", line 145, in _run
self._callback(*self._args)
File "D:\anaconda\lib\site-packages\tornado\platform\asyncio.py", line 117, in _handle_events
handler_func(fileobj, events)
File "D:\anaconda\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
return fn(*args, **kwargs)
File "D:\anaconda\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
self._handle_recv()
File "D:\anaconda\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
self._run_callback(callback, msg)
File "D:\anaconda\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
callback(*args, **kwargs)
File "D:\anaconda\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
return fn(*args, **kwargs)
File "D:\anaconda\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "D:\anaconda\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "D:\anaconda\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "D:\anaconda\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "D:\anaconda\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "D:\anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 2662, in run_cell
raw_cell, store_history, silent, shell_futures)
File "D:\anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 2785, in _run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "D:\anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 2907, in run_ast_nodes
if self.run_code(code, result):
File "D:\anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 2961, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "", line 1, in
runfile('D:/OneDrive/Robotics/AINN/pong_google_cloud/tensorflow-without-a-phd/tensorflow-rl-spaceinvaders/trainer/task.py', wdir='D:/OneDrive/Robotics/AINN/pong_google_cloud/tensorflow-without-a-phd/tensorflow-rl-spaceinvaders/trainer')
File "D:\anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 668, in runfile
execfile(filename, namespace)
File "D:\anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 108, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "D:/OneDrive/Robotics/AINN/pong_google_cloud/tensorflow-without-a-phd/tensorflow-rl-spaceinvaders/trainer/task.py", line 316, in
main(args)
File "D:/OneDrive/Robotics/AINN/pong_google_cloud/tensorflow-without-a-phd/tensorflow-rl-spaceinvaders/trainer/task.py", line 148, in main
tf.summary.histogram(var.op.name, var)
File "D:\anaconda\lib\site-packages\tensorflow\python\summary\summary.py", line 187, in histogram
tag=tag, values=values, name=scope)
File "D:\anaconda\lib\site-packages\tensorflow\python\ops\gen_logging_ops.py", line 282, in histogram_summary
"HistogramSummary", tag=tag, values=values, name=name)
File "D:\anaconda\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "D:\anaconda\lib\site-packages\tensorflow\python\util\deprecation.py", line 454, in new_func
return func(*args, **kwargs)
File "D:\anaconda\lib\site-packages\tensorflow\python\framework\ops.py", line 3155, in create_op
op_def=op_def)
File "D:\anaconda\lib\site-packages\tensorflow\python\framework\ops.py", line 1717, in init
self._traceback = tf_stack.extract_stack()
InvalidArgumentError (see above for traceback): Nan in summary histogram for: summaries/train/model/dense_2/kernel
[[Node: summaries/train/model/dense_2/kernel = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](summaries/train/model/dense_2/kernel/tag, summaries/train/model/dense_2/kernel/ReadVariableOp)]]`
It is running on Anaconda on a Windows machine. Thank you.