My training code:
# initialize learning (G)
n_step_epoch = round(n_epoch_init // batch_size)
for step, (lr_patchs, hr_patchs) in enumerate(train_ds):
    step_time = time.time()
    with tf.GradientTape() as tape:
        fake_hr_patchs = G(lr_patchs)
        mse_loss = tl.cost.mean_squared_error(fake_hr_patchs, hr_patchs, is_mean=True)
    grad = tape.gradient(mse_loss, G.trainable_weights)
    g_optimizer_init.apply_gradients(zip(grad, G.trainable_weights))
    step += 1
    epoch = step // n_step_epoch
    print("Epoch: [{}/{}] step: [{}/{}] time: {}s, mse: {} ".format(
        epoch, n_epoch_init, step, n_step_epoch, time.time() - step_time, mse_loss))
    if (epoch != 0) and (step % n_step_epoch == 0):
        tl.vis.save_images(fake_hr_patchs.numpy(), [ni, ni], save_dir_gan + '/train_g_init_{}.png'.format(epoch))
    if (epoch >= n_epoch_init):
        break
# adversarial learning (G, D)
n_step_epoch = round(n_epoch // batch_size)
for step, (lr_patchs, hr_patchs) in enumerate(train_ds):
    step_time = time.time()
    with tf.GradientTape(persistent=True) as tape:
        fake_patchs = G(lr_patchs)
        logits_fake = D(fake_patchs)
        logits_real = D(hr_patchs)
        feature_fake = VGG((fake_patchs + 1) / 2.)
        feature_real = VGG((hr_patchs + 1) / 2.)
        d_loss1 = tl.cost.sigmoid_cross_entropy(logits_real, tf.ones_like(logits_real))
        d_loss2 = tl.cost.sigmoid_cross_entropy(logits_fake, tf.zeros_like(logits_fake))
        d_loss = d_loss1 + d_loss2
        g_gan_loss = 1e-3 * tl.cost.sigmoid_cross_entropy(logits_fake, tf.ones_like(logits_fake))
        mse_loss = tl.cost.mean_squared_error(fake_patchs, hr_patchs, is_mean=True)
        vgg_loss = 2e-6 * tl.cost.mean_squared_error(feature_fake, feature_real, is_mean=True)
        g_loss = mse_loss + vgg_loss + g_gan_loss
    grad = tape.gradient(g_loss, G.trainable_weights)
    g_optimizer.apply_gradients(zip(grad, G.trainable_weights))
    grad = tape.gradient(d_loss, D.trainable_weights)
    d_optimizer.apply_gradients(zip(grad, D.trainable_weights))
    step += 1
    epoch = step // n_step_epoch
    print("Epoch: [{}/{}] step: [{}/{}] time: {}s, g_loss(mse:{}, vgg:{}, adv:{}) d_loss: {}".format(
        epoch, n_epoch, step, n_step_epoch, time.time() - step_time, mse_loss, vgg_loss, g_gan_loss, d_loss))
    # update learning rate
    if epoch != 0 and (epoch % decay_every == 0):
        new_lr_decay = lr_decay ** (epoch // decay_every)
        lr_v.assign(lr_init * new_lr_decay)
        log = " ** new learning rate: %f (for GAN)" % (lr_init * new_lr_decay)
        print(log)
    if (epoch != 0) and (step % n_step_epoch == 0):
        tl.vis.save_images(fake_patchs.numpy(), [ni, ni], save_dir_gan + '/train_g_{}.png'.format(epoch))
        G.save_weights(checkpoint_dir + '/g_{}.h5'.format(tl.global_flag['mode']))
        D.save_weights(checkpoint_dir + '/d_{}.h5'.format(tl.global_flag['mode']))
    if (epoch >= n_epoch):
        break
My error:
File "train.py", line 370, in <module>
File "train.py", line 125, in train
with tf.GradientTape(persistent=True) as tape:
File "F:\Python\Python37\lib\site-packages\tensorlayer\models\core.py", line 295, in __call__
return self.forward(inputs, **kwargs)
File "F:\Python\Python37\lib\site-packages\tensorlayer\models\core.py", line 338, in forward
memory[node.name] = node(node_input)
File "F:\Python\Python37\lib\site-packages\tensorlayer\layers\core.py", line 433, in __call__
outputs = self.layer.forward(inputs, **kwargs)
File "F:\Python\Python37\lib\site-packages\tensorlayer\layers\dense\base_dense.py", line 106, in forward
z = tf.matmul(inputs, self.W)
File "F:\Python\Python37\lib\site-packages\tensorflow\python\util\dispatch.py", line 180, in wrapper
return target(*args, **kwargs)
File "F:\Python\Python37\lib\site-packages\tensorflow\python\ops\math_ops.py", line 2647, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "F:\Python\Python37\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 6285, in mat_mul
_six.raise_from(_core._status_to_exception(e.code, message), None)
File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InvalidArgumentError: Matrix size-incompatible: In[0]: [1,18432], In[1]: [512,1] [Op:MatMul] name: MatMul/
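If it helps narrow things down, this is the kind of shape check I would run in isolation (a sketch only: it assumes G and D are the SRGAN generator/discriminator built earlier in train.py and that they take NHWC float tensors in [-1, 1]):

import tensorflow as tf

# Dummy batch with the sizes used above: 96x96 LR input, 384x384 HR target.
dummy_lr = tf.zeros([1, 96, 96, 3], dtype=tf.float32)
dummy_hr = tf.zeros([1, 384, 384, 3], dtype=tf.float32)

print("G output:", G(dummy_lr).shape)   # should be (1, 384, 384, 3) for a 4x generator
print("D logits:", D(dummy_hr).shape)   # if this raises the same MatMul error, the
                                        # discriminator's final Dense layer was built for a
                                        # smaller flatten size than a 384x384 input produces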
My loading of images:
def generator_train():
    i = 0
    while i < len(train_hr_imgs):
        yield train_hr_imgs[i], train_lr_imgs[i]
        i += 1

def _map_fn_train(imgh, imgl):
    # scale pixel values from [0, 255] to [-1, 1]
    hr_patch = imgh / (255. / 2.) - 1.
    lr_patch = imgl / (255. / 2.) - 1.
    return lr_patch, hr_patch

train_ds = tf.data.Dataset.from_generator(generator_train, output_types=(tf.float32, tf.float32))
train_ds = train_ds.map(_map_fn_train, num_parallel_calls=multiprocessing.cpu_count())
I pre-scale the input images to 384×384 (HR) and 96×96 (LR) before building the dataset.
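By "pre-scale" I mean a plain resize done before generator_train() runs, roughly like this (a sketch only; the exact resize call and antialias setting are assumptions, not the code I actually use):

import tensorflow as tf

# Each HR image is resized to 384x384 and its LR counterpart to 96x96 (4x downscale)
# before being appended to train_hr_imgs / train_lr_imgs.
def prescale(img):
    hr = tf.image.resize(img, [384, 384], antialias=True)
    lr = tf.image.resize(img, [96, 96], antialias=True)
    return hr, lr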
Any idea how to fix this?