First issue in scaling up:
It looks like one needs to analyze all of the training photos in order to derive the preprocess() normalization factors, as in this example:
http://blog.outcome.io/pytorch-quick-start-classifying-an-image/
It seems someone would have written a program by now that takes a set of images and outputs the numbers for
# Quoted verbatim from the blog post linked above.
# NOTE(review): transforms.Scale was renamed transforms.Resize in later
# torchvision releases; Scale emits a deprecation warning / is removed.
normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
preprocess = transforms.Compose([
transforms.Scale(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
normalize
])
Here's a stab at it:
# Compute the per-channel mean / std of an image set, i.e. the numbers that
# go into transforms.Normalize().  Fixes over the original draft:
#   - Python 3 (the original used `except Exception, e` / print statements);
#   - `~` in paths is expanded with os.path.expanduser (open() does not);
#   - pixels are summed with numpy instead of a per-pixel getpixel() loop;
#   - images are converted to RGB, so grayscale/RGBA files no longer crash;
#   - single pass using E[x^2] - E[x]^2, mathematically equal to the original
#     two-pass population std.
import os

import numpy as np

pic_dir = os.path.expanduser('~/images/prun299')
fileList = pic_dir + '/files'


def channel_stats(arrays):
    """Return ([r_mean, g_mean, b_mean], [r_std, g_std, b_std]) scaled to [0, 1].

    arrays: iterable of H x W x 3 uint8 pixel arrays.  The mean and the
    population standard deviation are taken over every pixel of every array,
    then divided by 255 as transforms.Normalize() expects.

    Raises ValueError when the iterable yields no pixels.
    """
    pix_count = 0
    total = np.zeros(3, dtype=np.float64)
    total_sq = np.zeros(3, dtype=np.float64)
    for arr in arrays:
        flat = arr.reshape(-1, 3).astype(np.float64)
        pix_count += flat.shape[0]
        total += flat.sum(axis=0)
        total_sq += (flat * flat).sum(axis=0)
    if pix_count == 0:
        raise ValueError('no readable images / pixels')
    mean = total / pix_count
    # Population variance in one pass; clamp at 0 to absorb float round-off.
    var = total_sq / pix_count - mean * mean
    std = np.sqrt(np.maximum(var, 0.0))
    return list(mean / 255.0), list(std / 255.0)


def _iter_images(list_file, base_dir):
    """Yield each image named in *list_file* as an H x W x 3 uint8 array.

    Unreadable files are reported and skipped, matching the original's
    best-effort behavior.
    """
    from PIL import Image  # local import: keeps the module importable without Pillow
    with open(list_file) as fp:
        for line in fp:
            path = base_dir + "/" + line.rstrip()
            try:
                with Image.open(path) as im:
                    yield np.asarray(im.convert('RGB'))
            except OSError as e:
                print("None: %s %s" % (path, e))


if __name__ == '__main__':
    mean, std = channel_stats(_iter_images(fileList, pic_dir))
    print('mean=[' + ', '.join(str(v) for v in mean) + '],')
    print('std=[' + ', '.join(str(v) for v in std) + ']')

# 6764 files:
# mean=[0.3876046197, 0.3751385941, 0.3667266388],
# std=[0.2649736267, 0.2584158245, 0.2701408752]
This results in the initial loader below. The Keras version (shown at the end) works; this PyTorch version is untested.
# Loader setup: preprocessing pipeline using the per-channel statistics
# measured over the prun299 image set.
from torchvision import models, transforms
from PIL import Image
import os

# expanduser: open()/Image.open() do not expand a literal '~'.
pair_dir = os.path.expanduser('~/pb')
pic_dir = os.path.expanduser('~/images/prun299')
image_size = 299
normalize = transforms.Normalize(
    mean=[0.3876046197, 0.3751385941, 0.3667266388],
    std=[0.2649736267, 0.2584158245, 0.2701408752]
)
preprocess = transforms.Compose([
    # transforms.Scale was renamed Resize in torchvision.  Resize the short
    # side to 342 (the usual Inception ratio) so the 299x299 center crop
    # fits; the original Scale(256) was smaller than the crop size.
    transforms.Resize(342),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    normalize
])
# Cache of filename -> preprocessed 1 x C x H x W tensor.
file_map = {}
def load_preproc():
    """Load and preprocess every image referenced by the four pair files."""
    print('Loading pics')
    for part in ('test.neg', 'test.pos', 'train.pos', 'train.neg'):
        scan_file(pair_dir + '/' + part)
def scan_file(fname):
    """Preprocess every image named in pair file *fname* into file_map.

    Each line of the file holds two image file names.  A name not yet in
    file_map is loaded, run through `preprocess`, and cached as a
    1 x C x H x W tensor; names already cached are only counted.
    """
    print('Scan file: ' + fname)
    ct = 0   # images preprocessed on this scan
    ct2 = 0  # images already cached

    def _ensure(name):
        # The original tested `type(file_map.get(...)) is NoneType`, but
        # NoneType is never defined (NameError at runtime); a plain
        # membership test is both correct and idiomatic.
        nonlocal ct, ct2
        if name in file_map:
            ct2 += 1
            return
        ct += 1
        img_pil = Image.open(pic_dir + '/' + name)
        img_tensor = preprocess(img_pil)
        img_tensor.unsqueeze_(0)  # add the batch dimension in place
        file_map[name] = img_tensor

    with open(fname) as fp:
        for line in fp:
            fname1, fname2 = line.split()
            _ensure(fname1)
            _ensure(fname2)
    print(' loaded: ' + str(ct) + ' already loaded: ' + str(ct2))
Here's how the keras file-load-preproc portion looks:
from keras.preprocessing import image
from keras.applications.inception_v3 import preprocess_input
# Fragment from inside the scan loop: fname1, pic_dir, input_dim, and
# file_map are defined by the surrounding (unshown) code.
# NOTE(review): input_dim is presumably (299, 299) for InceptionV3 — confirm.
im1 = image.load_img(pic_dir + '/' + fname1, target_size=input_dim)
x = image.img_to_array(im1)
# Add a batch axis for preprocess_input, then [0] drops it again so the
# cached value is a single H x W x C array.
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)[0]
file_map[fname1] = x