Why are my predictions bad, if my accuracy in train is roughly 100% (Keras CNN)

Question

In my CNN i have to handle 2 classes in a binary system, I have 700 images each class to train, and others to validation. This is my train.py:

#import tensorflow as tf
import cv2
import os
import numpy as np
from keras.layers.core import Flatten, Dense, Dropout, Reshape
from keras.models import Model
from keras.layers import Input, ZeroPadding2D, Dropout
from keras import optimizers
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
from keras.applications.vgg16 import VGG16
TRAIN_DIR = 'train/'
TEST_DIR = 'test/'
v = 'v/'
BATCH_SIZE = 32
NUM_EPOCHS = 5
def crop_img(img, h, w):
    h_margin = (img.shape[0] - h) // 2 if img.shape[0] > h else 0
    w_margin = (img.shape[1] - w) // 2 if img.shape[1] > w else 0
crop_img = img[h_margin:h + h_margin,w_margin:w + w_margin,:]

return crop_img


def subtract_gaussian_blur(img):
return cv2.addWeighted(img, 4, cv2.GaussianBlur(img, (0, 0), 5), -4, 128)


def ReadImages(Path):
    LabelList = list()
    ImageCV = list()
    classes = ["nonPdr", "pdr"]
FolderList = [f for f in os.listdir(Path) if not f.startswith('.')]

for File in FolderList:
    for index, Image in enumerate(os.listdir(os.path.join(Path, File))):

        ImageCV.append(cv2.resize(cv2.imread(os.path.join(Path, File) + os.path.sep + Image), (224,224)))

        LabelList.append(classes.index(os.path.splitext(File)[0])) 

        img_crop = crop_img(ImageCV[index].copy(), 224, 224)

        ImageCV[index] = subtract_gaussian_blur(img_crop.copy())

return ImageCV, LabelList


data, labels = ReadImages(TRAIN_DIR)
valid, vlabels = ReadImages(TEST_DIR)
vgg16_model = VGG16(weights="imagenet", include_top=True)
base_model = Model(input=vgg16_model.input, 
                   output=vgg16_model.get_layer("block5_pool").output)
base_out = base_model.output
base_out = Reshape((25088,))(base_out)
top_fc1 = Dense(4096, activation="relu")(base_out)
top_fc1 = Dropout(0.5)(base_out)
top_fc1 = Dense(4096, activation="relu")(base_out)
top_fc1 = Dropout(0.5)(base_out)
top_fc1 = Dense(64, activation="relu")(base_out)
top_fc1 = Dropout(0.5)(base_out)
top_preds = Dense(1, activation="sigmoid")(top_fc1)
for layer in base_model.layers[0:14]:
    layer.trainable = False
model = Model(input=base_model.input, output=top_preds)
sgd = SGD(lr=1e-4, momentum=0.9)
model.compile(optimizer=sgd, loss="binary_crossentropy", metrics=["accuracy"])
data = np.asarray(data)
valid = np.asarray(valid)
data = data.astype('float32')
valid = valid.astype('float32')
data /= 255
valid /= 255
labels = np.array(labels)
perm = np.random.permutation(len(data))
data = data[perm]
labels = labels[perm]
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)
datagen.fit(data) 
mean = datagen.mean #This result I put manually in predict.py

std = datagen.std #This result I put manually in predict.py
print(mean, "mean")
print(std, "std")
es = EarlyStopping(monitor='val_loss', verbose=1)
model.fit_generator(datagen.flow(data, np.array(labels), batch_size=32), 
                    steps_per_epoch=len(data) / 32, epochs=15,
                    validation_data=(valid, np.array(vlabels)),
                    nb_val_samples=72, callbacks=[es])
model.save('model.h5')

And after Run this code, it return a strange result of roughly 100% of accuracy after 5 or 6 epochs. So I try to run my predict.py code: (I know that I have to encapsulate some methods, but for now I just copy and paste all from train)

from keras.models import load_model
import cv2
import os
import numpy as np
TEST_DIR = 'v/0/'
pdr = 0
nonPdr = 0
model = load_model('model.h5')
def normalize(x, mean, std):
    x[..., 0] -= mean[0]
    x[..., 1] -= mean[1]
    x[..., 2] -= mean[2]
    x[..., 0] /= std[0]
    x[..., 1] /= std[1]
    x[..., 2] /= std[2]
    return x
def crop_img(img, h, w):
    h_margin = (img.shape[0] - h) // 2 if img.shape[0] > h else 0
    w_margin = (img.shape[1] - w) // 2 if img.shape[1] > w else 0
crop_img = img[h_margin:h + h_margin,w_margin:w + w_margin,:]

return crop_img


def subtract_gaussian_blur(img):
return cv2.addWeighted(img, 4, cv2.GaussianBlur(img, (0, 0), 5), -4, 128)


for filename in os.listdir(r'v/0/'):
    if filename.endswith(".jpg") or filename.endswith(".ppm") or filename.endswith(".jpeg") or filename.endswith(".png"):
        ImageCV = cv2.resize(cv2.imread(os.path.join(TEST_DIR) + filename), (224,224))
    img_crop = crop_img(ImageCV.copy(), 224, 224)

    ImageCV = subtract_gaussian_blur(img_crop.copy())

    ImageCV = np.asarray(ImageCV)

    ImageCV = ImageCV.astype('float32')

    ImageCV /= 255  

    ImageCV = np.expand_dims(ImageCV, axis=0)
    ImageCV = normalize(ImageCV, [0.23883381, 0.23883381, 0.23883381], [0.20992693, 0.25749, 0.26330808]) #Values from train

    prob = model.predict(ImageCV)
    if prob &lt;= 0.75:  #.75 = 80% | .70=79% &gt;&gt;&gt;&gt; .70 = 82% | .75 = 79%
        print(&quot;nonPDR &gt;&gt;&gt;&quot;, filename)
        nonPdr += 1
    else:
        print(&quot;PDR &gt;&gt;&gt;&quot;, filename)
        pdr += 1
    print(prob)

print("Number of retinas with PDR: ",pdr)
print("Number of retinas without PDR: ",nonPdr)

The problem is: when I try to predict, roughly all of my preds are poor (the prediction are nonPdr, or class 0, to all images). I already tried to cut off the data augmentation to test, and the result doesn't change how I want. I tried too change my model, change the preprocess (this preprocess is the best I can use for this project) and never happens.

How can I deal with this?

UPDATE

As @serali said, I tried to cut some layers to reduce the overfitting. This is my model now:

vgg16_model = VGG16(weights="imagenet", include_top=True)
#visualize layers

print("VGG16 model layers")
for i, layer in enumerate(vgg16_model.layers):
    print(i, layer.name, layer.output_shape)
(2) remove the top layer
base_model = Model(input=vgg16_model.input, 
                   output=vgg16_model.get_layer("block1_pool").output)
(3) attach a new top layer
base_out = base_model.output
top_fc1 = GlobalAveragePooling2D()(base_out)
top_fc2 = Dense(16, activation='relu')(top_fc1)
top_fc3 = Dropout(0.5)(top_fc2)
top_preds = Dense(1, activation="sigmoid")(top_fc3)
(5) create new hybrid model
model = Model(input=base_model.input, output=top_preds)

As you can see, I cut in the first convolutional block, so my model looked like this:

0 input_1 (None, 224, 224, 3)
1 block1_conv1 (None, 224, 224, 64)
2 block1_conv2 (None, 224, 224, 64)
3 block1_pool (None, 112, 112, 64)
top_fc1 = GlobalAveragePooling2D()(base_out)
top_fc2 = Dense(16, activation='relu')(top_fc1)
top_fc3 = Dropout(0.5)(top_fc2)
top_preds = Dense(1, activation="sigmoid")(top_fc3)

But, when I try to predict the same images I've trained, the prediction is wrong (with foreign images the result is the same). So, how can I improve this?

score 7 · Accepted Answer · answered Oct 26 '19 at 22:58

This phenomenon is called overfitting. In short it means that your CNN has memorized the dataset, achieving $100\%$ training accuracy. This knowledge, however, doesn't generalize well to unseen data.

I'd suggest reading this post for more details on overfitting and ways to combat it.

leon dobrzinsky · Answer 2 · 2019-10-27T14:19:21.210

When getting something like a 100% after 6 epochs, it's almost certain (in my experience at least) that something is wrong at an earlier stage than training... I would start by debugging and verifying that label extraction in ReadImages is working as expected and comparing manually at least some of the predictions. A less likely possibility is that there might be something wrong with the train/validation sets themselves....You could try checking they have enough variability for example

Why are my predictions bad, if my accuracy in train is roughly 100% (Keras CNN)

UPDATE

(2) remove the top layer

(3) attach a new top layer

(5) create new hybrid model

2 Answers2

Linked