Lu Manh Hung / face_recognition · Commits

Commit 682cd4ec, authored Dec 24, 2019 by banglv
Commit message: additional
Parent commit: d7b11a94
Showing 2 changed files with 482 additions and 0 deletions.
backbones.py: +159 / -0
dual_network.py: +323 / -0
backbones.py (new file, 0 → 100644)
from keras import Input, Model
# , Model, optimizers
# from keras.layers import Dense, BatchNormalization, GlobalAveragePooling2D
# import numpy as np
# import keras.backend as K
import keras
import keras_applications
from keras_applications.resnet import preprocess_input
# from MonoNetworkTraining.metrics import ArcFace
# from MonoNetworkTraining._VTnet_data_loader import VTnetDataLoader
# import os
# import warnings
# import matplotlib.pyplot as plt
# from keras.layers import Conv2D, MaxPooling2D, PReLU, Layer, AveragePooling2D, Flatten, Dropout


def base_model_create(input_tensor, mode='r101'):
    # input_tensor = Input(shape=(224, 224, 3))
    if mode == 'r50':
        base_model = keras_applications.resnet.ResNet50(
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'r101':
        base_model = keras_applications.resnet.ResNet101(
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'r152':
        base_model = keras_applications.resnet.ResNet152(
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'r50v2':
        base_model = keras_applications.resnet_v2.ResNet50V2(
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'r101v2':
        base_model = keras_applications.resnet_v2.ResNet101V2(
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'r152v2':
        base_model = keras_applications.resnet_v2.ResNet152V2(
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'irv2':
        base_model = keras_applications.inception_resnet_v2.InceptionResNetV2(
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'mb':
        base_model = keras_applications.mobilenet.MobileNet(
            input_shape=None, alpha=1.0, depth_multiplier=1, dropout=1e-3,
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, pooling=None, classes=1000)
        return base_model
    elif mode == 'mbv2':
        base_model = keras_applications.mobilenet_v2.MobileNetV2(
            input_shape=None, alpha=1.0,
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, pooling=None, classes=1000)
        return base_model
    elif mode == 'd121':
        base_model = keras_applications.densenet.DenseNet121(
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'd169':
        base_model = keras_applications.densenet.DenseNet169(
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'd201':
        base_model = keras_applications.densenet.DenseNet201(
            include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'nn':
        base_model = keras_applications.nasnet.NASNetLarge(
            input_shape=None, include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, pooling=None, classes=1000)
        return base_model
    elif mode == 'nm':
        base_model = keras_applications.nasnet.NASNetMobile(
            input_shape=None, include_top=False, weights='imagenet',
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_tensor=input_tensor, pooling=None, classes=1000)
        return base_model
    elif mode == 'v16':
        base_model = keras_applications.vgg16.VGG16(
            include_top=False, weights='imagenet', input_tensor=input_tensor,
            input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'v19':
        base_model = keras_applications.vgg19.VGG19(
            include_top=False, weights='imagenet', input_tensor=input_tensor,
            input_shape=None, pooling=None, classes=1000)
        return base_model
    elif mode == 'inceptionV3':
        base_model = keras_applications.inception_v3.InceptionV3(
            include_top=False, weights='imagenet', input_tensor=input_tensor,
            backend=keras.backend, layers=keras.layers, models=keras.models, utils=keras.utils,
            input_shape=None, pooling=None, classes=1000)
        return base_model
\ No newline at end of file
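
A minimal usage sketch, not part of the commit: it assumes Keras 2.2.x with keras_applications installed, network access to download the ImageNet weights, and that backbones.py is importable from the current path; the 112x112x3 input matches the face-crop size used in dual_network.py below.

# Hypothetical usage sketch (not part of this commit). Assumes Keras 2.2.x with
# keras_applications available and that the ImageNet weights can be downloaded.
from keras import Input
from backbones import base_model_create  # adjust to wherever backbones.py sits in the repo

input_tensor = Input(shape=(112, 112, 3))                # same face-crop size as dual_network.py
backbone = base_model_create(input_tensor, mode='r50')   # any supported mode string, e.g. 'r50', 'mbv2', 'd121'
print(backbone.output_shape)                             # spatial size depends on the backbone's total stride
backbone.summary()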
dual_network.py (new file, 0 → 100644)
import os
from keras.layers import Lambda, GlobalAveragePooling2D, Flatten
import keras.backend as K
import tensorflow as tf
import numpy as np
from keras.optimizers import SGD
from keras.regularizers import l2
from DualTraining.data_loader import FaceDataLoader
# from modified_sgd import Modified_SGD
from keras.layers import Input
from keras import initializers
from keras.models import Model
from keras.layers import Dropout, Dense, BatchNormalization
from DualTraining.modified_sgd import Modified_SGD
from FaceRecognitionVHT.options import get_dual_network_args
from examples.MonoNetworkTraining.backbones import base_model_create
from sklearn.metrics import roc_auc_score

opts = get_dual_network_args()


def initialize_bias(shape, name=None):
    """
    The paper, http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf
    suggests to initialize CNN layer bias with mean as 0.5 and standard deviation of 0.01
    """
    return np.random.normal(loc=0.5, scale=1e-2, size=shape)


def initialize_weights(shape, name=None):
    """
    The paper, http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf
    suggests to initialize CNN layer weights with mean as 0.0 and standard deviation of 0.01
    """
    return np.random.normal(loc=0.0, scale=1e-2, size=shape)


def auroc(y_true, y_pred):
    return tf.py_func(roc_auc_score, (y_true, y_pred), tf.double)


def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))


def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))


def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(K.epsilon() + pt_1)) \
               - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon()))
    return focal_loss_fixed


def cosine_distance(vests):
    x, y = vests
    x = K.l2_normalize(x, axis=-1)
    y = K.l2_normalize(y, axis=-1)
    return -K.mean(x * y, axis=-1, keepdims=True)


def cos_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)


class DualNetwork:
    """Class that constructs the Siamese Net for training

    This Class was constructed to create the siamese net and train it.

    Attributes:
        input_shape: image size
        model: current siamese model
        learning_rate: SGD learning rate
        omniglot_loader: instance of OmniglotLoader
        summary_writer: tensorflow writer to store the logs
    """

    def __init__(self, dataset_path, save_path, learning_rate, batch_size, use_augmentation):
        """Inits SiameseNetwork with the provided values for the attributes.

        It also constructs the siamese network architecture, creates a dataset
        loader and opens the log file.

        Arguments:
            dataset_path: path of Omniglot dataset
            learning_rate: SGD learning rate
            batch_size: size of the batch to be used in training
            use_augmentation: boolean that allows us to select if data augmentation
                is used or not
            learning_rate_multipliers: learning-rate multipliers (relative to the learning_rate
                chosen) that will be applied to each fo the conv and dense layers
                for example:
                    # Setting the Learning rate multipliers
                    LR_mult_dict = {}
                    LR_mult_dict['conv1'] = 1
                    LR_mult_dict['conv2'] = 1
                    LR_mult_dict['dense1'] = 2
                    LR_mult_dict['dense2'] = 2
            l2_regularization_penalization: l2 penalization for each layer.
                for example:
                    # Setting the Learning rate multipliers
                    L2_dictionary = {}
                    L2_dictionary['conv1'] = 0.1
                    L2_dictionary['conv2'] = 0.001
                    L2_dictionary['dense1'] = 0.001
                    L2_dictionary['dense2'] = 0.01
            tensorboard_log_path: path to store the logs
        """
        self.based_path = save_path
        if os.path.exists(self.based_path) == False:
            os.makedirs(self.based_path)
        self.model_name = opts.model_name
        self.img_width, self.img_height = 112, 112
        self.input_shape = (self.img_height, self.img_width, 3)
        self.input_tensor = Input(shape=self.input_shape)
        self.threshold = 0.5
        self.learning_rate = learning_rate
        self.data_loader = FaceDataLoader(dataset_path=dataset_path,
                                          use_augmentation=use_augmentation,
                                          batch_size=batch_size)
        # self.summary_writer = tf.summary.FileWriter(tensorboard_log_path)
        self._construct_dual_architecture()

    def create_base_network(self, backbone='inceptionV3'):
        base_model = base_model_create(self.input_tensor, mode=backbone)
        # for layer in base_model.layers[:-80]:
        #     layer.trainable = False
        #
        # for layer in base_model.layers[-80:]:
        #     layer.trainable = True
        layer_dict = dict([(layer.name, layer) for layer in base_model.layers])
        x = base_model.layers[-3].output
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)
        x = Flatten()(x)
        x = Dense(512, activation='relu', name='dense512')(x)
        model = Model(inputs=base_model.input, outputs=x)
        print('--------------------n_layers of model %d-----------------------' % len(model.layers))
        model.summary()
        return model

    def _dual_base_line(self):
        """ Constructs the siamese architecture and stores it in the class

        Arguments:
            learning_rate_multipliers
        """
        convolutional_net = self.create_base_network()
        # Now the pairs of images
        input_image_1 = Input(self.input_shape)
        input_image_2 = Input(self.input_shape)

        encoded_image_1 = convolutional_net(input_image_1)  # 512D
        encoded_image_2 = convolutional_net(input_image_2)

        # L1 distance layer between the two encoded outputs
        # One could use Subtract from Keras, but we want the absolute value
        l1_distance_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
        l1_distance = l1_distance_layer([encoded_image_1, encoded_image_2])
        cosine_distance_layer = Lambda(cosine_distance,
                                       output_shape=cos_dist_output_shape)([encoded_image_1, encoded_image_2])

        # Same class or not prediction
        prediction = Dense(units=1, activation='sigmoid')(l1_distance)
        model = Model(inputs=[input_image_1, input_image_2], outputs=prediction)

        optimizer = Modified_SGD(lr=self.learning_rate, momentum=0.95)
        model.compile(loss='binary_crossentropy', metrics=['binary_accuracy'], optimizer=optimizer)
        #
        # model.compile(loss=[focal_loss(alpha=.15, gamma=1.1)], metrics=['accuracy'], optimizer=optimizer)
        return model

    def _construct_dual_architecture(self):
        """ Constructs the siamese architecture and stores it in the class

        Arguments:
            learning_rate_multipliers
        """
        self.model = self._dual_base_line()

    def train_dual_network(self, number_of_iterations, support_set_size,
                           final_momentum, momentum_slope, evaluate_each,
                           model_name):
        """ Train the Siamese net

        This is the main function for training the siamese net.
        In each every evaluate_each train iterations we evaluate one-shot tasks in
        validation and evaluation set. We also write to the log file.

        Arguments:
            number_of_iterations: maximum number of iterations to train.
            support_set_size: number of characters to use in the support set
                in one-shot tasks.
            final_momentum: mu_j in the paper. Each layer starts at 0.5 momentum
                but evolves linearly to mu_j
            momentum_slope: slope of the momentum evolution. In the paper we are
                only told that this momentum evolves linearly. Because of that I
                defined a slope to be passed to the training.
            evaluate_each: number of iterations defined to evaluate the one-shot
                tasks.
            model_name: save_name of the model

        Returns:
            Evaluation Accuracy
        """
        self.based_path = opts.path_to_save
        if not os.path.exists(self.based_path):
            os.makedirs(self.based_path)

        # First of all let's divide randomly the 30 train alphabets in train
        # and validation with 24 for training and 6 for validation
        self.data_loader.split_train_datasets()

        # Variables that will store 100 iterations losses and accuracies
        # after evaluate_each iterations these will be passed to tensorboard logs
        train_losses = np.zeros(shape=(evaluate_each))
        train_accuracies = np.zeros(shape=(evaluate_each))
        count = 0
        earrly_stop = 0
        # Stop criteria variables
        best_validation_accuracy = 0.0
        best_accuracy_iteration = 0
        validation_accuracy = 0.0

        # Train loop
        for iteration in range(number_of_iterations):
            # train set
            images, labels = self.data_loader.get_train_batch()
            train_loss, train_accuracy = self.model.train_on_batch(images, labels)

            # Decay learning rate 1 % per 500 iterations (in the paper the decay is
            # 1% per epoch). Also update linearly the momentum (starting from 0.5 to 1)
            if (iteration + 1) % 500 == 0:
                K.set_value(self.model.optimizer.lr, K.get_value(self.model.optimizer.lr) * 0.99)
            if K.get_value(self.model.optimizer.momentum) < final_momentum:
                K.set_value(self.model.optimizer.momentum,
                            K.get_value(self.model.optimizer.momentum) + momentum_slope)

            train_losses[count] = train_loss
            train_accuracies[count] = train_accuracy

            # validation set
            count += 1
            print('Iteration %d/%d: Train loss: %f, Train Accuracy: %f, lr = %f' %
                  (iteration + 1, number_of_iterations, train_loss, train_accuracy,
                   K.get_value(self.model.optimizer.lr)))

            # Each 100 iterations perform a one_shot_task and write to tensorboard the
            # stored losses and accuracies
            if (iteration + 1) % evaluate_each == 0:
                number_of_runs_per_alphabet = 10
                # use a support set size equal to the number of character in the alphabet
                validation_accuracy = self.data_loader.one_shot_test(
                    self.model, support_set_size, number_of_runs_per_alphabet,
                    is_validation=False)
                # self._write_logs_to_tensorboard(iteration, train_losses, train_accuracies, validation_accuracy, evaluate_each)
                count = 0

                # Some hyperparameters lead to 100%, although the output is almost the same in
                # all images.
                if (validation_accuracy == 1.0 and train_accuracy == 0.5):
                    print('Early Stopping: Gradient Explosion')
                    print('Validation Accuracy = ' + str(best_validation_accuracy))
                    return 0
                elif train_accuracy == 0.0:
                    return 0
                else:
                    # Save the model
                    if validation_accuracy > best_validation_accuracy:
                        best_validation_accuracy = validation_accuracy
                        best_accuracy_iteration = iteration
                        model_json = self.model.to_json()
                        with open(self.based_path + '/' + model_name + '.json', "w") as json_file:
                            json_file.write(model_json)
                        self.model.save_weights(self.based_path + '/' + model_name + '.h5')
                        print('Model saved!!!')

            # # If accuracy does not improve for 10000 batches stop the training
            # if iteration - best_accuracy_iteration > 50000:
            #     self.model.load_weights('./models_siamese/siamese_net_vgg16_new_1.h5')
            #     print('--------------Model reload!!!----------------')
            limit = 100000
            if iteration - best_accuracy_iteration > limit:
                print('Early Stopping: validation accuracy did not increase for %d iterations' % limit)
                print('Best Validation Accuracy = ' + str(best_validation_accuracy))
                print('Validation Accuracy = ' + str(best_validation_accuracy))
                break

        print('Trained Ended!')
        return best_validation_accuracy
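
A hedged driver sketch, not part of the commit: it assumes the DualTraining, FaceRecognitionVHT and examples.MonoNetworkTraining packages referenced by the imports above are on the Python path, that get_dual_network_args() supplies model_name and path_to_save, and that the dataset directory follows whatever layout FaceDataLoader expects. All paths and hyperparameter values below are placeholders chosen only to illustrate the call signatures.

# Hypothetical training driver (not part of this commit); paths and
# hyperparameters are placeholders, not values used by the author.
from dual_network import DualNetwork  # adjust to the actual module path in the repo

siamese = DualNetwork(dataset_path='data/faces',   # placeholder dataset root
                      save_path='models/dual',     # placeholder checkpoint directory
                      learning_rate=1e-3,
                      batch_size=32,
                      use_augmentation=True)

best_acc = siamese.train_dual_network(number_of_iterations=100000,
                                      support_set_size=5,
                                      final_momentum=0.9,
                                      momentum_slope=1e-4,
                                      evaluate_each=500,
                                      model_name='dual_siamese')
print('Best one-shot validation accuracy:', best_acc)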