author | Michael Foiani <sotech117@michaels-mbp-3.devices.brown.edu> | 2022-05-02 17:43:39 -0400
committer | Michael Foiani <sotech117@michaels-mbp-3.devices.brown.edu> | 2022-05-02 17:43:39 -0400
commit | 8fd2dc0bed674e9098e4de312f571e6ba9a70550 (patch)
tree | 037df062239abe47b3531f502612d4e3df64b572
parent | 06c92ad29268525c151dc96323e8a40b75e0d9c8 (diff)
Basic start. Implemented skeleton of loss functions.
-rw-r--r-- | hyperparameters.py | 55
-rw-r--r-- | losses.py | 129
-rw-r--r-- | main.py | 248
-rw-r--r-- | preprocess.py | 224
4 files changed, 656 insertions, 0 deletions
diff --git a/hyperparameters.py b/hyperparameters.py
new file mode 100644
index 00000000..487023f3
--- /dev/null
+++ b/hyperparameters.py
@@ -0,0 +1,55 @@
+"""
+Homework 5 - CNNs
+CS1430 - Computer Vision
+Brown University
+"""
+
+"""
+Number of epochs. If you experiment with more complex networks you
+might need to increase this. Likewise if you add regularization that
+slows training.
+"""
+num_epochs = 50
+
+"""
+A critical parameter that can dramatically affect whether training
+succeeds or fails. The value for this depends significantly on which
+optimizer is used. Refer to the default learning rate parameter of
+your chosen optimizer as a starting point.
+"""
+learning_rate = 1e-4
+
+"""
+Momentum on the gradient (if you use a momentum-based optimizer)
+"""
+momentum = 0.01
+
+"""
+Resize image size for task 1. Task 3 must have an image size of 224,
+so that is hard-coded elsewhere.
+"""
+img_size = 224
+
+"""
+Sample size for calculating the mean and standard deviation of the
+training data. This many images will be randomly selected to be read
+into memory temporarily.
+"""
+preprocess_sample_size = 400
+
+"""
+Maximum number of weight files to save to checkpoint directory. If
+set to a number <= 0, then all weight files of every epoch will be
+saved. Otherwise, only the weights with highest accuracy will be saved.
+"""
+max_num_weights = 5
+
+"""
+Defines the number of training examples per batch.
+You don't need to modify this.
+"""
+batch_size = 10
+
+"""
+The number of image scene classes. Don't change this.
+"""
+num_classes = 15
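As a quick illustration (an editor's sketch, not part of the commit), these values are meant to be imported wherever training is configured; losses.py below constructs its RMSprop optimizer from the same learning rate and momentum:

```python
import tensorflow as tf

import hyperparameters as hp

# Mirror the optimizer construction in losses.py, but read the shared
# hyperparameters module rather than repeating the literals.
optimizer = tf.keras.optimizers.RMSprop(
    learning_rate=hp.learning_rate,  # 1e-4
    momentum=hp.momentum)            # 0.01
```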
""" + + def __init__(self): + super(YourModel, self).__init__() + + self.alpha = 1 + self.beta = 1 + + self.optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-4, momentum=0.01) + + self.vgg16 = [ + # Block 1 + Conv2D(64, 3, 1, padding="same", + activation="relu", name="block1_conv1"), + Conv2D(64, 3, 1, padding="same", + activation="relu", name="block1_conv2"), + MaxPool2D(2, name="block1_pool"), + # Block 2 + Conv2D(128, 3, 1, padding="same", + activation="relu", name="block2_conv1"), + Conv2D(128, 3, 1, padding="same", + activation="relu", name="block2_conv2"), + MaxPool2D(2, name="block2_pool"), + # Block 3 + Conv2D(256, 3, 1, padding="same", + activation="relu", name="block3_conv1"), + Conv2D(256, 3, 1, padding="same", + activation="relu", name="block3_conv2"), + Conv2D(256, 3, 1, padding="same", + activation="relu", name="block3_conv3"), + MaxPool2D(2, name="block3_pool"), + # Block 4 + Conv2D(512, 3, 1, padding="same", + activation="relu", name="block4_conv1"), + Conv2D(512, 3, 1, padding="same", + activation="relu", name="block4_conv2"), + Conv2D(512, 3, 1, padding="same", + activation="relu", name="block4_conv3"), + MaxPool2D(2, name="block4_pool"), + # Block 5 + Conv2D(512, 3, 1, padding="same", + activation="relu", name="block5_conv1"), + Conv2D(512, 3, 1, padding="same", + activation="relu", name="block5_conv2"), + Conv2D(512, 3, 1, padding="same", + activation="relu", name="block5_conv3"), + MaxPool2D(2, name="block5_pool"), + ] + + self.head = [ + # Dropout(.2), + # Dense(256, activation='silu'), + # Dense(512, activation='silu'), + # Dropout(.3), + # tf.keras.layers.GlobalAveragePooling2D(), + # Dense(15, activation='softmax') + ] + + self.vgg16 = tf.keras.Sequential(self.vgg16, name="vgg_base") + self.head = tf.keras.Sequential(self.head, name="vgg_head") + + self.indexed_layers = [layer for layer in self.vgg16 if layer.name.contains("conv1")] + self.desired = [layer.name for layer in self.vgg16 if layer.name.contains("conv1")] + + def forward_pass(self, x): + layers = [] + for layer in self.vgg16.layers: + # pass the x through + x = layer(x) + print("Sotech117 is so so sus") + + # save the output of each layer if it is in the desired list + if layer.name in self.desired: + layers.append(x) + + return x, np.array(layers) + + + def loss_function(self, p, a, x): + _, photo_layers = self.forward_pass(p) + _, art_layers = self.forward_pass(a) + _, input_layers = self.forward_pass(x) + + + + def content_loss(photo_layers, input_layers): + L_content = tf.reduce_mean(tf.square(photo_layers - input_layers)) + return L_content + + def layer_loss(art_layers, input_layers, layer): + + #vectorize the inputs + art_vector = art_layers.reshape(-1, 224**2) + input_vector = input_layers.reshape(-1, 224**2) + + # get the gram matrix + input_dim = input_layers.shape[0] + G = np.zeros((input_dim, input_dim)) + + for i in range(input_dim): + for j in range(input_dim): + k = np.dot(input_layers[i], art_layers[j]) + G[i,j] = k + + # get the loss per each lateral layer + # N depends on # of filters in the layer, M depends on hight and width of feature map + M_l = art_layers.shape[0] * art_layers.shape[1] + + # layer.filteres might not work + E_l = 1/4 * (layer.filters**(-2)) * (M_l**(-2)) * np.sum(np.square(G - input_layers)) + + # while Sotech is botty: + # Jayson_tatum.tear_acl() + # return ("this is just another day") + + def style_loss(self, art_layers, input_layers): + L_style = 0 + for layer in self.indexed_layers: + L_style += self.layer_loss(art_layers, input_layers, layer) + return 
diff --git a/main.py b/main.py
new file mode 100644
index 00000000..ca87788d
--- /dev/null
+++ b/main.py
@@ -0,0 +1,248 @@
+import os
+import sys
+import argparse
+import re
+from datetime import datetime
+import tensorflow as tf
+
+import hyperparameters as hp
+from models import YourModel, VGGModel
+from preprocess import Datasets
+from skimage.transform import resize
+from tensorboard_utils import \
+    ImageLabelingLogger, ConfusionMatrixLogger, CustomModelSaver
+
+from skimage.io import imread
+from lime import lime_image
+from skimage.segmentation import mark_boundaries
+from matplotlib import pyplot as plt
+import numpy as np
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+
+def parse_args():
+    """ Perform command-line argument parsing. """
+
+    parser = argparse.ArgumentParser(
+        description="Let's train some neural nets!",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        '--task',
+        required=True,
+        choices=['1', '3'],
+        help='''Which task of the assignment to run -
+        training from scratch (1), or fine tuning VGG-16 (3).''')
+    parser.add_argument(
+        '--data',
+        default='..'+os.sep+'data'+os.sep,
+        help='Location where the dataset is stored.')
+    parser.add_argument(
+        '--load-vgg',
+        default='vgg16_imagenet.h5',
+        help='''Path to pre-trained VGG-16 file (only applicable to
+        task 3).''')
+    parser.add_argument(
+        '--load-checkpoint',
+        default=None,
+        help='''Path to model checkpoint file (should end with the
+        extension .h5). Checkpoints are automatically saved when you
+        train your model. If you want to continue training from where
+        you left off, this is how you would load your weights.''')
+    parser.add_argument(
+        '--confusion',
+        action='store_true',
+        help='''Log a confusion matrix at the end of each
+        epoch (viewable in Tensorboard). This is turned off
+        by default as it takes a little bit of time to complete.''')
+    parser.add_argument(
+        '--evaluate',
+        action='store_true',
+        help='''Skips training and evaluates on the test set once.
+        You can use this to test an already trained model by loading
+        its checkpoint.''')
+    parser.add_argument(
+        '--lime-image',
+        default='test/Bedroom/image_0003.jpg',
+        help='''Name of an image in the dataset to use for LIME evaluation.''')
+
+    return parser.parse_args()
+
+
+def LIME_explainer(model, path, preprocess_fn):
+    """
+    This function takes in a trained model and a path to an image and
+    outputs 5 visual explanations using the LIME model.
+    """
+
+    def image_and_mask(title, positive_only=True, num_features=5,
+                       hide_rest=True):
+        temp, mask = explanation.get_image_and_mask(
+            explanation.top_labels[0], positive_only=positive_only,
+            num_features=num_features, hide_rest=hide_rest)
+        plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))
+        plt.title(title)
+        plt.show()
+
+    image = imread(path)
+    if len(image.shape) == 2:
+        image = np.stack([image, image, image], axis=-1)
+    image = preprocess_fn(image)
+    image = resize(image, (hp.img_size, hp.img_size, 3))
+
+    explainer = lime_image.LimeImageExplainer()
+
+    explanation = explainer.explain_instance(
+        image.astype('double'), model.predict, top_labels=5, hide_color=0,
+        num_samples=1000)
+
+    # The top 5 superpixels that are most positive towards the class with the
+    # rest of the image hidden
+    image_and_mask("Top 5 superpixels", positive_only=True, num_features=5,
+                   hide_rest=True)
+
+    # The top 5 superpixels with the rest of the image present
+    image_and_mask("Top 5 with the rest of the image present",
+                   positive_only=True, num_features=5, hide_rest=False)
+
+    # The 'pros and cons' (pros in green, cons in red)
+    image_and_mask("Pros(green) and Cons(red)",
+                   positive_only=False, num_features=10, hide_rest=False)
+
+    # Select the same class explained on the figures above.
+    ind = explanation.top_labels[0]
+    # Map each explanation weight to the corresponding superpixel
+    dict_heatmap = dict(explanation.local_exp[ind])
+    heatmap = np.vectorize(dict_heatmap.get)(explanation.segments)
+    plt.imshow(heatmap, cmap='RdBu', vmin=-heatmap.max(), vmax=heatmap.max())
+    plt.colorbar()
+    plt.title("Map each explanation weight to the corresponding superpixel")
+    plt.show()
+
+
+def train(model, datasets, checkpoint_path, logs_path, init_epoch):
+    """ Training routine. """
+
+    # Keras callbacks for training
+    callback_list = [
+        tf.keras.callbacks.TensorBoard(
+            log_dir=logs_path,
+            update_freq='batch',
+            profile_batch=0),
+        ImageLabelingLogger(logs_path, datasets),
+        CustomModelSaver(checkpoint_path, ARGS.task, hp.max_num_weights)
+    ]
+
+    # Include confusion logger in callbacks if flag set
+    if ARGS.confusion:
+        callback_list.append(ConfusionMatrixLogger(logs_path, datasets))
+
+    # Begin training
+    model.fit(
+        x=datasets.train_data,
+        validation_data=datasets.test_data,
+        epochs=hp.num_epochs,
+        batch_size=None,
+        callbacks=callback_list,
+        initial_epoch=init_epoch,
+    )
+
+
+def test(model, test_data):
+    """ Testing routine. """
+
+    # Run model on test set
+    model.evaluate(
+        x=test_data,
+        verbose=1,
+    )
+
+
+def main():
+    """ Main function. """
+
+    time_now = datetime.now()
+    timestamp = time_now.strftime("%m%d%y-%H%M%S")
+    init_epoch = 0
+
+    # If loading from a checkpoint, the loaded checkpoint's directory
+    # will be used for future checkpoints
+    if ARGS.load_checkpoint is not None:
+        ARGS.load_checkpoint = os.path.abspath(ARGS.load_checkpoint)
+
+        # Get timestamp and epoch from filename
+        regex = r"(?:.+)(?:\.e)(\d+)(?:.+)(?:.h5)"
+        init_epoch = int(re.match(regex, ARGS.load_checkpoint).group(1)) + 1
+        timestamp = os.path.basename(os.path.dirname(ARGS.load_checkpoint))
+
+    # If paths provided by program arguments are accurate, then this will
+    # ensure they are used. If not, these directories/files will be
+    # set relative to the directory of run.py
+    if os.path.exists(ARGS.data):
+        ARGS.data = os.path.abspath(ARGS.data)
+    if os.path.exists(ARGS.load_vgg):
+        ARGS.load_vgg = os.path.abspath(ARGS.load_vgg)
+
+    # Run script from location of run.py
+    os.chdir(sys.path[0])
+
+    datasets = Datasets(ARGS.data, ARGS.task)
+
+    if ARGS.task == '1':
+        model = YourModel()
+        model(tf.keras.Input(shape=(hp.img_size, hp.img_size, 3)))
+        checkpoint_path = "checkpoints" + os.sep + \
+            "your_model" + os.sep + timestamp + os.sep
+        logs_path = "logs" + os.sep + "your_model" + \
+            os.sep + timestamp + os.sep
+
+        # Print summary of model
+        model.summary()
+    else:
+        model = VGGModel()
+        checkpoint_path = "checkpoints" + os.sep + \
+            "vgg_model" + os.sep + timestamp + os.sep
+        logs_path = "logs" + os.sep + "vgg_model" + \
+            os.sep + timestamp + os.sep
+        model(tf.keras.Input(shape=(224, 224, 3)))
+
+        # Print summaries for both parts of the model
+        model.vgg16.summary()
+        model.head.summary()
+
+        # Load base of VGG model
+        model.vgg16.load_weights(ARGS.load_vgg, by_name=True)
+
+    # Load checkpoints
+    if ARGS.load_checkpoint is not None:
+        if ARGS.task == '1':
+            model.load_weights(ARGS.load_checkpoint, by_name=False)
+        else:
+            model.head.load_weights(ARGS.load_checkpoint, by_name=False)
+
+    # Make checkpoint directory if needed
+    if not ARGS.evaluate and not os.path.exists(checkpoint_path):
+        os.makedirs(checkpoint_path)
+
+    # Compile model graph
+    model.compile(
+        optimizer=model.optimizer,
+        loss=model.loss_fn,
+        metrics=["sparse_categorical_accuracy"])
+
+    if ARGS.evaluate:
+        test(model, datasets.test_data)
+
+        # TODO: change the image path to be the image of your choice by changing
+        # the lime-image flag when calling run.py to investigate
+        # i.e. python run.py --evaluate --lime-image test/Bedroom/image_003.jpg
+        path = ARGS.data + os.sep + ARGS.lime_image
+        LIME_explainer(model, path, datasets.preprocess_fn)
+    else:
+        train(model, datasets, checkpoint_path, logs_path, init_epoch)
+
+
+# Make arguments global
+ARGS = parse_args()
+
+main()
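Given the flags defined in parse_args(), typical invocations look like the following (the file is main.py in this commit, though its comments refer to run.py; the checkpoint name is a placeholder matching the `.e<epoch>` pattern the regex expects):

```
python main.py --task 1 --data ../data
python main.py --task 3 --data ../data --load-vgg vgg16_imagenet.h5
python main.py --task 1 --evaluate --load-checkpoint checkpoints/your_model/<timestamp>/<weights>.e<epoch>-<acc>.h5
```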
""" + + time_now = datetime.now() + timestamp = time_now.strftime("%m%d%y-%H%M%S") + init_epoch = 0 + + # If loading from a checkpoint, the loaded checkpoint's directory + # will be used for future checkpoints + if ARGS.load_checkpoint is not None: + ARGS.load_checkpoint = os.path.abspath(ARGS.load_checkpoint) + + # Get timestamp and epoch from filename + regex = r"(?:.+)(?:\.e)(\d+)(?:.+)(?:.h5)" + init_epoch = int(re.match(regex, ARGS.load_checkpoint).group(1)) + 1 + timestamp = os.path.basename(os.path.dirname(ARGS.load_checkpoint)) + + # If paths provided by program arguments are accurate, then this will + # ensure they are used. If not, these directories/files will be + # set relative to the directory of run.py + if os.path.exists(ARGS.data): + ARGS.data = os.path.abspath(ARGS.data) + if os.path.exists(ARGS.load_vgg): + ARGS.load_vgg = os.path.abspath(ARGS.load_vgg) + + # Run script from location of run.py + os.chdir(sys.path[0]) + + datasets = Datasets(ARGS.data, ARGS.task) + + if ARGS.task == '1': + model = YourModel() + model(tf.keras.Input(shape=(hp.img_size, hp.img_size, 3))) + checkpoint_path = "checkpoints" + os.sep + \ + "your_model" + os.sep + timestamp + os.sep + logs_path = "logs" + os.sep + "your_model" + \ + os.sep + timestamp + os.sep + + # Print summary of model + model.summary() + else: + model = VGGModel() + checkpoint_path = "checkpoints" + os.sep + \ + "vgg_model" + os.sep + timestamp + os.sep + logs_path = "logs" + os.sep + "vgg_model" + \ + os.sep + timestamp + os.sep + model(tf.keras.Input(shape=(224, 224, 3))) + + # Print summaries for both parts of the model + model.vgg16.summary() + model.head.summary() + + # Load base of VGG model + model.vgg16.load_weights(ARGS.load_vgg, by_name=True) + + # Load checkpoints + if ARGS.load_checkpoint is not None: + if ARGS.task == '1': + model.load_weights(ARGS.load_checkpoint, by_name=False) + else: + model.head.load_weights(ARGS.load_checkpoint, by_name=False) + + # Make checkpoint directory if needed + if not ARGS.evaluate and not os.path.exists(checkpoint_path): + os.makedirs(checkpoint_path) + + # Compile model graph + model.compile( + optimizer=model.optimizer, + loss=model.loss_fn, + metrics=["sparse_categorical_accuracy"]) + + if ARGS.evaluate: + test(model, datasets.test_data) + + # TODO: change the image path to be the image of your choice by changing + # the lime-image flag when calling run.py to investigate + # i.e. python run.py --evaluate --lime-image test/Bedroom/image_003.jpg + path = ARGS.data + os.sep + ARGS.lime_image + LIME_explainer(model, path, datasets.preprocess_fn) + else: + train(model, datasets, checkpoint_path, logs_path, init_epoch) + + +# Make arguments global +ARGS = parse_args() + +main() diff --git a/preprocess.py b/preprocess.py new file mode 100644 index 00000000..b7cfdb67 --- /dev/null +++ b/preprocess.py @@ -0,0 +1,224 @@ +""" +Homework 5 - CNNs +CS1430 - Computer Vision +Brown University +""" + +import os +import random +import numpy as np +from PIL import Image +import tensorflow as tf + +import hyperparameters as hp + +class Datasets(): + """ Class for containing the training and test sets as well as + other useful data-related information. Contains the functions + for preprocessing. 
+ """ + + def __init__(self, data_path, task): + + self.data_path = data_path + self.task = task + + # Dictionaries for (label index) <--> (class name) + self.idx_to_class = {} + self.class_to_idx = {} + + # For storing list of classes + self.classes = [""] * hp.num_classes + + # Mean and std for standardization + self.mean = np.zeros((3,)) + self.std = np.zeros((3,)) + self.calc_mean_and_std() + + # Setup data generators + self.train_data = self.get_data( + os.path.join(self.data_path, "train/"), task == '3', True, True) + self.test_data = self.get_data( + os.path.join(self.data_path, "test/"), task == '3', False, False) + + def calc_mean_and_std(self): + """ Calculate mean and standard deviation of a sample of the + training dataset for standardization. + + Arguments: none + + Returns: none + """ + + # Get list of all images in training directory + file_list = [] + for root, _, files in os.walk(os.path.join(self.data_path, "train/")): + for name in files: + if name.endswith(".jpg"): + file_list.append(os.path.join(root, name)) + + # Shuffle filepaths + random.shuffle(file_list) + + # Take sample of file paths + file_list = file_list[:hp.preprocess_sample_size] + + # Allocate space in memory for images + data_sample = np.zeros( + (hp.preprocess_sample_size, hp.img_size, hp.img_size, 3)) + + # Import images + for i, file_path in enumerate(file_list): + img = Image.open(file_path) + img = img.resize((hp.img_size, hp.img_size)) + img = np.array(img, dtype=np.float32) + img /= 255. + + # Grayscale -> RGB + if len(img.shape) == 2: + img = np.stack([img, img, img], axis=-1) + + data_sample[i] = img + + # TODO: Calculate the pixel-wise mean and standard deviation + # of the images in data_sample and store them in + # self.mean and self.std respectively. + # ========================================================== + + self.mean = np.mean(data_sample, axis=(1,2,3), dtype=np.float32) + self.std = np.std(data_sample, axis=(1,2,3), dtype=np.float32) + # ========================================================== + + print("Dataset mean: [{0:.4f}, {1:.4f}, {2:.4f}]".format( + self.mean[0], self.mean[1], self.mean[2])) + + print("Dataset std: [{0:.4f}, {1:.4f}, {2:.4f}]".format( + self.std[0], self.std[1], self.std[2])) + + def standardize(self, img): + """ Function for applying standardization to an input image. + + Arguments: + img - numpy array of shape (image size, image size, 3) + + Returns: + img - numpy array of shape (image size, image size, 3) + """ + + # TODO: Standardize the input image. Use self.mean and self.std + # that were calculated in calc_mean_and_std() to perform + # the standardization. + # ============================================================= + img = (img - np.max(self.mean))/np.max(self.std) + # ============================================================= + + return img + + def preprocess_fn(self, img): + """ Preprocess function for ImageDataGenerator. """ + + if self.task == '3': + img = tf.keras.applications.vgg16.preprocess_input(img) + else: + img = img / 255. + img = self.standardize(img) + return img + + def custom_preprocess_fn(self, img): + """ Custom preprocess function for ImageDataGenerator. """ + + if self.task == '3': + img = tf.keras.applications.vgg16.preprocess_input(img) + else: + img = img / 255. + img = self.standardize(img) + + # EXTRA CREDIT: + # Write your own custom data augmentation procedure, creating + # an effect that cannot be achieved using the arguments of + # ImageDataGenerator. 
This can potentially boost your accuracy + # in the validation set. Note that this augmentation should + # only be applied to some input images, so make use of the + # 'random' module to make sure this happens. Also, make sure + # that ImageDataGenerator uses *this* function for preprocessing + # on augmented data. + + if random.random() < 0.3: + img = img + tf.random.uniform( + (hp.img_size, hp.img_size, 1), + minval=-0.1, + maxval=0.1) + + return img + + def get_data(self, path, is_vgg, shuffle, augment): + """ Returns an image data generator which can be iterated + through for images and corresponding class labels. + + Arguments: + path - Filepath of the data being imported, such as + "../data/train" or "../data/test" + is_vgg - Boolean value indicating whether VGG preprocessing + should be applied to the images. + shuffle - Boolean value indicating whether the data should + be randomly shuffled. + augment - Boolean value indicating whether the data should + be augmented or not. + + Returns: + An iterable image-batch generator + """ + + if augment: + # TODO: Use the arguments of ImageDataGenerator() + # to augment the data. Leave the + # preprocessing_function argument as is unless + # you have written your own custom preprocessing + # function (see custom_preprocess_fn()). + # + # Documentation for ImageDataGenerator: https://bit.ly/2wN2EmK + # + # ============================================================ + + # data_gen = + data_gen = tf.keras.preprocessing.image.ImageDataGenerator( + preprocessing_function=self.preprocess_fn, rotation_range=20, width_shift_range=0.2, + height_shift_range=0.2, horizontal_flip=True, validation_split=0.2, fill_mode="reflect") + + # ============================================================ + else: + # Don't modify this + data_gen = tf.keras.preprocessing.image.ImageDataGenerator( + preprocessing_function=self.preprocess_fn) + + # VGG must take images of size 224x224 + img_size = 224 if is_vgg else hp.img_size + + classes_for_flow = None + + # Make sure all data generators are aligned in label indices + if bool(self.idx_to_class): + classes_for_flow = self.classes + + # Form image data generator from directory structure + data_gen = data_gen.flow_from_directory( + path, + target_size=(img_size, img_size), + class_mode='sparse', + batch_size=hp.batch_size, + shuffle=shuffle, + classes=classes_for_flow) + + # Setup the dictionaries if not already done + if not bool(self.idx_to_class): + unordered_classes = [] + for dir_name in os.listdir(path): + if os.path.isdir(os.path.join(path, dir_name)): + unordered_classes.append(dir_name) + + for img_class in unordered_classes: + self.idx_to_class[data_gen.class_indices[img_class]] = img_class + self.class_to_idx[img_class] = int(data_gen.class_indices[img_class]) + self.classes[int(data_gen.class_indices[img_class])] = img_class + + return data_gen |