authorMichael Foiani <sotech117@michaels-mbp-3.devices.brown.edu>2022-05-02 17:43:39 -0400
committerMichael Foiani <sotech117@michaels-mbp-3.devices.brown.edu>2022-05-02 17:43:39 -0400
commit8fd2dc0bed674e9098e4de312f571e6ba9a70550 (patch)
tree037df062239abe47b3531f502612d4e3df64b572
parent06c92ad29268525c151dc96323e8a40b75e0d9c8 (diff)
Basic start. Implemented skeleton of loss functions.
-rw-r--r--  hyperparameters.py   55
-rw-r--r--  losses.py           129
-rw-r--r--  main.py             248
-rw-r--r--  preprocess.py       224
4 files changed, 656 insertions, 0 deletions
diff --git a/hyperparameters.py b/hyperparameters.py
new file mode 100644
index 00000000..487023f3
--- /dev/null
+++ b/hyperparameters.py
@@ -0,0 +1,55 @@
+"""
+Homework 5 - CNNs
+CS1430 - Computer Vision
+Brown University
+"""
+
+"""
+Number of epochs. If you experiment with more complex networks you
+might need to increase this. Likewise if you add regularization that
+slows training.
+"""
+num_epochs = 50
+
+"""
+A critical parameter that can dramatically affect whether training
+succeeds or fails. The value for this depends significantly on which
+optimizer is used. Refer to the default learning rate parameter
+of your chosen optimizer as a starting point.
+"""
+learning_rate = 1e-4
+
+"""
+Momentum on the gradient (if you use a momentum-based optimizer)
+"""
+momentum = 0.01
+
+"""
+Image size to resize inputs to for task 1. Task 3 must have an image size of 224,
+so that is hard-coded elsewhere.
+"""
+img_size = 224
+
+"""
+Sample size for calculating the mean and standard deviation of the
+training data. This many images will be randomly selected to be read
+into memory temporarily.
+"""
+preprocess_sample_size = 400
+
+"""
+Maximum number of weight files to save to checkpoint directory. If
+set to a number <= 0, then all weight files of every epoch will be
+saved. Otherwise, only the weights with the highest accuracy will be kept.
+"""
+max_num_weights = 5
+
+"""
+Defines the number of training examples per batch.
+You don't need to modify this.
+"""
+batch_size = 10
+
+"""
+The number of image scene classes. Don't change this.
+"""
+num_classes = 15
diff --git a/losses.py b/losses.py
new file mode 100644
index 00000000..93449962
--- /dev/null
+++ b/losses.py
@@ -0,0 +1,129 @@
+import tensorflow as tf
+from tensorflow.keras.layers import \
+ Conv2D, MaxPool2D, Dropout, Flatten, Dense
+
+import numpy as np
+import hyperparameters as hp
+
+
+class YourModel(tf.keras.Model):
+ """ Your own neural network model. """
+
+ def __init__(self):
+ super(YourModel, self).__init__()
+
+        # Weighting factors for the content (alpha) and style (beta) loss terms
+        self.alpha = 1
+        self.beta = 1
+
+        self.optimizer = tf.keras.optimizers.RMSprop(
+            learning_rate=hp.learning_rate, momentum=hp.momentum)
+
+ self.vgg16 = [
+ # Block 1
+ Conv2D(64, 3, 1, padding="same",
+ activation="relu", name="block1_conv1"),
+ Conv2D(64, 3, 1, padding="same",
+ activation="relu", name="block1_conv2"),
+ MaxPool2D(2, name="block1_pool"),
+ # Block 2
+ Conv2D(128, 3, 1, padding="same",
+ activation="relu", name="block2_conv1"),
+ Conv2D(128, 3, 1, padding="same",
+ activation="relu", name="block2_conv2"),
+ MaxPool2D(2, name="block2_pool"),
+ # Block 3
+ Conv2D(256, 3, 1, padding="same",
+ activation="relu", name="block3_conv1"),
+ Conv2D(256, 3, 1, padding="same",
+ activation="relu", name="block3_conv2"),
+ Conv2D(256, 3, 1, padding="same",
+ activation="relu", name="block3_conv3"),
+ MaxPool2D(2, name="block3_pool"),
+ # Block 4
+ Conv2D(512, 3, 1, padding="same",
+ activation="relu", name="block4_conv1"),
+ Conv2D(512, 3, 1, padding="same",
+ activation="relu", name="block4_conv2"),
+ Conv2D(512, 3, 1, padding="same",
+ activation="relu", name="block4_conv3"),
+ MaxPool2D(2, name="block4_pool"),
+ # Block 5
+ Conv2D(512, 3, 1, padding="same",
+ activation="relu", name="block5_conv1"),
+ Conv2D(512, 3, 1, padding="same",
+ activation="relu", name="block5_conv2"),
+ Conv2D(512, 3, 1, padding="same",
+ activation="relu", name="block5_conv3"),
+ MaxPool2D(2, name="block5_pool"),
+ ]
+
+ self.head = [
+ # Dropout(.2),
+ # Dense(256, activation='silu'),
+ # Dense(512, activation='silu'),
+ # Dropout(.3),
+ # tf.keras.layers.GlobalAveragePooling2D(),
+ # Dense(15, activation='softmax')
+ ]
+
+ self.vgg16 = tf.keras.Sequential(self.vgg16, name="vgg_base")
+ self.head = tf.keras.Sequential(self.head, name="vgg_head")
+
+        # Track the first conv layer of each block; their activations are the
+        # ones saved during a forward pass.
+        self.indexed_layers = [
+            layer for layer in self.vgg16.layers if "conv1" in layer.name]
+        self.desired = [layer.name for layer in self.indexed_layers]
+
+    def forward_pass(self, x):
+        layers = []
+        for layer in self.vgg16.layers:
+            # pass x through the layer
+            x = layer(x)
+
+            # save the output of each layer if it is in the desired list
+            if layer.name in self.desired:
+                layers.append(x)
+
+        # return the final activation and the list of saved activations
+        # (kept as a list, since the saved tensors have different shapes)
+        return x, layers
+
+
+    def loss_function(self, p, a, x):
+        _, photo_layers = self.forward_pass(p)
+        _, art_layers = self.forward_pass(a)
+        _, input_layers = self.forward_pass(x)
+
+        # Weighted sum of the content and style terms
+        return self.alpha * self.content_loss(photo_layers, input_layers) + \
+            self.beta * self.style_loss(art_layers, input_layers)
+
+    def content_loss(self, photo_layers, input_layers):
+        # Sum the mean squared differences of the saved activations, layer by layer
+        L_content = 0
+        for p_layer, x_layer in zip(photo_layers, input_layers):
+            L_content += tf.reduce_mean(tf.square(p_layer - x_layer))
+        return L_content
+
+    def layer_loss(self, art_layers, input_layers, layer):
+
+        # vectorize the inputs (this assumes 224x224 feature maps; deeper,
+        # pooled layers would need their actual spatial size here)
+        art_vector = np.reshape(art_layers, (-1, 224**2))
+        input_vector = np.reshape(input_layers, (-1, 224**2))
+
+        # build the gram matrices for the art and input features
+        input_dim = input_vector.shape[0]
+        G_art = np.zeros((input_dim, input_dim))
+        G_input = np.zeros((input_dim, input_dim))
+
+        for i in range(input_dim):
+            for j in range(input_dim):
+                G_art[i, j] = np.dot(art_vector[i], art_vector[j])
+                G_input[i, j] = np.dot(input_vector[i], input_vector[j])
+
+        # get the loss for this layer
+        # N_l depends on the # of filters in the layer, M_l on the height and width of the feature map
+        M_l = art_layers.shape[0] * art_layers.shape[1]
+        N_l = layer.filters  # note: .filters only exists on Conv2D layers
+
+        E_l = 1 / 4 * (N_l ** (-2)) * (M_l ** (-2)) * np.sum(np.square(G_input - G_art))
+        return E_l
+
+    def style_loss(self, art_layers, input_layers):
+        L_style = 0
+        # pair each saved activation with its corresponding conv layer
+        for i, layer in enumerate(self.indexed_layers):
+            L_style += self.layer_loss(art_layers[i], input_layers[i], layer)
+        return L_style
+
+
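+# --- Hedged sketch, not part of the skeleton above --------------------------
+# The losses in YourModel follow the Gatys et al. style-transfer formulation:
+# a content loss over feature maps and a style loss over Gram matrices. The
+# helper below is a minimal, vectorized sketch of the per-layer style term
+# E_l, assuming `art_features` and `input_features` are (height, width,
+# channels) activations taken from the same conv layer. The name and
+# signature are illustrative only.
+def gram_style_layer_loss_sketch(art_features, input_features):
+    def gram_matrix(features):
+        # Flatten the spatial dimensions to (H*W, C); G = F^T F has shape (C, C).
+        h, w, c = features.shape
+        flat = tf.reshape(features, (h * w, c))
+        return tf.matmul(flat, flat, transpose_a=True)
+
+    h, w, c = input_features.shape
+    n_l = c       # N_l: number of filters in the layer
+    m_l = h * w   # M_l: size of each (flattened) feature map
+    g_art = gram_matrix(art_features)
+    g_input = gram_matrix(input_features)
+    # E_l = 1 / (4 * N_l^2 * M_l^2) * sum_{i,j} (G_input_ij - G_art_ij)^2
+    return tf.reduce_sum(tf.square(g_input - g_art)) / (4.0 * n_l ** 2 * m_l ** 2)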
diff --git a/main.py b/main.py
new file mode 100644
index 00000000..ca87788d
--- /dev/null
+++ b/main.py
@@ -0,0 +1,248 @@
+import os
+import sys
+import argparse
+import re
+from datetime import datetime
+import tensorflow as tf
+
+import hyperparameters as hp
+from models import YourModel, VGGModel
+from preprocess import Datasets
+from skimage.transform import resize
+from tensorboard_utils import \
+ ImageLabelingLogger, ConfusionMatrixLogger, CustomModelSaver
+
+from skimage.io import imread
+from lime import lime_image
+from skimage.segmentation import mark_boundaries
+from matplotlib import pyplot as plt
+import numpy as np
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+
+def parse_args():
+ """ Perform command-line argument parsing. """
+
+ parser = argparse.ArgumentParser(
+ description="Let's train some neural nets!",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument(
+ '--task',
+ required=True,
+ choices=['1', '3'],
+ help='''Which task of the assignment to run -
+ training from scratch (1), or fine tuning VGG-16 (3).''')
+ parser.add_argument(
+ '--data',
+ default='..'+os.sep+'data'+os.sep,
+ help='Location where the dataset is stored.')
+ parser.add_argument(
+ '--load-vgg',
+ default='vgg16_imagenet.h5',
+ help='''Path to pre-trained VGG-16 file (only applicable to
+ task 3).''')
+ parser.add_argument(
+ '--load-checkpoint',
+ default=None,
+ help='''Path to model checkpoint file (should end with the
+ extension .h5). Checkpoints are automatically saved when you
+ train your model. If you want to continue training from where
+ you left off, this is how you would load your weights.''')
+ parser.add_argument(
+ '--confusion',
+ action='store_true',
+ help='''Log a confusion matrix at the end of each
+ epoch (viewable in Tensorboard). This is turned off
+ by default as it takes a little bit of time to complete.''')
+ parser.add_argument(
+ '--evaluate',
+ action='store_true',
+ help='''Skips training and evaluates on the test set once.
+ You can use this to test an already trained model by loading
+ its checkpoint.''')
+ parser.add_argument(
+ '--lime-image',
+ default='test/Bedroom/image_0003.jpg',
+ help='''Name of an image in the dataset to use for LIME evaluation.''')
+
+ return parser.parse_args()
+
+
+def LIME_explainer(model, path, preprocess_fn):
+ """
+ This function takes in a trained model and a path to an image and outputs 5
+ visual explanations using the LIME model
+ """
+
+ def image_and_mask(title, positive_only=True, num_features=5,
+ hide_rest=True):
+ temp, mask = explanation.get_image_and_mask(
+ explanation.top_labels[0], positive_only=positive_only,
+ num_features=num_features, hide_rest=hide_rest)
+ plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))
+ plt.title(title)
+ plt.show()
+
+ image = imread(path)
+ if len(image.shape) == 2:
+ image = np.stack([image, image, image], axis=-1)
+ image = preprocess_fn(image)
+ image = resize(image, (hp.img_size, hp.img_size, 3))
+
+ explainer = lime_image.LimeImageExplainer()
+
+ explanation = explainer.explain_instance(
+ image.astype('double'), model.predict, top_labels=5, hide_color=0,
+ num_samples=1000)
+
+ # The top 5 superpixels that are most positive towards the class with the
+ # rest of the image hidden
+ image_and_mask("Top 5 superpixels", positive_only=True, num_features=5,
+ hide_rest=True)
+
+ # The top 5 superpixels with the rest of the image present
+ image_and_mask("Top 5 with the rest of the image present",
+ positive_only=True, num_features=5, hide_rest=False)
+
+ # The 'pros and cons' (pros in green, cons in red)
+ image_and_mask("Pros(green) and Cons(red)",
+ positive_only=False, num_features=10, hide_rest=False)
+
+ # Select the same class explained on the figures above.
+ ind = explanation.top_labels[0]
+ # Map each explanation weight to the corresponding superpixel
+ dict_heatmap = dict(explanation.local_exp[ind])
+ heatmap = np.vectorize(dict_heatmap.get)(explanation.segments)
+ plt.imshow(heatmap, cmap='RdBu', vmin=-heatmap.max(), vmax=heatmap.max())
+ plt.colorbar()
+ plt.title("Map each explanation weight to the corresponding superpixel")
+ plt.show()
+
+
+def train(model, datasets, checkpoint_path, logs_path, init_epoch):
+ """ Training routine. """
+
+ # Keras callbacks for training
+ callback_list = [
+ tf.keras.callbacks.TensorBoard(
+ log_dir=logs_path,
+ update_freq='batch',
+ profile_batch=0),
+ ImageLabelingLogger(logs_path, datasets),
+ CustomModelSaver(checkpoint_path, ARGS.task, hp.max_num_weights)
+ ]
+
+ # Include confusion logger in callbacks if flag set
+ if ARGS.confusion:
+ callback_list.append(ConfusionMatrixLogger(logs_path, datasets))
+
+ # Begin training
+ model.fit(
+ x=datasets.train_data,
+ validation_data=datasets.test_data,
+ epochs=hp.num_epochs,
+ batch_size=None,
+ callbacks=callback_list,
+ initial_epoch=init_epoch,
+ )
+
+
+def test(model, test_data):
+ """ Testing routine. """
+
+ # Run model on test set
+ model.evaluate(
+ x=test_data,
+ verbose=1,
+ )
+
+
+def main():
+ """ Main function. """
+
+ time_now = datetime.now()
+ timestamp = time_now.strftime("%m%d%y-%H%M%S")
+ init_epoch = 0
+
+ # If loading from a checkpoint, the loaded checkpoint's directory
+ # will be used for future checkpoints
+ if ARGS.load_checkpoint is not None:
+ ARGS.load_checkpoint = os.path.abspath(ARGS.load_checkpoint)
+
+ # Get timestamp and epoch from filename
+        regex = r"(?:.+)(?:\.e)(\d+)(?:.+)(?:\.h5)"
+ init_epoch = int(re.match(regex, ARGS.load_checkpoint).group(1)) + 1
+ timestamp = os.path.basename(os.path.dirname(ARGS.load_checkpoint))
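+        # For example, a checkpoint path like
+        # "checkpoints/your_model/043022-235959/your.weights.e012-acc0.5123.h5"
+        # (the exact filename format comes from CustomModelSaver) would give
+        # init_epoch = 13 and timestamp = "043022-235959".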
+
+ # If paths provided by program arguments are accurate, then this will
+ # ensure they are used. If not, these directories/files will be
+    # set relative to the directory of main.py
+ if os.path.exists(ARGS.data):
+ ARGS.data = os.path.abspath(ARGS.data)
+ if os.path.exists(ARGS.load_vgg):
+ ARGS.load_vgg = os.path.abspath(ARGS.load_vgg)
+
+    # Run script from the location of main.py
+ os.chdir(sys.path[0])
+
+ datasets = Datasets(ARGS.data, ARGS.task)
+
+ if ARGS.task == '1':
+ model = YourModel()
+ model(tf.keras.Input(shape=(hp.img_size, hp.img_size, 3)))
+ checkpoint_path = "checkpoints" + os.sep + \
+ "your_model" + os.sep + timestamp + os.sep
+ logs_path = "logs" + os.sep + "your_model" + \
+ os.sep + timestamp + os.sep
+
+ # Print summary of model
+ model.summary()
+ else:
+ model = VGGModel()
+ checkpoint_path = "checkpoints" + os.sep + \
+ "vgg_model" + os.sep + timestamp + os.sep
+ logs_path = "logs" + os.sep + "vgg_model" + \
+ os.sep + timestamp + os.sep
+ model(tf.keras.Input(shape=(224, 224, 3)))
+
+ # Print summaries for both parts of the model
+ model.vgg16.summary()
+ model.head.summary()
+
+ # Load base of VGG model
+ model.vgg16.load_weights(ARGS.load_vgg, by_name=True)
+
+ # Load checkpoints
+ if ARGS.load_checkpoint is not None:
+ if ARGS.task == '1':
+ model.load_weights(ARGS.load_checkpoint, by_name=False)
+ else:
+ model.head.load_weights(ARGS.load_checkpoint, by_name=False)
+
+ # Make checkpoint directory if needed
+ if not ARGS.evaluate and not os.path.exists(checkpoint_path):
+ os.makedirs(checkpoint_path)
+
+ # Compile model graph
+ model.compile(
+ optimizer=model.optimizer,
+ loss=model.loss_fn,
+ metrics=["sparse_categorical_accuracy"])
+
+ if ARGS.evaluate:
+ test(model, datasets.test_data)
+
+        # TODO: change the image path to be the image of your choice by
+        # passing the --lime-image flag when calling main.py, e.g.
+        # python main.py --task 1 --evaluate --lime-image test/Bedroom/image_0003.jpg
+ path = ARGS.data + os.sep + ARGS.lime_image
+ LIME_explainer(model, path, datasets.preprocess_fn)
+ else:
+ train(model, datasets, checkpoint_path, logs_path, init_epoch)
+
+
+# Make arguments global
+ARGS = parse_args()
+
+main()
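+
+# Example invocations, based on the arguments defined in parse_args() above
+# (paths and checkpoint names are illustrative):
+#   python main.py --task 1 --data ../data/
+#   python main.py --task 3 --load-vgg vgg16_imagenet.h5
+#   python main.py --task 1 --evaluate --load-checkpoint checkpoints/your_model/<timestamp>/<weights>.h5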
diff --git a/preprocess.py b/preprocess.py
new file mode 100644
index 00000000..b7cfdb67
--- /dev/null
+++ b/preprocess.py
@@ -0,0 +1,224 @@
+"""
+Homework 5 - CNNs
+CS1430 - Computer Vision
+Brown University
+"""
+
+import os
+import random
+import numpy as np
+from PIL import Image
+import tensorflow as tf
+
+import hyperparameters as hp
+
+class Datasets():
+ """ Class for containing the training and test sets as well as
+ other useful data-related information. Contains the functions
+ for preprocessing.
+ """
+
+ def __init__(self, data_path, task):
+
+ self.data_path = data_path
+ self.task = task
+
+ # Dictionaries for (label index) <--> (class name)
+ self.idx_to_class = {}
+ self.class_to_idx = {}
+
+ # For storing list of classes
+ self.classes = [""] * hp.num_classes
+
+ # Mean and std for standardization
+ self.mean = np.zeros((3,))
+ self.std = np.zeros((3,))
+ self.calc_mean_and_std()
+
+ # Setup data generators
+ self.train_data = self.get_data(
+ os.path.join(self.data_path, "train/"), task == '3', True, True)
+ self.test_data = self.get_data(
+ os.path.join(self.data_path, "test/"), task == '3', False, False)
+
+ def calc_mean_and_std(self):
+ """ Calculate mean and standard deviation of a sample of the
+ training dataset for standardization.
+
+ Arguments: none
+
+ Returns: none
+ """
+
+ # Get list of all images in training directory
+ file_list = []
+ for root, _, files in os.walk(os.path.join(self.data_path, "train/")):
+ for name in files:
+ if name.endswith(".jpg"):
+ file_list.append(os.path.join(root, name))
+
+ # Shuffle filepaths
+ random.shuffle(file_list)
+
+ # Take sample of file paths
+ file_list = file_list[:hp.preprocess_sample_size]
+
+ # Allocate space in memory for images
+ data_sample = np.zeros(
+ (hp.preprocess_sample_size, hp.img_size, hp.img_size, 3))
+
+ # Import images
+ for i, file_path in enumerate(file_list):
+ img = Image.open(file_path)
+ img = img.resize((hp.img_size, hp.img_size))
+ img = np.array(img, dtype=np.float32)
+ img /= 255.
+
+ # Grayscale -> RGB
+ if len(img.shape) == 2:
+ img = np.stack([img, img, img], axis=-1)
+
+ data_sample[i] = img
+
+ # TODO: Calculate the pixel-wise mean and standard deviation
+ # of the images in data_sample and store them in
+ # self.mean and self.std respectively.
+ # ==========================================================
+
+        # per-channel mean and std over the whole sample
+        self.mean = np.mean(data_sample, axis=(0, 1, 2), dtype=np.float32)
+        self.std = np.std(data_sample, axis=(0, 1, 2), dtype=np.float32)
+ # ==========================================================
+
+ print("Dataset mean: [{0:.4f}, {1:.4f}, {2:.4f}]".format(
+ self.mean[0], self.mean[1], self.mean[2]))
+
+ print("Dataset std: [{0:.4f}, {1:.4f}, {2:.4f}]".format(
+ self.std[0], self.std[1], self.std[2]))
+
+ def standardize(self, img):
+ """ Function for applying standardization to an input image.
+
+ Arguments:
+ img - numpy array of shape (image size, image size, 3)
+
+ Returns:
+ img - numpy array of shape (image size, image size, 3)
+ """
+
+ # TODO: Standardize the input image. Use self.mean and self.std
+ # that were calculated in calc_mean_and_std() to perform
+ # the standardization.
+ # =============================================================
+        img = (img - self.mean) / self.std
+ # =============================================================
+
+ return img
+
+ def preprocess_fn(self, img):
+ """ Preprocess function for ImageDataGenerator. """
+
+ if self.task == '3':
+ img = tf.keras.applications.vgg16.preprocess_input(img)
+ else:
+ img = img / 255.
+ img = self.standardize(img)
+ return img
+
+ def custom_preprocess_fn(self, img):
+ """ Custom preprocess function for ImageDataGenerator. """
+
+ if self.task == '3':
+ img = tf.keras.applications.vgg16.preprocess_input(img)
+ else:
+ img = img / 255.
+ img = self.standardize(img)
+
+ # EXTRA CREDIT:
+ # Write your own custom data augmentation procedure, creating
+ # an effect that cannot be achieved using the arguments of
+ # ImageDataGenerator. This can potentially boost your accuracy
+ # in the validation set. Note that this augmentation should
+ # only be applied to some input images, so make use of the
+ # 'random' module to make sure this happens. Also, make sure
+ # that ImageDataGenerator uses *this* function for preprocessing
+ # on augmented data.
+
+ if random.random() < 0.3:
+ img = img + tf.random.uniform(
+ (hp.img_size, hp.img_size, 1),
+ minval=-0.1,
+ maxval=0.1)
+
+ return img
+
+ def get_data(self, path, is_vgg, shuffle, augment):
+ """ Returns an image data generator which can be iterated
+ through for images and corresponding class labels.
+
+ Arguments:
+ path - Filepath of the data being imported, such as
+ "../data/train" or "../data/test"
+ is_vgg - Boolean value indicating whether VGG preprocessing
+ should be applied to the images.
+ shuffle - Boolean value indicating whether the data should
+ be randomly shuffled.
+ augment - Boolean value indicating whether the data should
+ be augmented or not.
+
+ Returns:
+ An iterable image-batch generator
+ """
+
+ if augment:
+ # TODO: Use the arguments of ImageDataGenerator()
+ # to augment the data. Leave the
+ # preprocessing_function argument as is unless
+ # you have written your own custom preprocessing
+ # function (see custom_preprocess_fn()).
+ #
+ # Documentation for ImageDataGenerator: https://bit.ly/2wN2EmK
+ #
+ # ============================================================
+
+            data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
+                preprocessing_function=self.preprocess_fn,
+                rotation_range=20,
+                width_shift_range=0.2,
+                height_shift_range=0.2,
+                horizontal_flip=True,
+                fill_mode="reflect",
+                validation_split=0.2)
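+
+            # Note: if you implement the extra-credit augmentation in
+            # custom_preprocess_fn(), pass self.custom_preprocess_fn as the
+            # preprocessing_function here instead of self.preprocess_fn.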
+
+ # ============================================================
+ else:
+ # Don't modify this
+ data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
+ preprocessing_function=self.preprocess_fn)
+
+ # VGG must take images of size 224x224
+ img_size = 224 if is_vgg else hp.img_size
+
+ classes_for_flow = None
+
+ # Make sure all data generators are aligned in label indices
+ if bool(self.idx_to_class):
+ classes_for_flow = self.classes
+
+ # Form image data generator from directory structure
+ data_gen = data_gen.flow_from_directory(
+ path,
+ target_size=(img_size, img_size),
+ class_mode='sparse',
+ batch_size=hp.batch_size,
+ shuffle=shuffle,
+ classes=classes_for_flow)
+
+ # Setup the dictionaries if not already done
+ if not bool(self.idx_to_class):
+ unordered_classes = []
+ for dir_name in os.listdir(path):
+ if os.path.isdir(os.path.join(path, dir_name)):
+ unordered_classes.append(dir_name)
+
+ for img_class in unordered_classes:
+ self.idx_to_class[data_gen.class_indices[img_class]] = img_class
+ self.class_to_idx[img_class] = int(data_gen.class_indices[img_class])
+ self.classes[int(data_gen.class_indices[img_class])] = img_class
+
+ return data_gen