author     Michael Foiani <sotech117@michaels-mbp-4.devices.brown.edu>  2022-05-09 01:15:32 -0400
committer  Michael Foiani <sotech117@michaels-mbp-4.devices.brown.edu>  2022-05-09 01:15:32 -0400
commit     13826824ec58e85557efb3a12fb0456ffb20e46d (patch)
tree       e656abe2f8338860710bba1224c08ba5cd8bc0fa
parent     a0870ac3f1f84278c5b9fe7f78f6b1af1d1f33e9 (diff)
Comment code, add example images, and rename files
-rw-r--r--  __pycache__/hyperparameters.cpython-38.pyc | Bin 395 -> 387 bytes
-rw-r--r--  __pycache__/losses.cpython-38.pyc          | Bin 3747 -> 0 bytes
-rw-r--r--  checkpoint.npy                             | Bin 861968 -> 861968 bytes
-rw-r--r--  hyperparameters.py                         |   2
-rw-r--r--  losses.py                                  |  97
-rw-r--r--  main.py                                    |  13
-rw-r--r--  model.py                                   | 138
-rw-r--r--  save.jpg                                   | Bin 111990 -> 15825 bytes
-rw-r--r--  save_images/epoch0.jpg                     | Bin 14178 -> 14166 bytes
-rw-r--r--  save_images/epoch100.jpg                   | Bin 18187 -> 18124 bytes
-rw-r--r--  save_images/epoch150.jpg                   | Bin 0 -> 17919 bytes
-rw-r--r--  save_images/epoch200.jpg                   | Bin 1783 -> 17601 bytes
-rw-r--r--  save_images/epoch250.jpg                   | Bin 0 -> 17380 bytes
-rw-r--r--  save_images/epoch300.jpg                   | Bin 1783 -> 17082 bytes
-rw-r--r--  save_images/epoch350.jpg                   | Bin 0 -> 16927 bytes
-rw-r--r--  save_images/epoch400.jpg                   | Bin 1783 -> 16752 bytes
-rw-r--r--  save_images/epoch450.jpg                   | Bin 0 -> 16571 bytes
-rw-r--r--  save_images/epoch50.jpg                    | Bin 17851 -> 17801 bytes
-rw-r--r--  save_images/epoch500.jpg                   | Bin 0 -> 16470 bytes
-rw-r--r--  save_images/epoch550.jpg                   | Bin 0 -> 16398 bytes
-rw-r--r--  save_images/epoch600.jpg                   | Bin 0 -> 16311 bytes
-rw-r--r--  save_images/epoch650.jpg                   | Bin 0 -> 16222 bytes
-rw-r--r--  save_images/epoch700.jpg                   | Bin 0 -> 16145 bytes
-rw-r--r--  save_images/epoch750.jpg                   | Bin 0 -> 16091 bytes
-rw-r--r--  save_images/epoch800.jpg                   | Bin 0 -> 16036 bytes
-rw-r--r--  save_images/epoch850.jpg                   | Bin 0 -> 15982 bytes
-rw-r--r--  save_images/epoch900.jpg                   | Bin 0 -> 15949 bytes
-rw-r--r--  save_images/epoch950.jpg                   | Bin 0 -> 15909 bytes

28 files changed, 143 insertions(+), 107 deletions(-)
diff --git a/__pycache__/hyperparameters.cpython-38.pyc b/__pycache__/hyperparameters.cpython-38.pyc
index e046deb6..2ff6d7e9 100644
Binary files differ
diff --git a/__pycache__/losses.cpython-38.pyc b/__pycache__/losses.cpython-38.pyc
deleted file mode 100644
index bd55640e..00000000
--- a/__pycache__/losses.cpython-38.pyc
+++ /dev/null
Binary files differ
diff --git a/checkpoint.npy b/checkpoint.npy
index 94160b43..73b09998 100644
Binary files differ
diff --git a/hyperparameters.py b/hyperparameters.py
index 180eaf85..0ca70764 100644
--- a/hyperparameters.py
+++ b/hyperparameters.py
@@ -9,7 +9,7 @@
 Number of epochs. If you experiment with more complex networks you
 might need to increase this. Likewise if you add regularization that slows training.
 """
-num_epochs = 500
+num_epochs = 1000
 
 """
 A critical parameter that can dramatically affect whether training
diff --git a/losses.py b/losses.py
deleted file mode 100644
index c618fd38..00000000
--- a/losses.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import tensorflow as tf
-import numpy as np
-from tensorflow.keras.layers import \
-    Conv2D, AveragePooling2D
-from skimage import transform
-import hyperparameters as hp
-
-
-def get_gram(style_output):
-    style_shape = tf.shape(style_output)
-    output = tf.linalg.einsum('bijc,bijd->bcd', style_output, style_output)
-    dimensions = style_shape[1] * style_shape[2]
-    dimensions = tf.cast(dimensions, tf.float32)
-    return output / dimensions
-
-
-class YourModel(tf.keras.Model):
-    """ Your own neural network model. """
-
-    def __init__(self, content_image, style_image):  # normalize these images to float values
-        super(YourModel, self).__init__()
-
-        self.content_image = transform.resize(content_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True)
-        self.content_image = tf.image.convert_image_dtype(self.content_image, tf.float32)
-
-        self.style_image = transform.resize(style_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True)
-        self.style_image = tf.image.convert_image_dtype(self.style_image, tf.float32)
-
-        image = tf.image.convert_image_dtype(content_image, tf.float32)
-        self.x = tf.Variable([image])
-
-        self.content_weight = hp.alpha
-        self.style_weight = hp.beta
-
-        self.photo_layers = ['block5_conv2']
-        self.style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
-
-        self.num_photo_layers = len(self.photo_layers)
-        self.num_style_layers = len(self.style_layers)
-
-        self.optimizer = tf.keras.optimizers.Adam(learning_rate=hp.learning_rate, beta_1=hp.beta_1, epsilon=hp.epsilon)
-
-        self.vgg16 = tf.keras.applications.VGG16(include_top=False, weights='vgg16_imagenet.h5')
-
-        self.vgg16.trainable = False
-
-        # creating the Gram Matrix
-        p_output = self.vgg16.get_layer(self.photo_layers[0]).output
-        style_output = []
-        for layer in self.style_layers:
-            style_output.append(self.vgg16.get_layer(layer).output)
-
-        G = [get_gram(x) for x in style_output]
-
-        self.vgg16 = tf.keras.Model([self.vgg16.input], [p_output, G])
-
-        # figure this out Michael
-        img_to_np = lambda img: np.array([img * 255])
-
-        self.content_target = self.vgg16(img_to_np(content_image))[0]
-        self.style_target = self.vgg16(img_to_np(style_image))[1]
-
-        # create a map of the layers to their corresponding number of filters if it is a convolutional layer
-
-    def call(self, x):
-        # call onto our pretrained network, since we don't have a
-        # classification head to follow
-        x = self.vgg16(x * 255)
-        return x
-
-    def loss_fn(self, x):
-        x = self.call(x)
-
-        content_l = self.content_loss(x[0], self.content_target)
-        style_l = self.style_loss(x[1], self.style_target)
-        return (self.content_weight * content_l) + (self.style_weight * style_l)
-
-    def content_loss(self, photo_layers, input_layers):
-        return tf.reduce_mean(tf.square(photo_layers - input_layers))
-
-    def style_loss(self, art_layers, input_layers):
-        layer_losses = []
-        for created, target in zip(art_layers, input_layers):
-            reduced = tf.reduce_mean(tf.square(created - target))
-            layer_losses.append(reduced)
-        return tf.add_n(layer_losses)
-
-    def train_step(self, epoch):
-        with tf.GradientTape(watch_accessed_variables=False) as tape:
-            tape.watch(self.x)
-            # loss = self.loss_fn(self.content_image, self.style_image, self.x)
-            loss = self.loss_fn(self.x)
-            print('\rEpoch {}: Loss: {:.4f}'.format(epoch, loss), end='')
-        gradients = tape.gradient(loss, self.x)
-        self.optimizer.apply_gradients([(gradients, self.x)])
-        self.x.assign(tf.clip_by_value(self.x, clip_value_min=0.0, clip_value_max=1.0))
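For reference, the helpers deleted here (and re-created in model.py below) implement the style-transfer objective of Gatys et al. (https://arxiv.org/pdf/1508.06576.pdf). As a sketch of what the code computes: F^l and G^l are the generated image's layer-l features and Gram matrix, P^l and A^l are the content and style targets, and alpha and beta are hp.alpha and hp.beta. Note the scaling is this implementation's own convention (means rather than sums, Gram matrices divided by H*W), not the paper's exact constants:

\mathcal{L}_{\mathrm{total}} = \alpha \, \mathcal{L}_{\mathrm{content}} + \beta \, \mathcal{L}_{\mathrm{style}}

\mathcal{L}_{\mathrm{content}} = \operatorname{mean}\big( (F^{l_c} - P^{l_c})^2 \big), \qquad l_c = \texttt{block5\_conv2}

G^{l}_{cd} = \frac{1}{HW} \sum_{i=1}^{H} \sum_{j=1}^{W} F^{l}_{ijc} \, F^{l}_{ijd}

\mathcal{L}_{\mathrm{style}} = \sum_{l \in \text{style layers}} \operatorname{mean}\big( (G^{l} - A^{l})^2 \big)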
diff --git a/main.py b/main.py
--- a/main.py
+++ b/main.py
@@ -5,12 +5,8 @@ import argparse
 import tensorflow as tf
 from skimage import transform
-import PIL.Image
-
 import hyperparameters as hp
-from losses import YourModel
-# from tensorboard_utils import \
-#     ImageLabelingLogger, ConfusionMatrixLogger, CustomModelSaver
+from model import YourModel
 from skimage.io import imread, imsave
 from matplotlib import pyplot as plt
 
@@ -42,9 +38,9 @@ def parse_args():
         help='Y if you want to load the most recent weights'
     )
-
     return parser.parse_args()
+
 
 
 def train(model: YourModel):
     for i in range(hp.num_epochs):
         if i % 50 == 0:
@@ -55,6 +51,7 @@ def train(model: YourModel):
         np.save('checkpoint.npy', model.x)
     model.train_step(i)
+
 
 def main():
     """ Main function. """
     if os.path.exists(ARGS.content):
@@ -63,13 +60,11 @@ def main():
         ARGS.content = os.path.abspath(ARGS.content)
         ARGS.style = os.path.abspath(ARGS.style)
     os.chdir(sys.path[0])
     print('this is', ARGS.content)
-
-
     content_image = imread(ARGS.content)
     style_image = imread(ARGS.style)
     style_image = transform.resize(style_image, content_image.shape, anti_aliasing=True)
-
+
     my_model = YourModel(content_image=content_image, style_image=style_image)
 
     if (ARGS.load == 'Y'):
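The entry point now imports YourModel from model.py (added below) instead of losses.py. For orientation, a minimal driver that mirrors the train() loop above; the input paths are hypothetical and the imsave call is an assumption based on the save_images/epochN.jpg files added in this commit:

import numpy as np
from skimage import transform
from skimage.io import imread, imsave

import hyperparameters as hp
from model import YourModel

content_image = imread('content.jpg')  # hypothetical input path
style_image = imread('style.jpg')      # hypothetical input path
style_image = transform.resize(style_image, content_image.shape, anti_aliasing=True)

model = YourModel(content_image=content_image, style_image=style_image)
for i in range(hp.num_epochs):
    if i % 50 == 0:
        # snapshot the image being optimized (assumed to be how the
        # save_images/epochN.jpg files in this commit were produced)
        imsave('save_images/epoch{}.jpg'.format(i), np.array(model.x[0]))
        np.save('checkpoint.npy', model.x)
    model.train_step(i)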
""" + + def __init__(self, content_image, style_image): + super(YourModel, self).__init__() + + # -------------------------------------------------------------------------------------------------------------- + # PART 1 : preprocess/init the CONTENT, STYLE, and CREATION IMAGES # + # -------------------------------------------------------------------------------------------------------------- + # 1) resize the content and style images to be the same size + self.content_image = transform.resize(content_image, tf.shape(style_image), anti_aliasing=True, + preserve_range=True) + self.style_image = transform.resize(style_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True) + + # 2) convert the content and style images to float32 tensors for loss functions (from uint8) + self.content_image = tf.image.convert_image_dtype(self.content_image, tf.float32) + self.style_image = tf.image.convert_image_dtype(self.style_image, tf.float32) + + # 3) set the image we are creating as a copy of the tensor that represents the content image + # (we do this to give the creation image a good starting point) + image = tf.image.convert_image_dtype(content_image, tf.float32) + self.x = tf.Variable([image]) + + # -------------------------------------------------------------------------------------------------------------- + # PART 2 : load and configure vgg_16 network use (without classification head) # + # -------------------------------------------------------------------------------------------------------------- + # 1) load the pretrained vgg_16 network + self.vgg16 = tf.keras.applications.VGG16(include_top=False, weights='vgg16_imagenet.h5') + self.vgg16.trainable = False + + # 2) define the layers of the vgg_16 network from which we will extract the content and style features + self.photo_layers = ['block5_conv2'] + self.style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1'] + + self.num_photo_layers = len(self.photo_layers) + self.num_style_layers = len(self.style_layers) + + # 3) get the output (filters and biases) for the defined photo and style layers above + # only using one filter for the photo layer, so oonly that outpur is needed for our model + p_output = self.vgg16.get_layer(self.photo_layers[0]).output + + # using multiple filters for the style layers, so we to create the Gram Matrix from each style layers' output + style_output = [] + for layer in self.style_layers: + style_output.append(self.vgg16.get_layer(layer).output) + + # map each style layer output to its Gram Matrix + G = [self.__get_gram(x) for x in style_output] + + # 4) create the vgg16 model from the photo and style layers + self.vgg16 = tf.keras.Model([self.vgg16.input], [p_output, G]) + + # -------------------------------------------------------------------------------------------------------------- + # PART 3 : assign our optimizers, loss weights, and loss/style targets # + # -------------------------------------------------------------------------------------------------------------- + # 1) use the adam optimizer with hyperparameters defined in the hyperparamters.py + self.optimizer = tf.keras.optimizers.Adam(learning_rate=hp.learning_rate, beta_1=hp.beta_1, epsilon=hp.epsilon) + + # 2) assign the loss weights from hyperparameters.py + self.content_weight = hp.alpha + self.style_weight = hp.beta + + # 3) get the targets that serve as the baseline of the content and style loss calculations + # covert images to their float -> numpy representations to call on our model for the 
diff --git a/save.jpg b/save.jpg
Binary files differ
diff --git a/save_images/epoch0.jpg b/save_images/epoch0.jpg
index 6b95468c..81c239a8 100644
--- a/save_images/epoch0.jpg
+++ b/save_images/epoch0.jpg
Binary files differ
diff --git a/save_images/epoch100.jpg b/save_images/epoch100.jpg
index 03a5b3d6..ad5de5f5 100644
--- a/save_images/epoch100.jpg
+++ b/save_images/epoch100.jpg
Binary files differ
diff --git a/save_images/epoch150.jpg b/save_images/epoch150.jpg
new file mode 100644
index 00000000..e18e565e
--- /dev/null
+++ b/save_images/epoch150.jpg
Binary files differ
diff --git a/save_images/epoch200.jpg b/save_images/epoch200.jpg
index 1de6c903..1096ea7f 100644
--- a/save_images/epoch200.jpg
+++ b/save_images/epoch200.jpg
Binary files differ
diff --git a/save_images/epoch250.jpg b/save_images/epoch250.jpg
new file mode 100644
index 00000000..6b0be244
--- /dev/null
+++ b/save_images/epoch250.jpg
Binary files differ
diff --git a/save_images/epoch300.jpg b/save_images/epoch300.jpg
index 1de6c903..225947f4 100644
--- a/save_images/epoch300.jpg
+++ b/save_images/epoch300.jpg
Binary files differ
diff --git a/save_images/epoch350.jpg b/save_images/epoch350.jpg
new file mode 100644
index 00000000..eea0a167
--- /dev/null
+++ b/save_images/epoch350.jpg
Binary files differ
diff --git a/save_images/epoch400.jpg b/save_images/epoch400.jpg
index 1de6c903..6e286ae0 100644
--- a/save_images/epoch400.jpg
+++ b/save_images/epoch400.jpg
Binary files differ
diff --git a/save_images/epoch450.jpg b/save_images/epoch450.jpg
new file mode 100644
index 00000000..8ef2132c
--- /dev/null
+++ b/save_images/epoch450.jpg
Binary files differ
diff --git a/save_images/epoch50.jpg b/save_images/epoch50.jpg
index b0d175ad..da06f7e1 100644
--- a/save_images/epoch50.jpg
+++ b/save_images/epoch50.jpg
Binary files differ
diff --git a/save_images/epoch500.jpg b/save_images/epoch500.jpg
new file mode 100644
index 00000000..db82fe4a
--- /dev/null
+++ b/save_images/epoch500.jpg
Binary files differ
diff --git a/save_images/epoch550.jpg b/save_images/epoch550.jpg
new file mode 100644
index 00000000..0207735b
--- /dev/null
+++ b/save_images/epoch550.jpg
Binary files differ
diff --git a/save_images/epoch600.jpg b/save_images/epoch600.jpg
new file mode 100644
index 00000000..6b3b999a
--- /dev/null
+++ b/save_images/epoch600.jpg
Binary files differ
diff --git a/save_images/epoch650.jpg b/save_images/epoch650.jpg
new file mode 100644
index 00000000..29eab7a8
--- /dev/null
+++ b/save_images/epoch650.jpg
Binary files differ
diff --git a/save_images/epoch700.jpg b/save_images/epoch700.jpg
new file mode 100644
index 00000000..fd8ec867
--- /dev/null
+++ b/save_images/epoch700.jpg
Binary files differ
diff --git a/save_images/epoch750.jpg b/save_images/epoch750.jpg
new file mode 100644
index 00000000..7881cd98
--- /dev/null
+++ b/save_images/epoch750.jpg
Binary files differ
diff --git a/save_images/epoch800.jpg b/save_images/epoch800.jpg
new file mode 100644
index 00000000..5f115a9c
--- /dev/null
+++ b/save_images/epoch800.jpg
Binary files differ
diff --git a/save_images/epoch850.jpg b/save_images/epoch850.jpg
new file mode 100644
index 00000000..ff1cab0d
--- /dev/null
+++ b/save_images/epoch850.jpg
Binary files differ
diff --git a/save_images/epoch900.jpg b/save_images/epoch900.jpg
new file mode 100644
index 00000000..57964612
--- /dev/null
+++ b/save_images/epoch900.jpg
Binary files differ
diff --git a/save_images/epoch950.jpg b/save_images/epoch950.jpg
new file mode 100644
index 00000000..42deef1e
--- /dev/null
+++ b/save_images/epoch950.jpg
Binary files differ
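The checkpoint.npy updated above stores the image variable that train() periodically saves, which appears to be what the --load flag restores. A hypothetical resume snippet; the real load branch falls outside the hunks shown, so the assign call and paths here are assumptions:

import numpy as np
from skimage.io import imread

from model import YourModel

# rebuild the model, then overwrite its image variable with the checkpoint
# (hypothetical paths; assumes the checkpoint shape matches the content image)
model = YourModel(content_image=imread('content.jpg'), style_image=imread('style.jpg'))
model.x.assign(np.load('checkpoint.npy'))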