author Michael Foiani <sotech117@michaels-mbp-4.devices.brown.edu> 2022-05-09 01:15:32 -0400
committer Michael Foiani <sotech117@michaels-mbp-4.devices.brown.edu> 2022-05-09 01:15:32 -0400
commit 13826824ec58e85557efb3a12fb0456ffb20e46d (patch)
tree e656abe2f8338860710bba1224c08ba5cd8bc0fa
parent a0870ac3f1f84278c5b9fe7f78f6b1af1d1f33e9 (diff)
Comment code, add example images, and rename files
-rw-r--r--  __pycache__/hyperparameters.cpython-38.pyc  bin 395 -> 387 bytes
-rw-r--r--  __pycache__/losses.cpython-38.pyc  bin 3747 -> 0 bytes
-rw-r--r--  checkpoint.npy  bin 861968 -> 861968 bytes
-rw-r--r--  hyperparameters.py  2
-rw-r--r--  losses.py  97
-rw-r--r--  main.py  13
-rw-r--r--  model.py  138
-rw-r--r--  save.jpg  bin 111990 -> 15825 bytes
-rw-r--r--  save_images/epoch0.jpg  bin 14178 -> 14166 bytes
-rw-r--r--  save_images/epoch100.jpg  bin 18187 -> 18124 bytes
-rw-r--r--  save_images/epoch150.jpg  bin 0 -> 17919 bytes
-rw-r--r--  save_images/epoch200.jpg  bin 1783 -> 17601 bytes
-rw-r--r--  save_images/epoch250.jpg  bin 0 -> 17380 bytes
-rw-r--r--  save_images/epoch300.jpg  bin 1783 -> 17082 bytes
-rw-r--r--  save_images/epoch350.jpg  bin 0 -> 16927 bytes
-rw-r--r--  save_images/epoch400.jpg  bin 1783 -> 16752 bytes
-rw-r--r--  save_images/epoch450.jpg  bin 0 -> 16571 bytes
-rw-r--r--  save_images/epoch50.jpg  bin 17851 -> 17801 bytes
-rw-r--r--  save_images/epoch500.jpg  bin 0 -> 16470 bytes
-rw-r--r--  save_images/epoch550.jpg  bin 0 -> 16398 bytes
-rw-r--r--  save_images/epoch600.jpg  bin 0 -> 16311 bytes
-rw-r--r--  save_images/epoch650.jpg  bin 0 -> 16222 bytes
-rw-r--r--  save_images/epoch700.jpg  bin 0 -> 16145 bytes
-rw-r--r--  save_images/epoch750.jpg  bin 0 -> 16091 bytes
-rw-r--r--  save_images/epoch800.jpg  bin 0 -> 16036 bytes
-rw-r--r--  save_images/epoch850.jpg  bin 0 -> 15982 bytes
-rw-r--r--  save_images/epoch900.jpg  bin 0 -> 15949 bytes
-rw-r--r--  save_images/epoch950.jpg  bin 0 -> 15909 bytes
28 files changed, 143 insertions, 107 deletions
diff --git a/__pycache__/hyperparameters.cpython-38.pyc b/__pycache__/hyperparameters.cpython-38.pyc
index e046deb6..2ff6d7e9 100644
--- a/__pycache__/hyperparameters.cpython-38.pyc
+++ b/__pycache__/hyperparameters.cpython-38.pyc
Binary files differ
diff --git a/__pycache__/losses.cpython-38.pyc b/__pycache__/losses.cpython-38.pyc
deleted file mode 100644
index bd55640e..00000000
--- a/__pycache__/losses.cpython-38.pyc
+++ /dev/null
Binary files differ
diff --git a/checkpoint.npy b/checkpoint.npy
index 94160b43..73b09998 100644
--- a/checkpoint.npy
+++ b/checkpoint.npy
Binary files differ
diff --git a/hyperparameters.py b/hyperparameters.py
index 180eaf85..0ca70764 100644
--- a/hyperparameters.py
+++ b/hyperparameters.py
@@ -9,7 +9,7 @@ Number of epochs. If you experiment with more complex networks you
might need to increase this. Likewise if you add regularization that
slows training.
"""
-num_epochs = 500
+num_epochs = 1000
"""
A critical parameter that can dramatically affect whether training
diff --git a/losses.py b/losses.py
deleted file mode 100644
index c618fd38..00000000
--- a/losses.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import tensorflow as tf
-import numpy as np
-from tensorflow.keras.layers import \
- Conv2D, AveragePooling2D
-from skimage import transform
-import hyperparameters as hp
-
-def get_gram(style_output):
- style_shape = tf.shape(style_output)
- output = tf.linalg.einsum('bijc,bijd->bcd', style_output, style_output)
- dimensions = style_shape[1] * style_shape[2]
- dimensions = tf.cast(dimensions, tf.float32)
- return output / dimensions
-
-
-class YourModel(tf.keras.Model):
- """ Your own neural network model. """
-
- def __init__(self, content_image, style_image): #normalize these images to float values
- super(YourModel, self).__init__()
-
- self.content_image = transform.resize(content_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True)
- self.content_image = tf.image.convert_image_dtype(self.content_image, tf.float32)
-
- self.style_image = transform.resize(style_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True)
- self.style_image = tf.image.convert_image_dtype(self.style_image, tf.float32)
-
- image = tf.image.convert_image_dtype(content_image, tf.float32)
- self.x = tf.Variable([image])
-
- self.content_weight = hp.alpha
- self.style_weight = hp.beta
-
- self.photo_layers = ['block5_conv2']
- self.style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
-
- self.num_photo_layers = len(self.photo_layers)
- self.num_style_layers = len(self.style_layers)
-
- self.optimizer = tf.keras.optimizers.Adam(learning_rate=hp.learning_rate, beta_1=hp.beta_1, epsilon=hp.epsilon)
-
- self.vgg16 = tf.keras.applications.VGG16(include_top=False, weights='vgg16_imagenet.h5')
-
- self.vgg16.trainable = False
-
- # creating the Gram Matrix
- p_output = self.vgg16.get_layer(self.photo_layers[0]).output
- style_output = []
- for layer in self.style_layers:
- style_output.append(self.vgg16.get_layer(layer).output)
-
- G = [get_gram(x) for x in style_output]
-
- self.vgg16 = tf.keras.Model([self.vgg16.input], [p_output, G])
-
- # figure this out Michael
- img_to_np = lambda img: np.array([img * 255])
-
- self.content_target = self.vgg16(img_to_np(content_image))[0]
- self.style_target = self.vgg16(img_to_np(style_image))[1]
-
- # create a map of the layers to their corresponding number of filters if it is a convolutional layer
-
- def call(self, x):
- # call onto our pretrained network, since we don't have a classifcation head to follow
- x = self.vgg16(x * 255)
- return x
-
- def loss_fn(self, x):
- x = self.call(x)
-
-
- content_l = self.content_loss(x[0], self.content_target)
- style_l = self.style_loss(x[1], self.style_target)
- return (self.content_weight * content_l) + (self.style_weight * style_l)
-
- def content_loss(self, photo_layers, input_layers):
- return tf.reduce_mean(tf.square(photo_layers - input_layers))
-
- def style_loss(self, art_layers, input_layers):
- layer_losses = []
- for created, target in zip(art_layers, input_layers):
- reduced = tf.reduce_mean(tf.square(created - target))
- layer_losses.append(reduced)
- return tf.add_n(layer_losses)
-
-
- def train_step(self, epoch):
- with tf.GradientTape(watch_accessed_variables=False) as tape:
- tape.watch(self.x)
- # loss = self.loss_fn(self.content_image, self.style_image, self.x)
- loss = self.loss_fn(self.x)
- print('\rEpoch {}: Loss: {:.4f}'.format(epoch, loss), end='')
- gradients = tape.gradient(loss, self.x)
- self.optimizer.apply_gradients([(gradients, self.x)])
- self.x.assign(tf.clip_by_value(self.x, clip_value_min=0.0, clip_value_max=1.0))
-
diff --git a/main.py b/main.py
index b4c1b228..527394cb 100644
--- a/main.py
+++ b/main.py
@@ -5,12 +5,8 @@ import argparse
import tensorflow as tf
from skimage import transform
-import PIL.Image
-
import hyperparameters as hp
-from losses import YourModel
-# from tensorboard_utils import \
-# ImageLabelingLogger, ConfusionMatrixLogger, CustomModelSaver
+from model import YourModel
from skimage.io import imread, imsave
from matplotlib import pyplot as plt
@@ -42,9 +38,9 @@ def parse_args():
help='Y if you want to load the most recent weights'
)
-
return parser.parse_args()
+
def train(model: YourModel):
for i in range(hp.num_epochs):
if i % 50 == 0:
@@ -55,6 +51,7 @@ def train(model: YourModel):
np.save('checkpoint.npy', model.x)
model.train_step(i)
+
def main():
""" Main function. """
if os.path.exists(ARGS.content):
@@ -63,13 +60,11 @@ def main():
ARGS.style = os.path.abspath(ARGS.style)
os.chdir(sys.path[0])
print('this is', ARGS.content)
-
-
content_image = imread(ARGS.content)
style_image = imread(ARGS.style)
style_image = transform.resize(style_image, content_image.shape, anti_aliasing=True)
-
+
my_model = YourModel(content_image=content_image, style_image=style_image)
if (ARGS.load == 'Y'):
diff --git a/model.py b/model.py
new file mode 100644
index 00000000..d8ce4c88
--- /dev/null
+++ b/model.py
@@ -0,0 +1,138 @@
+import tensorflow as tf
+import numpy as np
+from tensorflow.keras.layers import \
+ Conv2D, AveragePooling2D
+from skimage import transform
+import hyperparameters as hp
+
+
+class YourModel(tf.keras.Model):
+ """ Your own neural network model. """
+
+ def __init__(self, content_image, style_image):
+ super(YourModel, self).__init__()
+
+ # --------------------------------------------------------------------------------------------------------------
+ # PART 1 : preprocess/init the CONTENT, STYLE, and CREATION IMAGES #
+ # --------------------------------------------------------------------------------------------------------------
+ # 1) resize the content and style images to be the same size
+ self.content_image = transform.resize(content_image, tf.shape(style_image), anti_aliasing=True,
+ preserve_range=True)
+ self.style_image = transform.resize(style_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True)
+
+ # 2) convert the content and style images to float32 tensors for loss functions (from uint8)
+ self.content_image = tf.image.convert_image_dtype(self.content_image, tf.float32)
+ self.style_image = tf.image.convert_image_dtype(self.style_image, tf.float32)
+
+ # 3) set the image we are creating as a copy of the tensor that represents the content image
+ # (we do this to give the creation image a good starting point)
+ image = tf.image.convert_image_dtype(content_image, tf.float32)
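+ # the creation image is the only trainable tf.Variable; wrapping it in a list adds the batch dimension vgg16 expects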
+ self.x = tf.Variable([image])
+
+ # --------------------------------------------------------------------------------------------------------------
+ # PART 2 : load and configure the vgg_16 network (without its classification head) #
+ # --------------------------------------------------------------------------------------------------------------
+ # 1) load the pretrained vgg_16 network
+ self.vgg16 = tf.keras.applications.VGG16(include_top=False, weights='vgg16_imagenet.h5')
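+ # freeze the pretrained ImageNet weights so that only the creation image (self.x) is updated during training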
+ self.vgg16.trainable = False
+
+ # 2) define the layers of the vgg_16 network from which we will extract the content and style features
+ self.photo_layers = ['block5_conv2']
+ self.style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
+
+ self.num_photo_layers = len(self.photo_layers)
+ self.num_style_layers = len(self.style_layers)
+
+ # 3) get the outputs (feature maps) for the photo and style layers defined above
+ # only using one layer for the photo features, so only that output is needed for our model
+ p_output = self.vgg16.get_layer(self.photo_layers[0]).output
+
+ # using multiple style layers, so we need to create the Gram Matrix from each style layer's output
+ style_output = []
+ for layer in self.style_layers:
+ style_output.append(self.vgg16.get_layer(layer).output)
+
+ # map each style layer output to its Gram Matrix
+ G = [self.__get_gram(x) for x in style_output]
+
+ # 4) create the vgg16 model from the photo and style layers
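+ # the rebuilt model maps one input image to [content-layer feature map, list of style-layer Gram matrices]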
+ self.vgg16 = tf.keras.Model([self.vgg16.input], [p_output, G])
+
+ # --------------------------------------------------------------------------------------------------------------
+ # PART 3 : assign our optimizers, loss weights, and loss/style targets #
+ # --------------------------------------------------------------------------------------------------------------
+ # 1) use the Adam optimizer with hyperparameters defined in hyperparameters.py
+ self.optimizer = tf.keras.optimizers.Adam(learning_rate=hp.learning_rate, beta_1=hp.beta_1, epsilon=hp.epsilon)
+
+ # 2) assign the loss weights from hyperparameters.py
+ self.content_weight = hp.alpha
+ self.style_weight = hp.beta
+
+ # 3) get the targets that serve as the baseline of the content and style loss calculations
+ # convert images to their numpy representations (scaled by 255 and batched) to call on our model for the targets
+ img_to_np = lambda img: np.array([img * 255])
+ # content target is the first output of the vgg16 model since it is the output of the photo layer
+ self.content_target = self.vgg16(img_to_np(content_image))[0]
+ # style target is the second output of the vgg16 model, the Gram Matrix of the style layers
+ self.style_target = self.vgg16(img_to_np(style_image))[1]
+
+ # included for convention - this is the forward pass
+ def call(self, x):
+ # call only our truncated vgg16 model (no classification head)
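+ # scale from [0, 1] back to [0, 255] so the features are on the same scale as the precomputed targets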
+ x = self.vgg16(x * 255)
+ return x
+
+ def loss_fn(self, x):
+ # since our loss depends on the result of the forward pass (call), we call and get the results
+ x = self.call(x)
+
+ # helper functions to calculate the content and style loss
+ content_l = self.__content_loss(x[0], self.content_target)
+ style_l = self.__style_loss(x[1], self.style_target)
+
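+ # total loss as in the reference paper: L_total = alpha * L_content + beta * L_style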
+ # return the weighted sum of the content and style loss
+ return (self.content_weight * content_l) + (self.style_weight * style_l)
+
+ # called for each epoch and updates the model based on the optimizer and loss function
+ def train_step(self, epoch):
+ with tf.GradientTape(watch_accessed_variables=False) as tape:
+ # watch how the image changes for backpropagation
+ tape.watch(self.x)
+
+ # calculate the loss
+ loss = self.loss_fn(self.x)
+ gradients = tape.gradient(loss, self.x)
+
+ # print the progress of the training and loss
+ print('\rEpoch {}: Loss: {:.4f}'.format(epoch, loss), end='')
+
+ # update the optimizer based on the gradients
+ self.optimizer.apply_gradients([(gradients, self.x)])
+ # clamp the creation image back into the valid [0, 1] pixel range
+ self.x.assign(tf.clip_by_value(self.x, clip_value_min=0.0, clip_value_max=1.0))
+
+ # ------------------------------------------------------------------------------------------------------------------
+ # (STATIC) HELPER FUNCTIONS THAT IMPLEMENT THE CALCULATIONS FOR THE GRAM MATRIX AND LOSSES FROM THE REFERENCE
+ # PAPER (https://arxiv.org/pdf/1508.06576.pdf)
+ # ------------------------------------------------------------------------------------------------------------------
+ @staticmethod
+ def __content_loss(photo_layers, input_layers):
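+ # mean squared difference between the creation's content-layer features and the content target
+ # (the paper sums the squared errors; taking the mean only rescales the loss by a constant)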
+ return tf.reduce_mean(tf.square(photo_layers - input_layers))
+
+ @staticmethod
+ def __style_loss(art_layers, input_layers):
+ # each layer used for style has a loss
+ layer_losses = []
+ for created, target in zip(art_layers, input_layers):
+ reduced = tf.reduce_mean(tf.square(created - target))
+ layer_losses.append(reduced)
+ # the total style loss is the sum of each style layer loss
+ return tf.add_n(layer_losses)
+
+ @staticmethod
+ def __get_gram(style_output):
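+ # Gram matrix from the reference paper: G_cd = sum over spatial positions (i, j) of F_ijc * F_ijd,
+ # normalized by the number of positions (H * W); the einsum computes this per batch element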
+ style_shape = tf.shape(style_output)
+ output = tf.linalg.einsum('bijc,bijd->bcd', style_output, style_output)
+ dimensions = style_shape[1] * style_shape[2]
+ dimensions = tf.cast(dimensions, tf.float32)
+ return output / dimensions
diff --git a/save.jpg b/save.jpg
index 9ee1a9a1..4da02115 100644
--- a/save.jpg
+++ b/save.jpg
Binary files differ
diff --git a/save_images/epoch0.jpg b/save_images/epoch0.jpg
index 6b95468c..81c239a8 100644
--- a/save_images/epoch0.jpg
+++ b/save_images/epoch0.jpg
Binary files differ
diff --git a/save_images/epoch100.jpg b/save_images/epoch100.jpg
index 03a5b3d6..ad5de5f5 100644
--- a/save_images/epoch100.jpg
+++ b/save_images/epoch100.jpg
Binary files differ
diff --git a/save_images/epoch150.jpg b/save_images/epoch150.jpg
new file mode 100644
index 00000000..e18e565e
--- /dev/null
+++ b/save_images/epoch150.jpg
Binary files differ
diff --git a/save_images/epoch200.jpg b/save_images/epoch200.jpg
index 1de6c903..1096ea7f 100644
--- a/save_images/epoch200.jpg
+++ b/save_images/epoch200.jpg
Binary files differ
diff --git a/save_images/epoch250.jpg b/save_images/epoch250.jpg
new file mode 100644
index 00000000..6b0be244
--- /dev/null
+++ b/save_images/epoch250.jpg
Binary files differ
diff --git a/save_images/epoch300.jpg b/save_images/epoch300.jpg
index 1de6c903..225947f4 100644
--- a/save_images/epoch300.jpg
+++ b/save_images/epoch300.jpg
Binary files differ
diff --git a/save_images/epoch350.jpg b/save_images/epoch350.jpg
new file mode 100644
index 00000000..eea0a167
--- /dev/null
+++ b/save_images/epoch350.jpg
Binary files differ
diff --git a/save_images/epoch400.jpg b/save_images/epoch400.jpg
index 1de6c903..6e286ae0 100644
--- a/save_images/epoch400.jpg
+++ b/save_images/epoch400.jpg
Binary files differ
diff --git a/save_images/epoch450.jpg b/save_images/epoch450.jpg
new file mode 100644
index 00000000..8ef2132c
--- /dev/null
+++ b/save_images/epoch450.jpg
Binary files differ
diff --git a/save_images/epoch50.jpg b/save_images/epoch50.jpg
index b0d175ad..da06f7e1 100644
--- a/save_images/epoch50.jpg
+++ b/save_images/epoch50.jpg
Binary files differ
diff --git a/save_images/epoch500.jpg b/save_images/epoch500.jpg
new file mode 100644
index 00000000..db82fe4a
--- /dev/null
+++ b/save_images/epoch500.jpg
Binary files differ
diff --git a/save_images/epoch550.jpg b/save_images/epoch550.jpg
new file mode 100644
index 00000000..0207735b
--- /dev/null
+++ b/save_images/epoch550.jpg
Binary files differ
diff --git a/save_images/epoch600.jpg b/save_images/epoch600.jpg
new file mode 100644
index 00000000..6b3b999a
--- /dev/null
+++ b/save_images/epoch600.jpg
Binary files differ
diff --git a/save_images/epoch650.jpg b/save_images/epoch650.jpg
new file mode 100644
index 00000000..29eab7a8
--- /dev/null
+++ b/save_images/epoch650.jpg
Binary files differ
diff --git a/save_images/epoch700.jpg b/save_images/epoch700.jpg
new file mode 100644
index 00000000..fd8ec867
--- /dev/null
+++ b/save_images/epoch700.jpg
Binary files differ
diff --git a/save_images/epoch750.jpg b/save_images/epoch750.jpg
new file mode 100644
index 00000000..7881cd98
--- /dev/null
+++ b/save_images/epoch750.jpg
Binary files differ
diff --git a/save_images/epoch800.jpg b/save_images/epoch800.jpg
new file mode 100644
index 00000000..5f115a9c
--- /dev/null
+++ b/save_images/epoch800.jpg
Binary files differ
diff --git a/save_images/epoch850.jpg b/save_images/epoch850.jpg
new file mode 100644
index 00000000..ff1cab0d
--- /dev/null
+++ b/save_images/epoch850.jpg
Binary files differ
diff --git a/save_images/epoch900.jpg b/save_images/epoch900.jpg
new file mode 100644
index 00000000..57964612
--- /dev/null
+++ b/save_images/epoch900.jpg
Binary files differ
diff --git a/save_images/epoch950.jpg b/save_images/epoch950.jpg
new file mode 100644
index 00000000..42deef1e
--- /dev/null
+++ b/save_images/epoch950.jpg
Binary files differ