Diffstat (limited to 'losses.py')
-rw-r--r--  losses.py  229
1 file changed, 70 insertions(+), 159 deletions(-)
diff --git a/losses.py b/losses.py
index 6405df6a..c618fd38 100644
--- a/losses.py
+++ b/losses.py
@@ -5,182 +5,93 @@ from tensorflow.keras.layers import \
from skimage import transform
import hyperparameters as hp
+def get_gram(style_output):
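+ # Gram matrix per image: G[b, c, d] = sum over spatial positions (i, j) of
+ # F[b, i, j, c] * F[b, i, j, d], normalized by the number of positions H * W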
+ style_shape = tf.shape(style_output)
+ output = tf.linalg.einsum('bijc,bijd->bcd', style_output, style_output)
+ dimensions = style_shape[1] * style_shape[2]
+ dimensions = tf.cast(dimensions, tf.float32)
+ return output / dimensions
+
+
class YourModel(tf.keras.Model):
""" Your own neural network model. """
def __init__(self, content_image, style_image): #normalize these images to float values
super(YourModel, self).__init__()
- self.content_image = transform.resize(content_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True, clip=False)
- self.content_image = tf.expand_dims(self.content_image, axis=0)
- # self.content_image = self.content_image.astype('uint8')
- print(self.content_image)
+ self.content_image = transform.resize(content_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True)
+ self.content_image = tf.image.convert_image_dtype(self.content_image, tf.float32)
+
+ self.style_image = transform.resize(style_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True)
+ self.style_image = tf.image.convert_image_dtype(self.style_image, tf.float32)
- #perhaps consider cropping to avoid distortion
- self.style_image = transform.resize(style_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True, clip=False)
- self.style_image = tf.expand_dims(self.style_image, axis=0)
- # self.style_image = self.style_image.astype('uint8')
- #self.x = tf.Variable(initial_value = self.content_image.numpy().astype(np.float32), trainable=True)
- self.x = tf.Variable(initial_value = np.random.rand(self.content_image.shape[0],
- self.content_image.shape[1], self.content_image.shape[2], self.content_image.shape[3]).astype(np.float32), trainable=True)
- # self.x = self.x.astype('uint8')
-
- self.alpha = hp.alpha
- self.beta = hp.beta
-
- self.photo_layers = None
- self.art_layers = None
-
-
-
- #(self.x.shape)
-
- #print(self.content_image.shape, self.style_image.shape)
-
- self.optimizer = tf.keras.optimizers.Adam(hp.learning_rate)
-
- self.vgg16 = [
- # Block 1
- Conv2D(64, 3, 1, padding="same",
- activation="relu", name="block1_conv1"),
- Conv2D(64, 3, 1, padding="same",
- activation="relu", name="block1_conv2"),
- AveragePooling2D(2, name="block1_pool"),
- # Block 2
- Conv2D(128, 3, 1, padding="same",
- activation="relu", name="block2_conv1"),
- Conv2D(128, 3, 1, padding="same",
- activation="relu", name="block2_conv2"),
- AveragePooling2D(2, name="block2_pool"),
- # Block 3
- Conv2D(256, 3, 1, padding="same",
- activation="relu", name="block3_conv1"),
- Conv2D(256, 3, 1, padding="same",
- activation="relu", name="block3_conv2"),
- Conv2D(256, 3, 1, padding="same",
- activation="relu", name="block3_conv3"),
- AveragePooling2D(2, name="block3_pool"),
- # Block 4
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block4_conv1"),
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block4_conv2"),
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block4_conv3"),
- AveragePooling2D(2, name="block4_pool"),
- # Block 5
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block5_conv1"),
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block5_conv2"),
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block5_conv3"),
- AveragePooling2D(2, name="block5_pool"),
- ]
- for layer in self.vgg16:
- layer.trainable = False
-
- self.layer_to_filters = {layer.name: layer.filters for layer in self.vgg16 if "conv" in layer.name}
- self.indexed_layers = [layer for layer in self.vgg16 if "conv1" in layer.name]
- self.desired = [layer.name for layer in self.vgg16 if "conv1" in layer.name]
-
- self.vgg16 = tf.keras.Sequential(self.vgg16, name="vgg")
+ image = tf.image.convert_image_dtype(content_image, tf.float32)
+ self.x = tf.Variable([image])
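+ # the generated image starts as a (batched) copy of the content image rather than random noise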
+
+ self.content_weight = hp.alpha
+ self.style_weight = hp.beta
+
+ self.photo_layers = ['block5_conv2']
+ self.style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
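+ # content is matched at a single deep layer; style is matched at the first
+ # conv layer of each block, following Gatys et al.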
+
+ self.num_photo_layers = len(self.photo_layers)
+ self.num_style_layers = len(self.style_layers)
+
+ self.optimizer = tf.keras.optimizers.Adam(learning_rate=hp.learning_rate, beta_1=hp.beta_1, epsilon=hp.epsilon)
+
+ self.vgg16 = tf.keras.applications.VGG16(include_top=False, weights='vgg16_imagenet.h5')
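+ # include_top=False drops the classifier head; weights are read from a local checkpoint file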
+
+ self.vgg16.trainable = False
+
+ # grab the content-layer output and the style-layer outputs, then reduce each style output to its Gram matrix
+ p_output = self.vgg16.get_layer(self.photo_layers[0]).output
+ style_output = []
+ for layer in self.style_layers:
+ style_output.append(self.vgg16.get_layer(layer).output)
+
+ G = [get_gram(x) for x in style_output]
+
+ self.vgg16 = tf.keras.Model([self.vgg16.input], [p_output, G])
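+ # the wrapped model maps an input image to (content features, list of style Gram matrices)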
+
+ # scale the float image up to [0, 255] and add a batch dimension, matching call()
+ img_to_np = lambda img: np.array([img * 255])
+
+ self.content_target = self.vgg16(img_to_np(content_image))[0]
+ self.style_target = self.vgg16(img_to_np(style_image))[1]
# create a map of the layers to their corresponding number of filters if it is a convolutional layer
def call(self, x):
- layers = []
- for layer in self.vgg16.layers:
- # pass the x through
- x = layer(x)
- # print("Sotech117 is so so sus")
-
- # save the output of each layer if it is in the desired list
- if layer.name in self.desired:
- layers.append(x)
-
- return x, layers
-
- def loss_fn(self, p, a, x):
- # print(p)
- if(self.photo_layers == None):
- _, self.photo_layers = self.call(p)
- # print(a)
- if(self.art_layers == None):
- _, self.art_layers = self.call(a)
- # print(x)
- _, input_layers = self.call(x)
+ # run x (scaled back up to [0, 255]) through the pretrained network; there is no classification head to follow
+ x = self.vgg16(x * 255)
+ return x
+
+ def loss_fn(self, x):
+ x = self.call(x)
- content_l = self.content_loss(self.photo_layers, input_layers)
- style_l = self.style_loss(self.art_layers, input_layers)
- # Equation 7
- print('style_loss', style_l)
- print('content_loss', content_l)
- return (self.alpha * content_l) + (self.beta * style_l)
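+ # Equation 7: total loss is the weighted sum of content and style losses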
+ content_l = self.content_loss(x[0], self.content_target)
+ style_l = self.style_loss(x[1], self.style_target)
+ return (self.content_weight * content_l) + (self.style_weight * style_l)
def content_loss(self, photo_layers, input_layers):
- L_content = tf.constant(0.0)
- for i in range(len(photo_layers)):
- pl = photo_layers[i]
- il = input_layers[i]
- L_content = tf.math.add(L_content, tf.reduce_sum(tf.square(pl - il)))
-
- #print('content loss', L_content)
- return L_content
-
- def layer_loss(self, art_layer, input_layer):
- # vectorize the art_layers
- art_layer = tf.reshape(art_layer, (-1, art_layer.shape[-1]))
- # # vectorize the input_layers
- input_layer = tf.reshape(input_layer, (-1, input_layer.shape[-1]))
- # get the gram matrices
- G_l = tf.matmul(tf.transpose(input_layer), input_layer)
- A_l = tf.matmul(tf.transpose(art_layer), art_layer)
-
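+ # mean squared error between the content features of the photo and of the generated image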
+ return tf.reduce_mean(tf.square(photo_layers - input_layers))
- # vals = []
- # for i in range(input_dim):
- # vals_i = []
- # for j in range(input_dim):
- # il = tf.reshape(input_layers[i], [-1])
- # al = tf.reshape(art_layers[j], [-1])
- # k = tf.reduce_sum(tf.multiply(il, al))
- # vals_i.append(k)
- # vals.append(tf.stack(vals_i))
- # G = tf.stack(vals)
-
- # get the loss per each lateral layer
- # N depends on # of filters in the layer, M depends on hight and width of feature map
- # M_l = art_layer.shape[0]
- # N_l = art_layer.shape[1]
- M_l = self.content_image.shape[0]*self.content_image.shape[1]
- N_l = art_layer.shape[0]
-
- # layer.filters might not work
- E_l = 1/4 * (M_l**(-2)) *(N_l**(-2)) * tf.reduce_sum(tf.square(G_l - A_l))
+ def style_loss(self, art_layers, input_layers):
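+ # mean squared error between Gram matrices, summed over style layers with equal weights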
+ layer_losses = []
+ for created, target in zip(art_layers, input_layers):
+ reduced = tf.reduce_mean(tf.square(created - target))
+ layer_losses.append(reduced)
+ return tf.add_n(layer_losses)
- # while Sotech is botty:
- # Jayson_tatum.tear_acl()
- # return ("this is just another day")
- #print('Layer loss', E_l)
- return E_l
- def style_loss(self, art_layers, input_layers):
- L_style = tf.constant(0.0)
- for i in range(len(art_layers)):
- art_layer = art_layers[i]
- input_layer = input_layers[i]
- L_style = tf.math.add(L_style, (1/5)*self.layer_loss(art_layer, input_layer))
- #print('style loss', L_style)
- return L_style
-
- def train_step(self):
+ def train_step(self, epoch):
with tf.GradientTape(watch_accessed_variables=False) as tape:
tape.watch(self.x)
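+ # watch_accessed_variables=False, so gradients flow only to the generated image self.x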
- loss = self.loss_fn(self.content_image, self.style_image, self.x)
- print('loss', loss)
- #print('self.x', self.x)
- gradients = tape.gradient(loss, [self.x])
- #print('gradients', gradients)
- self.optimizer.apply_gradients(zip(gradients, [self.x]))
+ # loss = self.loss_fn(self.content_image, self.style_image, self.x)
+ loss = self.loss_fn(self.x)
+ print('\rEpoch {}: Loss: {:.4f}'.format(epoch, float(loss)), end='')
+ gradients = tape.gradient(loss, self.x)
+ self.optimizer.apply_gradients([(gradients, self.x)])
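+ # clamp the updated image back into the valid [0, 1] range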
+ self.x.assign(tf.clip_by_value(self.x, clip_value_min=0.0, clip_value_max=1.0))
+