Diffstat (limited to 'losses.py')
-rw-r--r--  losses.py  229
1 file changed, 70 insertions(+), 159 deletions(-)
diff --git a/losses.py b/losses.py
index 6405df6a..c618fd38 100644
--- a/losses.py
+++ b/losses.py
@@ -5,182 +5,93 @@ from tensorflow.keras.layers import \
from skimage import transform
import hyperparameters as hp
+def get_gram(style_output):
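+ # Gram matrix per image: G[b, c, d] = sum over spatial positions (i, j) of
+ # F[b, i, j, c] * F[b, i, j, d], normalized by the number of positions H * W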
+ style_shape = tf.shape(style_output)
+ output = tf.linalg.einsum('bijc,bijd->bcd', style_output, style_output)
+ dimensions = style_shape[1] * style_shape[2]
+ dimensions = tf.cast(dimensions, tf.float32)
+ return output / dimensions
+
+
class YourModel(tf.keras.Model):
""" Your own neural network model. """
def __init__(self, content_image, style_image): #normalize these images to float values
super(YourModel, self).__init__()
- self.content_image = transform.resize(content_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True, clip=False)
- self.content_image = tf.expand_dims(self.content_image, axis=0)
- # self.content_image = self.content_image.astype('uint8')
- print(self.content_image)
+ self.content_image = transform.resize(content_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True)
+ self.content_image = tf.image.convert_image_dtype(self.content_image, tf.float32)
+
+ self.style_image = transform.resize(style_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True)
+ self.style_image = tf.image.convert_image_dtype(self.style_image, tf.float32)
- #perhaps consider cropping to avoid distortion
- self.style_image = transform.resize(style_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True, clip=False)
- self.style_image = tf.expand_dims(self.style_image, axis=0)
- # self.style_image = self.style_image.astype('uint8')
- #self.x = tf.Variable(initial_value = self.content_image.numpy().astype(np.float32), trainable=True)
- self.x = tf.Variable(initial_value = np.random.rand(self.content_image.shape[0],
- self.content_image.shape[1], self.content_image.shape[2], self.content_image.shape[3]).astype(np.float32), trainable=True)
- # self.x = self.x.astype('uint8')
-
- self.alpha = hp.alpha
- self.beta = hp.beta
-
- self.photo_layers = None
- self.art_layers = None
-
-
-
- #(self.x.shape)
-
- #print(self.content_image.shape, self.style_image.shape)
-
- self.optimizer = tf.keras.optimizers.Adam(hp.learning_rate)
-
- self.vgg16 = [
- # Block 1
- Conv2D(64, 3, 1, padding="same",
- activation="relu", name="block1_conv1"),
- Conv2D(64, 3, 1, padding="same",
- activation="relu", name="block1_conv2"),
- AveragePooling2D(2, name="block1_pool"),
- # Block 2
- Conv2D(128, 3, 1, padding="same",
- activation="relu", name="block2_conv1"),
- Conv2D(128, 3, 1, padding="same",
- activation="relu", name="block2_conv2"),
- AveragePooling2D(2, name="block2_pool"),
- # Block 3
- Conv2D(256, 3, 1, padding="same",
- activation="relu", name="block3_conv1"),
- Conv2D(256, 3, 1, padding="same",
- activation="relu", name="block3_conv2"),
- Conv2D(256, 3, 1, padding="same",
- activation="relu", name="block3_conv3"),
- AveragePooling2D(2, name="block3_pool"),
- # Block 4
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block4_conv1"),
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block4_conv2"),
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block4_conv3"),
- AveragePooling2D(2, name="block4_pool"),
- # Block 5
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block5_conv1"),
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block5_conv2"),
- Conv2D(512, 3, 1, padding="same",
- activation="relu", name="block5_conv3"),
- AveragePooling2D(2, name="block5_pool"),
- ]
- for layer in self.vgg16:
- layer.trainable = False
-
- self.layer_to_filters = {layer.name: layer.filters for layer in self.vgg16 if "conv" in layer.name}
- self.indexed_layers = [layer for layer in self.vgg16 if "conv1" in layer.name]
- self.desired = [layer.name for layer in self.vgg16 if "conv1" in layer.name]
-
- self.vgg16 = tf.keras.Sequential(self.vgg16, name="vgg")
+ image = tf.image.convert_image_dtype(content_image, tf.float32)
+ self.x = tf.Variable([image])
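+ # the generated image starts as a (batched) copy of the content image rather than random noise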
+
+ self.content_weight = hp.alpha
+ self.style_weight = hp.beta
+
+ self.photo_layers = ['block5_conv2']
+ self.style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
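+ # content is matched at a single deep layer; style is matched at the first
+ # conv layer of each block, following Gatys et al.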
+
+ self.num_photo_layers = len(self.photo_layers)
+ self.num_style_layers = len(self.style_layers)
+
+ self.optimizer = tf.keras.optimizers.Adam(learning_rate=hp.learning_rate, beta_1=hp.beta_1, epsilon=hp.epsilon)
+
+ self.vgg16 = tf.keras.applications.VGG16(include_top=False, weights='vgg16_imagenet.h5')
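+ # include_top=False drops the classifier head; weights are read from a local checkpoint file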
+
+ self.vgg16.trainable = False
+
+ # grab the content-layer output and the style-layer outputs, then reduce each style output to its Gram matrix
+ p_output = self.vgg16.get_layer(self.photo_layers[0]).output
+ style_output = []
+ for layer in self.style_layers:
+ style_output.append(self.vgg16.get_layer(layer).output)
+
+ G = [get_gram(x) for x in style_output]
+
+ self.vgg16 = tf.keras.Model([self.vgg16.input], [p_output, G])
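+ # the wrapped model maps an input image to (content features, list of style Gram matrices)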
+
+ # scale the float image up to [0, 255] and add a batch dimension, matching call()
+ img_to_np = lambda img: np.array([img * 255])
+
+ self.content_target = self.vgg16(img_to_np(content_image))[0]
+ self.style_target = self.vgg16(img_to_np(style_image))[1]
# create a map of the layers to their corresponding number of filters if it is a convolutional layer
def call(self, x):
- layers = []
- for layer in self.vgg16.layers:
- # pass the x through
- x = layer(x)
- # print("Sotech117 is so so sus")
-
- # save the output of each layer if it is in the desired list
- if layer.name in self.desired:
- layers.append(x)
-
- return x, layers
-
- def loss_fn(self, p, a, x):
- # print(p)
- if(self.photo_layers == None):
- _, self.photo_layers = self.call(p)
- # print(a)
- if(self.art_layers == None):
- _, self.art_layers = self.call(a)
- # print(x)
- _, input_layers = self.call(x)
+ # run x (scaled back up to [0, 255]) through the pretrained network; there is no classification head to follow
+ x = self.vgg16(x * 255)
+ return x
+
+ def loss_fn(self, x):
+ x = self.call(x)
- content_l = self.content_loss(self.photo_layers, input_layers)
- style_l = self.style_loss(self.art_layers, input_layers)
- # Equation 7
- print('style_loss', style_l)
- print('content_loss', content_l)
- return (self.alpha * content_l) + (self.beta * style_l)
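+ # Equation 7: total loss is the weighted sum of content and style losses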
+ content_l = self.content_loss(x[0], self.content_target)
+ style_l = self.style_loss(x[1], self.style_target)
+ return (self.content_weight * content_l) + (self.style_weight * style_l)
def content_loss(self, photo_layers, input_layers):
- L_content = tf.constant(0.0)
- for i in range(len(photo_layers)):
- pl = photo_layers[i]
- il = input_layers[i]
- L_content = tf.math.add(L_content, tf.reduce_sum(tf.square(pl - il)))
-
- #print('content loss', L_content)
- return L_content
-
- def layer_loss(self, art_layer, input_layer):
- # vectorize the art_layers
- art_layer = tf.reshape(art_layer, (-1, art_layer.shape[-1]))
- # # vectorize the input_layers
- input_layer = tf.reshape(input_layer, (-1, input_layer.shape[-1]))
- # get the gram matrices
- G_l = tf.matmul(tf.transpose(input_layer), input_layer)
- A_l = tf.matmul(tf.transpose(art_layer), art_layer)
-
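+ # mean squared error between the content features of the photo and of the generated image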
+ return tf.reduce_mean(tf.square(photo_layers - input_layers))
- # vals = []
- # for i in range(input_dim):
- # vals_i = []
- # for j in range(input_dim):
- # il = tf.reshape(input_layers[i], [-1])
- # al = tf.reshape(art_layers[j], [-1])
- # k = tf.reduce_sum(tf.multiply(il, al))
- # vals_i.append(k)
- # vals.append(tf.stack(vals_i))
- # G = tf.stack(vals)
-
- # get the loss per each lateral layer
- # N depends on # of filters in the layer, M depends on hight and width of feature map
- # M_l = art_layer.shape[0]
- # N_l = art_layer.shape[1]
- M_l = self.content_image.shape[0]*self.content_image.shape[1]
- N_l = art_layer.shape[0]
-
- # layer.filters might not work
- E_l = 1/4 * (M_l**(-2)) *(N_l**(-2)) * tf.reduce_sum(tf.square(G_l - A_l))
+ def style_loss(self, art_layers, input_layers):
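+ # mean squared error between Gram matrices, summed over style layers with equal weights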
+ layer_losses = []
+ for created, target in zip(art_layers, input_layers):
+ reduced = tf.reduce_mean(tf.square(created - target))
+ layer_losses.append(reduced)
+ return tf.add_n(layer_losses)
- # while Sotech is botty:
- # Jayson_tatum.tear_acl()
- # return ("this is just another day")
- #print('Layer loss', E_l)
- return E_l
- def style_loss(self, art_layers, input_layers):
- L_style = tf.constant(0.0)
- for i in range(len(art_layers)):
- art_layer = art_layers[i]
- input_layer = input_layers[i]
- L_style = tf.math.add(L_style, (1/5)*self.layer_loss(art_layer, input_layer))
- #print('style loss', L_style)
- return L_style
-
- def train_step(self):
+ def train_step(self, epoch):
with tf.GradientTape(watch_accessed_variables=False) as tape:
tape.watch(self.x)
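+ # watch_accessed_variables=False, so gradients flow only to the generated image self.x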
- loss = self.loss_fn(self.content_image, self.style_image, self.x)
- print('loss', loss)
- #print('self.x', self.x)
- gradients = tape.gradient(loss, [self.x])
- #print('gradients', gradients)
- self.optimizer.apply_gradients(zip(gradients, [self.x]))
+ # loss = self.loss_fn(self.content_image, self.style_image, self.x)
+ loss = self.loss_fn(self.x)
+ print('\rEpoch {}: Loss: {:.4f}'.format(epoch, float(loss)), end='')
+ gradients = tape.gradient(loss, self.x)
+ self.optimizer.apply_gradients([(gradients, self.x)])
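+ # clamp the updated image back into the valid [0, 1] range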
+ self.x.assign(tf.clip_by_value(self.x, clip_value_min=0.0, clip_value_max=1.0))
+