author     David Doan <daviddoan@davids-mbp-3.devices.brown.edu>  2022-05-09 00:25:39 -0400
committer  David Doan <daviddoan@davids-mbp-3.devices.brown.edu>  2022-05-09 00:25:39 -0400
commit     a0870ac3f1f84278c5b9fe7f78f6b1af1d1f33e9 (patch)
tree       440f9c17f23042e32c7c495c49d36bb838fcd73a /losses.py
parent     18f1f7bddcb63502120581f3fa24b980559ffa9f (diff)
clean and refactor code for submission
Diffstat (limited to 'losses.py')
-rw-r--r--  losses.py  229
1 file changed, 70 insertions(+), 159 deletions(-)
@@ -5,182 +5,93 @@ from tensorflow.keras.layers import \
 from skimage import transform
 import hyperparameters as hp
 
+def get_gram(style_output):
+    style_shape = tf.shape(style_output)
+    output = tf.linalg.einsum('bijc,bijd->bcd', style_output, style_output)
+    dimensions = style_shape[1] * style_shape[2]
+    dimensions = tf.cast(dimensions, tf.float32)
+    return output / dimensions
+
+
 class YourModel(tf.keras.Model):
     """ Your own neural network model. """
 
     def __init__(self, content_image, style_image):
         #normalize these images to float values
         super(YourModel, self).__init__()
 
-        self.content_image = transform.resize(content_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True, clip=False)
-        self.content_image = tf.expand_dims(self.content_image, axis=0)
-        # self.content_image = self.content_image.astype('uint8')
-        print(self.content_image)
+        self.content_image = transform.resize(content_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True)
+        self.content_image = tf.image.convert_image_dtype(self.content_image, tf.float32)
+
+        self.style_image = transform.resize(style_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True)
+        self.style_image = tf.image.convert_image_dtype(self.style_image, tf.float32)
 
-        #perhaps consider cropping to avoid distortion
-        self.style_image = transform.resize(style_image, tf.shape(style_image), anti_aliasing=True, preserve_range=True, clip=False)
-        self.style_image = tf.expand_dims(self.style_image, axis=0)
-        # self.style_image = self.style_image.astype('uint8')
-        #self.x = tf.Variable(initial_value = self.content_image.numpy().astype(np.float32), trainable=True)
-        self.x = tf.Variable(initial_value = np.random.rand(self.content_image.shape[0],
-            self.content_image.shape[1], self.content_image.shape[2], self.content_image.shape[3]).astype(np.float32), trainable=True)
-        # self.x = self.x.astype('uint8')
-
-        self.alpha = hp.alpha
-        self.beta = hp.beta
-
-        self.photo_layers = None
-        self.art_layers = None
-
-
-
-        #(self.x.shape)
-
-        #print(self.content_image.shape, self.style_image.shape)
-
-        self.optimizer = tf.keras.optimizers.Adam(hp.learning_rate)
-
-        self.vgg16 = [
-            # Block 1
-            Conv2D(64, 3, 1, padding="same",
-                   activation="relu", name="block1_conv1"),
-            Conv2D(64, 3, 1, padding="same",
-                   activation="relu", name="block1_conv2"),
-            AveragePooling2D(2, name="block1_pool"),
-            # Block 2
-            Conv2D(128, 3, 1, padding="same",
-                   activation="relu", name="block2_conv1"),
-            Conv2D(128, 3, 1, padding="same",
-                   activation="relu", name="block2_conv2"),
-            AveragePooling2D(2, name="block2_pool"),
-            # Block 3
-            Conv2D(256, 3, 1, padding="same",
-                   activation="relu", name="block3_conv1"),
-            Conv2D(256, 3, 1, padding="same",
-                   activation="relu", name="block3_conv2"),
-            Conv2D(256, 3, 1, padding="same",
-                   activation="relu", name="block3_conv3"),
-            AveragePooling2D(2, name="block3_pool"),
-            # Block 4
-            Conv2D(512, 3, 1, padding="same",
-                   activation="relu", name="block4_conv1"),
-            Conv2D(512, 3, 1, padding="same",
-                   activation="relu", name="block4_conv2"),
-            Conv2D(512, 3, 1, padding="same",
-                   activation="relu", name="block4_conv3"),
-            AveragePooling2D(2, name="block4_pool"),
-            # Block 5
-            Conv2D(512, 3, 1, padding="same",
-                   activation="relu", name="block5_conv1"),
-            Conv2D(512, 3, 1, padding="same",
-                   activation="relu", name="block5_conv2"),
-            Conv2D(512, 3, 1, padding="same",
-                   activation="relu", name="block5_conv3"),
-            AveragePooling2D(2, name="block5_pool"),
-        ]
-        for layer in self.vgg16:
-            layer.trainable = False
-
-        self.layer_to_filters = {layer.name: layer.filters for layer in self.vgg16 if "conv" in layer.name}
-        self.indexed_layers = [layer for layer in self.vgg16 if "conv1" in layer.name]
-        self.desired = [layer.name for layer in self.vgg16 if "conv1" in layer.name]
-
-        self.vgg16 = tf.keras.Sequential(self.vgg16, name="vgg")
+        image = tf.image.convert_image_dtype(content_image, tf.float32)
+        self.x = tf.Variable([image])
+
+        self.content_weight = hp.alpha
+        self.style_weight = hp.beta
+
+        self.photo_layers = ['block5_conv2']
+        self.style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
+
+        self.num_photo_layers = len(self.photo_layers)
+        self.num_style_layers = len(self.style_layers)
+
+        self.optimizer = tf.keras.optimizers.Adam(learning_rate=hp.learning_rate, beta_1=hp.beta_1, epsilon=hp.epsilon)
+
+        self.vgg16 = tf.keras.applications.VGG16(include_top=False, weights='vgg16_imagenet.h5')
+
+        self.vgg16.trainable = False
+
+        # creating the Gram Matrix
+        p_output = self.vgg16.get_layer(self.photo_layers[0]).output
+        style_output = []
+        for layer in self.style_layers:
+            style_output.append(self.vgg16.get_layer(layer).output)
+
+        G = [get_gram(x) for x in style_output]
+
+        self.vgg16 = tf.keras.Model([self.vgg16.input], [p_output, G])
+
+        # figure this out Michael
+        img_to_np = lambda img: np.array([img * 255])
+
+        self.content_target = self.vgg16(img_to_np(content_image))[0]
+        self.style_target = self.vgg16(img_to_np(style_image))[1]
 
         # create a map of the layers to their corresponding number of filters if it is a convolutional layer
 
     def call(self, x):
-        layers = []
-        for layer in self.vgg16.layers:
-            # pass the x through
-            x = layer(x)
-            # print("Sotech117 is so so sus")
-
-            # save the output of each layer if it is in the desired list
-            if layer.name in self.desired:
-                layers.append(x)
-
-        return x, layers
-
-    def loss_fn(self, p, a, x):
-        # print(p)
-        if(self.photo_layers == None):
-            _, self.photo_layers = self.call(p)
-        # print(a)
-        if(self.art_layers == None):
-            _, self.art_layers = self.call(a)
-        # print(x)
-        _, input_layers = self.call(x)
+        # call onto our pretrained network, since we don't have a classifcation head to follow
+        x = self.vgg16(x * 255)
+        return x
+
+    def loss_fn(self, x):
+        x = self.call(x)
 
-        content_l = self.content_loss(self.photo_layers, input_layers)
-        style_l = self.style_loss(self.art_layers, input_layers)
-        # Equation 7
-        print('style_loss', style_l)
-        print('content_loss', content_l)
-        return (self.alpha * content_l) + (self.beta * style_l)
+        content_l = self.content_loss(x[0], self.content_target)
+        style_l = self.style_loss(x[1], self.style_target)
+        return (self.content_weight * content_l) + (self.style_weight * style_l)
 
     def content_loss(self, photo_layers, input_layers):
-        L_content = tf.constant(0.0)
-        for i in range(len(photo_layers)):
-            pl = photo_layers[i]
-            il = input_layers[i]
-            L_content = tf.math.add(L_content, tf.reduce_sum(tf.square(pl - il)))
-
-        #print('content loss', L_content)
-        return L_content
-
-    def layer_loss(self, art_layer, input_layer):
-        # vectorize the art_layers
-        art_layer = tf.reshape(art_layer, (-1, art_layer.shape[-1]))
-        # # vectorize the input_layers
-        input_layer = tf.reshape(input_layer, (-1, input_layer.shape[-1]))
-        # get the gram matrices
-        G_l = tf.matmul(tf.transpose(input_layer), input_layer)
-        A_l = tf.matmul(tf.transpose(art_layer), art_layer)
-
+        return tf.reduce_mean(tf.square(photo_layers - input_layers))
 
-        # vals = []
-        # for i in range(input_dim):
-        #     vals_i = []
-        #     for j in range(input_dim):
-        #         il = tf.reshape(input_layers[i], [-1])
-        #         al = tf.reshape(art_layers[j], [-1])
-        #         k = tf.reduce_sum(tf.multiply(il, al))
-        #         vals_i.append(k)
-        #     vals.append(tf.stack(vals_i))
-        # G = tf.stack(vals)
-
-        # get the loss per each lateral layer
-        # N depends on # of filters in the layer, M depends on hight and width of feature map
-        # M_l = art_layer.shape[0]
-        # N_l = art_layer.shape[1]
-        M_l = self.content_image.shape[0]*self.content_image.shape[1]
-        N_l = art_layer.shape[0]
-
-        # layer.filters might not work
-        E_l = 1/4 * (M_l**(-2)) *(N_l**(-2)) * tf.reduce_sum(tf.square(G_l - A_l))
+    def style_loss(self, art_layers, input_layers):
+        layer_losses = []
+        for created, target in zip(art_layers, input_layers):
+            reduced = tf.reduce_mean(tf.square(created - target))
+            layer_losses.append(reduced)
+        return tf.add_n(layer_losses)
 
-        # while Sotech is botty:
-        #     Jayson_tatum.tear_acl()
-        #     return ("this is just another day")
-        #print('Layer loss', E_l)
-        return E_l
-
-    def style_loss(self, art_layers, input_layers):
-        L_style = tf.constant(0.0)
-        for i in range(len(art_layers)):
-            art_layer = art_layers[i]
-            input_layer = input_layers[i]
-            L_style = tf.math.add(L_style, (1/5)*self.layer_loss(art_layer, input_layer))
-        #print('style loss', L_style)
-        return L_style
-
-    def train_step(self):
+    def train_step(self, epoch):
         with tf.GradientTape(watch_accessed_variables=False) as tape:
             tape.watch(self.x)
-            loss = self.loss_fn(self.content_image, self.style_image, self.x)
-            print('loss', loss)
-            #print('self.x', self.x)
-            gradients = tape.gradient(loss, [self.x])
-            #print('gradients', gradients)
-            self.optimizer.apply_gradients(zip(gradients, [self.x]))
+            # loss = self.loss_fn(self.content_image, self.style_image, self.x)
+            loss = self.loss_fn(self.x)
+            print('\rEpoch {}: Loss: {:.4f}'.format(epoch, loss), end='')
+            gradients = tape.gradient(loss, self.x)
+            self.optimizer.apply_gradients([(gradients, self.x)])
+            self.x.assign(tf.clip_by_value(self.x, clip_value_min=0.0, clip_value_max=1.0))
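Editor's note on the new get_gram helper: it computes the Gram matrix of a feature map, i.e. the pairwise inner products between channels summed over all spatial positions, divided by the number of positions. The einsum form is equivalent to the more common reshape-and-matmul formulation. A minimal sketch verifying that equivalence, assuming only TensorFlow (the get_gram_matmul helper below is ours, for comparison; it is not part of the commit):

    import tensorflow as tf

    def get_gram_matmul(features):
        # Flatten (batch, h, w, c) to (batch, h*w, c), then G = F^T F / (h*w).
        shape = tf.shape(features)
        flat = tf.reshape(features, (shape[0], shape[1] * shape[2], shape[3]))
        gram = tf.matmul(flat, flat, transpose_a=True)
        return gram / tf.cast(shape[1] * shape[2], tf.float32)

    x = tf.random.uniform((1, 8, 8, 16))
    # einsum form, as in the commit's get_gram
    g_einsum = tf.linalg.einsum('bijc,bijd->bcd', x, x) / 64.0  # 64 = 8 * 8 spatial positions
    g_matmul = get_gram_matmul(x)
    print(float(tf.reduce_max(tf.abs(g_einsum - g_matmul))))  # ~0.0, up to float error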
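Since train_step now takes an epoch index and keeps the stylized image in self.x (clipped to [0, 1] after each step), a driver loop might look like the sketch below. This is only an assumption about how losses.py is meant to be used; the image-loading calls and hp.num_epochs are hypothetical and not part of this repository:

    import numpy as np
    from skimage import io, img_as_float32
    import hyperparameters as hp
    from losses import YourModel

    # Hypothetical loading step: HxWx3 float arrays in [0, 1].
    content = img_as_float32(io.imread('content.jpg'))
    style = img_as_float32(io.imread('style.jpg'))

    model = YourModel(content, style)
    for epoch in range(hp.num_epochs):  # hp.num_epochs is an assumed hyperparameter
        model.train_step(epoch)

    # self.x stays clipped to [0, 1], so it can be saved directly.
    io.imsave('stylized.png', (model.x[0].numpy() * 255).astype(np.uint8))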