Diffstat (limited to 'src/client/apis/gpt')
-rw-r--r-- | src/client/apis/gpt/GPT.ts | 118
1 file changed, 108 insertions, 10 deletions
diff --git a/src/client/apis/gpt/GPT.ts b/src/client/apis/gpt/GPT.ts
index fe9aa8ded..e743f01f5 100644
--- a/src/client/apis/gpt/GPT.ts
+++ b/src/client/apis/gpt/GPT.ts
@@ -12,15 +12,17 @@ enum GPTCallType {
     DESCRIBE = 'describe',
     MERMAID = 'mermaid',
     DATA = 'data',
+    DRAW = 'draw',
+    COLOR = 'color',
     RUBRIC = 'rubric',
     TYPE = 'type',
     SUBSET = 'subset',
     INFO = 'info',
-    TEMPLATE = "template",
+    TEMPLATE = 'template',
     VIZSUM = 'vizsum',
     VIZSUM2 = 'vizsum2',
     FILL = 'fill',
-    COMPLETEPROMPT = 'completeprompt'
+    COMPLETEPROMPT = 'completeprompt',
 }
 
 type GPTCallOpts = {
@@ -63,11 +65,43 @@ const callTypeMap: { [type: string]: GPTCallOpts } = {
         temp: 0,
         prompt: 'List unique differences between the content of the UserAnswer and Rubric. Before each difference, label it and provide any additional information the UserAnswer missed and explain it in second person without separating it into UserAnswer and Rubric content and additional information. If there are no differences, say correct',
     },
-    template: { model: 'gpt-4-turbo', maxTokens: 512, temp: 0.5, prompt: 'You will be given a list of field descriptions for multiple templates in the format {field #0: “description”}{field #1: “description”}{...}, and a list of column descriptions in the format {“title”: “description”}{...}. Your job is to match columns with fields based on their descriptions. Your output should be in the following JSON format: {“Template title”:{“#”: “title”, “#”: “title”, “#”: “title” …}, “Template title”:{“#”: “title”, “#”: “title”, “#”: “title” …}} where “Template title” represents the template, # represents the field # and “title” the title of the column assigned to it. A filled out example might look like {“fivefield2”:{“0”:”Name”, “1”:”Image”, “2”:”Caption”, “3”:”Position”, “4”:”Stats”}, “fivefield3”:{0:”Image”, 1:”Name”, 2:”Caption”, 3:”Stats”, 4:”Position”}. Include one object for each template. IT IS VERY IMPORTANT THAT YOU ONLY INCLUDE TEXT IN THE FORMAT ABOVE, WITH NO ADDITIONS WHATSOEVER. Do not include extraneous ‘#’ characters, ‘column’ for columns, or ‘template’ for templates: ONLY THE TITLES AND NUMBERS. There should never be one column assigned to more than one field (ie. if the “name” column is assigned to field 1, it can’t be assigned to any other fields) . Do this for each template whose fields are described. The descriptions are as follows:' },
-    vizsum: { model: 'gpt-4-turbo', maxTokens: 512, temp: 0.5, prompt: 'Your job is to provide brief descriptions for columns in a dataset based on example rows. Your descriptions should be geared towards how each column’s data might fit together into a visual template. Would they make good titles, main focuses, captions, descriptions, etc. Pay special attention to connections between columns, i.e. is there one column that specifically seems to describe/be related to another more than the rest? You should provide your analysis in JSON format like so: {“col1”:”description”, “col2”:”description”, …}. DO NOT INCLUDE ANY OTHER TEXT, ONLY THE JSON.'},
-    vizsum2: { model: 'gpt-4-turbo', maxTokens: 512, temp: 0.5, prompt: 'Your job is to provide structured information on columns in a dataset based on example rows. You will categorize each column in two ways: by type and size. The size categories are as follows: tiny (one or two words), small (a sentence/multiple words), medium (a few sentences), large (a longer paragraph), and huge (a very long or multiple paragraphs). The type categories are as follows: visual (links/file paths to images, pdfs, maps, or any other visual media type), and text (plain text that isn’t a link/file path). Visual media should be assumed to have size “medium” “large” or “huge”. You will give your responses in JSON format, like so: {“title (of column)”:{“type”:”text”, “size”:”small”}, “title (of column)”:{“type”:”visual”, “size”:”medium”}, …}. DO NOT INCLUDE ANY OTHER TEXT, ONLY THE JSON.'},
-    fill: { model: 'gpt-4-turbo', maxTokens: 512, temp: 0.5, prompt: 'Your job is to generate content for fields based on a user prompt and background context given to you. You will be given the content of the other fields present in the format: ---- Field # (field title): content ---- Field # (field title): content ----- (etc.) You will be given info on the columns to generate for in the format ---- title: , prompt: , word limit: , assigned field: ----. For each column, based on the prompt, word limit, and the context of existing fields, you should generate a short response in the following JSON format: {“___”(where ___ is the title from the column description with no additions): {“number”:”#” (where # is the assigned field of the column), “content”:”response” (where response is your response to the prompt in the column info)}}. Here’s another example of the format with only one column: {“position”: {“number”:”2”, “content”:”*your response goes here*”}}. ONLY INCLUDE THE JSON TEXT WITH NO OTHER ADDED TEXT. YOUR RESPONSE MUST BE VALID JSON. The word limit for each column applies only to that column’s response. Do not include speculation or information that you can’t glean from your factual knowledge or the content of the other fields (no description of images you can’t see, for example). You should include one object per column you are provided info on.'},
-    completeprompt: {model: 'gpt-4-turbo', maxTokens: 512, temp: 0.5, prompt: 'Your prompt is as follows:'},
+    template: {
+        model: 'gpt-4-turbo',
+        maxTokens: 512,
+        temp: 0.5,
+        prompt: 'You will be given a list of field descriptions for multiple templates in the format {field #0: “description”}{field #1: “description”}{...}, and a list of column descriptions in the format {“title”: “description”}{...}. Your job is to match columns with fields based on their descriptions. Your output should be in the following JSON format: {“Template title”:{“#”: “title”, “#”: “title”, “#”: “title” …}, “Template title”:{“#”: “title”, “#”: “title”, “#”: “title” …}} where “Template title” represents the template, # represents the field # and “title” the title of the column assigned to it. A filled out example might look like {“fivefield2”:{“0”:”Name”, “1”:”Image”, “2”:”Caption”, “3”:”Position”, “4”:”Stats”}, “fivefield3”:{0:”Image”, 1:”Name”, 2:”Caption”, 3:”Stats”, 4:”Position”}. Include one object for each template. IT IS VERY IMPORTANT THAT YOU ONLY INCLUDE TEXT IN THE FORMAT ABOVE, WITH NO ADDITIONS WHATSOEVER. Do not include extraneous ‘#’ characters, ‘column’ for columns, or ‘template’ for templates: ONLY THE TITLES AND NUMBERS. There should never be one column assigned to more than one field (ie. if the “name” column is assigned to field 1, it can’t be assigned to any other fields) . Do this for each template whose fields are described. The descriptions are as follows:',
+    },
+    vizsum: {
+        model: 'gpt-4-turbo',
+        maxTokens: 512,
+        temp: 0.5,
+        prompt: 'Your job is to provide brief descriptions for columns in a dataset based on example rows. Your descriptions should be geared towards how each column’s data might fit together into a visual template. Would they make good titles, main focuses, captions, descriptions, etc. Pay special attention to connections between columns, i.e. is there one column that specifically seems to describe/be related to another more than the rest? You should provide your analysis in JSON format like so: {“col1”:”description”, “col2”:”description”, …}. DO NOT INCLUDE ANY OTHER TEXT, ONLY THE JSON.',
+    },
+    vizsum2: {
+        model: 'gpt-4-turbo',
+        maxTokens: 512,
+        temp: 0.5,
+        prompt: 'Your job is to provide structured information on columns in a dataset based on example rows. You will categorize each column in two ways: by type and size. The size categories are as follows: tiny (one or two words), small (a sentence/multiple words), medium (a few sentences), large (a longer paragraph), and huge (a very long or multiple paragraphs). The type categories are as follows: visual (links/file paths to images, pdfs, maps, or any other visual media type), and text (plain text that isn’t a link/file path). Visual media should be assumed to have size “medium” “large” or “huge”. You will give your responses in JSON format, like so: {“title (of column)”:{“type”:”text”, “size”:”small”}, “title (of column)”:{“type”:”visual”, “size”:”medium”}, …}. DO NOT INCLUDE ANY OTHER TEXT, ONLY THE JSON.',
+    },
+    fill: {
+        model: 'gpt-4-turbo',
+        maxTokens: 512,
+        temp: 0.5,
+        prompt: 'Your job is to generate content for fields based on a user prompt and background context given to you. You will be given the content of the other fields present in the format: ---- Field # (field title): content ---- Field # (field title): content ----- (etc.) You will be given info on the columns to generate for in the format ---- title: , prompt: , word limit: , assigned field: ----. For each column, based on the prompt, word limit, and the context of existing fields, you should generate a short response in the following JSON format: {“___”(where ___ is the title from the column description with no additions): {“number”:”#” (where # is the assigned field of the column), “content”:”response” (where response is your response to the prompt in the column info)}}. Here’s another example of the format with only one column: {“position”: {“number”:”2”, “content”:”*your response goes here*”}}. ONLY INCLUDE THE JSON TEXT WITH NO OTHER ADDED TEXT. YOUR RESPONSE MUST BE VALID JSON. The word limit for each column applies only to that column’s response. Do not include speculation or information that you can’t glean from your factual knowledge or the content of the other fields (no description of images you can’t see, for example). You should include one object per column you are provided info on.',
+    },
+    completeprompt: { model: 'gpt-4-turbo', maxTokens: 512, temp: 0.5, prompt: 'Your prompt is as follows:' },
+    draw: {
+        model: 'gpt-4o',
+        maxTokens: 1024,
+        temp: 0.8,
+        prompt: 'Given an item, a level of complexity from 1-10, and a size in pixels, generate a detailed and colored line drawing representation of it. Make sure every element has the stroke field filled out. More complex drawings will have much more detail and strokes. The drawing should be in SVG format with no additional text or comments. For path coordinates, make sure you format with a comma between numbers, like M100,200 C150,250 etc. The only supported commands are line, ellipse, circle, rect, polygon, and path with M, Q, C, and L so only use those.',
+    },
+    color: {
+        model: 'gpt-4o',
+        maxTokens: 1024,
+        temp: 0.5,
+        prompt: 'You will be coloring drawings. You will be given what the drawing is, then a list of descriptions for parts of the drawing. Based on each description, respond with the stroke and fill color that it should be. Follow the rules: 1. Avoid using black for stroke color 2. Make the stroke color 1-3 shades darker than the fill color 3. Use the same colors when possible. Format as {#abcdef #abcdef}, making sure there’s a color for each description, and do not include any additional text.',
+    },
 };
 let lastCall = '';
 let lastResp = '';
@@ -77,10 +111,10 @@ let lastResp = '';
  * @param inputText Text to process
  * @returns AI Output
  */
-const gptAPICall = async (inputTextIn: string, callType: GPTCallType, prompt?: string) => {
+const gptAPICall = async (inputTextIn: string, callType: GPTCallType, prompt?: any, dontCache?: boolean) => {
     const inputText = [GPTCallType.SUMMARY, GPTCallType.FLASHCARD, GPTCallType.QUIZ].includes(callType) ? inputTextIn + '.' : inputTextIn;
     const opts: GPTCallOpts = callTypeMap[callType];
-    if (lastCall === inputText) return lastResp;
+    if (lastCall === inputText && dontCache !== true) return lastResp;
 
     try {
         lastCall = inputText;
@@ -163,5 +197,69 @@ const gptImageLabel = async (src: string): Promise<string> => {
         return 'Error connecting with API';
     }
 };
+const gptHandwriting = async (src: string): Promise<string> => {
+    try {
+        const response = await openai.chat.completions.create({
+            model: 'gpt-4o',
+            temperature: 0,
+            messages: [
+                {
+                    role: 'user',
+                    content: [
+                        { type: 'text', text: 'What does this handwriting say? Only return the text.' },
+                        {
+                            type: 'image_url',
+                            image_url: {
+                                url: `${src}`,
+                                detail: 'low',
+                            },
+                        },
+                    ],
+                },
+            ],
+        });
+        if (response.choices[0].message.content) {
+            return response.choices[0].message.content;
+        }
+        return 'Missing labels';
+    } catch (err) {
+        console.log(err);
+        return 'Error connecting with API';
+    }
+};
+
+const gptDrawingColor = async (image: string, coords: string[]): Promise<string> => {
+    try {
+        const response = await openai.chat.completions.create({
+            model: 'gpt-4o',
+            temperature: 0,
+            messages: [
+                {
+                    role: 'user',
+                    content: [
+                        {
+                            type: 'text',
+                            text: `Identify what the drawing in the image represents in 1-5 words. Then, given a list of lists of coordinates, where each list is the coordinates for one stroke of the drawing, determine which part of the drawing each stroke is. Return just what the item is, followed by ~~~ then only your descriptions in a list like [description, description, ...]. Here are the coordinates: ${coords}`,
+                        },
+                        {
+                            type: 'image_url',
+                            image_url: {
+                                url: `${image}`,
+                                detail: 'low',
+                            },
+                        },
+                    ],
+                },
+            ],
+        });
+        if (response.choices[0].message.content) {
+            return response.choices[0].message.content;
+        }
+        return 'Missing labels';
+    } catch (err) {
+        console.log(err);
+        return 'Error connecting with API';
+    }
+};
 
-export { gptAPICall, gptImageCall, GPTCallType, gptImageLabel, gptGetEmbedding };
+export { gptAPICall, gptImageCall, GPTCallType, gptImageLabel, gptGetEmbedding, gptHandwriting, gptDrawingColor };
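
For reference, a minimal sketch of how calling code might exercise the new DRAW and COLOR call types and the new dontCache flag on gptAPICall. The relative import path, the wrapper function, and the example prompts are assumptions for illustration, not part of this commit.

// Sketch only: import path, function name, and prompt strings are assumptions.
import { gptAPICall, GPTCallType } from './client/apis/gpt/GPT';

async function sketchDrawing(item: string) {
    // DRAW returns an SVG string; passing dontCache = true skips the lastCall/lastResp
    // cache so repeating the same prompt still produces a fresh drawing.
    const svg = await gptAPICall(`${item}, complexity 6, 300px`, GPTCallType.DRAW, undefined, true);
    // COLOR takes the item plus per-part descriptions and returns stroke/fill pairs
    // formatted like {#abcdef #abcdef}, one pair per description.
    const colors = await gptAPICall(`${item}: [body, outline, background]`, GPTCallType.COLOR);
    return { svg, colors };
}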
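
Likewise, a hedged sketch of the newly exported gptHandwriting and gptDrawingColor helpers; the data-URL argument and stroke-coordinate list are placeholders, not values taken from this diff.

// Sketch only: the image source and stroke coordinates are placeholder inputs.
import { gptHandwriting, gptDrawingColor } from './client/apis/gpt/GPT';

async function labelInk(canvasDataUrl: string, strokeCoords: string[]) {
    // Transcribes handwriting from an image URL or data URL; resolves to the text,
    // 'Missing labels', or 'Error connecting with API'.
    const text = await gptHandwriting(canvasDataUrl);
    // Identifies the drawing and describes the part each stroke belongs to; the raw
    // response looks like "<item> ~~~ [description, description, ...]".
    const partSummary = await gptDrawingColor(canvasDataUrl, strokeCoords);
    return { text, partSummary };
}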