diff options
author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-08-18 10:12:35 -0400 |
---|---|---|
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-08-18 10:12:35 -0400 |
commit | 2c38022a7f21d4b498277b18ad31baf24ac3a143 (patch) | |
tree | 006e70734530ad5cc9e08a3cadea200cceefdba5 /src/client/views/nodes/ChatBox/StreamParser.ts | |
parent | daa72b906e3364c2b6a836533fc1980bb63ba303 (diff) |
Attempting streaming content
Diffstat (limited to 'src/client/views/nodes/ChatBox/StreamParser.ts')
-rw-r--r-- | src/client/views/nodes/ChatBox/StreamParser.ts | 125 |
1 files changed, 125 insertions, 0 deletions
// src/client/views/nodes/ChatBox/StreamParser.ts (new file in commit 2c38022a7f21d4b498277b18ad31baf24ac3a143)
import { AssistantMessage, ASSISTANT_ROLE, TEXT_TYPE, Citation, CHUNK_TYPE } from './types';
import { v4 as uuidv4 } from 'uuid';

/**
 * Incrementally parses tagged assistant output into an {@link AssistantMessage}.
 *
 * Text is fed in arbitrary chunks via {@link StreamParser.parse}. The parser
 * buffers input and, each time a complete `<tag>…</tag>` section is available,
 * folds it into the message being built:
 *
 * - `<thought>`      → appended to `thoughts`
 * - `<action>`       → appended to `actions` with an empty `action_input`
 * - `<action_input>` → stored on the most recently added action
 * - `<answer>`       → split into normal and grounded text items in `content`
 *
 * A section is only processed once its closing tag has arrived; partial
 * sections stay in the buffer until a later chunk completes them.
 */
export class StreamParser {
    /**
     * Matches the opening tag of a section this parser knows how to handle.
     * Restricting the match to known names keeps an unrelated tag (a wrapper
     * element, stray markup) from becoming the "current" tag, which would make
     * the parser wait for — or discard everything up to — its closing tag.
     */
    private static readonly OPEN_TAG = /<(thought|action_input|action|answer)>/;

    /** Matches one grounded span inside an answer; group 2 is the span's text. */
    private static readonly GROUNDED_TEXT = /<grounded_text citation_index="([^"]+)">([\s\S]*?)<\/grounded_text>/g;

    private currentMessage: AssistantMessage;
    private currentTag: string | null = null;
    private buffer: string = '';

    constructor() {
        this.currentMessage = {
            role: ASSISTANT_ROLE.ASSISTANT,
            content: [],
            thoughts: [],
            actions: [],
            citations: [],
        };
    }

    /**
     * Appends `chunk` to the internal buffer and processes every section that
     * is now complete.
     *
     * @param chunk The next piece of streamed text; may end mid-tag or mid-section.
     * @returns The message accumulated so far (the same object on every call).
     */
    parse(chunk: string): AssistantMessage {
        this.buffer += chunk;

        while (this.buffer.length > 0) {
            if (this.currentTag === null) {
                // Not inside a section: look for the next known opening tag.
                const openTagMatch = StreamParser.OPEN_TAG.exec(this.buffer);
                if (openTagMatch === null) {
                    break; // Nothing to open yet; wait for more input.
                }
                this.currentTag = openTagMatch[1];
                // Text that precedes the opening tag is dropped along with the tag itself.
                this.buffer = this.buffer.slice(openTagMatch.index + openTagMatch[0].length);
            } else {
                const closeTag = `</${this.currentTag}>`;
                const closeTagIndex = this.buffer.indexOf(closeTag);
                if (closeTagIndex === -1) {
                    break; // Section is still incomplete; keep buffering.
                }
                this.processTag(this.currentTag, this.buffer.slice(0, closeTagIndex));
                this.buffer = this.buffer.slice(closeTagIndex + closeTag.length);
                this.currentTag = null;
            }
        }

        return this.currentMessage;
    }

    /**
     * Folds one completed section into the message.
     *
     * @param tag     Name of the section's tag (without angle brackets).
     * @param content Raw text found between the opening and closing tag.
     */
    private processTag(tag: string, content: string): void {
        switch (tag) {
            case 'thought':
                (this.currentMessage.thoughts ??= []).push(content);
                break;
            case 'action': {
                const actions = (this.currentMessage.actions ??= []);
                actions.push({ index: actions.length, action: content, action_input: '' });
                break;
            }
            case 'action_input': {
                // An input with no preceding action has nothing to attach to and is ignored.
                const actions = this.currentMessage.actions ?? [];
                if (actions.length > 0) {
                    actions[actions.length - 1].action_input = content;
                }
                break;
            }
            case 'answer':
                this.processAnswer(content);
                break;
            default:
                // Other tag names carry nothing this parser records.
                break;
        }
    }

    /**
     * Splits an answer body into content items: plain text becomes
     * `TEXT_TYPE.NORMAL` items, and each `<grounded_text>` span becomes a
     * `TEXT_TYPE.GROUNDED` item linked to a newly generated citation.
     *
     * @param content Raw text found inside the `<answer>` section.
     */
    private processAnswer(content: string): void {
        const groundedTextRegex = StreamParser.GROUNDED_TEXT;
        groundedTextRegex.lastIndex = 0; // Shared global regex: always scan from the start.

        let lastIndex = 0;
        let match: RegExpExecArray | null;

        while ((match = groundedTextRegex.exec(content)) !== null) {
            const fullMatch = match[0];
            const groundedText = match[2];

            // Plain text sitting between the previous span and this one.
            if (match.index > lastIndex) {
                this.pushNormalText(content.slice(lastIndex, match.index));
            }

            const citation_id = uuidv4();
            this.currentMessage.content.push({
                index: this.currentMessage.content.length,
                type: TEXT_TYPE.GROUNDED,
                text: groundedText.trim(),
                citation_ids: [citation_id],
            });

            // NOTE(review): the span's citation_index attribute (match[1]) is not used,
            // and chunk_id / direct_text are left empty here — presumably they are
            // resolved elsewhere; confirm against the consumer of `citations`.
            (this.currentMessage.citations ??= []).push({
                citation_id,
                chunk_id: '',
                type: CHUNK_TYPE.TEXT,
                direct_text: '',
            });

            lastIndex = match.index + fullMatch.length;
        }

        // Plain text after the last grounded span (or the whole answer if there were none).
        if (lastIndex < content.length) {
            this.pushNormalText(content.slice(lastIndex));
        }
    }

    /** Appends `text` as a normal content item after trimming; whitespace-only text is skipped. */
    private pushNormalText(text: string): void {
        const trimmed = text.trim();
        if (trimmed) {
            this.currentMessage.content.push({
                index: this.currentMessage.content.length,
                type: TEXT_TYPE.NORMAL,
                text: trimmed,
                citation_ids: null,
            });
        }
    }

    /**
     * @returns The message accumulated so far (the same object `parse` returns).
     */
    getResult(): AssistantMessage {
        return this.currentMessage;
    }
}