import { AssistantMessage, ASSISTANT_ROLE, TEXT_TYPE, Citation, CHUNK_TYPE } from './types';
import { v4 as uuidv4 } from 'uuid';

/**
 * Incrementally parses a tagged text stream into a single {@link AssistantMessage}.
 *
 * Chunks are appended to an internal buffer; each time a complete
 * `<tag>...</tag>` pair is available it is consumed and folded into the
 * message. Recognised tags are `thought`, `action`, `action_input` and
 * `answer`; any other tag is consumed and ignored.
 */
export class StreamParser {
  /** Matches an attribute-less opening tag such as `<answer>`; group 1 is the tag name. */
  private static readonly OPEN_TAG = /<(\w+)>/;

  private currentMessage: AssistantMessage;
  /** Name of the tag whose closing counterpart is still awaited, or `null` between tags. */
  private currentTag: string | null = null;
  /** Stream text received so far that has not been consumed yet. */
  private buffer = '';

  constructor() {
    this.currentMessage = {
      role: ASSISTANT_ROLE.ASSISTANT,
      content: [],
      thoughts: [],
      actions: [],
      citations: [],
    };
  }

  /**
   * Feeds the next chunk of the stream to the parser.
   *
   * Every complete tag currently in the buffer is processed. An opening tag
   * without its closing tag (or a partially received tag) stays buffered until
   * a later chunk completes it. Text located before an opening tag is dropped.
   *
   * @param chunk - The next piece of raw stream text.
   * @returns The message accumulated so far (the same object on every call).
   */
  parse(chunk: string): AssistantMessage {
    this.buffer += chunk;

    while (this.buffer.length > 0) {
      if (this.currentTag === null) {
        const openTag = StreamParser.OPEN_TAG.exec(this.buffer);
        if (openTag === null) {
          break; // no complete opening tag yet; wait for more input
        }
        this.currentTag = openTag[1];
        this.buffer = this.buffer.slice(openTag.index + openTag[0].length);
      } else {
        // Search for the matching closing tag. Searching for an empty string
        // would always "match" at offset 0 and yield empty content.
        const closeTag = `</${this.currentTag}>`;
        const closeTagIndex = this.buffer.indexOf(closeTag);
        if (closeTagIndex === -1) {
          break; // tag body is still streaming in
        }
        const content = this.buffer.slice(0, closeTagIndex);
        this.processTag(this.currentTag, content);
        this.buffer = this.buffer.slice(closeTagIndex + closeTag.length);
        this.currentTag = null;
      }
    }

    return this.currentMessage;
  }

  /**
   * Routes the body of one completed tag to the matching part of the message.
   *
   * @param tag - Tag name without angle brackets.
   * @param content - Raw text found between the opening and closing tag.
   */
  private processTag(tag: string, content: string): void {
    switch (tag) {
      case 'thought': {
        const thoughts = this.currentMessage.thoughts ?? (this.currentMessage.thoughts = []);
        thoughts.push(content);
        break;
      }
      case 'action': {
        const actions = this.currentMessage.actions ?? (this.currentMessage.actions = []);
        actions.push({ index: actions.length, action: content, action_input: '' });
        break;
      }
      case 'action_input': {
        // The input belongs to the most recent action; without one it is ignored.
        const actions = this.currentMessage.actions ?? [];
        const lastAction = actions[actions.length - 1];
        if (lastAction) {
          lastAction.action_input = content;
        }
        break;
      }
      case 'answer':
        this.processAnswer(content);
        break;
      default:
        // Unknown tags are consumed without affecting the message.
        break;
    }
  }

  /**
   * Splits an answer body into normal and grounded text segments.
   *
   * Each `<grounded_text ...>...</grounded_text>` span becomes a grounded
   * content entry linked to a newly created citation (fresh UUID, empty
   * `chunk_id` and `direct_text`). Text between, before and after those spans
   * becomes normal content entries. All segments are trimmed and empty normal
   * segments are skipped.
   *
   * @param content - Raw text found inside an `answer` tag.
   */
  private processAnswer(content: string): void {
    // NOTE(review): the opening-tag half of this pattern was missing, so the
    // exact attribute format is not known from this file. The pattern accepts
    // `<grounded_text>` with or without attributes - confirm against the producer.
    const groundedTextRegex = /<grounded_text(?:\s[^>]*)?>([\s\S]*?)<\/grounded_text>/g;
    const citations = this.currentMessage.citations ?? (this.currentMessage.citations = []);

    let lastIndex = 0;
    let match: RegExpExecArray | null;
    while ((match = groundedTextRegex.exec(content)) !== null) {
      const [fullMatch, groundedText = ''] = match;

      // Normal text located between the previous span and this one.
      this.appendNormalText(content.slice(lastIndex, match.index));

      const citation_id = uuidv4();
      this.currentMessage.content.push({
        index: this.currentMessage.content.length,
        type: TEXT_TYPE.GROUNDED,
        text: groundedText.trim(),
        citation_ids: [citation_id],
      });
      citations.push({
        citation_id,
        chunk_id: '',
        type: CHUNK_TYPE.TEXT,
        direct_text: '',
      });

      lastIndex = match.index + fullMatch.length;
    }

    // Normal text trailing the last grounded span (or the whole body if none).
    this.appendNormalText(content.slice(lastIndex));
  }

  /** Appends `raw` as a normal content entry after trimming; whitespace-only text is skipped. */
  private appendNormalText(raw: string): void {
    const text = raw.trim();
    if (!text) {
      return;
    }
    this.currentMessage.content.push({
      index: this.currentMessage.content.length,
      type: TEXT_TYPE.NORMAL,
      text,
      citation_ids: null,
    });
  }

  /**
   * @returns The message accumulated so far, without consuming further input.
   */
  getResult(): AssistantMessage {
    return this.currentMessage;
  }
}