diff options
-rw-r--r-- | src/client/apis/vectorstore/VectorstoreUpload.ts | 142 | ||||
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 53 |
2 files changed, 179 insertions, 16 deletions
diff --git a/src/client/apis/vectorstore/VectorstoreUpload.ts b/src/client/apis/vectorstore/VectorstoreUpload.ts
new file mode 100644
index 000000000..78f652d9a
--- /dev/null
+++ b/src/client/apis/vectorstore/VectorstoreUpload.ts
@@ -0,0 +1,142 @@
+import * as dotenv from 'dotenv';
+import { Pinecone, ServerlessSpec } from '@pinecone-database/pinecone';
+import { Configuration, OpenAI } from 'openai';
+import * as fs from 'fs';
+import * as path from 'path';
+import { Document } from './file_processing'; // Assuming you have this file
+import { getSummarizedSystemPrompt, getSummarizedChunksPrompt } from './prompt_generator'; // Assuming you have this file
+import { CohereClient } from 'cohere-ai';
+
+dotenv.config();
+
+const pinecone = new Pinecone({
+    apiKey: process.env.PINECONE_API_KEY || '',
+});
+
+/** Metadata stored alongside each embedded chunk. */
+interface ChunkMetaData {
+    text: string;
+    type: string;
+    original_document: string;
+    file_path: string;
+    location: string;
+    start_page: number;
+    end_page: number;
+}
+
+/** A single vector record: id, embedding values, and its metadata. */
+interface Chunk {
+    id: string;
+    values: number[];
+    metadata: ChunkMetaData;
+}
+
+/**
+ * Keeps an in-memory list of documents and mirrors their chunks into the
+ * 'pdf-chatbot' Pinecone index; queries are embedded with Cohere.
+ */
+class Vectorstore {
+    private documents: Document[];
+    private index_name: string;
+    // Promise for the Pinecone index handle. createIndex() is async and a constructor
+    // cannot await, so every user of the index awaits this promise instead.
+    private index: Promise<any>; // Type this properly based on Pinecone's TypeScript definitions
+    private documents_folder: string;
+
+    constructor() {
+        this.documents = [];
+        this.index_name = 'pdf-chatbot';
+        this.index = this.createIndex();
+        // Log a failed index setup right away; later awaits of this.index still reject.
+        this.index.catch(e => console.error(`Error creating index: ${e}`));
+        this.documents_folder = path.join('output', 'documents');
+        fs.mkdirSync(this.documents_folder, { recursive: true });
+    }
+
+    /**
+     * Registers a document and starts uploading its chunks in the background.
+     * Upload failures are logged rather than thrown because this method is synchronous.
+     */
+    addDocument(document: Document): void {
+        this.documents.push(document);
+        this.indexDocument(document).catch(e => console.error(`Error indexing document: ${e}`));
+    }
+
+    /** Upserts every chunk of the document into the Pinecone index. */
+    private async indexDocument(document: Document): Promise<void> {
+        console.log('Uploading vectors to content namespace..');
+        const index = await this.index;
+        await index.upsert(document.chunks);
+    }
+
+    /**
+     * Embeds the query with Cohere and returns the top_k nearest chunks from Pinecone.
+     * Returns an empty array if embedding or the index query fails.
+     */
+    async retrieve(query: string, top_k: number = 10): Promise<Chunk[]> {
+        console.log(`Retrieving chunks for query: ${query}`);
+
+        const cohere = new CohereClient({
+            token: process.env.COHERE_API_KEY || '',
+        });
+
+        try {
+            const embedResponse = await cohere.embed({
+                texts: [query],
+                model: 'embed-english-v3.0',
+                inputType: 'search_query',
+            });
+
+            const queryEmb = embedResponse.embeddings[0];
+
+            const index = await this.index;
+            const queryResponse = await index.query({
+                vector: queryEmb,
+                topK: top_k,
+                includeValues: true,
+                includeMetadata: true,
+            });
+
+            return queryResponse.matches as Chunk[];
+        } catch (e) {
+            console.error(`Error embedding query: ${e}`);
+            return [];
+        }
+    }
+
+    /** Returns the document summaries as a numbered, newline-terminated list. */
+    getSummaries(): string {
+        const summaries = this.documents.map(doc => doc.summary);
+        return summaries.map((summary, i) => `${i + 1}. ${summary}`).join('\n') + '\n';
+    }
+
+    /**
+     * Creates the serverless index if it does not exist yet and returns a handle to it.
+     * The dimension (1024) is the embedding size of Cohere's 'embed-english-v3.0' model used in retrieve().
+     */
+    private async createIndex(): Promise<any> {
+        // listIndexes() resolves to an object of the form { indexes: [{ name, ... }] },
+        // not to an array of names, so compare against each entry's name.
+        const { indexes = [] } = await pinecone.listIndexes();
+        if (indexes.some(existing => existing.name === this.index_name)) {
+            console.log('Index already exists...');
+        } else {
+            await pinecone.createIndex({
+                name: this.index_name,
+                dimension: 1024,
+                metric: 'cosine',
+                spec: {
+                    serverless: {
+                        cloud: 'aws',
+                        region: 'us-east-1',
+                    },
+                },
+                // Block until the new index can accept requests; upserts may follow immediately.
+                waitUntilReady: true,
+            });
+        }
+        return pinecone.Index(this.index_name);
+    }
+}
+
+export { Vectorstore, Chunk, ChunkMetaData };
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index f0ca983d7..77d8af724 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -147,21 +147,42 @@ export default class AssistantManager extends ApiManager {
                 }
             },
         });
-        // register({
-        //     method: Method.POST,
-        //     subscription: '/uploadPDF',
-        //     secureHandler: async ({ req, res }) => {
-        //         const { file_path } = req.body;
-        //         const fullPath = path.join(publicDirectory, file_path);
-        //         const fileData = createReadStream(fullPath);
-        //         try {
-        //             const response = await axios.post('http://localhost:8080/uploadPDF', { fileData });
-        //             res.send({ response: response });
-        //         } catch (error: any) {
-        //             console.error('Error communicating with chatbot:', error);
-        //             res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message });
-        //         }
-        //     },
-        // });
+
+        register({
+            method: Method.POST,
+            subscription: '/createDocument',
+            secureHandler: async ({ req, res }) => {
+                const { file_path } = req.body;
+                if (typeof file_path !== 'string' || !file_path) {
+                    res.status(400).send({ error: 'file_path must be a non-empty string' });
+                    return;
+                }
+
+                // file_path comes from the request body: normalize it and refuse anything that
+                // escapes publicDirectory (e.g. '../../etc/passwd') before touching the disk.
+                const public_root = path.resolve(publicDirectory);
+                const public_path = path.resolve(path.join(publicDirectory, file_path));
+                if (!public_path.startsWith(public_root + path.sep)) {
+                    res.status(400).send({ error: 'file_path must be inside the public directory' });
+                    return;
+                }
+                const file_name = path.basename(file_path);
+
+                try {
+                    // Read file data and convert to base64 without blocking the event loop
+                    const file_data = await fs.promises.readFile(public_path, { encoding: 'base64' });
+
+                    const response = await axios.post('http://localhost:8080/createDocument', {
+                        file_data,
+                        file_name,
+                    });
+
+                    res.send({ document: response.data });
+                } catch (error: any) {
+                    console.error('Error communicating with chatbot:', error);
+                    res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message });
+                }
+            },
+        });
     }
 }
|