about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/client/apis/vectorstore/VectorstoreUpload.ts 112
-rw-r--r-- src/server/ApiManagers/AssistantManager.ts 41
2 files changed, 137 insertions, 16 deletions
diff --git a/src/client/apis/vectorstore/VectorstoreUpload.ts b/src/client/apis/vectorstore/VectorstoreUpload.ts
new file mode 100644
index 000000000..78f652d9a
--- /dev/null
+++ b/src/client/apis/vectorstore/VectorstoreUpload.ts
@@ -0,0 +1,112 @@
+import * as dotenv from 'dotenv';
+import { Pinecone, ServerlessSpec } from '@pinecone-database/pinecone';
+import { Configuration, OpenAI } from 'openai';
+import * as fs from 'fs';
+import * as path from 'path';
+import { Document } from './file_processing'; // Assuming you have this file
+import { getSummarizedSystemPrompt, getSummarizedChunksPrompt } from './prompt_generator'; // Assuming you have this file
+import { CohereClient } from 'cohere-ai';
+
+// Load variables from a local .env file into process.env before any API key is read.
+dotenv.config();
+
+// Module-wide Pinecone client; an unset PINECONE_API_KEY falls back to the empty string.
+const pineconeApiKey = process.env.PINECONE_API_KEY || '';
+const pinecone = new Pinecone({ apiKey: pineconeApiKey });
+
+/**
+ * Metadata carried by a chunk; it is the type of `Chunk.metadata`.
+ * NOTE(review): field meanings below are inferred from their names only — confirm against
+ * the code that produces the chunks.
+ */
+interface ChunkMetaData {
+ // presumably the chunk's raw text — confirm
+ text: string;
+ type: string;
+ original_document: string;
+ file_path: string;
+ location: string;
+ // presumably the page range the chunk spans in the source document — confirm
+ start_page: number;
+ end_page: number;
+}
+
+/**
+ * A single vector record: an id, its embedding values and the attached metadata.
+ * `Vectorstore.retrieve` casts the matches returned by the index query to this shape.
+ */
+interface Chunk {
+ id: string;
+ // Embedding vector for the chunk.
+ values: number[];
+ metadata: ChunkMetaData;
+}
+
+/**
+ * Wrapper around a single Pinecone index ('pdf-chatbot') that stores document chunks and
+ * retrieves the chunks closest to a text query.
+ *
+ * The index is looked up (or created) asynchronously when the instance is constructed, so the
+ * handle is kept as a promise and awaited by every method that talks to Pinecone.
+ */
+class Vectorstore {
+    private documents: Document[];
+    private index_name: string;
+    // Promise for the Pinecone index handle. createIndex() is async, so the handle must be
+    // awaited before calling upsert/query on it.
+    private index: Promise<any>; // Type this properly based on Pinecone's TypeScript definitions
+    private documents_folder: string;
+
+    /**
+     * Starts index set-up in the background and makes sure the local output folder exists.
+     */
+    constructor() {
+        this.documents = [];
+        this.index_name = 'pdf-chatbot';
+        this.index = this.createIndex();
+        // Log set-up failures here so the rejection is never left unhandled; methods that
+        // await this.index still observe the same rejection.
+        this.index.catch(e => console.error(`Error creating Pinecone index: ${e}`));
+        this.documents_folder = path.join('output', 'documents');
+        fs.mkdirSync(this.documents_folder, { recursive: true });
+    }
+
+    /**
+     * Remembers the document locally and uploads its chunks to the index in the background.
+     * Upload failures are logged rather than thrown because this method is synchronous.
+     * @param document the processed document whose `chunks` are upserted
+     */
+    addDocument(document: Document): void {
+        this.documents.push(document);
+        void this.indexDocument(document).catch(e => console.error(`Error indexing document: ${e}`));
+    }
+
+    /**
+     * Upserts all chunks of the document into the Pinecone index.
+     * @param document the document whose `chunks` are uploaded
+     * @throws if index set-up failed or the upsert request fails
+     */
+    private async indexDocument(document: Document): Promise<void> {
+        console.log('Uploading vectors to content namespace..');
+        const index = await this.index;
+        await index.upsert(document.chunks);
+    }
+
+    /**
+     * Embeds the query with Cohere and returns the closest chunks from the index.
+     * @param query free-text search query
+     * @param top_k maximum number of matches to request (default 10)
+     * @returns the matches reported by the index, or an empty array if any step fails
+     */
+    async retrieve(query: string, top_k: number = 10): Promise<Chunk[]> {
+        console.log(`Retrieving chunks for query: ${query}`);
+
+        const cohere = new CohereClient({
+            token: process.env.COHERE_API_KEY || '',
+        });
+
+        try {
+            const embedResponse = await cohere.embed({
+                texts: [query],
+                model: 'embed-english-v3.0',
+                inputType: 'search_query',
+            });
+
+            const queryEmb = embedResponse.embeddings[0];
+
+            // Awaited inside the try block so a failed index set-up also yields [].
+            const index = await this.index;
+            const queryResponse = await index.query({
+                vector: queryEmb,
+                topK: top_k,
+                includeValues: true,
+                includeMetadata: true,
+            });
+
+            return queryResponse.matches as Chunk[];
+        } catch (e) {
+            console.error(`Error embedding query: ${e}`);
+            return [];
+        }
+    }
+
+    /**
+     * @returns the summaries of all added documents as a numbered list, one per line,
+     *          terminated by a newline
+     */
+    getSummaries(): string {
+        const summaries = this.documents.map(doc => doc.summary);
+        return summaries.map((summary, i) => `${i + 1}. ${summary}`).join('\n') + '\n';
+    }
+
+    /**
+     * Ensures the Pinecone index exists, creating it when necessary.
+     * @returns a handle to the index
+     * @throws if listing or creating the index fails
+     */
+    private async createIndex(): Promise<any> {
+        // Depending on the client version listIndexes() yields either an array or an object
+        // with an `indexes` array, and each entry is an object with a `name` (or a bare
+        // string), so normalise before comparing names.
+        const listing: any = await pinecone.listIndexes();
+        const entries: any[] = Array.isArray(listing) ? listing : (listing?.indexes ?? []);
+        const exists = entries.some(entry => (typeof entry === 'string' ? entry : entry?.name) === this.index_name);
+
+        if (exists) {
+            console.log('Index already exists...');
+        } else {
+            await pinecone.createIndex({
+                name: this.index_name,
+                // NOTE(review): must equal the size of the vectors produced by the embedding
+                // model used in retrieve() — confirm.
+                dimension: 1024,
+                metric: 'cosine',
+                spec: {
+                    serverless: {
+                        cloud: 'aws',
+                        region: 'us-east-1',
+                    },
+                },
+                // Do not resolve until the new index can accept upserts and queries.
+                waitUntilReady: true,
+            });
+        }
+        return pinecone.Index(this.index_name);
+    }
+}
+
+export { Vectorstore, Chunk, ChunkMetaData };
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index f0ca983d7..77d8af724 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -147,21 +147,30 @@ export default class AssistantManager extends ApiManager {
}
},
});
- // register({
- // method: Method.POST,
- // subscription: '/uploadPDF',
- // secureHandler: async ({ req, res }) => {
- // const { file_path } = req.body;
- // const fullPath = path.join(publicDirectory, file_path);
- // const fileData = createReadStream(fullPath);
- // try {
- // const response = await axios.post('http://localhost:8080/uploadPDF', { fileData });
- // res.send({ response: response });
- // } catch (error: any) {
- // console.error('Error communicating with chatbot:', error);
- // res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message });
- // }
- // },
- // });
+
+        // POST /createDocument: reads a file from the public directory, base64-encodes it and
+        // forwards it to the local chatbot service, returning that service's response body.
+        register({
+            method: Method.POST,
+            subscription: '/createDocument',
+            secureHandler: async ({ req, res }) => {
+                const { file_path } = req.body;
+                if (typeof file_path !== 'string' || file_path.length === 0) {
+                    res.status(400).send({ error: 'file_path must be a non-empty string' });
+                    return;
+                }
+
+                // file_path comes from the client: normalise it and refuse anything that
+                // escapes the public directory (e.g. via '../' segments).
+                const public_root = path.resolve(publicDirectory);
+                const public_path = path.resolve(path.join(public_root, file_path));
+                const relative_path = path.relative(public_root, public_path);
+                if (relative_path === '..' || relative_path.startsWith(`..${path.sep}`) || path.isAbsolute(relative_path)) {
+                    res.status(400).send({ error: 'file_path must stay inside the public directory' });
+                    return;
+                }
+                const file_name = path.basename(file_path);
+
+                try {
+                    // Read file data and convert to base64 without blocking the event loop
+                    const file_data = await fs.promises.readFile(public_path, { encoding: 'base64' });
+
+                    const response = await axios.post('http://localhost:8080/createDocument', {
+                        file_data,
+                        file_name,
+                    });
+
+                    res.send({ document: response.data });
+                } catch (error: unknown) {
+                    console.error('Error communicating with chatbot:', error);
+                    const details = error instanceof Error ? error.message : String(error);
+                    res.status(500).send({ error: 'Failed to communicate with the chatbot', details });
+                }
+            },
+        });
}
}