import { Doc } from "../new_fields/Doc";
import { StrCast, Cast } from "../new_fields/Types";
import { List } from "../new_fields/List";
import { CognitiveServices } from "./cognitive_services/CognitiveServices";
import React = require("react");
import { observer } from "mobx-react";
import { observable, action, computed, reaction } from "mobx";
var assert = require('assert');
var sw = require('stopword');
var FeedParser = require('feedparser');
import "./ClientRecommender.scss";
import { JSXElement } from "babel-types";
import { ToPlainText, RichTextField } from "../new_fields/RichTextField";

/** Props accepted by ClientRecommender. */
export interface RecommenderProps {
    /** Heading rendered above the distance matrix; "hello" is shown when this is empty. */
    title: string;
}

/** A document together with its vector and the score last assigned by computeSimilarities. */
export interface RecommenderDocument {
    actualDoc: Doc;
    vectorDoc: number[];
    score: number;
}

/**
 * Keeps a set of vectorized documents, scores them against a "main" document,
 * and renders the pairwise distance matrix as a table.
 *
 * The first constructed instance is stored in `ClientRecommender.Instance`, and
 * every method reads and writes its state through that shared instance.
 */
@observer
export class ClientRecommender extends React.Component<RecommenderProps> {

    static Instance: ClientRecommender;
    private mainDoc?: RecommenderDocument;
    private docVectors: Set<RecommenderDocument> = new Set();
    @observable private corr_matrix = [[0, 0], [0, 0]];

    constructor(props: RecommenderProps) {
        super(props);
        if (!ClientRecommender.Instance) {
            ClientRecommender.Instance = this;
        }
        // Constructing any instance clears the documents held by the shared instance.
        ClientRecommender.Instance.docVectors = new Set();
    }

    /** Clears the stored documents and the main document, and restores the 2x2 zero matrix. */
    @action
    public reset_docs() {
        ClientRecommender.Instance.docVectors = new Set();
        ClientRecommender.Instance.mainDoc = undefined;
        ClientRecommender.Instance.corr_matrix = [[0, 0], [0, 0]];
    }

    /** Currently only logs a message; no documents are removed here. */
    public deleteDocs() {
        console.log("deleting previews...");
    }

    /**
     * Compares two equal-length vectors.
     *
     * @param vector1 first vector
     * @param vector2 second vector; the assertion fails if its length differs from vector1's
     * @param metric "cosine" (default) returns the cosine similarity, so larger means closer;
     *               "euclidian" returns the Euclidean distance, so smaller means closer.
     * @returns the value for the chosen metric, or 0 for an unrecognized metric. Cosine
     *          similarity is reported as 0 when either vector has zero magnitude, because
     *          the quotient would otherwise be NaN.
     */
    private distance(vector1: number[], vector2: number[], metric: string = "cosine"): number {
        assert(vector1.length === vector2.length, "Vectors are not the same length");
        switch (metric) {
            case "cosine": {
                let dotProduct = 0;
                let squaredNorm1 = 0;
                let squaredNorm2 = 0;
                for (let i = 0; i < vector1.length; i++) {
                    dotProduct += vector1[i] * vector2[i];
                    squaredNorm1 += vector1[i] * vector1[i];
                    squaredNorm2 += vector2[i] * vector2[i];
                }
                const denominator = Math.sqrt(squaredNorm1) * Math.sqrt(squaredNorm2);
                // A zero vector has no direction; avoid dividing by zero.
                return denominator === 0 ? 0 : dotProduct / denominator;
            }
            case "euclidian": {
                let sumOfSquares = 0;
                for (let i = 0; i < vector1.length; i++) {
                    const diff = vector1[i] - vector2[i];
                    sumOfSquares += diff * diff;
                }
                return Math.sqrt(sumOfSquares);
            }
            default:
                return 0;
        }
    }

    /**
     * Scores every stored document by its Euclidean distance to the main document and
     * returns the documents sorted by ascending score.
     *
     * When no main document has been set, the existing scores are left untouched and the
     * documents are still returned sorted by them.
     */
    public computeSimilarities(): RecommenderDocument[] {
        const recommender = ClientRecommender.Instance;
        const reference = recommender.mainDoc;
        if (reference) {
            recommender.docVectors.forEach((doc: RecommenderDocument) => {
                doc.score = recommender.distance(reference.vectorDoc, doc.vectorDoc, "euclidian");
            });
        }
        const doclist = Array.from(recommender.docVectors);
        doclist.sort((a: RecommenderDocument, b: RecommenderDocument) => a.score - b.score);
        return doclist;
    }

    /**
     * Computes the component-wise mean of a set of word vectors over their first 200 components.
     *
     * When `paragraph` is non-empty, the mean is wrapped in a RecommenderDocument (score 0),
     * stored as the main document if `mainDoc` is true, and added to the stored documents.
     *
     * @param paragraph word vectors; each is read at indices 0..199
     * @param dataDoc document the vectors belong to
     * @param mainDoc whether the resulting document becomes the main document
     * @returns the mean vector, or 200 zeros when `paragraph` is empty
     */
    public mean(paragraph: Set<number[]>, dataDoc: Doc, mainDoc: boolean): number[] {
        const n = 200;
        const num_words = paragraph.size;
        let meanVector: number[] = new Array<number>(n).fill(0);
        if (num_words > 0) { // an empty set means the paragraph was not vectorized
            paragraph.forEach((wordvec: number[]) => {
                for (let i = 0; i < n; i++) {
                    meanVector[i] += wordvec[i];
                }
            });
            meanVector = meanVector.map(x => x / num_words);
            const internalDoc: RecommenderDocument = { actualDoc: dataDoc, vectorDoc: meanVector, score: 0 };
            if (mainDoc) {
                ClientRecommender.Instance.mainDoc = internalDoc;
            }
            ClientRecommender.Instance.addToDocSet(internalDoc);
        }
        return meanVector;
    }

    /** Adds a document to the shared instance's document set, if that set exists. */
    private addToDocSet(internalDoc: RecommenderDocument) {
        if (ClientRecommender.Instance.docVectors) {
            ClientRecommender.Instance.docVectors.add(internalDoc);
        }
    }

    /**
     * Reads the plain text of `dataDoc.data` (empty string when it is not a RichTextField)
     * and passes it to the Cognitive Services text analyzer together with a converter.
     *
     * The converter walks `results.documents[*].keyPhrases`, and for each key phrase:
     *  - counts how often it occurs in the text it is given,
     *  - tracks the phrase(s) with the highest count,
     *  - splits the phrase into words, drops stop words, and records each word once in
     *    `keyterms` and `frequency` times in `keyterms_counted`.
     * It then logs the highest-count phrases and starts an arXiv request for them.
     *
     * NOTE(review): when and with which arguments the analyzer invokes the converter is not
     * visible in this file — confirm against CognitiveServices.
     */
    public async extractText(dataDoc: Doc, extDoc: Doc, mainDoc: boolean = false) {
        const fielddata = Cast(dataDoc.data, RichTextField);
        const data: string = fielddata ? fielddata[ToPlainText]() : "";
        const converter = (results: any, text: string) => {
            const keyterms = new List<string>();
            const keyterms_counted = new List<string>();
            let highKP: string[] = [""];
            let high = 0;
            results.documents.forEach((doc: any) => {
                doc.keyPhrases.forEach((kp: string) => {
                    const frequency = this.countFrequencies(kp, text);
                    if (frequency > high) {
                        high = frequency;
                        highKP = [kp];
                    } else if (frequency === high) {
                        highKP.push(kp);
                    }
                    const words: string[] = this.removeStopWords(kp.split(" ")); // phrase -> words
                    words.forEach((word) => {
                        keyterms.push(word);
                        for (let i = 0; i < frequency; i++) {
                            keyterms_counted.push(word);
                        }
                    });
                });
            });
            console.log(highKP);
            // The converter is synchronous, so the request cannot be awaited here; attach a
            // handler so a failed request does not surface as an unhandled rejection.
            this.sendRequest(highKP).catch((error) => console.error("arXiv request failed:", error));
            return { keyterms: keyterms, keyterms_counted: keyterms_counted };
        };
        await CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc);
    }

    /** Placeholder: not implemented yet and has no effect. */
    private findImportantKPs(keyterms_counted: string[], paragraph: string) {
        // TODO: implement.
    }

    /**
     * Counts how many times `keyphrase` occurs in `paragraph` as a run of consecutive
     * words. Both strings are split on single spaces and words are compared exactly.
     */
    private countFrequencies(keyphrase: string, paragraph: string): number {
        const paragraphWords = paragraph.split(" ");
        const phraseWords = keyphrase.split(" ");
        const lastStart = paragraphWords.length - phraseWords.length;
        let frequency = 0;
        for (let start = 0; start <= lastStart; start++) {
            const matches = phraseWords.every((word, offset) => paragraphWords[start + offset] === word);
            if (matches) {
                frequency++;
            }
        }
        return frequency;
    }

    /** Returns `word_array` with stop words removed by the `stopword` package. */
    private removeStopWords(word_array: string[]): string[] {
        return sw.removeStopwords(word_array);
    }

    /**
     * Builds one search query from the given key phrases and sends it to arXiv.
     *
     * Empty phrases are skipped, the rest are joined with single spaces so neighbouring
     * phrases do not fuse into one word, and the result is URL-encoded. Nothing is sent
     * when no non-empty phrase remains.
     */
    private async sendRequest(keywords: string[]) {
        const query = keywords.filter((kp: string) => kp.length > 0).join(" ");
        if (query.length === 0) {
            return;
        }
        await this.arxivrequest(encodeURIComponent(query));
    }

    /**
     * Sends a GET request to the arXiv query API asking for a single result.
     *
     * @param query text inserted verbatim after `search_query=all:`; callers are responsible
     *              for URL-encoding it
     * @returns a promise that resolves with the raw response on HTTP 200 and rejects with
     *          the raw response on any other status. On success the second `title` and
     *          second `id` element of the XML response are logged when present.
     */
    arxivrequest = async (query: string) => {
        const xhttp = new XMLHttpRequest();
        const serveraddress = "http://export.arxiv.org/api";
        const endpoint = serveraddress + "/query?search_query=all:" + query + "&start=0&max_results=1";

        // Logs the text of the second element with the given tag name, if it has any.
        // NOTE(review): index 1 presumably skips a feed-level element at index 0 — confirm.
        const logSecondNodeText = (xml: Document, tagName: string) => {
            const elements = xml.getElementsByTagName(tagName);
            if (elements && elements.length > 1) {
                const firstChild = elements[1].childNodes[0];
                if (firstChild) {
                    console.log(firstChild.nodeValue);
                }
            }
        };

        const promisified = (resolve: any, reject: any) => {
            xhttp.onreadystatechange = () => {
                if (xhttp.readyState !== 4) {
                    return; // request not finished yet
                }
                const result = xhttp.response;
                const xml = xhttp.responseXML;
                console.log(xml);
                if (xhttp.status !== 200) {
                    return reject(result);
                }
                if (xml) {
                    logSecondNodeText(xml, "title");
                    logSecondNodeText(xml, "id");
                }
                return resolve(result);
            };
            xhttp.open("GET", endpoint, true);
            xhttp.send();
        };
        return new Promise(promisified);
    }

    /** Treats `result` as an XML document and logs the text of its first `title` element. */
    processArxivResult = (result: any) => {
        const xmlDoc = result as XMLDocument;
        const text = xmlDoc.getElementsByTagName("title")[0].childNodes[0].nodeValue;
        console.log(text);
    }

    /**
     * Builds the n x n matrix of pairwise Euclidean distances between the given documents,
     * stores it as the rendered matrix, and returns it.
     *
     * @param documents documents to compare; defaults to the shared instance's stored documents
     */
    @action
    public createDistanceMatrix(documents: Set<RecommenderDocument> = ClientRecommender.Instance.docVectors): number[][] {
        const documents_list = Array.from(documents);
        const matrix = documents_list.map((rowDoc) =>
            documents_list.map((colDoc) =>
                ClientRecommender.Instance.distance(rowDoc.vectorDoc, colDoc.vectorDoc, "euclidian")));
        ClientRecommender.Instance.corr_matrix = matrix;
        return matrix;
    }

    /**
     * One table row per matrix row, one cell per entry, each formatted to 4 decimals.
     * The matrix is read as square (n rows, n columns). Rows and cells carry `key` props
     * because they are rendered from arrays.
     */
    @computed
    private get generateRows() {
        const matrix = ClientRecommender.Instance.corr_matrix;
        const n = matrix.length;
        const rows: JSX.Element[] = [];
        for (let i = 0; i < n; i++) {
            const cells: JSX.Element[] = [];
            for (let j = 0; j < n; j++) {
                cells.push(React.createElement("td", { key: j }, matrix[i][j].toFixed(4)));
            }
            rows.push(React.createElement("tr", { key: i }, cells));
        }
        return rows;
    }

    /**
     * Renders the title (or "hello" when the title is empty) followed by the distance
     * matrix as a table. An earlier hard-coded 2x2 version of the table was commented out
     * in favour of the generated rows.
     *
     * NOTE(review): the element tags of this markup were not readable in the reviewed copy.
     * The wrapper/heading tags below are reconstructed and no CSS class names are applied;
     * confirm both against ClientRecommender.scss.
     */
    render() {
        const shared = ClientRecommender.Instance;
        const title = shared.props.title ? shared.props.title : "hello";
        return React.createElement("div", null,
            React.createElement("h3", null, title),
            React.createElement("table", null,
                React.createElement("tbody", null, shared.generateRows)));
    }
}