diff options
-rw-r--r-- | package.json | 2 | ||||
-rw-r--r-- | src/client/ClientRecommender.tsx | 75 | ||||
-rw-r--r-- | src/client/cognitive_services/CognitiveServices.ts | 13 | ||||
-rw-r--r-- | src/client/util/TooltipTextMenu.scss | 2 | ||||
-rw-r--r-- | src/client/views/nodes/DocumentView.tsx | 1 | ||||
-rw-r--r-- | src/server/Recommender.ts | 29 | ||||
-rw-r--r-- | src/server/index.ts | 1 |
7 files changed, 113 insertions, 10 deletions
diff --git a/package.json b/package.json index ec5af93b1..d4b5bdab6 100644 --- a/package.json +++ b/package.json @@ -118,6 +118,7 @@ "@types/youtube": "0.0.38", "adm-zip": "^0.4.13", "archiver": "^3.0.3", + "arxiv-api-node": "0.0.2", "async": "^2.6.2", "babel-runtime": "^6.26.0", "bcrypt-nodejs": "0.0.3", @@ -218,6 +219,7 @@ "socket.io-client": "^2.2.0", "solr-node": "^1.2.1", "standard-http-error": "^2.0.1", + "stopword": "^0.3.3", "typescript-collections": "^1.3.2", "url-loader": "^1.1.2", "uuid": "^3.3.2", diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx index 9953700cc..66f0ae745 100644 --- a/src/client/ClientRecommender.tsx +++ b/src/client/ClientRecommender.tsx @@ -6,6 +6,7 @@ import React = require("react"); import { observer } from "mobx-react"; import { observable, action, computed, reaction } from "mobx"; var assert = require('assert'); +var sw = require('stopword'); import "./ClientRecommender.scss"; import { JSXElement } from "babel-types"; import { ToPlainText, RichTextField } from "../new_fields/RichTextField"; @@ -130,20 +131,86 @@ export class ClientRecommender extends React.Component<RecommenderProps> { let data: string; fielddata ? data = fielddata[ToPlainText]() : data = ""; console.log(data); - let converter = (results: any) => { + let converter = (results: any, data: string) => { let keyterms = new List<string>(); + let keyterms_counted = new List<string>(); results.documents.forEach((doc: any) => { let keyPhrases = doc.keyPhrases; keyPhrases.map((kp: string) => { - const words = kp.split(" "); - words.forEach((word) => keyterms.push(word)); + const frequency = this.countFrequencies(kp, data); + let words = kp.split(" "); // separates phrase into words + words = this.removeStopWords(words); + words.forEach((word) => { + keyterms.push(word); + for (let i = 0; i < frequency; i++) { + keyterms_counted.push(word); + } + }); + }); + }); + return { keyterms: keyterms, keyterms_counted: keyterms_counted }; + }; + let test = (results: any, data: string) => { + results.documents.forEach((doc: any) => { + let kps = doc.keyPhrases; + kps.map((kp: string) => { + this.countFrequencies(kp, data); }); }); - return keyterms; }; await CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc); } + private countFrequencies(keyphrase: string, paragraph: string) { + let data = paragraph.split(" "); + let kp_array = keyphrase.split(" "); + let num_keywords = kp_array.length; + let par_length = data.length; + let frequency = 0; + // console.log("Paragraph: ", data); + // console.log("Keyphrases:", kp_array); + for (let i = 0; i <= par_length - num_keywords; i++) { + const window = data.slice(i, i + num_keywords); + if (JSON.stringify(window) === JSON.stringify(kp_array)) { + frequency++; + } + } + return frequency; + } + + private removeStopWords(word_array: string[]) { + //console.log(sw.removeStopwords(word_array)); + return sw.removeStopwords(word_array); + } + + /** + * Request to the arXiv server for ML articles. + */ + + arxivrequest = async (query: string) => { + let xhttp = new XMLHttpRequest(); + let serveraddress = "http://export.arxiv.org/api" + let endpoint = serveraddress + "/query?search_query=all:" + query + "&start=0&max_results=5"; + let promisified = (resolve: any, reject: any) => { + xhttp.onreadystatechange = function () { + if (this.readyState === 4) { + let result = xhttp.response; + switch (this.status) { + case 200: + console.log(result); + return resolve(result); + case 400: + default: + return reject(result); + } + } + }; + xhttp.open("GET", endpoint, true); + xhttp.send(); + }; + return new Promise<any>(promisified); + } + /*** * Creates distance matrix for all Documents analyzed */ diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 874ee433d..eb1dd5197 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -15,6 +15,7 @@ type RequestExecutor = (apiKey: string, body: string, service: Service) => Promi type AnalysisApplier<D> = (target: Doc, relevantKeys: string[], data: D, ...args: any) => any; type BodyConverter<D> = (data: D) => string; type Converter = (results: any) => Field; +type TextConverter = (results: any, data: string) => { keyterms: Field, keyterms_counted: Field }; export type Tag = { name: string, confidence: number }; export type Rectangle = { top: number, left: number, width: number, height: number }; @@ -263,7 +264,7 @@ export namespace CognitiveServices { export namespace Appliers { - export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false) { + export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false, data: string) { console.log("vectorizing..."); //keyterms = ["father", "king"]; let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true }; @@ -284,15 +285,17 @@ export namespace CognitiveServices { }); } - export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: Converter, mainDoc: boolean = false) => { + export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false) => { let results = await ExecuteQuery(Service.Text, Manager, data); console.log(results); - let keyterms = converter(results); + let keyterms = converter(results, data); //target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis"); - target[keys[0]] = keyterms; + target[keys[0]] = keyterms.keyterms; console.log("analyzed!"); - await vectorize(keyterms, dataDoc, mainDoc); + await vectorize(keyterms.keyterms_counted, dataDoc, mainDoc, data); }; + + // export async function countFrequencies() } } diff --git a/src/client/util/TooltipTextMenu.scss b/src/client/util/TooltipTextMenu.scss index ebf833dbe..ab6cee763 100644 --- a/src/client/util/TooltipTextMenu.scss +++ b/src/client/util/TooltipTextMenu.scss @@ -351,5 +351,5 @@ .dragger{ color: #eee; - margin-left: 5px; + margin: 5px; }
\ No newline at end of file diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index d51c90b61..a1e64f1c5 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -651,6 +651,7 @@ export class DocumentView extends DocComponent<DocumentViewProps, Document>(Docu // allDocs.forEach(doc => console.log(doc.title)); // clears internal representation of documents as vectors ClientRecommender.Instance.reset_docs(); + ClientRecommender.Instance.arxivrequest("electrons"); await Promise.all(allDocs.map((doc: Doc) => { let mainDoc: boolean = false; const dataDoc = Doc.GetDataDoc(doc); diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts index 781974208..efb5fbbbf 100644 --- a/src/server/Recommender.ts +++ b/src/server/Recommender.ts @@ -5,6 +5,9 @@ var w2v = require('word2vec'); var assert = require('assert'); +var arxivapi = require('arxiv-api-node'); +import requestPromise = require("request-promise"); + export class Recommender { @@ -75,6 +78,32 @@ export class Recommender { } } + public async arxivRequest(query: string) { + // let xhttp = new XMLHttpRequest(); + // let serveraddress = "http://export.arxiv.org/api/query?search_query=all:electron&start=0&max_results=1"; + // let promisified = (resolve: any, reject: any) => { + // xhttp.onreadystatechange = function () { + // if (this.readyState === 4) { + // let result = xhttp.response; + // switch (this.status) { + // case 200: + // console.log(result); + // return resolve(result); + // case 400: + // default: + // return reject(result); + // } + // } + // }; + // xhttp.open("GET", serveraddress, true); + // xhttp.send(); + // }; + // return new Promise<any>(promisified); + + let res = await arxivapi.query("all:electrons"); + console.log(res); + } + diff --git a/src/server/index.ts b/src/server/index.ts index e1ecc4ac0..24ffc466f 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -688,6 +688,7 @@ app.use(RouteStore.corsProxy, (req, res) => { let recommender = new Recommender(); recommender.testModel(); +recommender.arxivRequest("Triangle-GAN"); app.post("/recommender", async (req, res) => { let keyphrases = req.body.keyphrases; |