aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorab <abdullah_ahmed@brown.edu>2019-09-04 09:52:39 -0400
committerab <abdullah_ahmed@brown.edu>2019-09-04 09:52:39 -0400
commita1c2afe27c75354d4365a79ea202eca94516069e (patch)
treedc475b995d6df97e92e0d8dc32e8ef2311a4388c
parent6f6e5c763d9a398e897df94b89fcc7d3845c318e (diff)
stopwords, frequency, proto arxiv
-rw-r--r--package.json2
-rw-r--r--src/client/ClientRecommender.tsx75
-rw-r--r--src/client/cognitive_services/CognitiveServices.ts13
-rw-r--r--src/client/util/TooltipTextMenu.scss2
-rw-r--r--src/client/views/nodes/DocumentView.tsx1
-rw-r--r--src/server/Recommender.ts29
-rw-r--r--src/server/index.ts1
7 files changed, 113 insertions, 10 deletions
diff --git a/package.json b/package.json
index ec5af93b1..d4b5bdab6 100644
--- a/package.json
+++ b/package.json
@@ -118,6 +118,7 @@
"@types/youtube": "0.0.38",
"adm-zip": "^0.4.13",
"archiver": "^3.0.3",
+ "arxiv-api-node": "0.0.2",
"async": "^2.6.2",
"babel-runtime": "^6.26.0",
"bcrypt-nodejs": "0.0.3",
@@ -218,6 +219,7 @@
"socket.io-client": "^2.2.0",
"solr-node": "^1.2.1",
"standard-http-error": "^2.0.1",
+ "stopword": "^0.3.3",
"typescript-collections": "^1.3.2",
"url-loader": "^1.1.2",
"uuid": "^3.3.2",
diff --git a/src/client/ClientRecommender.tsx b/src/client/ClientRecommender.tsx
index 9953700cc..66f0ae745 100644
--- a/src/client/ClientRecommender.tsx
+++ b/src/client/ClientRecommender.tsx
@@ -6,6 +6,7 @@ import React = require("react");
import { observer } from "mobx-react";
import { observable, action, computed, reaction } from "mobx";
var assert = require('assert');
+var sw = require('stopword');
import "./ClientRecommender.scss";
import { JSXElement } from "babel-types";
import { ToPlainText, RichTextField } from "../new_fields/RichTextField";
@@ -130,20 +131,86 @@ export class ClientRecommender extends React.Component<RecommenderProps> {
let data: string;
fielddata ? data = fielddata[ToPlainText]() : data = "";
console.log(data);
- let converter = (results: any) => {
+ let converter = (results: any, data: string) => {
let keyterms = new List<string>();
+ let keyterms_counted = new List<string>();
results.documents.forEach((doc: any) => {
let keyPhrases = doc.keyPhrases;
keyPhrases.map((kp: string) => {
- const words = kp.split(" ");
- words.forEach((word) => keyterms.push(word));
+ const frequency = this.countFrequencies(kp, data);
+ let words = kp.split(" "); // separates phrase into words
+ words = this.removeStopWords(words);
+ words.forEach((word) => {
+ keyterms.push(word);
+ for (let i = 0; i < frequency; i++) {
+ keyterms_counted.push(word);
+ }
+ });
+ });
+ });
+ return { keyterms: keyterms, keyterms_counted: keyterms_counted };
+ };
+ let test = (results: any, data: string) => {
+ results.documents.forEach((doc: any) => {
+ let kps = doc.keyPhrases;
+ kps.map((kp: string) => {
+ this.countFrequencies(kp, data);
});
});
- return keyterms;
};
await CognitiveServices.Text.Appliers.analyzer(dataDoc, extDoc, ["key words"], data, converter, mainDoc);
}
+ /**
+  * Counts how many times a key phrase occurs, word for word, in a paragraph.
+  *
+  * Both strings are split into words on runs of whitespace, so phrases
+  * separated by newlines, tabs or repeated spaces are still found. Matching
+  * is exact: case-sensitive, and punctuation attached to a word is part of it.
+  *
+  * @param keyphrase phrase to look for (one or more words)
+  * @param paragraph text that is searched
+  * @returns number of positions in the paragraph where the whole phrase
+  * occurs; 0 when the phrase contains no words
+  */
+ private countFrequencies(keyphrase: string, paragraph: string) {
+     // split on any whitespace and drop the empty tokens produced by leading/trailing whitespace
+     const tokenize = (text: string) => text.split(/\s+/).filter(token => token.length > 0);
+     const paragraphWords = tokenize(paragraph);
+     const phraseWords = tokenize(keyphrase);
+     if (phraseWords.length === 0) {
+         return 0;
+     }
+     let frequency = 0;
+     // slide a window of the phrase's length over the paragraph
+     for (let start = 0; start + phraseWords.length <= paragraphWords.length; start++) {
+         if (phraseWords.every((word, offset) => paragraphWords[start + offset] === word)) {
+             frequency++;
+         }
+     }
+     return frequency;
+ }
+
+ /**
+  * Passes a list of words through the `stopword` package (`sw`).
+  *
+  * @param word_array words to filter
+  * @returns the value returned by `sw.removeStopwords` (presumably the
+  * input without stop words; confirm against the package docs)
+  */
+ private removeStopWords(word_array: string[]) {
+     const filtered = sw.removeStopwords(word_array);
+     return filtered;
+ }
+
+ /**
+  * Sends a GET request to the arXiv export API and resolves with the raw
+  * response of the search.
+  *
+  * The query is matched against all fields ("all:" prefix) and at most the
+  * first five results are requested.
+  *
+  * @param query plain search text; it is URL-encoded here, so callers must
+  * pass it unencoded
+  * @returns a promise that resolves with the response on HTTP 200 and
+  * rejects with the response for any other status
+  */
+
+ arxivrequest = async (query: string) => {
+     const serveraddress = "http://export.arxiv.org/api";
+     // encode the query so spaces and reserved characters (&, #, +, ...) cannot corrupt the URL
+     const endpoint = serveraddress + "/query?search_query=all:" + encodeURIComponent(query) + "&start=0&max_results=5";
+     return new Promise<any>((resolve, reject) => {
+         const xhttp = new XMLHttpRequest();
+         xhttp.onreadystatechange = () => {
+             // readyState 4 means the request has finished
+             if (xhttp.readyState !== 4) {
+                 return;
+             }
+             const result = xhttp.response;
+             if (xhttp.status === 200) {
+                 console.log(result);
+                 resolve(result);
+             } else {
+                 reject(result);
+             }
+         };
+         xhttp.open("GET", endpoint, true);
+         xhttp.send();
+     });
+ }
+
/***
* Creates distance matrix for all Documents analyzed
*/
diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts
index 874ee433d..eb1dd5197 100644
--- a/src/client/cognitive_services/CognitiveServices.ts
+++ b/src/client/cognitive_services/CognitiveServices.ts
@@ -15,6 +15,7 @@ type RequestExecutor = (apiKey: string, body: string, service: Service) => Promi
type AnalysisApplier<D> = (target: Doc, relevantKeys: string[], data: D, ...args: any) => any;
type BodyConverter<D> = (data: D) => string;
type Converter = (results: any) => Field;
+type TextConverter = (results: any, data: string) => { keyterms: Field, keyterms_counted: Field };
export type Tag = { name: string, confidence: number };
export type Rectangle = { top: number, left: number, width: number, height: number };
@@ -263,7 +264,7 @@ export namespace CognitiveServices {
export namespace Appliers {
- export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false) {
+ export async function vectorize(keyterms: any, dataDoc: Doc, mainDoc: boolean = false, data: string) {
console.log("vectorizing...");
//keyterms = ["father", "king"];
let args = { method: 'POST', uri: Utils.prepend("/recommender"), body: { keyphrases: keyterms }, json: true };
@@ -284,15 +285,17 @@ export namespace CognitiveServices {
});
}
- export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: Converter, mainDoc: boolean = false) => {
+ export const analyzer = async (dataDoc: Doc, target: Doc, keys: string[], data: string, converter: TextConverter, mainDoc: boolean = false) => {
let results = await ExecuteQuery(Service.Text, Manager, data);
console.log(results);
- let keyterms = converter(results);
+ let keyterms = converter(results, data);
//target[keys[0]] = Docs.Get.DocumentHierarchyFromJson(results, "Key Word Analysis");
- target[keys[0]] = keyterms;
+ target[keys[0]] = keyterms.keyterms;
console.log("analyzed!");
- await vectorize(keyterms, dataDoc, mainDoc);
+ await vectorize(keyterms.keyterms_counted, dataDoc, mainDoc, data);
};
+
+ // export async function countFrequencies()
}
}
diff --git a/src/client/util/TooltipTextMenu.scss b/src/client/util/TooltipTextMenu.scss
index ebf833dbe..ab6cee763 100644
--- a/src/client/util/TooltipTextMenu.scss
+++ b/src/client/util/TooltipTextMenu.scss
@@ -351,5 +351,5 @@
.dragger{
color: #eee;
- margin-left: 5px;
+ margin: 5px;
} \ No newline at end of file
diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx
index d51c90b61..a1e64f1c5 100644
--- a/src/client/views/nodes/DocumentView.tsx
+++ b/src/client/views/nodes/DocumentView.tsx
@@ -651,6 +651,7 @@ export class DocumentView extends DocComponent<DocumentViewProps, Document>(Docu
// allDocs.forEach(doc => console.log(doc.title));
// clears internal representation of documents as vectors
ClientRecommender.Instance.reset_docs();
+ ClientRecommender.Instance.arxivrequest("electrons");
await Promise.all(allDocs.map((doc: Doc) => {
let mainDoc: boolean = false;
const dataDoc = Doc.GetDataDoc(doc);
diff --git a/src/server/Recommender.ts b/src/server/Recommender.ts
index 781974208..efb5fbbbf 100644
--- a/src/server/Recommender.ts
+++ b/src/server/Recommender.ts
@@ -5,6 +5,9 @@
var w2v = require('word2vec');
var assert = require('assert');
+var arxivapi = require('arxiv-api-node');
+import requestPromise = require("request-promise");
+
export class Recommender {
@@ -75,6 +78,32 @@ export class Recommender {
}
}
+ /**
+  * Searches arXiv through the `arxiv-api-node` client and logs the response.
+  *
+  * @param query search text; it is matched against all fields via the
+  * "all:" prefix
+  * @returns the value `arxivapi.query` resolves with
+  */
+ public async arxivRequest(query: string) {
+     // use the caller's query rather than a fixed search term
+     const res = await arxivapi.query("all:" + query);
+     console.log(res);
+     return res;
+ }
+
diff --git a/src/server/index.ts b/src/server/index.ts
index e1ecc4ac0..24ffc466f 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -688,6 +688,7 @@ app.use(RouteStore.corsProxy, (req, res) => {
let recommender = new Recommender();
recommender.testModel();
+recommender.arxivRequest("Triangle-GAN");
app.post("/recommender", async (req, res) => {
let keyphrases = req.body.keyphrases;