From 74dee5f76ffc5bbdd07caafb4273aaf485dec1b9 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Wed, 12 Jun 2019 23:22:58 -0400 Subject: beginnings of nested golden layout --- src/client/util/DocumentManager.ts | 4 ++-- src/client/util/DragManager.ts | 8 ++++---- src/client/util/TooltipTextMenu.tsx | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/client/util') diff --git a/src/client/util/DocumentManager.ts b/src/client/util/DocumentManager.ts index 65c4b9e4b..72d9f7e76 100644 --- a/src/client/util/DocumentManager.ts +++ b/src/client/util/DocumentManager.ts @@ -134,7 +134,7 @@ export class DocumentManager { const actualDoc = Doc.MakeAlias(docDelegate); actualDoc.libraryBrush = true; if (linkPage !== undefined) actualDoc.curPage = linkPage; - (dockFunc || CollectionDockingView.Instance.AddRightSplit)(actualDoc); + (dockFunc || CollectionDockingView.AddRightSplit)(actualDoc); } else { let contextView: DocumentView | null; docDelegate.libraryBrush = true; @@ -142,7 +142,7 @@ export class DocumentManager { contextDoc.panTransformType = "Ease"; contextView.props.focus(contextDoc); } else { - (dockFunc || CollectionDockingView.Instance.AddRightSplit)(contextDoc); + (dockFunc || CollectionDockingView.AddRightSplit)(contextDoc); } } } diff --git a/src/client/util/DragManager.ts b/src/client/util/DragManager.ts index 1e84a0db0..7625b0463 100644 --- a/src/client/util/DragManager.ts +++ b/src/client/util/DragManager.ts @@ -26,8 +26,8 @@ export function SetupDrag(_reference: React.RefObject, docFunc: () // if (this.props.isSelected() || this.props.isTopMost) { if (e.button === 0) { e.stopPropagation(); - if (e.shiftKey && CollectionDockingView.Instance) { - CollectionDockingView.Instance.StartOtherDrag([await docFunc()], e); + if (e.shiftKey && CollectionDockingView.TopLevel) { + CollectionDockingView.TopLevel.StartOtherDrag([await docFunc()], e); } else { document.addEventListener("pointermove", onRowMove); document.addEventListener("pointerup", onRowUp); @@ -264,9 +264,9 @@ export namespace DragManager { if (dragData instanceof DocumentDragData) { dragData.userDropAction = e.ctrlKey || e.altKey ? 
"alias" : undefined; } - if (e.shiftKey && CollectionDockingView.Instance) { + if (e.shiftKey && CollectionDockingView.TopLevel) { AbortDrag(); - CollectionDockingView.Instance.StartOtherDrag(docs, { + CollectionDockingView.TopLevel.StartOtherDrag(docs, { pageX: e.pageX, pageY: e.pageY, preventDefault: emptyFunction, diff --git a/src/client/util/TooltipTextMenu.tsx b/src/client/util/TooltipTextMenu.tsx index f517f757a..fa2483db5 100644 --- a/src/client/util/TooltipTextMenu.tsx +++ b/src/client/util/TooltipTextMenu.tsx @@ -194,7 +194,7 @@ export class TooltipTextMenu { if (DocumentManager.Instance.getDocumentView(f)) { DocumentManager.Instance.getDocumentView(f)!.props.focus(f); } - else if (CollectionDockingView.Instance) CollectionDockingView.Instance.AddRightSplit(f); + else if (CollectionDockingView.TopLevel) CollectionDockingView.AddRightSplit(f); } })); } -- cgit v1.2.3-70-g09d2 From a3e1f7332e0cb96dae0abd80a2972ae74ac31104 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Mon, 17 Jun 2019 13:30:37 -0400 Subject: Doc server documentation completed --- src/Utils.ts | 10 +- src/client/DocServer.ts | 285 +++++++++++++++++++++++++-------- src/client/util/SerializationHelper.ts | 4 +- src/new_fields/Doc.ts | 4 +- src/new_fields/util.ts | 4 +- src/server/index.ts | 12 +- 6 files changed, 235 insertions(+), 84 deletions(-) (limited to 'src/client/util') diff --git a/src/Utils.ts b/src/Utils.ts index 611c61135..657bd673e 100644 --- a/src/Utils.ts +++ b/src/Utils.ts @@ -73,14 +73,14 @@ export class Utils { }; } - public static Emit(socket: Socket | SocketIOClient.Socket, message: Message, args: T) { + public static emit(socket: Socket | SocketIOClient.Socket, message: Message, args: T) { this.log("Emit", message.Name, args, false); socket.emit(message.Message, args); } - public static EmitCallback(socket: Socket | SocketIOClient.Socket, message: Message, args: T): Promise; - public static EmitCallback(socket: Socket | SocketIOClient.Socket, message: Message, args: T, fn: (args: any) => any): void; - public static EmitCallback(socket: Socket | SocketIOClient.Socket, message: Message, args: T, fn?: (args: any) => any): void | Promise { + public static emitCallback(socket: Socket | SocketIOClient.Socket, message: Message, args: T): Promise; + public static emitCallback(socket: Socket | SocketIOClient.Socket, message: Message, args: T, fn: (args: any) => any): void; + public static emitCallback(socket: Socket | SocketIOClient.Socket, message: Message, args: T, fn?: (args: any) => any): void | Promise { this.log("Emit", message.Name, args, false); if (fn) { socket.emit(message.Message, args, this.loggingCallback('Receiving', fn, message.Name)); @@ -89,7 +89,7 @@ export class Utils { } } - public static AddServerHandler(socket: Socket | SocketIOClient.Socket, message: Message, handler: (args: T) => any) { + public static addServerHandler(socket: Socket | SocketIOClient.Socket, message: Message, handler: (args: T) => any) { socket.on(message.Message, this.loggingCallback('Incoming', handler, message.Name)); } diff --git a/src/client/DocServer.ts b/src/client/DocServer.ts index d759b4757..ad7c706b6 100644 --- a/src/client/DocServer.ts +++ b/src/client/DocServer.ts @@ -1,35 +1,116 @@ import * as OpenSocket from 'socket.io-client'; -import { MessageStore } from "./../server/Message"; +import { MessageStore, Diff } from "./../server/Message"; import { Opt } from '../new_fields/Doc'; import { Utils, emptyFunction } from '../Utils'; import { SerializationHelper } from './util/SerializationHelper'; import { 
RefField } from '../new_fields/RefField';
import { Id, HandleUpdate } from '../new_fields/FieldSymbols';
+/**
+ * This class encapsulates the transfer and cross-client synchronization of
+ * data stored only in documents (RefFields). In the process, it also
+ * creates and maintains a cache of documents so that they can be accessed
+ * more efficiently. Currently, there is no cache eviction scheme in place.
+ *
+ * NOTE: while this class is technically abstracted to work with any [RefField], because
+ * [Doc] instances are the only [RefField] we need / have implemented at the moment, the documentation
+ * will treat all data used here as [Doc]s.
+ *
+ * Any time we want to write a new field to the database (via the server)
+ * or update ourselves based on the server's update message, that occurs here
+ */
 export namespace DocServer {
+    // a document cache for efficient document retrieval
     const _cache: { [id: string]: RefField | Promise<Opt<RefField>> } = {};
+    // the handle / client side endpoint of the web socket (https://bit.ly/2TeALea for more info) connection established with the server
     const _socket = OpenSocket(`${window.location.protocol}//${window.location.hostname}:4321`);
+    // this client's distinct GUID created at initialization
     const GUID: string = Utils.GenerateGuid();
+    // indicates whether or not a document is currently being updated, and, if so, its id
+    let updatingId: string | undefined;
 
-    export function makeReadOnly() {
-        _CreateField = emptyFunction;
-        _UpdateField = emptyFunction;
-        _respondToUpdate = emptyFunction;
-    }
+    export namespace Util {
 
-    export function prepend(extension: string): string {
-        return window.location.origin + extension;
-    }
+        /**
+         * Whenever the server sends us its handshake message on our
+         * websocket, we return the handshake via the onConnection function defined just below.
+         */
+        Utils.addServerHandler(_socket, MessageStore.Foo, onConnection);
+
+        /**
+         * This function emits a message (with this client's
+         * unique GUID) to the server
+         * indicating that this client has connected
+         */
+        function onConnection() {
+            _socket.emit(MessageStore.Bar.Message, GUID);
+        }
+
+        /**
+         * A convenience method. Prepends the full path (i.e. http://localhost:1050) to the
+         * requested extension
+         * @param extension the specified sub-path to append to the window origin
+         */
+        export function prepend(extension: string): string {
+            return window.location.origin + extension;
+        }
+
+        /**
+         * Emits a message to the server that wipes
+         * all documents in the database.
+         */
+        export function deleteDatabase() {
+            Utils.emit(_socket, MessageStore.DeleteAll, {});
+        }
+
+        /**
+         * This disables this client's ability to write new fields,
+         * update existing fields, and receive and reflect the changes if
+         * other clients update shared fields. Thus, the client can only read
+         * a static snapshot of its workspaces.
+         *
+         * Currently this is conditionally called in MainView.tsx when analyzing
+         * the document's url.
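+         *
+         * A usage sketch: the MainView.tsx hunk later in this series shows the
+         * actual trigger, roughly:
+         *
+         *     if (window.location.search.includes("readonly")) {
+         *         DocServer.Util.makeReadOnly();
+         *     }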
+         */
+        export function makeReadOnly() {
+            // replaces default functionality with no-ops
+            _createField = emptyFunction;
+            _emitFieldUpdate = emptyFunction;
+            _respondToUpdate = emptyFunction;
+        }
-    export function DeleteDatabase() {
-        Utils.Emit(_socket, MessageStore.DeleteAll, {});
+    }
 
-    export async function GetRefField(id: string): Promise<Opt<RefField>> {
+    // RETRIEVE DOCS FROM SERVER
+
+    /**
+     * Given a single Doc GUID, this utility function will asynchronously attempt to fetch the id's associated
+     * field, first looking in the RefField cache and then communicating with
+     * the server if the document has not been cached.
+     * @param id the id of the requested document
+     */
+    export async function getRefField(id: string): Promise<Opt<RefField>> {
+        // an initial pass through the cache to determine whether the document needs to be fetched,
+        // is already in the process of being fetched or already exists in the
+        // cache
         let cached = _cache[id];
         if (cached === undefined) {
-            const prom = Utils.EmitCallback(_socket, MessageStore.GetRefField, id).then(async fieldJson => {
+            // NOT CACHED => we'll have to send a request to the server
+
+            // synchronously, we emit a single callback to the server requesting the serialized (i.e. represented by a string)
+            // field for the given id. This returns a promise, which, when resolved, indicates that the JSON serialized version of
+            // the field has been returned from the server
+            const getSerializedField = Utils.emitCallback(_socket, MessageStore.GetRefField, id);
+
+            // when the serialized RefField has been received, go ahead and begin deserializing it into an object.
+            // Here, once deserialized, we also invoke .proto to 'load' the document's prototype, which ensures that all
+            // future .proto calls on the Doc won't have to go farther than the cache to get their actual value.
+            const deserializeField = getSerializedField.then(async fieldJson => {
+                // deserialize
                 const field = SerializationHelper.Deserialize(fieldJson);
+                // either way, overwrite or delete any promises cached at this id (that we inserted as flags
+                // to indicate that the field was in the process of being fetched). Now everything
+                // should be an actual value within or entirely absent from the cache.
                 if (field !== undefined) {
                     await field.proto;
                     _cache[id] = field;
@@ -38,41 +119,45 @@ export namespace DocServer {
                 }
                 return field;
             });
-            _cache[id] = prom;
-            return prom;
+            // here, indicate that the document associated with this id is currently
+            // being retrieved and cached
+            _cache[id] = deserializeField;
+            return deserializeField;
         } else if (cached instanceof Promise) {
+            // BEING RETRIEVED AND CACHED => some other caller previously (likely recently) called GetRefField(s),
+            // and requested the document I'm looking for. Shouldn't fetch again, just
+            // return this promise which will resolve to the field itself (see 7)
             return cached;
         } else {
+            // CACHED => great, let's just return the cached field we have
            return cached;
         }
     }
 
     /**
-     * Given a list of Doc GUIDs, this utility function will asynchronously attempt to fetch each document
-     * associated with a given input id, first looking in the RefField cache and then communicating with
-     * the server if the document was not found there.
-     *
+     * Given a list of Doc GUIDs, this utility function will asynchronously attempt to fetch each id's associated
+     * field, first looking in the RefField cache and then communicating with
+     * the server if the document has not been cached.
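+     *
+     * A hedged usage sketch (the ids and variable names here are illustrative
+     * only, not taken from a real call site):
+     *
+     *     const fields = await DocServer.getRefFields(["guidA", "guidB"]);
+     *     const docA = fields["guidA"]; // Opt<RefField>, i.e. possibly undefined
+     *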
* @param ids the ids that map to the requested documents
-    export async function GetRefFields(ids: string[]): Promise<{ [id: string]: Opt<RefField> }> {
+     */
+    export async function getRefFields(ids: string[]): Promise<{ [id: string]: Opt<RefField> }> {
         const requestedIds: string[] = [];
         const waitingIds: string[] = [];
         const promises: Promise<Opt<RefField>>[] = [];
         const map: { [id: string]: Opt<RefField> } = {};
-        // 1) An initial pass through the cache to determine which documents need to be fetched,
+        // 1) an initial pass through the cache to determine which documents need to be fetched,
         // which are already in the process of being fetched and which already exist in the
         // cache
         for (const id of ids) {
             const cached = _cache[id];
-            if (cached === undefined) {
                 // NOT CACHED => we'll have to send a request to the server
                 requestedIds.push(id);
             } else if (cached instanceof Promise) {
-                // BEING CACHED => someone else previously (likely recently) called GetRefFields,
+                // BEING RETRIEVED AND CACHED => some other caller previously (likely recently) called GetRefField(s),
                 // and requested one of the documents I'm looking for. Shouldn't fetch again, just
-                // wait until this promise is resolved (see the second to last line of the function)
+                // wait until this promise is resolved (see 7)
                 promises.push(cached);
                 waitingIds.push(id);
             } else {
@@ -81,46 +166,49 @@ export namespace DocServer {
             }
         }
 
-        // 2) Synchronously, we emit a single callback to the server requesting the documents for the given ids.
-        // This returns a promise, which, when resolved, indicates that all the JSON serialized versions of
+        // 2) synchronously, we emit a single callback to the server requesting the serialized (i.e. represented by a string)
+        // fields for the given ids. This returns a promise, which, when resolved, indicates that all the JSON serialized versions of
         // the fields have been returned from the server
-        const fieldsReceived: Promise = Utils.EmitCallback(_socket, MessageStore.GetRefFields, requestedIds);
+        const getSerializedFields: Promise = Utils.emitCallback(_socket, MessageStore.GetRefFields, requestedIds);
 
-        // 3) When the serialized RefFields have been received, go head and begin deserializing them into objects.
+        // 3) when the serialized RefFields have been received, go ahead and begin deserializing them into objects.
         // Here, once deserialized, we also invoke .proto to 'load' the documents' prototypes, which ensures that all
-        // future .proto calls won't have to go farther than the cache to get their actual value.
-        const fieldsDeserialized = fieldsReceived.then(async fields => {
+        // future .proto calls on the Doc won't have to go farther than the cache to get their actual value.
+        const deserializeFields = getSerializedFields.then(async fields => {
            const fieldMap: { [id: string]: RefField } = {};
-            const deserializedFields: any = [];
+            const protosToLoad: any = [];
             for (const field of fields) {
                 if (field !== undefined) {
                     // deserialize
                     let deserialized: any = SerializationHelper.Deserialize(field);
                     fieldMap[field.id] = deserialized;
-                    deserializedFields.push(deserialized.proto);
+                    // adds to a list of promises that will be awaited asynchronously
+                    protosToLoad.push(deserialized.proto);
                 }
             }
-            // this actually handles the loeading of prototypes
-            await Promise.all(deserializedFields);
+            // this actually handles the loading of prototypes
+            await Promise.all(protosToLoad);
             return fieldMap;
         });
 
-        // 4) Here, for each of the documents we've requested *ourselves* (i.e.
weren't promises or found in the cache) + // 4) here, for each of the documents we've requested *ourselves* (i.e. weren't promises or found in the cache) // we set the value at the field's id to a promise that will resolve to the field. // When we find that promises exist at keys in the cache, THIS is where they were set, just by some other caller (method). - requestedIds.forEach(id => _cache[id] = fieldsDeserialized.then(fields => fields[id])); + // The mapping in the .then call ensures that when other callers await these promises, they'll + // get the resolved field + requestedIds.forEach(id => _cache[id] = deserializeFields.then(fields => fields[id])); - // 5) At this point, all fields have a) been returned from the server and b) been deserialized into actual Field objects whose + // 5) at this point, all fields have a) been returned from the server and b) been deserialized into actual Field objects whose // prototype documents, if any, have also been fetched and cached. - const fields = await fieldsDeserialized; + const fields = await deserializeFields; - // 6) With this confidence, we can now go through and update the cache at the ids of the fields that + // 6) with this confidence, we can now go through and update the cache at the ids of the fields that // we explicitly had to fetch. To finish it off, we add whatever value we've come up with for a given // id to the soon to be returned field mapping. requestedIds.forEach(id => { const field = fields[id]; - // either way, overwrite or delete any promises that we inserted as flags - // to indicate that the field was in the process of being fetched. Now everything + // either way, overwrite or delete any promises (that we inserted as flags + // to indicate that the field was in the process of being fetched). Now everything // should be an actual value within or entirely absent from the cache. if (field !== undefined) { _cache[id] = field; @@ -130,78 +218,141 @@ export namespace DocServer { map[id] = field; }); - // 7) Those promises we encountered in the else if of 1), which represent + // 7) those promises we encountered in the else if of 1), which represent // other callers having already submitted a request to the server for (a) document(s) // in which we're interested, must still be awaited so that we can return the proper // values for those as well. // - // Fortunately, those other callers will also hit their own version of 6) and clean up + // fortunately, those other callers will also hit their own version of 6) and clean up // the shared cache when these promises resolve, so all we have to do is... const otherCallersFetching = await Promise.all(promises); // ...extract the RefFields returned from the resolution of those promises and add them to our // own map. waitingIds.forEach((id, index) => map[id] = otherCallersFetching[index]); - // Now, we return our completed mapping from all of the ids that were passed into the method + // now, we return our completed mapping from all of the ids that were passed into the method // to their actual RefField | undefined values. This return value either becomes the input // argument to the caller's promise (i.e. GetRefFields.then(map => //do something with map...)) // or it is the direct return result if the promise is awaited. return map; } - let _UpdateField = (id: string, diff: any) => { + // WRITE A NEW DOCUMENT TO THE SERVER + + /** + * A wrapper around the function local variable _createField. 
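+     * (In practice the Doc constructor is the main caller: the Doc.ts hunk
+     * later in this series runs DocServer.createField(doc) whenever a document
+     * is built without an id or with forceSave set.)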
+     * This allows us to swap in different executions while comfortably
+     * calling the same function throughout the code base (such as in Util.makeReadOnly())
+     * @param field the [RefField] to be serialized and sent to the server to be stored in the database
+     */
+    export function createField(field: RefField) {
+        _createField(field);
+    }
+
+    /**
+     * The default behavior for field creation. This inserts the [Doc] instance
+     * in the cache at its id, serializes the [Doc]'s initial state
+     * and finally sends that serialized data to the server.
+     * @param field the [RefField] to be serialized and sent to the server to be stored in the database
+     */
+    let _createField = (field: RefField) => {
+        _cache[field[Id]] = field;
+        const initialState = SerializationHelper.serialize(field);
+        Utils.emit(_socket, MessageStore.CreateField, initialState);
+    };
+
+    // NOTIFY THE SERVER OF AN UPDATE TO A DOC'S STATE
+
+    /**
+     * A wrapper around the function local variable _emitFieldUpdate.
+     * This allows us to swap in different executions while comfortably
+     * calling the same function throughout the code base (such as in Util.makeReadOnly())
+     * @param id the id of the [Doc] whose state has been updated in our client
+     * @param updatedState the new value of the document. At some point, this
+     * should actually be a proper diff, to improve efficiency
+     */
+    export function emitFieldUpdate(id: string, updatedState: any) {
+        _emitFieldUpdate(id, updatedState);
+    }
+
+    /**
+     * The default behavior for indicating to the server that we've locally updated
+     * a document.
+     * @param id the id of the [Doc] whose state has been updated in our client
+     * @param updatedState the new value of the document. At some point, this
+     * should actually be a proper diff, to improve efficiency
+     */
+    let _emitFieldUpdate = (id: string, updatedState: any) => {
+        // don't emit a duplicate message if the server is already
+        // (asynchronously) still updating this document's state.
         if (id === updatingId) {
             return;
         }
-        Utils.Emit(_socket, MessageStore.UpdateField, { id, diff });
+        // creates the diff object to send to the server
+        let diff: Diff = { id, diff: updatedState };
+        // emit this diff to notify server
+        Utils.emit(_socket, MessageStore.UpdateField, diff);
     };
 
-    export function UpdateField(id: string, diff: any) {
-        _UpdateField(id, diff);
+    }
 
-    let _CreateField = (field: RefField) => {
-        _cache[field[Id]] = field;
-        const initialState = SerializationHelper.Serialize(field);
-        Utils.Emit(_socket, MessageStore.CreateField, initialState);
-    };
+    // RESPOND TO THE SERVER'S INDICATION THAT A DOC'S STATE HAS BEEN UPDATED
 
-    export function CreateField(field: RefField) {
-        _CreateField(field);
+    /**
+     * Whenever the client receives an update, execute the
+     * current behavior.
+     */
+    Utils.addServerHandler(_socket, MessageStore.UpdateField, respondToUpdate);
 
+    /**
+     * A wrapper around the function local variable _respondToUpdate.
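+     * (For orientation: judging from the setter in new_fields/util.ts later in
+     * this series, the payload delivered here looks roughly like
+     *     { id: "<doc guid>", diff: { "$set": { "fields.x": 10 } } }
+     * where the field name and value are illustrative only.)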
+ * This allows us to swap in different executions while comfortably + * calling the same function throughout the code base (such as in Util.makeReadonly()) + * @param diff kept as [any], but actually the [Diff] object sent from the server containing + * the [Doc]'s id and its new state + */ + function respondToUpdate(diff: any) { + _respondToUpdate(diff); } - let updatingId: string | undefined; + /** + * The default behavior for responding to another client's indication + * that it has updated the state of a [Doc] that is also in use by + * this client + * @param diff kept as [any], but actually the [Diff] object sent from the server containing + * the [Doc]'s id and its new state + */ let _respondToUpdate = (diff: any) => { const id = diff.id; + // to be valid, the Diff object must reference + // a document's id if (id === undefined) { return; } - const field = _cache[id]; const update = (f: Opt) => { + // if the RefField is absent from the cache or + // its promise in the cache resolves to undefined, there + // can't be anything to update if (f === undefined) { return; } + // extract this Doc's update handler const handler = f[HandleUpdate]; if (handler) { + // set the 'I'm currently updating this Doc' flag updatingId = id; handler.call(f, diff.diff); + // reset to indicate no ongoing updates updatingId = undefined; } }; + // check the cache for the field + const field = _cache[id]; if (field instanceof Promise) { + // if the field is still being retrieved, update when the promise is resolved field.then(update); } else { + // otherwise, just execute the update update(field); } }; - function respondToUpdate(diff: any) { - _respondToUpdate(diff); - } - - function connected() { - _socket.emit(MessageStore.Bar.Message, GUID); - } - Utils.AddServerHandler(_socket, MessageStore.Foo, connected); - Utils.AddServerHandler(_socket, MessageStore.UpdateField, respondToUpdate); } \ No newline at end of file diff --git a/src/client/util/SerializationHelper.ts b/src/client/util/SerializationHelper.ts index 7ded85e43..ea8af3834 100644 --- a/src/client/util/SerializationHelper.ts +++ b/src/client/util/SerializationHelper.ts @@ -7,7 +7,7 @@ export namespace SerializationHelper { return serializing > 0; } - export function Serialize(obj: Field): any { + export function serialize(obj: Field): any { if (obj === undefined || obj === null) { return undefined; } @@ -124,7 +124,7 @@ export namespace Deserializable { export function autoObject(): PropSchema { return custom( - (s) => SerializationHelper.Serialize(s), + (s) => SerializationHelper.serialize(s), (s) => SerializationHelper.Deserialize(s) ); } \ No newline at end of file diff --git a/src/new_fields/Doc.ts b/src/new_fields/Doc.ts index af65f5482..9da8912fe 100644 --- a/src/new_fields/Doc.ts +++ b/src/new_fields/Doc.ts @@ -80,7 +80,7 @@ export class Doc extends RefField { }); this[SelfProxy] = doc; if (!id || forceSave) { - DocServer.CreateField(doc); + DocServer.createField(doc); } return doc; } @@ -108,7 +108,7 @@ export class Doc extends RefField { private ___fields: any = {}; private [Update] = (diff: any) => { - DocServer.UpdateField(this[Id], diff); + DocServer.emitFieldUpdate(this[Id], diff); } private [Self] = this; diff --git a/src/new_fields/util.ts b/src/new_fields/util.ts index 8caceb063..7709d6c24 100644 --- a/src/new_fields/util.ts +++ b/src/new_fields/util.ts @@ -43,7 +43,7 @@ export const setter = action(function (target: any, prop: string | symbol | numb } else { target.__fields[prop] = value; } - target[Update]({ '$set': { ["fields." 
+ prop]: value instanceof ObjectField ? SerializationHelper.Serialize(value) : (value === undefined ? null : value) } }); + target[Update]({ '$set': { ["fields." + prop]: value instanceof ObjectField ? SerializationHelper.serialize(value) : (value === undefined ? null : value) } }); UndoManager.AddEvent({ redo: () => receiver[prop] = value, undo: () => receiver[prop] = curValue @@ -103,7 +103,7 @@ export function updateFunction(target: any, prop: any, value: any, receiver: any let current = ObjectField.MakeCopy(value); return (diff?: any) => { if (true || !diff) { - diff = { '$set': { ["fields." + prop]: SerializationHelper.Serialize(value) } }; + diff = { '$set': { ["fields." + prop]: SerializationHelper.serialize(value) } }; const oldValue = current; const newValue = ObjectField.MakeCopy(value); current = newValue; diff --git a/src/server/index.ts b/src/server/index.ts index fd66c90b4..d9fe9d93d 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -301,16 +301,16 @@ let clients: Map = {}; server.on("connection", function (socket: Socket) { console.log("a user has connected"); - Utils.Emit(socket, MessageStore.Foo, "handshooken"); + Utils.emit(socket, MessageStore.Foo, "handshooken"); - Utils.AddServerHandler(socket, MessageStore.Bar, barReceived); - Utils.AddServerHandler(socket, MessageStore.SetField, (args) => setField(socket, args)); + Utils.addServerHandler(socket, MessageStore.Bar, barReceived); + Utils.addServerHandler(socket, MessageStore.SetField, (args) => setField(socket, args)); Utils.AddServerHandlerCallback(socket, MessageStore.GetField, getField); Utils.AddServerHandlerCallback(socket, MessageStore.GetFields, getFields); - Utils.AddServerHandler(socket, MessageStore.DeleteAll, deleteFields); + Utils.addServerHandler(socket, MessageStore.DeleteAll, deleteFields); - Utils.AddServerHandler(socket, MessageStore.CreateField, CreateField); - Utils.AddServerHandler(socket, MessageStore.UpdateField, diff => UpdateField(socket, diff)); + Utils.addServerHandler(socket, MessageStore.CreateField, CreateField); + Utils.addServerHandler(socket, MessageStore.UpdateField, diff => UpdateField(socket, diff)); Utils.AddServerHandlerCallback(socket, MessageStore.GetRefField, GetRefField); Utils.AddServerHandlerCallback(socket, MessageStore.GetRefFields, GetRefFields); }); -- cgit v1.2.3-70-g09d2 From de0304b2966ebdede9d9db8c510e19020046115c Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Mon, 17 Jun 2019 13:38:15 -0400 Subject: peripheral renaming fixes --- src/client/documents/Documents.ts | 4 ++-- src/client/util/History.ts | 6 +++--- src/client/util/SearchUtil.ts | 4 ++-- src/client/util/TooltipTextMenu.tsx | 8 ++++---- src/client/views/MainView.tsx | 10 +++++----- src/client/views/SearchBox.tsx | 6 +++--- src/client/views/collections/CollectionDockingView.tsx | 10 +++++----- src/client/views/collections/CollectionSubView.tsx | 8 ++++---- src/client/views/collections/CollectionVideoView.tsx | 2 +- src/client/views/collections/DockedFrameRenderer.tsx | 2 +- .../collections/collectionFreeForm/CollectionFreeFormView.tsx | 4 ++-- .../views/collections/collectionFreeForm/MarqueeView.tsx | 2 +- src/client/views/nodes/DocumentView.tsx | 2 +- src/client/views/nodes/FormattedTextBox.tsx | 6 +++--- src/client/views/nodes/PDFBox.tsx | 2 +- src/client/views/nodes/VideoBox.tsx | 2 +- src/debug/Viewer.tsx | 2 +- src/mobile/ImageUpload.tsx | 4 ++-- src/new_fields/Proxy.ts | 2 +- src/server/authentication/models/current_user_utils.ts | 6 +++--- 20 files changed, 46 insertions(+), 46 deletions(-) 
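The call-site updates in this commit are mechanical and follow one pattern; as a hedged illustration (identifiers taken from the hunks below):

    DocServer.GetRefField(id)    becomes   DocServer.getRefField(id)
    DocServer.GetRefFields(ids)  becomes   DocServer.getRefFields(ids)
    DocServer.prepend("/doc/")   becomes   DocServer.Util.prepend("/doc/")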
(limited to 'src/client/util') diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts index b10954636..758291b9b 100644 --- a/src/client/documents/Documents.ts +++ b/src/client/documents/Documents.ts @@ -98,7 +98,7 @@ export namespace Docs { // non-guid string ids for each document prototype let protoIds = [textProtoId, histoProtoId, collProtoId, imageProtoId, webProtoId, kvpProtoId, videoProtoId, audioProtoId, pdfProtoId, iconProtoId] // fetch the actual prototype documents from the server - let actualProtos = await DocServer.GetRefFields(protoIds); + let actualProtos = await DocServer.getRefFields(protoIds); // initialize prototype documents textProto = actualProtos[textProtoId] as Doc || CreateTextProto(); @@ -363,7 +363,7 @@ export namespace Docs { CurrentUserUtils.AddNorthstarSchema(schema, schemaDoc); const docs = schemaDocuments; CurrentUserUtils.GetAllNorthstarColumnAttributes(schema).map(attr => { - DocServer.GetRefField(attr.displayName! + ".alias").then(action((field: Opt) => { + DocServer.getRefField(attr.displayName! + ".alias").then(action((field: Opt) => { if (field instanceof Doc) { docs.push(field); } else { diff --git a/src/client/util/History.ts b/src/client/util/History.ts index 545ea8629..94bfcbe09 100644 --- a/src/client/util/History.ts +++ b/src/client/util/History.ts @@ -88,7 +88,7 @@ export namespace HistoryUtil { } export function createUrl(params: ParsedUrl): string { - let baseUrl = DocServer.prepend(`/${params.type}`); + let baseUrl = DocServer.Util.prepend(`/${params.type}`); switch (params.type) { case "doc": const initializers = encodeURIComponent(JSON.stringify(params.initializers)); @@ -103,7 +103,7 @@ export namespace HistoryUtil { } export async function initDoc(id: string, initializer: DocInitializerList) { - const doc = await DocServer.GetRefField(id); + const doc = await DocServer.getRefField(id); if (!(doc instanceof Doc)) { return; } @@ -111,7 +111,7 @@ export namespace HistoryUtil { } async function onDocUrl(url: DocUrl) { - const field = await DocServer.GetRefField(url.docId); + const field = await DocServer.getRefField(url.docId); await Promise.all(Object.keys(url.initializers).map(id => initDoc(id, url.initializers[id]))); if (field instanceof Doc) { MainView.Instance.openWorkspace(field, true); diff --git a/src/client/util/SearchUtil.ts b/src/client/util/SearchUtil.ts index 28ec8ca14..9dd9acbb7 100644 --- a/src/client/util/SearchUtil.ts +++ b/src/client/util/SearchUtil.ts @@ -7,13 +7,13 @@ export namespace SearchUtil { export function Search(query: string, returnDocs: true): Promise; export function Search(query: string, returnDocs: false): Promise; export async function Search(query: string, returnDocs: boolean) { - const ids = JSON.parse(await rp.get(DocServer.prepend("/search"), { + const ids = JSON.parse(await rp.get(DocServer.Util.prepend("/search"), { qs: { query } })); if (!returnDocs) { return ids; } - const docMap = await DocServer.GetRefFields(ids); + const docMap = await DocServer.getRefFields(ids); return ids.map((id: string) => docMap[id]).filter((doc: any) => doc instanceof Doc); } diff --git a/src/client/util/TooltipTextMenu.tsx b/src/client/util/TooltipTextMenu.tsx index fa2483db5..36219a99e 100644 --- a/src/client/util/TooltipTextMenu.tsx +++ b/src/client/util/TooltipTextMenu.tsx @@ -187,9 +187,9 @@ export class TooltipTextMenu { let link = node && node.marks.find(m => m.type.name === "link"); if (link) { let href: string = link.attrs.href; - if (href.indexOf(DocServer.prepend("/doc/")) === 0) { - 
let docid = href.replace(DocServer.prepend("/doc/"), ""); - DocServer.GetRefField(docid).then(action((f: Opt) => { + if (href.indexOf(DocServer.Util.prepend("/doc/")) === 0) { + let docid = href.replace(DocServer.Util.prepend("/doc/"), ""); + DocServer.getRefField(docid).then(action((f: Opt) => { if (f instanceof Doc) { if (DocumentManager.Instance.getDocumentView(f)) { DocumentManager.Instance.getDocumentView(f)!.props.focus(f); @@ -218,7 +218,7 @@ export class TooltipTextMenu { handlers: { dragComplete: action(() => { let m = dragData.droppedDocuments; - this.makeLink(DocServer.prepend("/doc/" + m[0][Id])); + this.makeLink(DocServer.Util.prepend("/doc/" + m[0][Id])); }), }, hideSource: false diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx index 984db0426..734961b56 100644 --- a/src/client/views/MainView.tsx +++ b/src/client/views/MainView.tsx @@ -76,11 +76,11 @@ export class MainView extends React.Component { // causes errors to be generated when modifying an observable outside of an action configure({ enforceActions: "observed" }); if (window.location.search.includes("readonly")) { - DocServer.makeReadOnly(); + DocServer.Util.makeReadOnly(); } if (window.location.search.includes("safe")) { if (!window.location.search.includes("nro")) { - DocServer.makeReadOnly(); + DocServer.Util.makeReadOnly(); } CollectionBaseView.SetSafeMode(true); } @@ -141,7 +141,7 @@ export class MainView extends React.Component { this.createNewWorkspace(); } } else { - DocServer.GetRefField(CurrentUserUtils.MainDocId).then(field => + DocServer.getRefField(CurrentUserUtils.MainDocId).then(field => field instanceof Doc ? this.openWorkspace(field) : this.createNewWorkspace(CurrentUserUtils.MainDocId)); } @@ -294,7 +294,7 @@ export class MainView extends React.Component { let logoutRef = React.createRef(); return [ - , + ,
+
]; } diff --git a/src/client/views/SearchBox.tsx b/src/client/views/SearchBox.tsx index 7164d98a4..973715876 100644 --- a/src/client/views/SearchBox.tsx +++ b/src/client/views/SearchBox.tsx @@ -56,13 +56,13 @@ export class SearchBox extends React.Component { @action getResults = async (query: string) => { - let response = await rp.get(DocServer.prepend('/search'), { + let response = await rp.get(DocServer.Util.prepend('/search'), { qs: { query } }); let res: string[] = JSON.parse(response); - const fields = await DocServer.GetRefFields(res); + const fields = await DocServer.getRefFields(res); const docs: Doc[] = []; for (const id of res) { const field = fields[id]; @@ -74,7 +74,7 @@ export class SearchBox extends React.Component { } public static async convertDataUri(imageUri: string, returnedFilename: string) { try { - let posting = DocServer.prepend(RouteStore.dataUriToImage); + let posting = DocServer.Util.prepend(RouteStore.dataUriToImage); const returnedUri = await rp.post(posting, { body: { uri: imageUri, diff --git a/src/client/views/collections/CollectionDockingView.tsx b/src/client/views/collections/CollectionDockingView.tsx index e2bcb10ec..4f5837590 100644 --- a/src/client/views/collections/CollectionDockingView.tsx +++ b/src/client/views/collections/CollectionDockingView.tsx @@ -306,7 +306,7 @@ export class CollectionDockingView extends React.Component) => + DocServer.getRefField(docid).then(action(async (sourceDoc: Opt) => (sourceDoc instanceof Doc) && DragLinksAsDocuments(tab, x, y, sourceDoc))); } else if ((className === "lm_title" || className === "lm_tab lm_active") && !e.shiftKey) { @@ -320,7 +320,7 @@ export class CollectionDockingView extends React.Component) => { + DocServer.getRefField(docid).then(action((f: Opt) => { if (f instanceof Doc) { DragManager.StartDocumentDrag([tab], new DragManager.DocumentDragData([f]), x, y, { @@ -372,7 +372,7 @@ export class CollectionDockingView extends React.Component { + DocServer.getRefField(tab.contentItem.config.props.documentId).then(async doc => { if (doc instanceof Doc) { let counter: any = this.htmlToElement(`0`); tab.element.append(counter); @@ -409,7 +409,7 @@ export class CollectionDockingView extends React.Component { - let doc = await DocServer.GetRefField(contentItem.config.props.documentId); + let doc = await DocServer.getRefField(contentItem.config.props.documentId); if (doc instanceof Doc) { let theDoc = doc; CollectionDockingView.TopLevel._removedDocs.push(theDoc); diff --git a/src/client/views/collections/CollectionSubView.tsx b/src/client/views/collections/CollectionSubView.tsx index 440a2410b..36e276d13 100644 --- a/src/client/views/collections/CollectionSubView.tsx +++ b/src/client/views/collections/CollectionSubView.tsx @@ -133,7 +133,7 @@ export function CollectionSubView(schemaCtor: (doc: Doc) => T) { if (path.includes(window.location.hostname)) { let s = path.split('/'); let id = s[s.length - 1]; - DocServer.GetRefField(id).then(field => { + DocServer.getRefField(id).then(field => { if (field instanceof Doc) { let alias = Doc.MakeAlias(field); alias.x = options.x || 0; @@ -170,8 +170,8 @@ export function CollectionSubView(schemaCtor: (doc: Doc) => T) { if (html && html.indexOf(document.location.origin)) { // prosemirror text containing link to dash document let start = html.indexOf(window.location.origin); let path = html.substr(start, html.length - start); - let docid = path.substr(0, path.indexOf("\">")).replace(DocServer.prepend("/doc/"), "").split("?")[0]; - DocServer.GetRefField(docid).then(f => (f 
instanceof Doc) && this.props.addDocument(f, false)); + let docid = path.substr(0, path.indexOf("\">")).replace(DocServer.Util.prepend("/doc/"), "").split("?")[0]; + DocServer.getRefField(docid).then(f => (f instanceof Doc) && this.props.addDocument(f, false)); return; } if (html && html.indexOf("(schemaCtor: (doc: Doc) => T) { if (item.kind === "string" && item.type.indexOf("uri") !== -1) { let str: string; let prom = new Promise(resolve => e.dataTransfer.items[i].getAsString(resolve)) - .then(action((s: string) => rp.head(DocServer.prepend(RouteStore.corsProxy + "/" + (str = s))))) + .then(action((s: string) => rp.head(DocServer.Util.prepend(RouteStore.corsProxy + "/" + (str = s))))) .then(result => { let type = result["content-type"]; if (type) { diff --git a/src/client/views/collections/CollectionVideoView.tsx b/src/client/views/collections/CollectionVideoView.tsx index bd5cd5450..ccbac9915 100644 --- a/src/client/views/collections/CollectionVideoView.tsx +++ b/src/client/views/collections/CollectionVideoView.tsx @@ -97,7 +97,7 @@ export class CollectionVideoView extends React.Component { let filename = encodeURIComponent("snapshot" + this.props.Document.title + "_" + this.props.Document.curPage).replace(/\./g, ""); SearchBox.convertDataUri(dataUrl, filename).then((returnedFilename) => { if (returnedFilename) { - let url = DocServer.prepend(returnedFilename); + let url = DocServer.Util.prepend(returnedFilename); let imageSummary = Docs.Create.ImageDocument(url, { x: NumCast(this.props.Document.x) + width, y: NumCast(this.props.Document.y), width: 150, height: height / width * 150, title: "--snapshot" + NumCast(this.props.Document.curPage) + " image-" diff --git a/src/client/views/collections/DockedFrameRenderer.tsx b/src/client/views/collections/DockedFrameRenderer.tsx index 25d4b2a49..1e7c5661b 100644 --- a/src/client/views/collections/DockedFrameRenderer.tsx +++ b/src/client/views/collections/DockedFrameRenderer.tsx @@ -42,7 +42,7 @@ export class DockedFrameRenderer extends React.Component { } constructor(props: any) { super(props); - DocServer.GetRefField(this.props.documentId).then(action((f: Opt) => this._document = f as Doc)); + DocServer.getRefField(this.props.documentId).then(action((f: Opt) => this._document = f as Doc)); } nativeWidth = () => NumCast(this._document!.nativeWidth, this._panelWidth); diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx index 9d19df540..cd613e6ab 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx @@ -235,8 +235,8 @@ export class CollectionFreeFormView extends CollectionSubView(PanZoomDocument) { e.preventDefault(); let start = html.indexOf(window.location.origin); let path = html.substr(start, html.length - start); - let docid = path.substr(0, path.indexOf("\">")).replace(DocServer.prepend("/doc/"), "").split("?")[0]; - DocServer.GetRefField(docid).then(f => { + let docid = path.substr(0, path.indexOf("\">")).replace(DocServer.Util.prepend("/doc/"), "").split("?")[0]; + DocServer.getRefField(docid).then(f => { if (f instanceof Doc) { f.x = pt[0]; f.y = pt[1]; diff --git a/src/client/views/collections/collectionFreeForm/MarqueeView.tsx b/src/client/views/collections/collectionFreeForm/MarqueeView.tsx index cd386abfa..07a58ed64 100644 --- a/src/client/views/collections/collectionFreeForm/MarqueeView.tsx +++ 
b/src/client/views/collections/collectionFreeForm/MarqueeView.tsx @@ -306,7 +306,7 @@ export class MarqueeView extends React.Component let summary = Docs.Create.TextDocument({ x: bounds.left, y: bounds.top, width: 300, height: 100, backgroundColor: "#e2ad32" /* yellow */, title: "-summary-" }); SearchBox.convertDataUri(dataUrl, "icon" + summary[Id] + "_image").then((returnedFilename) => { if (returnedFilename) { - let url = DocServer.prepend(returnedFilename); + let url = DocServer.Util.prepend(returnedFilename); let imageSummary = Docs.Create.ImageDocument(url, { x: bounds.left, y: bounds.top + 100 / zoomBasis, width: 150, height: bounds.height / bounds.width * 150, title: "-summary image-" diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index 16e40000d..fdcb20e9a 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -422,7 +422,7 @@ export class DocumentView extends DocComponent(Docu }, icon: "search" }); cm.addItem({ description: "Center View", event: () => this.props.focus(this.props.Document), icon: "crosshairs" }); - cm.addItem({ description: "Copy URL", event: () => Utils.CopyText(DocServer.prepend("/doc/" + this.props.Document[Id])), icon: "link" }); + cm.addItem({ description: "Copy URL", event: () => Utils.CopyText(DocServer.Util.prepend("/doc/" + this.props.Document[Id])), icon: "link" }); cm.addItem({ description: "Copy ID", event: () => Utils.CopyText(this.props.Document[Id]), icon: "fingerprint" }); cm.addItem({ description: "Delete", event: this.deleteClicked, icon: "trash" }); if (!this.topMost) { diff --git a/src/client/views/nodes/FormattedTextBox.tsx b/src/client/views/nodes/FormattedTextBox.tsx index d00a4b928..6a14a04f7 100644 --- a/src/client/views/nodes/FormattedTextBox.tsx +++ b/src/client/views/nodes/FormattedTextBox.tsx @@ -237,9 +237,9 @@ export class FormattedTextBox extends DocComponent<(FieldViewProps & FormattedTe href = parent.childNodes[0].href; } if (href) { - if (href.indexOf(DocServer.prepend("/doc/")) === 0) { - let docid = href.replace(DocServer.prepend("/doc/"), "").split("?")[0]; - DocServer.GetRefField(docid).then(f => { + if (href.indexOf(DocServer.Util.prepend("/doc/")) === 0) { + let docid = href.replace(DocServer.Util.prepend("/doc/"), "").split("?")[0]; + DocServer.getRefField(docid).then(f => { (f instanceof Doc) && DocumentManager.Instance.jumpToDocument(f, ctrlKey, document => this.props.addDocTab(document, "inTab")) }); } diff --git a/src/client/views/nodes/PDFBox.tsx b/src/client/views/nodes/PDFBox.tsx index aa29a7170..df9e49b64 100644 --- a/src/client/views/nodes/PDFBox.tsx +++ b/src/client/views/nodes/PDFBox.tsx @@ -257,7 +257,7 @@ export class PDFBox extends DocComponent(PdfDocumen .then(action((dataUrl: string) => { SearchBox.convertDataUri(dataUrl, "icon" + this.Document[Id] + "_" + this.curPage).then((returnedFilename) => { if (returnedFilename) { - let url = DocServer.prepend(returnedFilename); + let url = DocServer.Util.prepend(returnedFilename); this.props.Document.thumbnail = new ImageField(new URL(url)); } runInAction(() => this._renderAsSvg = true); diff --git a/src/client/views/nodes/VideoBox.tsx b/src/client/views/nodes/VideoBox.tsx index 35ecf12f6..9ab607e91 100644 --- a/src/client/views/nodes/VideoBox.tsx +++ b/src/client/views/nodes/VideoBox.tsx @@ -97,7 +97,7 @@ export class VideoBox extends DocComponent(VideoD }; try { let responseSchema: any = {}; - const videoInfoResponse = await rp.get(DocServer.prepend(RouteStore.corsProxy 
+ "/" + `https://www.youtube.com/watch?v=${videoId}`), videoInfoRequestConfig); + const videoInfoResponse = await rp.get(DocServer.Util.prepend(RouteStore.corsProxy + "/" + `https://www.youtube.com/watch?v=${videoId}`), videoInfoRequestConfig); const dataHtml = videoInfoResponse; const start = dataHtml.indexOf('ytplayer.config = ') + 18; const end = dataHtml.indexOf(';ytplayer.load'); diff --git a/src/debug/Viewer.tsx b/src/debug/Viewer.tsx index b22300d0b..753149756 100644 --- a/src/debug/Viewer.tsx +++ b/src/debug/Viewer.tsx @@ -146,7 +146,7 @@ class Viewer extends React.Component { @action onKeyPress = (e: React.KeyboardEvent) => { if (e.key === "Enter") { - DocServer.GetRefField(this.idToAdd).then(action((field: any) => { + DocServer.getRefField(this.idToAdd).then(action((field: any) => { if (field !== undefined) { this.fields.push(field); } diff --git a/src/mobile/ImageUpload.tsx b/src/mobile/ImageUpload.tsx index a8f94b746..df597e0a9 100644 --- a/src/mobile/ImageUpload.tsx +++ b/src/mobile/ImageUpload.tsx @@ -57,11 +57,11 @@ class Uploader extends React.Component { this.status = "getting user document"; - const res = await rp.get(DocServer.prepend(RouteStore.getUserDocumentId)); + const res = await rp.get(DocServer.Util.prepend(RouteStore.getUserDocumentId)); if (!res) { throw new Error("No user id returned"); } - const field = await DocServer.GetRefField(res); + const field = await DocServer.getRefField(res); let pending: Opt; if (field instanceof Doc) { pending = await Cast(field.optionalRightCollection, Doc); diff --git a/src/new_fields/Proxy.ts b/src/new_fields/Proxy.ts index 130ec066e..230e4ab8b 100644 --- a/src/new_fields/Proxy.ts +++ b/src/new_fields/Proxy.ts @@ -57,7 +57,7 @@ export class ProxyField extends ObjectField { return undefined; } if (!this.promise) { - this.promise = DocServer.GetRefField(this.fieldId).then(action((field: any) => { + this.promise = DocServer.getRefField(this.fieldId).then(action((field: any) => { this.promise = undefined; this.cache = field; if (field === undefined) this.failed = true; diff --git a/src/server/authentication/models/current_user_utils.ts b/src/server/authentication/models/current_user_utils.ts index 169be3b99..95c20d2db 100644 --- a/src/server/authentication/models/current_user_utils.ts +++ b/src/server/authentication/models/current_user_utils.ts @@ -40,7 +40,7 @@ export class CurrentUserUtils { } public static async loadCurrentUser(): Promise { - let userPromise = rp.get(DocServer.prepend(RouteStore.getCurrUser)).then(response => { + let userPromise = rp.get(DocServer.Util.prepend(RouteStore.getCurrUser)).then(response => { if (response) { let obj = JSON.parse(response); CurrentUserUtils.curr_id = obj.id as string; @@ -49,9 +49,9 @@ export class CurrentUserUtils { throw new Error("There should be a user! Why does Dash think there isn't one?"); } }); - let userDocPromise = await rp.get(DocServer.prepend(RouteStore.getUserDocumentId)).then(id => { + let userDocPromise = await rp.get(DocServer.Util.prepend(RouteStore.getUserDocumentId)).then(id => { if (id) { - return DocServer.GetRefField(id).then(field => + return DocServer.getRefField(id).then(field => runInAction(() => this.user_document = field instanceof Doc ? field : this.createUserDocument(id))); } else { throw new Error("There should be a user id! 
Why does Dash think there isn't one?"); -- cgit v1.2.3-70-g09d2 From 4bec1d89eff45d6dcbb4041bc211db88d9da1c8f Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Tue, 18 Jun 2019 22:02:58 -0400 Subject: fixed serialization typo and added draft of python word doc scraper to git directory for safety --- src/buxton/scraper.py | 128 +++++++++++++++++++++++++++++++++ src/client/DocServer.ts | 2 +- src/client/util/SerializationHelper.ts | 4 +- src/new_fields/util.ts | 4 +- 4 files changed, 133 insertions(+), 5 deletions(-) create mode 100644 src/buxton/scraper.py (limited to 'src/client/util') diff --git a/src/buxton/scraper.py b/src/buxton/scraper.py new file mode 100644 index 000000000..0abebb485 --- /dev/null +++ b/src/buxton/scraper.py @@ -0,0 +1,128 @@ +import os +import docx2txt +from docx import Document +from docx.opc.constants import RELATIONSHIP_TYPE as RT +import re +from pymongo import MongoClient +import shutil +import uuid + +source = "./source" +dist = "./Dash-Web/src/server/public/files" + +collection_handle = MongoClient("localhost", 27017)["Dash"]["buxton"] + +def extract_links(fileName): + links = [] + doc = Document(fileName) + rels = doc.part.rels + for rel in rels: + item = rels[rel] + if item.reltype == RT.HYPERLINK and ".aspx" not in item._target: + links.append(item._target) + return links + +def extract_value(kv_string): + return kv_string.split(":")[1].strip() + +def mkdir_if_absent(path): + try: + if not os.path.exists(path): + os.mkdir(path) + except OSError: + print("Failed to create the appropriate directory structures for %s" % file_name) + +def parse_document(file_name: str): + result = {} + pure_name = file_name.split(".")[0] + + dir_path = dist + "/" + pure_name + mkdir_if_absent(dir_path) + + raw = str(docx2txt.process(source + "/" + file_name, dir_path)) + + sanitize = lambda line: re.sub("[\n\t]+", "", line).strip().replace(u"\u00A0", " ").replace(u"\u2013", "-").replace(u"\u201c", '''"''').replace(u"\u201d", '''"''') + remove_empty = lambda line: len(line) > 1 + + lines = list(map(sanitize, raw.split("\n"))) + lines = list(filter(remove_empty, lines)) + + result["file_name"] = file_name + result["title"] = lines[2] + result["short_description"] = lines[3].replace("Short Description: ", "") + + cur = 5 + notes = "" + while lines[cur] != "Device Details": + notes += lines[cur] + " " + cur += 1 + result["buxton_notes"] = notes.strip() + + cur += 1 + clean = list(map(lambda data: data.strip().split(":"), lines[cur].split("|"))) + result["company"] = clean[0][1].strip() + result["year"] = clean[1][1].strip() + result["original_price"] = clean[2][1].strip() + + cur += 1 + result["degrees_of_freedom"] = extract_value(lines[cur]) + cur += 1 + result["dimensions"] = extract_value(lines[cur]) + + cur += 2 + result["primary_key"] = extract_value(lines[cur]) + cur += 1 + result["secondary_key"] = extract_value(lines[cur]) + + result["hyperlinks"] = extract_links(source + "/" + file_name) + + cur += 2 + link_descriptions = [] + while lines[cur] != "Image": + link_descriptions.append(lines[cur]) + cur += 1 + result["link_descriptions"] = link_descriptions + + images = [] + captions = [] + cur += 3 + while cur + 1 < len(lines) and lines[cur] != "NOTES:": + images.append(lines[cur]) + captions.append(lines[cur + 1]) + cur += 2 + result["images"] = images + result["captions"] = captions + + notes = [] + if (cur < len(lines) and lines[cur] == "NOTES:"): + cur += 1 + while cur < len(lines): + notes.append(lines[cur]) + cur += 1 + result["notes"] = notes + + return result + +def 
upload(document): + wrapper = {} + wrapper["_id"] = str(uuid.uuid4()) + wrapper["fields"] = document + wrapper["__type"] = "Doc" + collection_handle.insert_one(wrapper) + +if os.path.exists(dist): + shutil.rmtree(dist) +while (os.path.exists(dist)): + pass +os.mkdir(dist) + +for file_name in os.listdir(source): + if file_name.endswith('.docx'): + upload(parse_document(file_name)) + +lines = ['*', '!.gitignore'] +with open(dist + "/.gitignore", 'w') as f: + f.write('\n'.join(lines)) + + + diff --git a/src/client/DocServer.ts b/src/client/DocServer.ts index ad7c706b6..3b33657eb 100644 --- a/src/client/DocServer.ts +++ b/src/client/DocServer.ts @@ -257,7 +257,7 @@ export namespace DocServer { */ let _createField = (field: RefField) => { _cache[field[Id]] = field; - const initialState = SerializationHelper.serialize(field); + const initialState = SerializationHelper.Serialize(field); Utils.emit(_socket, MessageStore.CreateField, initialState); }; diff --git a/src/client/util/SerializationHelper.ts b/src/client/util/SerializationHelper.ts index ea8af3834..7ded85e43 100644 --- a/src/client/util/SerializationHelper.ts +++ b/src/client/util/SerializationHelper.ts @@ -7,7 +7,7 @@ export namespace SerializationHelper { return serializing > 0; } - export function serialize(obj: Field): any { + export function Serialize(obj: Field): any { if (obj === undefined || obj === null) { return undefined; } @@ -124,7 +124,7 @@ export namespace Deserializable { export function autoObject(): PropSchema { return custom( - (s) => SerializationHelper.serialize(s), + (s) => SerializationHelper.Serialize(s), (s) => SerializationHelper.Deserialize(s) ); } \ No newline at end of file diff --git a/src/new_fields/util.ts b/src/new_fields/util.ts index 7709d6c24..8caceb063 100644 --- a/src/new_fields/util.ts +++ b/src/new_fields/util.ts @@ -43,7 +43,7 @@ export const setter = action(function (target: any, prop: string | symbol | numb } else { target.__fields[prop] = value; } - target[Update]({ '$set': { ["fields." + prop]: value instanceof ObjectField ? SerializationHelper.serialize(value) : (value === undefined ? null : value) } }); + target[Update]({ '$set': { ["fields." + prop]: value instanceof ObjectField ? SerializationHelper.Serialize(value) : (value === undefined ? null : value) } }); UndoManager.AddEvent({ redo: () => receiver[prop] = value, undo: () => receiver[prop] = curValue @@ -103,7 +103,7 @@ export function updateFunction(target: any, prop: any, value: any, receiver: any let current = ObjectField.MakeCopy(value); return (diff?: any) => { if (true || !diff) { - diff = { '$set': { ["fields." + prop]: SerializationHelper.serialize(value) } }; + diff = { '$set': { ["fields." 
+ prop]: SerializationHelper.Serialize(value) } }; const oldValue = current; const newValue = ObjectField.MakeCopy(value); current = newValue; -- cgit v1.2.3-70-g09d2 From 06bd486c72342b4d979245c9f4051156e6492541 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Tue, 25 Jun 2019 21:22:29 -0400 Subject: scraping progress --- src/buxton/scraper.py | 331 --------------------- src/buxton/source/Bill_Notes_Bill_Notes_CyKey.docx | Bin 1675500 -> 0 bytes src/buxton/source/Bill_Notes_Braun_T3.docx | Bin 1671968 -> 0 bytes src/buxton/source/Bill_Notes_CasioC801.docx | Bin 574664 -> 0 bytes src/buxton/source/Bill_Notes_Casio_Mini.docx | Bin 581069 -> 0 bytes .../source/Bill_Notes_FingerWorks_Prototype.docx | Bin 585090 -> 0 bytes .../source/Bill_Notes_Fingerworks_TouchStream.docx | Bin 1722555 -> 0 bytes src/buxton/source/Bill_Notes_FrogPad.docx | Bin 840173 -> 0 bytes src/buxton/source/Bill_Notes_Gavilan_SC.docx | Bin 1695290 -> 0 bytes .../source/Bill_Notes_Grandjean_Stenotype.docx | Bin 2094142 -> 0 bytes src/buxton/source/Bill_Notes_Matias.docx | Bin 590407 -> 0 bytes src/buxton/source/Bill_Notes_MousePen.docx | Bin 505322 -> 0 bytes src/buxton/source/Bill_Notes_NewO.docx | Bin 2264571 -> 0 bytes src/buxton/source/Bill_Notes_OLPC.docx | Bin 6883659 -> 0 bytes src/buxton/source/Bill_Notes_PARCkbd.docx | Bin 631959 -> 0 bytes .../source/Bill_Notes_Philco_Mystery_Control.docx | Bin 1994439 -> 0 bytes src/buxton/source/Bill_Notes_TASA_Kbd.docx | Bin 461199 -> 0 bytes src/buxton/source/Bill_Notes_The_Tap.docx | Bin 711321 -> 0 bytes src/client/util/ClientUtils.ts | 4 + src/scraping/acm/chromedriver.exe | Bin 0 -> 7477760 bytes src/scraping/acm/citations.txt | 2 + src/scraping/acm/debug.log | 38 +++ src/scraping/acm/index.js | 88 ++++++ src/scraping/acm/package.json | 17 ++ src/scraping/acm/results.txt | 64 ++++ src/scraping/buxton/scraper.py | 331 +++++++++++++++++++++ .../buxton/source/Bill_Notes_Bill_Notes_CyKey.docx | Bin 0 -> 1675500 bytes .../buxton/source/Bill_Notes_Braun_T3.docx | Bin 0 -> 1671968 bytes .../buxton/source/Bill_Notes_CasioC801.docx | Bin 0 -> 574664 bytes .../buxton/source/Bill_Notes_Casio_Mini.docx | Bin 0 -> 581069 bytes .../source/Bill_Notes_FingerWorks_Prototype.docx | Bin 0 -> 585090 bytes .../source/Bill_Notes_Fingerworks_TouchStream.docx | Bin 0 -> 1722555 bytes src/scraping/buxton/source/Bill_Notes_FrogPad.docx | Bin 0 -> 840173 bytes .../buxton/source/Bill_Notes_Gavilan_SC.docx | Bin 0 -> 1695290 bytes .../source/Bill_Notes_Grandjean_Stenotype.docx | Bin 0 -> 2094142 bytes src/scraping/buxton/source/Bill_Notes_Matias.docx | Bin 0 -> 590407 bytes .../buxton/source/Bill_Notes_MousePen.docx | Bin 0 -> 505322 bytes src/scraping/buxton/source/Bill_Notes_NewO.docx | Bin 0 -> 2264571 bytes src/scraping/buxton/source/Bill_Notes_OLPC.docx | Bin 0 -> 6883659 bytes src/scraping/buxton/source/Bill_Notes_PARCkbd.docx | Bin 0 -> 631959 bytes .../source/Bill_Notes_Philco_Mystery_Control.docx | Bin 0 -> 1994439 bytes .../buxton/source/Bill_Notes_TASA_Kbd.docx | Bin 0 -> 461199 bytes src/scraping/buxton/source/Bill_Notes_The_Tap.docx | Bin 0 -> 711321 bytes 43 files changed, 544 insertions(+), 331 deletions(-) delete mode 100644 src/buxton/scraper.py delete mode 100644 src/buxton/source/Bill_Notes_Bill_Notes_CyKey.docx delete mode 100644 src/buxton/source/Bill_Notes_Braun_T3.docx delete mode 100644 src/buxton/source/Bill_Notes_CasioC801.docx delete mode 100644 src/buxton/source/Bill_Notes_Casio_Mini.docx delete mode 100644 src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx delete 
mode 100644 src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx delete mode 100644 src/buxton/source/Bill_Notes_FrogPad.docx delete mode 100644 src/buxton/source/Bill_Notes_Gavilan_SC.docx delete mode 100644 src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx delete mode 100644 src/buxton/source/Bill_Notes_Matias.docx delete mode 100644 src/buxton/source/Bill_Notes_MousePen.docx delete mode 100644 src/buxton/source/Bill_Notes_NewO.docx delete mode 100644 src/buxton/source/Bill_Notes_OLPC.docx delete mode 100644 src/buxton/source/Bill_Notes_PARCkbd.docx delete mode 100644 src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx delete mode 100644 src/buxton/source/Bill_Notes_TASA_Kbd.docx delete mode 100644 src/buxton/source/Bill_Notes_The_Tap.docx create mode 100644 src/client/util/ClientUtils.ts create mode 100644 src/scraping/acm/chromedriver.exe create mode 100644 src/scraping/acm/citations.txt create mode 100644 src/scraping/acm/debug.log create mode 100644 src/scraping/acm/index.js create mode 100644 src/scraping/acm/package.json create mode 100644 src/scraping/acm/results.txt create mode 100644 src/scraping/buxton/scraper.py create mode 100644 src/scraping/buxton/source/Bill_Notes_Bill_Notes_CyKey.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_Braun_T3.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_CasioC801.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_Casio_Mini.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_FingerWorks_Prototype.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_FrogPad.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_Gavilan_SC.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_Grandjean_Stenotype.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_Matias.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_MousePen.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_NewO.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_OLPC.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_PARCkbd.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_Philco_Mystery_Control.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_TASA_Kbd.docx create mode 100644 src/scraping/buxton/source/Bill_Notes_The_Tap.docx (limited to 'src/client/util') diff --git a/src/buxton/scraper.py b/src/buxton/scraper.py deleted file mode 100644 index 043fd3cf4..000000000 --- a/src/buxton/scraper.py +++ /dev/null @@ -1,331 +0,0 @@ -import os -import docx2txt -from docx import Document -from docx.opc.constants import RELATIONSHIP_TYPE as RT -import re -from pymongo import MongoClient -import shutil -import uuid -import datetime -from PIL import Image -import math -import sys - -source = "./source" -dist = "../server/public/files" - -db = MongoClient("localhost", 27017)["Dash"] -schema_guids = [] - - -def extract_links(fileName): - links = [] - doc = Document(fileName) - rels = doc.part.rels - for rel in rels: - item = rels[rel] - if item.reltype == RT.HYPERLINK and ".aspx" not in item._target: - links.append(item._target) - return listify(links) - - -def extract_value(kv_string): - pieces = kv_string.split(":") - return (pieces[1] if len(pieces) > 1 else kv_string).strip() - - -def mkdir_if_absent(path): - try: - if not os.path.exists(path): - os.mkdir(path) - except OSError: - print("failed to create the appropriate directory structures for %s" % 
file_name) - - -def guid(): - return str(uuid.uuid4()) - - -def listify(list): - return { - "fields": list, - "__type": "list" - } - - -def protofy(fieldId): - return { - "fieldId": fieldId, - "__type": "proxy" - } - - -def write_schema(parse_results, display_fields): - view_guids = parse_results["child_guids"] - - data_doc = parse_results["schema"] - fields = data_doc["fields"] - - view_doc_guid = guid() - - view_doc = { - "_id": view_doc_guid, - "fields": { - "proto": protofy(data_doc["_id"]), - "x": 10, - "y": 10, - "width": 900, - "height": 600, - "panX": 0, - "panY": 0, - "zoomBasis": 0.5, - "zIndex": 2, - "libraryBrush": False, - "viewType": 2 - }, - "__type": "Doc" - } - - fields["proto"] = protofy("collectionProto") - fields["data"] = listify(proxify_guids(view_guids)) - fields["schemaColumns"] = listify(display_fields) - fields["backgroundColor"] = "white" - fields["scale"] = 0.5 - fields["viewType"] = 2 - fields["author"] = "Bill Buxton" - fields["creationDate"] = { - "date": datetime.datetime.utcnow().microsecond, - "__type": "date" - } - fields["isPrototype"] = True - fields["page"] = -1 - - db.newDocuments.insert_one(data_doc) - db.newDocuments.insert_one(view_doc) - - data_doc_guid = data_doc["_id"] - print(f"inserted view document ({view_doc_guid})") - print(f"inserted data document ({data_doc_guid})\n") - - return view_doc_guid - - -def write_image(folder, name): - path = f"http://localhost:1050/files/{folder}/{name}" - - data_doc_guid = guid() - view_doc_guid = guid() - - view_doc = { - "_id": view_doc_guid, - "fields": { - "proto": protofy(data_doc_guid), - "x": 10, - "y": 10, - "width": 300, - "zIndex": 2, - "libraryBrush": False - }, - "__type": "Doc" - } - - image = Image.open(f"{dist}/{folder}/{name}") - native_width, native_height = image.size - - data_doc = { - "_id": data_doc_guid, - "fields": { - "proto": protofy("imageProto"), - "data": { - "url": path, - "__type": "image" - }, - "title": name, - "nativeWidth": native_width, - "author": "Bill Buxton", - "creationDate": { - "date": datetime.datetime.utcnow().microsecond, - "__type": "date" - }, - "isPrototype": True, - "page": -1, - "nativeHeight": native_height, - "height": native_height - }, - "__type": "Doc" - } - - db.newDocuments.insert_one(view_doc) - db.newDocuments.insert_one(data_doc) - - return view_doc_guid - - -def parse_document(file_name: str): - print(f"parsing {file_name}...") - pure_name = file_name.split(".")[0] - - result = {} - - dir_path = dist + "/" + pure_name - mkdir_if_absent(dir_path) - - raw = str(docx2txt.process(source + "/" + file_name, dir_path)) - - view_guids = [] - count = 0 - for image in os.listdir(dir_path): - count += 1 - view_guids.append(write_image(pure_name, image)) - os.rename(dir_path + "/" + image, dir_path + - "/" + image.replace(".", "_m.", 1)) - print(f"extracted {count} images...") - - def sanitize(line): return re.sub("[\n\t]+", "", line).replace(u"\u00A0", " ").replace( - u"\u2013", "-").replace(u"\u201c", '''"''').replace(u"\u201d", '''"''').strip() - - def sanitize_price(raw: str): - raw = raw.replace(",", "") - start = raw.find("$") - if start > -1: - i = start + 1 - while (i < len(raw) and re.match(r"[0-9\.]", raw[i])): - i += 1 - price = raw[start + 1: i + 1] - return float(price) - elif (raw.lower().find("nfs")): - return -1 - else: - return math.nan - - def remove_empty(line): return len(line) > 1 - - lines = list(map(sanitize, raw.split("\n"))) - lines = list(filter(remove_empty, lines)) - - result["file_name"] = file_name - result["title"] = 
lines[2].strip() - result["short_description"] = lines[3].strip().replace( - "Short Description: ", "") - - cur = 5 - notes = "" - while lines[cur] != "Device Details": - notes += lines[cur] + " " - cur += 1 - result["buxton_notes"] = notes.strip() - - cur += 1 - clean = list( - map(lambda data: data.strip().split(":"), lines[cur].split("|"))) - result["company"] = clean[0][len(clean[0]) - 1].strip() - result["year"] = clean[1][len(clean[1]) - 1].strip() - result["original_price"] = sanitize_price( - clean[2][len(clean[2]) - 1].strip()) - - cur += 1 - result["degrees_of_freedom"] = extract_value( - lines[cur]).replace("NA", "N/A") - cur += 1 - - dimensions = lines[cur].lower() - if dimensions.startswith("dimensions"): - dim_concat = dimensions[11:].strip() - cur += 1 - while lines[cur] != "Key Words": - dim_concat += (" " + lines[cur].strip()) - cur += 1 - result["dimensions"] = dim_concat - else: - result["dimensions"] = "N/A" - - cur += 1 - result["primary_key"] = extract_value(lines[cur]) - cur += 1 - result["secondary_key"] = extract_value(lines[cur]) - - while lines[cur] != "Links": - result["secondary_key"] += (" " + extract_value(lines[cur]).strip()) - cur += 1 - - cur += 1 - link_descriptions = [] - while lines[cur] != "Image": - link_descriptions.append(lines[cur].strip()) - cur += 1 - result["link_descriptions"] = listify(link_descriptions) - - result["hyperlinks"] = extract_links(source + "/" + file_name) - - images = [] - captions = [] - cur += 3 - while cur + 1 < len(lines) and lines[cur] != "NOTES:": - images.append(lines[cur]) - captions.append(lines[cur + 1]) - cur += 2 - result["images"] = listify(images) - result["captions"] = listify(captions) - - notes = [] - if (cur < len(lines) and lines[cur] == "NOTES:"): - cur += 1 - while cur < len(lines): - notes.append(lines[cur]) - cur += 1 - if len(notes) > 0: - result["notes"] = listify(notes) - - print("writing child schema...") - - return { - "schema": { - "_id": guid(), - "fields": result, - "__type": "Doc" - }, - "child_guids": view_guids - } - - -def proxify_guids(guids): - return list(map(lambda guid: {"fieldId": guid, "__type": "proxy"}, guids)) - - -if os.path.exists(dist): - shutil.rmtree(dist) -while os.path.exists(dist): - pass -os.mkdir(dist) -mkdir_if_absent(source) - -candidates = 0 -for file_name in os.listdir(source): - if file_name.endswith('.docx'): - candidates += 1 - schema_guids.append(write_schema( - parse_document(file_name), ["title", "data"])) - -print("writing parent schema...") -parent_guid = write_schema({ - "schema": { - "_id": guid(), - "fields": {}, - "__type": "Doc" - }, - "child_guids": schema_guids -}, ["title", "short_description", "original_price"]) - -print("appending parent schema to main workspace...\n") -db.newDocuments.update_one( - {"fields.title": "WS collection 1"}, - {"$push": {"fields.data.fields": {"fieldId": parent_guid, "__type": "proxy"}}} -) - -print("rewriting .gitignore...\n") -lines = ['*', '!.gitignore'] -with open(dist + "/.gitignore", 'w') as f: - f.write('\n'.join(lines)) - -suffix = "" if candidates == 1 else "s" -print(f"conversion complete. 
{candidates} candidate{suffix} processed.") diff --git a/src/buxton/source/Bill_Notes_Bill_Notes_CyKey.docx b/src/buxton/source/Bill_Notes_Bill_Notes_CyKey.docx deleted file mode 100644 index 06094b4d3..000000000 Binary files a/src/buxton/source/Bill_Notes_Bill_Notes_CyKey.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Braun_T3.docx b/src/buxton/source/Bill_Notes_Braun_T3.docx deleted file mode 100644 index 356697092..000000000 Binary files a/src/buxton/source/Bill_Notes_Braun_T3.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_CasioC801.docx b/src/buxton/source/Bill_Notes_CasioC801.docx deleted file mode 100644 index cd89fb97b..000000000 Binary files a/src/buxton/source/Bill_Notes_CasioC801.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Casio_Mini.docx b/src/buxton/source/Bill_Notes_Casio_Mini.docx deleted file mode 100644 index a503cddfc..000000000 Binary files a/src/buxton/source/Bill_Notes_Casio_Mini.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx b/src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx deleted file mode 100644 index 4d13a8cf5..000000000 Binary files a/src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx b/src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx deleted file mode 100644 index 578a1be08..000000000 Binary files a/src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_FrogPad.docx b/src/buxton/source/Bill_Notes_FrogPad.docx deleted file mode 100644 index d01e1bf5c..000000000 Binary files a/src/buxton/source/Bill_Notes_FrogPad.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Gavilan_SC.docx b/src/buxton/source/Bill_Notes_Gavilan_SC.docx deleted file mode 100644 index 7bd28b376..000000000 Binary files a/src/buxton/source/Bill_Notes_Gavilan_SC.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx b/src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx deleted file mode 100644 index 0615c4953..000000000 Binary files a/src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Matias.docx b/src/buxton/source/Bill_Notes_Matias.docx deleted file mode 100644 index 547603256..000000000 Binary files a/src/buxton/source/Bill_Notes_Matias.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_MousePen.docx b/src/buxton/source/Bill_Notes_MousePen.docx deleted file mode 100644 index 4e1056636..000000000 Binary files a/src/buxton/source/Bill_Notes_MousePen.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_NewO.docx b/src/buxton/source/Bill_Notes_NewO.docx deleted file mode 100644 index a514926d2..000000000 Binary files a/src/buxton/source/Bill_Notes_NewO.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_OLPC.docx b/src/buxton/source/Bill_Notes_OLPC.docx deleted file mode 100644 index bfca0a9bb..000000000 Binary files a/src/buxton/source/Bill_Notes_OLPC.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_PARCkbd.docx b/src/buxton/source/Bill_Notes_PARCkbd.docx deleted file mode 100644 index c0cf6ba9a..000000000 Binary files a/src/buxton/source/Bill_Notes_PARCkbd.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx b/src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx deleted file 
mode 100644 index ad06903f3..000000000 Binary files a/src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_TASA_Kbd.docx b/src/buxton/source/Bill_Notes_TASA_Kbd.docx deleted file mode 100644 index e4c659de9..000000000 Binary files a/src/buxton/source/Bill_Notes_TASA_Kbd.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_The_Tap.docx b/src/buxton/source/Bill_Notes_The_Tap.docx deleted file mode 100644 index 8ceebc71e..000000000 Binary files a/src/buxton/source/Bill_Notes_The_Tap.docx and /dev/null differ diff --git a/src/client/util/ClientUtils.ts b/src/client/util/ClientUtils.ts new file mode 100644 index 000000000..425bde14a --- /dev/null +++ b/src/client/util/ClientUtils.ts @@ -0,0 +1,4 @@ +//AUTO-GENERATED FILE: DO NOT EDIT +export namespace ClientUtils { + export const RELEASE = false; +} \ No newline at end of file diff --git a/src/scraping/acm/chromedriver.exe b/src/scraping/acm/chromedriver.exe new file mode 100644 index 000000000..6a362fd43 Binary files /dev/null and b/src/scraping/acm/chromedriver.exe differ diff --git a/src/scraping/acm/citations.txt b/src/scraping/acm/citations.txt new file mode 100644 index 000000000..e5018ddef --- /dev/null +++ b/src/scraping/acm/citations.txt @@ -0,0 +1,2 @@ +321046 +2412979 \ No newline at end of file diff --git a/src/scraping/acm/debug.log b/src/scraping/acm/debug.log new file mode 100644 index 000000000..8c0a148f4 --- /dev/null +++ b/src/scraping/acm/debug.log @@ -0,0 +1,38 @@ +[0625/170004.768:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/170004.769:ERROR:exception_snapshot_win.cc(98)] thread ID 17604 not found in process +[0625/171124.644:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/171124.645:ERROR:exception_snapshot_win.cc(98)] thread ID 14348 not found in process +[0625/171853.989:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/171853.990:ERROR:exception_snapshot_win.cc(98)] thread ID 12080 not found in process +[0625/171947.744:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/171947.745:ERROR:exception_snapshot_win.cc(98)] thread ID 16160 not found in process +[0625/172007.424:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/172007.425:ERROR:exception_snapshot_win.cc(98)] thread ID 13472 not found in process +[0625/172059.353:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/172059.354:ERROR:exception_snapshot_win.cc(98)] thread ID 6396 not found in process +[0625/172402.795:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. 
(0xc0000022) +[0625/172402.796:ERROR:exception_snapshot_win.cc(98)] thread ID 10720 not found in process +[0625/172618.850:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/172618.850:ERROR:exception_snapshot_win.cc(98)] thread ID 21136 not found in process +[0625/172819.875:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/172819.876:ERROR:exception_snapshot_win.cc(98)] thread ID 17624 not found in process +[0625/172953.674:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/172953.675:ERROR:exception_snapshot_win.cc(98)] thread ID 15180 not found in process +[0625/173412.182:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/173412.182:ERROR:exception_snapshot_win.cc(98)] thread ID 13952 not found in process +[0625/173447.806:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/173447.807:ERROR:exception_snapshot_win.cc(98)] thread ID 1572 not found in process +[0625/173516.188:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/173516.189:ERROR:exception_snapshot_win.cc(98)] thread ID 5472 not found in process +[0625/173528.446:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/173528.447:ERROR:exception_snapshot_win.cc(98)] thread ID 20420 not found in process +[0625/173539.436:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/173539.437:ERROR:exception_snapshot_win.cc(98)] thread ID 16192 not found in process +[0625/173643.139:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/173643.140:ERROR:exception_snapshot_win.cc(98)] thread ID 15716 not found in process +[0625/173659.376:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/173659.377:ERROR:exception_snapshot_win.cc(98)] thread ID 11828 not found in process +[0625/201137.209:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0625/201137.210:ERROR:exception_snapshot_win.cc(98)] thread ID 7688 not found in process +[0625/210240.476:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. 
(0xc0000022) +[0625/210240.477:ERROR:exception_snapshot_win.cc(98)] thread ID 20828 not found in process diff --git a/src/scraping/acm/index.js b/src/scraping/acm/index.js new file mode 100644 index 000000000..81f775617 --- /dev/null +++ b/src/scraping/acm/index.js @@ -0,0 +1,88 @@ +const { Builder, By, Key, until } = require('selenium-webdriver'); +const fs = require("fs"); + +let driver; +fs.readFile("./citations.txt", { encoding: "utf8" }, scrapeTargets); +results = [] + +async function scrapeTargets(error, data) { + if (error) { + console.log("\nUnable to collect target citations from a citations.txt file stored in this directory.\nPlease make sure one is provided.") + return; + } + + driver = await new Builder().forBrowser('chrome').build(); + + let references = data.split("\n").map(entry => entry.replace("\r", "")).filter(line => line.match(/\d+/g)); + + let results = [] + let pdfs = [] + for (let id of references) { + let result = {} + let lines = [] + try { + let url = `https://dl.acm.org/citation.cfm?id=${id}`; + await driver.get(url); + await driver.sleep(500) + let candidates = await driver.findElements(By.xpath('.//a[contains(@href, "ft_gateway.cfm?id=")]')); + if (candidates.length > 0) { + pdfs.push(candidates[0]) + } + let webElements = await driver.findElements(By.id("abstract-body")) + for (let el of webElements) { + let text = await el.getText() + lines.push(text) + } + result.url = url + result.abstract = lines.join(" "); + await driver.findElement(By.xpath(`//*[@id="tab-1014-btnInnerEl"]/span`)).click() + await driver.sleep(500) + let authors = await driver.findElement(By.xpath('//*[@id="tabpanel-1009-body"]')).getText() + let sanitize = line => line.length > 0 && !(line.startsWith("No contact information") || line.startsWith("View colleagues of") || line.startsWith("Bibliometrics:")) + authorLines = authors.split("\n").map(line => line.trim()).filter(sanitize) + + let i = 0; + let allAuthors = [] + while (i < authorLines.length) { + let individual = []; + while (!authorLines[i].startsWith("Average citations")) { + individual.push(authorLines[i]) + i++ + } + individual.push(authorLines[i]) + allAuthors.push(individual); + i++ + } + result.authors = allAuthors.map(metadata => { + let publicationYears = metadata[1].substring(18).split("-"); + author = { + name: metadata[0], + publication_start: parseInt(publicationYears[0]), + publication_end: parseInt(publicationYears[1]) + }; + for (let count = 2; count < metadata.length; count++) { + let attr = metadata[count] + let char = attr.length - 1; + while (attr[char] != " ") { + char-- + } + let key = attr.substring(0, char).toLowerCase().replace(/ /g, "_").replace(/[\(\)]/g, ""); + let value = parseFloat(attr.substring(char + 1).replace(/,/g, "")); + author[key] = value + } + return author + }) + } catch (e) { + console.log(e) + await driver.quit(); + } + results.push(result) + } + + let output = ""; + results.forEach(res => output += (JSON.stringify(res, null, 4) + "\n")); + + fs.writeFile("./results.txt", output, function errorHandler(exception) { console.log(exception || "results successfully written") }) + + await driver.quit(); +} \ No newline at end of file diff --git a/src/scraping/acm/package.json b/src/scraping/acm/package.json new file mode 100644 index 000000000..10f4d2156 --- /dev/null +++ b/src/scraping/acm/package.json @@ -0,0 +1,17 @@ +{ + "name": "scraper", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": 
[], + "author": "", + "license": "ISC", + "dependencies": { + "axios": "^0.19.0", + "cheerio": "^1.0.0-rc.3", + "selenium-webdriver": "^4.0.0-alpha.4" + } +} diff --git a/src/scraping/acm/results.txt b/src/scraping/acm/results.txt new file mode 100644 index 000000000..05bb2be8b --- /dev/null +++ b/src/scraping/acm/results.txt @@ -0,0 +1,64 @@ +{ + "url": "https://dl.acm.org/citation.cfm?id=321046", + "abstract": "It has been observed by many people that a striking number of quite diverse mathematical problems can be formulated as problems in integer programming, that is, linear programming problems in which some or all of the variables are required to assume integral values. This fact is rendered quite interesting by recent research on such problems, notably by R. E. Gomory [2, 3], which gives promise of yielding efficient computational techniques for their solution. The present paper provides yet another example of the versatility of integer programming as a mathematical modeling device by representing a generalization of the well-known “Travelling Salesman Problem” in integer programming terms. The authors have developed several such models, of which the one presented here is the most efficient in terms of generality, number of variables, and number of constraints. This model is due to the second author [4] and was presented briefly at the Symposium on Combinatorial Problems held at Princeton University, April 1960, sponsored by SIAM and IBM. The problem treated is: (1) A salesman is required to visit each of n cities, indexed by 1, … , n. He leaves from a “base city” indexed by 0, visits each of the n other cities exactly once, and returns to city 0. During his travels he must return to 0 exactly t times, including his final return (here t may be allowed to vary), and he must visit no more than p cities in one tour. (By a tour we mean a succession of visits to cities without stopping at city 0.) It is required to find such an itinerary which minimizes the total distance traveled by the salesman. Note that if t is fixed, then for the problem to have a solution we must have tp ≧ n. For t = 1, p ≧ n, we have the standard traveling salesman problem. Let dij (i ≠ j = 0, 1, … , n) be the distance covered in traveling from city i to city j. The following integer programming problem will be shown to be equivalent to (1): (2) Minimize the linear form ∑0≦i≠j≦n∑ dijxij over the set determined by the relations ∑ni=0i≠j xij = 1 (j = 1, … , n) ∑nj=0j≠i xij = 1 (i = 1, … , n) ui - uj + pxij ≦ p - 1 (1 ≦ i ≠ j ≦ n) where the xij are non-negative integers and the ui (i = 1, …, n) are arbitrary real numbers. (We shall see that it is permissible to restrict the ui to be non-negative integers as well.) If t is fixed it is necessary to add the additional relation: ∑nu=1 xi0 = t Note that the constraints require that xij = 0 or 1, so that a natural correspondence between these two problems exists if the xij are interpreted as follows: The salesman proceeds from city i to city j if and only if xij = 1. Under this correspondence the form to be minimized in (2) is the total distance to be traveled by the salesman in (1), so the burden of proof is to show that the two feasible sets correspond; i.e., a feasible solution to (2) has xij which do define a legitimate itinerary in (1), and, conversely a legitimate itinerary in (1) defines xij, which, together with appropriate ui, satisfy the constraints of (2). Consider a feasible solution to (2). The number of returns to city 0 is given by ∑ni=1 xi0. 
The constraints of the form ∑ xij = 1, all xij non-negative integers, represent the conditions that each city (other than zero) is visited exactly once. The ui play a role similar to node potentials in a network and the inequalities involving them serve to eliminate tours that do not begin and end at city 0 and tours that visit more than p cities. Consider any xr0r1 = 1 (r1 ≠ 0). There exists a unique r2 such that xr1r2 = 1. Unless r2 = 0, there is a unique r3 with xr2r3 = 1. We proceed in this fashion until some rj = 0. This must happen since the alternative is that at some point we reach an rk = rj, j + 1 < k. Since none of the r's are zero we have uri - uri + 1 + pxriri + 1 ≦ p - 1 or uri - uri + 1 ≦ - 1. Summing from i = j to k - 1, we have urj - urk = 0 ≦ j + 1 - k, which is a contradiction. Thus all tours include city 0. It remains to observe that no tours is of length greater than p. Suppose such a tour exists, x0r1 , xr1r2 , … , xrprp+1 = 1 with all ri ≠ 0. Then, as before, ur1 - urp+1 ≦ - p or urp+1 - ur1 ≧ p. But we have urp+1 - ur1 + pxrp+1r1 ≦ p - 1 or urp+1 - ur1 ≦ p (1 - xrp+1r1) - 1 ≦ p - 1, which is a contradiction. Conversely, if the xij correspond to a legitimate itinerary, it is clear that the ui can be adjusted so that ui = j if city i is the jth city visited in the tour which includes city i, for we then have ui - uj = - 1 if xij = 1, and always ui - uj ≦ p - 1. The above integer program involves n2 + n constraints (if t is not fixed) in n2 + 2n variables. Since the inequality form of constraint is fundamental for integer programming calculations, one may eliminate 2n variables, say the xi0 and x0j, by means of the equation constraints and produce an equivalent problem with n2 + n inequalities and n2 variables. The currently known integer programming procedures are sufficiently regular in their behavior to cast doubt on the heuristic value of machine experiments with our model. However, it seems appropriate to report the results of the five machine experiments we have conducted so far. The solution procedure used was the all-integer algorithm of R. E. Gomory [3] without the ranking procedure he describes. The first three experiments were simple model verification tests on a four-city standard traveling salesman problem with distance matrix [ 20 23 4 30 7 27 25 5 25 3 21 26 ] The first experiment was with a model, now obsolete, using roughly twice as many constraints and variables as the current model (for this problem, 28 constraints in 21 variables). The machine was halted after 4000 pivot steps had failed to produce a solution. The second experiment used the earlier model with the xi0 and x0j eliminated, resulting in a 28-constraint, 15-variable problem. Here the machine produced the optimal solution in 41 pivot steps. The third experiment used the current formulation with the xi0 and x0j eliminated, yielding 13 constraints and 9 variables. The optimal solution was reached in 7 pivot steps. The fourth and fifth experiments were used on a standard ten-city problem, due to Barachet, solved by Dantzig, Johnson and Fulkerson [1]. The current formulation was used, yielding 91 constraints in 81 variables. The fifth problem differed from the fourth only in that the ordering of the rows was altered to attempt to introduce more favorable pivot choices. In each case the machine was stopped after over 250 pivot steps had failed to produce the solution. In each case the last 100 pivot steps had failed to change the value of the objective function. 
It seems hopeful that more efficient integer programming procedures now under development will yield a satisfactory algorithmic solution to the traveling salesman problem, when applied to this model. In any case, the model serves to illustrate how problems of this sort may be succinctly formulated in integer programming terms.", + "authors": [ + { + "name": "C. E. Miller", + "publication_start": 1960, + "publication_end": 1960, + "publication_count": 1, + "citation_count": 179, + "available_for_download": 1, + "downloads_6_weeks": 132, + "downloads_12_months": 993, + "downloads_cumulative": 9781, + "average_downloads_per_article": 9781, + "average_citations_per_article": 179 + }, + { + "name": "A. W. Tucker", + "publication_start": 1960, + "publication_end": 1993, + "publication_count": 5, + "citation_count": 196, + "available_for_download": 1, + "downloads_6_weeks": 132, + "downloads_12_months": 993, + "downloads_cumulative": 9781, + "average_downloads_per_article": 9781, + "average_citations_per_article": 39.2 + }, + { + "name": "R. A. Zemlin", + "publication_start": 1960, + "publication_end": 1964, + "publication_count": 2, + "citation_count": 188, + "available_for_download": 2, + "downloads_6_weeks": 132, + "downloads_12_months": 998, + "downloads_cumulative": 10012, + "average_downloads_per_article": 5006, + "average_citations_per_article": 94 + } + ] +} +{ + "url": "https://dl.acm.org/citation.cfm?id=2412979", + "abstract": "The STRUCT system utilizes the flexibility of a powerful graphics display system to provide a set of tools for program analysis. These tools allow the analysis of the static prograin structure and the dynamic execution behavior. of programs within the entire operating system/user program environment of the Brown University Graphics System (BUGS). Information is collected and presented in a manner which fully exploits two aspects of this environment. First, the operating system has been developed in a well-structured hierarcal manner following principles laid down by other researchers (2), (3). Second the programs under analysis have been written in a structured programming language following coding conventions which make available, at the source code level, valuable program control information. A new set of pictorial constructs is introduced for presenting a. program structure (static or dynamic) for inspection. These constructs combine the best features of an indented structured source code listing and the box odented nature of traditional flow charts. The graphical tools available are USed to provide for swift changes in. the desired level of detail displayed within a program structure, for traveling linearly through a program structure, for traveling through a complex program structure (following subroutine or system calls), for concurrently viewing multiple related program structures, and for presenting dynamic program behavior data using three-dimensional projections, The volume of a three-dimensional box representing a program block is proportional to the block's resource utilization. The scope of this paper is limited to a description of the STRUCT system. This system is currently being used to predict and analyze the performance advantages available through the migration of function (program modules) between levels of software and between software and firmware within BUGS. 
The results of this research on migration will be included in a doctoral dissertation currently being written.",
+    "authors": [
+        {
+            "name": "Andries Van Dam",
+            "publication_start": 1975,
+            "publication_end": 1975,
+            "publication_count": 1,
+            "citation_count": 0,
+            "available_for_download": 0,
+            "downloads_6_weeks": 8,
+            "downloads_12_months": 97,
+            "downloads_cumulative": 97,
+            "average_downloads_per_article": 0,
+            "average_citations_per_article": 0
+        }
+    ]
+}
diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py
new file mode 100644
index 000000000..97af10519
--- /dev/null
+++ b/src/scraping/buxton/scraper.py
@@ -0,0 +1,331 @@
+import os
+import docx2txt
+from docx import Document
+from docx.opc.constants import RELATIONSHIP_TYPE as RT
+import re
+from pymongo import MongoClient
+import shutil
+import uuid
+import datetime
+from PIL import Image
+import math
+import sys
+
+source = "./source"
+dist = "../../server/public/files"
+
+db = MongoClient("localhost", 27017)["Dash"]
+schema_guids = []
+
+
+def extract_links(fileName):
+    links = []
+    doc = Document(fileName)
+    rels = doc.part.rels
+    for rel in rels:
+        item = rels[rel]
+        if item.reltype == RT.HYPERLINK and ".aspx" not in item._target:
+            links.append(item._target)
+    return listify(links)
+
+
+def extract_value(kv_string):
+    pieces = kv_string.split(":")
+    return (pieces[1] if len(pieces) > 1 else kv_string).strip()
+
+
+def mkdir_if_absent(path):
+    try:
+        if not os.path.exists(path):
+            os.mkdir(path)
+    except OSError:
+        print("failed to create the appropriate directory structures for %s" % path)
+
+
+def guid():
+    return str(uuid.uuid4())
+
+
+def listify(list):
+    return {
+        "fields": list,
+        "__type": "list"
+    }
+
+
+def protofy(fieldId):
+    return {
+        "fieldId": fieldId,
+        "__type": "proxy"
+    }
+
+
+def write_schema(parse_results, display_fields):
+    view_guids = parse_results["child_guids"]
+
+    data_doc = parse_results["schema"]
+    fields = data_doc["fields"]
+
+    view_doc_guid = guid()
+
+    view_doc = {
+        "_id": view_doc_guid,
+        "fields": {
+            "proto": protofy(data_doc["_id"]),
+            "x": 10,
+            "y": 10,
+            "width": 900,
+            "height": 600,
+            "panX": 0,
+            "panY": 0,
+            "zoomBasis": 0.5,
+            "zIndex": 2,
+            "libraryBrush": False,
+            "viewType": 2
+        },
+        "__type": "Doc"
+    }
+
+    fields["proto"] = protofy("collectionProto")
+    fields["data"] = listify(proxify_guids(view_guids))
+    fields["schemaColumns"] = listify(display_fields)
+    fields["backgroundColor"] = "white"
+    fields["scale"] = 0.5
+    fields["viewType"] = 2
+    fields["author"] = "Bill Buxton"
+    fields["creationDate"] = {
+        "date": datetime.datetime.utcnow().microsecond,
+        "__type": "date"
+    }
+    fields["isPrototype"] = True
+    fields["page"] = -1
+
+    db.newDocuments.insert_one(data_doc)
+    db.newDocuments.insert_one(view_doc)
+
+    data_doc_guid = data_doc["_id"]
+    print(f"inserted view document ({view_doc_guid})")
+    print(f"inserted data document ({data_doc_guid})\n")
+
+    return view_doc_guid
+
+
+def write_image(folder, name):
+    path = f"http://localhost:1050/files/{folder}/{name}"
+
+    data_doc_guid = guid()
+    view_doc_guid = guid()
+
+    view_doc = {
+        "_id": view_doc_guid,
+        "fields": {
+            "proto": protofy(data_doc_guid),
+            "x": 10,
+            "y": 10,
+            "width": 300,
+            "zIndex": 2,
+            "libraryBrush": False
+        },
+        "__type": "Doc"
+    }
+
+    image = Image.open(f"{dist}/{folder}/{name}")
+    native_width, native_height = image.size
+
+    data_doc = {
+        "_id": data_doc_guid,
+        "fields": {
+            "proto": protofy("imageProto"),
+            "data": {
+                "url": path,
+                "__type": "image"
+            },
+            "title": name,
+            "nativeWidth": native_width,
+            "author": "Bill Buxton",
+            "creationDate": {
+                "date": datetime.datetime.utcnow().microsecond,
+                "__type": "date"
+            },
+            "isPrototype": True,
+            "page": -1,
+            "nativeHeight": native_height,
+            "height": native_height
+        },
+        "__type": "Doc"
+    }
+
+    db.newDocuments.insert_one(view_doc)
+    db.newDocuments.insert_one(data_doc)
+
+    return view_doc_guid
+
+
+def parse_document(file_name: str):
+    print(f"parsing {file_name}...")
+    pure_name = file_name.split(".")[0]
+
+    result = {}
+
+    dir_path = dist + "/" + pure_name
+    mkdir_if_absent(dir_path)
+
+    raw = str(docx2txt.process(source + "/" + file_name, dir_path))
+
+    view_guids = []
+    count = 0
+    for image in os.listdir(dir_path):
+        count += 1
+        view_guids.append(write_image(pure_name, image))
+        os.rename(dir_path + "/" + image, dir_path +
+                  "/" + image.replace(".", "_m.", 1))
+    print(f"extracted {count} images...")
+
+    def sanitize(line): return re.sub("[\n\t]+", "", line).replace(u"\u00A0", " ").replace(
+        u"\u2013", "-").replace(u"\u201c", '''"''').replace(u"\u201d", '''"''').strip()
+
+    def sanitize_price(raw: str):
+        raw = raw.replace(",", "")
+        start = raw.find("$")
+        if start > -1:
+            i = start + 1
+            while (i < len(raw) and re.match(r"[0-9\.]", raw[i])):
+                i += 1
+            price = raw[start + 1: i]
+            return float(price)
+        elif raw.lower().find("nfs") > -1:
+            return -1
+        else:
+            return math.nan
+
+    def remove_empty(line): return len(line) > 1
+
+    lines = list(map(sanitize, raw.split("\n")))
+    lines = list(filter(remove_empty, lines))
+
+    result["file_name"] = file_name
+    result["title"] = lines[2].strip()
+    result["short_description"] = lines[3].strip().replace(
+        "Short Description: ", "")
+
+    cur = 5
+    notes = ""
+    while lines[cur] != "Device Details":
+        notes += lines[cur] + " "
+        cur += 1
+    result["buxton_notes"] = notes.strip()
+
+    cur += 1
+    clean = list(
+        map(lambda data: data.strip().split(":"), lines[cur].split("|")))
+    result["company"] = clean[0][len(clean[0]) - 1].strip()
+    result["year"] = clean[1][len(clean[1]) - 1].strip()
+    result["original_price"] = sanitize_price(
+        clean[2][len(clean[2]) - 1].strip())
+
+    cur += 1
+    result["degrees_of_freedom"] = extract_value(
+        lines[cur]).replace("NA", "N/A")
+    cur += 1
+
+    dimensions = lines[cur].lower()
+    if dimensions.startswith("dimensions"):
+        dim_concat = dimensions[11:].strip()
+        cur += 1
+        while lines[cur] != "Key Words":
+            dim_concat += (" " + lines[cur].strip())
+            cur += 1
+        result["dimensions"] = dim_concat
+    else:
+        result["dimensions"] = "N/A"
+
+    cur += 1
+    result["primary_key"] = extract_value(lines[cur])
+    cur += 1
+    result["secondary_key"] = extract_value(lines[cur])
+
+    while lines[cur] != "Links":
+        result["secondary_key"] += (" " + extract_value(lines[cur]).strip())
+        cur += 1
+
+    cur += 1
+    link_descriptions = []
+    while lines[cur] != "Image":
+        link_descriptions.append(lines[cur].strip())
+        cur += 1
+    result["link_descriptions"] = listify(link_descriptions)
+
+    result["hyperlinks"] = extract_links(source + "/" + file_name)
+
+    images = []
+    captions = []
+    cur += 3
+    while cur + 1 < len(lines) and lines[cur] != "NOTES:":
+        images.append(lines[cur])
+        captions.append(lines[cur + 1])
+        cur += 2
+    result["images"] = listify(images)
+    result["captions"] = listify(captions)
+
+    notes = []
+    if (cur < len(lines) and lines[cur] == "NOTES:"):
+        cur += 1
+        while cur < len(lines):
+            notes.append(lines[cur])
+            cur += 1
+    if len(notes) > 0:
+        result["notes"] = listify(notes)
+
+    print("writing child schema...")
+
+    return {
+        "schema": {
+            "_id":
guid(), + "fields": result, + "__type": "Doc" + }, + "child_guids": view_guids + } + + +def proxify_guids(guids): + return list(map(lambda guid: {"fieldId": guid, "__type": "proxy"}, guids)) + + +if os.path.exists(dist): + shutil.rmtree(dist) +while os.path.exists(dist): + pass +os.mkdir(dist) +mkdir_if_absent(source) + +candidates = 0 +for file_name in os.listdir(source): + if file_name.endswith('.docx'): + candidates += 1 + schema_guids.append(write_schema( + parse_document(file_name), ["title", "data"])) + +print("writing parent schema...") +parent_guid = write_schema({ + "schema": { + "_id": guid(), + "fields": {}, + "__type": "Doc" + }, + "child_guids": schema_guids +}, ["title", "short_description", "original_price"]) + +print("appending parent schema to main workspace...\n") +db.newDocuments.update_one( + {"fields.title": "WS collection 1"}, + {"$push": {"fields.data.fields": {"fieldId": parent_guid, "__type": "proxy"}}} +) + +print("rewriting .gitignore...\n") +lines = ['*', '!.gitignore'] +with open(dist + "/.gitignore", 'w') as f: + f.write('\n'.join(lines)) + +suffix = "" if candidates == 1 else "s" +print(f"conversion complete. {candidates} candidate{suffix} processed.") diff --git a/src/scraping/buxton/source/Bill_Notes_Bill_Notes_CyKey.docx b/src/scraping/buxton/source/Bill_Notes_Bill_Notes_CyKey.docx new file mode 100644 index 000000000..06094b4d3 Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_Bill_Notes_CyKey.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_Braun_T3.docx b/src/scraping/buxton/source/Bill_Notes_Braun_T3.docx new file mode 100644 index 000000000..356697092 Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_Braun_T3.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_CasioC801.docx b/src/scraping/buxton/source/Bill_Notes_CasioC801.docx new file mode 100644 index 000000000..cd89fb97b Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_CasioC801.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_Casio_Mini.docx b/src/scraping/buxton/source/Bill_Notes_Casio_Mini.docx new file mode 100644 index 000000000..a503cddfc Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_Casio_Mini.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_FingerWorks_Prototype.docx b/src/scraping/buxton/source/Bill_Notes_FingerWorks_Prototype.docx new file mode 100644 index 000000000..4d13a8cf5 Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_FingerWorks_Prototype.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx b/src/scraping/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx new file mode 100644 index 000000000..578a1be08 Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_FrogPad.docx b/src/scraping/buxton/source/Bill_Notes_FrogPad.docx new file mode 100644 index 000000000..d01e1bf5c Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_FrogPad.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_Gavilan_SC.docx b/src/scraping/buxton/source/Bill_Notes_Gavilan_SC.docx new file mode 100644 index 000000000..7bd28b376 Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_Gavilan_SC.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_Grandjean_Stenotype.docx b/src/scraping/buxton/source/Bill_Notes_Grandjean_Stenotype.docx new file mode 100644 index 000000000..0615c4953 
Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_Grandjean_Stenotype.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_Matias.docx b/src/scraping/buxton/source/Bill_Notes_Matias.docx new file mode 100644 index 000000000..547603256 Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_Matias.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_MousePen.docx b/src/scraping/buxton/source/Bill_Notes_MousePen.docx new file mode 100644 index 000000000..4e1056636 Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_MousePen.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_NewO.docx b/src/scraping/buxton/source/Bill_Notes_NewO.docx new file mode 100644 index 000000000..a514926d2 Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_NewO.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_OLPC.docx b/src/scraping/buxton/source/Bill_Notes_OLPC.docx new file mode 100644 index 000000000..bfca0a9bb Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_OLPC.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_PARCkbd.docx b/src/scraping/buxton/source/Bill_Notes_PARCkbd.docx new file mode 100644 index 000000000..c0cf6ba9a Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_PARCkbd.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_Philco_Mystery_Control.docx b/src/scraping/buxton/source/Bill_Notes_Philco_Mystery_Control.docx new file mode 100644 index 000000000..ad06903f3 Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_Philco_Mystery_Control.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_TASA_Kbd.docx b/src/scraping/buxton/source/Bill_Notes_TASA_Kbd.docx new file mode 100644 index 000000000..e4c659de9 Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_TASA_Kbd.docx differ diff --git a/src/scraping/buxton/source/Bill_Notes_The_Tap.docx b/src/scraping/buxton/source/Bill_Notes_The_Tap.docx new file mode 100644 index 000000000..8ceebc71e Binary files /dev/null and b/src/scraping/buxton/source/Bill_Notes_The_Tap.docx differ -- cgit v1.2.3-70-g09d2 From 490f6843d3523260e6dce81de24676908c4d4b60 Mon Sep 17 00:00:00 2001 From: tschicke-brown Date: Fri, 12 Jul 2019 11:16:17 -0400 Subject: Delete ClientUtils.ts --- src/client/util/ClientUtils.ts | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 src/client/util/ClientUtils.ts (limited to 'src/client/util') diff --git a/src/client/util/ClientUtils.ts b/src/client/util/ClientUtils.ts deleted file mode 100644 index 425bde14a..000000000 --- a/src/client/util/ClientUtils.ts +++ /dev/null @@ -1,4 +0,0 @@ -//AUTO-GENERATED FILE: DO NOT EDIT -export namespace ClientUtils { - export const RELEASE = false; -} \ No newline at end of file -- cgit v1.2.3-70-g09d2
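
The scraper commits above share one convention: every imported item is stored in MongoDB as a pair of documents, a data document holding the parsed fields and a view document that delegates to it through a "proto" reference of __type "proxy". Below is a minimal sketch of walking such a pair after an import, assuming a local MongoDB on port 27017 and the newDocuments collection used by scraper.py; the resolve_proto helper and the zoomBasis query are illustrative choices for this sketch, not part of the repository.

from pymongo import MongoClient

db = MongoClient("localhost", 27017)["Dash"]


def resolve_proto(doc):
    # Follow a view document's "proto" proxy reference back to the data
    # document it delegates to, as written by write_schema/write_image.
    proto = (doc.get("fields") or {}).get("proto")
    if isinstance(proto, dict) and proto.get("__type") == "proxy":
        return db.newDocuments.find_one({"_id": proto["fieldId"]})
    return None


# zoomBasis is only set on the view documents that write_schema emits,
# so it makes a convenient filter for picking one out in this sketch.
view = db.newDocuments.find_one({"fields.zoomBasis": 0.5})
if view is not None:
    data = resolve_proto(view)
    if data is not None:
        print(data["fields"].get("title", "untitled"))

The sketch only reads documents, so it can be run against a freshly imported database to spot-check that the proxy chain written by the importer actually resolves.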