aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java99
-rw-r--r--src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java65
-rw-r--r--src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java59
-rw-r--r--src/test/java/edu/brown/cs/student/FilingTest.java82
4 files changed, 288 insertions, 17 deletions
diff --git a/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java b/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java
new file mode 100644
index 0000000..d41e918
--- /dev/null
+++ b/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java
@@ -0,0 +1,99 @@
+package edu.brown.cs.student.term.parsing;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import java.time.Instant;
+import java.time.ZonedDateTime;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import static org.junit.Assert.assertEquals;
+
/**
 * Parses an SEC EDGAR atom feed {@link Document} into a list of {@link Filing}s,
 * each pairing a filing's raw-text URL with its "updated" timestamp.
 *
 * <p>Fix: the original used JUnit's {@code assertEquals} and Java {@code assert}
 * statements for input validation. JUnit must not be a runtime dependency of main
 * code, and {@code assert} is a no-op unless the JVM runs with {@code -ea}, so
 * malformed feeds silently slipped through. Validation is now explicit and throws
 * {@link IllegalArgumentException} on malformed entries.
 */
public class FilingFeed {
    /** Filings parsed from the feed, in document order. */
    private final List<Filing> filings;

    /**
     * Extracts one {@link Filing} per {@code <entry>} element of the feed.
     *
     * @param document a parsed EDGAR atom feed; each entry must contain exactly
     *                 one {@code <updated>} element (ISO-8601 zoned timestamp)
     *                 and exactly one {@code <link>} element with an
     *                 {@code href} attribute.
     * @throws IllegalArgumentException if an entry is missing or duplicates the
     *                                  required elements, or lacks {@code href}.
     */
    public FilingFeed(Document document) {
        filings = new ArrayList<>();

        NodeList entries = document.getElementsByTagName("entry");
        for (int i = 0; i < entries.getLength(); i++) {
            // getElementsByTagName only ever returns element nodes, so this cast is safe.
            Element entry = (Element) entries.item(i);

            // Timestamp comes from the entry's single <updated> field.
            NodeList updated = entry.getElementsByTagName("updated");
            if (updated.getLength() != 1) {
                throw new IllegalArgumentException(
                    "Expected exactly one <updated> element per entry, found " + updated.getLength());
            }
            String timestamp = updated.item(0).getTextContent();
            Instant instant = ZonedDateTime.parse(timestamp).toInstant();

            // The entry's single <link href="..."> points at the filing's index page.
            NodeList link = entry.getElementsByTagName("link");
            if (link.getLength() != 1) {
                throw new IllegalArgumentException(
                    "Expected exactly one <link> element per entry, found " + link.getLength());
            }
            Node href = link.item(0).getAttributes().getNamedItem("href");
            if (href == null) {
                throw new IllegalArgumentException("<link> element is missing its href attribute");
            }

            filings.add(new Filing(getXmlUrl(href.getNodeValue()), instant));
        }
    }

    /**
     * Converts an EDGAR index-page URL into the corresponding raw-text URL
     * (e.g. {@code ...-index.htm} becomes {@code ....txt}).
     */
    private String getXmlUrl(String filingUrl) {
        return filingUrl.replace("-index.htm", ".txt");
    }

    /** @return the filings parsed from the feed, in document order. */
    public List<Filing> getFilings() {
        return filings;
    }

    /** Immutable value pair of a filing's raw-text URL and its feed timestamp. */
    public static class Filing {
        private final String xmlUrl;
        private final Instant timestamp;

        public Filing(String xmlUrl, Instant timestamp) {
            this.xmlUrl = xmlUrl;
            this.timestamp = timestamp;
        }

        public Instant getTimestamp() {
            return timestamp;
        }

        public String getXmlUrl() {
            return xmlUrl;
        }

        @Override
        public String toString() {
            return "Filing{" +
                "xmlUrl='" + xmlUrl + '\'' +
                ", timestamp=" + timestamp +
                '}';
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            Filing filing = (Filing) o;
            return Objects.equals(xmlUrl, filing.xmlUrl) &&
                Objects.equals(timestamp, filing.timestamp);
        }

        @Override
        public int hashCode() {
            return Objects.hash(xmlUrl, timestamp);
        }
    }

}
diff --git a/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java
new file mode 100644
index 0000000..2b8016e
--- /dev/null
+++ b/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java
@@ -0,0 +1,65 @@
+package edu.brown.cs.student.term.parsing;
+
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import javax.print.Doc;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.StringReader;
+import java.net.URL;
+import java.net.URLConnection;
+
+public class TxtXmlParser extends XmlParser {
+
+ public TxtXmlParser() {
+ super();
+ }
+
+ /**
+ * Method used to parse the xml file.
+ *
+ * @param pathToXml The path to the xml text file.
+ * @return The tree structure parsed as an xml doc.
+ */
+ @Override
+ public Document parse(String pathToXml) {
+ try {
+ System.err.println("LOG: To make class for url: " + pathToXml + " in parse() of " + getClass());
+ URL url = new URL(pathToXml);
+ System.err.println("LOG: To establish urlConnection in parse() of " + getClass());
+ URLConnection conn = url.openConnection();
+ conn.addRequestProperty("User-Agent", "Chrome");
+ System.err.println("LOG: Making bufferedReader for url: " + pathToXml + " in " + getClass());
+ BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream()));
+
+ StringBuilder xmlParts = new StringBuilder();
+ boolean isXml = false;
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.equals("</XML>")) {
+ break;
+ }
+
+ if (isXml) {
+ xmlParts.append(line);
+ }
+ if (line.equals("<XML>")) {
+ isXml = true;
+ }
+ }
+ System.err.println("LOG: Calling builder.parse() after extracting xml parts from: " + pathToXml + " in " + getClass());
+
+ InputSource xmlLines = new InputSource(new StringReader(xmlParts.toString()));
+ return builder.parse(xmlLines);
+ } catch (SAXException e) {
+ System.err.println("INTERNAL: SAX " + getClass() + " : " + e.getClass());
+ } catch (IOException e) {
+ e.printStackTrace();
+ System.err.println("INTERNAL: IO " + getClass() + " : " + e.getClass());
+ }
+ return null;
+ }
+}
diff --git a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java
index 54f9fc0..a267322 100644
--- a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java
+++ b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java
@@ -2,8 +2,11 @@ package edu.brown.cs.student.term.repl.commands;
import edu.brown.cs.student.term.DatabaseQuerier;
import edu.brown.cs.student.term.Main;
+import edu.brown.cs.student.term.parsing.FilingFeed;
import edu.brown.cs.student.term.parsing.Transaction;
+import edu.brown.cs.student.term.parsing.TxtXmlParser;
import edu.brown.cs.student.term.parsing.UrlXmlParser;
+import edu.brown.cs.student.term.parsing.XmlParser;
import edu.brown.cs.student.term.repl.Command;
import edu.brown.cs.student.term.trade.Trade;
import org.json.JSONArray;
@@ -18,7 +21,8 @@ import java.time.ZonedDateTime;
public class LoadCommand implements Command {
private Connection conn;
- private final static UrlXmlParser URL_XML_PARSER = new UrlXmlParser();
+ private final static XmlParser URL_XML_PARSER = new UrlXmlParser();
+ private final static XmlParser TXT_XML_PARSER = new TxtXmlParser();
/**
* Main run method for every command.
@@ -27,23 +31,30 @@ public class LoadCommand implements Command {
*/
@Override
public String run(String[] args) {
- // TODO: add log comments
+ // param checking
+ if (args.length != 1){
+ return "ERROR: Incorrect number of arguments for load command";
+ }
+
+ int numFilings;
+ try {
+ numFilings = Integer.parseInt(args[0]);
+ } catch (NumberFormatException e) {
+ return "ERROR: Please input an integer.";
+ }
+
+
System.err.println("LOG: Entered .run() of " + getClass());
- // TODO: call to api for urls to call through the urlxmlparser from reagan
- if (Main.xmlLinks == null) {
- return "ERROR: Please load xml links from frontend.";
+ FilingFeed filings = getFilings(numFilings);
+ if (filings.getFilings().isEmpty()) {
+ System.err.println("WARNING: No filings loaded.");
}
conn = DatabaseQuerier.getConn();
- JSONArray data = Main.xmlLinks.getJSONArray("data");
- for(int i =0; i < data.length(); i++) {
- JSONObject link = data.optJSONObject(i);
-
- String timestamp = link.getString("timestamp");
- String url = link.getString("url");
+ for(FilingFeed.Filing filing : filings.getFilings()) {
try {
System.err.println("LOG: Calling loadTransactionIntoDB() in " + getClass());
- loadTransactionIntoDB(timestamp, url);
+ loadTransactionIntoDB(filing.getTimestamp(), filing.getXmlUrl());
} catch (SQLException throwables) {
System.err.println("INTERNAL: SQLException in .run() of " + getClass());
//throwables.printStackTrace();
@@ -53,19 +64,33 @@ public class LoadCommand implements Command {
return "Loaded?";
}
+ private FilingFeed getFilings(int numFilings) {
+ // TODO: make params more adjustable
+ String queryUrl = "https://www.sec.gov/cgi-bin/browse-edgar?" +
+ "action=getcurrent" +
+ "&CIK=" +
+ "&type=4" +
+ "&company=" +
+ "&dateb=" +
+ "&owner=only" +
+ "&start=0" +
+ "&count=" + numFilings +
+ "&output=atom";
+
+ Document document = URL_XML_PARSER.parse(queryUrl);
+ return new FilingFeed(document);
+ }
+
/**
* Loads a whole transaction, which can have multiple trades, into the DB.
* @param url The url to the public xml file.
* @throws SQLException If the prep statement fails or db doesn't exist, throws.
*/
- private void loadTransactionIntoDB(String timestamp, String url) throws SQLException {
+ private void loadTransactionIntoDB(Instant instant, String url) throws SQLException {
System.err.println("LOG: Parsing XML into transaction in loadTransactionIntoDB(). URL: " + url);
// TODO: check if this is right @julia
// TODO: add parse error handling...
- ZonedDateTime zonedDateTime = ZonedDateTime.parse(timestamp);
- Instant instant = zonedDateTime.toInstant();
-
- Document document = URL_XML_PARSER.parse(url);
+ Document document = TXT_XML_PARSER.parse(url);
if (document == null) {
System.err.println("WARNING: URL " + url + " failed to parse... continuing.");
return;
diff --git a/src/test/java/edu/brown/cs/student/FilingTest.java b/src/test/java/edu/brown/cs/student/FilingTest.java
new file mode 100644
index 0000000..a9b21d3
--- /dev/null
+++ b/src/test/java/edu/brown/cs/student/FilingTest.java
@@ -0,0 +1,82 @@
+package edu.brown.cs.student;
+
+import edu.brown.cs.student.term.parsing.LocalXmlParser;
+import edu.brown.cs.student.term.parsing.Transaction;
+import edu.brown.cs.student.term.parsing.TxtXmlParser;
+import edu.brown.cs.student.term.parsing.UrlXmlParser;
+import edu.brown.cs.student.term.parsing.XmlParser;
+import edu.brown.cs.student.term.trade.Trade;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import javax.print.Doc;
+
+import static org.junit.Assert.*;
+
+public class FilingTest {
+ private XmlParser _xmlParser, _txtXmlParser;
+
+ @Before
+ public void setUp() {
+ _xmlParser = new UrlXmlParser();
+ _txtXmlParser = new TxtXmlParser();
+ }
+
+ @After
+ public void tearDown() {
+ _xmlParser = null;
+ _txtXmlParser = null;
+ }
+
+ @Test
+ public void seeWorks(){
+ setUp();
+
+ String url = "https://www.sec.gov/cgi-bin/browse-edgar?" +
+ "action=getcurrent" +
+ "&CIK=" +
+ "&type=4" +
+ "&company=" +
+ "&dateb=" +
+ "&owner=only" +
+ "&start=0" +
+ "&count=10" +
+ "&output=atom";
+
+ Document doc = _xmlParser.parse(url);
+ assertNotNull(doc);
+ NodeList entries = doc.getElementsByTagName("entry");
+ assertNotEquals(entries.getLength(), 0);
+ assertEquals(entries.item(0).getNodeType(), Node.ELEMENT_NODE);
+ for (int i = 0; i < entries.getLength(); i++) {
+ Element entry = (Element) entries.item(i);
+ NodeList link = entry.getElementsByTagName("link");
+ assertEquals(link.getLength(), 1);
+ String linkUrl = link.item(0).getAttributes().getNamedItem("href").getNodeValue();
+ System.out.println(linkUrl);
+
+ NodeList updated = entry.getElementsByTagName("updated");
+ assertEquals(link.getLength(), 1);
+ System.out.println(updated.item(0).getTextContent());
+ }
+
+ tearDown();
+ }
+
+ @Test
+ public void xmlUrlFromFilingUrl(){
+ setUp();
+
+ String url = "https://www.sec.gov/Archives/edgar/data/1597341/000141588921001958/0001415889-21-001958.txt";
+ Document doc = _txtXmlParser.parse(url);
+ assertNotNull(doc);
+ tearDown();
+ }
+
+
+}