diff options
Diffstat (limited to 'src')
4 files changed, 288 insertions, 17 deletions
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.time.Instant;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

/**
 * Parses an SEC EDGAR Atom feed document into a list of {@link Filing} entries.
 *
 * <p>Each Atom {@code <entry>} is expected to contain exactly one
 * {@code <updated>} timestamp and exactly one {@code <link>} element pointing at
 * the filing's "-index.htm" page; the link is rewritten to the raw ".txt"
 * submission URL.
 */
public class FilingFeed {
  private final List<Filing> filings;

  /**
   * Extracts one Filing per {@code <entry>} element of the feed.
   *
   * <p>Fix over the original: the original used JUnit's {@code assertEquals} and
   * bare {@code assert} statements for validation. JUnit must not be a runtime
   * dependency of main sources, and {@code assert} is a no-op unless the JVM runs
   * with {@code -ea} — so malformed feeds silently produced wrong results.
   * Validation is now explicit.
   *
   * @param document a parsed Atom feed document; must not be null
   * @throws IllegalArgumentException if an entry does not contain exactly one
   *     {@code <updated>} and exactly one {@code <link>} element
   * @throws java.time.format.DateTimeParseException if a timestamp is not a
   *     valid zoned date-time
   */
  public FilingFeed(Document document) {
    filings = new ArrayList<>();

    NodeList entries = document.getElementsByTagName("entry");
    for (int i = 0; i < entries.getLength(); i++) {
      Node node = entries.item(i);
      // getElementsByTagName only yields element nodes, but check rather than
      // assert — assertions are disabled by default at runtime.
      if (node.getNodeType() != Node.ELEMENT_NODE) {
        continue;
      }
      Element entry = (Element) node;

      NodeList updated = entry.getElementsByTagName("updated");
      if (updated.getLength() != 1) {
        throw new IllegalArgumentException(
            "feed entry must contain exactly one <updated> element, found " + updated.getLength());
      }
      // Atom timestamps carry a zone offset; normalize to an Instant.
      Instant instant = ZonedDateTime.parse(updated.item(0).getTextContent()).toInstant();

      NodeList link = entry.getElementsByTagName("link");
      if (link.getLength() != 1) {
        throw new IllegalArgumentException(
            "feed entry must contain exactly one <link> element, found " + link.getLength());
      }
      String linkUrl = link.item(0).getAttributes().getNamedItem("href").getNodeValue();

      filings.add(new Filing(getXmlUrl(linkUrl), instant));
    }
  }

  /**
   * Rewrites a filing index-page URL to the raw .txt submission URL.
   *
   * @param filingUrl the "-index.htm" URL from the feed's link element
   * @return the corresponding ".txt" URL
   */
  private String getXmlUrl(String filingUrl) {
    return filingUrl.replace("-index.htm", ".txt");
  }

  /**
   * Returns the parsed filings in feed order.
   *
   * @return an unmodifiable view of the filings (the feed is immutable once built)
   */
  public List<Filing> getFilings() {
    return Collections.unmodifiableList(filings);
  }

  /** Immutable value object pairing a filing's raw .txt URL with its timestamp. */
  public static class Filing {
    private final String xmlUrl;
    private final Instant timestamp;

    public Filing(String xmlUrl, Instant timestamp) {
      this.xmlUrl = xmlUrl;
      this.timestamp = timestamp;
    }

    public Instant getTimestamp() {
      return timestamp;
    }

    public String getXmlUrl() {
      return xmlUrl;
    }

    @Override
    public String toString() {
      return "Filing{" +
          "xmlUrl='" + xmlUrl + '\'' +
          ", timestamp=" + timestamp +
          '}';
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }
      Filing filing = (Filing) o;
      return Objects.equals(xmlUrl, filing.xmlUrl) &&
          Objects.equals(timestamp, filing.timestamp);
    }

    @Override
    public int hashCode() {
      return Objects.hash(xmlUrl, timestamp);
    }
  }
}
java.io.IOException; +import java.io.InputStreamReader; +import java.io.StringReader; +import java.net.URL; +import java.net.URLConnection; + +public class TxtXmlParser extends XmlParser { + + public TxtXmlParser() { + super(); + } + + /** + * Method used to parse the xml file. + * + * @param pathToXml The path to the xml text file. + * @return The tree structure parsed as an xml doc. + */ + @Override + public Document parse(String pathToXml) { + try { + System.err.println("LOG: To make class for url: " + pathToXml + " in parse() of " + getClass()); + URL url = new URL(pathToXml); + System.err.println("LOG: To establish urlConnection in parse() of " + getClass()); + URLConnection conn = url.openConnection(); + conn.addRequestProperty("User-Agent", "Chrome"); + System.err.println("LOG: Making bufferedReader for url: " + pathToXml + " in " + getClass()); + BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream())); + + StringBuilder xmlParts = new StringBuilder(); + boolean isXml = false; + String line; + while ((line = br.readLine()) != null) { + if (line.equals("</XML>")) { + break; + } + + if (isXml) { + xmlParts.append(line); + } + if (line.equals("<XML>")) { + isXml = true; + } + } + System.err.println("LOG: Calling builder.parse() after extracting xml parts from: " + pathToXml + " in " + getClass()); + + InputSource xmlLines = new InputSource(new StringReader(xmlParts.toString())); + return builder.parse(xmlLines); + } catch (SAXException e) { + System.err.println("INTERNAL: SAX " + getClass() + " : " + e.getClass()); + } catch (IOException e) { + e.printStackTrace(); + System.err.println("INTERNAL: IO " + getClass() + " : " + e.getClass()); + } + return null; + } +} diff --git a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java index 54f9fc0..a267322 100644 --- a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java +++ 
b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java @@ -2,8 +2,11 @@ package edu.brown.cs.student.term.repl.commands; import edu.brown.cs.student.term.DatabaseQuerier; import edu.brown.cs.student.term.Main; +import edu.brown.cs.student.term.parsing.FilingFeed; import edu.brown.cs.student.term.parsing.Transaction; +import edu.brown.cs.student.term.parsing.TxtXmlParser; import edu.brown.cs.student.term.parsing.UrlXmlParser; +import edu.brown.cs.student.term.parsing.XmlParser; import edu.brown.cs.student.term.repl.Command; import edu.brown.cs.student.term.trade.Trade; import org.json.JSONArray; @@ -18,7 +21,8 @@ import java.time.ZonedDateTime; public class LoadCommand implements Command { private Connection conn; - private final static UrlXmlParser URL_XML_PARSER = new UrlXmlParser(); + private final static XmlParser URL_XML_PARSER = new UrlXmlParser(); + private final static XmlParser TXT_XML_PARSER = new TxtXmlParser(); /** * Main run method for every command. @@ -27,23 +31,30 @@ public class LoadCommand implements Command { */ @Override public String run(String[] args) { - // TODO: add log comments + // param checking + if (args.length != 1){ + return "ERROR: Incorrect number of arguments for load command"; + } + + int numFilings; + try { + numFilings = Integer.parseInt(args[0]); + } catch (NumberFormatException e) { + return "ERROR: Please input an integer."; + } + + System.err.println("LOG: Entered .run() of " + getClass()); - // TODO: call to api for urls to call through the urlxmlparser from reagan - if (Main.xmlLinks == null) { - return "ERROR: Please load xml links from frontend."; + FilingFeed filings = getFilings(numFilings); + if (filings.getFilings().isEmpty()) { + System.err.println("WARNING: No filings loaded."); } conn = DatabaseQuerier.getConn(); - JSONArray data = Main.xmlLinks.getJSONArray("data"); - for(int i =0; i < data.length(); i++) { - JSONObject link = data.optJSONObject(i); - - String timestamp = 
link.getString("timestamp"); - String url = link.getString("url"); + for(FilingFeed.Filing filing : filings.getFilings()) { try { System.err.println("LOG: Calling loadTransactionIntoDB() in " + getClass()); - loadTransactionIntoDB(timestamp, url); + loadTransactionIntoDB(filing.getTimestamp(), filing.getXmlUrl()); } catch (SQLException throwables) { System.err.println("INTERNAL: SQLException in .run() of " + getClass()); //throwables.printStackTrace(); @@ -53,19 +64,33 @@ public class LoadCommand implements Command { return "Loaded?"; } + private FilingFeed getFilings(int numFilings) { + // TODO: make params more adjustable + String queryUrl = "https://www.sec.gov/cgi-bin/browse-edgar?" + + "action=getcurrent" + + "&CIK=" + + "&type=4" + + "&company=" + + "&dateb=" + + "&owner=only" + + "&start=0" + + "&count=" + numFilings + + "&output=atom"; + + Document document = URL_XML_PARSER.parse(queryUrl); + return new FilingFeed(document); + } + /** * Loads a whole transaction, which can have multiple trades, into the DB. * @param url The url to the public xml file. * @throws SQLException If the prep statement fails or db doesn't exist, throws. */ - private void loadTransactionIntoDB(String timestamp, String url) throws SQLException { + private void loadTransactionIntoDB(Instant instant, String url) throws SQLException { System.err.println("LOG: Parsing XML into transaction in loadTransactionIntoDB(). URL: " + url); // TODO: check if this is right @julia // TODO: add parse error handling... - ZonedDateTime zonedDateTime = ZonedDateTime.parse(timestamp); - Instant instant = zonedDateTime.toInstant(); - - Document document = URL_XML_PARSER.parse(url); + Document document = TXT_XML_PARSER.parse(url); if (document == null) { System.err.println("WARNING: URL " + url + " failed to parse... 
continuing."); return; diff --git a/src/test/java/edu/brown/cs/student/FilingTest.java b/src/test/java/edu/brown/cs/student/FilingTest.java new file mode 100644 index 0000000..a9b21d3 --- /dev/null +++ b/src/test/java/edu/brown/cs/student/FilingTest.java @@ -0,0 +1,82 @@ +package edu.brown.cs.student; + +import edu.brown.cs.student.term.parsing.LocalXmlParser; +import edu.brown.cs.student.term.parsing.Transaction; +import edu.brown.cs.student.term.parsing.TxtXmlParser; +import edu.brown.cs.student.term.parsing.UrlXmlParser; +import edu.brown.cs.student.term.parsing.XmlParser; +import edu.brown.cs.student.term.trade.Trade; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import javax.print.Doc; + +import static org.junit.Assert.*; + +public class FilingTest { + private XmlParser _xmlParser, _txtXmlParser; + + @Before + public void setUp() { + _xmlParser = new UrlXmlParser(); + _txtXmlParser = new TxtXmlParser(); + } + + @After + public void tearDown() { + _xmlParser = null; + _txtXmlParser = null; + } + + @Test + public void seeWorks(){ + setUp(); + + String url = "https://www.sec.gov/cgi-bin/browse-edgar?" 
+ + "action=getcurrent" + + "&CIK=" + + "&type=4" + + "&company=" + + "&dateb=" + + "&owner=only" + + "&start=0" + + "&count=10" + + "&output=atom"; + + Document doc = _xmlParser.parse(url); + assertNotNull(doc); + NodeList entries = doc.getElementsByTagName("entry"); + assertNotEquals(entries.getLength(), 0); + assertEquals(entries.item(0).getNodeType(), Node.ELEMENT_NODE); + for (int i = 0; i < entries.getLength(); i++) { + Element entry = (Element) entries.item(i); + NodeList link = entry.getElementsByTagName("link"); + assertEquals(link.getLength(), 1); + String linkUrl = link.item(0).getAttributes().getNamedItem("href").getNodeValue(); + System.out.println(linkUrl); + + NodeList updated = entry.getElementsByTagName("updated"); + assertEquals(link.getLength(), 1); + System.out.println(updated.item(0).getTextContent()); + } + + tearDown(); + } + + @Test + public void xmlUrlFromFilingUrl(){ + setUp(); + + String url = "https://www.sec.gov/Archives/edgar/data/1597341/000141588921001958/0001415889-21-001958.txt"; + Document doc = _txtXmlParser.parse(url); + assertNotNull(doc); + tearDown(); + } + + +} |