diff options
-rw-r--r-- | data/trades.sqlite3 | bin | 49152 -> 344064 bytes | |||
-rw-r--r-- | src/main/java/edu/brown/cs/student/term/Main.java | 8 | ||||
-rw-r--r-- | src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java | 87 | ||||
-rw-r--r-- | src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java | 28 | ||||
-rw-r--r-- | src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java | 2 | ||||
-rw-r--r-- | src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java | 109 | ||||
-rw-r--r-- | trades.sqlite3 | 0 |
7 files changed, 142 insertions, 92 deletions
diff --git a/data/trades.sqlite3 b/data/trades.sqlite3 Binary files differindex 7c6f921..c231e0f 100644 --- a/data/trades.sqlite3 +++ b/data/trades.sqlite3 diff --git a/src/main/java/edu/brown/cs/student/term/Main.java b/src/main/java/edu/brown/cs/student/term/Main.java index 2a75bd5..55b1634 100644 --- a/src/main/java/edu/brown/cs/student/term/Main.java +++ b/src/main/java/edu/brown/cs/student/term/Main.java @@ -98,12 +98,20 @@ public final class Main { parser.accepts("gui"); parser.accepts("port").withRequiredArg().ofType(Integer.class) .defaultsTo(DEFAULT_PORT); + parser.accepts("debug"); OptionSet options = parser.parse(args); if (options.has("gui")) { runSparkServer((int) options.valueOf("port")); } + if (!options.has("debug")) { + System.setErr(new PrintStream(new OutputStream() { + public void write(int b) { + } + })); + } + HashMap<String, Command> commandHashMap = new HashMap<>(); commandHashMap.put("setup", new SetupCommand()); commandHashMap.put("load", new LoadCommand()); diff --git a/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java b/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java index d41e918..aac6358 100644 --- a/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java +++ b/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java @@ -5,17 +5,19 @@ import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; -import java.time.Instant; -import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.List; -import java.util.Objects; - -import static org.junit.Assert.assertEquals; +/** + * Represents the filing from the Edgar rss feed. + */ public class FilingFeed { - private final List<Filing> filings; + private final List<String> filings; + /** + * Constructor that takes the parsed document and extracts the url. + * @param document The document of the rss feed. + */ public FilingFeed(Document document) { // Init array filings = new ArrayList<>(); @@ -27,73 +29,32 @@ public class FilingFeed { assert entries.item(i).getNodeType() == Node.ELEMENT_NODE; Element entry = (Element) entries.item(i); - // Get the timestamp from updated field - NodeList updated = entry.getElementsByTagName("updated"); - // Assert at least one element. - assert updated.getLength() == 1; - String timestamp = updated.item(0).getTextContent(); - ZonedDateTime zonedDateTime = ZonedDateTime.parse(timestamp); - Instant instant = zonedDateTime.toInstant(); - NodeList link = entry.getElementsByTagName("link"); - assertEquals(link.getLength(), 1); String linkUrl = link.item(0).getAttributes().getNamedItem("href").getNodeValue(); - filings.add(new Filing(getXmlUrl(linkUrl), instant)); + filings.add(getXmlUrl(linkUrl)); } } + /** + * Turns the local url into a publicly hosted one. + * @param filingUrl The local url of the .txt to the filing. + * @return The publicly hosted version of the url. + */ private String getXmlUrl(String filingUrl) { - return filingUrl.replace("-index.htm", ".txt"); + String url = filingUrl.replace("-index.htm", ".txt"); + if (!url.contains("https://www.sec.gov/")) { + url = "https://www.sec.gov/" + url; + } + return url; } - public List<Filing> getFilings() { + /** + * Accessor that returns the url to the txt format of the filings. + * @return The list of publicly hosted urls to each filing. + */ + public List<String> getFilings() { return filings; } - public static class Filing { - // TODO: update to be immutable - private final String xmlUrl; - private final Instant timestamp; - - public Filing(String xmlUrl, Instant timestamp) { - this.xmlUrl = xmlUrl; - this.timestamp = timestamp; - } - - public Instant getTimestamp() { - return timestamp; - } - - public String getXmlUrl() { - return xmlUrl; - } - - @Override - public String toString() { - return "Filing{" + - "xmlUrl='" + xmlUrl + '\'' + - ", timestamp=" + timestamp + - '}'; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - Filing filing = (Filing) o; - return Objects.equals(xmlUrl, filing.xmlUrl) && - Objects.equals(timestamp, filing.timestamp); - } - - @Override - public int hashCode() { - return Objects.hash(xmlUrl, timestamp); - } - } - } diff --git a/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java index 2b8016e..1be5f6f 100644 --- a/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java +++ b/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java @@ -4,7 +4,6 @@ import org.w3c.dom.Document; import org.xml.sax.InputSource; import org.xml.sax.SAXException; -import javax.print.Doc; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; @@ -12,10 +11,15 @@ import java.io.StringReader; import java.net.URL; import java.net.URLConnection; +/** + * Class that parses the XML contained within a publicly held txt file. + */ public class TxtXmlParser extends XmlParser { + private long timestamp; public TxtXmlParser() { super(); + timestamp = -1; } /** @@ -36,13 +40,22 @@ public class TxtXmlParser extends XmlParser { BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream())); StringBuilder xmlParts = new StringBuilder(); + boolean isXml = false; String line; while ((line = br.readLine()) != null) { + // Get timestamp + if (line.startsWith("<ACCEPTANCE-DATETIME>")) { + String timestampString = line.replaceAll("<ACCEPTANCE-DATETIME>", ""); + + // TODO: check for errors + this.timestamp = Long.parseLong(timestampString); + } + + // For xml if (line.equals("</XML>")) { break; } - if (isXml) { xmlParts.append(line); } @@ -62,4 +75,15 @@ public class TxtXmlParser extends XmlParser { } return null; } + + /** + * Returns the timestamp then resets it to -1. + * @return The timestamp as a number (long). -1 if not assigned. + */ + public long getTimestamp() { + long temp = timestamp; + // Set to -1 for next one... + timestamp = -1; + return temp; + } } diff --git a/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java index c89c31d..21cd7c5 100644 --- a/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java +++ b/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java @@ -6,8 +6,10 @@ import org.xml.sax.SAXException; import java.io.IOException; import java.net.URL; import java.net.URLConnection; +import java.time.Instant; public class UrlXmlParser extends XmlParser{ + public UrlXmlParser() { super(); } diff --git a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java index b1e8cb4..541add2 100644 --- a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java +++ b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java @@ -24,7 +24,7 @@ import java.util.List; public class LoadCommand implements Command { private Connection conn; private final static XmlParser URL_XML_PARSER = new UrlXmlParser(); - private final static XmlParser TXT_XML_PARSER = new TxtXmlParser(); + private final static TxtXmlParser TXT_XML_PARSER = new TxtXmlParser(); /** * Main run method for every command. @@ -34,7 +34,7 @@ public class LoadCommand implements Command { @Override public String run(String[] args) { // param checking - if (args.length != 1){ + if (args.length != 1 && args.length !=2) { return "ERROR: Incorrect number of arguments for load command"; } @@ -45,62 +45,117 @@ public class LoadCommand implements Command { return "ERROR: Please input an integer."; } + boolean isArchive = false; + if (args.length == 2) { + isArchive = args[1].equals("archive"); + } + System.err.println("LOG: Entered .run() of " + getClass()); - List<FilingFeed.Filing> allFilings = getFilings(numFilings); + //List<String> filingUrls = getFilings(numFilings); + getFilings(numFilings, isArchive); + + //loadFilings(filingUrls); + + return "Finished loading " + numFilings + " filings."; + } - if (allFilings.isEmpty()) { + /** + * Parses the urls to filings and loads them into the setup DB. + * @param urls The list of urls to parsable Edgar txt files. + */ + public void loadFilings(List<String> urls) { + if (urls.isEmpty()) { System.err.println("WARNING: No filings loaded."); + return; } conn = DatabaseQuerier.getConn(); - for(FilingFeed.Filing filing : allFilings) { + for(String url : urls) { try { System.err.println("LOG: Calling loadTransactionIntoDB() in " + getClass()); - loadTransactionIntoDB(filing.getTimestamp(), filing.getXmlUrl()); + loadTransactionIntoDB(url); } catch (SQLException throwables) { System.err.println("INTERNAL: SQLException in .run() of " + getClass()); //throwables.printStackTrace(); } } - - return "Loaded?"; } - private List<FilingFeed.Filing> getFilings(int numFilings) { - List<FilingFeed.Filing> all = new ArrayList<>(); + /** + * Makes a request to the public Edgar url and parses it's rss feed. + * @param numFilings The number of filings to parse. + */ + private void getFilings(int numFilings, boolean isArchive) { int counter = 0; + while (100*counter <= numFilings) { - String queryUrl = "https://www.sec.gov/cgi-bin/browse-edgar?" + - "action=getcurrent" + - "&CIK=" + - "&type=4" + - "&company=" + - "&dateb=" + - "&owner=only" + - "&start=" + (100*counter++) + - "&count=" + 100 + - "&output=atom"; + /* + if (counter%10 == 0) { + + System.out.println("Starting wait"); + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + System.out.println("End wait"); + } + */ + + + String queryUrl = + (isArchive) ? + "https://www.sec.gov/cgi-bin/srch-edgar?" + + "text=form-type%3D4" + + "&start=" + (100*counter++) + + "&count=" + 100 + + "&first=2020" + + "&last=2021" + + "&output=atom" + : + "https://www.sec.gov/cgi-bin/browse-edgar?" + + "action=getcurrent" + + "&CIK=" + + "&type=4" + + "&company=" + + "&dateb=" + + "&owner=only" + + "&start=" + (100*counter++) + + "&count=" + 100 + + "&output=atom"; + System.out.println("LOG: Requesting filings with url: " + queryUrl); Document document = URL_XML_PARSER.parse(queryUrl); + + if (document == null) { + System.err.println("WARNING: Document was null " + queryUrl + " in getFilings(): " + getClass()); + continue; + } + FilingFeed filingFeed = new FilingFeed(document); - all.addAll(filingFeed.getFilings()); + loadFilings(filingFeed.getFilings()); + + if (counter%10 == 0) { + System.out.println("PROGRESS: " + counter*100 + "/" + numFilings); + } } // TODO: make params more adjustable - return all; } + /** * Loads a whole transaction, which can have multiple trades, into the DB. * @param url The url to the public xml file. * @throws SQLException If the prep statement fails or db doesn't exist, throws. */ - private void loadTransactionIntoDB(Instant instant, String url) throws SQLException { + private void loadTransactionIntoDB(String url) throws SQLException { System.err.println("LOG: Parsing XML into transaction in loadTransactionIntoDB(). URL: " + url); // TODO: check if this is right @julia // TODO: add parse error handling... Document document = TXT_XML_PARSER.parse(url); - if (document == null) { + long timestamp = TXT_XML_PARSER.getTimestamp(); + if (document == null || timestamp == -1) { System.err.println("WARNING: URL " + url + " failed to parse... continuing."); return; } @@ -111,7 +166,7 @@ public class LoadCommand implements Command { for(Trade trade : helper.getTrades()) { System.err.println("LOG: Loading a trade into DB -> " + trade); - loadTradeIntoDB(instant, trade); + loadTradeIntoDB(timestamp, trade); System.err.println("LOG: Loaded that trade."); } } catch (Exception e) { @@ -124,7 +179,7 @@ public class LoadCommand implements Command { * @param trade The trade to be loaded. * @throws SQLException If the prep statement fails or db doesn't exist, throws. */ - private void loadTradeIntoDB(Instant instant, Trade trade) throws SQLException { + private void loadTradeIntoDB(long timestamp, Trade trade) throws SQLException { // current table schema that is used... // TODO: make this TABLE with this SCHEMA if doesn't exist. /* @@ -149,7 +204,7 @@ public class LoadCommand implements Command { prep.setString(1, trade.getStock()); prep.setString(2, trade.getHolder().getName()); // TODO: update with timestamp @julia - prep.setLong(3, instant.toEpochMilli()); + prep.setLong(3, timestamp); prep.setInt(4, trade.isBuy() ? 1 : 0); prep.setInt(5, trade.getNumShares()); prep.setInt(6, trade.getHolder().getId()); diff --git a/trades.sqlite3 b/trades.sqlite3 new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/trades.sqlite3 |