diff options
author | Michael Foiani <sotech117@michaels-mbp-3.lan> | 2021-04-17 21:42:13 -0400 |
---|---|---|
committer | Michael Foiani <sotech117@michaels-mbp-3.lan> | 2021-04-17 21:42:13 -0400 |
commit | 9a8483885977d6ca17344d465e431f1f2cdafc06 (patch) | |
tree | 25329295f95d45e376507c2b2e4f9f4d6c7acd45 | |
parent | 6dd133454b9c6c6d666a2dd17dd455ffd66c9937 (diff) |
Created a stable load command that uses the official EDGAR RSS feed. Added an option to load the most recent trades or trades from a certain timeframe.
-rw-r--r-- | data/trades.sqlite3 | bin | 344064 -> 16539648 bytes | |||
-rw-r--r-- | src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java | 2 | ||||
-rw-r--r-- | src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java | 19 | ||||
-rw-r--r-- | src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java | 80 |
4 files changed, 66 insertions, 35 deletions
diff --git a/data/trades.sqlite3 b/data/trades.sqlite3 Binary files differindex c231e0f..878261a 100644 --- a/data/trades.sqlite3 +++ b/data/trades.sqlite3 diff --git a/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java b/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java index aac6358..b5a6acf 100644 --- a/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java +++ b/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java @@ -44,7 +44,7 @@ public class FilingFeed { private String getXmlUrl(String filingUrl) { String url = filingUrl.replace("-index.htm", ".txt"); if (!url.contains("https://www.sec.gov/")) { - url = "https://www.sec.gov/" + url; + url = "https://www.sec.gov" + url; } return url; } diff --git a/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java index ccb8863..2e30fa7 100644 --- a/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java +++ b/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java @@ -10,12 +10,16 @@ import java.io.InputStreamReader; import java.io.StringReader; import java.net.URL; import java.net.URLConnection; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.time.Instant; /** * Class that parses the XML contained within a publicly held txt file. 
*/ public class TxtXmlParser extends XmlParser { + public final static SimpleDateFormat TIMECONVERTER = new SimpleDateFormat("yyyyMMddHHmmss"); + private long timestamp; public TxtXmlParser() { @@ -47,10 +51,9 @@ public class TxtXmlParser extends XmlParser { while ((line = br.readLine()) != null) { // Get timestamp if (line.startsWith("<ACCEPTANCE-DATETIME>")) { - String timestampString = line.replaceAll("<ACCEPTANCE-DATETIME>", ""); - + String datetime = line.replaceAll("<ACCEPTANCE-DATETIME>", ""); // TODO: check for errors - this.timestamp = Long.parseLong(timestampString); + this.timestamp = formatTimestamp(datetime); } // For xml @@ -77,6 +80,16 @@ public class TxtXmlParser extends XmlParser { return null; } + public long formatTimestamp(String datetime) { + long timestamp = -1; + try { + timestamp = TIMECONVERTER.parse(datetime).toInstant().toEpochMilli(); + } catch (ParseException e) { + e.printStackTrace(); + } + return timestamp; + } + /** * Returns the timestamp then resets it to -1. * @return The timestamp as a number (long). -1 if not assigned. 
diff --git a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java index 541add2..00ba3ad 100644 --- a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java +++ b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java @@ -34,32 +34,59 @@ public class LoadCommand implements Command { @Override public String run(String[] args) { // param checking - if (args.length != 1 && args.length !=2) { + if (args.length != 1 && args.length !=2 && args.length !=3) { return "ERROR: Incorrect number of arguments for load command"; } int numFilings; try { numFilings = Integer.parseInt(args[0]); + if (numFilings <=0) { + return "ERROR: Please input an positive integer for number of filings."; + } } catch (NumberFormatException e) { - return "ERROR: Please input an integer."; + return "ERROR: Please input an integer for number of filings."; } - boolean isArchive = false; - if (args.length == 2) { - isArchive = args[1].equals("archive"); + int shift = 0; + try { + if (args.length == 2) { + shift = Integer.parseInt(args[1]); + if (shift <=0) { + return "ERROR: Please input an positive integer for the count shift."; + } + } + } catch (NumberFormatException e) { + return "ERROR: Please input an integer for the shift."; + } + + String filingDate = null; + if (args.length == 3) { + filingDate = args[2]; + System.out.println("WARNING: The archive version of the command make take " + + "a long time if a broad query param is inputted."); } System.err.println("LOG: Entered .run() of " + getClass()); //List<String> filingUrls = getFilings(numFilings); - getFilings(numFilings, isArchive); + getFilings(numFilings, shift, filingDate); //loadFilings(filingUrls); return "Finished loading " + numFilings + " filings."; } + private void timeout() { + // System.out.println("timeout 100 mil"); + try { + Thread.sleep(100); + } catch (InterruptedException e) { + e.printStackTrace(); + } 
+ } + + /** * Parses the urls to filings and loads them into the setup DB. * @param urls The list of urls to parsable Edgar txt files. @@ -86,29 +113,17 @@ public class LoadCommand implements Command { * Makes a request to the public Edgar url and parses it's rss feed. * @param numFilings The number of filings to parse. */ - private void getFilings(int numFilings, boolean isArchive) { + private void getFilings(int numFilings, int shift, String filingDate) { int counter = 0; - while (100*counter <= numFilings) { - /* - if (counter%10 == 0) { - - System.out.println("Starting wait"); - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - e.printStackTrace(); - } - System.out.println("End wait"); - } - */ - + while (100*counter <= (numFilings - shift)) { + timeout(); String queryUrl = - (isArchive) ? + (filingDate != null) ? "https://www.sec.gov/cgi-bin/srch-edgar?" + - "text=form-type%3D4" + - "&start=" + (100*counter++) + + "text=form-type%3D4+and+(filing-date%3D" + filingDate + ")" + + "&start=" + (100*counter++ + shift) + "&count=" + 100 + "&first=2020" + "&last=2021" + @@ -121,13 +136,12 @@ public class LoadCommand implements Command { "&company=" + "&dateb=" + "&owner=only" + - "&start=" + (100*counter++) + + "&start=" + (100*counter++ + shift) + "&count=" + 100 + "&output=atom"; - System.out.println("LOG: Requesting filings with url: " + queryUrl); + System.err.println("LOG: Requesting filings with url: " + queryUrl); Document document = URL_XML_PARSER.parse(queryUrl); - if (document == null) { System.err.println("WARNING: Document was null " + queryUrl + " in getFilings(): " + getClass()); continue; @@ -153,6 +167,8 @@ public class LoadCommand implements Command { System.err.println("LOG: Parsing XML into transaction in loadTransactionIntoDB(). URL: " + url); // TODO: check if this is right @julia // TODO: add parse error handling... 
+ // timeout to reduce the too many requests + timeout(); Document document = TXT_XML_PARSER.parse(url); long timestamp = TXT_XML_PARSER.getTimestamp(); if (document == null || timestamp == -1) { @@ -166,7 +182,7 @@ public class LoadCommand implements Command { for(Trade trade : helper.getTrades()) { System.err.println("LOG: Loading a trade into DB -> " + trade); - loadTradeIntoDB(timestamp, trade); + loadTradeIntoDB(timestamp, trade, url); System.err.println("LOG: Loaded that trade."); } } catch (Exception e) { @@ -179,7 +195,7 @@ public class LoadCommand implements Command { * @param trade The trade to be loaded. * @throws SQLException If the prep statement fails or db doesn't exist, throws. */ - private void loadTradeIntoDB(long timestamp, Trade trade) throws SQLException { + private void loadTradeIntoDB(long timestamp, Trade trade, String url) throws SQLException { // current table schema that is used... // TODO: make this TABLE with this SCHEMA if doesn't exist. /* @@ -192,14 +208,15 @@ public class LoadCommand implements Command { number_of_shares INTEGER, holder_id INTEGER, share_price NUMERIC, + filing_url TEXT UNIQUE (trade_timestamp, is_buy, number_of_shares, holder_id, share_price)); */ System.err.println("LOG: Setting prepared statement on " + conn); PreparedStatement prep = conn.prepareStatement( "INSERT INTO trades (stock_name, holder_name, trade_timestamp, is_buy, " + - "number_of_shares, holder_id, share_price) " + - "VALUES (?, ?, ?, ?, ?, ?, ?)"); + "number_of_shares, holder_id, share_price, filing_url) " + + "VALUES (?, ?, ?, ?, ?, ?, ?, ?)"); prep.setString(1, trade.getStock()); prep.setString(2, trade.getHolder().getName()); @@ -209,6 +226,7 @@ public class LoadCommand implements Command { prep.setInt(5, trade.getNumShares()); prep.setInt(6, trade.getHolder().getId()); prep.setDouble(7, trade.getPrice()); + prep.setString(8, url); System.err.println("LOG: Inserted values into prep statement."); prep.execute(); |