aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Foiani <sotech117@michaels-mbp-3.lan>2021-04-17 21:42:13 -0400
committerMichael Foiani <sotech117@michaels-mbp-3.lan>2021-04-17 21:42:13 -0400
commit9a8483885977d6ca17344d465e431f1f2cdafc06 (patch)
tree25329295f95d45e376507c2b2e4f9f4d6c7acd45
parent6dd133454b9c6c6d666a2dd17dd455ffd66c9937 (diff)
Created a stable load command that uses the official EDGAR RSS feed. Added an option to load the most recent trades or trades from a certain timeframe.
-rw-r--r--data/trades.sqlite3bin344064 -> 16539648 bytes
-rw-r--r--src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java2
-rw-r--r--src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java19
-rw-r--r--src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java80
4 files changed, 66 insertions, 35 deletions
diff --git a/data/trades.sqlite3 b/data/trades.sqlite3
index c231e0f..878261a 100644
--- a/data/trades.sqlite3
+++ b/data/trades.sqlite3
Binary files differ
diff --git a/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java b/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java
index aac6358..b5a6acf 100644
--- a/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java
+++ b/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java
@@ -44,7 +44,7 @@ public class FilingFeed {
private String getXmlUrl(String filingUrl) {
String url = filingUrl.replace("-index.htm", ".txt");
if (!url.contains("https://www.sec.gov/")) {
- url = "https://www.sec.gov/" + url;
+ url = "https://www.sec.gov" + url;
}
return url;
}
diff --git a/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java
index ccb8863..2e30fa7 100644
--- a/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java
+++ b/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java
@@ -10,12 +10,16 @@ import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
import java.time.Instant;
/**
* Class that parses the XML contained within a publicly held txt file.
*/
public class TxtXmlParser extends XmlParser {
+ public final static SimpleDateFormat TIMECONVERTER = new SimpleDateFormat("yyyyMMddHHmmss");
+
private long timestamp;
public TxtXmlParser() {
@@ -47,10 +51,9 @@ public class TxtXmlParser extends XmlParser {
while ((line = br.readLine()) != null) {
// Get timestamp
if (line.startsWith("<ACCEPTANCE-DATETIME>")) {
- String timestampString = line.replaceAll("<ACCEPTANCE-DATETIME>", "");
-
+ String datetime = line.replaceAll("<ACCEPTANCE-DATETIME>", "");
// TODO: check for errors
- this.timestamp = Long.parseLong(timestampString);
+ this.timestamp = formatTimestamp(datetime);
}
// For xml
@@ -77,6 +80,16 @@ public class TxtXmlParser extends XmlParser {
return null;
}
+ public long formatTimestamp(String datetime) {
+ long timestamp = -1;
+ try {
+ timestamp = TIMECONVERTER.parse(datetime).toInstant().toEpochMilli();
+ } catch (ParseException e) {
+ e.printStackTrace();
+ }
+ return timestamp;
+ }
+
/**
* Returns the timestamp then resets it to -1.
* @return The timestamp as a number (long). -1 if not assigned.
diff --git a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java
index 541add2..00ba3ad 100644
--- a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java
+++ b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java
@@ -34,32 +34,59 @@ public class LoadCommand implements Command {
@Override
public String run(String[] args) {
// param checking
- if (args.length != 1 && args.length !=2) {
+ if (args.length != 1 && args.length !=2 && args.length !=3) {
return "ERROR: Incorrect number of arguments for load command";
}
int numFilings;
try {
numFilings = Integer.parseInt(args[0]);
+ if (numFilings <=0) {
+ return "ERROR: Please input an positive integer for number of filings.";
+ }
} catch (NumberFormatException e) {
- return "ERROR: Please input an integer.";
+ return "ERROR: Please input an integer for number of filings.";
}
- boolean isArchive = false;
- if (args.length == 2) {
- isArchive = args[1].equals("archive");
+ int shift = 0;
+ try {
+ if (args.length == 2) {
+ shift = Integer.parseInt(args[1]);
+ if (shift <=0) {
+ return "ERROR: Please input an positive integer for the count shift.";
+ }
+ }
+ } catch (NumberFormatException e) {
+ return "ERROR: Please input an integer for the shift.";
+ }
+
+ String filingDate = null;
+ if (args.length == 3) {
+ filingDate = args[2];
+ System.out.println("WARNING: The archive version of the command make take " +
+ "a long time if a broad query param is inputted.");
}
System.err.println("LOG: Entered .run() of " + getClass());
//List<String> filingUrls = getFilings(numFilings);
- getFilings(numFilings, isArchive);
+ getFilings(numFilings, shift, filingDate);
//loadFilings(filingUrls);
return "Finished loading " + numFilings + " filings.";
}
+ private void timeout() {
+ // System.out.println("timeout 100 mil");
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+
/**
* Parses the urls to filings and loads them into the setup DB.
* @param urls The list of urls to parsable Edgar txt files.
@@ -86,29 +113,17 @@ public class LoadCommand implements Command {
* Makes a request to the public Edgar url and parses it's rss feed.
* @param numFilings The number of filings to parse.
*/
- private void getFilings(int numFilings, boolean isArchive) {
+ private void getFilings(int numFilings, int shift, String filingDate) {
int counter = 0;
- while (100*counter <= numFilings) {
- /*
- if (counter%10 == 0) {
-
- System.out.println("Starting wait");
- try {
- Thread.sleep(1000);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- System.out.println("End wait");
- }
- */
-
+ while (100*counter <= (numFilings - shift)) {
+ timeout();
String queryUrl =
- (isArchive) ?
+ (filingDate != null) ?
"https://www.sec.gov/cgi-bin/srch-edgar?" +
- "text=form-type%3D4" +
- "&start=" + (100*counter++) +
+ "text=form-type%3D4+and+(filing-date%3D" + filingDate + ")" +
+ "&start=" + (100*counter++ + shift) +
"&count=" + 100 +
"&first=2020" +
"&last=2021" +
@@ -121,13 +136,12 @@ public class LoadCommand implements Command {
"&company=" +
"&dateb=" +
"&owner=only" +
- "&start=" + (100*counter++) +
+ "&start=" + (100*counter++ + shift) +
"&count=" + 100 +
"&output=atom";
- System.out.println("LOG: Requesting filings with url: " + queryUrl);
+ System.err.println("LOG: Requesting filings with url: " + queryUrl);
Document document = URL_XML_PARSER.parse(queryUrl);
-
if (document == null) {
System.err.println("WARNING: Document was null " + queryUrl + " in getFilings(): " + getClass());
continue;
@@ -153,6 +167,8 @@ public class LoadCommand implements Command {
System.err.println("LOG: Parsing XML into transaction in loadTransactionIntoDB(). URL: " + url);
// TODO: check if this is right @julia
// TODO: add parse error handling...
+ // timeout to reduce the too many requests
+ timeout();
Document document = TXT_XML_PARSER.parse(url);
long timestamp = TXT_XML_PARSER.getTimestamp();
if (document == null || timestamp == -1) {
@@ -166,7 +182,7 @@ public class LoadCommand implements Command {
for(Trade trade : helper.getTrades()) {
System.err.println("LOG: Loading a trade into DB -> " + trade);
- loadTradeIntoDB(timestamp, trade);
+ loadTradeIntoDB(timestamp, trade, url);
System.err.println("LOG: Loaded that trade.");
}
} catch (Exception e) {
@@ -179,7 +195,7 @@ public class LoadCommand implements Command {
* @param trade The trade to be loaded.
* @throws SQLException If the prep statement fails or db doesn't exist, throws.
*/
- private void loadTradeIntoDB(long timestamp, Trade trade) throws SQLException {
+ private void loadTradeIntoDB(long timestamp, Trade trade, String url) throws SQLException {
// current table schema that is used...
// TODO: make this TABLE with this SCHEMA if doesn't exist.
/*
@@ -192,14 +208,15 @@ public class LoadCommand implements Command {
number_of_shares INTEGER,
holder_id INTEGER,
share_price NUMERIC,
+ filing_url TEXT
UNIQUE (trade_timestamp, is_buy, number_of_shares, holder_id, share_price));
*/
System.err.println("LOG: Setting prepared statement on " + conn);
PreparedStatement prep = conn.prepareStatement(
"INSERT INTO trades (stock_name, holder_name, trade_timestamp, is_buy, " +
- "number_of_shares, holder_id, share_price) " +
- "VALUES (?, ?, ?, ?, ?, ?, ?)");
+ "number_of_shares, holder_id, share_price, filing_url) " +
+ "VALUES (?, ?, ?, ?, ?, ?, ?, ?)");
prep.setString(1, trade.getStock());
prep.setString(2, trade.getHolder().getName());
@@ -209,6 +226,7 @@ public class LoadCommand implements Command {
prep.setInt(5, trade.getNumShares());
prep.setInt(6, trade.getHolder().getId());
prep.setDouble(7, trade.getPrice());
+ prep.setString(8, url);
System.err.println("LOG: Inserted values into prep statement.");
prep.execute();