about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- data/trades.sqlite3 bin 49152 -> 344064 bytes
-rw-r--r-- src/main/java/edu/brown/cs/student/term/Main.java 8
-rw-r--r-- src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java 87
-rw-r--r-- src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java 28
-rw-r--r-- src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java 2
-rw-r--r-- src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java 109
-rw-r--r-- trades.sqlite3 0
7 files changed, 142 insertions, 92 deletions
diff --git a/data/trades.sqlite3 b/data/trades.sqlite3
index 7c6f921..c231e0f 100644
--- a/data/trades.sqlite3
+++ b/data/trades.sqlite3
Binary files differ
diff --git a/src/main/java/edu/brown/cs/student/term/Main.java b/src/main/java/edu/brown/cs/student/term/Main.java
index 2a75bd5..55b1634 100644
--- a/src/main/java/edu/brown/cs/student/term/Main.java
+++ b/src/main/java/edu/brown/cs/student/term/Main.java
@@ -98,12 +98,20 @@ public final class Main {
parser.accepts("gui");
parser.accepts("port").withRequiredArg().ofType(Integer.class)
.defaultsTo(DEFAULT_PORT);
+ parser.accepts("debug");
OptionSet options = parser.parse(args);
if (options.has("gui")) {
runSparkServer((int) options.valueOf("port"));
}
+ if (!options.has("debug")) {
+ System.setErr(new PrintStream(new OutputStream() {
+ public void write(int b) {
+ }
+ }));
+ }
+
HashMap<String, Command> commandHashMap = new HashMap<>();
commandHashMap.put("setup", new SetupCommand());
commandHashMap.put("load", new LoadCommand());
diff --git a/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java b/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java
index d41e918..aac6358 100644
--- a/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java
+++ b/src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java
@@ -5,17 +5,19 @@ import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
-import java.time.Instant;
-import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.List;
-import java.util.Objects;
-
-import static org.junit.Assert.assertEquals;
+/**
+ * Represents the list of filing urls extracted from the Edgar rss feed.
+ */
public class FilingFeed {
- private final List<Filing> filings;
+ private final List<String> filings;
+ /**
+ * Constructor that takes the parsed document and extracts the url.
+ * @param document The document of the rss feed.
+ */
public FilingFeed(Document document) {
// Init array
filings = new ArrayList<>();
@@ -27,73 +29,32 @@ public class FilingFeed {
assert entries.item(i).getNodeType() == Node.ELEMENT_NODE;
Element entry = (Element) entries.item(i);
- // Get the timestamp from updated field
- NodeList updated = entry.getElementsByTagName("updated");
- // Assert at least one element.
- assert updated.getLength() == 1;
- String timestamp = updated.item(0).getTextContent();
- ZonedDateTime zonedDateTime = ZonedDateTime.parse(timestamp);
- Instant instant = zonedDateTime.toInstant();
-
NodeList link = entry.getElementsByTagName("link");
- assertEquals(link.getLength(), 1);
String linkUrl = link.item(0).getAttributes().getNamedItem("href").getNodeValue();
- filings.add(new Filing(getXmlUrl(linkUrl), instant));
+ filings.add(getXmlUrl(linkUrl));
}
}
+ /**
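+ * Converts a filing url ending in "-index.htm" into the url of its .txt file, prefixing "https://www.sec.gov/" if absent.
+ * @param filingUrl The url of the filing taken from the feed entry's link (may lack the host).
+ * @return The publicly hosted url to the .txt version of the filing.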
+ */
private String getXmlUrl(String filingUrl) {
- return filingUrl.replace("-index.htm", ".txt");
+ String url = filingUrl.replace("-index.htm", ".txt");
+ if (!url.contains("https://www.sec.gov/")) {
+ url = "https://www.sec.gov/" + url;
+ }
+ return url;
}
- public List<Filing> getFilings() {
+ /**
+ * Accessor that returns the url to the txt format of the filings.
+ * @return The list of publicly hosted urls to each filing.
+ */
+ public List<String> getFilings() {
return filings;
}
- public static class Filing {
- // TODO: update to be immutable
- private final String xmlUrl;
- private final Instant timestamp;
-
- public Filing(String xmlUrl, Instant timestamp) {
- this.xmlUrl = xmlUrl;
- this.timestamp = timestamp;
- }
-
- public Instant getTimestamp() {
- return timestamp;
- }
-
- public String getXmlUrl() {
- return xmlUrl;
- }
-
- @Override
- public String toString() {
- return "Filing{" +
- "xmlUrl='" + xmlUrl + '\'' +
- ", timestamp=" + timestamp +
- '}';
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
- Filing filing = (Filing) o;
- return Objects.equals(xmlUrl, filing.xmlUrl) &&
- Objects.equals(timestamp, filing.timestamp);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(xmlUrl, timestamp);
- }
- }
-
}
diff --git a/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java
index 2b8016e..1be5f6f 100644
--- a/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java
+++ b/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java
@@ -4,7 +4,6 @@ import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
-import javax.print.Doc;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
@@ -12,10 +11,15 @@ import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
+/**
+ * Class that parses the XML contained within a publicly hosted txt file.
+ */
public class TxtXmlParser extends XmlParser {
+ private long timestamp;
public TxtXmlParser() {
super();
+ timestamp = -1;
}
/**
@@ -36,13 +40,22 @@ public class TxtXmlParser extends XmlParser {
BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream()));
StringBuilder xmlParts = new StringBuilder();
+
boolean isXml = false;
String line;
while ((line = br.readLine()) != null) {
+ // Get timestamp
+ if (line.startsWith("<ACCEPTANCE-DATETIME>")) {
+ String timestampString = line.replaceAll("<ACCEPTANCE-DATETIME>", "");
+
+ // TODO: check for errors
+ this.timestamp = Long.parseLong(timestampString);
+ }
+
+ // For xml
if (line.equals("</XML>")) {
break;
}
-
if (isXml) {
xmlParts.append(line);
}
@@ -62,4 +75,15 @@ public class TxtXmlParser extends XmlParser {
}
return null;
}
+
+ /**
+ * Returns the timestamp then resets it to -1.
+ * @return The timestamp as a number (long). -1 if not assigned.
+ */
+ public long getTimestamp() {
+ long temp = timestamp;
+ // Set to -1 for next one...
+ timestamp = -1;
+ return temp;
+ }
}
diff --git a/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java b/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java
index c89c31d..21cd7c5 100644
--- a/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java
+++ b/src/main/java/edu/brown/cs/student/term/parsing/UrlXmlParser.java
@@ -6,8 +6,10 @@ import org.xml.sax.SAXException;
import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;
+import java.time.Instant;
public class UrlXmlParser extends XmlParser{
+
public UrlXmlParser() {
super();
}
diff --git a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java
index b1e8cb4..541add2 100644
--- a/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java
+++ b/src/main/java/edu/brown/cs/student/term/repl/commands/LoadCommand.java
@@ -24,7 +24,7 @@ import java.util.List;
public class LoadCommand implements Command {
private Connection conn;
private final static XmlParser URL_XML_PARSER = new UrlXmlParser();
- private final static XmlParser TXT_XML_PARSER = new TxtXmlParser();
+ private final static TxtXmlParser TXT_XML_PARSER = new TxtXmlParser();
/**
* Main run method for every command.
@@ -34,7 +34,7 @@ public class LoadCommand implements Command {
@Override
public String run(String[] args) {
// param checking
- if (args.length != 1){
+ if (args.length != 1 && args.length !=2) {
return "ERROR: Incorrect number of arguments for load command";
}
@@ -45,62 +45,117 @@ public class LoadCommand implements Command {
return "ERROR: Please input an integer.";
}
+ boolean isArchive = false;
+ if (args.length == 2) {
+ isArchive = args[1].equals("archive");
+ }
+
System.err.println("LOG: Entered .run() of " + getClass());
- List<FilingFeed.Filing> allFilings = getFilings(numFilings);
+ //List<String> filingUrls = getFilings(numFilings);
+ getFilings(numFilings, isArchive);
+
+ //loadFilings(filingUrls);
+
+ return "Finished loading " + numFilings + " filings.";
+ }
- if (allFilings.isEmpty()) {
+ /**
+ * Parses the urls to filings and loads them into the setup DB.
+ * @param urls The list of urls to parsable Edgar txt files.
+ */
+ public void loadFilings(List<String> urls) {
+ if (urls.isEmpty()) {
System.err.println("WARNING: No filings loaded.");
+ return;
}
conn = DatabaseQuerier.getConn();
- for(FilingFeed.Filing filing : allFilings) {
+ for(String url : urls) {
try {
System.err.println("LOG: Calling loadTransactionIntoDB() in " + getClass());
- loadTransactionIntoDB(filing.getTimestamp(), filing.getXmlUrl());
+ loadTransactionIntoDB(url);
} catch (SQLException throwables) {
System.err.println("INTERNAL: SQLException in .run() of " + getClass());
//throwables.printStackTrace();
}
}
-
- return "Loaded?";
}
- private List<FilingFeed.Filing> getFilings(int numFilings) {
- List<FilingFeed.Filing> all = new ArrayList<>();
+ /**
+ * Makes a request to the public Edgar url and parses its rss feed.
+ * @param numFilings The number of filings to parse.
+ */
+ private void getFilings(int numFilings, boolean isArchive) {
int counter = 0;
+
while (100*counter <= numFilings) {
- String queryUrl = "https://www.sec.gov/cgi-bin/browse-edgar?" +
- "action=getcurrent" +
- "&CIK=" +
- "&type=4" +
- "&company=" +
- "&dateb=" +
- "&owner=only" +
- "&start=" + (100*counter++) +
- "&count=" + 100 +
- "&output=atom";
+ /*
+ if (counter%10 == 0) {
+
+ System.out.println("Starting wait");
+ try {
+ Thread.sleep(1000);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ System.out.println("End wait");
+ }
+ */
+
+
+ String queryUrl =
+ (isArchive) ?
+ "https://www.sec.gov/cgi-bin/srch-edgar?" +
+ "text=form-type%3D4" +
+ "&start=" + (100*counter++) +
+ "&count=" + 100 +
+ "&first=2020" +
+ "&last=2021" +
+ "&output=atom"
+ :
+ "https://www.sec.gov/cgi-bin/browse-edgar?" +
+ "action=getcurrent" +
+ "&CIK=" +
+ "&type=4" +
+ "&company=" +
+ "&dateb=" +
+ "&owner=only" +
+ "&start=" + (100*counter++) +
+ "&count=" + 100 +
+ "&output=atom";
+
System.out.println("LOG: Requesting filings with url: " + queryUrl);
Document document = URL_XML_PARSER.parse(queryUrl);
+
+ if (document == null) {
+ System.err.println("WARNING: Document was null " + queryUrl + " in getFilings(): " + getClass());
+ continue;
+ }
+
FilingFeed filingFeed = new FilingFeed(document);
- all.addAll(filingFeed.getFilings());
+ loadFilings(filingFeed.getFilings());
+
+ if (counter%10 == 0) {
+ System.out.println("PROGRESS: " + counter*100 + "/" + numFilings);
+ }
}
// TODO: make params more adjustable
- return all;
}
+
/**
* Loads a whole transaction, which can have multiple trades, into the DB.
* @param url The url to the public xml file.
* @throws SQLException If the prep statement fails or db doesn't exist, throws.
*/
- private void loadTransactionIntoDB(Instant instant, String url) throws SQLException {
+ private void loadTransactionIntoDB(String url) throws SQLException {
System.err.println("LOG: Parsing XML into transaction in loadTransactionIntoDB(). URL: " + url);
// TODO: check if this is right @julia
// TODO: add parse error handling...
Document document = TXT_XML_PARSER.parse(url);
- if (document == null) {
+ long timestamp = TXT_XML_PARSER.getTimestamp();
+ if (document == null || timestamp == -1) {
System.err.println("WARNING: URL " + url + " failed to parse... continuing.");
return;
}
@@ -111,7 +166,7 @@ public class LoadCommand implements Command {
for(Trade trade : helper.getTrades()) {
System.err.println("LOG: Loading a trade into DB -> " + trade);
- loadTradeIntoDB(instant, trade);
+ loadTradeIntoDB(timestamp, trade);
System.err.println("LOG: Loaded that trade.");
}
} catch (Exception e) {
@@ -124,7 +179,7 @@ public class LoadCommand implements Command {
* @param trade The trade to be loaded.
* @throws SQLException If the prep statement fails or db doesn't exist, throws.
*/
- private void loadTradeIntoDB(Instant instant, Trade trade) throws SQLException {
+ private void loadTradeIntoDB(long timestamp, Trade trade) throws SQLException {
// current table schema that is used...
// TODO: make this TABLE with this SCHEMA if doesn't exist.
/*
@@ -149,7 +204,7 @@ public class LoadCommand implements Command {
prep.setString(1, trade.getStock());
prep.setString(2, trade.getHolder().getName());
// TODO: update with timestamp @julia
- prep.setLong(3, instant.toEpochMilli());
+ prep.setLong(3, timestamp);
prep.setInt(4, trade.isBuy() ? 1 : 0);
prep.setInt(5, trade.getNumShares());
prep.setInt(6, trade.getHolder().getId());
diff --git a/trades.sqlite3 b/trades.sqlite3
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/trades.sqlite3