src/main/java/edu/brown/cs/student/term/parsing/FilingFeed.java


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

package edu.brown.cs.student.term.parsing;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.util.ArrayList;
import java.util.List;

/**
 * Collects the filing urls listed in an Edgar rss feed document.
 *
 * <p>Every {@code entry} element of the feed contributes the {@code href} of its
 * first {@code link} element, rewritten to point at the {@code .txt} version of
 * the filing on the SEC host. Entries without a usable link are skipped.
 */
public class FilingFeed {
  /** Scheme and host prepended to links that are not already absolute. */
  private static final String SEC_HOST = "https://www.sec.gov";
  /** Suffix of the filing index page linked from the feed. */
  private static final String INDEX_SUFFIX = "-index.htm";
  /** Suffix that replaces {@link #INDEX_SUFFIX} in the rewritten url. */
  private static final String TEXT_SUFFIX = ".txt";

  private final List<String> filings;

  /**
   * Constructor that takes the parsed document and extracts the filing urls.
   *
   * @param document The document of the rss feed; must not be null.
   * @throws NullPointerException if {@code document} is null.
   */
  public FilingFeed(Document document) {
    if (document == null) {
      throw new NullPointerException("document must not be null");
    }
    filings = new ArrayList<>();

    NodeList entries = document.getElementsByTagName("entry");
    for (int i = 0; i < entries.getLength(); i++) {
      Node node = entries.item(i);
      // A runtime check instead of an assert: asserts are disabled by default.
      if (!(node instanceof Element)) {
        continue;
      }

      String linkUrl = firstLinkHref((Element) node);
      // One malformed entry should not discard the rest of the feed.
      if (linkUrl != null) {
        filings.add(getXmlUrl(linkUrl));
      }
    }
  }

  /**
   * Reads the {@code href} attribute of the first {@code link} element under an entry.
   *
   * @param entry The feed entry to inspect.
   * @return The href value, or null when the entry has no link element or the
   *         link has no (or an empty) href attribute.
   */
  private static String firstLinkHref(Element entry) {
    NodeList links = entry.getElementsByTagName("link");
    Node first = links.item(0);
    if (!(first instanceof Element)) {
      return null;
    }
    // getAttribute returns the empty string when the attribute is absent.
    String href = ((Element) first).getAttribute("href");
    return href.isEmpty() ? null : href;
  }

  /**
   * Turns the link to a filing's index page into the publicly hosted url of
   * the filing's .txt version.
   *
   * @param filingUrl The index page url taken from the feed; may be relative
   *                  to the SEC host.
   * @return The url with "-index.htm" replaced by ".txt" and the SEC host
   *         prepended when the url does not already start with it.
   */
  private String getXmlUrl(String filingUrl) {
    String url = filingUrl.replace(INDEX_SUFFIX, TEXT_SUFFIX);
    // startsWith, not contains: the host appearing elsewhere in a relative
    // url (e.g. in a query string) does not make that url absolute.
    if (!url.startsWith(SEC_HOST + "/")) {
      url = SEC_HOST + url;
    }
    return url;
  }

  /**
   * Accessor that returns the url to the txt format of the filings.
   *
   * @return The list of publicly hosted urls to each filing, in feed order.
   *         This is the internal list itself, not a copy.
   */
  public List<String> getFilings() {
    return filings;
  }

}