package edu.brown.cs.student.term.parsing; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import java.util.ArrayList; import java.util.List; /** * Represents the filing from the Edgar rss feed. */ public class FilingFeed { private final List filings; /** * Constructor that takes the parsed document and extracts the url. * @param document The document of the rss feed. */ public FilingFeed(Document document) { // Init array filings = new ArrayList<>(); // Get all entries NodeList entries = document.getElementsByTagName("entry"); for (int i = 0; i < entries.getLength(); i++) { // Assertion allows the cast to be ok :) assert entries.item(i).getNodeType() == Node.ELEMENT_NODE; Element entry = (Element) entries.item(i); NodeList link = entry.getElementsByTagName("link"); String linkUrl = link.item(0).getAttributes().getNamedItem("href").getNodeValue(); filings.add(getXmlUrl(linkUrl)); } } /** * Turns the local url into a publicly hosted one. * @param filingUrl The local url of the .txt to the filing. * @return The publicly hosted version of the url. */ private String getXmlUrl(String filingUrl) { String url = filingUrl.replace("-index.htm", ".txt"); if (!url.contains("https://www.sec.gov/")) { url = "https://www.sec.gov" + url; } return url; } /** * Accessor that returns the url to the txt format of the filings. * @return The list of publicly hosted urls to each filing. */ public List getFilings() { return filings; } }