package edu.brown.cs.student.term.parsing; import org.w3c.dom.Document; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.StringReader; import java.net.URL; import java.net.URLConnection; import java.text.ParseException; import java.text.SimpleDateFormat; import java.time.Instant; /** * Class that parses the XML contained within a publicly held txt file. */ public class TxtXmlParser extends XmlParser { public final static SimpleDateFormat TIMECONVERTER = new SimpleDateFormat("yyyyMMddHHmmss"); private long timestamp; public TxtXmlParser() { super(); timestamp = -1; } /** * Method used to parse the xml file. * * @param pathToXml The path to the xml text file. * @return The tree structure parsed as an xml doc. */ @Override public Document parse(String pathToXml) { try { System.err.println("LOG: To make class for url: " + pathToXml + " in parse() of " + getClass()); URL url = new URL(pathToXml); System.err.println("LOG: To establish urlConnection in parse() of " + getClass()); URLConnection conn = url.openConnection(); conn.addRequestProperty("User-Agent", "Chrome"); System.err.println("LOG: Making bufferedReader for url: " + pathToXml + " in " + getClass()); BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream())); StringBuilder xmlParts = new StringBuilder(); boolean isXml = false; String line; while ((line = br.readLine()) != null) { // Get timestamp if (line.startsWith("")) { String datetime = line.replaceAll("", ""); // TODO: check for errors this.timestamp = formatTimestamp(datetime); } // For xml if (line.equals("")) { break; } if (isXml) { xmlParts.append(line); } if (line.equals("")) { isXml = true; } } System.err.println("LOG: Calling builder.parse() after extracting xml parts from: " + pathToXml + " in " + getClass()); InputSource xmlLines = new InputSource(new StringReader(xmlParts.toString())); return builder.parse(xmlLines); } catch (SAXException e) { System.err.println("INTERNAL: SAX " + getClass() + " : " + e.getClass()); } catch (IOException e) { e.printStackTrace(); System.err.println("INTERNAL: IO " + getClass() + " : " + e.getClass()); } return null; } public long formatTimestamp(String datetime) { long timestamp = -1; try { timestamp = TIMECONVERTER.parse(datetime).toInstant().toEpochMilli(); } catch (ParseException e) { e.printStackTrace(); } return timestamp; } /** * Returns the timestamp then resets it to -1. * @return The timestamp as a number (long). -1 if not assigned. */ public long getTimestamp() { long temp = timestamp; // Set to -1 for next one... timestamp = -1; return temp; } }