aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/edu/brown/cs/student/term/parsing/TxtXmlParser.java
blob: 2e30fa7fa99ae03fbbf0d21d3f424b62437d33b8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
package edu.brown.cs.student.term.parsing;

import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;

/**
 * Class that parses the XML contained within a publicly held txt file.
 *
 * <p>The document is fetched through a {@link URLConnection} and scanned line by line. Every
 * line strictly between a line equal to {@code <XML>} and the next line equal to
 * {@code </XML>} is collected and handed to the inherited {@code builder}. While scanning, a
 * line starting with {@code <ACCEPTANCE-DATETIME>} is converted to epoch milliseconds and kept
 * until {@link #getTimestamp()} is called.
 *
 * <p>Instances hold mutable state (the last timestamp) and are not safe for concurrent use.
 */
public class TxtXmlParser extends XmlParser {
  /**
   * Format of the {@code <ACCEPTANCE-DATETIME>} value ({@code yyyyMMddHHmmss}). No time zone is
   * configured, so values are interpreted in the JVM default time zone.
   *
   * <p>{@link SimpleDateFormat} is not thread-safe: {@link #formatTimestamp(String)} synchronizes
   * on this instance, and any other user of this constant must do the same.
   */
  public static final SimpleDateFormat TIMECONVERTER = new SimpleDateFormat("yyyyMMddHHmmss");

  private static final String ACCEPTANCE_TAG = "<ACCEPTANCE-DATETIME>";
  private static final String XML_OPEN = "<XML>";
  private static final String XML_CLOSE = "</XML>";

  /** Epoch milliseconds of the last acceptance date seen, or -1 if none is pending. */
  private long timestamp;

  /**
   * Creates a parser with no pending timestamp.
   */
  public TxtXmlParser() {
    super();
    timestamp = -1;
  }

  /**
   * Method used to parse the xml file.
   *
   * <p>Any timestamp left over from a previous call is discarded first, so that
   * {@link #getTimestamp()} never reports a value belonging to an earlier document.
   *
   * @param pathToXml The path (URL) to the xml text file.
   * @return The tree structure parsed as an xml doc, or {@code null} if the document could not
   *         be fetched, contains no XML section, or the XML could not be parsed.
   */
  @Override
  public Document parse(String pathToXml) {
    // Drop any stale value so a document without an acceptance date reports -1.
    timestamp = -1;
    try {
      System.err.println("LOG: To make class for url: " + pathToXml + " in parse() of " + getClass());
      URL url = new URL(pathToXml);
      System.err.println("LOG: To establish urlConnection in parse() of " + getClass());
      URLConnection conn = url.openConnection();
      conn.addRequestProperty("User-Agent", "Chrome");
      System.err.println("LOG: Making bufferedReader for url: " + pathToXml + " in " +  getClass());

      String xml;
      // try-with-resources closes the reader (and the underlying connection stream) on every
      // path, including exceptions thrown while reading.
      try (BufferedReader br = new BufferedReader(
          new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
        xml = extractXml(br);
      }

      if (xml.isEmpty()) {
        // Nothing between <XML> and </XML>: report it here instead of letting the XML parser
        // fail on empty input. The result for the caller is the same (null).
        System.err.println("INTERNAL: no XML section found in: " + pathToXml + " in " + getClass());
        return null;
      }

      System.err.println("LOG: Calling builder.parse() after extracting xml parts from: " + pathToXml + " in " +  getClass());

      InputSource xmlLines = new InputSource(new StringReader(xml));
      return builder.parse(xmlLines);
    } catch (SAXException e) {
      System.err.println("INTERNAL: SAX " + getClass() + " : " + e.getClass() + " : " + e.getMessage());
    } catch (IOException e) {
      e.printStackTrace();
      System.err.println("INTERNAL: IO " + getClass() + " : " + e.getClass());
    }
    return null;
  }

  /**
   * Scans the document, recording the acceptance timestamp and collecting the XML section.
   *
   * @param reader Reader positioned at the start of the document.
   * @return The lines strictly between {@code <XML>} and {@code </XML>}, joined without
   *         separators; empty if no such section exists.
   * @throws IOException If reading from {@code reader} fails.
   */
  private String extractXml(BufferedReader reader) throws IOException {
    StringBuilder xmlParts = new StringBuilder();
    boolean insideXml = false;
    String line;
    while ((line = reader.readLine()) != null) {
      if (line.startsWith(ACCEPTANCE_TAG)) {
        // Literal replace: the tag is a fixed string, not a regular expression.
        this.timestamp = formatTimestamp(line.replace(ACCEPTANCE_TAG, ""));
      }

      if (line.equals(XML_CLOSE)) {
        break;
      }
      if (insideXml) {
        // Lines are concatenated as-is; readLine() has already stripped the line terminator.
        xmlParts.append(line);
      }
      if (line.equals(XML_OPEN)) {
        insideXml = true;
      }
    }
    return xmlParts.toString();
  }

  /**
   * Converts a {@code yyyyMMddHHmmss} string to epoch milliseconds.
   *
   * @param datetime The date-time text; surrounding whitespace is ignored.
   * @return Epoch milliseconds, or -1 if {@code datetime} is {@code null} or cannot be parsed.
   */
  public long formatTimestamp(String datetime) {
    if (datetime == null) {
      return -1;
    }
    try {
      // SimpleDateFormat keeps mutable parsing state; guard the shared instance.
      synchronized (TIMECONVERTER) {
        return TIMECONVERTER.parse(datetime.trim()).getTime();
      }
    } catch (ParseException e) {
      System.err.println("INTERNAL: Parse " + getClass() + " : could not read timestamp '"
          + datetime + "' : " + e.getMessage());
      return -1;
    }
  }

  /**
   * Returns the timestamp then resets it to -1.
   * @return The timestamp as a number (long). -1 if not assigned.
   */
  public long getTimestamp() {
    long pending = timestamp;
    // Set to -1 for next one...
    timestamp = -1;
    return pending;
  }
}