blob: 2e30fa7fa99ae03fbbf0d21d3f424b62437d33b8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
package edu.brown.cs.student.term.parsing;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
/**
 * Parser for XML that is embedded inside a publicly hosted txt file.
 *
 * <p>{@link #parse(String)} downloads the text behind a URL, keeps only the lines found between a
 * line equal to {@code <XML>} and the next line equal to {@code </XML>}, and hands them to the
 * inherited {@code builder}. While scanning, it also records the value of any line starting with
 * {@code <ACCEPTANCE-DATETIME>} as an epoch-millisecond timestamp, which can be collected once
 * through {@link #getTimestamp()}.
 */
public class TxtXmlParser extends XmlParser {
  /**
   * Converter for the acceptance datetime, which is written as {@code yyyyMMddHHmmss}.
   *
   * <p>{@link SimpleDateFormat} is not synchronized and interprets values in the JVM's default
   * time zone. NOTE(review): if parsers are ever used from several threads, access to this shared
   * instance needs external synchronization.
   */
  public static final SimpleDateFormat TIMECONVERTER = new SimpleDateFormat("yyyyMMddHHmmss");

  /** Prefix of the line carrying the acceptance datetime. */
  private static final String DATETIME_TAG = "<ACCEPTANCE-DATETIME>";
  /** Line that opens the embedded XML section. */
  private static final String XML_OPEN = "<XML>";
  /** Line that closes the embedded XML section. */
  private static final String XML_CLOSE = "</XML>";

  /** Timestamp (epoch millis) found by the latest parse; -1 when none is pending. */
  private long timestamp;

  /**
   * Creates a parser with no timestamp assigned yet.
   */
  public TxtXmlParser() {
    super();
    timestamp = -1;
  }

  /**
   * Method used to parse the xml file.
   *
   * <p>Any timestamp still pending from an earlier call is discarded first, so that
   * {@link #getTimestamp()} always refers to the most recent call.
   *
   * @param pathToXml The path (URL) to the xml text file.
   * @return The tree structure parsed as an xml doc, or null if the text could not be fetched
   *     or the XML could not be parsed.
   */
  @Override
  public Document parse(String pathToXml) {
    // Without this reset, a file lacking an acceptance datetime (or a failed download) would
    // leave the previous file's timestamp in place and getTimestamp() would report it.
    timestamp = -1;
    try {
      System.err.println("LOG: To make class for url: " + pathToXml + " in parse() of " + getClass());
      URL url = new URL(pathToXml);

      System.err.println("LOG: To establish urlConnection in parse() of " + getClass());
      URLConnection conn = url.openConnection();
      conn.addRequestProperty("User-Agent", "Chrome");

      System.err.println("LOG: Making bufferedReader for url: " + pathToXml + " in " + getClass());
      String xml;
      // try-with-resources closes the reader (and the connection's stream) even when reading fails.
      try (BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
        xml = extractXml(br);
      }

      System.err.println("LOG: Calling builder.parse() after extracting xml parts from: " + pathToXml + " in " + getClass());
      InputSource xmlLines = new InputSource(new StringReader(xml));
      return builder.parse(xmlLines);
    } catch (SAXException e) {
      System.err.println("INTERNAL: SAX " + getClass() + " : " + e.getClass());
    } catch (IOException e) {
      e.printStackTrace();
      System.err.println("INTERNAL: IO " + getClass() + " : " + e.getClass());
    }
    return null;
  }

  /**
   * Scans the text line by line, recording the acceptance timestamp and collecting the lines of
   * the embedded XML section. Scanning stops at the first line equal to {@code </XML>}.
   *
   * @param reader Reader positioned at the start of the txt file.
   * @return The lines strictly between the {@code <XML>} and {@code </XML>} lines, concatenated
   *     without line separators; empty if no {@code <XML>} line was seen.
   * @throws IOException If reading from the reader fails.
   */
  private String extractXml(BufferedReader reader) throws IOException {
    StringBuilder xmlParts = new StringBuilder();
    boolean isXml = false;
    String line;
    while ((line = reader.readLine()) != null) {
      // Get timestamp: the value is whatever follows the tag on the same line.
      if (line.startsWith(DATETIME_TAG)) {
        String datetime = line.substring(DATETIME_TAG.length()).trim();
        // A malformed value yields -1 from formatTimestamp rather than an exception.
        this.timestamp = formatTimestamp(datetime);
      }
      // For xml
      if (line.equals(XML_CLOSE)) {
        break;
      }
      if (isXml) {
        xmlParts.append(line);
      }
      // Checked after the append so the opening marker itself is never part of the result.
      if (line.equals(XML_OPEN)) {
        isXml = true;
      }
    }
    return xmlParts.toString();
  }

  /**
   * Converts a datetime written as {@code yyyyMMddHHmmss} into epoch milliseconds.
   *
   * @param datetime The datetime text to convert; may be null.
   * @return The epoch milliseconds, or -1 if the text is null or cannot be parsed.
   */
  public long formatTimestamp(String datetime) {
    if (datetime == null) {
      return -1;
    }
    long millis = -1;
    try {
      millis = TIMECONVERTER.parse(datetime).toInstant().toEpochMilli();
    } catch (ParseException e) {
      e.printStackTrace();
    }
    return millis;
  }

  /**
   * Returns the timestamp then resets it to -1.
   *
   * @return The timestamp as a number (long). -1 if not assigned.
   */
  public long getTimestamp() {
    long temp = timestamp;
    // Set to -1 for next one...
    timestamp = -1;
    return temp;
  }
}
|