4A Server -  2.0
 All Classes Namespaces Files Functions Variables Enumerator
Util.java
Go to the documentation of this file.
1 /*
2  * Project: Server for annotations sharing
3  * Author: Ing. Jaroslav Dytrych idytrych@fit.vutbr.cz
4  * File: Util.java
5  * Description: Utility class (manipulates RFC 3339 dates and contains other
6  * utility methods)
7  */
8 
9 /**
10  * @file Util.java
11  *
12  * @brief Utility class (manipulates RFC 3339 dates and contains other utility methods)
13  */
14 
15 package cz.vutbr.fit.knot.annotations.comet;
16 
17 import java.text.SimpleDateFormat;
18 import java.util.ArrayList;
19 import java.util.Arrays;
20 import java.util.Date;
21 import java.util.Iterator;
22 
23 /**
24  * Utility class (manipulates RFC 3339 dates and contains other utility methods)
25  *
26  * @brief Utility class (manipulates RFC 3339 dates)
27  * @author idytrych
28  */
29 public class Util {
30  /*
31  * Adapted from http://lit.hatori42.com/RFC3339Date.txt and added sign recognition.
32  *
33  I was working on an Atom (http://www.w3.org/2005/Atom) parser and discovered that I
34  could not parse dates in the format defined by RFC 3339 using the SimpleDateFormat
35  class. The reason was the ':' in the time zone. This code strips out the colon if
36  it's there and tries four different formats on the resulting string depending on if
37  it has a time zone, or if it has a fractional second part. There is a probably a
38  better way to do this, and a more proper way. But this is a really small addition
39  to a codebase (You don't need a jar, just throw this function in some static
40  Utility class if you have one).
41 
42  Feel free to use this in your code, but I'd appreciate it if you keep this note in
43  the code if you distribute it. Thanks!
44 
45  For people who might be googling: The date format parsed by this goes by:
46  atomDateConstruct, xsd:dateTime, RFC3339 and is compatable with: ISO.8601.1988,
47  W3C.NOTE-datetime-19980827 and W3C.REC-xmlschema-2-20041028 (that I know of)
48 
49 
50  Copyright 2007, Chad Okere (ceothrow1 at gmail dotcom)
51  OMG NO WARRENTY EXPRESSED OR IMPLIED!!!1
52  */
53 
54  public static final int RESERVED_CHARS = 0;
55  public static final int TEXTUAL_CHARS = 1;
56  public static final int NUMERAL_CHARS = 2;
57  public static final int TEXTUAL_MAX_SIZE = 8;
58  public static final int NUMERAL_MAX_SIZE = 4;
59 
60  public static final String START_OF_TEXTUAL = "&";
61  public static final String START_OF_NUMERAL = "&#";
62  public static final String END_OF_MARK = ";";
63 
64  public static final ArrayList<String> AMP_LIST = new ArrayList<String>(Arrays.asList("&",START_OF_TEXTUAL + "amp" + END_OF_MARK,START_OF_NUMERAL + "38" + END_OF_MARK));
65  public static final ArrayList<String> APOS_LIST = new ArrayList<String>(Arrays.asList("'",START_OF_TEXTUAL + "apos" + END_OF_MARK,START_OF_NUMERAL + "39" + END_OF_MARK));
66  public static final ArrayList<String> QUOT_LIST = new ArrayList<String>(Arrays.asList("\"",START_OF_TEXTUAL + "quot" + END_OF_MARK,START_OF_NUMERAL + "34" + END_OF_MARK));
67  public static final ArrayList<String> GT_LIST = new ArrayList<String>(Arrays.asList(">",START_OF_TEXTUAL + "gt" + END_OF_MARK,START_OF_NUMERAL + "62" + END_OF_MARK));
68  public static final ArrayList<String> LT_LIST = new ArrayList<String>(Arrays.asList("<",START_OF_TEXTUAL + "lt" + END_OF_MARK,START_OF_NUMERAL + "60" + END_OF_MARK));
69 
70  public static final ArrayList<ArrayList<String>> RESERVED_HTML_CHARS = new ArrayList<ArrayList<String>>();
71  static{
72  RESERVED_HTML_CHARS.add(AMP_LIST);
73  RESERVED_HTML_CHARS.add(APOS_LIST);
74  RESERVED_HTML_CHARS.add(QUOT_LIST);
75  RESERVED_HTML_CHARS.add(GT_LIST);
76  RESERVED_HTML_CHARS.add(LT_LIST);
77  }
78  /**
79  * Parse date in format according to RFC 3339
80  *
81  * @author Chad Okere (ceothrow1 at gmail dotcom)
82  * @author idytrych
83  * @param datestring Date in string
84  * @return Returns parsed date
85  */
86  public static java.util.Date parseRFC3339Date(String datestring) throws java.text.ParseException, IndexOutOfBoundsException {
87  Date d = new Date();
88 
89  //if there is no time zone, we don't need to do any special parsing.
90  if (datestring.endsWith("Z")) {
91  try {
92  SimpleDateFormat s = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");//spec for RFC3339
93  d = s.parse(datestring);
94  } catch (java.text.ParseException pe) {//try again with optional decimals
95  SimpleDateFormat s = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSSSS'Z'");//spec for RFC3339 (with fractional seconds)
96  s.setLenient(true);
97  d = s.parse(datestring);
98  }
99  return d;
100  }
101 
102  //step one, split off the timezone.
103  String sign = "-";
104  if (datestring.contains("+")) {
105  sign = "+";
106  }
107  String firstpart = datestring.substring(0, datestring.lastIndexOf(sign));
108  String secondpart = datestring.substring(datestring.lastIndexOf(sign));
109 
110  //step two, remove the colon from the timezone offset
111  secondpart = secondpart.substring(0, secondpart.indexOf(':')) + secondpart.substring(secondpart.indexOf(':') + 1);
112  datestring = firstpart + secondpart;
113  SimpleDateFormat s = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");//spec for RFC3339
114  try {
115  d = s.parse(datestring);
116  } catch (java.text.ParseException pe) {//try again with optional decimals
117  s = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSSSSZ");//spec for RFC3339 (with fractional seconds)
118  s.setLenient(true);
119  d = s.parse(datestring);
120  }
121  return d;
122  } // parseRFC3339Date()
123 
124  /**
125  * Parse date in format according to RFC 3339 (date only, without time)
126  *
127  * @author idytrych
128  * @param datestring Date in string
129  * @return Returns parsed date
130  */
131  public static java.util.Date parseDate(String datestring) throws java.text.ParseException, IndexOutOfBoundsException {
132  Date d = new Date();
133 
134  //if there is no time zone, we don't need to do any special parsing.
135  if (datestring.endsWith("Z")) {
136  SimpleDateFormat s = new SimpleDateFormat("yyyy-MM-dd'Z'");//spec for RFC3339
137  d = s.parse(datestring);
138  return d;
139  }
140 
141  //step one, split off the timezone.
142  String sign = "-";
143  if (datestring.contains("+")) {
144  sign = "+";
145  }
146  String firstpart = datestring.substring(0, datestring.lastIndexOf(sign));
147  String secondpart = datestring.substring(datestring.lastIndexOf(sign));
148 
149  //step two, remove the colon from the timezone offset
150  secondpart = secondpart.substring(0, secondpart.indexOf(':')) + secondpart.substring(secondpart.indexOf(':') + 1);
151  datestring = firstpart + secondpart;
152  SimpleDateFormat s = new SimpleDateFormat("yyyy-MM-ddZ");//spec for RFC3339
153  d = s.parse(datestring);
154  return d;
155  } // parseDate()
156 
157  /**
158  * Parse time in format according to RFC 3339
159  *
160  * @author idytrych
161  * @param datestring Time in string
162  * @return Returns parsed time (with some date)
163  */
164  public static java.util.Date parseTime(String datestring) throws java.text.ParseException, IndexOutOfBoundsException {
165  Date d = new Date();
166 
167  //if there is no time zone, we don't need to do any special parsing.
168  if (datestring.endsWith("Z")) {
169  try {
170  SimpleDateFormat s = new SimpleDateFormat("HH:mm:ss'Z'");//spec for RFC3339
171  d = s.parse(datestring);
172  } catch (java.text.ParseException pe) {//try again with optional decimals
173  SimpleDateFormat s = new SimpleDateFormat("HH:mm:ss.SSSSSS'Z'");//spec for RFC3339 (with fractional seconds)
174  s.setLenient(true);
175  d = s.parse(datestring);
176  }
177  return d;
178  }
179 
180  //step one, split off the timezone.
181  String sign = "-";
182  if (datestring.contains("+")) {
183  sign = "+";
184  }
185  String firstpart = datestring.substring(0, datestring.lastIndexOf(sign));
186  String secondpart = datestring.substring(datestring.lastIndexOf(sign));
187 
188  //step two, remove the colon from the timezone offset
189  secondpart = secondpart.substring(0, secondpart.indexOf(':')) + secondpart.substring(secondpart.indexOf(':') + 1);
190  datestring = firstpart + secondpart;
191  SimpleDateFormat s = new SimpleDateFormat("HH:mm:ssZ");//spec for RFC3339
192  try {
193  d = s.parse(datestring);
194  } catch (java.text.ParseException pe) {//try again with optional decimals
195  s = new SimpleDateFormat("HH:mm:ss.SSSSSSZ");//spec for RFC3339 (with fractional seconds)
196  s.setLenient(true);
197  d = s.parse(datestring);
198  }
199  return d;
200  } // parseTime()
201 
202  /**
203  * Converts date to string in format according to RFC 3339
204  *
205  * @param date Date to be converted
206  * @return String with date in format according to RFC 3339
207  */
208  public static String toRFC3339Date(Date date) {
209  if(date == null) return "";
210  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
211  String formatted = sdf.format(date);
212  if (!formatted.endsWith("Z")) {
213  formatted = formatted.substring(0,formatted.length() - 2) + ":"
214  + formatted.substring(formatted.length() - 2,formatted.length());
215  }
216  return formatted;
217  }
218 
219  /**
220  * Converts date to string in format according to RFC 3339 (date only, without time)
221  *
222  * @param date Date to be converted
223  * @return String with date in format according to RFC 3339 (without time)
224  */
225  public static String toRFC3339DateOnly(Date date) {
226  if(date == null) return "";
227  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-ddZ");
228  String formatted;
229  try{
230  formatted = sdf.format(date);
231  }
232 
233  catch(Exception e){
234  System.out.print(e.getMessage());
235  return "";
236  }
237 
238  if (!formatted.endsWith("Z")) {
239  formatted = formatted.substring(0,formatted.length() - 2) + ":"
240  + formatted.substring(formatted.length() - 2,formatted.length());
241  }
242  return formatted;
243  }
244 
245  /**
246  * Converts date to string in format according to RFC 3339
247  * (date only, without time, without time zone)
248  *
249  * @param date Date to be converted
250  * @return String with date in format according to RFC 3339 (without time and time zone)
251  */
252  public static String toRFC3339DateOnlyWTZ(Date date) {
253  if(date == null) return "";
254  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
255  String formatted;
256  try{
257  formatted = sdf.format(date);
258  }
259 
260  catch(Exception e){
261  System.out.print(e.getMessage());
262  return "";
263  }
264 
265  return formatted;
266  }
267 
268  /**
269  * Converts time to string in format according to RFC 3339
270  *
271  * @param date Date with time to be converted
272  * @return String with time in format according to RFC 3339
273  */
274  public static String toRFC3339Time(Date date) {
275  if(date == null) return "";
276  SimpleDateFormat sdf = new SimpleDateFormat("HH:mm:ssZ");
277  String formatted = sdf.format(date);
278  if (!formatted.endsWith("Z")) {
279  formatted = formatted.substring(0,formatted.length() - 2) + ":"
280  + formatted.substring(formatted.length() - 2,formatted.length());
281  }
282  return formatted;
283  }
284 
285 
286  /**
287  * Gets minimum of three values
288  *
289  * @param a Value 1
290  * @param b Value 2
291  * @param c Value 3
292  * @return Returns minimum of three values
293  */
294  public static int minimum (int a, int b, int c) {
295  return Math.min(a, Math.min(b, c));
296  }
297 
298  /**
299  * Compute Levenshtein distance
300  * Adapted from http://www.merriampark.com/ld.htm
301  *
302  * @author Michael Gilleland (megilleland at yahoo.com)
303  * @param s String s
304  * @param t String t
305  * @return Returns Levenshtein distance of strings s and t
306  */
307  public static int levenshtein (String s, String t) {
308  int d[][]; // matrix
309  int n; // length of s
310  int m; // length of t
311  int i; // iterates through s
312  int j; // iterates through t
313  char s_i; // ith character of s
314  char t_j; // jth character of t
315  int cost; // cost
316 
317  // Step 1
318 
319  n = s.length();
320  m = t.length();
321  if (n == 0) {
322  return m;
323  }
324  if (m == 0) {
325  return n;
326  }
327  d = new int[n + 1][m + 1];
328 
329  // Step 2
330 
331  for (i = 0; i <= n; i++) {
332  d[i][0] = i;
333  }
334 
335  for (j = 0; j <= m; j++) {
336  d[0][j] = j;
337  }
338 
339  // Step 3
340 
341  for (i = 1; i <= n; i++) {
342 
343  s_i = s.charAt(i - 1);
344 
345  // Step 4
346 
347  for (j = 1; j <= m; j++) {
348 
349  t_j = t.charAt(j - 1);
350 
351  // Step 5
352 
353  if (s_i == t_j) {
354  cost = 0;
355  } else {
356  cost = 1;
357  }
358 
359  // Step 6
360 
361  d[i][j] = minimum(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
362 
363  }
364 
365  }
366 
367  // Step 7
368 
369  return d[n][m];
370 
371  } // levenshtein()
372 
373  /**
374  * Escapes string for use in XML
375  * (replaces <,>,&,",')
376  *
377  * @param str String to escape
378  * @return Escaped string
379  */
380  public static String escapeForXml(String str) {
381  String escaped = str.replace("&", "&amp;");
382  escaped = escaped.replace("<", "&lt;");
383  escaped = escaped.replace(">", "&gt;");
384  escaped = escaped.replace("\"", "&quot;");
385  escaped = escaped.replace("'", "&apos;");
386  return escaped;
387  }
388 
389  /**
390  * Parses the timezone from the end of the String
391  * @param date String with date and timezone on the end of it.
392  * @return Timezone string or null
393  */
394  public static String parseTimeZoneID(String date){
395  if(date == null || date.length()-5 < 0){
396  return null;
397  }
398  String offset = date.substring(date.length()-6, date.length());
399  if(offset.matches("(\\+|-)(0[0-9]|1[0-5]):[0-5][0-9]")){
400  return offset;
401  }
402  else{
403  return null;
404  }
405  }
406 
407  /**
408  * The method seeks a special character in a string and evaluates whether it
409  * is a character entity.
410  *
411  * @param source tested string
412  * @return true if it is character entity
413  */
414  private static boolean isSpecial(String source){
415  int startIndex = 0;
416  boolean isTextOnly = false;
417  boolean isNumberOnly = false;
418 
419  if(source.charAt(0) != '&'){
420  return false;
421  }else{
422  startIndex ++;
423  }
424 
425  if(source.charAt(1) == '#'){
426  startIndex ++;
427  isNumberOnly = true;
428  }
429 
430  for(int i = startIndex; i < source.length(); i++){
431  char actualChar = source.charAt(i);
432 
433  if(actualChar == ';' && (isTextOnly || isNumberOnly) && i > 0){
434  return true;
435  }
436 
437  if(isTextOnly && !isNumberOnly){
438  //text only section
439  if(!Character.isLetter(actualChar)){
440  return false;
441  }
442  }else if(!isTextOnly && isNumberOnly){
443  //number only
444  if(!Character.isDigit(actualChar) && (i - startIndex) > NUMERAL_MAX_SIZE){
445  return false;
446  }
447  }else{
448  return false;
449  }
450  }
451  return false;
452  }
453  /**
454  * The method converts an ordinary string to XML string.
455  *
456  * @param source ordinary string
457  * @return converted XML string
458  *
459  */
460  public static String toHTMLString(String source){
461  if (source == null) {
462  return "";
463  }
464  StringBuilder result = new StringBuilder();
465  boolean added;
466 
467  for(int i = 0; i < source.length(); i++){
468  added = false;
469  Iterator <ArrayList<String>> reservedCharsIt = RESERVED_HTML_CHARS.iterator();
470  while(reservedCharsIt.hasNext()){
471  ArrayList<String> currentReservedChar = reservedCharsIt.next();
472  if(source.charAt(i) == currentReservedChar.get(RESERVED_CHARS).charAt(0)){
473  if(source.charAt(i) != '&'){
474  result.append(currentReservedChar.get(NUMERAL_CHARS));
475  added = true;
476  continue;
477  }else{
478  int index = source.indexOf(END_OF_MARK, i);
479  int length = 0;
480  if(index > 0){
481  length = index - source.length();
482  }
483 
484  if(length > 0 && length <= TEXTUAL_MAX_SIZE && isSpecial(source.substring(i, index))){
485  i += length;
486  result.append(source.substring(i, index));
487  added = true;
488  continue;
489  }else{
490  result.append(currentReservedChar.get(NUMERAL_CHARS));
491  added = true;
492  continue;
493  }
494  }
495  }
496  }
497  if(!added){
498  result.append(source.charAt(i));
499  }
500  }
501 
502  return result.toString();
503  }
504 
505 } // class Util
static String parseTimeZoneID(String date)
Definition: Util.java:394
static final String START_OF_NUMERAL
Definition: Util.java:61
static String toRFC3339DateOnlyWTZ(Date date)
Definition: Util.java:252
static boolean isSpecial(String source)
Definition: Util.java:414
static int levenshtein(String s, String t)
Definition: Util.java:307
static String escapeForXml(String str)
Definition: Util.java:380
static final ArrayList< String > AMP_LIST
Definition: Util.java:64
static final String START_OF_TEXTUAL
Definition: Util.java:60
static int minimum(int a, int b, int c)
Definition: Util.java:294
static final ArrayList< String > APOS_LIST
Definition: Util.java:65
static final ArrayList< String > LT_LIST
Definition: Util.java:68
static String toRFC3339Time(Date date)
Definition: Util.java:274
static final String END_OF_MARK
Definition: Util.java:62
static String toRFC3339DateOnly(Date date)
Definition: Util.java:225
static final ArrayList< String > GT_LIST
Definition: Util.java:67
static String toHTMLString(String source)
Definition: Util.java:460
static java.util.Date parseTime(String datestring)
Definition: Util.java:164
static java.util.Date parseDate(String datestring)
Definition: Util.java:131
static final ArrayList< ArrayList< String > > RESERVED_HTML_CHARS
Definition: Util.java:70
Utility class (manipulates RFC 3339 dates)
Definition: Util.java:29
static final ArrayList< String > QUOT_LIST
Definition: Util.java:66
static String toRFC3339Date(Date date)
Definition: Util.java:208
static java.util.Date parseRFC3339Date(String datestring)
Definition: Util.java:86