4A Server -  2.0
 All Classes Namespaces Files Functions Variables Enumerator
LevenshteinMethod.java
Go to the documentation of this file.
1 /*
2  * Project: Server for annotations sharing
3  * Subproject: Position of fragment of text in XML document
4  * Authors: Michael Angelov
5  * File: LevenshteinMethod.java
6  * Description: Compare class using Levenshtein approximate string matching method
7  */
8 
9 /**
10  * @file LevenshteinMethod.java
11  *
12  * @brief Compare class using Levenshtein approximate string matching method
13  */
14 
15 package cz.vutbr.fit.knot.annotations.fragmentUpdater.compareMethods;
16 
17 /**
18  * Compare class using Levenshtein approximate string matching method
19  *
20  * @brief Compare class using Levenshtein approximate string matching method
21  * @author Michael Angelov
22  */
23 public class LevenshteinMethod extends CompareMethod {
24 
25  private double minimumPercentage;
26 
27  /**
28  * Constructor
29  *
30  * @param minimumPercentage minimum similarity percentage condition that must be met for strings to match
31  */
33  super();
34  this.minimumPercentage = minimumPercentage;
35  }
36 
37  /**
38  * Minimum percentage setter
39  *
40  * @param minimumPercentage minimum similarity percentage
41  */
43  this.minimumPercentage = minimumPercentage;
44  }
45 
46  /**
47  * Minimum percentage getter
48  *
49  * @return minimum similarity percentage
50  */
51  public double getMinimumPercentage() {
52  return minimumPercentage;
53  }
54 
55  /**
56  * Overriden compare() method
57  *
58  * @param first first string to compare
59  * @param second second string to compare
60  * @return true if strings match, false if don't
61  */
62  @Override
63  public boolean compare(String first, String second) {
64  if (second == null) {
65  return false;
66  }
67  if (similarity(first, second) >= minimumPercentage) {
68  return true;
69  } else {
70  return false;
71  }
72  }
73 
74  /**
75  * Overriden toString() method
76  *
77  * @return string representation of class
78  */
79  @Override
80  public String toString() {
81  return "Levenshtein - " + (int)(minimumPercentage * 100) + "%";
82  }
83 
84  /**
85  * Function for choosing minimum of three values
86  *
87  * @param a first value
88  * @param b second value
89  * @param c third value
90  * @return the minimum value
91  */
92  private static int Minimum(int a, int b, int c) {
93  int mi;
94 
95  mi = a;
96 
97  if (b < mi) {
98  mi = b;
99  }
100 
101  if (c < mi) {
102  mi = c;
103  }
104 
105  return mi;
106  }
107 
108  /**
109  * Levenshtein's similarity algorithm
110  *
111  * @param s first string to compare
112  * @param t second string to compare
113  * @return similarity in interval <0..1> (0-100%)
114  */
115  private static double similarity(String s, String t) {
116 
117  int d[][]; // matrix
118  int n; // length of s
119  int m; // length of t
120  int i; // iterates through s
121  int j; // iterates through t
122  char s_i; // ith character of s
123  char t_j; // jth character of t
124  int cost; // cost
125 
126  // Step 1
127 
128  n = s.length();
129  m = t.length();
130 
131  double max = (double) Math.max(n, m);
132  double similarity;
133 
134  if (n == 0) {
135  return 1 - (m / max);
136  }
137  if (m == 0) {
138  return 1 - (n / max);
139  }
140 
141  d = new int[n + 1][m + 1];
142 
143  for (i = 0; i <= n; i++) {
144  d[i][0] = i;
145  }
146 
147  for (j = 0; j <= m; j++) {
148  d[0][j] = j;
149  }
150 
151  for (i = 1; i <= n; i++) {
152 
153  s_i = s.charAt(i - 1);
154 
155  for (j = 1; j <= m; j++) {
156 
157  t_j = t.charAt(j - 1);
158 
159  if (s_i == t_j) {
160  cost = 0;
161  } else {
162  cost = 1;
163  }
164 
165  d[i][j] = Minimum(d[i - 1][j] + 1, // cost for insertion
166  d[i][j - 1] + 1, // cost for deletion
167  d[i - 1][j - 1] + cost); // cost for substitution
168 
169  }
170  }
171 
172  // set similarity to interval <0, 1>
173  similarity = 1.0f - (double) (d[n][m]) / Math.max(n, m);
174 
175  return similarity;
176  }
177 
178 } // class LevenshteinMethod
Compare class using Levenshtein approximate string matching method.