1 /**
2 * Copyright 2005 Steve Molloy
3 *
4 * This file is part of OV4J.
5 *
6 * OV4J is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
8 *
9 * OV4J is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
10 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License along with OV4J; if not, write to the Free Software
13 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
14 *
15 */
16 package org.ov4j.comp;
17
18 import java.util.ArrayList;
19 import java.util.logging.Level;
20 import java.util.logging.Logger;
21
22 import org.ov4j.Config;
23
24 /**
25 * This class is used to compare sentences, or any group of words.
26 *
27 * @author smolloy
28 *
29 */
30 public class SentenceComparisonResult extends ComparisonResult<String> {
31 /**
32 * Logger for this class
33 */
34 private static final Logger logger = Logger.getLogger(SentenceComparisonResult.class.getName());
35
36 /** Regular expression for splitting words. */
37 private static final String WORD_SEPARATION_REGEX = Config.getString("OV4J.comp.WordSeparationRegex");
38
39 /**
40 * @see org.ov4j.comp.ComparisonResult#compute()
41 */
42 @Override
43 public void compute() {
44 if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
45 SentenceComparisonResult.logger.entering("SentenceComparisonResult", "compute()", "start");
46 }
47
48 if (getOriginal() != null && getChanged() != null) {
49 String[] oWords = getOriginal().split(SentenceComparisonResult.WORD_SEPARATION_REGEX);
50 String[] cWords = getChanged().split(SentenceComparisonResult.WORD_SEPARATION_REGEX);
51
52 final ArrayList<String> tmp = new ArrayList<String>();
53 for (int i = 0; i < oWords.length; i++) {
54 if (oWords[i].trim().length() > 0) {
55 tmp.add(oWords[i]);
56 }
57 }
58 oWords = new String[tmp.size()];
59 tmp.toArray(oWords);
60 tmp.clear();
61 for (int i = 0; i < cWords.length; i++) {
62 if (cWords[i].trim().length() > 0) {
63 tmp.add(cWords[i]);
64 }
65 }
66 cWords = new String[tmp.size()];
67 tmp.toArray(cWords);
68
69 final ArrayList<StringComparisonResult> tmpMatches = new ArrayList<StringComparisonResult>();
70 final ArrayList<String> tmpNeg = new ArrayList<String>();
71 final ArrayList<String> tmpPos = new ArrayList<String>();
72
73 int i = 0, j = 0;
74 while (i < oWords.length && j < cWords.length) {
75 if (oWords[i].equals(cWords[j])) {
76 final StringComparisonResult strRes = new StringComparisonResult();
77 strRes.setOriginal(oWords[i]);
78 strRes.setChanged(cWords[j]);
79 strRes.compute();
80 tmpMatches.add(strRes);
81 i++;
82 j++;
83 } else {
84 int oIdx = (getOriginal()).indexOf(cWords[j], i);
85 int cIdx = (getChanged()).indexOf(oWords[i], j);
86 if (oIdx < 0) {
87 oIdx = Integer.MAX_VALUE;
88 }
89 if (cIdx < 0) {
90 cIdx = Integer.MAX_VALUE;
91 }
92 if (oIdx == cIdx) {
93 final StringComparisonResult strRes = new StringComparisonResult();
94 strRes.setOriginal(oWords[i]);
95 strRes.setChanged(cWords[j]);
96 strRes.compute();
97 tmpMatches.add(strRes);
98 i++;
99 j++;
100 } else {
101 final StringComparisonResult strRes = new StringComparisonResult();
102 strRes.setOriginal(oWords[i]);
103 strRes.setChanged((j + 1 >= cWords.length) ? "" : cWords[j + 1]);
104 strRes.compute();
105 final double prec1 = strRes.getPrecision();
106 strRes.setOriginal((i + 1 >= oWords.length) ? "" : oWords[i + 1]);
107 strRes.setChanged(cWords[j]);
108 strRes.compute();
109 final double prec2 = strRes.getPrecision();
110 if (prec2 > prec1) {
111 tmpNeg.add(oWords[i]);
112 i++;
113 } else {
114 tmpPos.add(cWords[j]);
115 j++;
116 }
117 }
118 }
119 }
120 while (i < oWords.length) {
121 tmpNeg.add(oWords[i++]);
122 }
123 while (j < cWords.length) {
124 tmpPos.add(cWords[j++]);
125 }
126
127 final StringComparisonResult[] newMatches = new StringComparisonResult[tmpMatches.size()];
128 tmpMatches.toArray(newMatches);
129 setMatches(newMatches);
130 final String[] newPos = new String[tmpPos.size()];
131 tmpPos.toArray(newPos);
132 setFalsePositives(newPos);
133 final String[] newNeg = new String[tmpNeg.size()];
134 tmpNeg.toArray(newNeg);
135 setFalseNegatives(newNeg);
136
137 double recall = 0.0;
138 double precision = 0.0;
139 for (i = 0; i < getMatches().length; i++) {
140 precision += getMatches()[i].getPrecision();
141 recall += getMatches()[i].getRecall();
142 }
143 if (cWords.length > 0) {
144 setPrecision(Math.min(1.0, Math.max(0.0, precision / cWords.length)));
145 } else {
146 setPrecision(1.0);
147 }
148 if (oWords.length > 0) {
149 setRecall(Math.min(1.0, Math.max(0.0, recall / oWords.length)));
150 } else {
151 setRecall(1.0);
152 }
153 } else if (getOriginal() != null) {
154 setPrecision(1.0);
155 setRecall(0.0);
156 } else if (getChanged() != null) {
157 setPrecision(0.0);
158 setRecall(1.0);
159 } else {
160 setPrecision(1.0);
161 setRecall(1.0);
162 }
163
164 if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
165 SentenceComparisonResult.logger.exiting("SentenceComparisonResult", "compute()", "end");
166 }
167 }
168
169 /**
170 * @see org.ov4j.comp.ComparisonResult#fastCompute()
171 */
172 @Override
173 public void fastCompute() {
174 if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
175 SentenceComparisonResult.logger.entering("SentenceComparisonResult", "fastCompute()", "start");
176 }
177
178 if (getOriginal() != null && getChanged() != null && getOriginal().equals(getChanged())) {
179 setPrecision(1.0);
180 }
181
182 if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
183 SentenceComparisonResult.logger.exiting("SentenceComparisonResult", "fastCompute()", "end");
184 }
185 }
186 }