View Javadoc

1   /**
2    * Copyright 2005 Steve Molloy
3    * 
4    * This file is part of OV4J.
5    * 
6    * OV4J is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as
7    * published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
8    * 
9    * OV4J is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
10   * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11   * 
12   * You should have received a copy of the GNU General Public License along with OV4J; if not, write to the Free Software
13   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
14   * 
15   */
16  package org.ov4j.comp;
17  
18  import java.util.ArrayList;
19  import java.util.logging.Level;
20  import java.util.logging.Logger;
21  
22  import org.ov4j.Config;
23  
24  /**
25   * This class is used to compare sentences, or any group of words.
26   * 
27   * @author smolloy
28   * 
29   */
30  public class SentenceComparisonResult extends ComparisonResult<String> {
31  	/**
32  	 * Logger for this class
33  	 */
34  	private static final Logger	logger					= Logger.getLogger(SentenceComparisonResult.class.getName());
35  
36  	/** Regular expression for splitting words. */
37  	private static final String	WORD_SEPARATION_REGEX	= Config.getString("OV4J.comp.WordSeparationRegex");
38  
39  	/**
40  	 * @see org.ov4j.comp.ComparisonResult#compute()
41  	 */
42  	@Override
43  	public void compute() {
44  		if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
45  			SentenceComparisonResult.logger.entering("SentenceComparisonResult", "compute()", "start");
46  		}
47  
48  		if (getOriginal() != null && getChanged() != null) {
49  			String[] oWords = getOriginal().split(SentenceComparisonResult.WORD_SEPARATION_REGEX);
50  			String[] cWords = getChanged().split(SentenceComparisonResult.WORD_SEPARATION_REGEX);
51  
52  			final ArrayList<String> tmp = new ArrayList<String>();
53  			for (int i = 0; i < oWords.length; i++) {
54  				if (oWords[i].trim().length() > 0) {
55  					tmp.add(oWords[i]);
56  				}
57  			}
58  			oWords = new String[tmp.size()];
59  			tmp.toArray(oWords);
60  			tmp.clear();
61  			for (int i = 0; i < cWords.length; i++) {
62  				if (cWords[i].trim().length() > 0) {
63  					tmp.add(cWords[i]);
64  				}
65  			}
66  			cWords = new String[tmp.size()];
67  			tmp.toArray(cWords);
68  
69  			final ArrayList<StringComparisonResult> tmpMatches = new ArrayList<StringComparisonResult>();
70  			final ArrayList<String> tmpNeg = new ArrayList<String>();
71  			final ArrayList<String> tmpPos = new ArrayList<String>();
72  
73  			int i = 0, j = 0;
74  			while (i < oWords.length && j < cWords.length) {
75  				if (oWords[i].equals(cWords[j])) {
76  					final StringComparisonResult strRes = new StringComparisonResult();
77  					strRes.setOriginal(oWords[i]);
78  					strRes.setChanged(cWords[j]);
79  					strRes.compute();
80  					tmpMatches.add(strRes);
81  					i++;
82  					j++;
83  				} else {
84  					int oIdx = (getOriginal()).indexOf(cWords[j], i);
85  					int cIdx = (getChanged()).indexOf(oWords[i], j);
86  					if (oIdx < 0) {
87  						oIdx = Integer.MAX_VALUE;
88  					}
89  					if (cIdx < 0) {
90  						cIdx = Integer.MAX_VALUE;
91  					}
92  					if (oIdx == cIdx) {
93  						final StringComparisonResult strRes = new StringComparisonResult();
94  						strRes.setOriginal(oWords[i]);
95  						strRes.setChanged(cWords[j]);
96  						strRes.compute();
97  						tmpMatches.add(strRes);
98  						i++;
99  						j++;
100 					} else {
101 						final StringComparisonResult strRes = new StringComparisonResult();
102 						strRes.setOriginal(oWords[i]);
103 						strRes.setChanged((j + 1 >= cWords.length) ? "" : cWords[j + 1]);
104 						strRes.compute();
105 						final double prec1 = strRes.getPrecision();
106 						strRes.setOriginal((i + 1 >= oWords.length) ? "" : oWords[i + 1]);
107 						strRes.setChanged(cWords[j]);
108 						strRes.compute();
109 						final double prec2 = strRes.getPrecision();
110 						if (prec2 > prec1) {
111 							tmpNeg.add(oWords[i]);
112 							i++;
113 						} else {
114 							tmpPos.add(cWords[j]);
115 							j++;
116 						}
117 					}
118 				}
119 			}
120 			while (i < oWords.length) {
121 				tmpNeg.add(oWords[i++]);
122 			}
123 			while (j < cWords.length) {
124 				tmpPos.add(cWords[j++]);
125 			}
126 
127 			final StringComparisonResult[] newMatches = new StringComparisonResult[tmpMatches.size()];
128 			tmpMatches.toArray(newMatches);
129 			setMatches(newMatches);
130 			final String[] newPos = new String[tmpPos.size()];
131 			tmpPos.toArray(newPos);
132 			setFalsePositives(newPos);
133 			final String[] newNeg = new String[tmpNeg.size()];
134 			tmpNeg.toArray(newNeg);
135 			setFalseNegatives(newNeg);
136 
137 			double recall = 0.0;
138 			double precision = 0.0;
139 			for (i = 0; i < getMatches().length; i++) {
140 				precision += getMatches()[i].getPrecision();
141 				recall += getMatches()[i].getRecall();
142 			}
143 			if (cWords.length > 0) {
144 				setPrecision(Math.min(1.0, Math.max(0.0, precision / cWords.length)));
145 			} else {
146 				setPrecision(1.0);
147 			}
148 			if (oWords.length > 0) {
149 				setRecall(Math.min(1.0, Math.max(0.0, recall / oWords.length)));
150 			} else {
151 				setRecall(1.0);
152 			}
153 		} else if (getOriginal() != null) {
154 			setPrecision(1.0);
155 			setRecall(0.0);
156 		} else if (getChanged() != null) {
157 			setPrecision(0.0);
158 			setRecall(1.0);
159 		} else {
160 			setPrecision(1.0);
161 			setRecall(1.0);
162 		}
163 
164 		if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
165 			SentenceComparisonResult.logger.exiting("SentenceComparisonResult", "compute()", "end");
166 		}
167 	}
168 
169 	/**
170 	 * @see org.ov4j.comp.ComparisonResult#fastCompute()
171 	 */
172 	@Override
173 	public void fastCompute() {
174 		if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
175 			SentenceComparisonResult.logger.entering("SentenceComparisonResult", "fastCompute()", "start");
176 		}
177 
178 		if (getOriginal() != null && getChanged() != null && getOriginal().equals(getChanged())) {
179 			setPrecision(1.0);
180 		}
181 
182 		if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
183 			SentenceComparisonResult.logger.exiting("SentenceComparisonResult", "fastCompute()", "end");
184 		}
185 	}
186 }