/**
 * Copyright 2005 Steve Molloy
 *
 * This file is part of OV4J.
 *
 * OV4J is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
 *
 * OV4J is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with OV4J; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */
package org.ov4j.comp;

import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.ov4j.Config;

/**
 * This class is used to compare sentences, or any group of words.
 * <p>
 * Both sentences are split into words with the configured separator expression, blank fragments are discarded, and
 * the two word sequences are then walked in parallel. Words are either paired (and scored through a
 * {@link StringComparisonResult}), reported as false negatives (present only in the original) or reported as false
 * positives (present only in the changed sentence).
 *
 * @author smolloy
 *
 */
public class SentenceComparisonResult extends ComparisonResult<String> {
    /**
     * Logger for this class
     */
    private static final Logger logger = Logger.getLogger(SentenceComparisonResult.class.getName());

    /** Regular expression for splitting words. */
    private static final String WORD_SEPARATION_REGEX = Config.getString("OV4J.comp.WordSeparationRegex");

    /**
     * Compares the original and the changed sentence word by word and stores matches, false positives, false
     * negatives, precision and recall on this result.
     * <p>
     * When only one of the two sentences is set, no word comparison takes place: a missing changed sentence yields
     * precision 1.0 / recall 0.0, a missing original yields precision 0.0 / recall 1.0, and two missing sentences
     * yield 1.0 for both.
     *
     * @see org.ov4j.comp.ComparisonResult#compute()
     */
    @Override
    public void compute() {
        if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
            SentenceComparisonResult.logger.entering("SentenceComparisonResult", "compute()", "start");
        }

        final String original = getOriginal();
        final String changed = getChanged();

        if (original != null && changed != null) {
            compareWords(SentenceComparisonResult.splitWords(original), SentenceComparisonResult.splitWords(changed));
        } else if (original != null) {
            setPrecision(1.0);
            setRecall(0.0);
        } else if (changed != null) {
            setPrecision(0.0);
            setRecall(1.0);
        } else {
            setPrecision(1.0);
            setRecall(1.0);
        }

        if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
            SentenceComparisonResult.logger.exiting("SentenceComparisonResult", "compute()", "end");
        }
    }

    /**
     * Sets the precision to 1.0 when both sentences are set and equal. In every other case this method leaves the
     * result untouched; recall is never modified here.
     *
     * @see org.ov4j.comp.ComparisonResult#fastCompute()
     */
    @Override
    public void fastCompute() {
        if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
            SentenceComparisonResult.logger.entering("SentenceComparisonResult", "fastCompute()", "start");
        }

        if (getOriginal() != null && getChanged() != null && getOriginal().equals(getChanged())) {
            setPrecision(1.0);
        }

        if (SentenceComparisonResult.logger.isLoggable(Level.FINER)) {
            SentenceComparisonResult.logger.exiting("SentenceComparisonResult", "fastCompute()", "end");
        }
    }

    /**
     * Aligns the two word sequences and publishes matches, false positives, false negatives, precision and recall.
     *
     * @param oWords
     *            non-blank words of the original sentence
     * @param cWords
     *            non-blank words of the changed sentence
     */
    private void compareWords(final String[] oWords, final String[] cWords) {
        final ArrayList<StringComparisonResult> matches = new ArrayList<StringComparisonResult>();
        final ArrayList<String> falseNegatives = new ArrayList<String>();
        final ArrayList<String> falsePositives = new ArrayList<String>();

        int i = 0;
        int j = 0;
        while (i < oWords.length && j < cWords.length) {
            if (oWords[i].equals(cWords[j])) {
                matches.add(SentenceComparisonResult.pair(oWords[i], cWords[j]));
                i++;
                j++;
                continue;
            }

            // Look ahead in the word arrays, starting at the current word positions. The search must not be done
            // on the raw sentences with i / j as offsets: those are word indices, not character offsets, and a
            // substring search would also hit fragments of longer words and words that were already consumed.
            final int oIdx = SentenceComparisonResult.indexOfWord(oWords, cWords[j], i);
            final int cIdx = SentenceComparisonResult.indexOfWord(cWords, oWords[i], j);

            if (oIdx == cIdx) {
                // Neither word shows up further on in the other sentence (or both show up at the same word
                // index): treat the two words as one modified word and let the word comparison score it.
                matches.add(SentenceComparisonResult.pair(oWords[i], cWords[j]));
                i++;
                j++;
            } else {
                // Decide which side to skip by checking which skip lines up better with the following word.
                final StringComparisonResult probe = new StringComparisonResult();

                probe.setOriginal(oWords[i]);
                probe.setChanged(SentenceComparisonResult.wordAfter(cWords, j));
                probe.compute();
                final double precisionIfChangedSkipped = probe.getPrecision();

                probe.setOriginal(SentenceComparisonResult.wordAfter(oWords, i));
                probe.setChanged(cWords[j]);
                probe.compute();
                final double precisionIfOriginalSkipped = probe.getPrecision();

                if (precisionIfOriginalSkipped > precisionIfChangedSkipped) {
                    falseNegatives.add(oWords[i]);
                    i++;
                } else {
                    falsePositives.add(cWords[j]);
                    j++;
                }
            }
        }

        // Whatever is left on either side has no counterpart.
        while (i < oWords.length) {
            falseNegatives.add(oWords[i++]);
        }
        while (j < cWords.length) {
            falsePositives.add(cWords[j++]);
        }

        final StringComparisonResult[] newMatches = matches.toArray(new StringComparisonResult[matches.size()]);
        setMatches(newMatches);
        setFalsePositives(falsePositives.toArray(new String[falsePositives.size()]));
        setFalseNegatives(falseNegatives.toArray(new String[falseNegatives.size()]));

        double precision = 0.0;
        double recall = 0.0;
        for (final StringComparisonResult match : newMatches) {
            precision += match.getPrecision();
            recall += match.getRecall();
        }

        // Averages are taken over all words of the respective sentence and clamped to [0, 1]; an empty sentence
        // counts as a perfect score.
        if (cWords.length > 0) {
            setPrecision(Math.min(1.0, Math.max(0.0, precision / cWords.length)));
        } else {
            setPrecision(1.0);
        }
        if (oWords.length > 0) {
            setRecall(Math.min(1.0, Math.max(0.0, recall / oWords.length)));
        } else {
            setRecall(1.0);
        }
    }

    /**
     * Splits a sentence with the configured separator expression and drops fragments that are empty once trimmed.
     *
     * @param sentence
     *            sentence to split, must not be <code>null</code>
     * @return the remaining words, in their original order
     */
    private static String[] splitWords(final String sentence) {
        final ArrayList<String> words = new ArrayList<String>();
        for (final String candidate : sentence.split(SentenceComparisonResult.WORD_SEPARATION_REGEX)) {
            if (candidate.trim().length() > 0) {
                words.add(candidate);
            }
        }
        return words.toArray(new String[words.size()]);
    }

    /**
     * Finds the first position at or after <code>from</code> holding exactly the given word.
     *
     * @param words
     *            words to search
     * @param word
     *            word to look for
     * @param from
     *            first index to inspect
     * @return the index of the word, or {@link Integer#MAX_VALUE} when it does not occur
     */
    private static int indexOfWord(final String[] words, final String word, final int from) {
        for (int idx = from; idx < words.length; idx++) {
            if (words[idx].equals(word)) {
                return idx;
            }
        }
        return Integer.MAX_VALUE;
    }

    /**
     * Returns the word following the given position, or the empty string when the position is the last one.
     *
     * @param words
     *            words to read from
     * @param index
     *            current position
     * @return the next word or <code>""</code>
     */
    private static String wordAfter(final String[] words, final int index) {
        return (index + 1 >= words.length) ? "" : words[index + 1];
    }

    /**
     * Builds and computes the word level comparison for a pair of aligned words.
     *
     * @param originalWord
     *            word taken from the original sentence
     * @param changedWord
     *            word taken from the changed sentence
     * @return the computed comparison
     */
    private static StringComparisonResult pair(final String originalWord, final String changedWord) {
        final StringComparisonResult result = new StringComparisonResult();
        result.setOriginal(originalWord);
        result.setChanged(changedWord);
        result.compute();
        return result;
    }
}