Clover Coverage Report
Coverage timestamp: Fri May 9 2008 10:54:27 EST
../../../../img/srcFileCovDistChart9.png 37% of files have more coverage
64   306   32   4.57
44   120   0.5   14
14     2.29  
1    
 
  FuzzyTermEnum       Line # 30 64 32 81.1% 0.8114754
 
  (3)
 
1    package org.apache.lucene.search;
2   
3    /**
4    * Copyright 2004 The Apache Software Foundation
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10    * http://www.apache.org/licenses/LICENSE-2.0
11    *
12    * Unless required by applicable law or agreed to in writing, software
13    * distributed under the License is distributed on an "AS IS" BASIS,
14    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15    * See the License for the specific language governing permissions and
16    * limitations under the License.
17    */
18   
19    import org.apache.lucene.index.IndexReader;
20    import org.apache.lucene.index.Term;
21   
22    import java.io.IOException;
23   
24    /** Subclass of FilteredTermEnum for enumerating all terms that are similar
25    * to the specified filter term.
26    *
27    * <p>Term enumerations are always ordered by Term.compareTo(). Each term in
28    * the enumeration is greater than all that precede it.
29    */
 
30    public final class FuzzyTermEnum extends FilteredTermEnum {
31   
32    /* This should be somewhere around the average long word.
33    * If it is longer, we waste time and space. If it is shorter, we waste a
34    * little bit of time growing the array as we encounter longer words.
35    */
36    private static final int TYPICAL_LONGEST_WORD_IN_INDEX = 19;
37   
38    /* Allows us to save the time required to create a new array
39    * every time similarity is called.
40    */
41    private int[][] d;
42   
43    private float similarity;
44    private boolean endEnum = false;
45   
46    private Term searchTerm = null;
47    private final String field;
48    private final String text;
49    private final String prefix;
50   
51    private final float minimumSimilarity;
52    private final float scale_factor;
53    private final int[] maxDistances = new int[TYPICAL_LONGEST_WORD_IN_INDEX];
54   
55    /**
56    * Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f.
57    * <p>
58    * After calling the constructor the enumeration is already pointing to the first
59    * valid term if such a term exists.
60    *
61    * @param reader
62    * @param term
63    * @throws IOException
64    * @see #FuzzyTermEnum(IndexReader, Term, float, int)
65    */
 
public FuzzyTermEnum(IndexReader reader, Term term) throws IOException {
  // Convenience overload: forwards to the four-argument constructor using
  // FuzzyQuery's default minimum similarity and default prefix length.
  this(
      reader,
      term,
      FuzzyQuery.defaultMinSimilarity,
      FuzzyQuery.defaultPrefixLength);
}
69   
70    /**
71    * Creates a FuzzyTermEnum with an empty prefix.
72    * <p>
73    * After calling the constructor the enumeration is already pointing to the first
74    * valid term if such a term exists.
75    *
76    * @param reader
77    * @param term
78    * @param minSimilarity
79    * @throws IOException
80    * @see #FuzzyTermEnum(IndexReader, Term, float, int)
81    */
 
public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws IOException {
  // Convenience overload: the caller picks the similarity threshold, while the
  // prefix length falls back to FuzzyQuery's default.
  this(
      reader,
      term,
      minSimilarity,
      FuzzyQuery.defaultPrefixLength);
}
85   
86    /**
87    * Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
88    * length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity &gt;
89    * <code>minSimilarity</code>.
90    * <p>
91    * After calling the constructor the enumeration is already pointing to the first
92    * valid term if such a term exists.
93    *
94    * @param reader Delivers terms.
95    * @param term Pattern term.
96    * @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f.
97    * @param prefixLength Length of required common prefix. Default value is 0.
98    * @throws IOException
99    */
 
public FuzzyTermEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException {
  super();

  // Validate the arguments before any state is assigned. Each check throws,
  // so independent guards behave exactly like an if / else-if chain.
  if (minSimilarity >= 1.0f) {
    throw new IllegalArgumentException("minimumSimilarity cannot be greater than or equal to 1");
  }
  if (minSimilarity < 0.0f) {
    throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be less than 0");
  }

  this.minimumSimilarity = minSimilarity;
  // Multiplier that difference() applies to (similarity - minimumSimilarity).
  this.scale_factor = 1.0f / (1.0f - minSimilarity);
  this.searchTerm = term;
  this.field = term.field();

  // The requested prefix may be longer than the term itself; clamp it so the
  // prefix becomes the whole term and the fuzzy-matched remainder is empty.
  final String fullText = term.text();
  final int realPrefixLength = Math.min(prefixLength, fullText.length());

  this.prefix = fullText.substring(0, realPrefixLength);
  this.text = fullText.substring(realPrefixLength);

  // Both helpers run only after text is assigned: initDistanceArray() sizes
  // its first dimension from text.length().
  // NOTE(review): initializeMaxDistances() is outside this view - presumably it
  // also depends on text and minimumSimilarity; confirm before reordering.
  initializeMaxDistances();
  this.d = initDistanceArray();

  // Hand the reader's term enumeration, seeded at (field, prefix), to the
  // superclass. Kept last so every field termCompare() reads is already set.
  setEnum(reader.terms(new Term(this.field, this.prefix)));
}
128   
129    /**
130    * The termCompare method in FuzzyTermEnum uses Levenshtein distance to
131    * calculate the distance between the given term and the comparing term.
132    */
 
133  156 toggle protected final boolean termCompare(Term term) {
134  156 if (field == term.field() && term.text().startsWith(prefix)) {
135  139 final String target = term.text().substring(prefix.length());
136  139 this.similarity = similarity(target);
137  139 return (similarity > minimumSimilarity);
138    }
139  17 endEnum = true;
140  17 return false;
141    }
142   
 
143  49 toggle public final float difference() {
144  49 return (float)((similarity - minimumSimilarity) * scale_factor);
145    }
146   
 
147  170 toggle public final boolean endEnum() {
148  170 return endEnum;
149    }
150   
151    /******************************
152    * Compute Levenshtein distance
153    ******************************/
154   
155    /**
156    * Finds and returns the smallest of three integers
157    */
 
158  2920 toggle private static final int min(int a, int b, int c) {
159  2920 final int t = (a < b) ? a : b;
160  2920 return (t < c) ? t : c;
161    }
162   
 
163  40 toggle private final int[][] initDistanceArray(){
164  40 return new int[this.text.length() + 1][TYPICAL_LONGEST_WORD_IN_INDEX]