Clover Coverage Report
Coverage timestamp: Fri May 9 2008 10:54:27 EST
../../../../img/srcFileCovDistChart8.png 55% of files have more coverage
70   201   28   5.38
30   117   0.4   4.33
13     2.15  
3    
 
  FuzzyQuery       Line # 29 62 24 71% 0.71
  FuzzyQuery.ScoreTerm       Line # 152 2 1 100% 1.0
  FuzzyQuery.ScoreTermQueue       Line # 162 6 3 100% 1.0
 
  (8)
 
1    package org.apache.lucene.search;
2   
3    /**
4    * Copyright 2004 The Apache Software Foundation
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10    * http://www.apache.org/licenses/LICENSE-2.0
11    *
12    * Unless required by applicable law or agreed to in writing, software
13    * distributed under the License is distributed on an "AS IS" BASIS,
14    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15    * See the License for the specific language governing permissions and
16    * limitations under the License.
17    */
18   
19    import org.apache.lucene.index.IndexReader;
20    import org.apache.lucene.index.Term;
21    import org.apache.lucene.util.PriorityQueue;
22    import org.apache.lucene.util.ToStringUtils;
23   
24    import java.io.IOException;
25   
26    /** Implements the fuzzy search query. The similiarity measurement
27    * is based on the Levenshtein (edit distance) algorithm.
28    */
 
29    public class FuzzyQuery extends MultiTermQuery {
30   
31    public final static float defaultMinSimilarity = 0.5f;
32    public final static int defaultPrefixLength = 0;
33   
34    private float minimumSimilarity;
35    private int prefixLength;
36   
37    /**
38    * Create a new FuzzyQuery that will match terms with a similarity
39    * of at least <code>minimumSimilarity</code> to <code>term</code>.
40    * If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
41    * of that length is also required.
42    *
43    * @param term the term to search for
44    * @param minimumSimilarity a value between 0 and 1 to set the required similarity
45    * between the query term and the matching terms. For example, for a
46    * <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
47    * as the query term is considered similar to the query term if the edit distance
48    * between both terms is less than <code>length(term)*0.5</code>
49    * @param prefixLength length of common (non-fuzzy) prefix
50    * @throws IllegalArgumentException if minimumSimilarity is &gt;= 1 or &lt; 0
51    * or if prefixLength &lt; 0
52    */
 
53  63 toggle public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws IllegalArgumentException {
54  63 super(term);
55   
56  63 if (minimumSimilarity >= 1.0f)
57  1 throw new IllegalArgumentException("minimumSimilarity >= 1");
58  62 else if (minimumSimilarity < 0.0f)
59  1 throw new IllegalArgumentException("minimumSimilarity < 0");
60  61 if (prefixLength < 0)
61  0 throw new IllegalArgumentException("prefixLength < 0");
62   
63  61 this.minimumSimilarity = minimumSimilarity;
64  61 this.prefixLength = prefixLength;
65    }
66   
67    /**
68    * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}.
69    */
 
70  0 toggle public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException {
71  0 this(term, minimumSimilarity, defaultPrefixLength);
72    }
73   
74    /**
75    * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}.
76    */
 
77  1 toggle public FuzzyQuery(Term term) {
78  1 this(term, defaultMinSimilarity, defaultPrefixLength);
79    }
80   
81    /**
82    * Returns the minimum similarity that is required for this query to match.
83    * @return float value between 0.0 and 1.0
84    */
 
85  2 toggle public float getMinSimilarity() {
86  2 return minimumSimilarity;
87    }
88   
89    /**
90    * Returns the non-fuzzy prefix length. This is the number of characters at the start
91    * of a term that must be identical (not fuzzy) to the query term if the query
92    * is to match that term.
93    */
 
94  2 toggle public int getPrefixLength() {
95  2 return prefixLength;
96    }
97   
 
98  40 toggle protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
99  40 return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
100    }
101   
 
102  40 toggle public Query rewrite(IndexReader reader) throws IOException {
103  40 FilteredTermEnum enumerator = getEnum(reader);
104  40 int maxClauseCount = BooleanQuery.getMaxClauseCount();
105  40 ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
106   
107  40 try {
108  40 do {
109  61 float minScore = 0.0f;
110  61 float score = 0.0f;
111  61 Term t = enumerator.term();
112  61 if (t != null) {
113  49 score = enumerator.difference();
114    // terms come in alphabetical order, therefore if queue is full and score
115    // not bigger than minScore, we can skip
116  49 if(stQueue.size() < maxClauseCount || score > minScore){
117  49 stQueue.insert(new ScoreTerm(t, score));
118  49 minScore = ((ScoreTerm)stQueue.top()).score; // maintain minScore
119    }
120    }
121  61 } while (enumerator.next());
122    } finally {
123  40 enumerator.close();
124    }
125   
126  40 BooleanQuery query = new BooleanQuery(true);
127  40 int size = stQueue.size();
128  89 for(int i = 0; i < size; i++){
129  49 ScoreTerm st = (ScoreTerm) stQueue.pop();
130  49 TermQuery tq = new TermQuery(st.term); // found a match
131  49 tq.setBoost(getBoost() * st.score); // set the boost
132  49 query.add(tq, BooleanClause.Occur.SHOULD); // add to query
133    }
134   
135  40 return query;
136    }
137