|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| FuzzyQuery | Line # 29 | 62 | 24 | 71% |
0.71
|
|||||||||||||||||||||||||||||||||||||||||||||||||||||||
| FuzzyQuery.ScoreTerm | Line # 152 | 2 | 1 | 100% |
1.0
|
|||||||||||||||||||||||||||||||||||||||||||||||||||||||
| FuzzyQuery.ScoreTermQueue | Line # 162 | 6 | 3 | 100% |
1.0
|
|||||||||||||||||||||||||||||||||||||||||||||||||||||||
| (8) | |||
| Result | |||
|
0.48672566
|
org.apache.lucene.search.TestFuzzyQuery.testFuzziness
org.apache.lucene.search.TestFuzzyQuery.testFuzziness
|
1 PASS | |
|
0.460177
|
org.apache.lucene.search.TestMultiSearcherRanking.testFuzzyQuery
org.apache.lucene.search.TestMultiSearcherRanking.testFuzzyQuery
|
1 PASS | |
|
0.44247788
|
org.apache.lucene.search.TestFuzzyQuery.testFuzzinessLong
org.apache.lucene.search.TestFuzzyQuery.testFuzzinessLong
|
1 PASS | |
|
0.21238938
|
org.apache.lucene.queryParser.TestQueryParser.testWildcard
org.apache.lucene.queryParser.TestQueryParser.testWildcard
|
1 PASS | |
|
0.19469027
|
org.apache.lucene.queryParser.TestMultiFieldQueryParser.testSimple
org.apache.lucene.queryParser.TestMultiFieldQueryParser.testSimple
|
1 PASS | |
|
0.19469027
|
org.apache.lucene.queryParser.TestQueryParser.testEscaped
org.apache.lucene.queryParser.TestQueryParser.testEscaped
|
1 PASS | |
|
0.19469027
|
org.apache.lucene.queryParser.TestMultiFieldQueryParser.testAnalyzerReturningNull
org.apache.lucene.queryParser.TestMultiFieldQueryParser.testAnalyzerReturningNull
|
1 PASS | |
|
0.15929204
|
org.apache.lucene.search.TestWildcard.testEquals
org.apache.lucene.search.TestWildcard.testEquals
|
1 PASS | |
| 1 | package org.apache.lucene.search; | |
| 2 | ||
| 3 | /** | |
| 4 | * Copyright 2004 The Apache Software Foundation | |
| 5 | * | |
| 6 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 7 | * you may not use this file except in compliance with the License. | |
| 8 | * You may obtain a copy of the License at | |
| 9 | * | |
| 10 | * http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 | * | |
| 12 | * Unless required by applicable law or agreed to in writing, software | |
| 13 | * distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 | * See the License for the specific language governing permissions and | |
| 16 | * limitations under the License. | |
| 17 | */ | |
| 18 | ||
| 19 | import org.apache.lucene.index.IndexReader; | |
| 20 | import org.apache.lucene.index.Term; | |
| 21 | import org.apache.lucene.util.PriorityQueue; | |
| 22 | import org.apache.lucene.util.ToStringUtils; | |
| 23 | ||
| 24 | import java.io.IOException; | |
| 25 | ||
| 26 | /** Implements the fuzzy search query. The similiarity measurement | |
| 27 | * is based on the Levenshtein (edit distance) algorithm. | |
| 28 | */ | |
| 29 | public class FuzzyQuery extends MultiTermQuery { | |
| 30 | ||
| 31 | public final static float defaultMinSimilarity = 0.5f; | |
| 32 | public final static int defaultPrefixLength = 0; | |
| 33 | ||
| 34 | private float minimumSimilarity; | |
| 35 | private int prefixLength; | |
| 36 | ||
| 37 | /** | |
| 38 | * Create a new FuzzyQuery that will match terms with a similarity | |
| 39 | * of at least <code>minimumSimilarity</code> to <code>term</code>. | |
| 40 | * If a <code>prefixLength</code> > 0 is specified, a common prefix | |
| 41 | * of that length is also required. | |
| 42 | * | |
| 43 | * @param term the term to search for | |
| 44 | * @param minimumSimilarity a value between 0 and 1 to set the required similarity | |
| 45 | * between the query term and the matching terms. For example, for a | |
| 46 | * <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length | |
| 47 | * as the query term is considered similar to the query term if the edit distance | |
| 48 | * between both terms is less than <code>length(term)*0.5</code> | |
| 49 | * @param prefixLength length of common (non-fuzzy) prefix | |
| 50 | * @throws IllegalArgumentException if minimumSimilarity is >= 1 or < 0 | |
| 51 | * or if prefixLength < 0 | |
| 52 | */ | |
| 53 | 63 |
public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws IllegalArgumentException { |
| 54 | 63 | super(term); |
| 55 | ||
| 56 | 63 | if (minimumSimilarity >= 1.0f) |
| 57 | 1 | throw new IllegalArgumentException("minimumSimilarity >= 1"); |
| 58 | 62 | else if (minimumSimilarity < 0.0f) |
| 59 | 1 | throw new IllegalArgumentException("minimumSimilarity < 0"); |
| 60 | 61 | if (prefixLength < 0) |
| 61 | 0 | throw new IllegalArgumentException("prefixLength < 0"); |
| 62 | ||
| 63 | 61 | this.minimumSimilarity = minimumSimilarity; |
| 64 | 61 | this.prefixLength = prefixLength; |
| 65 | } | |
| 66 | ||
| 67 | /** | |
| 68 | * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}. | |
| 69 | */ | |
| 70 | 0 |
public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException { |
| 71 | 0 | this(term, minimumSimilarity, defaultPrefixLength); |
| 72 | } | |
| 73 | ||
| 74 | /** | |
| 75 | * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}. | |
| 76 | */ | |
| 77 | 1 |
public FuzzyQuery(Term term) { |
| 78 | 1 | this(term, defaultMinSimilarity, defaultPrefixLength); |
| 79 | } | |
| 80 | ||
| 81 | /** | |
| 82 | * Returns the minimum similarity that is required for this query to match. | |
| 83 | * @return float value between 0.0 and 1.0 | |
| 84 | */ | |
| 85 | 2 |
public float getMinSimilarity() { |
| 86 | 2 | return minimumSimilarity; |
| 87 | } | |
| 88 | ||
| 89 | /** | |
| 90 | * Returns the non-fuzzy prefix length. This is the number of characters at the start | |
| 91 | * of a term that must be identical (not fuzzy) to the query term if the query | |
| 92 | * is to match that term. | |
| 93 | */ | |
| 94 | 2 |
public int getPrefixLength() { |
| 95 | 2 | return prefixLength; |
| 96 | } | |
| 97 | ||
| 98 | 40 |
protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { |
| 99 | 40 | return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength); |
| 100 | } | |
| 101 | ||
| 102 | 40 |
public Query rewrite(IndexReader reader) throws IOException { |
| 103 | 40 | FilteredTermEnum enumerator = getEnum(reader); |
| 104 | 40 | int maxClauseCount = BooleanQuery.getMaxClauseCount(); |
| 105 | 40 | ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount); |
| 106 | ||
| 107 | 40 | try { |
| 108 | 40 | do { |
| 109 | 61 | float minScore = 0.0f; |
| 110 | 61 | float score = 0.0f; |
| 111 | 61 | Term t = enumerator.term(); |
| 112 | 61 | if (t != null) { |
| 113 | 49 | score = enumerator.difference(); |
| 114 | // terms come in alphabetical order, therefore if queue is full and score | |
| 115 | // not bigger than minScore, we can skip | |
| 116 | 49 | if(stQueue.size() < maxClauseCount || score > minScore){ |
| 117 | 49 | stQueue.insert(new ScoreTerm(t, score)); |
| 118 | 49 | minScore = ((ScoreTerm)stQueue.top()).score; // maintain minScore |
| 119 | } | |
| 120 | } | |
| 121 | 61 | } while (enumerator.next()); |
| 122 | } finally { | |
| 123 | 40 | enumerator.close(); |
| 124 | } | |
| 125 | ||
| 126 | 40 | BooleanQuery query = new BooleanQuery(true); |
| 127 | 40 | int size = stQueue.size(); |
| 128 | 89 | for(int i = 0; i < size; i++){ |
| 129 | 49 | ScoreTerm st = (ScoreTerm) stQueue.pop(); |
| 130 | 49 | TermQuery tq = new TermQuery(st.term); // found a match |
| 131 | 49 | tq.setBoost(getBoost() * st.score); // set the boost |
| 132 | 49 | query.add(tq, BooleanClause.Occur.SHOULD); // add to query |
| 133 | } | |
| 134 | ||
| 135 | 40 | return query; |
| 136 | } | |
| 137 | ||