|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| LengthFilter | Line # 27 | 8 | 5 | 100% |
1.0
|
|||||||||||||||||||||||||||||||||||||||||||||||||||||||
| (1) | |||
| Result | |||
|
1.0
|
org.apache.lucene.analysis.TestLengthFilter.testFilter
org.apache.lucene.analysis.TestLengthFilter.testFilter
|
1 PASS | |
| 1 | package org.apache.lucene.analysis; | |
| 2 | ||
| 3 | /** | |
| 4 | * Copyright 2004 The Apache Software Foundation | |
| 5 | * | |
| 6 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 7 | * you may not use this file except in compliance with the License. | |
| 8 | * You may obtain a copy of the License at | |
| 9 | * | |
| 10 | * http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 | * | |
| 12 | * Unless required by applicable law or agreed to in writing, software | |
| 13 | * distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 | * See the License for the specific language governing permissions and | |
| 16 | * limitations under the License. | |
| 17 | */ | |
| 18 | ||
| 19 | import java.io.IOException; | |
| 20 | ||
| 21 | /** | |
| 22 | * Removes words that are too long or too short from the stream. | |
| 23 | * | |
| 24 | * @author David Spencer | |
| 25 | * @version $Id: LengthFilter.java 347992 2005-11-21 21:41:43Z dnaber $ | |
| 26 | */ | |
| 27 | public final class LengthFilter extends TokenFilter { | |
| 28 | ||
| 29 | final int min; | |
| 30 | final int max; | |
| 31 | ||
| 32 | /** | |
| 33 | * Build a filter over in that passes only tokens whose termText() | |
| 34 | * length is between min and max (both bounds inclusive). | |
| 35 | */ | |
| 36 | 1 |
public LengthFilter(TokenStream in, int min, int max) |
| 37 | { | |
| 38 | 1 | super(in); |
| 39 | 1 | this.min = min; |
| 40 | 1 | this.max = max; |
| 41 | } | |
| 42 | ||
| 43 | /** | |
| 44 | * Returns the next input Token whose termText() length is within [min, max], or null once the input has no more tokens | |
| 45 | */ | |
| 46 | 4 |
public final Token next() throws IOException |
| 47 | { | |
| 48 | // return the first token whose term text length is within [min, max]
| 49 | 8 | for (Token token = input.next(); token != null; token = input.next()) |
| 50 | { | |
| 51 | 7 | int len = token.termText().length(); |
| 52 | 7 | if (len >= min && len <= max) { |
| 53 | 3 | return token; |
| 54 | } | |
| 55 | // note: otherwise the token is skipped -- open question: should each part of it be indexed instead? | |
| 56 | } | |
| 57 | // input exhausted (end of stream) -- return null | |
| 58 | 1 | return null; |
| 59 | } | |
| 60 | } | |
|
||||||||||