|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| PorterStemFilter | Line # 39 | 9 | 4 | 0% |
0.0
|
|||||||||||||||||||||||||||||||||||||||||||||||||||||||
| No Tests | |||
| 1 | package org.apache.lucene.analysis; | |
| 2 | ||
| 3 | /** | |
| 4 | * Copyright 2004 The Apache Software Foundation | |
| 5 | * | |
| 6 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 7 | * you may not use this file except in compliance with the License. | |
| 8 | * You may obtain a copy of the License at | |
| 9 | * | |
| 10 | * http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 | * | |
| 12 | * Unless required by applicable law or agreed to in writing, software | |
| 13 | * distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 | * See the License for the specific language governing permissions and | |
| 16 | * limitations under the License. | |
| 17 | */ | |
| 18 | ||
| 19 | import java.io.IOException; | |
| 20 | ||
| 21 | /** Transforms the token stream as per the Porter stemming algorithm. | |
| 22 | Note: the input to the stemming filter must already be in lower case, | |
| 23 | so you will need to use LowerCaseFilter or LowerCaseTokenizer earlier | |
| 24 | in the Tokenizer chain (before this filter) in order for this to work properly! | |
| 25 | <P> | |
| 26 | To use this filter with other analyzers, you'll want to write an | |
| 27 | Analyzer class that sets up the TokenStream chain as you want it. | |
| 28 | To use this with LowerCaseTokenizer, for example, you'd write an | |
| 29 | analyzer like this: | |
| 30 | <P> | |
| 31 | <PRE> | |
| 32 | class MyAnalyzer extends Analyzer { | |
| 33 | public final TokenStream tokenStream(String fieldName, Reader reader) { | |
| 34 | return new PorterStemFilter(new LowerCaseTokenizer(reader)); | |
| 35 | } | |
| 36 | } | |
| 37 | </PRE> | |
| 38 | */ | |
| 39 | public final class PorterStemFilter extends TokenFilter { | |
| 40 | private PorterStemmer stemmer; /* stemmer that next() applies to each token's termText */ | |
| 41 | /** Passes the given stream to the TokenFilter superclass and creates the PorterStemmer used by next(). */ ||
| 42 | 0 |
public PorterStemFilter(TokenStream in) { |
| 43 | 0 | super(in); |
| 44 | 0 | stemmer = new PorterStemmer(); |
| 45 | } | |
| 46 | ||
| 47 | /** Pulls the next Token from input and replaces its termText with stemmer.stem(termText); returns null when input.next() returns null. The same Token object is modified in place and returned. */ | |
| 48 | 0 |
public final Token next() throws IOException { |
| 49 | 0 | Token token = input.next(); |
| 50 | 0 | if (token == null) |
| 51 | 0 | return null; |
| 52 | else { | |
| 53 | 0 | String s = stemmer.stem(token.termText); |
| 54 | 0 | if (s != token.termText) // Intentional reference comparison: the assignment is skipped when stem() hands back the very same String object |
| 55 | 0 | token.termText = s; |
| 56 | 0 | return token; |
| 57 | } | |
| 58 | } | |
| 59 | } | |
|
||||||||||