Clover Coverage Report
Coverage timestamp: Fri May 9 2008 10:54:27 EST
../../../../img/srcFileCovDistChart0.png 86% of files have more coverage
350   546   177   12.96
194   340   0.51   27
27     6.56  
1    
 
  PorterStemmer       Line # 57 350 177 0% 0.0
 
No Tests
 
1    package org.apache.lucene.analysis;
2   
3    /**
4    * Copyright 2004 The Apache Software Foundation
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10    * http://www.apache.org/licenses/LICENSE-2.0
11    *
12    * Unless required by applicable law or agreed to in writing, software
13    * distributed under the License is distributed on an "AS IS" BASIS,
14    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15    * See the License for the specific language governing permissions and
16    * limitations under the License.
17    */
18   
19    /*
20   
21    Porter stemmer in Java. The original paper is in
22   
23    Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
24    no. 3, pp 130-137,
25   
26    See also http://www.tartarus.org/~martin/PorterStemmer/index.html
27   
28    Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
29    The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
30    is then outside the bounds of b.
31   
32    Similarly,
33   
34    Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
35    'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
36    b[j] is then outside the bounds of b.
37   
38    Release 3.
39   
40    [ This version is derived from Release 3, modified by Brian Goetz to
41    optimize for fewer object creations. ]
42   
43    */
44   
45   
46    import java.io.*;
47   
48    /**
49    *
50    * Stemmer, implementing the Porter Stemming Algorithm
51    *
52    * The Stemmer class transforms a word into its root form. The input
53    * word can be provided a character at a time (by calling add()), or at once
54    * by calling one of the various stem(something) methods.
55    */
56   
 
57    class PorterStemmer
58    {
59    private char[] b;
60    private int i, /* offset into b */
61    j, k, k0;
62    private boolean dirty = false;
63    private static final int INC = 50; /* unit of size whereby b is increased */
64    private static final int EXTRA = 1;
65   
 
public PorterStemmer() {
  // Start with an empty word in a buffer of INC characters.
  this.i = 0;
  this.b = new char[INC];
}
70   
71    /**
72    * reset() resets the stemmer so it can stem another word. If you invoke
73    * the stemmer by calling add(char) and then stem(), you must call reset()
74    * before starting another word.
75    */
 
76  0 toggle public void reset() { i = 0; dirty = false; }
77   
78    /**
79    * Add a character to the word being stemmed. When you are finished
80    * adding characters, you can call stem(void) to process the word.
81    */
 
82  0 toggle public void add(char ch) {
83  0 if (b.length <= i + EXTRA) {
84  0 char[] new_b = new char[b.length+INC];
85  0 for (int c = 0; c < b.length; c++)
86  0 new_b[c] = b[c];
87  0 b = new_b;
88    }
89  0 b[i++] = ch;
90    }
91   
92    /**
93    * After a word has been stemmed, it can be retrieved by toString(),
94    * or a reference to the internal buffer can be retrieved by getResultBuffer
95    * and getResultLength (which is generally more efficient.)
96    */
 
97  0 toggle public String toString() { return new String(b,0,i); }
98   
99    /**
100    * Returns the length of the word resulting from the stemming process.
101    */
 
102  0 toggle public int getResultLength() { return i; }
103   
104    /**
105    * Returns a reference to a character buffer containing the results of
106    * the stemming process. You also need to consult getResultLength()
107    * to determine the length of the result.
108    */
 
109  0 toggle public char[] getResultBuffer() { return b; }
110   
111    /* cons(i) is true <=> b[i] is a consonant. */
112   
 
113  0 toggle private final boolean cons(int i) {
114  0 switch (b[i]) {
115  0 case 'a': case 'e': case 'i': case 'o': case 'u':
116  0 return false;
117  0 case 'y':
118  0 return (i==k0) ? true : !cons(i-1);
119  0 default:
120  0 return true;
121    }
122    }
123   
124    /* m() measures the number of consonant sequences between k0 and j. if c is
125    a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
126    presence,
127   
128    <c><v> gives 0
129    <c>vc<v> gives 1
130    <c>vcvc<v> gives 2
131    <c>vcvcvc<v> gives 3
132    ....
133    */
134   
 
135  0 toggle private final int m() {
136  0 int n = 0;
137  0 int i = k0;
138  0 while(true) {
139  0 if (i > j)
140  0 return n;
141  0 if (! cons(i))
142  0 break;
143  0 i++;
144    }
145  0 i++;
146  0 while(true) {
147  0 while(true) {
148  0 if (i > j)
149  0 return n;
150  0 if (cons(i))
151  0 break;
152  0 i++;
153    }
154