| 1 |
|
package org.apache.lucene; |
| 2 |
|
|
| 3 |
|
|
| 4 |
|
|
| 5 |
|
|
| 6 |
|
|
| 7 |
|
|
| 8 |
|
|
| 9 |
|
|
| 10 |
|
|
| 11 |
|
|
| 12 |
|
|
| 13 |
|
|
| 14 |
|
|
| 15 |
|
|
| 16 |
|
|
| 17 |
|
|
| 18 |
|
|
| 19 |
|
import org.apache.lucene.analysis.SimpleAnalyzer; |
| 20 |
|
import org.apache.lucene.analysis.Analyzer; |
| 21 |
|
import org.apache.lucene.analysis.TokenStream; |
| 22 |
|
import org.apache.lucene.analysis.Token; |
| 23 |
|
|
| 24 |
|
import java.io.Reader; |
| 25 |
|
import java.io.StringReader; |
| 26 |
|
import java.io.File; |
| 27 |
|
import java.io.FileInputStream; |
| 28 |
|
import java.io.BufferedReader; |
| 29 |
|
import java.io.InputStreamReader; |
| 30 |
|
import java.util.Date; |
| 31 |
|
|
|
|
|
| 0% |
Uncovered Elements: 33 (33) |
Complexity: 7 |
Complexity Density: 0.28 |
|
| 32 |
|
class AnalysisTest { |
|
|
|
| 0% |
Uncovered Elements: 4 (4) |
Complexity: 2 |
Complexity Density: 0.5 |
|
| 33 |
0
|
public static void main(String[] args) {... |
| 34 |
0
|
try { |
| 35 |
0
|
test("This is a test", true); |
| 36 |
|
|
| 37 |
0
|
test(new File("words.txt"), false); |
| 38 |
|
} catch (Exception e) { |
| 39 |
0
|
System.out.println(" caught a " + e.getClass() + |
| 40 |
|
"\n with message: " + e.getMessage()); |
| 41 |
|
} |
| 42 |
|
} |
| 43 |
|
|
|
|
|
| 0% |
Uncovered Elements: 6 (6) |
Complexity: 1 |
Complexity Density: 0.17 |
|
| 44 |
0
|
static void test(File file, boolean verbose)... |
| 45 |
|
throws Exception { |
| 46 |
0
|
long bytes = file.length(); |
| 47 |
0
|
System.out.println(" Reading test file containing " + bytes + " bytes."); |
| 48 |
|
|
| 49 |
0
|
FileInputStream is = new FileInputStream(file); |
| 50 |
0
|
BufferedReader ir = new BufferedReader(new InputStreamReader(is)); |
| 51 |
|
|
| 52 |
0
|
test(ir, verbose, bytes); |
| 53 |
|
|
| 54 |
0
|
ir.close(); |
| 55 |
|
} |
| 56 |
|
|
|
|
|
| 0% |
Uncovered Elements: 2 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
| 57 |
0
|
static void test(String text, boolean verbose) throws Exception {... |
| 58 |
0
|
System.out.println(" Tokenizing string: " + text); |
| 59 |
0
|
test(new StringReader(text), verbose, text.length()); |
| 60 |
|
} |
| 61 |
|
|
|
|
|
| 0% |
Uncovered Elements: 17 (17) |
Complexity: 3 |
Complexity Density: 0.23 |
|
| 62 |
0
|
static void test(Reader reader, boolean verbose, long bytes)... |
| 63 |
|
throws Exception { |
| 64 |
0
|
Analyzer analyzer = new SimpleAnalyzer(); |
| 65 |
0
|
TokenStream stream = analyzer.tokenStream(null, reader); |
| 66 |
|
|
| 67 |
0
|
Date start = new Date(); |
| 68 |
|
|
| 69 |
0
|
int count = 0; |
| 70 |
0
|
for (Token t = stream.next(); t!=null; t = stream.next()) { |
| 71 |
0
|
if (verbose) { |
| 72 |
0
|
System.out.println("Text=" + t.termText() |
| 73 |
|
+ " start=" + t.startOffset() |
| 74 |
|
+ " end=" + t.endOffset()); |
| 75 |
|
} |
| 76 |
0
|
count++; |
| 77 |
|
} |
| 78 |
|
|
| 79 |
0
|
Date end = new Date(); |
| 80 |
|
|
| 81 |
0
|
long time = end.getTime() - start.getTime(); |
| 82 |
0
|
System.out.println(time + " milliseconds to extract " + count + " tokens"); |
| 83 |
0
|
System.out.println((time*1000.0)/count + " microseconds/token"); |
| 84 |
0
|
System.out.println((bytes * 1000.0 * 60.0 * 60.0)/(time * 1000000.0) |
| 85 |
|
+ " megabytes/hour"); |
| 86 |
|
} |
| 87 |
|
} |