Clover Coverage Report
Coverage timestamp: Fri May 9 2008 10:54:27 EST
../../../../img/srcFileCovDistChart0.png 86% of files have more coverage
13   85   4   3.25
0   33   0.31   4
4     1  
1    
 
  HTMLDocument       Line # 25 13 4 0% 0.0
 
No Tests
 
1    package org.apache.lucene.demo;
2   
3    /**
4    * Copyright 2004 The Apache Software Foundation
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10    * http://www.apache.org/licenses/LICENSE-2.0
11    *
12    * Unless required by applicable law or agreed to in writing, software
13    * distributed under the License is distributed on an "AS IS" BASIS,
14    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15    * See the License for the specific language governing permissions and
16    * limitations under the License.
17    */
18   
19    import java.io.*;
20    import org.apache.lucene.document.*;
21    import org.apache.lucene.demo.html.HTMLParser;
22   
23    /** A utility for making Lucene Documents for HTML documents. */
24   
 
25    public class HTMLDocument {
26    static char dirSep = System.getProperty("file.separator").charAt(0);
27   
 
28  0 toggle public static String uid(File f) {
29    // Append path and date into a string in such a way that lexicographic
30    // sorting gives the same results as a walk of the file hierarchy. Thus
31    // null (\u0000) is used both to separate directory components and to
32    // separate the path from the date.
33  0 return f.getPath().replace(dirSep, '\u0000') +
34    "\u0000" +
35    DateTools.timeToString(f.lastModified(), DateTools.Resolution.SECOND);
36    }
37   
 
38  0 toggle public static String uid2url(String uid) {
39  0 String url = uid.replace('\u0000', '/'); // replace nulls with slashes
40  0 return url.substring(0, url.lastIndexOf('/')); // remove date from end
41    }
42   
 
43  0 toggle public static Document Document(File f)
44    throws IOException, InterruptedException {
45    // make a new, empty document
46  0 Document doc = new Document();
47   
48    // Add the url as a field named "path". Use a field that is
49    // indexed (i.e. searchable), but don't tokenize the field into words.
50  0 doc.add(new Field("path", f.getPath().replace(dirSep, '/'), Field.Store.YES,
51    Field.Index.UN_TOKENIZED));
52   
53    // Add the last modified date of the file a field named "modified".
54    // Use a field that is indexed (i.e. searchable), but don't tokenize
55    // the field into words.
56  0 doc.add(new Field("modified",
57    DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
58    Field.Store.YES, Field.Index.UN_TOKENIZED));
59   
60    // Add the uid as a field, so that index can be incrementally maintained.
61    // This field is not stored with document, it is indexed, but it is not
62    // tokenized prior to indexing.
63  0 doc.add(new Field("uid", uid(f), Field.Store.NO, Field.Index.UN_TOKENIZED));
64   
65  0 FileInputStream fis = new FileInputStream(f);
66  0 HTMLParser parser = new HTMLParser(fis);
67   
68    // Add the tag-stripped contents as a Reader-valued Text field so it will
69    // get tokenized and indexed.
70  0 doc.add(new Field("contents", parser.getReader()));
71   
72    // Add the summary as a field that is stored and returned with
73    // hit documents for display.
74  0 doc.add(new Field("summary", parser.getSummary(), Field.Store.YES, Field.Index.NO));
75   
76    // Add the title as a field that it can be searched and that is stored.
77  0 doc.add(new Field("title", parser.getTitle(), Field.Store.YES, Field.Index.TOKENIZED));
78   
79    // return the document
80  0 return doc;
81    }
82   
 
83  0 toggle private HTMLDocument() {}
84    }
85