Merge pull request #496 from nicolas-raoul/unit-test-for-issue2907
[mono.git] / mcs / tools / monkeydoc / Monkeydoc / SearchableIndex.cs
1 //
2 //
3 // SearchableIndex.cs: Index that uses Lucene to search through the docs 
4 //
5 // Author: Mario Sopena
6 //
7
8 using System;
9 using System.IO;
10 using System.Collections;
11 // Lucene imports
12 using Lucene.Net.Index;
13 using Lucene.Net.Documents;
14 using Lucene.Net.Analysis;
15 using Lucene.Net.Analysis.Standard;
16 using Lucene.Net.Search;
17 using Lucene.Net.QueryParsers;
18 using Lucene.Net.Store;
19
20 namespace MonkeyDoc
21 {
22         public class SearchableIndex 
23         {
24                 const int maxSearchCount = 30;
25
26                 IndexSearcher searcher;
27                 string dir;
28                 public string Dir {
29                         get { 
30                                 if (dir == null) dir = "search_index";
31                                 return dir;
32                         }
33                         set { dir = value; }
34                 }
35                 public ArrayList Results;
36         
37                 public static SearchableIndex Load (string dir) {
38                         SearchableIndex s = new SearchableIndex ();
39                         s.dir = dir;
40                         s.Results = new ArrayList (20);
41                         try {
42                                 //s.searcher = new IndexSearcher (dir);
43                                 // TODO: parametrize that depending if we run on the desktop (low footprint) or the server (use RAMDirectory for instance)
44                                 s.searcher = new IndexSearcher (FSDirectory.Open (dir));
45                         } catch (IOException) {
46                                 Console.WriteLine ("Index nonexistent or in bad format");
47                                 return null;
48                         }
49                         return s;
50                 }
51                 
52                 //
53                 // Search the index with term
54                 //
55
56                 public Result Search (string term)
57                 {
58                         return Search (term, maxSearchCount);
59                 }
60
61                 public Result Search (string term, int count)
62                 {
63                         return Search (term, count, 0);
64                 }
65
66                 public Result Search (string term, int count, int start) {
67                         try {
68                                 term = term.ToLower ();
69                                 Term htTerm = new Term ("hottext", term);
70                                 Query qq1 = new FuzzyQuery (htTerm);
71                                 Query qq2 = new TermQuery (htTerm);
72                                 qq2.Boost = 10f;
73                                 Query qq3 = new PrefixQuery (htTerm);
74                                 qq3.Boost = 10f;
75                                 DisjunctionMaxQuery q1 = new DisjunctionMaxQuery (0f);
76                                 q1.Add (qq1);
77                                 q1.Add (qq2);
78                                 q1.Add (qq3);
79                                 Query q2 = new TermQuery (new Term ("text", term));
80                                 q2.Boost = 3f;
81                                 Query q3 = new TermQuery (new Term ("examples", term));
82                                 q3.Boost = 3f;
83                                 DisjunctionMaxQuery q = new DisjunctionMaxQuery (0f);
84
85                                 q.Add (q1);
86                                 q.Add (q2);
87                                 q.Add (q3);
88                         
89                                 TopDocs top = SearchInternal (q, count, start);
90                                 Result r = new Result (term, searcher, top.ScoreDocs);
91                                 Results.Add (r);
92                                 return r;
93                         } catch (IOException) {
94                                 Console.WriteLine ("No index in {0}", dir);
95                                 return null;
96                         }
97                 }
98
99                 TopDocs SearchInternal (Query q, int count, int start)
100                 {
101                         // Easy path that doesn't involve creating a Collector ourselves
102                         // watch for Lucene.NET improvement on that (like searcher.SearchAfter)
103                         if (start == 0)
104                                 return searcher.Search (q, count);
105
106                         var weight = searcher.CreateWeight (q); // TODO: reuse weight instead of query
107                         var collector = TopScoreDocCollector.Create (start + count + 1, false);
108                         searcher.Search (q, collector);
109
110                         return collector.TopDocs (start, count);
111                 }
112
113                 public Result FastSearch (string term, int number)
114                 {
115                         try {
116                                 term = term.ToLower ();
117                                 Query q1 = new TermQuery (new Term ("hottext", term));
118                                 Query q2 = new PrefixQuery (new Term ("hottext", term));
119                                 q2.Boost = 0.5f;
120                                 DisjunctionMaxQuery q = new DisjunctionMaxQuery (0f);
121                                 q.Add (q1);
122                                 q.Add (q2);
123                                 TopDocs top = searcher.Search (q, number);
124                                 return new Result (term, searcher, top.ScoreDocs);
125                         } catch (IOException) {
126                                 Console.WriteLine ("No index in {0}", dir);
127                                 return null;
128                         }
129                 }
130         
131                 Query Parse (string term, string field, bool fuzzy)
132                 {
133                         QueryParser parser = new QueryParser (Lucene.Net.Util.Version.LUCENE_CURRENT,
134                                                               field,
135                                                               new StandardAnalyzer (Lucene.Net.Util.Version.LUCENE_CURRENT));
136                         return parser.Parse (term);
137                 }
138         }
139         //
140         // An object representing the search term with the results
141         // 
142         public class Result {
143                 string term;
144                 Searcher searcher;
145                 ScoreDoc[] docs;
146
147                 public string Term {
148                         get { return term;}
149                 }
150
151                 public int Count {
152                         get { return docs.Length; }
153                 }
154
155                 public Document this [int i] {
156                         get { return searcher.Doc (docs[i].Doc); }
157                 }
158         
159                 public string GetTitle (int i) 
160                 {
161                         Document d = this[i];
162                         return d == null ? string.Empty : d.Get ("title");
163                 }
164
165                 public string GetUrl (int i)
166                 {
167                         Document d = this[i];
168                         return d == null ? string.Empty : d.Get ("url");
169                 }
170
171                 public string GetFullTitle (int i)
172                 {
173                         Document d = this[i];
174                         return d == null ? string.Empty : d.Get ("fulltitle");
175                 }
176
177                 public float Score (int i)
178                 {
179                         return docs[i].Score;
180                 }
181
182                 public Result (string Term, Searcher searcher, ScoreDoc[] docs) 
183                 {
184                         this.term = Term;
185                         this.searcher = searcher;
186                         this.docs = docs;
187                 }
188         }
189 }
190