[mono.git] / mcs / tools / monkeydoc / Lucene.Net / Lucene.Net / Search / MultiSearcher.cs
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using Document = Mono.Lucene.Net.Documents.Document;
using FieldSelector = Mono.Lucene.Net.Documents.FieldSelector;
using CorruptIndexException = Mono.Lucene.Net.Index.CorruptIndexException;
using IndexReader = Mono.Lucene.Net.Index.IndexReader;
using Term = Mono.Lucene.Net.Index.Term;
using ReaderUtil = Mono.Lucene.Net.Util.ReaderUtil;

namespace Mono.Lucene.Net.Search
{
	
	/// <summary>Implements search over a set of <code>Searchables</code>.
	/// 
	/// <p/>Applications usually need only call the inherited {@link #Search(Query)}
	/// or {@link #Search(Query,Filter)} methods.
	/// </summary>
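	/// <example>
	/// A minimal usage sketch. The two sub-indexes are assumed to exist already;
	/// <c>GetFirstIndexDirectory</c> and <c>GetSecondIndexDirectory</c> are hypothetical
	/// helpers standing in for however the application obtains its
	/// <c>Mono.Lucene.Net.Store.Directory</c> instances:
	/// <code>
	/// Mono.Lucene.Net.Store.Directory dir1 = GetFirstIndexDirectory();  // hypothetical helper
	/// Mono.Lucene.Net.Store.Directory dir2 = GetSecondIndexDirectory(); // hypothetical helper
	/// Searchable[] searchables = new Searchable[]
	/// {
	/// 	new IndexSearcher(dir1, true), // read-only searcher over the first index
	/// 	new IndexSearcher(dir2, true)  // read-only searcher over the second index
	/// };
	/// Searcher searcher = new MultiSearcher(searchables);
	/// TopDocs hits = searcher.Search(new TermQuery(new Term("content", "lucene")), 10);
	/// searcher.Close();
	/// </code>
	/// </example>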
	public class MultiSearcher:Searcher
	{
		private class AnonymousClassCollector:Collector
		{
			public AnonymousClassCollector(Mono.Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance)
			{
				InitBlock(collector, start, enclosingInstance);
			}
			private void InitBlock(Mono.Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance)
			{
				this.collector = collector;
				this.start = start;
				this.enclosingInstance = enclosingInstance;
			}
			private Mono.Lucene.Net.Search.Collector collector;
			private int start;
			private MultiSearcher enclosingInstance;
			public MultiSearcher Enclosing_Instance
			{
				get
				{
					return enclosingInstance;
				}
			}
			public override void SetScorer(Scorer scorer)
			{
				collector.SetScorer(scorer);
			}
			public override void Collect(int doc)
			{
				collector.Collect(doc);
			}
			public override void SetNextReader(IndexReader reader, int docBase)
			{
				collector.SetNextReader(reader, start + docBase);
			}
			public override bool AcceptsDocsOutOfOrder()
			{
				return collector.AcceptsDocsOutOfOrder();
			}
		}
		
		/// <summary> Document Frequency cache acting as a Dummy-Searcher. This class is not a
		/// full-fledged Searcher, but only supports the methods necessary to
		/// initialize Weights.
		/// </summary>
		private class CachedDfSource:Searcher
		{
			private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
			private int maxDoc; // document count
			
			public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc, Similarity similarity)
			{
				this.dfMap = dfMap;
				this.maxDoc = maxDoc;
				SetSimilarity(similarity);
			}
			
			public override int DocFreq(Term term)
			{
				int df;
				try
				{
					df = ((System.Int32) dfMap[term]);
				}
				catch (System.NullReferenceException)
				{
					throw new System.ArgumentException("df for term " + term.Text() + " not available");
				}
				return df;
			}
			
			public override int[] DocFreqs(Term[] terms)
			{
				int[] result = new int[terms.Length];
				for (int i = 0; i < terms.Length; i++)
				{
					result[i] = DocFreq(terms[i]);
				}
				return result;
			}
			
			public override int MaxDoc()
			{
				return maxDoc;
			}
			
			public override Query Rewrite(Query query)
			{
				// This is a bit of a hack. We know that a query which
				// creates a Weight based on this Dummy-Searcher is
				// always already rewritten (see CreateWeight()).
				// Therefore we just return the unmodified query here.
				return query;
			}
			
			public override void Close()
			{
				throw new System.NotSupportedException();
			}
			
			/// <summary>.NET port addition; delegates to <see cref="Close"/>.</summary>
			public override void Dispose()
			{
				Close();
			}
			
			public override Document Doc(int i)
			{
				throw new System.NotSupportedException();
			}
			
			public override Document Doc(int i, FieldSelector fieldSelector)
			{
				throw new System.NotSupportedException();
			}
			
			public override Explanation Explain(Weight weight, int doc)
			{
				throw new System.NotSupportedException();
			}
			
			public override void Search(Weight weight, Filter filter, Collector results)
			{
				throw new System.NotSupportedException();
			}
			
			public override TopDocs Search(Weight weight, Filter filter, int n)
			{
				throw new System.NotSupportedException();
			}
			
			public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
			{
				throw new System.NotSupportedException();
			}
		}
		
		private Searchable[] searchables;
		private int[] starts;
		private int maxDoc = 0;
		
		/// <summary>Creates a searcher which searches <i>searchables</i>. </summary>
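		/// <remarks>
		/// The constructor builds a cumulative <c>starts</c> array mapping each sub-searcher
		/// to the first global document number it owns. For example, two searchables with
		/// 5 and 7 documents produce <c>starts = {0, 5, 12}</c>, so global documents 5..11
		/// belong to the second searcher.
		/// </remarks>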
		public MultiSearcher(Searchable[] searchables)
		{
			this.searchables = searchables;
			
			starts = new int[searchables.Length + 1]; // build starts array
			for (int i = 0; i < searchables.Length; i++)
			{
				starts[i] = maxDoc;
				maxDoc += searchables[i].MaxDoc(); // compute maxDocs
			}
			starts[searchables.Length] = maxDoc;
		}
		
		/// <summary>Return the array of {@link Searchable}s this searches. </summary>
		public virtual Searchable[] GetSearchables()
		{
			return searchables;
		}
		
		protected internal virtual int[] GetStarts()
		{
			return starts;
		}
		
		// inherit javadoc
		public override void Close()
		{
			for (int i = 0; i < searchables.Length; i++)
				searchables[i].Close();
		}
		
		/// <summary>.NET port addition; delegates to <see cref="Close"/>.</summary>
		public override void Dispose()
		{
			Close();
		}
		
		public override int DocFreq(Term term)
		{
			int docFreq = 0;
			for (int i = 0; i < searchables.Length; i++)
				docFreq += searchables[i].DocFreq(term);
			return docFreq;
		}
		
		// inherit javadoc
		public override Document Doc(int n)
		{
			int i = SubSearcher(n); // find searcher index
			return searchables[i].Doc(n - starts[i]); // dispatch to searcher
		}
		
		// inherit javadoc
		public override Document Doc(int n, FieldSelector fieldSelector)
		{
			int i = SubSearcher(n); // find searcher index
			return searchables[i].Doc(n - starts[i], fieldSelector); // dispatch to searcher
		}
		
		/// <summary>Returns index of the searcher for document <code>n</code> in the array
		/// used to construct this searcher.
		/// </summary>
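		/// <remarks>
		/// For example, with <c>starts = {0, 5, 12}</c> (two sub-indexes of 5 and 7 documents),
		/// <c>SubSearcher(7)</c> returns 1, because global document 7 lives in the second sub-searcher.
		/// </remarks>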
		public virtual int SubSearcher(int n)
		{
			// find searcher for doc n:
			return ReaderUtil.SubIndex(n, starts);
		}
		
		/// <summary>Returns the document number of document <code>n</code> within its
		/// sub-index.
		/// </summary>
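		/// <remarks>
		/// Continuing the example above, <c>SubDoc(7)</c> returns 2, i.e. <c>7 - starts[1]</c>.
		/// </remarks>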
		public virtual int SubDoc(int n)
		{
			return n - starts[SubSearcher(n)];
		}
		
		public override int MaxDoc()
		{
			return maxDoc;
		}
		
		public override TopDocs Search(Weight weight, Filter filter, int nDocs)
		{
			
			HitQueue hq = new HitQueue(nDocs, false);
			int totalHits = 0;
			
			for (int i = 0; i < searchables.Length; i++)
			{
				// search each searcher
				TopDocs docs = searchables[i].Search(weight, filter, nDocs);
				totalHits += docs.TotalHits; // update totalHits
				ScoreDoc[] scoreDocs = docs.ScoreDocs;
				for (int j = 0; j < scoreDocs.Length; j++)
				{
					// merge scoreDocs into hq
					ScoreDoc scoreDoc = scoreDocs[j];
					scoreDoc.doc += starts[i]; // convert doc
					if (!hq.Insert(scoreDoc))
						break; // no more scores > minScore
				}
			}
			
			ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
			for (int i = hq.Size() - 1; i >= 0; i--)
				// put docs in array
				scoreDocs2[i] = (ScoreDoc) hq.Pop();
			
			float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : scoreDocs2[0].score;
			
			return new TopDocs(totalHits, scoreDocs2, maxScore);
		}
		
		public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
		{
			FieldDocSortedHitQueue hq = null;
			int totalHits = 0;
			
			float maxScore = System.Single.NegativeInfinity;
			
			for (int i = 0; i < searchables.Length; i++)
			{
				// search each searcher
				TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
				// If one of the Sort fields is FIELD_DOC, need to fix its values, so that
				// it will break ties by doc Id properly. Otherwise, it will compare to
				// 'relative' doc Ids, that belong to two different searchers.
				for (int j = 0; j < docs.fields.Length; j++)
				{
					if (docs.fields[j].GetType() == SortField.DOC)
					{
						// iterate over the score docs and change their fields value
						for (int j2 = 0; j2 < docs.ScoreDocs.Length; j2++)
						{
							FieldDoc fd = (FieldDoc) docs.ScoreDocs[j2];
							fd.fields[j] = (System.Int32) (((System.Int32) fd.fields[j]) + starts[i]);
						}
						break;
					}
				}
				if (hq == null)
					hq = new FieldDocSortedHitQueue(docs.fields, n);
				totalHits += docs.TotalHits; // update totalHits
				maxScore = System.Math.Max(maxScore, docs.GetMaxScore());
				ScoreDoc[] scoreDocs = docs.ScoreDocs;
				for (int j = 0; j < scoreDocs.Length; j++)
				{
					// merge scoreDocs into hq
					ScoreDoc scoreDoc = scoreDocs[j];
					scoreDoc.doc += starts[i]; // convert doc
					if (!hq.Insert(scoreDoc))
						break; // no more scores > minScore
				}
			}
			
			ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
			for (int i = hq.Size() - 1; i >= 0; i--)
				// put docs in array
				scoreDocs2[i] = (ScoreDoc) hq.Pop();
			
			return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
		}
		
		// inherit javadoc
		public override void Search(Weight weight, Filter filter, Collector collector)
		{
			for (int i = 0; i < searchables.Length; i++)
			{
				
				int start = starts[i];
				
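				// Wrap the caller's collector so that doc ids reported by this
				// sub-searcher are rebased into the global (merged) doc id space.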
				Collector hc = new AnonymousClassCollector(collector, start, this);
				
				searchables[i].Search(weight, filter, hc);
			}
		}
		
		public override Query Rewrite(Query original)
		{
			Query[] queries = new Query[searchables.Length];
			for (int i = 0; i < searchables.Length; i++)
			{
				queries[i] = searchables[i].Rewrite(original);
			}
			return queries[0].Combine(queries);
		}
		
		public override Explanation Explain(Weight weight, int doc)
		{
			int i = SubSearcher(doc); // find searcher index
			return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher
		}
		
		/// <summary> Create weight in a multiple-index scenario.
		/// 
		/// Distributed query processing is done in the following steps:
		/// 1. rewrite query
		/// 2. extract necessary terms
		/// 3. collect dfs for these terms from the Searchables
		/// 4. create query weight using aggregate dfs
		/// 5. distribute that weight to Searchables
		/// 6. merge results
		/// 
		/// Steps 1-4 are done here, steps 5 and 6 in the Search() methods.
		/// 
		/// </summary>
		/// <returns> a Weight built from the rewritten query and the aggregated
		/// document frequencies of all Searchables
		/// </returns>
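		/// <remarks>
		/// Because document frequencies are summed across all sub-searchers before the
		/// weight is built, idf is computed from the combined statistics. For instance,
		/// if two sub-indexes report df = 3 and df = 5 for the same term, the weight is
		/// created with df = 8, which keeps scores comparable across the sub-indexes.
		/// </remarks>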
		public /*protected internal*/ override Weight CreateWeight(Query original)
		{
			// step 1
			Query rewrittenQuery = Rewrite(original);
			
			// step 2
			System.Collections.Hashtable terms = new System.Collections.Hashtable();
			rewrittenQuery.ExtractTerms(terms);
			
			// step 3
			Term[] allTermsArray = new Term[terms.Count];
			int index = 0;
			System.Collections.IEnumerator e = terms.Keys.GetEnumerator();
			while (e.MoveNext())
				allTermsArray[index++] = e.Current as Term;
			int[] aggregatedDfs = new int[terms.Count];
			for (int i = 0; i < searchables.Length; i++)
			{
				int[] dfs = searchables[i].DocFreqs(allTermsArray);
				for (int j = 0; j < aggregatedDfs.Length; j++)
				{
					aggregatedDfs[j] += dfs[j];
				}
			}
			
			System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
			for (int i = 0; i < allTermsArray.Length; i++)
			{
				dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
			}
			
			// step 4
			int numDocs = MaxDoc();
			CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, GetSimilarity());
			
			return rewrittenQuery.Weight(cacheSim);
		}
	}
}