2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
20 using Document = Mono.Lucene.Net.Documents.Document;
21 using FieldSelector = Mono.Lucene.Net.Documents.FieldSelector;
22 using CorruptIndexException = Mono.Lucene.Net.Index.CorruptIndexException;
23 using IndexReader = Mono.Lucene.Net.Index.IndexReader;
24 using Term = Mono.Lucene.Net.Index.Term;
25 using ReaderUtil = Mono.Lucene.Net.Util.ReaderUtil;
27 namespace Mono.Lucene.Net.Search
30 /// <summary>Implements search over a set of <code>Searchables</code>.
32 /// <p/>Applications usually need only call the inherited {@link #Search(Query)}
33 /// or {@link #Search(Query,Filter)} methods.
35 public class MultiSearcher:Searcher
/// <summary>Wraps a client Collector so that callbacks coming from one
/// sub-searcher are rebased: the sub-searcher-relative doc base is shifted
/// by <c>start</c>, making collected doc ids absolute within the
/// MultiSearcher's combined doc-id space.
/// </summary>
private class AnonymousClassCollector : Collector
{
    public AnonymousClassCollector(Mono.Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance)
    {
        InitBlock(collector, start, enclosingInstance);
    }
    
    private void InitBlock(Mono.Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance)
    {
        this.collector = collector;
        // FIX: 'start' was read in SetNextReader but never stored; keep the
        // offset of this sub-searcher's first document in the combined index.
        this.start = start;
        this.enclosingInstance = enclosingInstance;
    }
    
    private Mono.Lucene.Net.Search.Collector collector;
    private int start;
    private MultiSearcher enclosingInstance;
    
    public MultiSearcher Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }
    
    public override void SetScorer(Scorer scorer)
    {
        // Scores are not rebased; pass the scorer straight through.
        collector.SetScorer(scorer);
    }
    
    public override void Collect(int doc)
    {
        collector.Collect(doc);
    }
    
    public override void SetNextReader(IndexReader reader, int docBase)
    {
        // Shift the reader's doc base so the delegate sees absolute doc ids.
        collector.SetNextReader(reader, start + docBase);
    }
    
    public override bool AcceptsDocsOutOfOrder()
    {
        return collector.AcceptsDocsOutOfOrder();
    }
}
/// <summary> Document Frequency cache acting as a Dummy-Searcher. This class is no
/// full-fledged Searcher, but only supports the methods necessary to
/// initialize Weights.
/// </summary>
private class CachedDfSource : Searcher
{
    private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
    private int maxDoc; // document count
    
    public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc, Similarity similarity)
    {
        // FIX: the constructor arguments were never stored, leaving the
        // cache empty and maxDoc at 0.
        this.dfMap = dfMap;
        this.maxDoc = maxDoc;
        SetSimilarity(similarity);
    }
    
    /// <summary>Returns the cached document frequency for <c>term</c>.</summary>
    /// <exception cref="System.ArgumentException">if the term's df was not cached</exception>
    public override int DocFreq(Term term)
    {
        int df;
        try
        {
            df = ((System.Int32) dfMap[term]);
        }
        catch (System.NullReferenceException)
        {
            // A term missing from the map yields a null entry; the unboxing
            // cast above then fails. Surface a meaningful error instead.
            throw new System.ArgumentException("df for term " + term.Text() + " not available");
        }
        return df;
    }
    
    public override int[] DocFreqs(Term[] terms)
    {
        int[] result = new int[terms.Length];
        for (int i = 0; i < terms.Length; i++)
        {
            result[i] = DocFreq(terms[i]);
        }
        return result;
    }
    
    public override int MaxDoc()
    {
        return maxDoc;
    }
    
    public override Query Rewrite(Query query)
    {
        // this is a bit of a hack. We know that a query which
        // creates a Weight based on this Dummy-Searcher is
        // always already rewritten (see preparedWeight()).
        // Therefore we just return the unmodified query here
        return query;
    }
    
    // All remaining Searcher operations are unsupported by this stub.
    public override void Close()
    {
        throw new System.NotSupportedException();
    }
    
    // NOTE(review): body reconstructed — disposal is treated as unsupported,
    // consistent with Close(); confirm against the original source.
    public override void Dispose()
    {
        throw new System.NotSupportedException();
    }
    
    public override Document Doc(int i)
    {
        throw new System.NotSupportedException();
    }
    
    public override Document Doc(int i, FieldSelector fieldSelector)
    {
        throw new System.NotSupportedException();
    }
    
    public override Explanation Explain(Weight weight, int doc)
    {
        throw new System.NotSupportedException();
    }
    
    public override void Search(Weight weight, Filter filter, Collector results)
    {
        throw new System.NotSupportedException();
    }
    
    public override TopDocs Search(Weight weight, Filter filter, int n)
    {
        throw new System.NotSupportedException();
    }
    
    public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
    {
        throw new System.NotSupportedException();
    }
}
// The sub-searchers whose results this MultiSearcher merges.
private Searchable[] searchables;
// Doc-id offset table: starts[searchables.Length] == maxDoc (set in the
// ctor); Doc()/Explain() map absolute id n to searcher i via n - starts[i].
private int[] starts;
// Total number of documents across all sub-searchers.
private int maxDoc = 0;
/// <summary>Creates a searcher which searches <i>searchers</i>. </summary>
public MultiSearcher(Searchable[] searchables)
{
    this.searchables = searchables;
    
    starts = new int[searchables.Length + 1]; // build starts array
    for (int i = 0; i < searchables.Length; i++)
    {
        // FIX: record the doc-id offset of searchable i before accumulating
        // its doc count; this line was missing, leaving starts[] all zero.
        starts[i] = maxDoc;
        maxDoc += searchables[i].MaxDoc(); // compute maxDocs
    }
    starts[searchables.Length] = maxDoc; // sentinel: one past the last doc id
}
/// <summary>Return the array of {@link Searchable}s this searches. </summary>
public virtual Searchable[] GetSearchables()
{
    // FIX: restore the missing return of the backing array.
    return searchables;
}
/// <summary>Returns the doc-id offset table; starts[i] is the base of searchables[i].</summary>
protected internal virtual int[] GetStarts()
{
    // FIX: restore the missing return of the offset table.
    return starts;
}
/// <summary>Closes every underlying Searchable.</summary>
public override void Close()
{
    foreach (Searchable searchable in searchables)
    {
        searchable.Close();
    }
}
/// <summary>Disposes this searcher.
/// NOTE(review): body reconstructed — disposal is delegated to Close(),
/// which closes all sub-searchers; confirm against the original source.
/// </summary>
public override void Dispose()
{
    Close();
}
/// <summary>Returns the total document frequency of <c>term</c>, summed
/// over all sub-searchers.</summary>
public override int DocFreq(Term term)
{
    // FIX: the accumulator was never declared/initialized and nothing was
    // returned; restore both.
    int docFreq = 0;
    for (int i = 0; i < searchables.Length; i++)
    {
        docFreq += searchables[i].DocFreq(term);
    }
    return docFreq;
}
/// <summary>Fetches document <c>n</c> by locating the owning sub-searcher
/// and translating the absolute id to that searcher's local id.</summary>
public override Document Doc(int n)
{
    int sub = SubSearcher(n);          // find searcher index
    int localDoc = n - starts[sub];    // translate to sub-searcher id
    return searchables[sub].Doc(localDoc);
}
/// <summary>Fetches document <c>n</c> through <c>fieldSelector</c>, dispatching
/// to the owning sub-searcher with a translated local doc id.</summary>
public override Document Doc(int n, FieldSelector fieldSelector)
{
    int sub = SubSearcher(n);          // find searcher index
    int localDoc = n - starts[sub];    // translate to sub-searcher id
    return searchables[sub].Doc(localDoc, fieldSelector);
}
/// <summary>Returns index of the searcher for document <code>n</code> in the array
/// used to construct this searcher.
/// </summary>
public virtual int SubSearcher(int n)
{
    // find searcher for doc n: binary-search style lookup over starts[]
    return ReaderUtil.SubIndex(n, starts);
}
/// <summary>Returns the document number of document <code>n</code> within its
/// sub-searcher.
/// </summary>
public virtual int SubDoc(int n)
{
    int sub = SubSearcher(n);
    return n - starts[sub];
}
/// <summary>Returns the total number of documents across all sub-searchers.</summary>
public override int MaxDoc()
{
    // FIX: restore the missing return of the precomputed total.
    return maxDoc;
}
/// <summary>Searches each sub-searcher for its top <c>nDocs</c> hits and
/// merges them into a single score-ordered TopDocs with absolute doc ids.</summary>
public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
    HitQueue hq = new HitQueue(nDocs, false);
    // FIX: the hit counter was never declared/initialized.
    int totalHits = 0;
    
    for (int i = 0; i < searchables.Length; i++)
    {
        // search each searcher
        TopDocs docs = searchables[i].Search(weight, filter, nDocs);
        totalHits += docs.TotalHits; // update totalHits
        ScoreDoc[] scoreDocs = docs.ScoreDocs;
        for (int j = 0; j < scoreDocs.Length; j++)
        {
            // merge scoreDocs into hq
            ScoreDoc scoreDoc = scoreDocs[j];
            scoreDoc.doc += starts[i]; // convert doc to an absolute id
            if (!hq.Insert(scoreDoc))
                break; // no more scores > minScore
        }
    }
    
    // Drain the queue best-hit-first into the result array.
    ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
    for (int i = hq.Size() - 1; i >= 0; i--)
    {
        scoreDocs2[i] = (ScoreDoc) hq.Pop();
    }
    
    float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : scoreDocs2[0].score;
    
    return new TopDocs(totalHits, scoreDocs2, maxScore);
}
/// <summary>Searches each sub-searcher with the given sort and merges the
/// per-searcher TopFieldDocs into one sorted result with absolute doc ids.</summary>
public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
{
    FieldDocSortedHitQueue hq = null;
    // FIX: the hit counter was never declared/initialized.
    int totalHits = 0;
    
    float maxScore = System.Single.NegativeInfinity;
    
    for (int i = 0; i < searchables.Length; i++)
    {
        // search each searcher
        TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
        // If one of the Sort fields is FIELD_DOC, need to fix its values, so that
        // it will break ties by doc Id properly. Otherwise, it will compare to
        // 'relative' doc Ids, that belong to two different searchers.
        for (int j = 0; j < docs.fields.Length; j++)
        {
            if (docs.fields[j].GetType() == SortField.DOC)
            {
                // iterate over the score docs and change their fields value
                for (int j2 = 0; j2 < docs.ScoreDocs.Length; j2++)
                {
                    FieldDoc fd = (FieldDoc) docs.ScoreDocs[j2];
                    fd.fields[j] = (System.Int32) (((System.Int32) fd.fields[j]) + starts[i]);
                }
                break;
            }
        }
        // FIX: guard the queue creation — without 'if (hq == null)' the queue
        // was rebuilt on every iteration, discarding previously merged hits.
        if (hq == null)
            hq = new FieldDocSortedHitQueue(docs.fields, n);
        totalHits += docs.TotalHits; // update totalHits
        maxScore = System.Math.Max(maxScore, docs.GetMaxScore());
        ScoreDoc[] scoreDocs = docs.ScoreDocs;
        for (int j = 0; j < scoreDocs.Length; j++)
        {
            // merge scoreDocs into hq
            ScoreDoc scoreDoc = scoreDocs[j];
            scoreDoc.doc += starts[i]; // convert doc to an absolute id
            if (!hq.Insert(scoreDoc))
                break; // no more scores > minScore
        }
    }
    
    // Drain the queue best-hit-first into the result array.
    ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
    for (int i = hq.Size() - 1; i >= 0; i--)
    {
        scoreDocs2[i] = (ScoreDoc) hq.Pop();
    }
    
    return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
}
/// <summary>Lower-level search API: runs the collector against each
/// sub-searcher, wrapped so collected doc bases are shifted to absolute ids.</summary>
public override void Search(Weight weight, Filter filter, Collector collector)
{
    for (int i = 0; i < searchables.Length; i++)
    {
        Collector wrapped = new AnonymousClassCollector(collector, starts[i], this);
        searchables[i].Search(weight, filter, wrapped);
    }
}
/// <summary>Rewrites the query against every sub-searcher and combines the
/// per-searcher rewrites into a single query.</summary>
public override Query Rewrite(Query original)
{
    Query[] rewritten = new Query[searchables.Length];
    for (int i = 0; i < rewritten.Length; i++)
    {
        rewritten[i] = searchables[i].Rewrite(original);
    }
    return rewritten[0].Combine(rewritten);
}
/// <summary>Explains the score of document <c>doc</c> by dispatching to the
/// owning sub-searcher with a translated local doc id.</summary>
public override Explanation Explain(Weight weight, int doc)
{
    int sub = SubSearcher(doc);        // find searcher index
    int localDoc = doc - starts[sub];  // translate to sub-searcher id
    return searchables[sub].Explain(weight, localDoc);
}
/// <summary> Create weight in multiple index scenario.
/// 
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
/// 
/// Steps 1-4 are done here, 5+6 in the search() methods
/// </summary>
/// <returns> rewritten queries
/// </returns>
public /*protected internal*/ override Weight CreateWeight(Query original)
{
    // step 1: rewrite the query against all sub-searchers
    Query rewrittenQuery = Rewrite(original);
    
    // step 2: extract the terms the rewritten query uses
    System.Collections.Hashtable terms = new System.Collections.Hashtable();
    rewrittenQuery.ExtractTerms(terms);
    
    // step 3: aggregate document frequencies across all sub-searchers.
    // FIX: copy the term set into an array with an explicit index and
    // MoveNext() loop — the enumerator was previously read without either.
    Term[] allTermsArray = new Term[terms.Count];
    int index = 0;
    System.Collections.IEnumerator e = terms.Keys.GetEnumerator();
    while (e.MoveNext())
    {
        allTermsArray[index++] = e.Current as Term;
    }
    int[] aggregatedDfs = new int[terms.Count];
    for (int i = 0; i < searchables.Length; i++)
    {
        int[] dfs = searchables[i].DocFreqs(allTermsArray);
        for (int j = 0; j < aggregatedDfs.Length; j++)
        {
            aggregatedDfs[j] += dfs[j];
        }
    }
    
    System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
    for (int i = 0; i < allTermsArray.Length; i++)
    {
        dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
    }
    
    // step 4: create the Weight against a dummy searcher serving the
    // aggregated dfs, so idf reflects the combined index
    int numDocs = MaxDoc();
    CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, GetSimilarity());
    
    return rewrittenQuery.Weight(cacheSim);
}