2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
20 using Document = Mono.Lucene.Net.Documents.Document;
21 using FieldSelector = Mono.Lucene.Net.Documents.FieldSelector;
22 using FieldSelectorResult = Mono.Lucene.Net.Documents.FieldSelectorResult;
23 using Fieldable = Mono.Lucene.Net.Documents.Fieldable;
25 namespace Mono.Lucene.Net.Index
29 /// <summary>An IndexReader which reads multiple, parallel indexes. Each index added
30 /// must have the same number of documents, but typically each contains
31 /// different fields. Each document contains the union of the fields of all
32 /// documents with the same document number. When searching, matches for a
33 /// query term are from the first index added that has the field.
35 /// <p/>This is useful, e.g., with collections that have large fields which
36 /// change rarely and small fields that change more frequently. The smaller
37 /// fields may be re-indexed in a new index and both indexes may be searched
40 /// <p/><strong>Warning:</strong> It is up to you to make sure all indexes
41 /// are created and modified the same way. For example, if you add
42 /// documents to one index, you need to add the same documents in the
43 /// same order to the other indexes. <em>Failure to do so will result in
44 /// undefined behavior</em>.
46 public class ParallelReader:IndexReader, System.ICloneable
// The fields below hold the sub-readers plus the lookup tables that
// Add(IndexReader, bool) fills in as readers are registered.
// Sub-readers in the order they were added; entries are cast to IndexReader when read.
48 private System.Collections.ArrayList readers = new System.Collections.ArrayList();
49 private System.Collections.IList decrefOnClose = new System.Collections.ArrayList(); // remember which subreaders to decRef on close
// Inverse of the constructor's closeSubReaders argument; Add appends its current value to decrefOnClose.
50 internal bool incRefReaders = false;
// Field name -> reader serving that field. Add only fills an entry that is still null,
// so the first reader added with a given field keeps it.
51 private System.Collections.SortedList fieldToReader = new System.Collections.SortedList();
// Reader -> collection of field names it reported via GetFieldNames(FieldOption.ALL) when it was added.
52 private System.Collections.IDictionary readerToFields = new System.Collections.Hashtable();
// Readers whose stored fields Document(int, FieldSelector) copies into its result.
53 private System.Collections.IList storedFieldReaders = new System.Collections.ArrayList();
// Taken from the first reader added; reset to false by DoUndeleteAll().
// NOTE(review): maxDoc and numDocs are assigned in Add, but their declarations are not visible in this excerpt.
57 private bool hasDeletions;
59 /// <summary>Construct a ParallelReader.
60 /// <p/>Note that all subreaders are closed if this ParallelReader is closed.<p/>
// Delegates to ParallelReader(bool) with closeSubReaders = true.
62 public ParallelReader():this(true)
66 /// <summary>Construct a ParallelReader. </summary>
67 /// <param name="closeSubReaders">indicates whether the subreaders should be closed
68 /// when this ParallelReader is closed
70 public ParallelReader(bool closeSubReaders):base()
// incRefReaders is stored as the inverse of closeSubReaders. Add(IndexReader, bool) records it
// per sub-reader in decrefOnClose, which DoClose() consults when releasing each sub-reader.
72 this.incRefReaders = !closeSubReaders;
75 /// <summary>Add an IndexReader.</summary>
76 /// <throws> IOException if there is a low-level IO error </throws>
// NOTE(review): the body of this overload is not visible in this excerpt; presumably it
// forwards to Add(reader, false) so that stored fields are kept -- confirm against the full file.
77 public virtual void Add(IndexReader reader)
83 /// <summary>Add an IndexReader whose stored fields will not be returned. This can
84 /// accelerate search when stored fields are only needed from a subset of
88 /// <throws> ArgumentException if not all indexes contain the same number of documents </throws>
89 /// (that is, the reader's NumDocs() differs from the value recorded for the first reader)
91 /// <throws> ArgumentException if not all indexes have the same value of MaxDoc() </throws>
92 /// (that is, the reader's MaxDoc() differs from the value recorded for the first reader)
94 /// <throws> IOException if there is a low-level IO error </throws>
95 public virtual void Add(IndexReader reader, bool ignoreStoredFields)
// The first reader added defines the maxDoc / numDocs / hasDeletions values kept by this instance.
99 if (readers.Count == 0)
101 this.maxDoc = reader.MaxDoc();
102 this.numDocs = reader.NumDocs();
103 this.hasDeletions = reader.HasDeletions();
// Every reader must agree with the recorded maxDoc and numDocs; otherwise reject it.
106 if (reader.MaxDoc() != maxDoc)
107 // check compatibility
108 throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
109 if (reader.NumDocs() != numDocs)
110 throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
// Remember every field name this reader provides, keyed by the reader itself.
112 System.Collections.Generic.ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
113 readerToFields[reader] = fields;
114 System.Collections.IEnumerator i = fields.GetEnumerator();
117 // update fieldToReader map
118 System.String field = (System.String) i.Current;
// Claim the field only if no earlier reader already did: the first reader added with a field serves it.
119 if (fieldToReader[field] == null)
120 fieldToReader[field] = reader;
// Unless the caller opted out, this reader contributes stored fields to Document().
123 if (!ignoreStoredFields)
124 storedFieldReaders.Add(reader); // add to storedFieldReaders
// Record the per-reader flag that DoClose() reads for this reader.
// NOTE(review): the statements between the stored-field registration and this line are not
// visible here; presumably the reader is appended to 'readers' and IncRef'd when incRefReaders is set -- confirm.
131 decrefOnClose.Add(incRefReaders);
// Clones this reader by delegating to DoReopen(true).
// Any exception raised on the way is wrapped in a System.SystemException that carries the
// original message and keeps the original exception as its inner exception.
134 public override System.Object Clone()
138 return DoReopen(true);
140 catch (System.Exception ex)
142 throw new System.SystemException(ex.Message, ex);
146 /// <summary> Tries to reopen the subreaders.
148 /// If one or more subreaders could be re-opened (i. e. subReader.reopen()
149 /// returned a new instance != subReader), then a new ParallelReader instance
150 /// is returned, otherwise this instance is returned.
152 /// A re-opened instance might share one or more subreaders with the old
153 /// instance. Index modification operations result in undefined behavior
154 /// when performed before the old instance is closed.
155 /// (see {@link IndexReader#Reopen()}).
157 /// If subreaders are shared, then the reference count of those
158 /// readers is increased to ensure that the subreaders remain open
159 /// until the last referring reader is closed.
162 /// <throws> CorruptIndexException if the index is corrupt </throws>
163 /// <throws> IOException if there is a low-level IO error </throws>
164 public override IndexReader Reopen()
// Delegates to DoReopen with doClone = false.
168 return DoReopen(false);
// Shared implementation behind Clone() (doClone = true) and Reopen() (doClone = false).
// Phase 1 collects one replacement candidate per sub-reader; phase 2 assembles a new
// ParallelReader from those candidates.
// NOTE(review): several control-flow lines (branch conditions, try/finally, returns) are not
// visible in this excerpt; comments below describe only what the visible statements do.
172 protected internal virtual IndexReader DoReopen(bool doClone)
176 bool reopened = false;
177 System.Collections.IList newReaders = new System.Collections.ArrayList();
179 bool success = false;
183 for (int i = 0; i < readers.Count; i++)
185 IndexReader oldReader = (IndexReader) readers[i];
186 IndexReader newReader = null;
// The candidate comes from either Clone() or Reopen() of the old sub-reader.
// NOTE(review): the condition choosing between the two is not visible; presumably it is doClone.
189 newReader = (IndexReader) oldReader.Clone();
193 newReader = oldReader.Reopen();
195 newReaders.Add(newReader);
196 // if at least one of the subreaders was updated we remember that
197 // and return a new ParallelReader
198 if (newReader != oldReader)
// Failure path: something went wrong after at least one sub-reader was replaced, so walk the
// candidates for cleanup. The cleanup call itself is not visible here; an IOException raised
// during it is caught and ignored so the remaining candidates are still processed.
207 if (!success && reopened)
209 for (int i = 0; i < newReaders.Count; i++)
211 IndexReader r = (IndexReader) newReaders[i];
218 catch (System.IO.IOException ignore)
220 // keep going - we want to clean up as much as possible
// Build the replacement ParallelReader, tracking for each sub-reader whether it is shared
// with this instance (flag true) or is a brand-new instance (flag false).
229 System.Collections.IList newDecrefOnClose = new System.Collections.ArrayList();
230 ParallelReader pr = new ParallelReader();
231 for (int i = 0; i < readers.Count; i++)
233 IndexReader oldReader = (IndexReader) readers[i];
234 IndexReader newReader = (IndexReader) newReaders[i];
235 if (newReader == oldReader)
237 newDecrefOnClose.Add(true);
242 // this is a new subreader instance, so on close() we don't
243 // decRef but close it
244 newDecrefOnClose.Add(false);
// Carry over the stored-field setting: a reader absent from storedFieldReaders was added
// with ignoreStoredFields = true.
246 pr.Add(newReader, !storedFieldReaders.Contains(oldReader));
// Replace the flags that pr.Add() recorded with the shared/new flags computed above,
// and copy this instance's incRefReaders setting.
248 pr.decrefOnClose = newDecrefOnClose;
249 pr.incRefReaders = incRefReaders;
254 // No subreader was refreshed
// NOTE(review): the return statement is not visible in this excerpt; presumably it returns
// the numDocs value captured from the first reader in Add -- confirm.
260 public override int NumDocs()
262 // Don't call ensureOpen() here (it could affect performance)
// NOTE(review): the return statement is not visible in this excerpt; presumably it returns
// the maxDoc value captured from the first reader in Add -- confirm.
266 public override int MaxDoc()
268 // Don't call ensureOpen() here (it could affect performance)
// NOTE(review): the return statement is not visible in this excerpt; presumably it returns
// the hasDeletions field -- confirm.
272 public override bool HasDeletions()
274 // Don't call ensureOpen() here (it could affect performance)
278 // check first reader
// When at least one sub-reader exists, the answer comes from the first one only.
// NOTE(review): the result returned when no reader has been added is not visible here.
279 public override bool IsDeleted(int n)
281 // Don't call ensureOpen() here (it could affect performance)
282 if (readers.Count > 0)
283 return ((IndexReader) readers[0]).IsDeleted(n);
287 // delete in all readers
// Forwards the deletion of document n to every sub-reader via DeleteDocument(n).
// NOTE(review): any statement following the loop is not visible in this excerpt.
288 protected internal override void DoDelete(int n)
290 for (int i = 0; i < readers.Count; i++)
292 ((IndexReader) readers[i]).DeleteDocument(n);
297 // undeleteAll in all readers
// Calls UndeleteAll() on every sub-reader, then clears this reader's hasDeletions flag.
298 protected internal override void DoUndeleteAll()
300 for (int i = 0; i < readers.Count; i++)
302 ((IndexReader) readers[i]).UndeleteAll();
304 hasDeletions = false;
307 // append fields from storedFieldReaders
// Builds a fresh Document for document number n by copying fields from the readers
// registered in storedFieldReaders. fieldSelector may be null.
308 public override Document Document(int n, FieldSelector fieldSelector)
311 Document result = new Document();
312 for (int i = 0; i < storedFieldReaders.Count; i++)
314 IndexReader reader = (IndexReader) storedFieldReaders[i];
// With no selector the reader starts out included.
316 bool include = (fieldSelector == null);
// Otherwise look through the field names recorded for this reader for one the selector
// does not answer with NO_LOAD.
// NOTE(review): the statements run on a match are not visible; presumably they set
// 'include' and stop scanning -- confirm.
319 System.Collections.IEnumerator it = ((System.Collections.ICollection) readerToFields[reader]).GetEnumerator();
320 while (it.MoveNext())
322 if (fieldSelector.Accept((System.String) it.Current) != FieldSelectorResult.NO_LOAD)
// Load document n from this reader with the same selector and copy each of its fields.
// NOTE(review): the guard around this section is not visible; presumably it runs only when 'include' is true.
331 System.Collections.IEnumerator fieldIterator = reader.Document(n, fieldSelector).GetFields().GetEnumerator();
332 while (fieldIterator.MoveNext())
334 result.Add((Fieldable) fieldIterator.Current);
// Asks, for every (field, reader) pair in fieldToReader, the owning reader for the term
// vector of document n, and returns the collected vectors as an array.
342 public override TermFreqVector[] GetTermFreqVectors(int n)
345 System.Collections.ArrayList results = new System.Collections.ArrayList();
// Enumerates a Hashtable copy of fieldToReader, so entries arrive as DictionaryEntry values.
346 System.Collections.IEnumerator i = new System.Collections.Hashtable(fieldToReader).GetEnumerator();
349 System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry) i.Current;
350 System.String field = (System.String) e.Key;
351 IndexReader reader = (IndexReader) e.Value;
// NOTE(review): the statement adding 'vector' to 'results' is not visible; presumably
// only non-null vectors are added -- confirm.
352 TermFreqVector vector = reader.GetTermFreqVector(n, field);
356 return (TermFreqVector[]) results.ToArray(typeof(TermFreqVector));
// Returns the term vector of 'field' for document n from the reader registered for that
// field, or null when no reader is registered for it.
359 public override TermFreqVector GetTermFreqVector(int n, System.String field)
362 IndexReader reader = ((IndexReader) fieldToReader[field]);
363 return reader == null?null:reader.GetTermFreqVector(n, field);
// Forwards the mapper-based term vector request to the reader registered for 'field'.
// NOTE(review): a null check on 'reader' is not visible in this excerpt; presumably the
// call is skipped when the field is unknown -- confirm.
367 public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
370 IndexReader reader = ((IndexReader) fieldToReader[field]);
373 reader.GetTermFreqVector(docNumber, field, mapper);
// Feeds the mapper with the term vectors of document docNumber for every field in
// fieldToReader, each taken from the reader registered for that field.
377 public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
// Enumerates a Hashtable copy of fieldToReader, so entries arrive as DictionaryEntry values.
381 System.Collections.IEnumerator i = new System.Collections.Hashtable(fieldToReader).GetEnumerator();
384 System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry) i.Current;
385 System.String field = (System.String) e.Key;
386 IndexReader reader = (IndexReader) e.Value;
387 reader.GetTermFreqVector(docNumber, field, mapper);
// Asks the reader registered for 'field' whether it has norms; false when no reader is registered.
391 public override bool HasNorms(System.String field)
394 IndexReader reader = ((IndexReader) fieldToReader[field]);
395 return reader == null?false:reader.HasNorms(field);
// Returns the norms of 'field' from the reader registered for it; null when no reader is registered.
398 public override byte[] Norms(System.String field)
401 IndexReader reader = ((IndexReader) fieldToReader[field]);
402 return reader == null?null:reader.Norms(field);
// Forwards the request to write the norms of 'field' into 'result' at 'offset' to the
// reader registered for that field.
// NOTE(review): a null check on 'reader' is not visible in this excerpt -- confirm one exists.
405 public override void Norms(System.String field, byte[] result, int offset)
408 IndexReader reader = ((IndexReader) fieldToReader[field]);
410 reader.Norms(field, result, offset);
// Forwards the norm update for document n to the reader registered for 'field'.
// NOTE(review): a null check on 'reader' is not visible in this excerpt -- confirm one exists.
413 protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
415 IndexReader reader = ((IndexReader) fieldToReader[field]);
417 reader.DoSetNorm(n, field, value_Renamed);
// Returns a ParallelTermEnum over this reader, created without a starting term.
420 public override TermEnum Terms()
423 return new ParallelTermEnum(this);
// Returns a ParallelTermEnum over this reader, created for the given term.
426 public override TermEnum Terms(Term term)
429 return new ParallelTermEnum(this, term);
// Returns the document frequency of 'term' from the reader registered for the term's
// field; 0 when no reader is registered for that field.
432 public override int DocFreq(Term term)
435 IndexReader reader = ((IndexReader) fieldToReader[term.Field()]);
436 return reader == null?0:reader.DocFreq(term);
// Returns a ParallelTermDocs wrapper created for 'term'.
439 public override TermDocs TermDocs(Term term)
442 return new ParallelTermDocs(this, term);
// Returns a ParallelTermDocs wrapper with no term selected yet; callers position it with Seek.
445 public override TermDocs TermDocs()
448 return new ParallelTermDocs(this);
// Returns a ParallelTermPositions wrapper created for 'term'.
451 public override TermPositions TermPositions(Term term)
454 return new ParallelTermPositions(this, term);
// Returns a ParallelTermPositions wrapper with no term selected yet; callers position it with Seek.
457 public override TermPositions TermPositions()
460 return new ParallelTermPositions(this);
463 /// <summary> Checks recursively if all subreaders are up to date. </summary>
// Queries IsCurrent() on each sub-reader in turn.
// NOTE(review): the return statements are not visible in this excerpt; per the summary
// above, presumably false is returned at the first stale sub-reader and true otherwise.
464 public override bool IsCurrent()
466 for (int i = 0; i < readers.Count; i++)
468 if (!((IndexReader) readers[i]).IsCurrent())
474 // all subreaders are up to date
478 /// <summary> Checks recursively if all subindexes are optimized </summary>
// Queries IsOptimized() on each sub-reader in turn.
// NOTE(review): the return statements are not visible in this excerpt; per the summary
// above, presumably false is returned at the first non-optimized sub-reader and true otherwise.
479 public override bool IsOptimized()
481 for (int i = 0; i < readers.Count; i++)
483 if (!((IndexReader) readers[i]).IsOptimized())
489 // all subindexes are optimized
494 /// <summary>Not implemented.</summary>
495 /// <throws> NotSupportedException always </throws>
496 public override long GetVersion()
// The only visible statement throws, so this method never returns a value.
498 throw new System.NotSupportedException("ParallelReader does not support this method.");
// Returns the sub-readers, in the order they were added, as a newly allocated array
// (ArrayList.ToArray copies the elements, so the internal list itself is not exposed).
502 public /*internal*/ virtual IndexReader[] GetSubReaders()
504 return (IndexReader[]) readers.ToArray(typeof(IndexReader));
// NOTE(review): the body of the parameterless overload is not visible in this excerpt;
// presumably it forwards to DoCommit(null) -- confirm.
510 protected internal override void DoCommit()
// Commits every sub-reader, passing the same commitUserData to each one.
515 protected internal override void DoCommit(System.Collections.Generic.IDictionary<string, string> commitUserData)
517 for (int i = 0; i < readers.Count; i++)
518 ((IndexReader) readers[i]).Commit(commitUserData);
// Releases every sub-reader and then purges this reader's entries from the default FieldCache.
521 protected internal override void DoClose()
525 for (int i = 0; i < readers.Count; i++)
// decrefOnClose[i] is the flag recorded for readers[i]: when true the sub-reader is only
// DecRef'd; Close() is the alternative path.
// NOTE(review): the 'else' separating the two calls is not visible in this excerpt.
527 if (((System.Boolean) decrefOnClose[i]))
529 ((IndexReader) readers[i]).DecRef();
533 ((IndexReader) readers[i]).Close();
538 Mono.Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(this);
// Collects the field names matching the 'fieldNames' option from every sub-reader into one list.
// NOTE(review): fieldSet is a List<string> filled with AddRange, which does not de-duplicate.
// Assuming fieldSet is what gets returned (the return statement is not visible here), a field
// name reported by more than one sub-reader appears more than once in the result.
541 public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
544 System.Collections.Generic.List<string> fieldSet = new System.Collections.Generic.List<string>();
545 for (int i = 0; i < readers.Count; i++)
547 IndexReader reader = ((IndexReader) readers[i]);
548 System.Collections.Generic.ICollection<string> names = reader.GetFieldNames(fieldNames);
549 fieldSet.AddRange(names);
// TermEnum over the enclosing ParallelReader: it walks the terms of one field at a time,
// using the reader registered for that field in fieldToReader, and moves on to the next
// field (in fieldToReader's sorted key order) when the current field is exhausted.
554 private class ParallelTermEnum:TermEnum
// Stores the enclosing ParallelReader; called first by both constructors.
556 private void InitBlock(ParallelReader enclosingInstance)
558 this.enclosingInstance = enclosingInstance;
560 private ParallelReader enclosingInstance;
561 public ParallelReader Enclosing_Instance
565 return enclosingInstance;
// Name of the field currently being enumerated.
569 private System.String field;
// Iterator over the remaining field names; created lazily by Next().
570 private System.Collections.IEnumerator fieldIterator;
// Term enumeration of the reader serving the current field.
571 private TermEnum termEnum;
// Starts at the first key of the sorted fieldToReader list.
573 public ParallelTermEnum(ParallelReader enclosingInstance)
575 InitBlock(enclosingInstance);
578 field = ((System.String) Enclosing_Instance.fieldToReader.GetKey(0));
// Catches the ArgumentOutOfRangeException from the GetKey(0) call above; per the comment
// below, that is the no-fields case.
580 catch (ArgumentOutOfRangeException e)
582 // No fields, so keep field == null, termEnum == null
// NOTE(review): the guard that skips this line when there are no fields is not visible in this excerpt.
586 termEnum = ((IndexReader) Enclosing_Instance.fieldToReader[field]).Terms();
// Starts at 'term' within the reader registered for the term's field.
589 public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
591 InitBlock(enclosingInstance);
592 field = term.Field();
593 IndexReader reader = ((IndexReader) Enclosing_Instance.fieldToReader[field]);
// NOTE(review): a null check on 'reader' is not visible in this excerpt -- confirm one exists.
595 termEnum = reader.Terms(term);
// Advances to the next term, crossing over to the next field when the current one runs out.
598 public override bool Next()
600 if (termEnum == null)
603 // another term in this field?
// The casts to System.Object make this a reference comparison of the two strings, not a
// comparison of their contents.
// NOTE(review): presumably this relies on field names being interned -- confirm.
604 if (termEnum.Next() && (System.Object) termEnum.Term().Field() == (System.Object) field)
605 return true; // yes, keep going
607 termEnum.Close(); // close old termEnum
609 // find the next field with terms, if any
// First time only: copy the entries of fieldToReader, starting at the current field,
// into a new sorted list and iterate over its keys.
610 if (fieldIterator == null)
612 System.Collections.Comparer comparer = System.Collections.Comparer.Default;
613 System.Collections.SortedList newList = new System.Collections.SortedList();
614 if (Enclosing_Instance.fieldToReader != null)
616 if (Enclosing_Instance.fieldToReader.Count > 0)
// Skip the keys that sort before the current field.
// NOTE(review): the declaration and increment of 'index' are not visible in this excerpt.
619 while (comparer.Compare(Enclosing_Instance.fieldToReader.GetKey(index), field) < 0)
621 for (; index < Enclosing_Instance.fieldToReader.Count; index++)
623 newList.Add(Enclosing_Instance.fieldToReader.GetKey(index), Enclosing_Instance.fieldToReader[Enclosing_Instance.fieldToReader.GetKey(index)]);
628 fieldIterator = newList.Keys.GetEnumerator();
629 fieldIterator.MoveNext();
630 System.Object generatedAux = fieldIterator.Current; // Skip field to get next one
// Try each remaining field: open its reader's enumeration at the first term of that field
// and accept it if the enumeration is actually positioned on a term of that field.
632 while (fieldIterator.MoveNext())
634 field = ((System.String) fieldIterator.Current);
635 termEnum = ((IndexReader) Enclosing_Instance.fieldToReader[field]).Terms(new Term(field));
636 Term term = termEnum.Term();
637 if (term != null && (System.Object) term.Field() == (System.Object) field)
643 return false; // no more fields
// Current term of the underlying enumeration.
// NOTE(review): the value returned when termEnum is null is not visible in this excerpt.
646 public override Term Term()
648 if (termEnum == null)
651 return termEnum.Term();
// Document frequency of the current term of the underlying enumeration.
// NOTE(review): the value returned when termEnum is null is not visible in this excerpt.
654 public override int DocFreq()
656 if (termEnum == null)
659 return termEnum.DocFreq();
// NOTE(review): the statement run when termEnum is non-null is not visible; presumably termEnum.Close().
662 public override void Close()
664 if (termEnum != null)
669 // wrap a TermDocs in order to support seek(Term)
// On every Seek the wrapper swaps in the TermDocs of the reader registered for the
// term's field; the other members delegate to that inner TermDocs.
670 private class ParallelTermDocs : TermDocs
// Stores the enclosing ParallelReader; called by both constructors.
672 private void InitBlock(ParallelReader enclosingInstance)
674 this.enclosingInstance = enclosingInstance;
676 private ParallelReader enclosingInstance;
677 public ParallelReader Enclosing_Instance
681 return enclosingInstance;
// Inner TermDocs currently delegated to; assigned null by Seek when the term's field has no reader.
685 protected internal TermDocs termDocs;
// Creates a wrapper with no inner TermDocs selected.
687 public ParallelTermDocs(ParallelReader enclosingInstance)
689 InitBlock(enclosingInstance);
691 public ParallelTermDocs(ParallelReader enclosingInstance, Term term)
693 InitBlock(enclosingInstance);
// Uses the first sub-reader's TermDocs(null), or null when there are no sub-readers.
// NOTE(review): the condition guarding this assignment is not visible; presumably it
// applies when 'term' is null, with Seek(term) used otherwise -- confirm.
695 termDocs = (Enclosing_Instance.readers.Count == 0)?null:((IndexReader) Enclosing_Instance.readers[0]).TermDocs(null);
// Doc() and Freq() delegate without a null check, unlike Next/Read/SkipTo below.
700 public virtual int Doc()
702 return termDocs.Doc();
704 public virtual int Freq()
706 return termDocs.Freq();
// Switches the inner TermDocs to the one from the reader registered for the term's field,
// or to null when no reader is registered for it.
709 public virtual void Seek(Term term)
711 IndexReader reader = ((IndexReader) Enclosing_Instance.fieldToReader[term.Field()]);
712 termDocs = reader != null?reader.TermDocs(term):null;
// Seeks to the term the given enumeration is currently positioned on.
715 public virtual void Seek(TermEnum termEnum)
717 Seek(termEnum.Term());
// The three methods below test for a missing inner TermDocs before delegating.
// NOTE(review): the values returned in the null case are not visible in this excerpt.
720 public virtual bool Next()
722 if (termDocs == null)
725 return termDocs.Next();
728 public virtual int Read(int[] docs, int[] freqs)
730 if (termDocs == null)
733 return termDocs.Read(docs, freqs);
736 public virtual bool SkipTo(int target)
738 if (termDocs == null)
741 return termDocs.SkipTo(target);
// NOTE(review): the statement run when termDocs is non-null is not visible; presumably termDocs.Close().
744 public virtual void Close()
746 if (termDocs != null)
// ParallelTermDocs variant whose inner enumeration is obtained through TermPositions(term),
// so the position and payload members can cast termDocs to TermPositions.
751 private class ParallelTermPositions:ParallelTermDocs, TermPositions
// Stores the enclosing ParallelReader in this class's own field (separate from the base class's field).
753 private void InitBlock(ParallelReader enclosingInstance)
755 this.enclosingInstance = enclosingInstance;
757 private ParallelReader enclosingInstance;
// Declared with 'new', so it hides the base class property of the same name.
758 public new ParallelReader Enclosing_Instance
762 return enclosingInstance;
767 public ParallelTermPositions(ParallelReader enclosingInstance):base(enclosingInstance)
769 InitBlock(enclosingInstance);
// Uses the base constructor that takes no term.
// NOTE(review): the statement that uses 'term' is not visible in this excerpt; presumably Seek(term).
771 public ParallelTermPositions(ParallelReader enclosingInstance, Term term):base(enclosingInstance)
773 InitBlock(enclosingInstance);
// Same lookup as the base Seek, but requests TermPositions from the reader; termDocs
// becomes null when no reader is registered for the term's field.
777 public override void Seek(Term term)
779 IndexReader reader = ((IndexReader) Enclosing_Instance.fieldToReader[term.Field()]);
780 termDocs = reader != null?reader.TermPositions(term):null;
// The members below cast termDocs to TermPositions and delegate, with no null check.
783 public virtual int NextPosition()
785 // It is an error to call this if there is no next position, e.g. if termDocs==null
786 return ((TermPositions) termDocs).NextPosition();
789 public virtual int GetPayloadLength()
791 return ((TermPositions) termDocs).GetPayloadLength();
794 public virtual byte[] GetPayload(byte[] data, int offset)
796 return ((TermPositions) termDocs).GetPayload(data, offset);
800 // TODO: Remove warning after API has been finalized
801 public virtual bool IsPayloadAvailable()
803 return ((TermPositions) termDocs).IsPayloadAvailable();