/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using Document = Mono.Lucene.Net.Documents.Document;
using FieldSelector = Mono.Lucene.Net.Documents.FieldSelector;
using FieldSelectorResult = Mono.Lucene.Net.Documents.FieldSelectorResult;
using FieldOption = Mono.Lucene.Net.Index.IndexReader.FieldOption;
using MergeAbortedException = Mono.Lucene.Net.Index.MergePolicy.MergeAbortedException;
using Directory = Mono.Lucene.Net.Store.Directory;
using IndexInput = Mono.Lucene.Net.Store.IndexInput;
using IndexOutput = Mono.Lucene.Net.Store.IndexOutput;
29 namespace Mono.Lucene.Net.Index
/// <summary> The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}),
/// into a single Segment. After adding the appropriate readers, call the merge method to combine the
/// segments.
///
/// If the compoundFile flag is set, then the segments will be merged into a compound file.
/// </summary>
/// <seealso cref="merge"/>
/// <seealso cref="add"/>
44 public sealed class SegmentMerger
46 private class AnonymousClassCheckAbort:CheckAbort
48 private void InitBlock(SegmentMerger enclosingInstance)
50 this.enclosingInstance = enclosingInstance;
52 private SegmentMerger enclosingInstance;
53 public SegmentMerger Enclosing_Instance
57 return enclosingInstance;
61 internal AnonymousClassCheckAbort(SegmentMerger enclosingInstance, Mono.Lucene.Net.Index.MergePolicy.OneMerge Param1, Mono.Lucene.Net.Store.Directory Param2):base(Param1, Param2)
63 InitBlock(enclosingInstance);
65 public override void Work(double units)
70 private class AnonymousClassCheckAbort1:CheckAbort
72 private void InitBlock(SegmentMerger enclosingInstance)
74 this.enclosingInstance = enclosingInstance;
76 private SegmentMerger enclosingInstance;
77 public SegmentMerger Enclosing_Instance
81 return enclosingInstance;
85 internal AnonymousClassCheckAbort1(SegmentMerger enclosingInstance, Mono.Lucene.Net.Index.MergePolicy.OneMerge Param1, Mono.Lucene.Net.Store.Directory Param2):base(Param1, Param2)
87 InitBlock(enclosingInstance);
89 public override void Work(double units)
95 private class AnonymousClassFieldSelector : FieldSelector
97 public AnonymousClassFieldSelector(SegmentMerger enclosingInstance)
99 InitBlock(enclosingInstance);
101 private void InitBlock(SegmentMerger enclosingInstance)
103 this.enclosingInstance = enclosingInstance;
105 private SegmentMerger enclosingInstance;
106 public SegmentMerger Enclosing_Instance
110 return enclosingInstance;
114 public FieldSelectorResult Accept(System.String fieldName)
116 return FieldSelectorResult.LOAD_FOR_MERGE;
119 private void InitBlock()
121 termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
124 /// <summary>norms header placeholder </summary>
125 internal static readonly byte[] NORMS_HEADER = new byte[]{(byte) 'N', (byte) 'R', (byte) 'M', unchecked((byte) - 1)};
127 private Directory directory;
128 private System.String segment;
129 private int termIndexInterval;
131 private System.Collections.IList readers = new System.Collections.ArrayList();
132 private FieldInfos fieldInfos;
134 private int mergedDocs;
136 private CheckAbort checkAbort;
138 // Whether we should merge doc stores (stored fields and
139 // vectors files). When all segments we are merging
140 // already share the same doc store files, we don't need
141 // to merge the doc stores.
142 private bool mergeDocStores;
144 /// <summary>Maximum number of contiguous documents to bulk-copy
145 /// when merging stored fields
147 private const int MAX_RAW_MERGE_DOCS = 4192;
149 /// <summary>This ctor used only by test code.
152 /// <param name="dir">The Directory to merge the other segments into
154 /// <param name="name">The name of the new segment
156 public /*internal*/ SegmentMerger(Directory dir, System.String name)
161 checkAbort = new AnonymousClassCheckAbort(this, null, null);
164 internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge)
167 directory = writer.GetDirectory();
171 checkAbort = new CheckAbort(merge, directory);
175 checkAbort = new AnonymousClassCheckAbort1(this, null, null);
177 termIndexInterval = writer.GetTermIndexInterval();
180 internal bool HasProx()
182 return fieldInfos.HasProx();
185 /// <summary> Add an IndexReader to the collection of readers that are to be merged</summary>
186 /// <param name="reader">
188 public /*internal*/ void Add(IndexReader reader)
193 /// <summary> </summary>
194 /// <param name="i">The index of the reader to return
196 /// <returns> The ith reader to be merged
198 internal IndexReader SegmentReader(int i)
200 return (IndexReader) readers[i];
203 /// <summary> Merges the readers specified by the {@link #add} method into the directory passed to the constructor</summary>
204 /// <returns> The number of documents that were merged
206 /// <throws> CorruptIndexException if the index is corrupt </throws>
207 /// <throws> IOException if there is a low-level IO error </throws>
208 public /*internal*/ int Merge()
213 /// <summary> Merges the readers specified by the {@link #add} method
214 /// into the directory passed to the constructor.
216 /// <param name="mergeDocStores">if false, we will not merge the
217 /// stored fields nor vectors files
219 /// <returns> The number of documents that were merged
221 /// <throws> CorruptIndexException if the index is corrupt </throws>
222 /// <throws> IOException if there is a low-level IO error </throws>
223 internal int Merge(bool mergeDocStores)
226 this.mergeDocStores = mergeDocStores;
228 // NOTE: it's important to add calls to
229 // checkAbort.work(...) if you make any changes to this
230 // method that will spend alot of time. The frequency
231 // of this check impacts how long
232 // IndexWriter.close(false) takes to actually stop the
235 mergedDocs = MergeFields();
239 if (mergeDocStores && fieldInfos.HasVectors())
245 /// <summary> close all IndexReaders that have been added.
246 /// Should not be called before merge().
248 /// <throws> IOException </throws>
249 public /*internal*/ void CloseReaders()
251 for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
253 ((IndexReader) iter.Current).Close();
257 public /*internal*/ System.Collections.Generic.ICollection<string> GetMergedFiles()
259 System.Collections.Generic.IDictionary<string,string> fileSet = new System.Collections.Generic.Dictionary<string,string>();
262 for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.Length; i++)
264 System.String ext = IndexFileNames.COMPOUND_EXTENSIONS[i];
266 if (ext.Equals(IndexFileNames.PROX_EXTENSION) && !HasProx())
269 if (mergeDocStores || (!ext.Equals(IndexFileNames.FIELDS_EXTENSION) && !ext.Equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
270 fileSet[segment + "." + ext] = segment + "." + ext;
273 // Fieldable norm files
274 for (int i = 0; i < fieldInfos.Size(); i++)
276 FieldInfo fi = fieldInfos.FieldInfo(i);
277 if (fi.isIndexed && !fi.omitNorms)
279 fileSet[segment + "." + IndexFileNames.NORMS_EXTENSION]=segment + "." + IndexFileNames.NORMS_EXTENSION;
285 if (fieldInfos.HasVectors() && mergeDocStores)
287 for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.Length; i++)
289 fileSet[segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]] = segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i];
296 public /*internal*/ System.Collections.Generic.ICollection<string> CreateCompoundFile(System.String fileName)
298 System.Collections.Generic.ICollection<string> files = GetMergedFiles();
299 CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
301 // Now merge all added files
302 System.Collections.IEnumerator it = files.GetEnumerator();
303 while (it.MoveNext())
305 cfsWriter.AddFile((System.String) it.Current);
314 private void AddIndexed(IndexReader reader, FieldInfos fInfos, System.Collections.Generic.ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions)
316 System.Collections.Generic.IEnumerator<string> i = names.GetEnumerator();
319 System.String field = i.Current;
320 fInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field), storePayloads, omitTFAndPositions);
324 private SegmentReader[] matchingSegmentReaders;
325 private int[] rawDocLengths;
326 private int[] rawDocLengths2;
328 private void SetMatchingSegmentReaders()
330 // If the i'th reader is a SegmentReader and has
331 // identical fieldName -> number mapping, then this
332 // array will be non-null at position i:
333 int numReaders = readers.Count;
334 matchingSegmentReaders = new SegmentReader[numReaders];
336 // If this reader is a SegmentReader, and all of its
337 // field name -> number mappings match the "merged"
338 // FieldInfos, then we can do a bulk copy of the
340 for (int i = 0; i < numReaders; i++)
342 IndexReader reader = (IndexReader) readers[i];
343 if (reader is SegmentReader)
345 SegmentReader segmentReader = (SegmentReader) reader;
347 FieldInfos segmentFieldInfos = segmentReader.FieldInfos();
348 int numFieldInfos = segmentFieldInfos.Size();
349 for (int j = 0; same && j < numFieldInfos; j++)
351 same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
355 matchingSegmentReaders[i] = segmentReader;
360 // Used for bulk-reading raw bytes for stored fields
361 rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
362 rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];
365 /// <summary> </summary>
366 /// <returns> The number of documents in all of the readers
368 /// <throws> CorruptIndexException if the index is corrupt </throws>
369 /// <throws> IOException if there is a low-level IO error </throws>
370 private int MergeFields()
375 // When we are not merging by doc stores, that means
376 // all segments were written as part of a single
377 // autoCommit=false IndexWriter session, so their field
378 // name -> number mapping are the same. So, we start
379 // with the fieldInfos of the last segment in this
380 // case, to keep that numbering.
381 SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
382 fieldInfos = (FieldInfos) sr.core.fieldInfos.Clone();
386 fieldInfos = new FieldInfos(); // merge field names
389 for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
391 IndexReader reader = (IndexReader) iter.Current;
392 if (reader is SegmentReader)
394 SegmentReader segmentReader = (SegmentReader) reader;
395 FieldInfos readerFieldInfos = segmentReader.FieldInfos();
396 int numReaderFieldInfos = readerFieldInfos.Size();
397 for (int j = 0; j < numReaderFieldInfos; j++)
399 FieldInfo fi = readerFieldInfos.FieldInfo(j);
400 fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
405 AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
406 AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
407 AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
408 AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
409 AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
410 AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
411 AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
412 fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
415 fieldInfos.Write(directory, segment + ".fnm");
419 SetMatchingSegmentReaders();
424 // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
425 // in merge mode, we use this FieldSelector
426 FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
428 // merge field values
429 FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
434 for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
436 IndexReader reader = (IndexReader) iter.Current;
437 SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
438 FieldsReader matchingFieldsReader = null;
439 if (matchingSegmentReader != null)
441 FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
442 if (fieldsReader != null && fieldsReader.CanReadRawDocs())
444 matchingFieldsReader = fieldsReader;
447 if (reader.HasDeletions())
449 docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
453 docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
459 fieldsWriter.Close();
462 System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
463 long fdxFileLength = directory.FileLength(fileName);
465 if (4 + ((long) docCount) * 8 != fdxFileLength)
466 // This is most likely a bug in Sun JRE 1.6.0_04/_05;
467 // we detect that the bug has struck, here, and
468 // throw an exception to prevent the corruption from
469 // entering the index. See LUCENE-1282 for
471 throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
473 // If we are skipping the doc stores, that means there
474 // are no deletions in any of these segments, so we
475 // just sum numDocs() of each segment to get total docCount
478 for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
480 docCount += ((IndexReader) iter.Current).NumDocs();
487 private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
490 int maxDoc = reader.MaxDoc();
491 if (matchingFieldsReader != null)
493 // We can bulk-copy because the fieldInfos are "congruent"
494 for (int j = 0; j < maxDoc; )
496 if (reader.IsDeleted(j))
502 // We can optimize this case (doing a bulk byte copy) since the field
503 // numbers are identical
504 int start = j, numDocs = 0;
511 if (reader.IsDeleted(j))
517 while (numDocs < MAX_RAW_MERGE_DOCS);
519 IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
520 fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
522 checkAbort.Work(300 * numDocs);
527 for (int j = 0; j < maxDoc; j++)
529 if (reader.IsDeleted(j))
534 // NOTE: it's very important to first assign to doc then pass it to
535 // termVectorsWriter.addAllDocVectors; see LUCENE-1282
536 Document doc = reader.Document(j, fieldSelectorMerge);
537 fieldsWriter.AddDocument(doc);
539 checkAbort.Work(300);
545 private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
547 int maxDoc = reader.MaxDoc();
549 if (matchingFieldsReader != null)
551 // We can bulk-copy because the fieldInfos are "congruent"
552 while (docCount < maxDoc)
554 int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
555 IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
556 fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
558 checkAbort.Work(300 * len);
563 for (; docCount < maxDoc; docCount++)
565 // NOTE: it's very important to first assign to doc then pass it to
566 // termVectorsWriter.addAllDocVectors; see LUCENE-1282
567 Document doc = reader.Document(docCount, fieldSelectorMerge);
568 fieldsWriter.AddDocument(doc);
569 checkAbort.Work(300);
575 /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
576 /// <throws> IOException </throws>
577 private void MergeVectors()
579 TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);
584 for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
586 SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
587 TermVectorsReader matchingVectorsReader = null;
588 if (matchingSegmentReader != null)
590 TermVectorsReader vectorsReader = matchingSegmentReader.GetTermVectorsReaderOrig();
592 // If the TV* files are an older format then they cannot read raw docs:
593 if (vectorsReader != null && vectorsReader.CanReadRawDocs())
595 matchingVectorsReader = vectorsReader;
598 IndexReader reader = (IndexReader) iter.Current;
599 if (reader.HasDeletions())
601 CopyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
605 CopyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
611 termVectorsWriter.Close();
614 System.String fileName = segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
615 long tvxSize = directory.FileLength(fileName);
617 if (4 + ((long) mergedDocs) * 16 != tvxSize)
618 // This is most likely a bug in Sun JRE 1.6.0_04/_05;
619 // we detect that the bug has struck, here, and
620 // throw an exception to prevent the corruption from
621 // entering the index. See LUCENE-1282 for
623 throw new System.SystemException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
626 private void CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
628 int maxDoc = reader.MaxDoc();
629 if (matchingVectorsReader != null)
631 // We can bulk-copy because the fieldInfos are "congruent"
632 for (int docNum = 0; docNum < maxDoc; )
634 if (reader.IsDeleted(docNum))
640 // We can optimize this case (doing a bulk byte copy) since the field
641 // numbers are identical
642 int start = docNum, numDocs = 0;
647 if (docNum >= maxDoc)
649 if (reader.IsDeleted(docNum))
655 while (numDocs < MAX_RAW_MERGE_DOCS);
657 matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
658 termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
659 checkAbort.Work(300 * numDocs);
664 for (int docNum = 0; docNum < maxDoc; docNum++)
666 if (reader.IsDeleted(docNum))
672 // NOTE: it's very important to first assign to vectors then pass it to
673 // termVectorsWriter.addAllDocVectors; see LUCENE-1282
674 TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
675 termVectorsWriter.AddAllDocVectors(vectors);
676 checkAbort.Work(300);
681 private void CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
683 int maxDoc = reader.MaxDoc();
684 if (matchingVectorsReader != null)
686 // We can bulk-copy because the fieldInfos are "congruent"
688 while (docCount < maxDoc)
690 int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
691 matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
692 termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
694 checkAbort.Work(300 * len);
699 for (int docNum = 0; docNum < maxDoc; docNum++)
701 // NOTE: it's very important to first assign to vectors then pass it to
702 // termVectorsWriter.addAllDocVectors; see LUCENE-1282
703 TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
704 termVectorsWriter.AddAllDocVectors(vectors);
705 checkAbort.Work(300);
710 private SegmentMergeQueue queue = null;
712 private void MergeTerms()
715 SegmentWriteState state = new SegmentWriteState(null, directory, segment, null, mergedDocs, 0, termIndexInterval);
717 FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);
721 queue = new SegmentMergeQueue(readers.Count);
723 MergeTermInfos(consumer);
733 internal bool omitTermFreqAndPositions;
735 private void MergeTermInfos(FormatPostingsFieldsConsumer consumer)
737 int base_Renamed = 0;
738 int readerCount = readers.Count;
739 for (int i = 0; i < readerCount; i++)
741 IndexReader reader = (IndexReader) readers[i];
742 TermEnum termEnum = reader.Terms();
743 SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
744 int[] docMap = smi.GetDocMap();
749 docMaps = new int[readerCount][];
750 delCounts = new int[readerCount];
753 delCounts[i] = smi.reader.MaxDoc() - smi.reader.NumDocs();
756 base_Renamed += reader.NumDocs();
758 System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc() - smi.delCount);
767 SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];
769 System.String currentField = null;
770 FormatPostingsTermsConsumer termsConsumer = null;
772 while (queue.Size() > 0)
774 int matchSize = 0; // pop matching terms
775 match[matchSize++] = (SegmentMergeInfo) queue.Pop();
776 Term term = match[0].term;
777 SegmentMergeInfo top = (SegmentMergeInfo) queue.Top();
779 while (top != null && term.CompareTo(top.term) == 0)
781 match[matchSize++] = (SegmentMergeInfo) queue.Pop();
782 top = (SegmentMergeInfo) queue.Top();
785 if ((System.Object) currentField != (System.Object) term.field)
787 currentField = term.field;
788 if (termsConsumer != null)
789 termsConsumer.Finish();
790 FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
791 termsConsumer = consumer.AddField(fieldInfo);
792 omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
795 int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo
797 checkAbort.Work(df / 3.0);
799 while (matchSize > 0)
801 SegmentMergeInfo smi = match[--matchSize];
806 smi.Close(); // done with a segment
811 private byte[] payloadBuffer;
812 private int[][] docMaps;
813 internal int[][] GetDocMaps()
817 private int[] delCounts;
818 internal int[] GetDelCounts()
823 /// <summary>Process postings from multiple segments all positioned on the
824 /// same term. Writes out merged entries into freqOutput and
825 /// the proxOutput streams.
828 /// <param name="smis">array of segments
830 /// <param name="n">number of cells in the array actually occupied
832 /// <returns> number of documents across all segments where this term was found
834 /// <throws> CorruptIndexException if the index is corrupt </throws>
835 /// <throws> IOException if there is a low-level IO error </throws>
836 private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
839 FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.text);
841 for (int i = 0; i < n; i++)
843 SegmentMergeInfo smi = smis[i];
844 TermPositions postings = smi.GetPositions();
845 System.Diagnostics.Debug.Assert(postings != null);
846 int base_Renamed = smi.base_Renamed;
847 int[] docMap = smi.GetDocMap();
848 postings.Seek(smi.termEnum);
850 while (postings.Next())
853 int doc = postings.Doc();
855 doc = docMap[doc]; // map around deletions
856 doc += base_Renamed; // convert to merged space
858 int freq = postings.Freq();
859 FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq);
861 if (!omitTermFreqAndPositions)
863 for (int j = 0; j < freq; j++)
865 int position = postings.NextPosition();
866 int payloadLength = postings.GetPayloadLength();
867 if (payloadLength > 0)
869 if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
870 payloadBuffer = new byte[payloadLength];
871 postings.GetPayload(payloadBuffer, 0);
873 posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
875 posConsumer.Finish();
879 docConsumer.Finish();
884 private void MergeNorms()
886 byte[] normBuffer = null;
887 IndexOutput output = null;
890 int numFieldInfos = fieldInfos.Size();
891 for (int i = 0; i < numFieldInfos; i++)
893 FieldInfo fi = fieldInfos.FieldInfo(i);
894 if (fi.isIndexed && !fi.omitNorms)
898 output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
899 output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
901 for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
903 IndexReader reader = (IndexReader) iter.Current;
904 int maxDoc = reader.MaxDoc();
905 if (normBuffer == null || normBuffer.Length < maxDoc)
907 // the buffer is too small for the current segment
908 normBuffer = new byte[maxDoc];
910 reader.Norms(fi.name, normBuffer, 0);
911 if (!reader.HasDeletions())
913 //optimized case for segments without deleted docs
914 output.WriteBytes(normBuffer, maxDoc);
918 // this segment has deleted docs, so we have to
919 // check for every doc if it is deleted or not
920 for (int k = 0; k < maxDoc; k++)
922 if (!reader.IsDeleted(k))
924 output.WriteByte(normBuffer[k]);
928 checkAbort.Work(maxDoc);
942 internal class CheckAbort
944 private double workCount;
945 private MergePolicy.OneMerge merge;
946 private Directory dir;
947 public CheckAbort(MergePolicy.OneMerge merge, Directory dir)
953 /// <summary> Records the fact that roughly units amount of work
954 /// have been done since this method was last called.
955 /// When adding time-consuming code into SegmentMerger,
956 /// you should test different values for units to ensure
957 /// that the time in between calls to merge.checkAborted
958 /// is up to ~ 1 second.
960 public virtual void Work(double units)
963 if (workCount >= 10000.0)
965 merge.CheckAborted(dir);