Merge pull request #409 from Alkarex/patch-1
[mono.git] / mcs / tools / monkeydoc / Lucene.Net / Lucene.Net / Index / SegmentMerger.cs
1 /* 
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  * 
9  * http://www.apache.org/licenses/LICENSE-2.0
10  * 
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 using System;
19
20 using Document = Mono.Lucene.Net.Documents.Document;
21 using FieldSelector = Mono.Lucene.Net.Documents.FieldSelector;
22 using FieldSelectorResult = Mono.Lucene.Net.Documents.FieldSelectorResult;
23 using FieldOption = Mono.Lucene.Net.Index.IndexReader.FieldOption;
24 using MergeAbortedException = Mono.Lucene.Net.Index.MergePolicy.MergeAbortedException;
25 using Directory = Mono.Lucene.Net.Store.Directory;
26 using IndexInput = Mono.Lucene.Net.Store.IndexInput;
27 using IndexOutput = Mono.Lucene.Net.Store.IndexOutput;
28
29 namespace Mono.Lucene.Net.Index
30 {
31         
	/// <summary> The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}),
33         /// into a single Segment.  After adding the appropriate readers, call the merge method to combine the 
34         /// segments.
35         /// <p/> 
36         /// If the compoundFile flag is set, then the segments will be merged into a compound file.
37         /// 
38         /// 
39         /// </summary>
40         /// <seealso cref="merge">
41         /// </seealso>
42         /// <seealso cref="add">
43         /// </seealso>
44         public sealed class SegmentMerger
45         {
46                 private class AnonymousClassCheckAbort:CheckAbort
47                 {
48                         private void  InitBlock(SegmentMerger enclosingInstance)
49                         {
50                                 this.enclosingInstance = enclosingInstance;
51                         }
52                         private SegmentMerger enclosingInstance;
53                         public SegmentMerger Enclosing_Instance
54                         {
55                                 get
56                                 {
57                                         return enclosingInstance;
58                                 }
59                                 
60                         }
61                         internal AnonymousClassCheckAbort(SegmentMerger enclosingInstance, Mono.Lucene.Net.Index.MergePolicy.OneMerge Param1, Mono.Lucene.Net.Store.Directory Param2):base(Param1, Param2)
62                         {
63                                 InitBlock(enclosingInstance);
64                         }
65                         public override void  Work(double units)
66                         {
67                                 // do nothing
68                         }
69                 }
70                 private class AnonymousClassCheckAbort1:CheckAbort
71                 {
72                         private void  InitBlock(SegmentMerger enclosingInstance)
73                         {
74                                 this.enclosingInstance = enclosingInstance;
75                         }
76                         private SegmentMerger enclosingInstance;
77                         public SegmentMerger Enclosing_Instance
78                         {
79                                 get
80                                 {
81                                         return enclosingInstance;
82                                 }
83                                 
84                         }
85                         internal AnonymousClassCheckAbort1(SegmentMerger enclosingInstance, Mono.Lucene.Net.Index.MergePolicy.OneMerge Param1, Mono.Lucene.Net.Store.Directory Param2):base(Param1, Param2)
86                         {
87                                 InitBlock(enclosingInstance);
88                         }
89                         public override void  Work(double units)
90                         {
91                                 // do nothing
92                         }
93                 }
94                 [Serializable]
95                 private class AnonymousClassFieldSelector : FieldSelector
96                 {
97                         public AnonymousClassFieldSelector(SegmentMerger enclosingInstance)
98                         {
99                                 InitBlock(enclosingInstance);
100                         }
101                         private void  InitBlock(SegmentMerger enclosingInstance)
102                         {
103                                 this.enclosingInstance = enclosingInstance;
104                         }
105                         private SegmentMerger enclosingInstance;
106                         public SegmentMerger Enclosing_Instance
107                         {
108                                 get
109                                 {
110                                         return enclosingInstance;
111                                 }
112                                 
113                         }
114                         public FieldSelectorResult Accept(System.String fieldName)
115                         {
116                                 return FieldSelectorResult.LOAD_FOR_MERGE;
117                         }
118                 }
		/// <summary>Shared constructor initialization: start from the writer's default
		/// term index interval (may be overridden by the IndexWriter constructor).</summary>
		private void  InitBlock()
		{
			termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
		}
123                 
		/// <summary>norms header placeholder </summary>
		internal static readonly byte[] NORMS_HEADER = new byte[]{(byte) 'N', (byte) 'R', (byte) 'M', unchecked((byte) - 1)};
		
		// Destination directory that receives the merged segment files.
		private Directory directory;
		// Name of the segment being produced by this merge.
		private System.String segment;
		// Interval between indexed terms; initialized in InitBlock().
		private int termIndexInterval;
		
		// Readers to be merged, in the order they were Add()ed.
		private System.Collections.IList readers = new System.Collections.ArrayList();
		// Merged field metadata; built by MergeFields().
		private FieldInfos fieldInfos;
		
		// Total number of documents merged; set by Merge(bool).
		private int mergedDocs;
		
		// Abort hook consulted during expensive work so a running merge can be stopped.
		private CheckAbort checkAbort;
		
		// Whether we should merge doc stores (stored fields and
		// vectors files).  When all segments we are merging
		// already share the same doc store files, we don't need
		// to merge the doc stores.
		private bool mergeDocStores;
		
		/// <summary>Maximum number of contiguous documents to bulk-copy
		/// when merging stored fields 
		/// </summary>
		private const int MAX_RAW_MERGE_DOCS = 4192;
148                 
		/// <summary>This ctor used only by test code.
		/// 
		/// </summary>
		/// <param name="dir">The Directory to merge the other segments into
		/// </param>
		/// <param name="name">The name of the new segment
		/// </param>
		public /*internal*/ SegmentMerger(Directory dir, System.String name)
		{
			InitBlock();
			directory = dir;
			segment = name;
			// No real merge in flight here, so install the no-op abort checker.
			checkAbort = new AnonymousClassCheckAbort(this, null, null);
		}
163                 
164                 internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge)
165                 {
166                         InitBlock();
167                         directory = writer.GetDirectory();
168                         segment = name;
169                         if (merge != null)
170                         {
171                                 checkAbort = new CheckAbort(merge, directory);
172                         }
173                         else
174                         {
175                                 checkAbort = new AnonymousClassCheckAbort1(this, null, null);
176                         }
177                         termIndexInterval = writer.GetTermIndexInterval();
178                 }
179                 
		/// <summary>True when any merged field stores proximity (position) data.
		/// NOTE(review): fieldInfos is only assigned in MergeFields, so this appears
		/// valid only after a merge has started — confirm against callers.</summary>
		internal bool HasProx()
		{
			return fieldInfos.HasProx();
		}
184                 
		/// <summary> Add an IndexReader to the collection of readers that are to be merged</summary>
		/// <param name="reader">Reader whose documents will be included in the merge
		/// </param>
		public /*internal*/ void  Add(IndexReader reader)
		{
			readers.Add(reader);
		}
192                 
		/// <summary>Accessor for a previously added reader. Despite the method name, the
		/// return type is IndexReader — the reader need not be a SegmentReader.</summary>
		/// <param name="i">The index of the reader to return
		/// </param>
		/// <returns> The ith reader to be merged
		/// </returns>
		internal IndexReader SegmentReader(int i)
		{
			return (IndexReader) readers[i];
		}
202                 
		/// <summary> Merges the readers specified by the {@link #add} method into the directory passed to the constructor</summary>
		/// <returns> The number of documents that were merged
		/// </returns>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public /*internal*/ int Merge()
		{
			// Default overload: doc stores (stored fields + vectors) are merged too.
			return Merge(true);
		}
212                 
213                 /// <summary> Merges the readers specified by the {@link #add} method
214                 /// into the directory passed to the constructor.
215                 /// </summary>
216                 /// <param name="mergeDocStores">if false, we will not merge the
217                 /// stored fields nor vectors files
218                 /// </param>
219                 /// <returns> The number of documents that were merged
220                 /// </returns>
221                 /// <throws>  CorruptIndexException if the index is corrupt </throws>
222                 /// <throws>  IOException if there is a low-level IO error </throws>
223                 internal int Merge(bool mergeDocStores)
224                 {
225                         
226                         this.mergeDocStores = mergeDocStores;
227                         
228                         // NOTE: it's important to add calls to
229                         // checkAbort.work(...) if you make any changes to this
230                         // method that will spend alot of time.  The frequency
231                         // of this check impacts how long
232                         // IndexWriter.close(false) takes to actually stop the
233                         // threads.
234                         
235                         mergedDocs = MergeFields();
236                         MergeTerms();
237                         MergeNorms();
238                         
239                         if (mergeDocStores && fieldInfos.HasVectors())
240                                 MergeVectors();
241                         
242                         return mergedDocs;
243                 }
244                 
245                 /// <summary> close all IndexReaders that have been added.
246                 /// Should not be called before merge().
247                 /// </summary>
248                 /// <throws>  IOException </throws>
249                 public /*internal*/ void  CloseReaders()
250                 {
251                         for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
252                         {
253                                 ((IndexReader) iter.Current).Close();
254                         }
255                 }
256
257         public /*internal*/ System.Collections.Generic.ICollection<string> GetMergedFiles()
258                 {
259             System.Collections.Generic.IDictionary<string,string> fileSet = new System.Collections.Generic.Dictionary<string,string>();
260                         
261                         // Basic files
262                         for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.Length; i++)
263                         {
264                                 System.String ext = IndexFileNames.COMPOUND_EXTENSIONS[i];
265                                 
266                                 if (ext.Equals(IndexFileNames.PROX_EXTENSION) && !HasProx())
267                                         continue;
268                                 
269                                 if (mergeDocStores || (!ext.Equals(IndexFileNames.FIELDS_EXTENSION) && !ext.Equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
270                     fileSet[segment + "." + ext] = segment + "." + ext;
271                         }
272                         
273                         // Fieldable norm files
274                         for (int i = 0; i < fieldInfos.Size(); i++)
275                         {
276                                 FieldInfo fi = fieldInfos.FieldInfo(i);
277                                 if (fi.isIndexed && !fi.omitNorms)
278                                 {
279                     fileSet[segment + "." + IndexFileNames.NORMS_EXTENSION]=segment + "." + IndexFileNames.NORMS_EXTENSION;
280                                         break;
281                                 }
282                         }
283                         
284                         // Vector files
285                         if (fieldInfos.HasVectors() && mergeDocStores)
286                         {
287                                 for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.Length; i++)
288                                 {
289                     fileSet[segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]] = segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i];
290                                 }
291                         }
292
293             return fileSet.Keys;
294         }
295
296         public /*internal*/ System.Collections.Generic.ICollection<string> CreateCompoundFile(System.String fileName)
297         {
298             System.Collections.Generic.ICollection<string> files = GetMergedFiles();
299             CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
300
301                         // Now merge all added files
302                         System.Collections.IEnumerator it = files.GetEnumerator();
303                         while (it.MoveNext())
304                         {
305                                 cfsWriter.AddFile((System.String) it.Current);
306                         }
307                         
308                         // Perform the merge
309                         cfsWriter.Close();
310
311             return files;
312                 }
313
314         private void AddIndexed(IndexReader reader, FieldInfos fInfos, System.Collections.Generic.ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions)
315                 {
316                         System.Collections.Generic.IEnumerator<string> i = names.GetEnumerator();
317                         while (i.MoveNext())
318                         {
319                 System.String field = i.Current;
320                                 fInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field), storePayloads, omitTFAndPositions);
321                         }
322                 }
323                 
		// Non-null at position i when the i'th added reader is a SegmentReader whose
		// field-name -> number mapping matches the merged FieldInfos (enables bulk copy).
		private SegmentReader[] matchingSegmentReaders;
		// Scratch buffers for bulk-reading raw stored-field document lengths.
		private int[] rawDocLengths;
		private int[] rawDocLengths2;
327                 
328                 private void  SetMatchingSegmentReaders()
329                 {
330                         // If the i'th reader is a SegmentReader and has
331                         // identical fieldName -> number mapping, then this
332                         // array will be non-null at position i:
333                         int numReaders = readers.Count;
334                         matchingSegmentReaders = new SegmentReader[numReaders];
335                         
336                         // If this reader is a SegmentReader, and all of its
337                         // field name -> number mappings match the "merged"
338                         // FieldInfos, then we can do a bulk copy of the
339                         // stored fields:
340                         for (int i = 0; i < numReaders; i++)
341                         {
342                                 IndexReader reader = (IndexReader) readers[i];
343                                 if (reader is SegmentReader)
344                                 {
345                                         SegmentReader segmentReader = (SegmentReader) reader;
346                                         bool same = true;
347                                         FieldInfos segmentFieldInfos = segmentReader.FieldInfos();
348                                         int numFieldInfos = segmentFieldInfos.Size();
349                                         for (int j = 0; same && j < numFieldInfos; j++)
350                                         {
351                                                 same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
352                                         }
353                                         if (same)
354                                         {
355                                                 matchingSegmentReaders[i] = segmentReader;
356                                         }
357                                 }
358                         }
359                         
360                         // Used for bulk-reading raw bytes for stored fields
361                         rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
362                         rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];
363                 }
364                 
		/// <summary>Builds the merged FieldInfos from all added readers, writes the .fnm
		/// file and, when mergeDocStores is true, merges the stored fields.</summary>
		/// <returns> The number of documents in all of the readers
		/// </returns>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		private int MergeFields()
		{
			
			if (!mergeDocStores)
			{
				// When we are not merging by doc stores, that means
				// all segments were written as part of a single
				// autoCommit=false IndexWriter session, so their field
				// name -> number mapping are the same.  So, we start
				// with the fieldInfos of the last segment in this
				// case, to keep that numbering.
				SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
				fieldInfos = (FieldInfos) sr.core.fieldInfos.Clone();
			}
			else
			{
				fieldInfos = new FieldInfos(); // merge field names
			}
			
			for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
			{
				IndexReader reader = (IndexReader) iter.Current;
				if (reader is SegmentReader)
				{
					// Fast path: copy each FieldInfo straight from the segment's metadata.
					SegmentReader segmentReader = (SegmentReader) reader;
					FieldInfos readerFieldInfos = segmentReader.FieldInfos();
					int numReaderFieldInfos = readerFieldInfos.Size();
					for (int j = 0; j < numReaderFieldInfos; j++)
					{
						FieldInfo fi = readerFieldInfos.FieldInfo(j);
						fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
					}
				}
				else
				{
					// Generic reader: reconstruct field options via the field-name queries,
					// from the most specific option set down to plain indexed fields.
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
					fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
				}
			}
			fieldInfos.Write(directory, segment + ".fnm");
			
			int docCount = 0;
			
			SetMatchingSegmentReaders();
			
			if (mergeDocStores)
			{
				
				// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
				// in  merge mode, we use this FieldSelector
				FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
				
				// merge field values
				FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
				
				try
				{
					int idx = 0;
					for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
					{
						IndexReader reader = (IndexReader) iter.Current;
						SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
						FieldsReader matchingFieldsReader = null;
						if (matchingSegmentReader != null)
						{
							// Bulk copy is only possible when the raw doc format is readable.
							FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
							if (fieldsReader != null && fieldsReader.CanReadRawDocs())
							{
								matchingFieldsReader = fieldsReader;
							}
						}
						if (reader.HasDeletions())
						{
							docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
						}
						else
						{
							docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
						}
					}
				}
				finally
				{
					fieldsWriter.Close();
				}
				
				System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
				long fdxFileLength = directory.FileLength(fileName);
				
				// Sanity check: the fdx file must be exactly 4 bytes plus 8 bytes per doc.
				if (4 + ((long) docCount) * 8 != fdxFileLength)
				// This is most likely a bug in Sun JRE 1.6.0_04/_05;
				// we detect that the bug has struck, here, and
				// throw an exception to prevent the corruption from
				// entering the index.  See LUCENE-1282 for
				// details.
					throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
			}
			// If we are skipping the doc stores, that means there
			// are no deletions in any of these segments, so we
			// just sum numDocs() of each segment to get total docCount
			else
			{
				for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
				{
					docCount += ((IndexReader) iter.Current).NumDocs();
				}
			}
			
			return docCount;
		}
486                 
		/// <summary>Copies the stored fields of a reader that has deletions, skipping
		/// deleted docs; bulk-copies contiguous runs of live docs when a matching
		/// (congruent) FieldsReader is available.</summary>
		/// <returns>The number of live documents copied</returns>
		private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int docCount = 0;
			int maxDoc = reader.MaxDoc();
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int j = 0; j < maxDoc; )
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						++j;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = j, numDocs = 0;
					do 
					{
						// Extend the run until a deleted doc, maxDoc, or the chunk limit.
						j++;
						numDocs++;
						if (j >= maxDoc)
							break;
						if (reader.IsDeleted(j))
						{
							j++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
					docCount += numDocs;
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				// Fallback: re-read and re-write each live document individually.
				for (int j = 0; j < maxDoc; j++)
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						continue;
					}
					// NOTE: it's very important to first assign to doc then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					Document doc = reader.Document(j, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					docCount++;
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
544                 
545                 private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
546                 {
547                         int maxDoc = reader.MaxDoc();
548                         int docCount = 0;
549                         if (matchingFieldsReader != null)
550                         {
551                                 // We can bulk-copy because the fieldInfos are "congruent"
552                                 while (docCount < maxDoc)
553                                 {
554                                         int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
555                                         IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
556                                         fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
557                                         docCount += len;
558                                         checkAbort.Work(300 * len);
559                                 }
560                         }
561                         else
562                         {
563                                 for (; docCount < maxDoc; docCount++)
564                                 {
565                                         // NOTE: it's very important to first assign to doc then pass it to
566                                         // termVectorsWriter.addAllDocVectors; see LUCENE-1282
567                                         Document doc = reader.Document(docCount, fieldSelectorMerge);
568                                         fieldsWriter.AddDocument(doc);
569                                         checkAbort.Work(300);
570                                 }
571                         }
572                         return docCount;
573                 }
574                 
575                 /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
576                 /// <throws>  IOException </throws>
577                 private void  MergeVectors()
578                 {
579                         TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);
580                         
581                         try
582                         {
583                                 int idx = 0;
584                                 for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
585                                 {
586                                         SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
587                                         TermVectorsReader matchingVectorsReader = null;
588                                         if (matchingSegmentReader != null)
589                                         {
590                                                 TermVectorsReader vectorsReader = matchingSegmentReader.GetTermVectorsReaderOrig();
591                                                 
592                                                 // If the TV* files are an older format then they cannot read raw docs:
593                                                 if (vectorsReader != null && vectorsReader.CanReadRawDocs())
594                                                 {
595                                                         matchingVectorsReader = vectorsReader;
596                                                 }
597                                         }
598                                         IndexReader reader = (IndexReader) iter.Current;
599                                         if (reader.HasDeletions())
600                                         {
601                                                 CopyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
602                                         }
603                                         else
604                                         {
605                                                 CopyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
606                                         }
607                                 }
608                         }
609                         finally
610                         {
611                                 termVectorsWriter.Close();
612                         }
613                         
614                         System.String fileName = segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
615                         long tvxSize = directory.FileLength(fileName);
616                         
617                         if (4 + ((long) mergedDocs) * 16 != tvxSize)
618                         // This is most likely a bug in Sun JRE 1.6.0_04/_05;
619                         // we detect that the bug has struck, here, and
620                         // throw an exception to prevent the corruption from
621                         // entering the index.  See LUCENE-1282 for
622                         // details.
623                                 throw new System.SystemException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
624                 }
625                 
		/// <summary>Copies term vectors from a reader that has deletions.  When
		/// matchingVectorsReader is non-null, live docs are bulk-copied as raw
		/// bytes in runs; otherwise each live doc's vectors are decoded and
		/// re-added individually.  Deleted docs are skipped either way.</summary>
		private void  CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
		{
			int maxDoc = reader.MaxDoc();
			if (matchingVectorsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int docNum = 0; docNum < maxDoc; )
				{
					if (reader.IsDeleted(docNum))
					{
						// skip deleted docs
						++docNum;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = docNum, numDocs = 0;
					// Extend the run [start, start+numDocs) over consecutive live docs,
					// stopping at the first deleted doc (which is also skipped), at the
					// end of the segment, or when the chunk cap is reached.
					do 
					{
						docNum++;
						numDocs++;
						if (docNum >= maxDoc)
							break;
						if (reader.IsDeleted(docNum))
						{
							docNum++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					// Read raw lengths for the run, then hand the reader plus lengths
					// to the writer for a byte-level copy.
					matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
					termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				for (int docNum = 0; docNum < maxDoc; docNum++)
				{
					if (reader.IsDeleted(docNum))
					{
						// skip deleted docs
						continue;
					}
					
					// NOTE: it's very important to first assign to vectors then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
					termVectorsWriter.AddAllDocVectors(vectors);
					checkAbort.Work(300);
				}
			}
		}
680                 
681                 private void  CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
682                 {
683                         int maxDoc = reader.MaxDoc();
684                         if (matchingVectorsReader != null)
685                         {
686                                 // We can bulk-copy because the fieldInfos are "congruent"
687                                 int docCount = 0;
688                                 while (docCount < maxDoc)
689                                 {
690                                         int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
691                                         matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
692                                         termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
693                                         docCount += len;
694                                         checkAbort.Work(300 * len);
695                                 }
696                         }
697                         else
698                         {
699                                 for (int docNum = 0; docNum < maxDoc; docNum++)
700                                 {
701                                         // NOTE: it's very important to first assign to vectors then pass it to
702                                         // termVectorsWriter.addAllDocVectors; see LUCENE-1282
703                                         TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
704                                         termVectorsWriter.AddAllDocVectors(vectors);
705                                         checkAbort.Work(300);
706                                 }
707                         }
708                 }
709                 
		// Priority queue that drives the n-way term merge; created in MergeTerms,
		// consumed by MergeTermInfos, and closed in MergeTerms' finally block.
		private SegmentMergeQueue queue = null;
711                 
712                 private void  MergeTerms()
713                 {
714                         
715                         SegmentWriteState state = new SegmentWriteState(null, directory, segment, null, mergedDocs, 0, termIndexInterval);
716                         
717                         FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);
718                         
719                         try
720                         {
721                                 queue = new SegmentMergeQueue(readers.Count);
722                                 
723                                 MergeTermInfos(consumer);
724                         }
725                         finally
726                         {
727                                 consumer.Finish();
728                                 if (queue != null)
729                                         queue.Close();
730                         }
731                 }
732                 
		// Whether the field currently being merged omits term freq/positions;
		// set per-field in MergeTermInfos and read by AppendPostings.
		internal bool omitTermFreqAndPositions;
734                 
		/// <summary>Performs an n-way merge of the terms of all readers, in sorted
		/// term order, feeding each distinct term's combined postings to the
		/// consumer.  Also builds docMaps/delCounts for readers that have
		/// deletions.</summary>
		private void  MergeTermInfos(FormatPostingsFieldsConsumer consumer)
		{
			int base_Renamed = 0;
			int readerCount = readers.Count;
			for (int i = 0; i < readerCount; i++)
			{
				IndexReader reader = (IndexReader) readers[i];
				TermEnum termEnum = reader.Terms();
				SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
				int[] docMap = smi.GetDocMap();
				if (docMap != null)
				{
					// Reader has deletions: record its doc map and deletion count,
					// lazily allocating the per-reader arrays on first use.
					if (docMaps == null)
					{
						docMaps = new int[readerCount][];
						delCounts = new int[readerCount];
					}
					docMaps[i] = docMap;
					delCounts[i] = smi.reader.MaxDoc() - smi.reader.NumDocs();
				}
				
				// Doc ids of the next reader start right after this reader's live docs.
				base_Renamed += reader.NumDocs();
				
				System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc() - smi.delCount);
				
				if (smi.Next())
					queue.Add(smi);
				// initialize queue
				else
					smi.Close();
			}
			
			SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];
			
			System.String currentField = null;
			FormatPostingsTermsConsumer termsConsumer = null;
			
			while (queue.Size() > 0)
			{
				int matchSize = 0; // pop matching terms
				match[matchSize++] = (SegmentMergeInfo) queue.Pop();
				Term term = match[0].term;
				SegmentMergeInfo top = (SegmentMergeInfo) queue.Top();
				
				// Gather every segment currently positioned on the same smallest term.
				while (top != null && term.CompareTo(top.term) == 0)
				{
					match[matchSize++] = (SegmentMergeInfo) queue.Pop();
					top = (SegmentMergeInfo) queue.Top();
				}
				
				// Field changed (reference compare; field names are presumably
				// interned — see Term): finish the previous field and start a new one.
				if ((System.Object) currentField != (System.Object) term.field)
				{
					currentField = term.field;
					if (termsConsumer != null)
						termsConsumer.Finish();
					FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
					termsConsumer = consumer.AddField(fieldInfo);
					omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
				}
				
				int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo
				
				checkAbort.Work(df / 3.0);
				
				// Advance each matched segment; re-add it to the queue if it still
				// has terms, otherwise close it.
				while (matchSize > 0)
				{
					SegmentMergeInfo smi = match[--matchSize];
					if (smi.Next())
						queue.Add(smi);
					// restore queue
					else
						smi.Close(); // done with a segment
				}
			}
		}
810                 
		// Scratch buffer reused by AppendPostings for payload bytes; grown on
		// demand, never shrunk.
		private byte[] payloadBuffer;
		// Per-reader map from original doc id to merged doc id; entries are filled
		// in MergeTermInfos only for readers with deletions (null otherwise).
		private int[][] docMaps;
		/// <summary>Returns the per-reader doc-id maps built during the merge, or
		/// null if no reader had deletions.</summary>
		internal int[][] GetDocMaps()
		{
			return docMaps;
		}
		// Per-reader deletion counts; allocated alongside docMaps.
		private int[] delCounts;
		/// <summary>Returns the per-reader deletion counts built during the merge,
		/// or null if no reader had deletions.</summary>
		internal int[] GetDelCounts()
		{
			return delCounts;
		}
822                 
		/// <summary>Process postings from multiple segments all positioned on the
		/// same term. Writes out merged entries into freqOutput and
		/// the proxOutput streams.
		/// 
		/// </summary>
		/// <param name="termsConsumer">consumer that receives the merged term and its postings
		/// </param>
		/// <param name="smis">array of segments
		/// </param>
		/// <param name="n">number of cells in the array actually occupied
		/// </param>
		/// <returns> number of documents across all segments where this term was found
		/// </returns>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
		{
			
			FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.text);
			int df = 0;
			for (int i = 0; i < n; i++)
			{
				SegmentMergeInfo smi = smis[i];
				TermPositions postings = smi.GetPositions();
				System.Diagnostics.Debug.Assert(postings != null);
				int base_Renamed = smi.base_Renamed;
				int[] docMap = smi.GetDocMap();
				// Position the postings enum on this segment's current term.
				postings.Seek(smi.termEnum);
				
				while (postings.Next())
				{
					df++;
					int doc = postings.Doc();
					if (docMap != null)
						doc = docMap[doc]; // map around deletions
					doc += base_Renamed; // convert to merged space
					
					int freq = postings.Freq();
					FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq);
					
					if (!omitTermFreqAndPositions)
					{
						// Copy each position (and its payload, if present) for this doc.
						for (int j = 0; j < freq; j++)
						{
							int position = postings.NextPosition();
							int payloadLength = postings.GetPayloadLength();
							if (payloadLength > 0)
							{
								// Grow the shared scratch buffer on demand.
								if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
									payloadBuffer = new byte[payloadLength];
								postings.GetPayload(payloadBuffer, 0);
							}
							posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
						}
						posConsumer.Finish();
					}
				}
			}
			docConsumer.Finish();
			
			return df;
		}
883                 
884                 private void  MergeNorms()
885                 {
886                         byte[] normBuffer = null;
887                         IndexOutput output = null;
888                         try
889                         {
890                                 int numFieldInfos = fieldInfos.Size();
891                                 for (int i = 0; i < numFieldInfos; i++)
892                                 {
893                                         FieldInfo fi = fieldInfos.FieldInfo(i);
894                                         if (fi.isIndexed && !fi.omitNorms)
895                                         {
896                                                 if (output == null)
897                                                 {
898                                                         output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
899                                                         output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
900                                                 }
901                                                 for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
902                                                 {
903                                                         IndexReader reader = (IndexReader) iter.Current;
904                                                         int maxDoc = reader.MaxDoc();
905                                                         if (normBuffer == null || normBuffer.Length < maxDoc)
906                                                         {
907                                                                 // the buffer is too small for the current segment
908                                                                 normBuffer = new byte[maxDoc];
909                                                         }
910                                                         reader.Norms(fi.name, normBuffer, 0);
911                                                         if (!reader.HasDeletions())
912                                                         {
913                                                                 //optimized case for segments without deleted docs
914                                                                 output.WriteBytes(normBuffer, maxDoc);
915                                                         }
916                                                         else
917                                                         {
918                                                                 // this segment has deleted docs, so we have to
919                                                                 // check for every doc if it is deleted or not
920                                                                 for (int k = 0; k < maxDoc; k++)
921                                                                 {
922                                                                         if (!reader.IsDeleted(k))
923                                                                         {
924                                                                                 output.WriteByte(normBuffer[k]);
925                                                                         }
926                                                                 }
927                                                         }
928                                                         checkAbort.Work(maxDoc);
929                                                 }
930                                         }
931                                 }
932                         }
933                         finally
934                         {
935                                 if (output != null)
936                                 {
937                                         output.Close();
938                                 }
939                         }
940                 }
941                 
942                 internal class CheckAbort
943                 {
944                         private double workCount;
945                         private MergePolicy.OneMerge merge;
946                         private Directory dir;
947                         public CheckAbort(MergePolicy.OneMerge merge, Directory dir)
948                         {
949                                 this.merge = merge;
950                                 this.dir = dir;
951                         }
952                         
953                         /// <summary> Records the fact that roughly units amount of work
954                         /// have been done since this method was last called.
955                         /// When adding time-consuming code into SegmentMerger,
956                         /// you should test different values for units to ensure
957                         /// that the time in between calls to merge.checkAborted
958                         /// is up to ~ 1 second.
959                         /// </summary>
960                         public virtual void  Work(double units)
961                         {
962                                 workCount += units;
963                                 if (workCount >= 10000.0)
964                                 {
965                                         merge.CheckAborted(dir);
966                                         workCount = 0;
967                                 }
968                         }
969                 }
970         }
971 }