[monkeydoc] Merge/add monkeydoc to master.
[mono.git] / mcs / tools / monkeydoc / Lucene.Net / Lucene.Net / Index / CheckIndex.cs
1 /* 
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  * 
9  * http://www.apache.org/licenses/LICENSE-2.0
10  * 
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 using System;
19
20 using AbstractField = Mono.Lucene.Net.Documents.AbstractField;
21 using Document = Mono.Lucene.Net.Documents.Document;
22 using Directory = Mono.Lucene.Net.Store.Directory;
23 using FSDirectory = Mono.Lucene.Net.Store.FSDirectory;
24 using IndexInput = Mono.Lucene.Net.Store.IndexInput;
25
26 namespace Mono.Lucene.Net.Index
27 {
28         
29         /// <summary> Basic tool and API to check the health of an index and
30         /// write a new segments file that removes reference to
31         /// problematic segments.
32         /// 
33         /// <p/>As this tool checks every byte in the index, on a large
34         /// index it can take quite a long time to run.
35         /// 
36         /// <p/><b>WARNING</b>: this tool and API is new and
37         /// experimental and is subject to suddenly change in the
38         /// next release.  Please make a complete backup of your
39         /// index before using this to fix your index!
40         /// </summary>
41         public class CheckIndex
42         {
43                 
44                 /// <summary>Default PrintStream for all CheckIndex instances.</summary>
45                 /// <deprecated> Use {@link #setInfoStream} per instance,
46                 /// instead. 
47                 /// </deprecated>
48         [Obsolete("Use SetInfoStream per instance,instead.")]
49                 public static System.IO.StreamWriter out_Renamed = null;
50                 
51                 private System.IO.StreamWriter infoStream;
52                 private Directory dir;
53                 
54                 /// <summary> Returned from {@link #CheckIndex()} detailing the health and status of the index.
55                 /// 
56                 /// <p/><b>WARNING</b>: this API is new and experimental and is
57                 /// subject to suddenly change in the next release.
58                 /// 
59                 /// </summary>
60                 
61                 public class Status
62                 {
63                         
64                         /// <summary>True if no problems were found with the index. </summary>
65                         public bool clean;
66                         
67                         /// <summary>True if we were unable to locate and load the segments_N file. </summary>
68                         public bool missingSegments;
69                         
70                         /// <summary>True if we were unable to open the segments_N file. </summary>
71                         public bool cantOpenSegments;
72                         
73                         /// <summary>True if we were unable to read the version number from segments_N file. </summary>
74                         public bool missingSegmentVersion;
75                         
76                         /// <summary>Name of latest segments_N file in the index. </summary>
77                         public System.String segmentsFileName;
78                         
79                         /// <summary>Number of segments in the index. </summary>
80                         public int numSegments;
81                         
82                         /// <summary>String description of the version of the index. </summary>
83                         public System.String segmentFormat;
84                         
85                         /// <summary>Empty unless you passed specific segments list to check as optional 3rd argument.</summary>
86                         /// <seealso cref="CheckIndex.CheckIndex(List)">
87                         /// </seealso>
88                         public System.Collections.IList segmentsChecked = new System.Collections.ArrayList();
89                         
90                         /// <summary>True if the index was created with a newer version of Lucene than the CheckIndex tool. </summary>
91                         public bool toolOutOfDate;
92                         
93                         /// <summary>List of {@link SegmentInfoStatus} instances, detailing status of each segment. </summary>
94                         public System.Collections.IList segmentInfos = new System.Collections.ArrayList();
95                         
96                         /// <summary>Directory index is in. </summary>
97                         public Directory dir;
98                         
99                         /// <summary> SegmentInfos instance containing only segments that
100                         /// had no problems (this is used with the {@link CheckIndex#fixIndex} 
101                         /// method to repair the index. 
102                         /// </summary>
103                         internal SegmentInfos newSegments;
104                         
105                         /// <summary>How many documents will be lost to bad segments. </summary>
106                         public int totLoseDocCount;
107                         
108                         /// <summary>How many bad segments were found. </summary>
109                         public int numBadSegments;
110                         
111                         /// <summary>True if we checked only specific segments ({@link
112                         /// #CheckIndex(List)}) was called with non-null
113                         /// argument). 
114                         /// </summary>
115                         public bool partial;
116                         
117                         /// <summary>Holds the userData of the last commit in the index </summary>
118             public System.Collections.Generic.IDictionary<string, string> userData;
119                         
120                         /// <summary>Holds the status of each segment in the index.
121                         /// See {@link #segmentInfos}.
122                         /// 
123                         /// <p/><b>WARNING</b>: this API is new and experimental and is
124                         /// subject to suddenly change in the next release.
125                         /// </summary>
126                         public class SegmentInfoStatus
127                         {
128                                 /// <summary>Name of the segment. </summary>
129                                 public System.String name;
130                                 
131                                 /// <summary>Document count (does not take deletions into account). </summary>
132                                 public int docCount;
133                                 
134                                 /// <summary>True if segment is compound file format. </summary>
135                                 public bool compound;
136                                 
137                                 /// <summary>Number of files referenced by this segment. </summary>
138                                 public int numFiles;
139                                 
140                                 /// <summary>Net size (MB) of the files referenced by this
141                                 /// segment. 
142                                 /// </summary>
143                                 public double sizeMB;
144                                 
145                                 /// <summary>Doc store offset, if this segment shares the doc
146                                 /// store files (stored fields and term vectors) with
147                                 /// other segments.  This is -1 if it does not share. 
148                                 /// </summary>
149                                 public int docStoreOffset = - 1;
150                                 
151                                 /// <summary>String of the shared doc store segment, or null if
152                                 /// this segment does not share the doc store files. 
153                                 /// </summary>
154                                 public System.String docStoreSegment;
155                                 
156                                 /// <summary>True if the shared doc store files are compound file
157                                 /// format. 
158                                 /// </summary>
159                                 public bool docStoreCompoundFile;
160                                 
161                                 /// <summary>True if this segment has pending deletions. </summary>
162                                 public bool hasDeletions;
163                                 
164                                 /// <summary>Name of the current deletions file name. </summary>
165                                 public System.String deletionsFileName;
166                                 
167                                 /// <summary>Number of deleted documents. </summary>
168                                 public int numDeleted;
169                                 
170                                 /// <summary>True if we were able to open a SegmentReader on this
171                                 /// segment. 
172                                 /// </summary>
173                                 public bool openReaderPassed;
174                                 
175                                 /// <summary>Number of fields in this segment. </summary>
176                                 internal int numFields;
177                                 
178                                 /// <summary>True if at least one of the fields in this segment
179                                 /// does not omitTermFreqAndPositions.
180                                 /// </summary>
181                                 /// <seealso cref="AbstractField.setOmitTermFreqAndPositions">
182                                 /// </seealso>
183                                 public bool hasProx;
184
185                 /// <summary>Map&lt;String, String&gt; that includes certain
186                                 /// debugging details that IndexWriter records into
187                                 /// each segment it creates 
188                                 /// </summary>
189                 public System.Collections.Generic.IDictionary<string, string> diagnostics;
190                                 
191                                 /// <summary>Status for testing of field norms (null if field norms could not be tested). </summary>
192                                 public FieldNormStatus fieldNormStatus;
193                                 
194                                 /// <summary>Status for testing of indexed terms (null if indexed terms could not be tested). </summary>
195                                 public TermIndexStatus termIndexStatus;
196                                 
197                                 /// <summary>Status for testing of stored fields (null if stored fields could not be tested). </summary>
198                                 public StoredFieldStatus storedFieldStatus;
199                                 
200                                 /// <summary>Status for testing of term vectors (null if term vectors could not be tested). </summary>
201                                 public TermVectorStatus termVectorStatus;
202                         }
203                         
204                         /// <summary> Status from testing field norms.</summary>
205                         public sealed class FieldNormStatus
206                         {
207                                 /// <summary>Number of fields successfully tested </summary>
208                                 public long totFields = 0L;
209                                 
210                                 /// <summary>Exception thrown during term index test (null on success) </summary>
211                                 public System.Exception error = null;
212                         }
213                         
214                         /// <summary> Status from testing term index.</summary>
215                         public sealed class TermIndexStatus
216                         {
217                                 /// <summary>Total term count </summary>
218                                 public long termCount = 0L;
219                                 
220                                 /// <summary>Total frequency across all terms. </summary>
221                                 public long totFreq = 0L;
222                                 
223                                 /// <summary>Total number of positions. </summary>
224                                 public long totPos = 0L;
225                                 
226                                 /// <summary>Exception thrown during term index test (null on success) </summary>
227                                 public System.Exception error = null;
228                         }
229                         
230                         /// <summary> Status from testing stored fields.</summary>
231                         public sealed class StoredFieldStatus
232                         {
233                                 
234                                 /// <summary>Number of documents tested. </summary>
235                                 public int docCount = 0;
236                                 
237                                 /// <summary>Total number of stored fields tested. </summary>
238                                 public long totFields = 0;
239                                 
240                                 /// <summary>Exception thrown during stored fields test (null on success) </summary>
241                                 public System.Exception error = null;
242                         }
243                         
244                         /// <summary> Status from testing stored fields.</summary>
245                         public sealed class TermVectorStatus
246                         {
247                                 
248                                 /// <summary>Number of documents tested. </summary>
249                                 public int docCount = 0;
250                                 
251                                 /// <summary>Total number of term vectors tested. </summary>
252                                 public long totVectors = 0;
253                                 
254                                 /// <summary>Exception thrown during term vector test (null on success) </summary>
255                                 public System.Exception error = null;
256                         }
257                 }
258                 
259                 /// <summary>Create a new CheckIndex on the directory. </summary>
260                 public CheckIndex(Directory dir)
261                 {
262                         this.dir = dir;
263                         infoStream = out_Renamed;
264                 }
265                 
266                 /// <summary>Set infoStream where messages should go.  If null, no
267                 /// messages are printed 
268                 /// </summary>
269                 public virtual void  SetInfoStream(System.IO.StreamWriter out_Renamed)
270                 {
271                         infoStream = out_Renamed;
272                 }
273                 
274                 private void  Msg(System.String msg)
275                 {
276                         if (infoStream != null)
277                                 infoStream.WriteLine(msg);
278                 }
279                 
280                 private class MySegmentTermDocs:SegmentTermDocs
281                 {
282                         
283                         internal int delCount;
284                         
285                         internal MySegmentTermDocs(SegmentReader p):base(p)
286                         {
287                         }
288                         
289                         public override void  Seek(Term term)
290                         {
291                                 base.Seek(term);
292                                 delCount = 0;
293                         }
294                         
295                         protected internal override void  SkippingDoc()
296                         {
297                                 delCount++;
298                         }
299                 }
300                 
301                 /// <summary>Returns true if index is clean, else false. </summary>
302                 /// <deprecated> Please instantiate a CheckIndex and then use {@link #CheckIndex()} instead 
303                 /// </deprecated>
304         [Obsolete("Please instantiate a CheckIndex and then use CheckIndex() instead")]
305                 public static bool Check(Directory dir, bool doFix)
306                 {
307                         return Check(dir, doFix, null);
308                 }
309                 
310                 /// <summary>Returns true if index is clean, else false.</summary>
311                 /// <deprecated> Please instantiate a CheckIndex and then use {@link #CheckIndex(List)} instead 
312                 /// </deprecated>
313         [Obsolete("Please instantiate a CheckIndex and then use CheckIndex(List) instead")]
314                 public static bool Check(Directory dir, bool doFix, System.Collections.IList onlySegments)
315                 {
316                         CheckIndex checker = new CheckIndex(dir);
317                         Status status = checker.CheckIndex_Renamed_Method(onlySegments);
318                         if (doFix && !status.clean)
319                                 checker.FixIndex(status);
320                         
321                         return status.clean;
322                 }
323                 
324                 /// <summary>Returns a {@link Status} instance detailing
325                 /// the state of the index.
326                 /// 
327                 /// <p/>As this method checks every byte in the index, on a large
328                 /// index it can take quite a long time to run.
329                 /// 
330                 /// <p/><b>WARNING</b>: make sure
331                 /// you only call this when the index is not opened by any
332                 /// writer. 
333                 /// </summary>
334                 public virtual Status CheckIndex_Renamed_Method()
335                 {
336                         return CheckIndex_Renamed_Method(null);
337                 }
338                 
339                 /// <summary>Returns a {@link Status} instance detailing
340                 /// the state of the index.
341                 /// 
342                 /// </summary>
343                 /// <param name="onlySegments">list of specific segment names to check
344                 /// 
345                 /// <p/>As this method checks every byte in the specified
346                 /// segments, on a large index it can take quite a long
347                 /// time to run.
348                 /// 
349                 /// <p/><b>WARNING</b>: make sure
350                 /// you only call this when the index is not opened by any
351                 /// writer. 
352                 /// </param>
353                 public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
354                 {
355             System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
356                         SegmentInfos sis = new SegmentInfos();
357                         Status result = new Status();
358                         result.dir = dir;
359                         try
360                         {
361                                 sis.Read(dir);
362                         }
363                         catch (System.Exception t)
364                         {
365                                 Msg("ERROR: could not read any segments file in directory");
366                                 result.missingSegments = true;
367                                 if (infoStream != null)
368                                         infoStream.WriteLine(t.StackTrace);
369                                 return result;
370                         }
371                         
372                         int numSegments = sis.Count;
373                         System.String segmentsFileName = sis.GetCurrentSegmentFileName();
374                         IndexInput input = null;
375                         try
376                         {
377                                 input = dir.OpenInput(segmentsFileName);
378                         }
379                         catch (System.Exception t)
380                         {
381                                 Msg("ERROR: could not open segments file in directory");
382                                 if (infoStream != null)
383                                         infoStream.WriteLine(t.StackTrace);
384                                 result.cantOpenSegments = true;
385                                 return result;
386                         }
387                         int format = 0;
388                         try
389                         {
390                                 format = input.ReadInt();
391                         }
392                         catch (System.Exception t)
393                         {
394                                 Msg("ERROR: could not read segment file version in directory");
395                                 if (infoStream != null)
396                                         infoStream.WriteLine(t.StackTrace);
397                                 result.missingSegmentVersion = true;
398                                 return result;
399                         }
400                         finally
401                         {
402                                 if (input != null)
403                                         input.Close();
404                         }
405                         
406                         System.String sFormat = "";
407                         bool skip = false;
408                         
409                         if (format == SegmentInfos.FORMAT)
410                                 sFormat = "FORMAT [Lucene Pre-2.1]";
411                         if (format == SegmentInfos.FORMAT_LOCKLESS)
412                                 sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
413                         else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
414                                 sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
415                         else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
416                                 sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
417                         else
418                         {
419                                 if (format == SegmentInfos.FORMAT_CHECKSUM)
420                                         sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
421                                 else if (format == SegmentInfos.FORMAT_DEL_COUNT)
422                                         sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
423                                 else if (format == SegmentInfos.FORMAT_HAS_PROX)
424                                         sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
425                                 else if (format == SegmentInfos.FORMAT_USER_DATA)
426                                         sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
427                                 else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
428                                         sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
429                                 else if (format < SegmentInfos.CURRENT_FORMAT)
430                                 {
431                                         sFormat = "int=" + format + " [newer version of Lucene than this tool]";
432                                         skip = true;
433                                 }
434                                 else
435                                 {
436                                         sFormat = format + " [Lucene 1.3 or prior]";
437                                 }
438                         }
439                         
440                         result.segmentsFileName = segmentsFileName;
441                         result.numSegments = numSegments;
442                         result.segmentFormat = sFormat;
443                         result.userData = sis.GetUserData();
444                         System.String userDataString;
445                         if (sis.GetUserData().Count > 0)
446                         {
447                                 userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
448                         }
449                         else
450                         {
451                                 userDataString = "";
452                         }
453                         
454                         Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);
455                         
456                         if (onlySegments != null)
457                         {
458                                 result.partial = true;
459                                 if (infoStream != null)
460                                         infoStream.Write("\nChecking only these segments:");
461                                 System.Collections.IEnumerator it = onlySegments.GetEnumerator();
462                                 while (it.MoveNext())
463                                 {
464                                         if (infoStream != null)
465                                         {
466                                                 infoStream.Write(" " + it.Current);
467                                         }
468                                 }
469                 System.Collections.IEnumerator e = onlySegments.GetEnumerator();
470                 while (e.MoveNext() == true)
471                 {
472                     result.segmentsChecked.Add(e.Current);
473                 }
474                 Msg(":");
475                         }
476                         
477                         if (skip)
478                         {
479                                 Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
480                                 result.toolOutOfDate = true;
481                                 return result;
482                         }
483                         
484                         
485                         result.newSegments = (SegmentInfos) sis.Clone();
486                         result.newSegments.Clear();
487                         
488                         for (int i = 0; i < numSegments; i++)
489                         {
490                                 SegmentInfo info = sis.Info(i);
491                                 if (onlySegments != null && !onlySegments.Contains(info.name))
492                                         continue;
493                                 Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
494                                 result.segmentInfos.Add(segInfoStat);
495                                 Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
496                                 segInfoStat.name = info.name;
497                                 segInfoStat.docCount = info.docCount;
498                                 
499                                 int toLoseDocCount = info.docCount;
500                                 
501                                 SegmentReader reader = null;
502                                 
503                                 try
504                                 {
505                                         Msg("    compound=" + info.GetUseCompoundFile());
506                                         segInfoStat.compound = info.GetUseCompoundFile();
507                                         Msg("    hasProx=" + info.GetHasProx());
508                                         segInfoStat.hasProx = info.GetHasProx();
509                                         Msg("    numFiles=" + info.Files().Count);
510                                         segInfoStat.numFiles = info.Files().Count;
511                                         Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
512                                         segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
513                     System.Collections.Generic.IDictionary<string, string> diagnostics = info.GetDiagnostics();
514                                         segInfoStat.diagnostics = diagnostics;
515                                         if (diagnostics.Count > 0)
516                                         {
517                                                 Msg("    diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
518                                         }
519                                         
520                                         int docStoreOffset = info.GetDocStoreOffset();
521                                         if (docStoreOffset != - 1)
522                                         {
523                                                 Msg("    docStoreOffset=" + docStoreOffset);
524                                                 segInfoStat.docStoreOffset = docStoreOffset;
525                                                 Msg("    docStoreSegment=" + info.GetDocStoreSegment());
526                                                 segInfoStat.docStoreSegment = info.GetDocStoreSegment();
527                                                 Msg("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
528                                                 segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
529                                         }
530                                         System.String delFileName = info.GetDelFileName();
531                                         if (delFileName == null)
532                                         {
533                                                 Msg("    no deletions");
534                                                 segInfoStat.hasDeletions = false;
535                                         }
536                                         else
537                                         {
538                                                 Msg("    has deletions [delFileName=" + delFileName + "]");
539                                                 segInfoStat.hasDeletions = true;
540                                                 segInfoStat.deletionsFileName = delFileName;
541                                         }
542                                         if (infoStream != null)
543                                                 infoStream.Write("    test: open reader.........");
544                                         reader = SegmentReader.Get(info);
545                                         
546                                         segInfoStat.openReaderPassed = true;
547                                         
548                                         int numDocs = reader.NumDocs();
549                                         toLoseDocCount = numDocs;
550                                         if (reader.HasDeletions())
551                                         {
552                                                 if (reader.deletedDocs.Count() != info.GetDelCount())
553                                                 {
554                                                         throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
555                                                 }
556                                                 if (reader.deletedDocs.Count() > reader.MaxDoc())
557                                                 {
558                                                         throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
559                                                 }
560                                                 if (info.docCount - numDocs != info.GetDelCount())
561                                                 {
562                                                         throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
563                                                 }
564                                                 segInfoStat.numDeleted = info.docCount - numDocs;
565                                                 Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
566                                         }
567                                         else
568                                         {
569                                                 if (info.GetDelCount() != 0)
570                                                 {
571                                                         throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
572                                                 }
573                                                 Msg("OK");
574                                         }
575                                         if (reader.MaxDoc() != info.docCount)
576                                                 throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
577                                         
578                                         // Test getFieldNames()
579                                         if (infoStream != null)
580                                         {
581                                                 infoStream.Write("    test: fields..............");
582                                         }
583                     System.Collections.Generic.ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
584                                         Msg("OK [" + fieldNames.Count + " fields]");
585                                         segInfoStat.numFields = fieldNames.Count;
586                                         
587                                         // Test Field Norms
588                                         segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);
589                                         
590                                         // Test the Term Index
591                                         segInfoStat.termIndexStatus = TestTermIndex(info, reader);
592                                         
593                                         // Test Stored Fields
594                                         segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);
595                                         
596                                         // Test Term Vectors
597                                         segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);
598                                         
599                                         // Rethrow the first exception we encountered
600                                         //  This will cause stats for failed segments to be incremented properly
601                                         if (segInfoStat.fieldNormStatus.error != null)
602                                         {
603                                                 throw new System.SystemException("Field Norm test failed");
604                                         }
605                                         else if (segInfoStat.termIndexStatus.error != null)
606                                         {
607                                                 throw new System.SystemException("Term Index test failed");
608                                         }
609                                         else if (segInfoStat.storedFieldStatus.error != null)
610                                         {
611                                                 throw new System.SystemException("Stored Field test failed");
612                                         }
613                                         else if (segInfoStat.termVectorStatus.error != null)
614                                         {
615                                                 throw new System.SystemException("Term Vector test failed");
616                                         }
617                                         
618                                         Msg("");
619                                 }
620                                 catch (System.Exception t)
621                                 {
622                                         Msg("FAILED");
623                                         System.String comment;
624                                         comment = "fixIndex() would remove reference to this segment";
625                                         Msg("    WARNING: " + comment + "; full exception:");
626                                         if (infoStream != null)
627                                                 infoStream.WriteLine(t.StackTrace);
628                                         Msg("");
629                                         result.totLoseDocCount += toLoseDocCount;
630                                         result.numBadSegments++;
631                                         continue;
632                                 }
633                                 finally
634                                 {
635                                         if (reader != null)
636                                                 reader.Close();
637                                 }
638                                 
639                                 // Keeper
640                                 result.newSegments.Add(info.Clone());
641                         }
642                         
643                         if (0 == result.numBadSegments)
644                         {
645                                 result.clean = true;
646                                 Msg("No problems were detected with this index.\n");
647                         }
648                         else
649                                 Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
650                         
651                         return result;
652                 }
653                 
/// <summary>
/// Tests the field norms of a segment by reading the norms of every field
/// that reports having them.
/// </summary>
/// <param name="fieldNames">Names of the fields to examine.</param>
/// <param name="reader">Reader opened on the segment being checked.</param>
/// <returns>
/// A status whose <c>totFields</c> counts the fields whose norms were read.
/// If anything throws, the exception is stored in <c>error</c> instead of
/// being propagated.
/// </returns>
private Status.FieldNormStatus TestFieldNorms(System.Collections.Generic.ICollection<string> fieldNames, SegmentReader reader)
{
    Status.FieldNormStatus status = new Status.FieldNormStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write("    test: field norms.........");
        }

        // One scratch buffer, sized to the segment, is reused for every field.
        byte[] b = new byte[reader.MaxDoc()];

        // foreach yields typed strings (no cast) and disposes the enumerator.
        foreach (string fieldName in fieldNames)
        {
            if (reader.HasNorms(fieldName))
            {
                reader.Norms(fieldName, b, 0);
                ++status.totFields;
            }
        }

        Msg("OK [" + status.totFields + " fields]");
    }
    catch (System.Exception e)
    {
        // Record the failure on the status; the caller decides what to do with it.
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
692                 
/// <summary>
/// Tests the term index of a segment: enumerates every term and validates
/// its postings (document ids, frequencies and positions).
/// </summary>
/// <param name="info">Segment being checked (not read by this method).</param>
/// <param name="reader">Reader opened on the segment being checked.</param>
/// <returns>
/// A status carrying the term count, the summed document frequencies and
/// the summed position counts. If a check fails or anything else throws,
/// the exception is stored in <c>error</c> instead of being propagated.
/// </returns>
private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
{
    Status.TermIndexStatus status = new Status.TermIndexStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write("    test: terms, freq, prox...");
        }

        TermEnum termEnum = reader.Terms();
        TermPositions termPositions = reader.TermPositions();

        // Used only to count up # deleted docs for this term
        MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

        int maxDoc = reader.MaxDoc();

        while (termEnum.Next())
        {
            status.termCount++;
            Term term = termEnum.Term();
            int docFreq = termEnum.DocFreq();
            termPositions.Seek(term);
            int lastDoc = - 1;
            // Number of documents actually returned by termPositions for this term.
            int freq0 = 0;
            status.totFreq += docFreq;
            while (termPositions.Next())
            {
                freq0++;
                int doc = termPositions.Doc();
                int freq = termPositions.Freq();

                // Document ids must be strictly increasing and below maxDoc.
                if (doc <= lastDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                }
                if (doc >= maxDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                }

                lastDoc = doc;
                if (freq <= 0)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                }

                int lastPos = - 1;
                status.totPos += freq;
                for (int j = 0; j < freq; j++)
                {
                    int pos = termPositions.NextPosition();
                    // A position of -1 is tolerated; anything lower is rejected.
                    if (pos < - 1)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                    }
                    if (pos < lastPos)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                    }
                    // Remember this position so the ordering check above is
                    // effective for the next one (equal positions are allowed).
                    lastPos = pos;
                }
            }

            // Now count how many deleted docs occurred in
            // this term:
            int delCount;
            if (reader.HasDeletions())
            {
                myTermDocs.Seek(term);
                // Drain the enumeration; only the delCount it accumulates is needed.
                while (myTermDocs.Next())
                {
                }
                delCount = myTermDocs.delCount;
            }
            else
            {
                delCount = 0;
            }

            // docFreq must equal the docs seen plus the deleted docs.
            if (freq0 + delCount != docFreq)
            {
                throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
            }
        }

        Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
    }
    catch (System.Exception e)
    {
        // Record the failure on the status; the caller decides what to do with it.
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
794                 
/// <summary>
/// Tests the stored fields of a segment by loading the document of every
/// non-deleted doc id.
/// </summary>
/// <param name="info">Segment being checked; its <c>docCount</c> bounds the scan.</param>
/// <param name="reader">Reader opened on the segment being checked.</param>
/// <param name="format">Number format used for the summary message.</param>
/// <returns>
/// A status carrying the number of documents loaded and the total number
/// of stored fields seen. If a check fails or anything else throws, the
/// exception is stored in <c>error</c> instead of being propagated.
/// </returns>
private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    Status.StoredFieldStatus status = new Status.StoredFieldStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write("    test: stored fields.......");
        }

        // Scan stored fields for all documents
        for (int j = 0; j < info.docCount; ++j)
        {
            if (!reader.IsDeleted(j))
            {
                status.docCount++;
                Document doc = reader.Document(j);
                status.totFields += doc.GetFields().Count;
            }
        }

        // Validate docCount: the number of documents loaded must match what
        // the reader reports. The message shows the expected value
        // (reader.NumDocs()) next to the observed one.
        if (status.docCount != reader.NumDocs())
        {
            throw new System.SystemException("docCount=" + reader.NumDocs() + " but saw " + status.docCount + " undeleted docs");
        }

        Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
    }
    catch (System.Exception e)
    {
        // Record the failure on the status; the caller decides what to do with it.
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
838                 
/// <summary>
/// Tests the term vectors of a segment by fetching the term frequency
/// vectors of every non-deleted doc id.
/// </summary>
/// <param name="info">Segment being checked; its <c>docCount</c> bounds the scan.</param>
/// <param name="reader">Reader opened on the segment being checked.</param>
/// <param name="format">Number format used for the summary message.</param>
/// <returns>
/// A status carrying the number of documents visited and the total number
/// of vectors found. If anything throws, the exception is stored in
/// <c>error</c> instead of being propagated.
/// </returns>
private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    Status.TermVectorStatus status = new Status.TermVectorStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write("    test: term vectors........");
        }

        for (int docId = 0; docId < info.docCount; ++docId)
        {
            // Deleted documents are not inspected.
            if (reader.IsDeleted(docId))
            {
                continue;
            }

            status.docCount++;
            TermFreqVector[] vectors = reader.GetTermFreqVectors(docId);
            // A null result simply contributes nothing to the total.
            if (vectors != null)
            {
                status.totVectors += vectors.Length;
            }
        }

        float averagePerDoc = ((float) status.totVectors) / status.docCount;
        object[] messageArgs = new object[] { status.totVectors, averagePerDoc };
        Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", messageArgs));
    }
    catch (System.Exception e)
    {
        // Record the failure on the status; the caller decides what to do with it.
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
878                 
/// <summary>
/// Repairs the index using a result previously returned by an index check:
/// commits <c>result.newSegments</c> to <c>result.dir</c>. Note that this
/// does not remove any of the unreferenced files after it's done; you must
/// separately open an <see cref="IndexWriter"/>, which deletes unreferenced
/// files when it's created.
///
/// <p/><b>WARNING</b>: this writes a new segments file into the index,
/// effectively removing all documents in broken segments from the index.
/// BE CAREFUL.
///
/// <p/><b>WARNING</b>: Make sure you only call this when the index is not
/// opened by any writer.
/// </summary>
/// <param name="result">Status produced by a full check of the index.</param>
/// <exception cref="System.ArgumentException">
/// <paramref name="result"/> is marked as partial, i.e. it came from a
/// check that covered only a subset of the segments.
/// </exception>
public virtual void  FixIndex(Status result)
{
    bool checkedSubsetOnly = result.partial;
    if (checkedSubsetOnly)
    {
        throw new System.ArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
    }

    result.newSegments.Commit(result.dir);
}
899                 
/// <summary>
/// Flag flipped to true by <c>TestAsserts</c>. It keeps its default value
/// (false) when the assertion in <c>AssertsOn</c> is never evaluated.
/// </summary>
private static bool assertsOn;

/// <summary>
/// Marks assertions as active. Always returns true so the call can serve
/// as the condition of an assertion without ever failing it.
/// </summary>
private static bool TestAsserts()
{
    return assertsOn = true;
}

/// <summary>
/// Reports whether assertions are evaluated in this build.
/// </summary>
/// <returns>
/// True when the <c>Debug.Assert</c> call below actually ran
/// <c>TestAsserts</c>; otherwise the current value of the flag.
/// </returns>
private static bool AssertsOn()
{
    // Debug.Assert calls are compiled in only when DEBUG is defined, so the
    // side effect of TestAsserts() is observable only in such builds.
    System.Diagnostics.Debug.Assert(TestAsserts());
    return assertsOn;
}
913                 
/// <summary>Command-line interface to check and fix an index.
/// <p/>
/// Arguments:
/// <pre>
/// pathToIndex [-fix] [-segment X] [-segment Y]
/// </pre>
/// <ul>
/// <li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments</li>
/// <li><code>-segment X</code>: only check the specified
/// segment(s).  This can be specified multiple times,
/// to check more than one segment, eg <code>-segment _2
/// -segment _a</code>.  You can't use this with the -fix
/// option.</li>
/// </ul>
/// <p/><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause
/// documents (perhaps many) to be permanently removed from the index.  Always make
/// a backup copy of your index before running this!  Do not run this tool on an index
/// that is actively being written to.  You have been warned!
/// <p/>
/// Run without -fix, this tool will open the index, report version information
/// and report any exceptions it hits and what action it would take if -fix were
/// specified.  With -fix, this tool will remove any segments that have issues and
/// write a new segments_N file.  This means all documents contained in the affected
/// segments will be removed.
/// <p/>
/// This tool exits with exit code 1 if the arguments are invalid, if the index
/// cannot be opened, or if the check did not come back clean; else 0.
/// </summary>
/// <param name="args">Command-line arguments as described above.</param>
[STAThread]
public static void  Main(System.String[] args)
{
    bool doFix = false;
    System.Collections.IList onlySegments = new System.Collections.ArrayList();
    System.String indexPath = null;

    // Parse the arguments: flags may appear in any order around the single index path.
    int i = 0;
    while (i < args.Length)
    {
        if (args[i].Equals("-fix"))
        {
            doFix = true;
            i++;
        }
        else if (args[i].Equals("-segment"))
        {
            if (i == args.Length - 1)
            {
                System.Console.Out.WriteLine("ERROR: missing name for -segment option");
                System.Environment.Exit(1);
            }
            onlySegments.Add(args[i + 1]);
            i += 2;
        }
        else
        {
            if (indexPath != null)
            {
                System.Console.Out.WriteLine("ERROR: unexpected extra argument '" + args[i] + "'");
                System.Environment.Exit(1);
            }
            indexPath = args[i];
            i++;
        }
    }

    if (indexPath == null)
    {
        System.Console.Out.WriteLine("\nERROR: index path not specified");
        // NOTE(review): the usage text still says "java"; it is inherited
        // wording and is left unchanged here - confirm the intended command name.
        System.Console.Out.WriteLine(
            "\nUsage: java Mono.Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n"
            + "\n"
            + "  -fix: actually write a new segments_N file, removing any problematic segments\n"
            + "  -segment X: only check the specified segments.  This can be specified multiple\n"
            + "              times, to check more than one segment, eg '-segment _2 -segment _a'.\n"
            + "              You can't use this with the -fix option\n"
            + "\n"
            + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n"
            + "documents (perhaps many) to be permanently removed from the index.  Always make\n"
            + "a backup copy of your index before running this!  Do not run this tool on an index\n"
            + "that is actively being written to.  You have been warned!\n"
            + "\n"
            + "Run without -fix, this tool will open the index, report version information\n"
            + "and report any exceptions it hits and what action it would take if -fix were\n"
            + "specified.  With -fix, this tool will remove any segments that have issues and\n"
            + "write a new segments_N file.  This means all documents contained in the affected\n"
            + "segments will be removed.\n"
            + "\n"
            + "This tool exits with exit code 1 if the index cannot be opened or has any\n"
            + "corruption, else 0.\n");
        System.Environment.Exit(1);
    }

    // NOTE(review): the hint below still refers to java's '-ea' switch; in .NET
    // Debug.Assert is evaluated only in builds that define DEBUG. Text left
    // unchanged - confirm before rewording.
    if (!AssertsOn())
    {
        System.Console.Out.WriteLine("\nNOTE: testing will be more thorough if you run java with '-ea:Mono.Lucene.Net...', so assertions are enabled");
    }

    // An empty list means "check every segment", which the checker expects as null.
    if (onlySegments.Count == 0)
    {
        onlySegments = null;
    }
    else if (doFix)
    {
        System.Console.Out.WriteLine("ERROR: cannot specify both -fix and -segment");
        System.Environment.Exit(1);
    }

    System.Console.Out.WriteLine("\nOpening index @ " + indexPath + "\n");
    Directory dir = null;
    try
    {
        dir = FSDirectory.Open(new System.IO.FileInfo(indexPath));
    }
    catch (System.Exception t)
    {
        System.Console.Out.WriteLine("ERROR: could not open directory \"" + indexPath + "\"; exiting");
        // ToString() includes the exception type and message as well as the
        // stack trace; StackTrace alone would hide why the open failed.
        System.Console.Out.WriteLine(t.ToString());
        System.Environment.Exit(1);
    }

    CheckIndex checker = new CheckIndex(dir);
    System.IO.StreamWriter temp_writer;
    temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
    temp_writer.AutoFlush = true;
    checker.SetInfoStream(temp_writer);

    Status result = checker.CheckIndex_Renamed_Method(onlySegments);
    if (result.missingSegments)
    {
        System.Environment.Exit(1);
    }

    if (!result.clean)
    {
        if (!doFix)
        {
            System.Console.Out.WriteLine("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
        }
        else
        {
            System.Console.Out.WriteLine("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
            System.Console.Out.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
            // Five one-second pauses give the user a chance to abort.
            for (int s = 0; s < 5; s++)
            {
                System.Threading.Thread.Sleep(System.TimeSpan.FromSeconds(1));
                System.Console.Out.WriteLine("  " + (5 - s) + "...");
            }
            System.Console.Out.WriteLine("Writing...");
            checker.FixIndex(result);
            System.Console.Out.WriteLine("OK");
            System.Console.Out.WriteLine("Wrote new segments file \"" + result.newSegments.GetCurrentSegmentFileName() + "\"");
        }
    }
    System.Console.Out.WriteLine("");

    // result has already been dereferenced above, so it cannot be null here.
    // The exit code reflects the check outcome, even after a successful -fix.
    int exitCode = result.clean ? 0 : 1;
    System.Environment.Exit(exitCode);
}
1051         }
1052 }