2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
20 using AbstractField = Mono.Lucene.Net.Documents.AbstractField;
21 using Document = Mono.Lucene.Net.Documents.Document;
22 using Directory = Mono.Lucene.Net.Store.Directory;
23 using FSDirectory = Mono.Lucene.Net.Store.FSDirectory;
24 using IndexInput = Mono.Lucene.Net.Store.IndexInput;
26 namespace Mono.Lucene.Net.Index
29 /// <summary> Basic tool and API to check the health of an index and
30 /// write a new segments file that removes reference to
31 /// problematic segments.
33 /// <p/>As this tool checks every byte in the index, on a large
34 /// index it can take quite a long time to run.
36 /// <p/><b>WARNING</b>: this tool and its API are new and
37 /// experimental and are subject to sudden change in the
38 /// next release. Please make a complete backup of your
39 /// index before using this tool to fix it!
41 public class CheckIndex
44 /// <summary>Default output writer shared by all CheckIndex instances.</summary>
45 /// <deprecated> Use <see cref="SetInfoStream"/> per instance,
48 [Obsolete("Use SetInfoStream per instance,instead.")]
49 public static System.IO.StreamWriter out_Renamed = null;
// Per-instance destination for progress and diagnostic messages; Msg() writes nothing while this is null.
51 private System.IO.StreamWriter infoStream;
// Directory of the index under inspection; the segments file is opened through it.
52 private Directory dir;
54 /// <summary> Returned from <see cref="CheckIndex.CheckIndex_Renamed_Method()"/>, detailing the health and status of the index.
56 /// <p/><b>WARNING</b>: this API is new and experimental and is
57 /// subject to sudden change in the next release.
64 /// <summary>True if no problems were found with the index. </summary>
67 /// <summary>True if we were unable to locate and load the segments_N file. </summary>
68 public bool missingSegments;
70 /// <summary>True if we were unable to open the segments_N file. </summary>
71 public bool cantOpenSegments;
73 /// <summary>True if we were unable to read the version number from the segments_N file. </summary>
74 public bool missingSegmentVersion;
76 /// <summary>Name of the latest segments_N file in the index. </summary>
77 public System.String segmentsFileName;
79 /// <summary>Number of segments in the index. </summary>
80 public int numSegments;
82 /// <summary>String description of the version of the index. </summary>
83 public System.String segmentFormat;
85 /// <summary>Segment names that were requested for checking; empty unless a specific list of segments was passed in.</summary>
86 /// <seealso cref="CheckIndex.CheckIndex_Renamed_Method(System.Collections.IList)">
88 public System.Collections.IList segmentsChecked = new System.Collections.ArrayList();
90 /// <summary>True if the index was created with a newer version of Lucene than the CheckIndex tool. </summary>
91 public bool toolOutOfDate;
93 /// <summary>List of <see cref="SegmentInfoStatus"/> instances, detailing the status of each segment. </summary>
94 public System.Collections.IList segmentInfos = new System.Collections.ArrayList();
96 /// <summary>Directory the index is in. </summary>
99 /// <summary> SegmentInfos instance containing only the segments that
100 /// had no problems (this is used by <see cref="CheckIndex.FixIndex"/>
101 /// to repair the index).
103 internal SegmentInfos newSegments;
105 /// <summary>How many documents will be lost to bad segments. </summary>
106 public int totLoseDocCount;
108 /// <summary>How many bad segments were found. </summary>
109 public int numBadSegments;
111 /// <summary>True if we checked only specific segments (<see
112 /// cref="CheckIndex.CheckIndex_Renamed_Method(System.Collections.IList)"/> was called with non-null
117 /// <summary>Holds the userData of the last commit in the index. </summary>
118 public System.Collections.Generic.IDictionary<string, string> userData;
120 /// <summary>Holds the status of each segment in the index.
121 /// See <see cref="Status.segmentInfos"/>.
123 /// <p/><b>WARNING</b>: this API is new and experimental and is
124 /// subject to sudden change in the next release.
126 public class SegmentInfoStatus
128 /// <summary>Name of the segment. </summary>
129 public System.String name;
131 /// <summary>Document count (does not take deletions into account). </summary>
134 /// <summary>True if the segment uses the compound file format. </summary>
135 public bool compound;
137 /// <summary>Number of files referenced by this segment. </summary>
140 /// <summary>Net size (MB) of the files referenced by this
143 public double sizeMB;
145 /// <summary>Doc store offset, if this segment shares the doc
146 /// store files (stored fields and term vectors) with
147 /// other segments. This is -1 if it does not share.
149 public int docStoreOffset = - 1;
151 /// <summary>String of the shared doc store segment, or null if
152 /// this segment does not share the doc store files.
154 public System.String docStoreSegment;
156 /// <summary>True if the shared doc store files are compound file
159 public bool docStoreCompoundFile;
161 /// <summary>True if this segment has pending deletions. </summary>
162 public bool hasDeletions;
164 /// <summary>Name of the current deletions file. </summary>
165 public System.String deletionsFileName;
167 /// <summary>Number of deleted documents. </summary>
168 public int numDeleted;
170 /// <summary>True if we were able to open a SegmentReader on this
173 public bool openReaderPassed;
175 /// <summary>Number of fields in this segment. </summary>
176 internal int numFields;
178 /// <summary>True if at least one of the fields in this segment
179 /// does not omitTermFreqAndPositions.
181 /// <seealso cref="AbstractField.setOmitTermFreqAndPositions">
185 /// <summary>Dictionary (string keys, string values) that includes certain
186 /// debugging details that IndexWriter records into
187 /// each segment it creates.
189 public System.Collections.Generic.IDictionary<string, string> diagnostics;
191 /// <summary>Status for testing of field norms (null if field norms could not be tested). </summary>
192 public FieldNormStatus fieldNormStatus;
194 /// <summary>Status for testing of indexed terms (null if indexed terms could not be tested). </summary>
195 public TermIndexStatus termIndexStatus;
197 /// <summary>Status for testing of stored fields (null if stored fields could not be tested). </summary>
198 public StoredFieldStatus storedFieldStatus;
200 /// <summary>Status for testing of term vectors (null if term vectors could not be tested). </summary>
201 public TermVectorStatus termVectorStatus;
204 /// <summary> Status from testing field norms.</summary>
205 public sealed class FieldNormStatus
207 /// <summary>Number of fields successfully tested. </summary>
208 public long totFields = 0L;
210 /// <summary>Exception thrown during the field norms test (null on success). </summary>
211 public System.Exception error = null;
214 /// <summary> Status from testing the term index.</summary>
215 public sealed class TermIndexStatus
217 /// <summary>Total term count. </summary>
218 public long termCount = 0L;
220 /// <summary>Total frequency across all terms: the sum of each term's document frequency (term/doc pairs). </summary>
221 public long totFreq = 0L;
223 /// <summary>Total number of positions: the sum of the within-document frequencies of every term/doc pair. </summary>
224 public long totPos = 0L;
226 /// <summary>Exception thrown during the term index test (null on success). </summary>
227 public System.Exception error = null;
230 /// <summary> Status from testing stored fields.</summary>
231 public sealed class StoredFieldStatus
234 /// <summary>Number of documents tested. </summary>
235 public int docCount = 0;
237 /// <summary>Total number of stored fields tested (sum of the field counts of the loaded documents). </summary>
238 public long totFields = 0;
240 /// <summary>Exception thrown during the stored fields test (null on success). </summary>
241 public System.Exception error = null;
244 /// <summary> Status from testing term vectors.</summary>
245 public sealed class TermVectorStatus
248 /// <summary>Number of documents tested. </summary>
249 public int docCount = 0;
251 /// <summary>Total number of term vectors tested (sum of the per-document vector array lengths). </summary>
252 public long totVectors = 0;
254 /// <summary>Exception thrown during the term vector test (null on success). </summary>
255 public System.Exception error = null;
/// <summary>
/// Creates a checker bound to the given index directory. Messages are sent
/// to the static default writer (<c>out_Renamed</c>) until
/// <c>SetInfoStream</c> is called on the instance.
/// </summary>
/// <param name="dir">Directory holding the index to examine.</param>
public CheckIndex(Directory dir)
{
    this.infoStream = out_Renamed;
    this.dir = dir;
}
/// <summary>
/// Chooses the writer that receives this checker's messages. Passing
/// <c>null</c> switches message output off.
/// </summary>
/// <param name="out_Renamed">Writer to receive messages, or <c>null</c> for none.</param>
public virtual void SetInfoStream(System.IO.StreamWriter out_Renamed)
{
    this.infoStream = out_Renamed;
}
/// <summary>Writes one line to the info stream; does nothing when no stream is set.</summary>
/// <param name="msg">Text of the line to write.</param>
private void Msg(System.String msg)
{
    if (infoStream == null)
    {
        return;
    }

    infoStream.WriteLine(msg);
}
// SegmentTermDocs subclass that TestTermIndex uses only to count the deleted documents of a term.
280 private class MySegmentTermDocs:SegmentTermDocs
// Tally read by TestTermIndex once the enumeration for a term has been exhausted.
283 internal int delCount;
285 internal MySegmentTermDocs(SegmentReader p):base(p)
// NOTE(review): the bodies of Seek and SkippingDoc are not visible in this listing; presumably Seek
// resets delCount for the new term and SkippingDoc increments it — confirm against the full file.
289 public override void Seek(Term term)
295 protected internal override void SkippingDoc()
/// <summary>
/// Checks the index in <paramref name="dir"/> without restricting the
/// check to particular segments, optionally repairing it.
/// </summary>
/// <param name="dir">Directory containing the index.</param>
/// <param name="doFix">When true, an index that is not clean is repaired through FixIndex.</param>
/// <returns>True if the index is clean, else false.</returns>
[Obsolete("Please instantiate a CheckIndex and then use CheckIndex() instead")]
public static bool Check(Directory dir, bool doFix)
{
    // A null segment list means no per-segment filtering is applied.
    System.Collections.IList everySegment = null;
    return Check(dir, doFix, everySegment);
}
/// <summary>
/// Checks the index in <paramref name="dir"/>, optionally limited to the
/// named segments, and optionally repairs it when problems were found.
/// </summary>
/// <param name="dir">Directory containing the index.</param>
/// <param name="doFix">When true and the index is not clean, FixIndex is invoked with the check result.</param>
/// <param name="onlySegments">Segment names to check, or null for no restriction.</param>
/// <returns>True if the index is clean, else false.</returns>
[Obsolete("Please instantiate a CheckIndex and then use CheckIndex(List) instead")]
public static bool Check(Directory dir, bool doFix, System.Collections.IList onlySegments)
{
    CheckIndex tool = new CheckIndex(dir);
    Status outcome = tool.CheckIndex_Renamed_Method(onlySegments);

    bool repairWanted = doFix && !outcome.clean;
    if (repairWanted)
    {
        tool.FixIndex(outcome);
    }

    return outcome.clean;
}
/// <summary>
/// Returns a <see cref="Status"/> instance detailing the state of the
/// index, without restricting the check to particular segments.
///
/// <p/>As this method checks every byte in the index, on a large
/// index it can take quite a long time to run.
///
/// <p/><b>WARNING</b>: make sure you only call this when the index is
/// not opened by any writer.
/// </summary>
public virtual Status CheckIndex_Renamed_Method()
{
    // A null segment list means no per-segment filtering is applied.
    System.Collections.IList everySegment = null;
    return CheckIndex_Renamed_Method(everySegment);
}
339 /// <summary>Returns a <see cref="Status"/> instance detailing
340 /// the state of the index.
343 /// <param name="onlySegments">list of specific segment names to check
345 /// <p/>As this method checks every byte in the specified
346 /// segments, on a large index it can take quite a long
349 /// <p/><b>WARNING</b>: make sure
350 /// you only call this when the index is not opened by any
353 public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
// Number format of the current culture; used for the size message and handed to the stored-field and term-vector tests.
355 System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
356 SegmentInfos sis = new SegmentInfos();
357 Status result = new Status();
// Failure path for reading the segments file: flag it in the result and print the stack trace when an info stream is set.
363 catch (System.Exception t)
365 Msg("ERROR: could not read any segments file in directory");
366 result.missingSegments = true;
367 if (infoStream != null)
368 infoStream.WriteLine(t.StackTrace);
372 int numSegments = sis.Count;
373 System.String segmentsFileName = sis.GetCurrentSegmentFileName();
// Open the current segments file so that its format number can be read.
374 IndexInput input = null;
377 input = dir.OpenInput(segmentsFileName);
379 catch (System.Exception t)
381 Msg("ERROR: could not open segments file in directory");
382 if (infoStream != null)
383 infoStream.WriteLine(t.StackTrace);
384 result.cantOpenSegments = true;
// The first int of the segments file is taken as the format (version) number.
390 format = input.ReadInt();
392 catch (System.Exception t)
394 Msg("ERROR: could not read segment file version in directory");
395 if (infoStream != null)
396 infoStream.WriteLine(t.StackTrace);
397 result.missingSegmentVersion = true;
// Translate the format number into a human-readable description.
406 System.String sFormat = "";
409 if (format == SegmentInfos.FORMAT)
410 sFormat = "FORMAT [Lucene Pre-2.1]";
// NOTE(review): this `if` is not chained to the previous one with `else`, so when format == FORMAT the
// description assigned above appears to be overwritten by whichever branch of the chain below runs
// — TODO confirm and consider making this an `else if`.
411 if (format == SegmentInfos.FORMAT_LOCKLESS)
412 sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
413 else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
414 sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
415 else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
416 sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
419 if (format == SegmentInfos.FORMAT_CHECKSUM)
420 sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
421 else if (format == SegmentInfos.FORMAT_DEL_COUNT)
422 sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
423 else if (format == SegmentInfos.FORMAT_HAS_PROX)
424 sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
425 else if (format == SegmentInfos.FORMAT_USER_DATA)
426 sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
427 else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
428 sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
// NOTE(review): a format number below CURRENT_FORMAT is reported as "newer"; presumably format numbers
// decrease as the on-disk format evolves — confirm against SegmentInfos.
429 else if (format < SegmentInfos.CURRENT_FORMAT)
431 sFormat = "int=" + format + " [newer version of Lucene than this tool]";
436 sFormat = format + " [Lucene 1.3 or prior]";
// Record the header-level facts in the result and print a one-line summary.
440 result.segmentsFileName = segmentsFileName;
441 result.numSegments = numSegments;
442 result.segmentFormat = sFormat;
443 result.userData = sis.GetUserData();
444 System.String userDataString;
445 if (sis.GetUserData().Count > 0)
447 userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
454 Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);
// The caller restricted the check to a subset of segments: mark the result as partial, echo the names
// to the info stream, then record them in the result.
456 if (onlySegments != null)
458 result.partial = true;
459 if (infoStream != null)
460 infoStream.Write("\nChecking only these segments:");
461 System.Collections.IEnumerator it = onlySegments.GetEnumerator();
462 while (it.MoveNext())
464 if (infoStream != null)
466 infoStream.Write(" " + it.Current);
// Second pass over the same list stores the requested names in result.segmentsChecked.
469 System.Collections.IEnumerator e = onlySegments.GetEnumerator();
470 while (e.MoveNext() == true)
472 result.segmentsChecked.Add(e.Current);
// NOTE(review): the condition guarding this message is not visible in this listing; presumably it is
// tied to the "newer version" format branch above — confirm.
479 Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
480 result.toolOutOfDate = true;
// Start from a copy of the segment infos and empty it; entries are re-added by the Add call at the end of the loop.
485 result.newSegments = (SegmentInfos) sis.Clone();
486 result.newSegments.Clear();
488 for (int i = 0; i < numSegments; i++)
490 SegmentInfo info = sis.Info(i);
// Filter for segments that are absent from a non-null onlySegments list (the statement under this
// `if` is not visible in this listing).
491 if (onlySegments != null && !onlySegments.Contains(info.name))
493 Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
494 result.segmentInfos.Add(segInfoStat);
495 Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
496 segInfoStat.name = info.name;
497 segInfoStat.docCount = info.docCount;
// Until a reader has been opened, every document of the segment counts as potentially lost;
// this is narrowed to the reader's NumDocs() below.
499 int toLoseDocCount = info.docCount;
501 SegmentReader reader = null;
// Report each piece of segment metadata and mirror it into the per-segment status.
505 Msg(" compound=" + info.GetUseCompoundFile());
506 segInfoStat.compound = info.GetUseCompoundFile();
507 Msg(" hasProx=" + info.GetHasProx());
508 segInfoStat.hasProx = info.GetHasProx();
509 Msg(" numFiles=" + info.Files().Count);
510 segInfoStat.numFiles = info.Files().Count;
511 Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
512 segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
513 System.Collections.Generic.IDictionary<string, string> diagnostics = info.GetDiagnostics();
514 segInfoStat.diagnostics = diagnostics;
515 if (diagnostics.Count > 0)
517 Msg(" diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
// An offset of -1 is skipped here, so doc-store details are reported only for other offsets.
520 int docStoreOffset = info.GetDocStoreOffset();
521 if (docStoreOffset != - 1)
523 Msg(" docStoreOffset=" + docStoreOffset);
524 segInfoStat.docStoreOffset = docStoreOffset;
525 Msg(" docStoreSegment=" + info.GetDocStoreSegment());
526 segInfoStat.docStoreSegment = info.GetDocStoreSegment();
527 Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
528 segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
// A null deletions file name is reported as "no deletions".
530 System.String delFileName = info.GetDelFileName();
531 if (delFileName == null)
533 Msg(" no deletions");
534 segInfoStat.hasDeletions = false;
538 Msg(" has deletions [delFileName=" + delFileName + "]");
539 segInfoStat.hasDeletions = true;
540 segInfoStat.deletionsFileName = delFileName;
542 if (infoStream != null)
543 infoStream.Write(" test: open reader.........");
544 reader = SegmentReader.Get(info);
546 segInfoStat.openReaderPassed = true;
548 int numDocs = reader.NumDocs();
549 toLoseDocCount = numDocs;
// Cross-check the deletion counts reported by the SegmentInfo, the reader's deletedDocs and the reader's doc counts.
550 if (reader.HasDeletions())
552 if (reader.deletedDocs.Count() != info.GetDelCount())
554 throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
556 if (reader.deletedDocs.Count() > reader.MaxDoc())
558 throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
560 if (info.docCount - numDocs != info.GetDelCount())
562 throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
564 segInfoStat.numDeleted = info.docCount - numDocs;
565 Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
// NOTE(review): presumably the branch for a reader without deletions (the `else` is not visible here):
// the SegmentInfo must then report a delete count of zero.
569 if (info.GetDelCount() != 0)
571 throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
575 if (reader.MaxDoc() != info.docCount)
576 throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
578 // Test getFieldNames()
579 if (infoStream != null)
581 infoStream.Write(" test: fields..............");
583 System.Collections.Generic.ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
584 Msg("OK [" + fieldNames.Count + " fields]");
585 segInfoStat.numFields = fieldNames.Count;
// Test Field Norms
588 segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);
590 // Test the Term Index
591 segInfoStat.termIndexStatus = TestTermIndex(info, reader);
593 // Test Stored Fields
594 segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);
// Test Term Vectors
597 segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);
599 // Rethrow the first exception we encountered
600 // This will cause stats for failed segments to be incremented properly
601 if (segInfoStat.fieldNormStatus.error != null)
603 throw new System.SystemException("Field Norm test failed");
605 else if (segInfoStat.termIndexStatus.error != null)
607 throw new System.SystemException("Term Index test failed");
609 else if (segInfoStat.storedFieldStatus.error != null)
611 throw new System.SystemException("Stored Field test failed");
613 else if (segInfoStat.termVectorStatus.error != null)
615 throw new System.SystemException("Term Vector test failed");
// Any failure above marks the segment as bad and adds its documents to the running loss total.
620 catch (System.Exception t)
623 System.String comment;
624 comment = "fixIndex() would remove reference to this segment";
625 Msg(" WARNING: " + comment + "; full exception:");
626 if (infoStream != null)
627 infoStream.WriteLine(t.StackTrace);
629 result.totLoseDocCount += toLoseDocCount;
630 result.numBadSegments++;
// Add a clone of this segment's info to the repaired set.
// NOTE(review): presumably reached only for healthy segments — the skip in the failure path is not visible in this listing.
640 result.newSegments.Add(info.Clone());
// Final summary, printed once all segments have been examined.
643 if (0 == result.numBadSegments)
646 Msg("No problems were detected with this index.\n");
649 Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
654 /// <summary> Test field norms.</summary>
// Loads the norms of every field that has them into a scratch buffer sized to reader.MaxDoc().
// A failure is reported through Msg and the info stream; the caller inspects status.error afterwards.
655 private Status.FieldNormStatus TestFieldNorms(System.Collections.Generic.ICollection<string> fieldNames, SegmentReader reader)
657 Status.FieldNormStatus status = new Status.FieldNormStatus();
662 if (infoStream != null)
664 infoStream.Write(" test: field norms.........");
666 System.Collections.IEnumerator it = fieldNames.GetEnumerator();
// One buffer is reused for every field; its contents are not inspected afterwards.
667 byte[] b = new byte[reader.MaxDoc()];
668 while (it.MoveNext())
670 System.String fieldName = (System.String) it.Current;
// Only fields that actually carry norms are loaded.
671 if (reader.HasNorms(fieldName))
673 reader.Norms(fieldName, b, 0);
678 Msg("OK [" + status.totFields + " fields]");
680 catch (System.Exception e)
682 Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
684 if (infoStream != null)
686 infoStream.WriteLine(e.StackTrace);
693 /// <summary> Test the term index.</summary>
// Walks every term of the segment and, for each, every posting and position, throwing on the
// inconsistencies named in the exception messages below. A failure is reported through Msg and the
// info stream; the caller inspects status.error afterwards.
694 private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
696 Status.TermIndexStatus status = new Status.TermIndexStatus();
700 if (infoStream != null)
702 infoStream.Write(" test: terms, freq, prox...");
705 TermEnum termEnum = reader.Terms();
706 TermPositions termPositions = reader.TermPositions();
708 // Used only to count up # deleted docs for this term
709 MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
711 int maxDoc = reader.MaxDoc();
713 while (termEnum.Next())
716 Term term = termEnum.Term();
717 int docFreq = termEnum.DocFreq();
718 termPositions.Seek(term);
// totFreq accumulates the document frequency reported by the term enumeration.
721 status.totFreq += docFreq;
722 while (termPositions.Next())
725 int doc = termPositions.Doc();
726 int freq = termPositions.Freq();
// The conditions guarding the following throws are not visible in this listing; the messages
// describe the intended checks (doc ids ascending, doc id below maxDoc, freq within bounds).
729 throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
733 throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
739 throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
// totPos accumulates the within-document frequency; one position is read per occurrence.
743 status.totPos += freq;
744 for (int j = 0; j < freq; j++)
746 int pos = termPositions.NextPosition();
749 throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
// NOTE(review): no assignment to lastPos inside this loop is visible in this listing; confirm that it
// is advanced after each position, otherwise this ordering check can never detect a decrease.
753 throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
758 // Now count how many deleted docs occurred in
761 if (reader.HasDeletions())
// Exhaust the enumeration for this term, then read the deleted-doc tally kept by MySegmentTermDocs.
763 myTermDocs.Seek(term);
764 while (myTermDocs.Next())
767 delCount = myTermDocs.delCount;
// Docs seen plus docs deleted must equal the document frequency stored for the term.
774 if (freq0 + delCount != docFreq)
776 throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
780 Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
782 catch (System.Exception e)
784 Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
786 if (infoStream != null)
788 infoStream.WriteLine(e.StackTrace);
/// <summary>
/// Tests the stored fields of one segment by loading every document that
/// is not deleted and counting its fields.
/// </summary>
/// <param name="info">Segment description; its docCount bounds the scan.</param>
/// <param name="reader">Open reader on the segment.</param>
/// <param name="format">Number format used for the summary message.</param>
/// <returns>
/// The counts gathered during the scan. <c>error</c> holds the exception
/// when the test failed and stays null on success; failures are reported
/// here instead of being propagated to the caller.
/// </returns>
private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    Status.StoredFieldStatus status = new Status.StoredFieldStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: stored fields.......");
        }

        // Scan stored fields for all documents
        for (int j = 0; j < info.docCount; ++j)
        {
            if (!reader.IsDeleted(j))
            {
                status.docCount++;
                Document doc = reader.Document(j);
                status.totFields += doc.GetFields().Count;
            }
        }

        // Validate docCount: the number of live documents loaded must match
        // what the reader reports. The message shows the reader's figure
        // first and the observed count second; previously the observed
        // count was printed on both sides, hiding the expected value.
        int expectedDocs = reader.NumDocs();
        if (status.docCount != expectedDocs)
        {
            throw new System.SystemException("docCount=" + expectedDocs + " but saw " + status.docCount + " undeleted docs");
        }

        Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        // Hand the failure back to the caller, which checks storedFieldStatus.error.
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
839 /// <summary> Test term vectors for a segment.</summary>
// Fetches the term frequency vectors of every document that is not deleted and totals their count.
// A failure is reported through Msg and the info stream; the caller inspects status.error afterwards.
840 private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
842 Status.TermVectorStatus status = new Status.TermVectorStatus();
846 if (infoStream != null)
848 infoStream.Write(" test: term vectors........");
// Scan every document slot of the segment, skipping deleted documents.
851 for (int j = 0; j < info.docCount; ++j)
853 if (!reader.IsDeleted(j))
856 TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
// Each array element counts as one term vector.
859 status.totVectors += tfv.Length;
// The average is computed in float and divides by the number of documents tested.
864 Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
866 catch (System.Exception e)
868 Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
870 if (infoStream != null)
872 infoStream.WriteLine(e.StackTrace);
/// <summary>
/// Repairs the index from a Status previously returned by
/// CheckIndex_Renamed_Method, by committing the segments that had no
/// problems (<c>result.newSegments</c>) to <c>result.dir</c>.
///
/// <p/>Note that this does not remove any of the unreferenced files after
/// it's done; you must separately open an IndexWriter, which deletes
/// unreferenced files when it's created.
///
/// <p/><b>WARNING</b>: this writes a new segments file into the index,
/// effectively removing all documents in broken segments from the index.
///
/// <p/><b>WARNING</b>: Make sure you only call this when the index is not
/// opened by any writer.
/// </summary>
/// <param name="result">Status of a check that covered the whole index.</param>
/// <exception cref="System.ArgumentException">
/// The status comes from a check that covered only a subset of segments.
/// </exception>
public virtual void FixIndex(Status result)
{
    bool subsetOnly = result.partial;
    if (subsetOnly)
    {
        throw new System.ArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
    }

    SegmentInfos survivors = result.newSegments;
    survivors.Commit(result.dir);
}
// Flag for whether debug assertions are active.
// NOTE(review): the statements that set and return it are not visible in this listing — confirm.
900 private static bool assertsOn;
902 private static bool TestAsserts()
908 private static bool AssertsOn()
// Debug.Assert is a [Conditional("DEBUG")] call, so TestAsserts() is evaluated only in builds where
// assertions are compiled in; that is how assertion support is detected here.
910 System.Diagnostics.Debug.Assert(TestAsserts());
914 /// <summary>Command-line interface to check and fix an index.
916 /// Run it like this:
918 /// java -ea:Mono.Lucene.Net... Mono.Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
921 /// <li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments</li>
922 /// <li><code>-segment X</code>: only check the specified
923 /// segment(s). This can be specified multiple times,
924 /// to check more than one segment, eg <code>-segment _2
925 /// -segment _a</code>. You can't use this with the -fix
928 /// <p/><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause
929 /// documents (perhaps many) to be permanently removed from the index. Always make
930 /// a backup copy of your index before running this! Do not run this tool on an index
931 /// that is actively being written to. You have been warned!
932 /// <p/> Run without -fix, this tool will open the index, report version information
933 /// and report any exceptions it hits and what action it would take if -fix were
934 /// specified. With -fix, this tool will remove any segments that have issues and
935 /// write a new segments_N file. This means all documents contained in the affected
936 /// segments will be removed.
938 /// This tool exits with exit code 1 if the index cannot be opened or has any
939 /// corruption, else 0.
942 public static void Main(System.String[] args)
946 System.Collections.IList onlySegments = new System.Collections.ArrayList();
947 System.String indexPath = null;
// Parse the command line: "-fix", repeated "-segment NAME" pairs, and the index path.
949 while (i < args.Length)
951 if (args[i].Equals("-fix"))
956 else if (args[i].Equals("-segment"))
// "-segment" must be followed by a segment name, so it cannot be the last argument.
958 if (i == args.Length - 1)
960 System.Console.Out.WriteLine("ERROR: missing name for -segment option");
961 System.Environment.Exit(1);
963 onlySegments.Add(args[i + 1]);
// NOTE(review): presumably the branch for a bare argument, taken as the index path; a second one is rejected.
968 if (indexPath != null)
970 System.Console.Out.WriteLine("ERROR: unexpected extra argument '" + args[i] + "'");
971 System.Environment.Exit(1);
// Without an index path there is nothing to check: print the usage text and exit with code 1.
978 if (indexPath == null)
980 System.Console.Out.WriteLine("\nERROR: index path not specified");
981 System.Console.Out.WriteLine("\nUsage: java Mono.Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + " -segment X: only check the specified segments. This can be specified multiple\n" + " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + " You can't use this with the -fix option\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" + "that is actively being written to. You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified. With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file. This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has any\n" + "corruption, else 0.\n");
982 System.Environment.Exit(1);
// NOTE(review): the condition guarding this note is not visible in this listing; presumably it is
// printed when AssertsOn() reports that assertions are disabled — confirm.
986 System.Console.Out.WriteLine("\nNOTE: testing will be more thorough if you run java with '-ea:Mono.Lucene.Net...', so assertions are enabled");
// NOTE(review): the statements between this test and the error below are not visible in this listing;
// the message shows that -fix combined with -segment is rejected.
988 if (onlySegments.Count == 0)
992 System.Console.Out.WriteLine("ERROR: cannot specify both -fix and -segment");
993 System.Environment.Exit(1);
996 System.Console.Out.WriteLine("\nOpening index @ " + indexPath + "\n");
997 Directory dir = null;
1000 dir = FSDirectory.Open(new System.IO.FileInfo(indexPath));
1002 catch (System.Exception t)
1004 System.Console.Out.WriteLine("ERROR: could not open directory \"" + indexPath + "\"; exiting");
1005 System.Console.Out.WriteLine(t.StackTrace);
1006 System.Environment.Exit(1);
// Route the checker's messages to standard output through an auto-flushing writer.
1009 CheckIndex checker = new CheckIndex(dir);
1010 System.IO.StreamWriter temp_writer;
1011 temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
1012 temp_writer.AutoFlush = true;
1013 checker.SetInfoStream(temp_writer);
1015 Status result = checker.CheckIndex_Renamed_Method(onlySegments);
// No readable segments file: exit with code 1.
1016 if (result.missingSegments)
1018 System.Environment.Exit(1);
// NOTE(review): the conditions selecting between the dry-run warning and the actual repair are not
// visible in this listing; presumably they depend on result.clean and the -fix flag — confirm.
1025 System.Console.Out.WriteLine("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
1029 System.Console.Out.WriteLine("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
1030 System.Console.Out.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
// Five-step countdown. TimeSpan(long) takes 100 ns ticks, so 10000 * 1000 ticks is one second per step.
1031 for (int s = 0; s < 5; s++)
1033 System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
1034 System.Console.Out.WriteLine(" " + (5 - s) + "...");
1036 System.Console.Out.WriteLine("Writing...");
1037 checker.FixIndex(result);
1038 System.Console.Out.WriteLine("OK");
1039 System.Console.Out.WriteLine("Wrote new segments file \"" + result.newSegments.GetCurrentSegmentFileName() + "\"");
1042 System.Console.Out.WriteLine("");
// The process exit code is chosen from whether the check result is clean (the assignments are not visible in this listing).
1045 if (result != null && result.clean == true)
1049 System.Environment.Exit(exitCode);