2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
20 using Directory = Mono.Lucene.Net.Store.Directory;
21 using IndexInput = Mono.Lucene.Net.Store.IndexInput;
22 using IndexOutput = Mono.Lucene.Net.Store.IndexOutput;
23 using BitVector = Mono.Lucene.Net.Util.BitVector;
25 namespace Mono.Lucene.Net.Index
28 /// <summary> Information about a segment such as its name, directory, and files related
31 /// * <p/><b>NOTE:</b> This API is new and still experimental
32 /// (subject to change suddenly in the next release)<p/>
34 public sealed class SegmentInfo : System.ICloneable
// Sentinel values shared by delGen, the entries of normGen, and isCompoundFile
// (each field's own comment below explains how it interprets them).
37 internal const int NO = - 1; // e.g. no norms; no deletes;
38 internal const int YES = 1; // e.g. have norms; have deletes;
39 internal const int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions
40 internal const int WITHOUT_GEN = 0; // a file name that has no GEN in it.
42 public System.String name; // unique name in dir
43 public int docCount; // number of docs in seg
44 public Directory dir; // where segment resides
46 private bool preLockless; // true if this is a segments file written before
47 // lock-less commits (2.1)
49 private long delGen; // current generation of del file; NO if there
50 // are no deletes; CHECK_DIR if it's a pre-2.1 segment
51 // (and we must check filesystem); YES or higher if
52 // there are deletes at generation N
54 private long[] normGen; // current generation of each field's norm file.
55 // If this array is null, for lockLess this means no
56 // separate norms. For preLockLess this means we must
57 // check filesystem. If this array is not null, its
58 // values mean: NO says this field has no separate
59 // norms; CHECK_DIR says it is a preLockLess segment and
60 // filesystem must be checked; >= YES says this field
61 // has separate norms with the specified generation
63 private sbyte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's
64 // pre-2.1 (ie, must check file system to see
65 // if <name>.cfs and <name>.nrm exist)
67 private bool hasSingleNormFile; // true if this segment maintains norms in a single file;
69 // this is currently false for segments populated by DocumentWriter
70 // and true for newly created merged segments (both
71 // compound and non compound).
73 private System.Collections.Generic.IList<string> files; // cached list of files that this segment uses
// -1 means "not computed yet"; SizeInBytes() tests for that value before summing file lengths.
76 internal long sizeInBytes = - 1; // total byte size of all of our files (computed on demand)
// docStoreOffset == -1 is tested throughout the class as "no shared doc store".
78 private int docStoreOffset; // if this segment shares stored fields & vectors, this
79 // offset is where in that file this segment's docs begin
80 private System.String docStoreSegment; // name used to derive fields/vectors file we share with
82 private bool docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
84 private int delCount; // How many deleted docs in this segment, or -1 if not yet known
85 // (if it's an older index)
87 private bool hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false
// String-to-string metadata for this segment: read with IndexInput.ReadStringStringMap() by the
// deserializing constructor and written with IndexOutput.WriteStringStringMap() by Write().
89 private System.Collections.Generic.IDictionary<string, string> diagnostics;
/// <summary>
/// Builds a single-line description of this segment: directory, segment name,
/// document count, deletion count and the name of the deletes file.
/// </summary>
/// <returns>the description string, prefixed with "si: "</returns>
public override System.String ToString()
{
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    text.Append("si: ");
    text.Append(dir.ToString());
    text.Append(" ");
    text.Append(name);
    text.Append(" docCount: ");
    text.Append(docCount);
    text.Append(" delCount: ");
    text.Append(delCount);
    text.Append(" delFileName: ");
    text.Append(GetDelFileName());
    return text.ToString();
}
/// <summary>Creates a SegmentInfo from a segment name, document count and directory.
/// The visible statements start the instance with isCompoundFile = CHECK_DIR, no single
/// norm file and no shared doc store (docStoreOffset = -1, docStoreSegment = the segment's
/// own name, docStoreIsCompoundFile = false).</summary>
/// <param name="name">segment name; also used as the initial docStoreSegment</param>
/// <param name="docCount">number of documents in the segment</param>
/// <param name="dir">directory argument (its assignment is not visible in this excerpt)</param>
/// <remarks>NOTE(review): the embedded line numbers skip 97-98, 100-101, 103 and 108-111, so
/// further initialisation presumably exists outside this view - confirm against the full file.</remarks>
96 public SegmentInfo(System.String name, int docCount, Directory dir)
99 this.docCount = docCount;
102 isCompoundFile = (sbyte) (CHECK_DIR);
104 hasSingleNormFile = false;
105 docStoreOffset = - 1;
106 docStoreSegment = name;
107 docStoreIsCompoundFile = false;
/// <summary>Convenience overload: forwards to the nine-argument constructor with
/// docStoreOffset = -1, docStoreSegment = null, docStoreIsCompoundFile = false and hasProx = true.</summary>
112 public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile):this(name, docCount, dir, isCompoundFile, hasSingleNormFile, - 1, null, false, true)
/// <summary>Creates a SegmentInfo with explicit compound-file, single-norm-file, doc-store and
/// prox settings. Chains to the three-argument constructor first, then overwrites the values
/// that constructor assigned.</summary>
/// <param name="isCompoundFile">stored as YES when true, NO when false</param>
/// <param name="docStoreOffset">-1 when the segment does not share a doc store</param>
/// <param name="docStoreSegment">must be non-null whenever docStoreOffset is not -1 (asserted below)</param>
/// <remarks>NOTE(review): embedded line numbers 120 and 125 are absent from this view; they may hold
/// further short assignments - confirm against the full file.</remarks>
116 public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile, int docStoreOffset, System.String docStoreSegment, bool docStoreIsCompoundFile, bool hasProx):this(name, docCount, dir)
118 this.isCompoundFile = (sbyte) (isCompoundFile?YES:NO);
119 this.hasSingleNormFile = hasSingleNormFile;
121 this.docStoreOffset = docStoreOffset;
122 this.docStoreSegment = docStoreSegment;
123 this.docStoreIsCompoundFile = docStoreIsCompoundFile;
124 this.hasProx = hasProx;
// A shared doc store (offset != -1) must name the segment whose files are shared.
126 System.Diagnostics.Debug.Assert(docStoreOffset == - 1 || docStoreSegment != null, "dso=" + docStoreOffset + " dss=" + docStoreSegment + " docCount=" + docCount);
129 /// <summary> Copy everything from src SegmentInfo into our instance.</summary>
/// <param name="src">instance whose state is copied into this one</param>
/// <remarks>NOTE(review): in the lines visible here, docStoreSegment, hasProx and diagnostics are
/// not copied. Embedded line numbers 131-133, 135, 137 and 141-145 are absent from this view, so
/// confirm against the full file whether that omission is real.</remarks>
130 internal void Reset(SegmentInfo src)
134 docCount = src.docCount;
136 preLockless = src.preLockless;
138 docStoreOffset = src.docStoreOffset;
139 docStoreIsCompoundFile = src.docStoreIsCompoundFile;
140 if (src.normGen == null)
// Copy the generations into a fresh array so the two instances do not share normGen storage.
146 normGen = new long[src.normGen.Length];
147 Array.Copy(src.normGen, 0, normGen, 0, src.normGen.Length);
149 isCompoundFile = src.isCompoundFile;
150 hasSingleNormFile = src.hasSingleNormFile;
151 delCount = src.delCount;
/// <summary>Replaces the diagnostics map attached to this segment.</summary>
/// <param name="diagnostics">string-to-string map; the reference is stored as-is, not copied</param>
internal void SetDiagnostics(System.Collections.Generic.IDictionary<string, string> diagnostics)
{
    // Keys and values must both be strings (Map<String, String> in the Java original).
    this.diagnostics = diagnostics;
}
160 // returns Map<String, String>
// NOTE(review): the method body is not visible in this excerpt (the embedded line numbers jump
// from 161 to 166); presumably it returns the diagnostics field - confirm against the full file.
161 public System.Collections.Generic.IDictionary<string, string> GetDiagnostics()
166 /// <summary> Construct a new SegmentInfo instance by reading a
167 /// previously saved SegmentInfo from input.
170 /// <param name="dir">directory to load from
172 /// <param name="format">format of the segments info file
174 /// <param name="input">input handle to read segment info from
/// <remarks>Values are consumed in the same order Write() emits them: name, docCount, delGen,
/// doc-store info, single-norm-file flag, normGen entries, compound-file byte, delCount, hasProx,
/// diagnostics. Each optional value is read only when its "format &lt;= SegmentInfos.FORMAT_X"
/// test holds; otherwise a default is assigned.
/// NOTE(review): many brace/else lines are absent from this view, so the exact nesting of the
/// branches below must be confirmed against the full file.</remarks>
176 internal SegmentInfo(Directory dir, int format, IndexInput input)
179 name = input.ReadString();
180 docCount = input.ReadInt();
181 if (format <= SegmentInfos.FORMAT_LOCKLESS)
183 delGen = input.ReadLong();
184 if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
186 docStoreOffset = input.ReadInt();
// The doc store segment name and compound flag are only present when an offset was stored.
187 if (docStoreOffset != - 1)
189 docStoreSegment = input.ReadString();
190 docStoreIsCompoundFile = (1 == input.ReadByte());
194 docStoreSegment = name;
195 docStoreIsCompoundFile = false;
200 docStoreOffset = - 1;
201 docStoreSegment = name;
202 docStoreIsCompoundFile = false;
204 if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
206 hasSingleNormFile = (1 == input.ReadByte());
210 hasSingleNormFile = false;
// The stored count is compared with NO (-1) before an array of that size is allocated.
212 int numNormGen = input.ReadInt();
213 if (numNormGen == NO)
219 normGen = new long[numNormGen];
220 for (int j = 0; j < numNormGen; j++)
222 normGen[j] = input.ReadLong();
225 isCompoundFile = (sbyte) input.ReadByte();
// A stored CHECK_DIR compound-file marker is what flags the segment as pre-lockless.
226 preLockless = (isCompoundFile == CHECK_DIR);
227 if (format <= SegmentInfos.FORMAT_DEL_COUNT)
229 delCount = input.ReadInt();
230 System.Diagnostics.Debug.Assert(delCount <= docCount);
234 if (format <= SegmentInfos.FORMAT_HAS_PROX)
235 hasProx = input.ReadByte() == 1;
239 if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
241 diagnostics = input.ReadStringStringMap();
245 diagnostics = new System.Collections.Generic.Dictionary<string,string>();
// NOTE(review): the assignments below look like the fallback for files that fail the
// FORMAT_LOCKLESS test at the top (the else keyword itself is not visible here) - confirm.
252 isCompoundFile = (sbyte) (CHECK_DIR);
254 hasSingleNormFile = false;
255 docStoreOffset = - 1;
256 docStoreIsCompoundFile = false;
257 docStoreSegment = null;
260 diagnostics = new System.Collections.Generic.Dictionary<string,string>();
/// <summary>Allocates the per-field normGen array with one slot per field.</summary>
/// <param name="numFields">number of fields; becomes the length of normGen</param>
/// <remarks>NOTE(review): the guarding conditions and the loop body are not visible in this
/// excerpt (embedded line numbers 265-267, 272-274, 277-279 and 283-288 are absent). The surviving
/// comments indicate the array is left at CHECK_DIR (0) for pre-lockless segments and filled
/// otherwise - confirm the fill value against the full file.</remarks>
264 internal void SetNumFields(int numFields)
268 // normGen is null if we loaded a pre-2.1 segment
269 // file, or, if this segments file hasn't had any
270 // norms set against it yet:
271 normGen = new long[numFields];
275 // Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know
276 // we have to check filesystem for norm files, because this is prelockless.
280 // This is a FORMAT_LOCKLESS segment, which means
281 // there are no separate norms:
282 for (int i = 0; i < numFields; i++)
290 /// <summary>Returns total size in bytes of all of files used by
/// <remarks>The total is computed only while the sizeInBytes field still holds -1, by adding
/// dir.FileLength() for each name returned by Files().
/// NOTE(review): embedded line numbers 299 and 307-310 are absent from this view, so the reset of
/// the accumulator and the return statement are not shown - confirm against the full file.</remarks>
293 public long SizeInBytes()
295 if (sizeInBytes == - 1)
297 System.Collections.Generic.IList<string> files = Files();
298 int size = files.Count;
300 for (int i = 0; i < size; i++)
302 System.String fileName = (System.String) files[i];
303 // We don't count bytes used by a shared doc store
304 // against this segment:
// i.e. doc-store files are skipped only when this segment actually shares a doc store.
305 if (docStoreOffset == - 1 || !IndexFileNames.IsDocStoreFile(fileName))
306 sizeInBytes += dir.FileLength(fileName);
/// <summary>Reports whether this segment has deletions, based on delGen.</summary>
/// <remarks>Per the surviving comments there are three cases: delGen == NO, delGen >= YES, and
/// delGen == CHECK_DIR. The visible fallback asks the directory whether the file named by
/// GetDelFileName() exists.
/// NOTE(review): the first condition and the return statements of the first two branches are
/// not visible in this excerpt - confirm against the full file.</remarks>
312 public bool HasDeletions()
316 // delGen == NO: this means this segment was written
317 // by the LOCKLESS code and for certain does not have
320 // delGen == CHECK_DIR: this means this segment was written by
321 // pre-LOCKLESS code which means we must check
322 // directory to see if .del file exists
324 // delGen >= YES: this means this segment was written by
325 // the LOCKLESS code and for certain has
332 else if (delGen >= YES)
338 return dir.FileExists(GetDelFileName());
// NOTE(review): the statements of this method are not visible in this excerpt (embedded line
// numbers 343 and 345-354 are absent). From the name and the surviving comment it presumably
// moves delGen to the next generation while skipping the reserved value 0 - confirm.
342 internal void AdvanceDelGen()
344 // delGen 0 is reserved for pre-LOCKLESS format
// NOTE(review): the body is not visible in this excerpt (embedded line numbers 357-360 are
// absent); presumably it resets delGen to the "no deletes" marker - confirm against the full file.
356 internal void ClearDelGen()
/// <summary>Creates a copy of this SegmentInfo. The copy gets its own diagnostics dictionary and
/// its own normGen array; name, docCount and dir are passed to the three-argument constructor
/// and the remaining visible fields are assigned one by one.</summary>
/// <remarks>NOTE(review): embedded line numbers 366, 379-380 and 391-397 are absent from this
/// view, so the null guard around the normGen copy, the body of the files loop and the return
/// statement are not shown - confirm against the full file.</remarks>
362 public System.Object Clone()
364 SegmentInfo si = new SegmentInfo(name, docCount, dir);
365 si.isCompoundFile = isCompoundFile;
367 si.delCount = delCount;
368 si.hasProx = hasProx;
369 si.preLockless = preLockless;
370 si.hasSingleNormFile = hasSingleNormFile;
371 if (this.diagnostics != null)
// Entry-by-entry copy so later changes to one map do not show up in the other.
373 si.diagnostics = new System.Collections.Generic.Dictionary<string, string>();
374 foreach (string o in diagnostics.Keys)
376 si.diagnostics.Add(o,diagnostics[o]);
381 si.normGen = new long[normGen.Length];
382 normGen.CopyTo(si.normGen, 0);
384 si.docStoreOffset = docStoreOffset;
385 si.docStoreSegment = docStoreSegment;
386 si.docStoreIsCompoundFile = docStoreIsCompoundFile;
387 if (this.files != null)
389 si.files = new System.Collections.Generic.List<string>();
390 foreach (string file in files)
/// <summary>Returns the name of this segment's deletes file, built by
/// IndexFileNames.FileNameFromGeneration from the segment name, the deletes extension and delGen.</summary>
/// <remarks>NOTE(review): the condition and return value of the "no deletion filename" branch
/// are not visible in this excerpt (embedded line numbers 400-402 and 405-408 are absent);
/// callers in this file (see Files()) test a generated deletes name for null - confirm what
/// that branch returns.</remarks>
399 public System.String GetDelFileName()
403 // In this case we know there is no deletion filename
404 // against this segment
409 // If delGen is CHECK_DIR, it's the pre-lockless-commit file format
410 return IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
414 /// <summary> Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX).
417 /// <param name="fieldNumber">the field index to check
/// <remarks>When normGen is null on a pre-lockless segment, or the field's entry is CHECK_DIR,
/// the answer comes from checking the directory for the file name + ".s" + fieldNumber.
/// NOTE(review): the return statements of the remaining branches are not visible in this
/// excerpt (embedded line numbers 428-435 are absent) - confirm against the full file.</remarks>
419 public bool HasSeparateNorms(int fieldNumber)
421 if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR))
423 // Must fallback to directory file exists check:
424 System.String fileName = name + ".s" + fieldNumber;
425 return dir.FileExists(fileName);
// Either there is no normGen array at all (lockless segment) or this field is marked NO.
427 else if (normGen == null || normGen[fieldNumber] == NO)
/// <summary> Returns true if any fields in this segment have separate norms.</summary>
/// <returns>true when at least one field has a separate norms file; otherwise false</returns>
/// <exception cref="System.IO.IOException">if the directory listing needed for a
/// pre-LOCKLESS segment comes back null</exception>
public bool HasSeparateNorms()
{
    if (normGen == null)
    {
        if (!preLockless)
        {
            // This means we were created w/ LOCKLESS code and no
            // norms are written yet:
            return false;
        }

        // This means this segment was saved with pre-LOCKLESS
        // code. So we must fallback to the original
        // directory list check:
        System.String[] result = dir.List();
        if (result == null)
        {
            throw new System.IO.IOException("cannot read directory " + dir + ": list() returned null");
        }

        System.String pattern = name + ".s";
        int patternLength = pattern.Length;
        for (int i = 0; i < result.Length; i++)
        {
            System.String candidate = result[i];
            // Check the length before indexing: an entry exactly equal to the pattern has no
            // character at patternLength and would otherwise throw IndexOutOfRangeException.
            // File names are identifiers, so the prefix test is ordinal rather than
            // culture-sensitive. Files() applies the same length guard to its prefix scan.
            if (candidate.Length > patternLength
                && candidate.StartsWith(pattern, System.StringComparison.Ordinal)
                && System.Char.IsDigit(candidate[patternLength]))
            {
                return true;
            }
        }
        return false;
    }

    // This means this segment was saved with LOCKLESS
    // code so we first check whether any normGen's are >= 1
    // (meaning they definitely have separate norms):
    for (int i = 0; i < normGen.Length; i++)
    {
        if (normGen[i] >= YES)
        {
            return true;
        }
    }

    // Next we look for any == 0. These cases were
    // pre-LOCKLESS and must be checked in directory:
    for (int i = 0; i < normGen.Length; i++)
    {
        if (normGen[i] == CHECK_DIR && HasSeparateNorms(i))
        {
            return true;
        }
    }

    return false;
}
499 /// <summary> Increment the generation count for the norms file for
503 /// <param name="fieldIndex">field whose norm file will be rewritten
/// <remarks>A field currently marked NO jumps straight to YES; the other visible statement
/// increments the stored generation by one.
/// NOTE(review): the else keyword and any trailing statements are not visible in this excerpt
/// (embedded line numbers 510-512 and 514-516 are absent) - confirm against the full file.</remarks>
505 internal void AdvanceNormGen(int fieldIndex)
507 if (normGen[fieldIndex] == NO)
509 normGen[fieldIndex] = YES;
513 normGen[fieldIndex]++;
518 /// <summary> Get the file name for the norms file for this field.
521 /// <param name="number">field index
/// <remarks>Three cases are visible: a separate norms file carrying a generation, a single
/// norms file shared by all fields (NORMS_EXTENSION, WITHOUT_GEN), and a per-field norms file
/// without a generation.
/// NOTE(review): the declaration of gen, the null test on normGen and the prefix assignments
/// for cases 1 and 3 are not visible in this excerpt (embedded line numbers 526-533, 540 and
/// 552 are absent) - confirm against the full file.</remarks>
523 public System.String GetNormFileName(int number)
525 System.String prefix;
534 gen = normGen[number];
537 if (HasSeparateNorms(number))
539 // case 1: separate norm
541 return IndexFileNames.FileNameFromGeneration(name, prefix + number, gen);
544 if (hasSingleNormFile)
546 // case 2: lockless (or nrm file exists) - single file for all norms
547 prefix = "." + IndexFileNames.NORMS_EXTENSION;
548 return IndexFileNames.FileNameFromGeneration(name, prefix, WITHOUT_GEN);
551 // case 3: norm file for each field
553 return IndexFileNames.FileNameFromGeneration(name, prefix + number, WITHOUT_GEN);
556 /// <summary> Mark whether this segment is stored as a compound file.
559 /// <param name="isCompoundFile">true if this is a compound file;
/// <remarks>The field is set to YES or NO (narrowed to sbyte).
/// NOTE(review): the condition choosing between the two assignments and any trailing statement
/// are not visible in this excerpt (embedded line numbers 563-565, 567-569 and 571-573 are
/// absent) - confirm against the full file.</remarks>
562 internal void SetUseCompoundFile(bool isCompoundFile)
566 this.isCompoundFile = (sbyte) (YES);
570 this.isCompoundFile = (sbyte) (NO);
575 /// <summary> Returns true if this segment is stored as a compound
576 /// file; else, false.
/// <remarks>When the stored marker is neither NO nor YES, the answer comes from checking the
/// directory for name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION.
/// NOTE(review): the return statements of the NO and YES branches are not visible in this
/// excerpt (embedded line numbers 581-583 and 585-589 are absent) - confirm.</remarks>
578 public bool GetUseCompoundFile()
580 if (isCompoundFile == NO)
584 else if (isCompoundFile == YES)
590 return dir.FileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
/// <summary>Returns the number of deleted documents in this segment. In the visible path the
/// count is obtained by loading the deletes file into a BitVector and calling Count() on it,
/// and the result is stored in delCount.</summary>
/// <remarks>NOTE(review): the conditions guarding that load, the alternative assignment and the
/// return statement are not visible in this excerpt (embedded line numbers 595-599, 602-605 and
/// 607-608 are absent) - confirm against the full file.</remarks>
594 public int GetDelCount()
600 System.String delFileName = GetDelFileName();
601 delCount = new BitVector(dir, delFileName).Count();
606 System.Diagnostics.Debug.Assert(delCount <= docCount);
/// <summary>Records the number of deleted documents in this segment.</summary>
/// <param name="delCount">new deletion count; asserted not to exceed docCount</param>
internal void SetDelCount(int delCount)
{
    int updatedCount = delCount;
    this.delCount = updatedCount;

    // Sanity check: a segment cannot have more deleted documents than documents.
    System.Diagnostics.Debug.Assert(updatedCount <= docCount);
}
/// <summary>Returns the stored doc store offset; -1 is the value this class tests for to mean
/// "no shared doc store".</summary>
/// <returns>the current value of docStoreOffset</returns>
public int GetDocStoreOffset()
{
    int offset = docStoreOffset;
    return offset;
}
/// <summary>Returns whether the doc store files are kept in a compound file.</summary>
/// <returns>the current value of docStoreIsCompoundFile</returns>
public bool GetDocStoreIsCompoundFile()
{
    bool storedInCompoundFile = docStoreIsCompoundFile;
    return storedInCompoundFile;
}
/// <summary>Sets whether the doc store files are kept in a compound file.</summary>
/// <param name="v">new value for docStoreIsCompoundFile</param>
/// <remarks>NOTE(review): embedded line number 629 is absent from this view; a further short
/// statement may follow the assignment - confirm against the full file.</remarks>
626 internal void SetDocStoreIsCompoundFile(bool v)
628 docStoreIsCompoundFile = v;
/// <summary>Returns the name of the segment whose doc store files this segment uses.</summary>
/// <returns>the current value of docStoreSegment (may be null)</returns>
public System.String GetDocStoreSegment()
{
    System.String segmentName = docStoreSegment;
    return segmentName;
}
/// <summary>Sets the doc store offset for this segment.</summary>
/// <param name="offset">new value for docStoreOffset</param>
/// <remarks>NOTE(review): embedded line number 640 is absent from this view; a further short
/// statement may follow the assignment - confirm against the full file.</remarks>
637 internal void SetDocStoreOffset(int offset)
639 docStoreOffset = offset;
/// <summary>Sets all three doc store settings at once: offset, owning segment name and
/// compound-file flag.</summary>
/// <param name="offset">new value for docStoreOffset</param>
/// <param name="segment">new value for docStoreSegment</param>
/// <param name="isCompoundFile">new value for docStoreIsCompoundFile (not the segment's own
/// isCompoundFile marker)</param>
643 internal void SetDocStore(int offset, System.String segment, bool isCompoundFile)
645 docStoreOffset = offset;
646 docStoreSegment = segment;
647 docStoreIsCompoundFile = isCompoundFile;
650 /// <summary> Save this segment's info.</summary>
/// <param name="output">destination the fields are written to</param>
/// <remarks>Write order: name, docCount, delGen, docStoreOffset (followed by docStoreSegment and
/// its compound flag only when the offset is not -1), hasSingleNormFile, the normGen length and
/// entries, isCompoundFile, delCount, hasProx, diagnostics. The deserializing constructor reads
/// the values in this same order.
/// NOTE(review): the handling of a null normGen is not visible in this excerpt (embedded line
/// numbers 664-669 are absent) - confirm against the full file.</remarks>
651 internal void Write(IndexOutput output)
653 output.WriteString(name);
654 output.WriteInt(docCount);
655 output.WriteLong(delGen);
656 output.WriteInt(docStoreOffset);
657 if (docStoreOffset != - 1)
659 output.WriteString(docStoreSegment);
660 output.WriteByte((byte) (docStoreIsCompoundFile?1:0));
663 output.WriteByte((byte) (hasSingleNormFile?1:0));
670 output.WriteInt(normGen.Length);
671 for (int j = 0; j < normGen.Length; j++)
673 output.WriteLong(normGen[j]);
// isCompoundFile is an sbyte marker (NO/CHECK_DIR/YES) and is written as a single byte.
676 output.WriteByte((byte) isCompoundFile);
677 output.WriteInt(delCount);
678 output.WriteByte((byte) (hasProx?1:0));
679 output.WriteStringStringMap(diagnostics);
/// <summary>Sets the hasProx flag for this segment.</summary>
/// <param name="hasProx">new value for the flag</param>
/// <remarks>NOTE(review): embedded line number 685 is absent from this view; a further short
/// statement may follow the assignment - confirm against the full file.</remarks>
682 internal void SetHasProx(bool hasProx)
684 this.hasProx = hasProx;
// NOTE(review): the body is not visible in this excerpt (embedded line numbers 689-691 are
// absent); presumably it returns the hasProx field - confirm against the full file.
688 public bool GetHasProx()
/// <summary>Helper used by Files(): acts on fileName only when the directory reports that the
/// file exists.</summary>
/// <param name="files">list being built by the caller</param>
/// <param name="fileName">candidate file name</param>
/// <remarks>NOTE(review): the statement guarded by the existence test is not visible in this
/// excerpt (embedded line numbers 696-698 are absent); presumably it appends fileName to files
/// - confirm against the full file.</remarks>
693 private void AddIfExists(System.Collections.Generic.IList<string> files, System.String fileName)
695 if (dir.FileExists(fileName))
700 * Return all files referenced by this SegmentInfo. The
701 * returned List is a locally cached List so you should not
/// <summary>Builds the list of file names that belong to this segment: the compound file or the
/// individual non-store index files, the doc store files (shared or own), the deletes file, and
/// the norms files.</summary>
/// <remarks>NOTE(review): many brace, condition and else lines are absent from this view (for
/// example embedded line numbers 706-713, 717-719, 761-762, 766-767, 770-772, 788-789, 808, 810
/// and 825-827), including the cached-list check, the null test on normGen, the tests on gen,
/// and the final assignment/return. The nesting described by the added comments below follows
/// the surviving comments and must be confirmed against the full file.</remarks>
705 public System.Collections.Generic.IList<string> Files()
714 System.Collections.Generic.List<string> fileList = new System.Collections.Generic.List<string>();
716 bool useCompoundFile = GetUseCompoundFile();
720 fileList.Add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
724 System.String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
725 for (int i = 0; i < exts.Length; i++)
726 AddIfExists(fileList, name + "." + exts[i]);
729 if (docStoreOffset != - 1)
731 // We are sharing doc stores (stored fields, term
732 // vectors) with other segments
733 System.Diagnostics.Debug.Assert(docStoreSegment != null);
734 if (docStoreIsCompoundFile)
736 fileList.Add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
740 System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
741 for (int i = 0; i < exts.Length; i++)
742 AddIfExists(fileList, docStoreSegment + "." + exts[i]);
745 else if (!useCompoundFile)
747 // We are not sharing, and, these files were not
748 // included in the compound file
749 System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
750 for (int i = 0; i < exts.Length; i++)
751 AddIfExists(fileList, name + "." + exts[i]);
// The deletes file is listed unconditionally once delGen >= YES; for other generations the
// directory is asked whether the file exists.
754 System.String delFileName = IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
755 if (delFileName != null && (delGen >= YES || dir.FileExists(delFileName)))
757 fileList.Add(delFileName);
760 // Careful logic for norms files
763 for (int i = 0; i < normGen.Length; i++)
765 long gen = normGen[i];
768 // Definitely a separate norm file, with generation:
769 fileList.Add(IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
773 // No separate norms but maybe plain norms
774 // in the non compound file case:
775 if (!hasSingleNormFile && !useCompoundFile)
777 System.String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
778 if (dir.FileExists(fileName))
780 fileList.Add(fileName);
784 else if (CHECK_DIR == gen)
786 // Pre-2.1: we have to check file existence
787 System.String fileName = null;
790 fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i;
792 else if (!hasSingleNormFile)
794 fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
796 if (fileName != null && dir.FileExists(fileName))
798 fileList.Add(fileName);
803 else if (preLockless || (!hasSingleNormFile && !useCompoundFile))
805 // Pre-2.1: we have to scan the dir to find all
806 // matching _X.sN/_X.fN files for our segment:
807 System.String prefix;
809 prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION;
811 prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION;
812 int prefixLength = prefix.Length;
813 System.String[] allFiles = dir.ListAll();
814 IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();
815 for (int i = 0; i < allFiles.Length; i++)
817 System.String fileName = allFiles[i];
// The length test runs before the character at prefixLength is read, so a name equal to the
// prefix itself is skipped rather than indexed out of range.
818 if (filter.Accept(null, fileName) && fileName.Length > prefixLength && System.Char.IsDigit(fileName[prefixLength]) && fileName.StartsWith(prefix))
820 fileList.Add(fileName);
824 //System.Diagnostics.Debug.Assert();
829 /* Called whenever any change is made that affects which
830 * files this segment has. */
// NOTE(review): the body is not visible in this excerpt (embedded line numbers 832-835 are
// absent); presumably it discards the cached files list and cached size so they are
// recomputed on demand - confirm against the full file.
831 private void ClearFiles()
837 /// <summary>Used for debugging </summary>
/// <param name="dir">directory to compare with this segment's own directory</param>
/// <returns>name + ":" + a compound-file marker, then "x" when the supplied dir is not the same
/// reference as this segment's dir, then docCount, then the doc store suffix ("->" +
/// docStoreSegment when docStoreOffset is not -1)</returns>
/// <remarks>NOTE(review): the values assigned to cfs, the handling inside the IOException catch
/// and the doc store suffix used when no doc store is shared are not visible in this excerpt
/// (embedded line numbers 839-842, 844-847, 849-852 and 857-859 are absent) - confirm.</remarks>
838 public System.String SegString(Directory dir)
843 if (GetUseCompoundFile())
848 catch (System.IO.IOException ioe)
853 System.String docStore;
855 if (docStoreOffset != - 1)
856 docStore = "->" + docStoreSegment;
860 return name + ":" + cfs + (this.dir == dir?"":"x") + docCount + docStore;
/// <summary>We consider another SegmentInfo instance equal if it
/// has the same dir and same name.
/// </summary>
/// <param name="obj">object to compare with; may be null or of any type</param>
/// <returns>true when obj is a SegmentInfo that refers to the same directory instance and has
/// an equal name; false for null or for any other type</returns>
public override bool Equals(System.Object obj)
{
    // "as" yields null both for a null argument and for a foreign type. That avoids using
    // InvalidCastException for control flow and avoids the NullReferenceException that a
    // plain cast of null followed by other.dir would raise.
    SegmentInfo other = obj as SegmentInfo;
    if (other == null)
    {
        return false;
    }

    // Directories are compared by reference, names by value.
    return other.dir == dir && other.name.Equals(name);
}
/// <summary>Hash code built from the same two members Equals compares: the directory and the
/// segment name.</summary>
/// <returns>the sum of dir.GetHashCode() and name.GetHashCode()</returns>
public override int GetHashCode()
{
    int directoryHash = dir.GetHashCode();
    int nameHash = name.GetHashCode();
    return directoryHash + nameHash;
}