2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
20 using Directory = Mono.Lucene.Net.Store.Directory;
21 using CloseableThreadLocal = Mono.Lucene.Net.Util.CloseableThreadLocal;
22 using SimpleLRUCache = Mono.Lucene.Net.Util.Cache.SimpleLRUCache;
24 namespace Mono.Lucene.Net.Index
27 /// <summary>This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
28 /// Directory. Pairs are accessed either by Term or by ordinal position the
32 sealed class TermInfosReader
34 private Directory directory;
35 private System.String segment;
36 private FieldInfos fieldInfos;
38 private CloseableThreadLocal threadResources = new CloseableThreadLocal();
39 private SegmentTermEnum origEnum;
42 private Term[] indexTerms;
43 private TermInfo[] indexInfos;
44 private long[] indexPointers;
46 private int totalIndexInterval;
48 private const int DEFAULT_CACHE_SIZE = 1024;
50 /// <summary> Per-thread resources managed by ThreadLocal</summary>
51 private sealed class ThreadResources
53 internal SegmentTermEnum termEnum;
55 // Used for caching the most recently looked-up Terms
56 internal Mono.Lucene.Net.Util.Cache.Cache termInfoCache;
59 internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
63 if (indexDivisor < 1 && indexDivisor != - 1)
65 throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
74 origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false);
78 if (indexDivisor != - 1)
81 totalIndexInterval = origEnum.indexInterval * indexDivisor;
82 SegmentTermEnum indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true);
86 int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // otherwise read index
88 indexTerms = new Term[indexSize];
89 indexInfos = new TermInfo[indexSize];
90 indexPointers = new long[indexSize];
92 for (int i = 0; indexEnum.Next(); i++)
94 indexTerms[i] = indexEnum.Term();
95 indexInfos[i] = indexEnum.TermInfo();
96 indexPointers[i] = indexEnum.indexPointer;
98 for (int j = 1; j < indexDivisor; j++)
99 if (!indexEnum.Next())
110 // Do not load terms index:
111 totalIndexInterval = - 1;
114 indexPointers = null;
120 // With lock-less commits, it's entirely possible (and
121 // fine) to hit a FileNotFound exception above. In
122 // this case, we want to explicitly close any subset
123 // of things that were opened so that we don't have to
124 // wait for a GC to do so.
/// <summary>Returns the skipInterval value exposed by the reader's original term enumerator.</summary>
public int GetSkipInterval()
{
    SegmentTermEnum source = origEnum;
    return source.skipInterval;
}
/// <summary>Returns the maxSkipLevels value exposed by the reader's original term enumerator.</summary>
public int GetMaxSkipLevels()
{
    SegmentTermEnum source = origEnum;
    return source.maxSkipLevels;
}
142 internal void Close()
144 if (origEnum != null)
146 threadResources.Close();
149 /// <summary>Returns the number of term/value pairs in the set. </summary>
155 private ThreadResources GetThreadResources()
157 ThreadResources resources = (ThreadResources) threadResources.Get();
158 if (resources == null)
160 resources = new ThreadResources();
161 resources.termEnum = Terms();
162 // Cache does not have to be thread-safe; it is only used by one thread at a time
163 resources.termInfoCache = new SimpleLRUCache(DEFAULT_CACHE_SIZE);
164 threadResources.Set(resources);
170 /// <summary>Returns the offset of the greatest index entry which is less than or equal to term.</summary>
171 private int GetIndexOffset(Term term)
173 int lo = 0; // binary search indexTerms[]
174 int hi = indexTerms.Length - 1;
178 int mid = SupportClass.Number.URShift((lo + hi), 1);
179 int delta = term.CompareTo(indexTerms[mid]);
/// <summary>Seeks the given enumerator to the loaded index entry at <c>indexOffset</c>.</summary>
/// <param name="enumerator">The term enumerator to reposition.</param>
/// <param name="indexOffset">Offset into indexPointers, indexTerms and indexInfos.</param>
private void SeekEnum(SegmentTermEnum enumerator, int indexOffset)
{
    long pointer = indexPointers[indexOffset];

    // Widen to long before multiplying so the product is computed in 64-bit arithmetic.
    long position = ((long) indexOffset * totalIndexInterval) - 1;

    enumerator.Seek(pointer, position, indexTerms[indexOffset], indexInfos[indexOffset]);
}
/// <summary>Returns the TermInfo for a Term in the set, or null.
/// Forwards to the two-argument overload with <c>useCache</c> set to true.</summary>
internal TermInfo Get(Term term)
{
    TermInfo found = Get(term, true);
    return found;
}
201 /// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
202 private TermInfo Get(Term term, bool useCache)
210 ThreadResources resources = GetThreadResources();
211 Mono.Lucene.Net.Util.Cache.Cache cache = null;
215 cache = resources.termInfoCache;
216 // check the cache first if the term was recently looked up
217 ti = (TermInfo) cache.Get(term);
224 // optimize sequential access: first try scanning cached enum w/o seeking
225 SegmentTermEnum enumerator = resources.termEnum;
226 if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0))
228 int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1;
229 if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
233 int numScans = enumerator.ScanTo(term);
234 if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
236 ti = enumerator.TermInfo();
237 if (cache != null && numScans > 1)
239 // we only want to put this TermInfo into the cache if
240 // scanEnum skipped more than one dictionary entry.
241 // This prevents RangeQueries or WildcardQueries from
242 // wiping out the cache when they iterate over a large number
256 // random-access: must seek
257 SeekEnum(enumerator, GetIndexOffset(term));
258 enumerator.ScanTo(term);
259 if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
261 ti = enumerator.TermInfo();
/// <summary>Verifies that the terms index is available before it is used.</summary>
/// <exception cref="System.InvalidOperationException">
/// Thrown when <c>indexTerms</c> is null, i.e. the terms index was not loaded
/// when this reader was created.
/// </exception>
private void EnsureIndexIsRead()
{
    if (indexTerms == null)
    {
        // InvalidOperationException derives from SystemException, so callers that
        // catch SystemException keep working while getting a more specific type.
        throw new System.InvalidOperationException("terms index was not loaded when this reader was created");
    }
}
282 /// <summary>Returns the position of a Term in the set or -1. </summary>
283 internal long GetPosition(Term term)
289 int indexOffset = GetIndexOffset(term);
291 SegmentTermEnum enumerator = GetThreadResources().termEnum;
292 SeekEnum(enumerator, indexOffset);
294 while (term.CompareTo(enumerator.Term()) > 0 && enumerator.Next())
298 if (term.CompareTo(enumerator.Term()) == 0)
299 return enumerator.position;
/// <summary>Returns an enumeration of all the Terms and TermInfos in the set,
/// obtained by cloning the reader's original enumerator.</summary>
public SegmentTermEnum Terms()
{
    object copy = origEnum.Clone();
    return (SegmentTermEnum) copy;
}
310 /// <summary>Returns an enumeration of terms starting at or after the named term. </summary>
311 public SegmentTermEnum Terms(Term term)
313 // don't use the cache in this call because we want to reposition the
316 return (SegmentTermEnum) GetThreadResources().termEnum.Clone();