[monkeydoc] Merge/add monkeydoc to master.
[mono.git] / mcs / tools / monkeydoc / Lucene.Net / Lucene.Net / Index / TermInfosReader.cs
1 /* 
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  * 
9  * http://www.apache.org/licenses/LICENSE-2.0
10  * 
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 using System;
19
20 using Directory = Mono.Lucene.Net.Store.Directory;
21 using CloseableThreadLocal = Mono.Lucene.Net.Util.CloseableThreadLocal;
22 using SimpleLRUCache = Mono.Lucene.Net.Util.Cache.SimpleLRUCache;
23
24 namespace Mono.Lucene.Net.Index
25 {
26
27     /// <summary>This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
28         /// Directory.  Pairs are accessed either by Term or by ordinal position the
29         /// set.  
30         /// </summary>
31         
32         sealed class TermInfosReader
33         {
34                 private Directory directory;
35                 private System.String segment;
36                 private FieldInfos fieldInfos;
37                 
38                 private CloseableThreadLocal threadResources = new CloseableThreadLocal();
39                 private SegmentTermEnum origEnum;
40                 private long size;
41                 
42                 private Term[] indexTerms;
43                 private TermInfo[] indexInfos;
44                 private long[] indexPointers;
45                 
46                 private int totalIndexInterval;
47                 
48                 private const int DEFAULT_CACHE_SIZE = 1024;
49                 
50                 /// <summary> Per-thread resources managed by ThreadLocal</summary>
51                 private sealed class ThreadResources
52                 {
53                         internal SegmentTermEnum termEnum;
54                         
55                         // Used for caching the least recently looked-up Terms
56                         internal Mono.Lucene.Net.Util.Cache.Cache termInfoCache;
57                 }
58                 
59                 internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
60                 {
61                         bool success = false;
62                         
63                         if (indexDivisor < 1 && indexDivisor != - 1)
64                         {
65                                 throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
66                         }
67                         
68                         try
69                         {
70                                 directory = dir;
71                                 segment = seg;
72                                 fieldInfos = fis;
73                                 
74                                 origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false);
75                                 size = origEnum.size;
76                                 
77                                 
78                                 if (indexDivisor != - 1)
79                                 {
80                                         // Load terms index
81                                         totalIndexInterval = origEnum.indexInterval * indexDivisor;
82                                         SegmentTermEnum indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true);
83                                         
84                                         try
85                                         {
86                                                 int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // otherwise read index
87                                                 
88                                                 indexTerms = new Term[indexSize];
89                                                 indexInfos = new TermInfo[indexSize];
90                                                 indexPointers = new long[indexSize];
91                                                 
92                                                 for (int i = 0; indexEnum.Next(); i++)
93                                                 {
94                                                         indexTerms[i] = indexEnum.Term();
95                                                         indexInfos[i] = indexEnum.TermInfo();
96                                                         indexPointers[i] = indexEnum.indexPointer;
97                                                         
98                                                         for (int j = 1; j < indexDivisor; j++)
99                                                                 if (!indexEnum.Next())
100                                                                         break;
101                                                 }
102                                         }
103                                         finally
104                                         {
105                                                 indexEnum.Close();
106                                         }
107                                 }
108                                 else
109                                 {
110                                         // Do not load terms index:
111                                         totalIndexInterval = - 1;
112                                         indexTerms = null;
113                                         indexInfos = null;
114                                         indexPointers = null;
115                                 }
116                                 success = true;
117                         }
118                         finally
119                         {
120                                 // With lock-less commits, it's entirely possible (and
121                                 // fine) to hit a FileNotFound exception above. In
122                                 // this case, we want to explicitly close any subset
123                                 // of things that were opened so that we don't have to
124                                 // wait for a GC to do so.
125                                 if (!success)
126                                 {
127                                         Close();
128                                 }
129                         }
130                 }
131                 
132                 public int GetSkipInterval()
133                 {
134                         return origEnum.skipInterval;
135                 }
136                 
137                 public int GetMaxSkipLevels()
138                 {
139                         return origEnum.maxSkipLevels;
140                 }
141                 
142                 internal void  Close()
143                 {
144                         if (origEnum != null)
145                                 origEnum.Close();
146                         threadResources.Close();
147                 }
148                 
149                 /// <summary>Returns the number of term/value pairs in the set. </summary>
150                 internal long Size()
151                 {
152                         return size;
153                 }
154                 
155                 private ThreadResources GetThreadResources()
156                 {
157                         ThreadResources resources = (ThreadResources) threadResources.Get();
158                         if (resources == null)
159                         {
160                                 resources = new ThreadResources();
161                                 resources.termEnum = Terms();
162                                 // Cache does not have to be thread-safe, it is only used by one thread at the same time
163                                 resources.termInfoCache = new SimpleLRUCache(DEFAULT_CACHE_SIZE);
164                                 threadResources.Set(resources);
165                         }
166                         return resources;
167                 }
168                 
169                 
170                 /// <summary>Returns the offset of the greatest index entry which is less than or equal to term.</summary>
171                 private int GetIndexOffset(Term term)
172                 {
173                         int lo = 0; // binary search indexTerms[]
174                         int hi = indexTerms.Length - 1;
175                         
176                         while (hi >= lo)
177                         {
178                                 int mid = SupportClass.Number.URShift((lo + hi), 1);
179                                 int delta = term.CompareTo(indexTerms[mid]);
180                                 if (delta < 0)
181                                         hi = mid - 1;
182                                 else if (delta > 0)
183                                         lo = mid + 1;
184                                 else
185                                         return mid;
186                         }
187                         return hi;
188                 }
189                 
190                 private void  SeekEnum(SegmentTermEnum enumerator, int indexOffset)
191                 {
192                         enumerator.Seek(indexPointers[indexOffset], ((long)indexOffset * totalIndexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]);
193                 }
194                 
195                 /// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
196                 internal TermInfo Get(Term term)
197                 {
198                         return Get(term, true);
199                 }
200                 
201                 /// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
202                 private TermInfo Get(Term term, bool useCache)
203                 {
204                         if (size == 0)
205                                 return null;
206                         
207                         EnsureIndexIsRead();
208                         
209                         TermInfo ti;
210                         ThreadResources resources = GetThreadResources();
211                         Mono.Lucene.Net.Util.Cache.Cache cache = null;
212                         
213                         if (useCache)
214                         {
215                                 cache = resources.termInfoCache;
216                                 // check the cache first if the term was recently looked up
217                                 ti = (TermInfo) cache.Get(term);
218                                 if (ti != null)
219                                 {
220                                         return ti;
221                                 }
222                         }
223                         
224                         // optimize sequential access: first try scanning cached enum w/o seeking
225                         SegmentTermEnum enumerator = resources.termEnum;
226                         if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0))
227                         {
228                                 int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1;
229                                 if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
230                                 {
231                                         // no need to seek
232                                         
233                                         int numScans = enumerator.ScanTo(term);
234                                         if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
235                                         {
236                                                 ti = enumerator.TermInfo();
237                                                 if (cache != null && numScans > 1)
238                                                 {
239                                                         // we only  want to put this TermInfo into the cache if
240                                                         // scanEnum skipped more than one dictionary entry.
241                                                         // This prevents RangeQueries or WildcardQueries to 
242                                                         // wipe out the cache when they iterate over a large numbers
243                                                         // of terms in order
244                                                         cache.Put(term, ti);
245                                                 }
246                                         }
247                                         else
248                                         {
249                                                 ti = null;
250                                         }
251                                         
252                                         return ti;
253                                 }
254                         }
255                         
256                         // random-access: must seek
257                         SeekEnum(enumerator, GetIndexOffset(term));
258                         enumerator.ScanTo(term);
259                         if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
260                         {
261                                 ti = enumerator.TermInfo();
262                                 if (cache != null)
263                                 {
264                                         cache.Put(term, ti);
265                                 }
266                         }
267                         else
268                         {
269                                 ti = null;
270                         }
271                         return ti;
272                 }
273                                                 
274                 private void  EnsureIndexIsRead()
275                 {
276                         if (indexTerms == null)
277                         {
278                                 throw new System.SystemException("terms index was not loaded when this reader was created");
279                         }
280                 }
281                 
282                 /// <summary>Returns the position of a Term in the set or -1. </summary>
283                 internal long GetPosition(Term term)
284                 {
285                         if (size == 0)
286                                 return - 1;
287                         
288                         EnsureIndexIsRead();
289                         int indexOffset = GetIndexOffset(term);
290                         
291                         SegmentTermEnum enumerator = GetThreadResources().termEnum;
292                         SeekEnum(enumerator, indexOffset);
293                         
294                         while (term.CompareTo(enumerator.Term()) > 0 && enumerator.Next())
295                         {
296                         }
297                         
298                         if (term.CompareTo(enumerator.Term()) == 0)
299                                 return enumerator.position;
300                         else
301                                 return - 1;
302                 }
303                 
304                 /// <summary>Returns an enumeration of all the Terms and TermInfos in the set. </summary>
305                 public SegmentTermEnum Terms()
306                 {
307                         return (SegmentTermEnum) origEnum.Clone();
308                 }
309                 
310                 /// <summary>Returns an enumeration of terms starting at or after the named term. </summary>
311                 public SegmentTermEnum Terms(Term term)
312                 {
313                         // don't use the cache in this call because we want to reposition the
314                         // enumeration
315                         Get(term, false);
316                         return (SegmentTermEnum) GetThreadResources().termEnum.Clone();
317                 }
318         }
319 }