2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
20 using Analyzer = Mono.Lucene.Net.Analysis.Analyzer;
21 using Document = Mono.Lucene.Net.Documents.Document;
22 using Directory = Mono.Lucene.Net.Store.Directory;
23 using FSDirectory = Mono.Lucene.Net.Store.FSDirectory;
24 using LockObtainFailedException = Mono.Lucene.Net.Store.LockObtainFailedException;
26 namespace Mono.Lucene.Net.Index
29 /// <summary> <p/>[Note that as of <b>2.1</b>, all but one of the
30 /// methods in this class are available via {@link
31 /// IndexWriter}. The one method that is not available is
32 /// {@link #DeleteDocument(int)}.]<p/>
34 /// A class to modify an index, i.e. to delete and add documents. This
35 /// class hides {@link IndexReader} and {@link IndexWriter} so that you
36 /// do not need to care about implementation details such as that adding
37 /// documents is done via IndexWriter and deletion is done via IndexReader.
39 /// <p/>Note that you cannot create more than one <code>IndexModifier</code> object
40 /// on the same directory at the same time.
42 /// <p/>Example usage:
44 /// <!-- ======================================================== -->
45 /// <!-- = Java Sourcecode to HTML automatically converted code = -->
46 /// <!-- = Java2Html Converter V4.1 2004 by Markus Gebhard markus@jave.de = -->
47 /// <!-- = Further information: http://www.java2html.de = -->
48 /// <div align="left" class="java">
49 /// <table border="0" cellpadding="3" cellspacing="0" bgcolor="#ffffff">
51 /// <!-- start source code -->
52 /// <td nowrap="nowrap" valign="top" align="left">
54 /// <font color="#ffffff">    </font><font color="#000000">Analyzer analyzer = </font><font color="#7f0055"><b>new </b></font><font color="#000000">StandardAnalyzer</font><font color="#000000">()</font><font color="#000000">;</font><br/>
55 /// <font color="#ffffff">    </font><font color="#3f7f5f">// create an index in /tmp/index, overwriting an existing one:</font><br/>
56 /// <font color="#ffffff">    </font><font color="#000000">IndexModifier indexModifier = </font><font color="#7f0055"><b>new </b></font><font color="#000000">IndexModifier</font><font color="#000000">(</font><font color="#2a00ff">"/tmp/index"</font><font color="#000000">, analyzer, </font><font color="#7f0055"><b>true</b></font><font color="#000000">)</font><font color="#000000">;</font><br/>
57 /// <font color="#ffffff">    </font><font color="#000000">Document doc = </font><font color="#7f0055"><b>new </b></font><font color="#000000">Document</font><font color="#000000">()</font><font color="#000000">;</font><br/>
58 /// <font color="#ffffff">    </font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">"id"</font><font color="#000000">, </font><font color="#2a00ff">"1"</font><font color="#000000">, Field.Store.YES, Field.Index.NOT_ANALYZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
59 /// <font color="#ffffff">    </font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">"body"</font><font color="#000000">, </font><font color="#2a00ff">"a simple test"</font><font color="#000000">, Field.Store.YES, Field.Index.ANALYZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
60 /// <font color="#ffffff">    </font><font color="#000000">indexModifier.addDocument</font><font color="#000000">(</font><font color="#000000">doc</font><font color="#000000">)</font><font color="#000000">;</font><br/>
61 /// <font color="#ffffff">    </font><font color="#7f0055"><b>int </b></font><font color="#000000">deleted = indexModifier.delete</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Term</font><font color="#000000">(</font><font color="#2a00ff">"id"</font><font color="#000000">, </font><font color="#2a00ff">"1"</font><font color="#000000">))</font><font color="#000000">;</font><br/>
62 /// <font color="#ffffff">    </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#2a00ff">"Deleted " </font><font color="#000000">+ deleted + </font><font color="#2a00ff">" document"</font><font color="#000000">)</font><font color="#000000">;</font><br/>
63 /// <font color="#ffffff">    </font><font color="#000000">indexModifier.flush</font><font color="#000000">()</font><font color="#000000">;</font><br/>
64 /// <font color="#ffffff">    </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#000000">indexModifier.docCount</font><font color="#000000">() </font><font color="#000000">+ </font><font color="#2a00ff">" docs in index"</font><font color="#000000">)</font><font color="#000000">;</font><br/>
65 /// <font color="#ffffff">    </font><font color="#000000">indexModifier.close</font><font color="#000000">()</font><font color="#000000">;</font></code>
67 /// <!-- end source code -->
71 /// <!-- = END of automatically generated HTML code = -->
72 /// <!-- ======================================================== -->
74 /// <p/>Not all methods of IndexReader and IndexWriter are offered by this
75 /// class. If you need access to additional methods, either use those classes
76 /// directly or implement your own class that extends <code>IndexModifier</code>.
78 /// <p/>Although an instance of this class can be used from more than one
79 /// thread, you will not get the best performance. You might want to use
80 /// IndexReader and IndexWriter directly for that (but you will need to
81 /// care about synchronization yourself then).
83 /// <p/>While you can freely mix calls to add() and delete() using this class,
84 /// you should batch your calls for best performance. For example, if you
85 /// want to update 20 documents, you should first delete all those documents,
86 /// then add all the new documents.
89 /// <deprecated> Please use {@link IndexWriter} instead.
91 [Obsolete("Please use IndexWriter instead.")]
92 public class IndexModifier
/// <summary>
/// Seeds the buffered-docs, field-length and merge-factor settings with the
/// matching <see cref="IndexWriter"/> default constants.
/// </summary>
private void InitBlock()
{
    // The three assignments are independent of each other.
    mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR;
    maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
    maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
}
/// <summary>Writer on the index, or <c>null</c> while no writer is held.</summary>
protected internal IndexWriter indexWriter = null;

/// <summary>Reader on the index, or <c>null</c> while no reader is held.</summary>
protected internal IndexReader indexReader = null;

/// <summary>The index directory; also the object every operation locks on.</summary>
protected internal Directory directory = null;

/// <summary>Analyzer handed to each <see cref="IndexWriter"/> this class creates.</summary>
protected internal Analyzer analyzer = null;

/// <summary><c>true</c> once the modifier has been initialized and is usable.</summary>
protected internal bool open = false;

/// <summary>Set to <c>true</c> by the constructors that obtain the directory themselves.</summary>
protected internal bool closeDir = false;

// Settings remembered here so they can be re-applied to every writer
// created by CreateIndexWriter().
/// <summary>Stream that receives writer diagnostics; <c>null</c> disables them.</summary>
protected internal System.IO.StreamWriter infoStream = null;

/// <summary>Whether writers should use the compound file format.</summary>
protected internal bool useCompoundFile = true;

/// <summary>Buffered-document threshold applied to writers (initialized by InitBlock).</summary>
protected internal int maxBufferedDocs;

/// <summary>Maximum number of terms indexed per field (initialized by InitBlock).</summary>
protected internal int maxFieldLength;

/// <summary>Merge factor applied to writers (initialized by InitBlock).</summary>
protected internal int mergeFactor;
/// <summary>Opens an index with write access.</summary>
/// <param name="directory">the index directory</param>
/// <param name="analyzer">the analyzer to use for adding new documents</param>
/// <param name="create"><c>true</c> to create the index or overwrite the existing one;
/// <c>false</c> to append to the existing index</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public IndexModifier(Directory directory, Analyzer analyzer, bool create)
{
    // Load the IndexWriter defaults before the writer is opened.
    this.InitBlock();
    this.Init(directory, analyzer, create);
}
/// <summary>Opens an index, located by path name, with write access.</summary>
/// <param name="dirName">the index directory</param>
/// <param name="analyzer">the analyzer to use for adding new documents</param>
/// <param name="create"><c>true</c> to create the index or overwrite the existing one;
/// <c>false</c> to append to the existing index</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public IndexModifier(System.String dirName, Analyzer analyzer, bool create)
{
    InitBlock();

    // The directory is obtained here rather than supplied by the caller,
    // which is what the closeDir flag records.
    Directory fsDir = FSDirectory.GetDirectory(dirName);
    closeDir = true;

    Init(fsDir, analyzer, create);
}
/// <summary>Opens an index, located by a <see cref="System.IO.FileInfo"/>, with write access.</summary>
/// <param name="file">the index directory</param>
/// <param name="analyzer">the analyzer to use for adding new documents</param>
/// <param name="create"><c>true</c> to create the index or overwrite the existing one;
/// <c>false</c> to append to the existing index</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public IndexModifier(System.IO.FileInfo file, Analyzer analyzer, bool create)
{
    InitBlock();

    // The directory is obtained here rather than supplied by the caller,
    // which is what the closeDir flag records.
    Directory fsDir = FSDirectory.GetDirectory(file);
    closeDir = true;

    Init(fsDir, analyzer, create);
}
/// <summary>
/// Stores the directory and analyzer, opens the initial <see cref="IndexWriter"/>
/// and marks this modifier as open.
/// </summary>
/// <param name="directory">the index directory; also used as the lock object</param>
/// <param name="analyzer">the analyzer to use for adding new documents</param>
/// <param name="create"><c>true</c> to create or overwrite the index, <c>false</c> to append</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
protected internal virtual void Init(Directory directory, Analyzer analyzer, bool create)
{
    this.directory = directory;

    // The field was assigned just above, so locking on the parameter
    // locks on the same object.
    lock (directory)
    {
        this.analyzer = analyzer;
        this.indexWriter = new IndexWriter(directory, analyzer, create, IndexWriter.MaxFieldLength.LIMITED);
        this.open = true;
    }
}
/// <summary>Throws if the index is closed; returns normally otherwise.</summary>
/// <exception cref="System.InvalidOperationException">
/// if the index is closed. This type derives from <see cref="System.SystemException"/>,
/// so callers that catch <c>SystemException</c> keep working.
/// </exception>
protected internal virtual void AssureOpen()
{
    if (!open)
    {
        // InvalidOperationException is the .NET counterpart of the documented
        // IllegalStateException; throwing bare SystemException is discouraged.
        throw new System.InvalidOperationException("Index is closed");
    }
}
/// <summary>
/// Closes the <see cref="IndexReader"/> (if one is held) and opens an
/// <see cref="IndexWriter"/> configured with the remembered settings.
/// Does nothing when a writer is already held.
/// </summary>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
protected internal virtual void CreateIndexWriter()
{
    if (indexWriter != null)
    {
        return;
    }

    if (indexReader != null)
    {
        indexReader.Close();
        indexReader = null;
    }

    // Assign the field first so it is set even if a later setter throws.
    indexWriter = new IndexWriter(directory, analyzer, false, new IndexWriter.MaxFieldLength(maxFieldLength));
    IndexWriter writer = indexWriter;

    // IndexModifier cannot use ConcurrentMergeScheduler
    // because it synchronizes on the directory, which can
    // cause deadlock.
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.SetInfoStream(infoStream);
    writer.SetUseCompoundFile(useCompoundFile);

    // The buffered-docs limit is forwarded only when auto flush is not disabled.
    if (maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH)
    {
        writer.SetMaxBufferedDocs(maxBufferedDocs);
    }

    writer.SetMergeFactor(mergeFactor);
}
/// <summary>
/// Closes the <see cref="IndexWriter"/> (if one is held) and opens an
/// <see cref="IndexReader"/> on the directory. Does nothing when a reader
/// is already held.
/// </summary>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
protected internal virtual void CreateIndexReader()
{
    if (indexReader != null)
    {
        return;
    }

    if (indexWriter != null)
    {
        indexWriter.Close();
        indexWriter = null;
    }

    indexReader = IndexReader.Open(directory);
}
/// <summary>
/// Makes sure all changes are written to disk by closing whichever of the
/// writer or reader is currently held and reopening the same kind.
/// </summary>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual void Flush()
{
    lock (directory)
    {
        AssureOpen();

        if (indexWriter == null)
        {
            // Reader mode: close and reopen the reader.
            indexReader.Close();
            indexReader = null;
            CreateIndexReader();
        }
        else
        {
            // Writer mode: close and reopen the writer.
            indexWriter.Close();
            indexWriter = null;
            CreateIndexWriter();
        }
    }
}
/// <summary>
/// Adds a document to this index, using the provided analyzer instead of the
/// one specified in the constructor. If the document contains more than
/// <see cref="SetMaxFieldLength(int)"/> terms for a given field, the remainder
/// are discarded.
/// </summary>
/// <param name="doc">the document to add</param>
/// <param name="docAnalyzer">analyzer for this document, or <c>null</c> to use the
/// writer's own analyzer</param>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual void AddDocument(Document doc, Analyzer docAnalyzer)
{
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();

        if (docAnalyzer == null)
        {
            indexWriter.AddDocument(doc);
        }
        else
        {
            indexWriter.AddDocument(doc, docAnalyzer);
        }
    }
}
/// <summary>
/// Adds a document to this index. If the document contains more than
/// <see cref="SetMaxFieldLength(int)"/> terms for a given field, the remainder
/// are discarded.
/// </summary>
/// <param name="doc">the document to add</param>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual void AddDocument(Document doc)
{
    // Delegates to the two-argument overload with no per-document analyzer.
    Analyzer noOverride = null;
    AddDocument(doc, noOverride);
}
/// <summary>
/// Deletes all documents containing <paramref name="term"/>.
/// This is useful if one uses a document field to hold a unique ID string for
/// the document: to delete such a document, construct a term with the
/// appropriate field and the unique ID string as its text and pass it here.
/// </summary>
/// <param name="term">the term identifying the documents to delete</param>
/// <returns>the number of documents deleted</returns>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="StaleReaderException">if the index has changed since this reader was opened</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual int DeleteDocuments(Term term)
{
    lock (directory)
    {
        AssureOpen();
        CreateIndexReader();

        int deletedCount = indexReader.DeleteDocuments(term);
        return deletedCount;
    }
}
/// <summary>Deletes the document numbered <paramref name="docNum"/>.</summary>
/// <param name="docNum">the number of the document to delete</param>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="StaleReaderException">if the index has changed since this reader was opened</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
public virtual void DeleteDocument(int docNum)
{
    lock (directory)
    {
        AssureOpen();

        // Deletion goes through the reader, so switch to reader mode first.
        CreateIndexReader();
        indexReader.DeleteDocument(docNum);
    }
}
/// <summary>
/// Returns the number of documents currently in this index. If the writer is
/// currently open this is <c>IndexWriter.DocCount()</c>, otherwise
/// <c>IndexReader.NumDocs()</c>. Note that <c>IndexWriter.DocCount()</c> does
/// not take deletions into account, unlike <c>IndexReader.NumDocs()</c>.
/// </summary>
/// <returns>the document count reported by the writer or reader currently held</returns>
/// <exception cref="System.SystemException">if the index is closed</exception>
public virtual int DocCount()
{
    lock (directory)
    {
        AssureOpen();
        return indexWriter != null
            ? indexWriter.DocCount()
            : indexReader.NumDocs();
    }
}
/// <summary>
/// Merges all segments together into a single segment, optimizing the index.
/// </summary>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual void Optimize()
{
    lock (directory)
    {
        AssureOpen();

        // Optimizing is a writer operation, so switch to writer mode first.
        CreateIndexWriter();
        indexWriter.Optimize();
    }
}
/// <summary>
/// If non-null, information about merges and a message when
/// <see cref="GetMaxFieldLength()"/> is reached will be printed to this stream.
/// </summary>
/// <param name="infoStream">the stream to print to, or <c>null</c></param>
/// <exception cref="System.SystemException">if the index is closed</exception>
public virtual void SetInfoStream(System.IO.StreamWriter infoStream)
{
    lock (directory)
    {
        AssureOpen();

        // Forward to the writer if one is held; the value is remembered either way.
        IndexWriter activeWriter = indexWriter;
        if (activeWriter != null)
        {
            activeWriter.SetInfoStream(infoStream);
        }

        this.infoStream = infoStream;
    }
}
/// <summary>
/// Returns the info stream reported by the <see cref="IndexWriter"/>; a writer
/// is opened first if none is held.
/// </summary>
/// <returns>the writer's current info stream</returns>
/// <seealso cref="SetInfoStream(System.IO.StreamWriter)"/>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual System.IO.StreamWriter GetInfoStream()
{
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();

        System.IO.StreamWriter current = indexWriter.GetInfoStream();
        return current;
    }
}
/// <summary>
/// Turns usage of a compound file on or off. When on, the multiple files of
/// each segment are merged into a single file once the segment creation is
/// finished. This is done regardless of what directory is in use.
/// </summary>
/// <param name="useCompoundFile"><c>true</c> to use the compound file format</param>
/// <exception cref="System.SystemException">if the index is closed</exception>
public virtual void SetUseCompoundFile(bool useCompoundFile)
{
    lock (directory)
    {
        AssureOpen();

        // Forward to the writer if one is held; the value is remembered either way.
        IndexWriter activeWriter = indexWriter;
        if (activeWriter != null)
        {
            activeWriter.SetUseCompoundFile(useCompoundFile);
        }

        this.useCompoundFile = useCompoundFile;
    }
}
/// <summary>
/// Returns the compound-file setting reported by the <see cref="IndexWriter"/>;
/// a writer is opened first if none is held.
/// </summary>
/// <returns>the writer's current compound-file setting</returns>
/// <seealso cref="SetUseCompoundFile(bool)"/>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual bool GetUseCompoundFile()
{
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();

        bool current = indexWriter.GetUseCompoundFile();
        return current;
    }
}
/// <summary>
/// Sets the maximum number of terms that will be indexed for a single field in
/// a document. This limits the amount of memory required for indexing, so that
/// collections with very large files will not crash the indexing process by
/// running out of memory.
/// <p/>
/// Note that this effectively truncates large documents, excluding from the
/// index terms that occur further in the document. If you know your source
/// documents are large, be sure to set this value high enough to accommodate
/// the expected size. If you set it to the maximum <c>int</c> value, the only
/// limit is your memory, but you should anticipate running out of it.
/// <p/>
/// By default, no more than 10,000 terms will be indexed for a field.
/// </summary>
/// <param name="maxFieldLength">the new per-field term limit</param>
/// <exception cref="System.SystemException">if the index is closed</exception>
public virtual void SetMaxFieldLength(int maxFieldLength)
{
    lock (directory)
    {
        AssureOpen();

        // Forward to the writer if one is held; the value is remembered either way.
        IndexWriter activeWriter = indexWriter;
        if (activeWriter != null)
        {
            activeWriter.SetMaxFieldLength(maxFieldLength);
        }

        this.maxFieldLength = maxFieldLength;
    }
}
/// <summary>
/// Returns the per-field term limit reported by the <see cref="IndexWriter"/>;
/// a writer is opened first if none is held.
/// </summary>
/// <returns>the writer's current maximum field length</returns>
/// <seealso cref="SetMaxFieldLength(int)"/>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual int GetMaxFieldLength()
{
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();

        int current = indexWriter.GetMaxFieldLength();
        return current;
    }
}
/// <summary>
/// Determines the minimal number of documents required before the buffered
/// in-memory documents are merged and a new segment is created. Since
/// documents are merged in a <c>RAMDirectory</c>, a large value gives faster
/// indexing. At the same time, mergeFactor limits the number of files open
/// in an FSDirectory.
/// <p/>The default value is 10.
/// </summary>
/// <param name="maxBufferedDocs">the new buffered-document threshold</param>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="System.ArgumentException">if maxBufferedDocs is smaller than 2</exception>
public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
{
    lock (directory)
    {
        AssureOpen();

        // Forward first: if the writer throws, the remembered value stays unchanged.
        IndexWriter activeWriter = indexWriter;
        if (activeWriter != null)
        {
            activeWriter.SetMaxBufferedDocs(maxBufferedDocs);
        }

        this.maxBufferedDocs = maxBufferedDocs;
    }
}
/// <summary>
/// Returns the buffered-document threshold reported by the
/// <see cref="IndexWriter"/>; a writer is opened first if none is held.
/// </summary>
/// <returns>the writer's current maximum number of buffered documents</returns>
/// <seealso cref="SetMaxBufferedDocs(int)"/>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual int GetMaxBufferedDocs()
{
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();

        int current = indexWriter.GetMaxBufferedDocs();
        return current;
    }
}
/// <summary>
/// Determines how often segment indices are merged when documents are added.
/// With smaller values, less RAM is used while indexing, and searches on
/// unoptimized indices are faster, but indexing speed is slower. With larger
/// values, more RAM is used during indexing, and while searches on unoptimized
/// indices are slower, indexing is faster. Thus larger values (above 10) are
/// best for batch index creation, and smaller values (below 10) for indices
/// that are interactively maintained.
/// <p/>This must never be less than 2. The default value is 10.
/// </summary>
/// <param name="mergeFactor">the new merge factor</param>
/// <exception cref="System.SystemException">if the index is closed</exception>
public virtual void SetMergeFactor(int mergeFactor)
{
    lock (directory)
    {
        AssureOpen();

        // Forward to the writer if one is held; the value is remembered either way.
        IndexWriter activeWriter = indexWriter;
        if (activeWriter != null)
        {
            activeWriter.SetMergeFactor(mergeFactor);
        }

        this.mergeFactor = mergeFactor;
    }
}
/// <summary>
/// Returns the merge factor reported by the <see cref="IndexWriter"/>;
/// a writer is opened first if none is held.
/// </summary>
/// <returns>the writer's current merge factor</returns>
/// <seealso cref="SetMergeFactor(int)"/>
/// <exception cref="System.SystemException">if the index is closed</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="LockObtainFailedException">if another writer has this index open
/// (<c>write.lock</c> could not be obtained)</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual int GetMergeFactor()
{
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();

        int current = indexWriter.GetMergeFactor();
        return current;
    }
}
/// <summary>
/// Closes this index, writing all pending changes to disk. Whichever of the
/// writer or reader is held gets closed, the modifier is marked as closed, and
/// the directory is closed too when this instance obtained it itself.
/// </summary>
/// <exception cref="System.InvalidOperationException">
/// if the index has already been closed. This type derives from
/// <see cref="System.SystemException"/>, so callers that catch
/// <c>SystemException</c> keep working.
/// </exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual void Close()
{
    lock (directory)
    {
        if (!open)
        {
            // InvalidOperationException is the .NET counterpart of the documented
            // IllegalStateException; throwing bare SystemException is discouraged.
            throw new System.InvalidOperationException("Index is closed already");
        }

        if (indexWriter != null)
        {
            indexWriter.Close();
            indexWriter = null;
        }
        else if (indexReader != null)
        {
            indexReader.Close();
            indexReader = null;
        }

        open = false;

        // Only close the directory when a constructor of this class obtained it.
        if (closeDir)
        {
            directory.Close();
        }
        closeDir = false;
    }
}
/// <summary>Returns <c>"Index@"</c> followed by the directory's string form.</summary>
public override System.String ToString()
{
    // Equivalent to the + operator: a null directory contributes an empty string.
    return System.String.Concat("Index@", directory);
}
675 // used as an example in the javadoc:
676 public static void main(String[] args) throws IOException {
677 Analyzer analyzer = new StandardAnalyzer();
678 // create an index in /tmp/index, overwriting an existing one:
679 IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
680 Document doc = new Document();
681 doc.add(new Fieldable("id", "1", Fieldable.Store.YES, Fieldable.Index.NOT_ANALYZED));
682 doc.add(new Fieldable("body", "a simple test", Fieldable.Store.YES, Fieldable.Index.ANALYZED));
683 indexModifier.addDocument(doc);
684 int deleted = indexModifier.delete(new Term("id", "1"));
685 System.out.println("Deleted " + deleted + " document");
686 indexModifier.flush();
687 System.out.println(indexModifier.docCount() + " docs in index");
688 indexModifier.close();