using System;
using System.IO;
-using System.Collections;
+using System.Collections.Generic;
// Lucene imports
-using Monodoc.Lucene.Net.Index;
-using Monodoc.Lucene.Net.Documents;
-using Monodoc.Lucene.Net.Analysis;
-using Monodoc.Lucene.Net.Analysis.Standard;
-using Monodoc.Lucene.Net.Search;
-using Monodoc.Lucene.Net.QueryParsers;
+using Lucene.Net.Index;
+using Lucene.Net.Documents;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Search;
+using Lucene.Net.QueryParsers;
+using Lucene.Net.Store;
namespace Monodoc
{
+ public class SearchableIndex
+ {
+ const int maxSearchCount = 30;
-//TODO: where do I call searcher.close()
-public class SearchableIndex
-{
- IndexSearcher searcher;
- string dir;
- public string Dir {
- get {
- if (dir == null) dir = "search_index";
- return dir;
+ IndexSearcher searcher;
+ string dir;
+
+ public string Dir {
+ get {
+ if (dir == null)
+ dir = "search_index";
+ return dir;
+ }
+ set { dir = value; }
}
- set { dir = value; }
- }
- public ArrayList Results;
-
- public static SearchableIndex Load (string dir) {
- SearchableIndex s = new SearchableIndex ();
- s.dir = dir;
- s.Results = new ArrayList (20);
- try {
- s.searcher = new IndexSearcher (dir);
- } catch (IOException) {
- Console.WriteLine ("Index nonexistent or in bad format");
- return null;
+
+ public static SearchableIndex Load (string dir)
+ {
+ SearchableIndex s = new SearchableIndex ();
+ s.dir = dir;
+ try {
+ //s.searcher = new IndexSearcher (dir);
+ // TODO: parametrize that depending if we run on the desktop (low footprint) or the server (use RAMDirectory for instance)
+ s.searcher = new IndexSearcher (FSDirectory.Open (dir));
+ } catch (IOException) {
+ Console.WriteLine ("Index nonexistent or in bad format");
+ return null;
+ }
+ return s;
}
- return s;
- }
- //
- // Search the index with term
- //
- public Result Search (string term) {
- try {
- Query q1 = QueryParser.Parse (term, "hottext", new StandardAnalyzer ());
- Query q2 = QueryParser.Parse (term, "text", new StandardAnalyzer ());
- q2.SetBoost (0.7f);
- Query q3 = QueryParser.Parse (term, "examples", new StandardAnalyzer ());
- q3.SetBoost (0.5f);
- BooleanQuery q = new BooleanQuery();
- q.Add (q1, false, false);
- q.Add (q2, false, false);
- q.Add (q3, false, false);
- Hits hits = searcher.Search(q);
- Result r = new Result (term, hits);
- Results.Add (r);
- return r;
- } catch (IOException) {
- Console.WriteLine ("No index in {0}", dir);
- return null;
+ public Result Search (string term)
+ {
+ return Search (term, maxSearchCount);
}
- }
-
-}
-//
-// An object representing the search term with the results
-//
-public class Result {
- string term;
- public string Term {
- get { return term;}
- }
- public Hits hits;
- public int Count {
- get { return hits.Length(); }
- }
- public Document this [int i] {
- get { return hits.Doc (i); }
+ public Result Search (string term, int count)
+ {
+ return Search (term, count, 0);
+ }
+
+ public Result Search (string term, int count, int start) {
+ try {
+ term = term.ToLower ();
+ Term htTerm = new Term ("hottext", term);
+ Query qq1 = new FuzzyQuery (htTerm);
+ Query qq2 = new TermQuery (htTerm);
+ qq2.Boost = 10f;
+ Query qq3 = new PrefixQuery (htTerm);
+ qq3.Boost = 10f;
+ DisjunctionMaxQuery q1 = new DisjunctionMaxQuery (0f);
+ q1.Add (qq1);
+ q1.Add (qq2);
+ q1.Add (qq3);
+ Query q2 = new TermQuery (new Term ("text", term));
+ q2.Boost = 3f;
+ Query q3 = new TermQuery (new Term ("examples", term));
+ q3.Boost = 3f;
+ DisjunctionMaxQuery q = new DisjunctionMaxQuery (0f);
+
+ q.Add (q1);
+ q.Add (q2);
+ q.Add (q3);
+
+ TopDocs top = SearchInternal (q, count, start);
+ Result r = new Result (term, searcher, top.ScoreDocs);
+ return r;
+ } catch (IOException) {
+ Console.WriteLine ("No index in {0}", dir);
+ return null;
+ }
+ }
+
+ TopDocs SearchInternal (Query q, int count, int start)
+ {
+ // Easy path that doesn't involve creating a Collector ourselves
+ // watch for Lucene.NET improvement on that (like searcher.SearchAfter)
+ if (start == 0)
+ return searcher.Search (q, count);
+
+ var weight = searcher.CreateWeight (q); // TODO: reuse weight instead of query
+ var collector = TopScoreDocCollector.Create (start + count + 1, false);
+ searcher.Search (q, collector);
+
+ return collector.TopDocs (start, count);
+ }
+
+ public Result FastSearch (string term, int number)
+ {
+ try {
+ term = term.ToLower ();
+ Query q1 = new TermQuery (new Term ("hottext", term));
+ Query q2 = new PrefixQuery (new Term ("hottext", term));
+ q2.Boost = 0.5f;
+ DisjunctionMaxQuery q = new DisjunctionMaxQuery (0f);
+ q.Add (q1);
+ q.Add (q2);
+ TopDocs top = searcher.Search (q, number);
+ return new Result (term, searcher, top.ScoreDocs);
+ } catch (IOException) {
+ Console.WriteLine ("No index in {0}", dir);
+ return null;
+ }
+ }
}
+
+ //
+ // An object representing the search term with the results
+ //
+ public class Result {
+ string term;
+ Searcher searcher;
+ ScoreDoc[] docs;
+
+ public string Term {
+ get { return term;}
+ }
+
+ public int Count {
+ get { return docs.Length; }
+ }
+
+ public Document this [int i] {
+ get { return searcher.Doc (docs[i].Doc); }
+ }
- public string GetTitle (int i)
- {
- Document d = hits.Doc (i);
- if (d == null)
- return "";
- else
- return d.Get ("title");
- }
- public string GetUrl (int i)
- {
- Document d = hits.Doc (i);
- if (d == null)
- return "";
- else
- return d.Get ("url");
-
- }
- public float Score (int i)
- {
- return hits.Score (i);
- }
- public Result (string Term, Hits hits)
- {
- this.term = Term;
- this.hits = hits;
+ public string GetTitle (int i)
+ {
+ Document d = this[i];
+ return d == null ? string.Empty : d.Get ("title");
+ }
+
+ public string GetUrl (int i)
+ {
+ Document d = this[i];
+ return d == null ? string.Empty : d.Get ("url");
+ }
+
+ public string GetFullTitle (int i)
+ {
+ Document d = this[i];
+ return d == null ? string.Empty : d.Get ("fulltitle");
+ }
+
+ public float Score (int i)
+ {
+ return docs[i].Score;
+ }
+
+ public Result (string Term, Searcher searcher, ScoreDoc[] docs)
+ {
+ this.term = Term;
+ this.searcher = searcher;
+ this.docs = docs;
+ }
}
}
-}