2 * Copyright 2004 The Apache Software Foundation
\r
4 * Licensed under the Apache License, Version 2.0 (the "License");
\r
5 * you may not use this file except in compliance with the License.
\r
6 * You may obtain a copy of the License at
\r
8 * http://www.apache.org/licenses/LICENSE-2.0
\r
10 * Unless required by applicable law or agreed to in writing, software
\r
11 * distributed under the License is distributed on an "AS IS" BASIS,
\r
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
13 * See the License for the specific language governing permissions and
\r
14 * limitations under the License.
\r
17 using Monodoc.Lucene.Net.Index;
\r
18 using Term = Monodoc.Lucene.Net.Index.Term;
\r
19 using TermPositions = Monodoc.Lucene.Net.Index.TermPositions;
\r
20 namespace Monodoc.Lucene.Net.Search
\r
23 /// <summary>A Query that matches documents containing a particular sequence of terms.
\r
24 /// This may be combined with other terms with a <see cref="BooleanQuery"/>.
\r
27 public class PhraseQuery:Query
\r
29 private System.String field;
\r
30 private System.Collections.ArrayList terms = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
\r
31 private System.Collections.ArrayList positions = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
\r
32 private int slop = 0;
\r
34 /// <summary>Constructs an empty phrase query. </summary>
\r
35 public PhraseQuery()
\r
39 /// <summary>Sets the number of other words permitted between words in query phrase.
\r
40 /// If zero, then this is an exact phrase search. For larger values this works
\r
41 /// like a <code>WITHIN</code> or <code>NEAR</code> operator.
\r
42 /// <p>The slop is in fact an edit-distance, where the units correspond to
\r
43 /// moves of terms in the query phrase out of position. For example, to switch
\r
44 /// the order of two words requires two moves (the first move places the words
\r
45 /// atop one another), so to permit re-orderings of phrases, the slop must be at least two.
\r
47 /// <p>More exact matches are scored higher than sloppier matches, thus search
\r
48 /// results are sorted by exactness.
\r
49 /// <p>The slop is zero by default, requiring exact matches.
\r
51 public virtual void SetSlop(int s)
\r
55 /// <summary>Returns the slop. See <see cref="SetSlop"/>. </summary>
\r
56 public virtual int GetSlop()
\r
/// <summary>Appends a term to the tail of the query phrase.
/// The term is placed one position past the last recorded position, or at
/// position zero when no positions have been recorded yet.
/// </summary>
/// <param name="term">the term to append</param>
public virtual void Add(Term term)
{
    int lastIndex = positions.Count - 1;
    int nextPosition = (lastIndex >= 0)
        ? ((System.Int32) positions[lastIndex]) + 1
        : 0;

    // Delegate to the explicit-position overload so field validation happens in one place.
    Add(term, nextPosition);
}
\r
/// <summary> Adds a term to the end of the query phrase.
/// The relative position of the term within the phrase is specified explicitly.
/// This allows e.g. phrases with more than one term at the same position
/// or phrases with gaps (e.g. in connection with stopwords).
/// </summary>
/// <param name="term">the term to append; its field must match the field of
/// the terms already in the phrase</param>
/// <param name="position">the relative position of the term within the phrase</param>
/// <exception cref="System.ArgumentException">thrown when the term's field
/// differs from the field fixed by the first term</exception>
public virtual void Add(Term term, int position)
{
    if (terms.Count == 0)
    {
        // The first term added fixes the field for the whole phrase.
        field = term.Field();
    }
    else if (!System.String.Equals(term.Field(), field))
    {
        // Compare field names by value. A reference comparison is only correct
        // when every field string happens to be interned, and would otherwise
        // reject terms that really are in the same field.
        throw new System.ArgumentException("All phrase terms must be in the same field: " + term);
    }

    terms.Add(term);
    positions.Add((System.Int32) position);
}
\r
/// <summary>Copies the terms of this phrase into a new array, in list order. </summary>
/// <returns>a freshly allocated array holding every term of the phrase</returns>
public virtual Term[] GetTerms()
{
    System.Array copy = terms.ToArray(typeof(Term));
    return (Term[]) copy;
}
\r
/// <summary>Copies the relative positions of the phrase terms into a new array.</summary>
/// <returns>a freshly allocated array with one position per entry of the
/// internal position list, in list order</returns>
public virtual int[] GetPositions()
{
    // The list stores boxed Int32 values; ToArray(Type) unboxes them into an int[].
    return (int[]) positions.ToArray(typeof(int));
}
\r
/// <summary>Weight implementation for a <see cref="PhraseQuery"/>. It derives
/// the query weight from idf and boost, creates the exact or sloppy phrase
/// scorer, and builds the score explanation for a single document.
/// </summary>
private class PhraseWeight : Weight
{
    private PhraseQuery enclosingInstance;
    private Searcher searcher;
    private float value_Renamed;
    private float idf;
    private float queryNorm;
    private float queryWeight;

    /// <summary>Creates the weight for the given query and searcher.</summary>
    /// <param name="enclosingInstance">the phrase query being weighted</param>
    /// <param name="searcher">the searcher used for idf and document frequencies</param>
    public PhraseWeight(PhraseQuery enclosingInstance, Searcher searcher)
    {
        InitBlock(enclosingInstance);
        this.searcher = searcher;
    }

    private void InitBlock(PhraseQuery enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    /// <summary>The query this weight belongs to.</summary>
    virtual public Query Query
    {
        get
        {
            return Enclosing_Instance;
        }
    }

    /// <summary>The weight value computed by the last call to Normalize.</summary>
    virtual public float Value
    {
        get
        {
            return value_Renamed;
        }
    }

    /// <summary>The phrase query that owns this weight.</summary>
    public PhraseQuery Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    public override System.String ToString()
    {
        return "weight(" + Enclosing_Instance + ")";
    }

    /// <summary>Computes idf and the raw query weight (idf * boost).</summary>
    /// <returns>the square of the raw query weight</returns>
    public virtual float SumOfSquaredWeights()
    {
        idf = Enclosing_Instance.GetSimilarity(searcher).Idf(Enclosing_Instance.terms, searcher);
        queryWeight = idf * Enclosing_Instance.GetBoost(); // compute query weight
        return queryWeight * queryWeight; // square it
    }

    /// <summary>Applies the query normalization factor and stores the final weight value.</summary>
    /// <param name="queryNorm">the normalization factor to apply</param>
    public virtual void Normalize(float queryNorm)
    {
        this.queryNorm = queryNorm;
        queryWeight *= queryNorm; // normalize query weight
        value_Renamed = queryWeight * idf; // idf for document
    }

    /// <summary>Creates the scorer for this phrase over the given reader.</summary>
    /// <param name="reader">the index reader to score against</param>
    /// <returns>an exact scorer when slop is zero, a sloppy scorer otherwise,
    /// or null when the phrase has no terms or a term has no positions in the reader</returns>
    public virtual Scorer Scorer(Monodoc.Lucene.Net.Index.IndexReader reader)
    {
        if (Enclosing_Instance.terms.Count == 0)
        {
            // optimize zero-term case
            return null;
        }

        TermPositions[] tps = new TermPositions[Enclosing_Instance.terms.Count];
        for (int i = 0; i < Enclosing_Instance.terms.Count; i++)
        {
            TermPositions p = reader.TermPositions((Term) Enclosing_Instance.terms[i]);
            if (p == null)
            {
                return null;
            }
            tps[i] = p;
        }

        if (Enclosing_Instance.slop == 0)
        {
            // optimize exact case
            return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), Enclosing_Instance.GetSimilarity(searcher), reader.Norms(Enclosing_Instance.field));
        }

        return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), Enclosing_Instance.GetSimilarity(searcher), Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field));
    }

    /// <summary>Explains how the score of one document is computed: the product
    /// of the query weight (boost * idf * queryNorm) and the field weight
    /// (tf * idf * fieldNorm).
    /// </summary>
    /// <param name="reader">the index reader holding the document</param>
    /// <param name="doc">the document number to explain</param>
    /// <returns>the full explanation; only the field-weight explanation when
    /// the query weight is exactly 1; or a zero-valued "no matching docs"
    /// explanation when no scorer can be created</returns>
    public virtual Explanation Explain(Monodoc.Lucene.Net.Index.IndexReader reader, int doc)
    {
        Explanation result = new Explanation();
        result.SetDescription("weight(" + Query + " in " + doc + "), product of:");

        System.Text.StringBuilder docFreqs = new System.Text.StringBuilder();
        System.Text.StringBuilder query = new System.Text.StringBuilder();
        query.Append('\"');
        for (int i = 0; i < Enclosing_Instance.terms.Count; i++)
        {
            if (i != 0)
            {
                docFreqs.Append(" ");
                query.Append(" ");
            }

            Term term = (Term) Enclosing_Instance.terms[i];

            docFreqs.Append(term.Text());
            docFreqs.Append("=");
            docFreqs.Append(searcher.DocFreq(term));

            query.Append(term.Text());
        }
        query.Append('\"');

        Explanation idfExpl = new Explanation(idf, "idf(" + Enclosing_Instance.field + ": " + docFreqs + ")");

        // explain query weight
        Explanation queryExpl = new Explanation();
        queryExpl.SetDescription("queryWeight(" + Query + "), product of:");

        Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");
        if (Enclosing_Instance.GetBoost() != 1.0f)
        {
            queryExpl.AddDetail(boostExpl);
        }
        queryExpl.AddDetail(idfExpl);

        Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
        queryExpl.AddDetail(queryNormExpl);

        queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());

        result.AddDetail(queryExpl);

        // explain Field weight
        Explanation fieldExpl = new Explanation();
        fieldExpl.SetDescription("fieldWeight(" + Enclosing_Instance.field + ":" + query + " in " + doc + "), product of:");

        // Scorer() returns null when there is nothing to score; report that
        // instead of failing with a NullReferenceException.
        Monodoc.Lucene.Net.Search.Scorer phraseScorer = Scorer(reader);
        if (phraseScorer == null)
        {
            return new Explanation(0.0f, "no matching docs");
        }
        Explanation tfExpl = phraseScorer.Explain(doc);
        fieldExpl.AddDetail(tfExpl);
        fieldExpl.AddDetail(idfExpl);

        Explanation fieldNormExpl = new Explanation();
        byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);
        float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f;
        fieldNormExpl.SetValue(fieldNorm);
        fieldNormExpl.SetDescription("fieldNorm(Field=" + Enclosing_Instance.field + ", doc=" + doc + ")");
        fieldExpl.AddDetail(fieldNormExpl);

        fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());

        result.AddDetail(fieldExpl);

        // combine them
        result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());

        // A query weight of exactly 1 adds nothing, so return the field part alone.
        if (queryExpl.GetValue() == 1.0f)
        {
            return fieldExpl;
        }

        return result;
    }
}
\r
/// <summary>Builds the weight used to score this query with the given searcher.
/// A phrase made of exactly one term is handed off to an equivalent
/// <see cref="TermQuery"/> carrying the same boost.
/// </summary>
/// <param name="searcher">the searcher the weight is created for</param>
protected internal override Weight CreateWeight(Searcher searcher)
{
    if (terms.Count != 1)
    {
        return new PhraseWeight(this, searcher);
    }

    // optimize one-term case
    Query singleTermQuery = new TermQuery((Term) terms[0]);
    singleTermQuery.SetBoost(GetBoost());
    return singleTermQuery.CreateWeight(searcher);
}
\r
/// <summary>Prints a user-readable version of this query. </summary>
/// <param name="f">the default field; the field prefix is omitted when the
/// query's field equals it</param>
/// <returns>the quoted phrase, prefixed with "field:" when the field differs
/// from <paramref name="f"/>, followed by "~slop" when slop is non-zero and
/// "^boost" when the boost is not 1</returns>
public override System.String ToString(System.String f)
{
    System.Text.StringBuilder buffer = new System.Text.StringBuilder();

    // field is only assigned when the first term is added, so it is null for
    // an empty phrase; guard against it instead of throwing.
    if (field != null && !field.Equals(f))
    {
        buffer.Append(field);
        buffer.Append(":");
    }

    buffer.Append("\"");
    for (int i = 0; i < terms.Count; i++)
    {
        buffer.Append(((Term) terms[i]).Text());
        if (i != terms.Count - 1)
        {
            buffer.Append(" ");
        }
    }
    buffer.Append("\"");

    if (slop != 0)
    {
        buffer.Append("~");
        buffer.Append(slop);
    }

    if (GetBoost() != 1.0f)
    {
        // Format the boost with a fixed culture and one decimal digit so the
        // output does not depend on the current thread culture.
        System.Globalization.NumberFormatInfo nfi = new System.Globalization.CultureInfo("en-US", false).NumberFormat;
        nfi.NumberDecimalDigits = 1;

        buffer.Append("^");
        buffer.Append(GetBoost().ToString("N", nfi));
    }

    return buffer.ToString();
}
\r
/// <summary>Returns true iff <code>o</code> is equal to this. Two phrase
/// queries are equal when they have the same boost, the same slop, and
/// element-wise equal term and position lists.
/// </summary>
/// <param name="o">the object to compare with</param>
public override bool Equals(System.Object o)
{
    if (!(o is PhraseQuery))
    {
        return false;
    }

    PhraseQuery other = (PhraseQuery) o;
    return (this.GetBoost() == other.GetBoost())
        && (this.slop == other.slop)
        && ListsEqual(this.terms, other.terms)
        && ListsEqual(this.positions, other.positions);
}

/// <summary>Returns a hash code value for this object, derived from the
/// boost, the slop, and the contents of the term and position lists so that
/// it stays consistent with <see cref="Equals(System.Object)"/>.
/// </summary>
public override int GetHashCode()
{
    int boostBits = System.BitConverter.ToInt32(System.BitConverter.GetBytes(GetBoost()), 0);
    return boostBits ^ slop ^ ListHashCode(terms) ^ ListHashCode(positions);
}

/// <summary>Compares two lists element by element. ArrayList.Equals only
/// tests reference identity, so it cannot be used for content equality.
/// Element comparison relies on each element's own Equals implementation
/// (NOTE(review): assumes Term overrides Equals with value semantics - confirm).
/// </summary>
private static bool ListsEqual(System.Collections.ArrayList left, System.Collections.ArrayList right)
{
    if (System.Object.ReferenceEquals(left, right))
    {
        return true;
    }
    if (left.Count != right.Count)
    {
        return false;
    }
    for (int i = 0; i < left.Count; i++)
    {
        if (!System.Object.Equals(left[i], right[i]))
        {
            return false;
        }
    }
    return true;
}

/// <summary>Combines the hash codes of all list elements, in order.</summary>
private static int ListHashCode(System.Collections.ArrayList list)
{
    int hash = 1;
    unchecked
    {
        for (int i = 0; i < list.Count; i++)
        {
            System.Object item = list[i];
            hash = 31 * hash + (item == null ? 0 : item.GetHashCode());
        }
    }
    return hash;
}
341 override public System.Object Clone()
\r