Merge branch 'master' of github.com:tgiphil/mono
[mono.git] / mcs / tools / monodoc / Lucene.Net / Lucene.Net / Search / PhraseQuery.cs
1 /*\r
2  * Copyright 2004 The Apache Software Foundation\r
3  * \r
4  * Licensed under the Apache License, Version 2.0 (the "License");\r
5  * you may not use this file except in compliance with the License.\r
6  * You may obtain a copy of the License at\r
7  * \r
8  * http://www.apache.org/licenses/LICENSE-2.0\r
9  * \r
10  * Unless required by applicable law or agreed to in writing, software\r
11  * distributed under the License is distributed on an "AS IS" BASIS,\r
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
13  * See the License for the specific language governing permissions and\r
14  * limitations under the License.\r
15  */\r
16 using System;\r
17 using Monodoc.Lucene.Net.Index;\r
18 using Term = Monodoc.Lucene.Net.Index.Term;\r
19 using TermPositions = Monodoc.Lucene.Net.Index.TermPositions;\r
namespace Monodoc.Lucene.Net.Search
{

    /// <summary>A Query that matches documents containing a particular sequence of terms.
    /// This may be combined with other terms with a <c>BooleanQuery</c>.
    /// </summary>
    [Serializable]
    public class PhraseQuery : Query
    {
        // Field shared by every term of the phrase; null until the first term is added.
        private System.String field;

        // Terms of the phrase, in the order they were added (elements are Term).
        private System.Collections.ArrayList terms = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

        // Relative position of each term; positions[i] belongs to terms[i] (elements are boxed Int32).
        private System.Collections.ArrayList positions = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

        // Number of other words permitted between the words of the phrase; 0 means exact match.
        private int slop = 0;

        /// <summary>Constructs an empty phrase query. </summary>
        public PhraseQuery()
        {
        }

        /// <summary>Sets the number of other words permitted between words in query phrase.
        /// If zero, then this is an exact phrase search.  For larger values this works
        /// like a <c>WITHIN</c> or <c>NEAR</c> operator.
        /// <para>The slop is in fact an edit-distance, where the units correspond to
        /// moves of terms in the query phrase out of position.  For example, to switch
        /// the order of two words requires two moves (the first move places the words
        /// atop one another), so to permit re-orderings of phrases, the slop must be
        /// at least two.</para>
        /// <para>More exact matches are scored higher than sloppier matches, thus search
        /// results are sorted by exactness.</para>
        /// <para>The slop is zero by default, requiring exact matches.</para>
        /// </summary>
        /// <param name="s">The new slop value.</param>
        public virtual void SetSlop(int s)
        {
            slop = s;
        }

        /// <summary>Returns the slop.  See <see cref="SetSlop"/>. </summary>
        public virtual int GetSlop()
        {
            return slop;
        }

        /// <summary>Adds a term to the end of the query phrase.
        /// The relative position of the term is the one immediately after the last term added
        /// (or 0 when the phrase is still empty).
        /// </summary>
        /// <param name="term">The term to append.</param>
        public virtual void Add(Term term)
        {
            int position = 0;
            if (positions.Count > 0)
            {
                position = ((System.Int32) positions[positions.Count - 1]) + 1;
            }

            Add(term, position);
        }

        /// <summary>Adds a term to the end of the query phrase.
        /// The relative position of the term within the phrase is specified explicitly.
        /// This allows e.g. phrases with more than one term at the same position
        /// or phrases with gaps (e.g. in connection with stopwords).
        /// </summary>
        /// <param name="term">The term to append.</param>
        /// <param name="position">The relative position of the term within the phrase.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="term"/> is null.</exception>
        /// <exception cref="System.ArgumentException">The term's field differs from the field
        /// of the terms already in the phrase.</exception>
        public virtual void Add(Term term, int position)
        {
            if (term == null)
            {
                throw new System.ArgumentNullException("term");
            }

            if (terms.Count == 0)
            {
                field = term.Field();
            }
            else if (!System.String.Equals(term.Field(), field))
            {
                // Compare by value rather than by reference: a reference test only works
                // while both strings are the same interned instance, which is not
                // guaranteed (e.g. after this query has been deserialized).
                throw new System.ArgumentException("All phrase terms must be in the same field: " + term);
            }

            terms.Add(term);
            positions.Add((System.Int32) position);
        }

        /// <summary>Returns the set of terms in this phrase. </summary>
        /// <returns>A new array holding the terms in the order they were added.</returns>
        public virtual Term[] GetTerms()
        {
            return (Term[]) terms.ToArray(typeof(Term));
        }

        /// <summary>Returns the relative positions of terms in this phrase.</summary>
        /// <returns>A new array; element <c>i</c> is the position of the i-th term.</returns>
        public virtual int[] GetPositions()
        {
            // A single ToArray call takes one snapshot of the synchronized list,
            // instead of reading Count and the elements in separate calls.
            return (int[]) positions.ToArray(typeof(int));
        }

        /// <summary>Weight implementation for a multi-term phrase: computes the query weight
        /// from idf and boost, and creates the exact or sloppy phrase scorer.
        /// </summary>
        [Serializable]
        private class PhraseWeight : Weight
        {
            private PhraseQuery enclosingInstance;
            private Searcher searcher;
            private float value_Renamed;
            private float idf;
            private float queryNorm;
            private float queryWeight;

            /// <summary>Creates the weight of <paramref name="enclosingInstance"/> for
            /// <paramref name="searcher"/>.</summary>
            public PhraseWeight(PhraseQuery enclosingInstance, Searcher searcher)
            {
                this.enclosingInstance = enclosingInstance;
                this.searcher = searcher;
            }

            /// <summary>The phrase query this weight was created for.</summary>
            public virtual Query Query
            {
                get
                {
                    return enclosingInstance;
                }
            }

            /// <summary>The weight value computed by <c>Normalize</c>
            /// (normalized query weight multiplied by idf).</summary>
            public virtual float Value
            {
                get
                {
                    return value_Renamed;
                }
            }

            public override System.String ToString()
            {
                return "weight(" + enclosingInstance + ")";
            }

            /// <summary>Computes idf and the un-normalized query weight (idf * boost).</summary>
            /// <returns>The square of the query weight.</returns>
            public virtual float SumOfSquaredWeights()
            {
                idf = enclosingInstance.GetSimilarity(searcher).Idf(enclosingInstance.terms, searcher);
                queryWeight = idf * enclosingInstance.GetBoost(); // compute query weight
                return queryWeight * queryWeight; // square it
            }

            /// <summary>Applies the query normalization factor to the query weight and
            /// derives the final weight value.</summary>
            /// <param name="queryNorm">The normalization factor to apply.</param>
            public virtual void Normalize(float queryNorm)
            {
                this.queryNorm = queryNorm;
                queryWeight *= queryNorm; // normalize query weight
                value_Renamed = queryWeight * idf; // idf for document
            }

            /// <summary>Creates the scorer for this phrase over <paramref name="reader"/>.</summary>
            /// <returns>An exact scorer when the slop is zero, a sloppy scorer otherwise, or
            /// null when the phrase has no terms or the reader yields no positions for a term.</returns>
            public virtual Scorer Scorer(Monodoc.Lucene.Net.Index.IndexReader reader)
            {
                PhraseQuery phrase = enclosingInstance;

                // optimize zero-term case
                if (phrase.terms.Count == 0)
                {
                    return null;
                }

                TermPositions[] tps = new TermPositions[phrase.terms.Count];
                for (int i = 0; i < phrase.terms.Count; i++)
                {
                    TermPositions p = reader.TermPositions((Term) phrase.terms[i]);
                    if (p == null)
                    {
                        return null;
                    }
                    tps[i] = p;
                }

                // optimize exact case
                if (phrase.slop == 0)
                {
                    return new ExactPhraseScorer(this, tps, phrase.GetPositions(), phrase.GetSimilarity(searcher), reader.Norms(phrase.field));
                }

                return new SloppyPhraseScorer(this, tps, phrase.GetPositions(), phrase.GetSimilarity(searcher), phrase.slop, reader.Norms(phrase.field));
            }

            /// <summary>Builds an explanation of how the score of document
            /// <paramref name="doc"/> is computed: the query weight (boost * idf * queryNorm)
            /// multiplied by the field weight (tf * idf * fieldNorm).</summary>
            /// <returns>The combined explanation; only the field-weight explanation when the
            /// query weight is exactly 1; a zero-valued explanation when no scorer can be
            /// created for the reader.</returns>
            public virtual Explanation Explain(Monodoc.Lucene.Net.Index.IndexReader reader, int doc)
            {
                PhraseQuery phrase = enclosingInstance;

                Explanation result = new Explanation();
                result.SetDescription("weight(" + Query + " in " + doc + "), product of:");

                // Build "text=docFreq ..." for the idf description and the quoted phrase text.
                System.Text.StringBuilder docFreqs = new System.Text.StringBuilder();
                System.Text.StringBuilder phraseText = new System.Text.StringBuilder();
                phraseText.Append('\"');
                for (int i = 0; i < phrase.terms.Count; i++)
                {
                    if (i != 0)
                    {
                        docFreqs.Append(" ");
                        phraseText.Append(" ");
                    }

                    Term term = (Term) phrase.terms[i];

                    docFreqs.Append(term.Text());
                    docFreqs.Append("=");
                    docFreqs.Append(searcher.DocFreq(term));

                    phraseText.Append(term.Text());
                }
                phraseText.Append('\"');

                Explanation idfExpl = new Explanation(idf, "idf(" + phrase.field + ": " + docFreqs + ")");

                // explain query weight
                Explanation queryExpl = new Explanation();
                queryExpl.SetDescription("queryWeight(" + Query + "), product of:");

                Explanation boostExpl = new Explanation(phrase.GetBoost(), "boost");
                if (phrase.GetBoost() != 1.0f)
                {
                    queryExpl.AddDetail(boostExpl);
                }
                queryExpl.AddDetail(idfExpl);

                Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
                queryExpl.AddDetail(queryNormExpl);

                queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());

                result.AddDetail(queryExpl);

                // explain Field weight
                Explanation fieldExpl = new Explanation();
                fieldExpl.SetDescription("fieldWeight(" + phrase.field + ":" + phraseText + " in " + doc + "), product of:");

                // Scorer() returns null for an empty phrase or when a term has no
                // positions; there is nothing to explain then, so report a zero score
                // instead of dereferencing null.
                Monodoc.Lucene.Net.Search.Scorer scorer = Scorer(reader);
                if (scorer == null)
                {
                    return new Explanation(0.0f, "no matching docs");
                }

                Explanation tfExpl = scorer.Explain(doc);
                fieldExpl.AddDetail(tfExpl);
                fieldExpl.AddDetail(idfExpl);

                Explanation fieldNormExpl = new Explanation();
                byte[] fieldNorms = reader.Norms(phrase.field);
                float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f;
                fieldNormExpl.SetValue(fieldNorm);
                fieldNormExpl.SetDescription("fieldNorm(Field=" + phrase.field + ", doc=" + doc + ")");
                fieldExpl.AddDetail(fieldNormExpl);

                fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());

                result.AddDetail(fieldExpl);

                // combine them
                result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());

                // A query weight of exactly 1 contributes nothing; return the field part alone.
                if (queryExpl.GetValue() == 1.0f)
                {
                    return fieldExpl;
                }

                return result;
            }
        }

        /// <summary>Creates the weight for this query. A single-term phrase is delegated to
        /// an equivalent <c>TermQuery</c> carrying the same boost.</summary>
        protected internal override Weight CreateWeight(Searcher searcher)
        {
            if (terms.Count == 1)
            {
                // optimize one-term case
                Term term = (Term) terms[0];
                Query termQuery = new TermQuery(term);
                termQuery.SetBoost(GetBoost());
                return termQuery.CreateWeight(searcher);
            }

            return new PhraseWeight(this, searcher);
        }

        /// <summary>Prints a user-readable version of this query. </summary>
        /// <param name="f">The default field; the phrase's own field is printed as a prefix
        /// only when it differs from this one.</param>
        public override System.String ToString(System.String f)
        {
            System.Text.StringBuilder buffer = new System.Text.StringBuilder();

            // field is null until the first term is added; an empty phrase has no prefix.
            if (field != null && !field.Equals(f))
            {
                buffer.Append(field);
                buffer.Append(":");
            }

            buffer.Append("\"");
            for (int i = 0; i < terms.Count; i++)
            {
                buffer.Append(((Term) terms[i]).Text());
                if (i != terms.Count - 1)
                {
                    buffer.Append(" ");
                }
            }
            buffer.Append("\"");

            if (slop != 0)
            {
                buffer.Append("~");
                buffer.Append(slop);
            }

            if (GetBoost() != 1.0f)
            {
                System.Globalization.NumberFormatInfo nfi = new System.Globalization.CultureInfo("en-US", false).NumberFormat;
                nfi.NumberDecimalDigits = 1;

                buffer.Append("^");
                buffer.Append(GetBoost().ToString("N", nfi));
            }

            return buffer.ToString();
        }

        /// <summary>Returns true iff <c>o</c> is equal to this: a PhraseQuery with the
        /// same boost, slop, terms and positions. </summary>
        public override bool Equals(System.Object o)
        {
            if (!(o is PhraseQuery))
            {
                return false;
            }

            PhraseQuery other = (PhraseQuery) o;
            return (this.GetBoost() == other.GetBoost()) &&
                (this.slop == other.slop) &&
                ListsEqual(this.terms, other.terms) &&
                ListsEqual(this.positions, other.positions);
        }

        /// <summary>Returns a hash code value for this object, derived from the boost,
        /// the slop and the contents of the term and position lists.</summary>
        public override int GetHashCode()
        {
            return System.BitConverter.ToInt32(System.BitConverter.GetBytes(GetBoost()), 0) ^
                slop ^
                ListHashCode(terms) ^
                ListHashCode(positions);
        }

        /// <summary>Returns a copy of this query. The copy has its own term and position
        /// lists, so adding terms to one query does not affect the other; the Term
        /// objects themselves are shared.</summary>
        public override System.Object Clone()
        {
            PhraseQuery clone = (PhraseQuery) MemberwiseClone();
            clone.terms = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(terms));
            clone.positions = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(positions));
            return clone;
        }

        // Element-wise equality of two lists. ArrayList.Equals is reference equality,
        // so it cannot be used to compare the contents of two queries.
        // NOTE(review): relies on Term providing value-based Equals -- confirm.
        private static bool ListsEqual(System.Collections.ArrayList a, System.Collections.ArrayList b)
        {
            if (a.Count != b.Count)
            {
                return false;
            }

            for (int i = 0; i < a.Count; i++)
            {
                if (!System.Object.Equals(a[i], b[i]))
                {
                    return false;
                }
            }

            return true;
        }

        // Order-sensitive hash over the elements of a list, consistent with ListsEqual.
        private static int ListHashCode(System.Collections.ArrayList list)
        {
            int hash = 1;
            for (int i = 0; i < list.Count; i++)
            {
                System.Object item = list[i];
                unchecked
                {
                    hash = 31 * hash + (item == null ? 0 : item.GetHashCode());
                }
            }

            return hash;
        }
    }
}