Merge pull request #409 from Alkarex/patch-1
[mono.git] / mcs / tools / monkeydoc / Lucene.Net / Lucene.Net / Search / MultiPhraseQuery.cs
1 /* 
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  * 
9  * http://www.apache.org/licenses/LICENSE-2.0
10  * 
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 using System;
19
20 using IndexReader = Mono.Lucene.Net.Index.IndexReader;
21 using MultipleTermPositions = Mono.Lucene.Net.Index.MultipleTermPositions;
22 using Term = Mono.Lucene.Net.Index.Term;
23 using TermPositions = Mono.Lucene.Net.Index.TermPositions;
24 using ToStringUtils = Mono.Lucene.Net.Util.ToStringUtils;
25
26 namespace Mono.Lucene.Net.Search
27 {
28         
/// <summary> MultiPhraseQuery is a generalized version of PhraseQuery, with an added
/// method <see cref="Add(Term[])"/>.
/// To use this class to search for the phrase "Microsoft app*", first call
/// Add(Term) with the term "Microsoft", then find all terms that have "app" as a
/// prefix using IndexReader.Terms(Term), and call Add(Term[]) to add them to the
/// query.
/// </summary>
37         /// <version>  1.0
38         /// </version>
39         [Serializable]
40         public class MultiPhraseQuery:Query
41         {
42                 private System.String field;
43                 private System.Collections.ArrayList termArrays = new System.Collections.ArrayList();
44                 private System.Collections.ArrayList positions = new System.Collections.ArrayList();
45                 
46                 private int slop = 0;
47                 
/// <summary>Sets the phrase slop for this query.</summary>
/// <param name="s">The new slop value; it is stored as given, without validation.</param>
/// <seealso cref="PhraseQuery.SetSlop(int)"/>
public virtual void SetSlop(int s)
{
    this.slop = s;
}
55                 
/// <summary>Gets the phrase slop for this query.</summary>
/// <returns>The currently stored slop; the field is initialized to 0.</returns>
/// <seealso cref="PhraseQuery.GetSlop()"/>
public virtual int GetSlop()
{
    return this.slop;
}
63                 
/// <summary>Appends a single term at the next position in the phrase.</summary>
/// <param name="term">The term to append; it is wrapped in a one-element array
/// and forwarded to the Term[] overload.</param>
/// <seealso cref="PhraseQuery.Add(Term)"/>
public virtual void Add(Term term)
{
    Term[] single = new Term[1];
    single[0] = term;
    Add(single);
}
71                 
/// <summary>Appends a group of alternative terms at the next position in the
/// phrase. Any of the terms may match at that position.</summary>
/// <param name="terms">The alternative terms for the new position.</param>
/// <remarks>The new position is one past the most recently added position, or 0
/// when nothing has been added yet.</remarks>
/// <seealso cref="PhraseQuery.Add(Term)"/>
public virtual void Add(Term[] terms)
{
    int count = positions.Count;
    int position = count > 0 ? ((System.Int32) positions[count - 1]) + 1 : 0;

    Add(terms, position);
}
86                 
/// <summary>Adds a group of alternative terms at an explicit relative position
/// within the phrase.</summary>
/// <param name="terms">The alternative terms for this position. Must be non-null,
/// non-empty and contain no null entries. The array is stored as passed (no copy).</param>
/// <param name="position">The relative position of these terms within the phrase.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="terms"/> is null.</exception>
/// <exception cref="System.ArgumentException"><paramref name="terms"/> is empty,
/// contains a null entry, or contains a term whose field differs from the field
/// of the query.</exception>
/// <seealso cref="PhraseQuery.Add(Term, int)"/>
public virtual void Add(Term[] terms, int position)
{
    if (terms == null)
    {
        throw new System.ArgumentNullException("terms");
    }
    if (terms.Length == 0)
    {
        // An empty group can never match and would make ToString/Scorer index terms[0] out of range.
        throw new System.ArgumentException("At least one term is required for each phrase position", "terms");
    }
    for (int i = 0; i < terms.Length; i++)
    {
        if ((System.Object) terms[i] == null)
        {
            throw new System.ArgumentException("Phrase terms must not be null", "terms");
        }
    }

    // The first group added decides the field of the whole query.
    System.String expectedField = termArrays.Count == 0 ? terms[0].Field() : field;

    for (int i = 0; i < terms.Length; i++)
    {
        // Reference comparison is kept from the original code; presumably Term
        // interns its field names - TODO confirm against Term.
        if ((System.Object) terms[i].Field() != (System.Object) expectedField)
        {
            throw new System.ArgumentException("All phrase terms must be in the same field (" + expectedField + "): " + terms[i]);
        }
    }

    // Commit state only after every term has been validated.
    field = expectedField;
    termArrays.Add(terms);
    positions.Add((System.Int32) position);
}
112
/// <summary>Returns the term groups of this multiphrase as a list of Term[].</summary>
/// <returns>A read-only wrapper around a new list that holds the same Term[]
/// instances as the query; do not modify the arrays it contains.</returns>
public virtual System.Collections.IList GetTermArrays()
{
    System.Collections.ArrayList snapshot = new System.Collections.ArrayList(termArrays);
    System.Collections.IList readOnlyView = System.Collections.ArrayList.ReadOnly(snapshot);
    return readOnlyView;
}
120                 
/// <summary>Returns the relative positions of the term groups in this phrase.</summary>
/// <returns>A newly allocated array with one entry per added term group, in
/// insertion order.</returns>
public virtual int[] GetPositions()
{
    int[] result = new int[positions.Count];
    int next = 0;
    foreach (System.Object boxed in positions)
    {
        result[next] = (System.Int32) boxed;
        next++;
    }
    return result;
}
129                 
/// <summary>Adds every term of every term group of this query to
/// <paramref name="terms"/> through
/// SupportClass.CollectionsHelper.AddIfNotContains.</summary>
/// <param name="terms">The table that receives the terms.</param>
public override void ExtractTerms(System.Collections.Hashtable terms)
{
    foreach (Term[] alternatives in termArrays)
    {
        foreach (Term term in alternatives)
        {
            SupportClass.CollectionsHelper.AddIfNotContains(terms, term);
        }
    }
}
142                 
143                 
/// <summary>Weight for a <see cref="MultiPhraseQuery"/>: sums the idf of all its
/// terms, derives the normalized query weight, creates the phrase scorers and
/// produces score explanations.</summary>
[Serializable]
private class MultiPhraseWeight : Weight
{
    private MultiPhraseQuery enclosingInstance;

    /// <summary>The query this weight was created for.</summary>
    public MultiPhraseQuery Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    private Similarity similarity;
    private float value_Renamed;
    private float idf;
    private float queryNorm;
    private float queryWeight;

    /// <summary>Creates the weight and accumulates the idf of every term in every
    /// term group of the query.</summary>
    /// <param name="enclosingInstance">The query being weighted.</param>
    /// <param name="searcher">The searcher used to resolve the similarity and the
    /// per-term idf.</param>
    public MultiPhraseWeight(MultiPhraseQuery enclosingInstance, Searcher searcher)
    {
        this.enclosingInstance = enclosingInstance;
        this.similarity = enclosingInstance.GetSimilarity(searcher);

        // compute idf; the similarity resolved above is reused for every term
        // instead of being looked up again on each iteration
        foreach (Term[] terms in enclosingInstance.termArrays)
        {
            for (int j = 0; j < terms.Length; j++)
            {
                idf += similarity.Idf(terms[j], searcher);
            }
        }
    }

    /// <summary>Returns the query this weight belongs to.</summary>
    public override Query GetQuery()
    {
        return Enclosing_Instance;
    }

    /// <summary>Returns the value computed by the last <c>Normalize</c> call
    /// (0 before any such call).</summary>
    public override float GetValue()
    {
        return value_Renamed;
    }

    /// <summary>Computes the raw query weight (idf * boost), stores it and
    /// returns its square.</summary>
    public override float SumOfSquaredWeights()
    {
        queryWeight = idf * Enclosing_Instance.GetBoost(); // compute query weight
        return queryWeight * queryWeight; // square it
    }

    /// <summary>Stores the query norm, scales the query weight by it and derives
    /// the final weight value (normalized query weight * idf).</summary>
    /// <param name="queryNorm">The normalization factor to apply.</param>
    public override void Normalize(float queryNorm)
    {
        this.queryNorm = queryNorm;
        queryWeight *= queryNorm; // normalize query weight
        value_Renamed = queryWeight * idf; // idf for document
    }

    /// <summary>Creates the scorer for this weight on the given reader.</summary>
    /// <param name="reader">The reader supplying term positions and norms.</param>
    /// <param name="scoreDocsInOrder">Not used by this implementation.</param>
    /// <param name="topScorer">Not used by this implementation.</param>
    /// <returns>An exact phrase scorer when the slop is 0, otherwise a sloppy
    /// phrase scorer; <c>null</c> when the query has no term groups or when no
    /// positions enumerator is available for one of them.</returns>
    public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
    {
        MultiPhraseQuery query = Enclosing_Instance;

        if (query.termArrays.Count == 0)
        {
            // optimize zero-term case
            return null;
        }

        TermPositions[] tps = new TermPositions[query.termArrays.Count];
        for (int i = 0; i < tps.Length; i++)
        {
            Term[] terms = (Term[]) query.termArrays[i];

            // Several alternatives at one position are merged into a single enumerator.
            TermPositions p;
            if (terms.Length > 1)
            {
                p = new MultipleTermPositions(reader, terms);
            }
            else
            {
                p = reader.TermPositions(terms[0]);
            }

            if (p == null)
            {
                return null;
            }

            tps[i] = p;
        }

        if (query.slop == 0)
        {
            return new ExactPhraseScorer(this, tps, query.GetPositions(), similarity, reader.Norms(query.field));
        }
        return new SloppyPhraseScorer(this, tps, query.GetPositions(), similarity, query.slop, reader.Norms(query.field));
    }

    /// <summary>Explains the score of a document as the product of the query
    /// weight (boost * idf * queryNorm) and the field weight (tf * idf * fieldNorm).</summary>
    /// <param name="reader">The reader the document lives in.</param>
    /// <param name="doc">The document number to explain.</param>
    /// <returns>The combined explanation; only the field-weight explanation when
    /// the query weight is exactly 1.0; or a "no matching docs" explanation with
    /// value 0 when no scorer could be created.</returns>
    public override Explanation Explain(IndexReader reader, int doc)
    {
        ComplexExplanation result = new ComplexExplanation();
        result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");

        Explanation idfExpl = new Explanation(idf, "idf(" + GetQuery() + ")");

        // explain query weight
        Explanation queryExpl = new Explanation();
        queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");

        // The boost is listed as a detail only when it differs from the neutral
        // value, but it always takes part in the product below.
        Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");
        if (Enclosing_Instance.GetBoost() != 1.0f)
        {
            queryExpl.AddDetail(boostExpl);
        }

        queryExpl.AddDetail(idfExpl);

        Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
        queryExpl.AddDetail(queryNormExpl);

        queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());

        result.AddDetail(queryExpl);

        // explain field weight
        ComplexExplanation fieldExpl = new ComplexExplanation();
        fieldExpl.SetDescription("fieldWeight(" + GetQuery() + " in " + doc + "), product of:");

        Scorer scorer = Scorer(reader, true, false);
        if (scorer == null)
        {
            return new Explanation(0.0f, "no matching docs");
        }
        Explanation tfExpl = scorer.Explain(doc);
        fieldExpl.AddDetail(tfExpl);
        fieldExpl.AddDetail(idfExpl);

        // A field without norms contributes a neutral norm of 1.0.
        Explanation fieldNormExpl = new Explanation();
        byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);
        float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 1.0f;
        fieldNormExpl.SetValue(fieldNorm);
        fieldNormExpl.SetDescription("fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")");
        fieldExpl.AddDetail(fieldNormExpl);

        fieldExpl.SetMatch(tfExpl.IsMatch());
        fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());

        result.AddDetail(fieldExpl);
        result.SetMatch(fieldExpl.GetMatch());

        // combine them
        result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());

        if (queryExpl.GetValue() == 1.0f)
        {
            return fieldExpl;
        }

        return result;
    }
}
294                 
/// <summary>Rewrites a query that has exactly one term group into a BooleanQuery
/// of optional TermQuery clauses carrying the same boost; any other query is
/// returned unchanged.</summary>
/// <param name="reader">Not used by this implementation.</param>
/// <returns>The equivalent BooleanQuery, or this instance.</returns>
public override Query Rewrite(IndexReader reader)
{
    if (termArrays.Count != 1)
    {
        return this;
    }

    // optimize one-term case
    Term[] alternatives = (Term[]) termArrays[0];
    // NOTE(review): the 'true' argument presumably disables coord scoring - confirm against BooleanQuery.
    BooleanQuery disjunction = new BooleanQuery(true);
    foreach (Term term in alternatives)
    {
        disjunction.Add(new TermQuery(term), BooleanClause.Occur.SHOULD);
    }
    disjunction.SetBoost(GetBoost());
    return disjunction;
}
314                 
/// <summary>Creates the weight used to score this query with the given searcher.</summary>
/// <param name="searcher">The searcher passed on to the new MultiPhraseWeight.</param>
/// <returns>A new MultiPhraseWeight bound to this query.</returns>
public override Weight CreateWeight(Searcher searcher)
{
    Weight weight = new MultiPhraseWeight(this, searcher);
    return weight;
}
319                 
/// <summary>Prints a user-readable version of this query.</summary>
/// <param name="f">The default field; the field prefix is omitted when the field
/// of the query equals it.</param>
/// <returns>The quoted phrase, with groups of alternatives in parentheses,
/// followed by "~slop" when the slop is non-zero and by the boost suffix from
/// ToStringUtils.Boost. A query without any terms prints as an empty quoted
/// phrase with no field prefix.</returns>
public override System.String ToString(System.String f)
{
    System.Text.StringBuilder buffer = new System.Text.StringBuilder();

    // field stays null until the first Add call, so guard against an empty query
    if (field != null && !field.Equals(f))
    {
        buffer.Append(field);
        buffer.Append(":");
    }

    buffer.Append("\"");
    bool first = true;
    foreach (Term[] terms in termArrays)
    {
        // single space between consecutive positions
        if (!first)
        {
            buffer.Append(" ");
        }
        first = false;

        if (terms.Length > 1)
        {
            buffer.Append("(");
            for (int j = 0; j < terms.Length; j++)
            {
                buffer.Append(terms[j].Text());
                if (j < terms.Length - 1)
                {
                    buffer.Append(" ");
                }
            }
            buffer.Append(")");
        }
        else
        {
            buffer.Append(terms[0].Text());
        }
    }
    buffer.Append("\"");

    if (slop != 0)
    {
        buffer.Append("~");
        buffer.Append(slop);
    }

    buffer.Append(ToStringUtils.Boost(GetBoost()));

    return buffer.ToString();
}
373                 
374                 
/// <summary>Returns true if <code>o</code> is equal to this.</summary>
/// <param name="o">The object to compare with.</param>
/// <returns>True when <paramref name="o"/> is a MultiPhraseQuery with the same
/// boost and slop, pairwise-equal term groups (as decided by
/// SupportClass.Compare.CompareTermArrays) and identical positions.</returns>
public override bool Equals(System.Object o)
{
    MultiPhraseQuery other = o as MultiPhraseQuery;
    if (System.Object.ReferenceEquals(other, null))
    {
        return false;
    }

    if (this.GetBoost() != other.GetBoost() || this.slop != other.slop)
    {
        return false;
    }

    // term groups: same number, pairwise equal
    int groupCount = this.termArrays.Count;
    if (groupCount != other.termArrays.Count)
    {
        return false;
    }
    for (int i = 0; i < groupCount; i++)
    {
        Term[] mine = (Term[]) this.termArrays[i];
        Term[] theirs = (Term[]) other.termArrays[i];
        if (!SupportClass.Compare.CompareTermArrays(mine, theirs))
        {
            return false;
        }
    }

    // positions: same number, pairwise equal
    int positionCount = this.positions.Count;
    if (positionCount != other.positions.Count)
    {
        return false;
    }
    for (int i = 0; i < positionCount; i++)
    {
        if ((int) this.positions[i] != (int) other.positions[i])
        {
            return false;
        }
    }

    return true;
}
417                 
/// <summary>Returns a hash code value for this object.</summary>
/// <returns>The XOR of the raw bits of the boost, the slop, the hash of the term
/// groups, the sum of the positions and a fixed constant.</returns>
public override int GetHashCode()
{
    int positionSum = 0;
    for (int i = 0; i < positions.Count; i++)
    {
        int pos = (int) positions[i];
        positionSum += pos.GetHashCode();
    }

    int boostBits = BitConverter.ToInt32(BitConverter.GetBytes(GetBoost()), 0);
    return boostBits ^ slop ^ TermArraysHashCode() ^ positionSum ^ 0x4AC65113;
}
428                 
/// <summary>Computes the hash code of the term groups, combining the per-group
/// hashes in list order as 31 * previous + groupHash, starting from 1.</summary>
/// <returns>The combined hash; a null group contributes 0.</returns>
private int TermArraysHashCode()
{
    int hashCode = 1;
    foreach (Term[] termArray in termArrays)
    {
        int groupHash = 0;
        if (termArray != null)
        {
            groupHash = ArraysHashCode(termArray);
        }
        hashCode = 31 * hashCode + groupHash;
    }
    return hashCode;
}
441                 
/// <summary>Computes an order-dependent hash of a term array as
/// 31 * previous + termHash, starting from 1.</summary>
/// <param name="termArray">The array to hash; may be null.</param>
/// <returns>0 for a null array, otherwise the combined hash; a null element
/// contributes 0.</returns>
private int ArraysHashCode(Term[] termArray)
{
    if (termArray == null)
    {
        return 0;
    }

    int result = 1;
    foreach (Term term in termArray)
    {
        int termHash = 0;
        if (term != null)
        {
            termHash = term.GetHashCode();
        }
        result = 31 * result + termHash;
    }
    return result;
}
457                 
/// <summary>Compares two lists of Term[] element by element.</summary>
/// <param name="termArrays1">The first list of term arrays.</param>
/// <param name="termArrays2">The second list of term arrays.</param>
/// <returns>True when both lists have the same number of entries and every pair
/// of entries is either both null or equal according to TermEquals.</returns>
private bool TermArraysEquals(System.Collections.IList termArrays1, System.Collections.IList termArrays2)
{
    if (termArrays1.Count != termArrays2.Count)
    {
        return false;
    }
    System.Collections.IEnumerator iterator1 = termArrays1.GetEnumerator();
    System.Collections.IEnumerator iterator2 = termArrays2.GetEnumerator();
    // Both enumerators must be advanced together; reading Current from an
    // enumerator that was never moved is invalid.
    while (iterator1.MoveNext() && iterator2.MoveNext())
    {
        Term[] termArray1 = (Term[]) iterator1.Current;
        Term[] termArray2 = (Term[]) iterator2.Current;
        if (!(termArray1 == null ? termArray2 == null : TermEquals(termArray1, termArray2)))
        {
            return false;
        }
    }
    return true;
}
478
/// <summary>Compares two arrays element by element.</summary>
/// <param name="array1">The first array; may be null.</param>
/// <param name="array2">The second array; may be null.</param>
/// <returns>True when both arrays are null, or when both are non-null, have the
/// same length and hold equal elements at every index. Two null elements at the
/// same index count as equal.</returns>
public static bool TermEquals(System.Array array1, System.Array array2)
{
    if (array1 == null || array2 == null)
    {
        return array1 == null && array2 == null;
    }

    if (array1.Length != array2.Length)
    {
        return false;
    }

    for (int index = 0; index < array1.Length; index++)
    {
        // The static Object.Equals is null-safe, unlike calling Equals on the element itself.
        if (!System.Object.Equals(array1.GetValue(index), array2.GetValue(index)))
        {
            return false;
        }
    }
    return true;
}
502         }
503 }