// Copyright (c) Microsoft Corporation. All rights reserved.
// =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
//
// <OWNER>[....]</OWNER>
//
// Support for sorting.
//
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

using System.Collections.Generic;
using System.Threading;
using System.Diagnostics.Contracts;

namespace System.Linq.Parallel
{
    //---------------------------------------------------------------------------------------
    // The sort helper abstraction hides the implementation of our parallel merge sort. See
    // comments below for more details. In summary, there will be one sort helper per
    // partition. Each will, in parallel, read the whole key/value set from its input,
    // perform a local sort on this data, and then cooperatively merge with other concurrent
    // tasks to generate a single sorted output. The local sort step is done using a simple
    // quick-sort algorithm. Then we use a log(p) reduction to perform merges in parallel;
    // during each round of merges, half of the threads will stop doing work and may return.
    // At the end, one thread will remain and it holds the final sorted output.
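    //
    // A sketch of the intended call pattern (illustrative only; the driver that actually
    // schedules the concurrent tasks lives elsewhere in the query infrastructure):
    //
    //     SortHelper<T, K>[] helpers = SortHelper<T, K>.GenerateSortHelpers(partitions, groupState);
    //     // ... run helpers[i].Sort() on one concurrent task per partition ...
    //     // helpers[0]'s Sort() result is the final merged output.
    //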

    internal abstract class SortHelper<TInputOutput>
    {
        internal abstract TInputOutput[] Sort();
    }

    internal class SortHelper<TInputOutput, TKey> : SortHelper<TInputOutput>, IDisposable
    {
        private QueryOperatorEnumerator<TInputOutput, TKey> m_source; // The data source from which to pull data.
        private int m_partitionCount;                                 // The partition count.
        private int m_partitionIndex;                                 // This helper's index.

        // This data is shared among all partitions.
        private QueryTaskGroupState m_groupState;  // To communicate status, e.g. cancellation.
        private int[][] m_sharedIndices;           // Shared set of indices used during sorting.
        private GrowingArray<TKey>[] m_sharedKeys; // Shared keys with which to compare elements.
        private TInputOutput[][] m_sharedValues;   // The actual values used for comparisons.
        private Barrier[,] m_sharedBarriers;       // A matrix of barriers used for synchronizing during merges.
        private OrdinalIndexState m_indexState;    // State of the order index.
        private IComparer<TKey> m_keyComparer;     // Comparer for the order keys.

        //---------------------------------------------------------------------------------------
        // Creates a single sort helper object. This is marked private to ensure the only
        // code that creates one is the factory, since creating the full set of helpers
        // correctly involves shared-state dependencies that other places in the codebase
        // shouldn't need to worry about.
        //
        private SortHelper(QueryOperatorEnumerator<TInputOutput, TKey> source, int partitionCount, int partitionIndex,
                           QueryTaskGroupState groupState, int[][] sharedIndices,
                           OrdinalIndexState indexState, IComparer<TKey> keyComparer,
                           GrowingArray<TKey>[] sharedkeys, TInputOutput[][] sharedValues, Barrier[,] sharedBarriers)
        {
            Contract.Assert(source != null);
            Contract.Assert(groupState != null);
            Contract.Assert(sharedIndices != null);
            Contract.Assert(sharedkeys != null);
            Contract.Assert(sharedValues != null);
            Contract.Assert(sharedBarriers != null);
            Contract.Assert(groupState.CancellationState.MergedCancellationToken != null);

            Contract.Assert(sharedIndices.Length <= sharedkeys.Length);
            Contract.Assert(sharedIndices.Length == sharedValues.Length);
            Contract.Assert(sharedIndices.Length == sharedBarriers.GetLength(1));

            m_source = source;
            m_partitionCount = partitionCount;
            m_partitionIndex = partitionIndex;
            m_groupState = groupState;
            m_sharedIndices = sharedIndices;
            m_indexState = indexState;
            m_keyComparer = keyComparer;
            m_sharedKeys = sharedkeys;
            m_sharedValues = sharedValues;
            m_sharedBarriers = sharedBarriers;

            Contract.Assert(m_sharedKeys.Length >= m_sharedValues.Length);
        }

        //---------------------------------------------------------------------------------------
        // Factory method to create a bunch of sort helpers that are all related. Once created,
        // these helpers must all run concurrently with one another.
        //
        // Arguments:
        //     partitions - the input data partitions to be sorted
        //     groupState - common state used for communication (e.g. cancellation)
        //
        // Return Value:
        //     An array of helpers, one for each partition.
        //
        internal static SortHelper<TInputOutput, TKey>[] GenerateSortHelpers(
            PartitionedStream<TInputOutput, TKey> partitions, QueryTaskGroupState groupState)
        {
            int degreeOfParallelism = partitions.PartitionCount;
            SortHelper<TInputOutput, TKey>[] helpers = new SortHelper<TInputOutput, TKey>[degreeOfParallelism];

            // Calculate the next power of two greater than or equal to the DOP.
            // Also, calculate phaseCount = log2(degreeOfParallelismPow2).
            int degreeOfParallelismPow2 = 1, phaseCount = 0;
            while (degreeOfParallelismPow2 < degreeOfParallelism)
            {
                phaseCount++;
                degreeOfParallelismPow2 <<= 1;
            }
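
            // (For example, degreeOfParallelism = 5 yields degreeOfParallelismPow2 = 8 and
            // phaseCount = 3: the three merge phases pair partitions at strides 1, 2, and 4.)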

            // Initialize shared objects used during sorting.
            int[][] sharedIndices = new int[degreeOfParallelism][];
            GrowingArray<TKey>[] sharedKeys = new GrowingArray<TKey>[degreeOfParallelism];
            TInputOutput[][] sharedValues = new TInputOutput[degreeOfParallelism][];
            Barrier[,] sharedBarriers = new Barrier[phaseCount, degreeOfParallelism];

            if (degreeOfParallelism > 1)
            {
                // Initialize the barriers we need. Due to the logarithmic reduction, we don't
                // need to populate the whole matrix.
                int offset = 2; // During phase i, the left-most member of each merging pair sits
                                // at a partition index that is a multiple of 2^(i+1).
                for (int i = 0; i < sharedBarriers.GetLength(0); i++)
                {
                    for (int j = 0; j < sharedBarriers.GetLength(1); j++)
                    {
                        // As the phases increase, the barriers required become more and more sparse.
                        if ((j % offset) == 0)
                        {
                            sharedBarriers[i, j] = new Barrier(2);
                        }
                    }

                    offset *= 2;
                }
            }
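
            // (With degreeOfParallelism = 8, for instance, the loops above create barriers at
            // [0, {0,2,4,6}], [1, {0,4}], and [2, {0}] -- one per merging pair in each phase.)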

            // Lastly populate the array of sort helpers.
            for (int i = 0; i < degreeOfParallelism; i++)
            {
                helpers[i] = new SortHelper<TInputOutput, TKey>(
                    partitions[i], degreeOfParallelism, i,
                    groupState, sharedIndices,
                    partitions.OrdinalIndexState, partitions.KeyComparer,
                    sharedKeys, sharedValues, sharedBarriers);
            }

            return helpers;
        }

        //---------------------------------------------------------------------------------------
        // Disposes of this sort helper's expensive state.
        //
        public void Dispose()
        {
            // We only dispose of the barriers when the 1st partition finishes. That's because
            // all others depend on the shared barriers, so we can't get rid of them eagerly.
            if (m_partitionIndex == 0)
            {
                for (int i = 0; i < m_sharedBarriers.GetLength(0); i++)
                {
                    for (int j = 0; j < m_sharedBarriers.GetLength(1); j++)
                    {
                        Barrier b = m_sharedBarriers[i, j];
                        if (b != null)
                        {
                            // The matrix is sparse, so some cells were never populated.
                            b.Dispose();
                        }
                    }
                }
            }
        }

        //---------------------------------------------------------------------------------------
        // Sorts the data, possibly returning a result.
        //
        // Notes:
        //     This method makes some pretty fundamental assumptions about what concurrency
        //     exists in the system. Namely, it assumes all SortHelpers are running in
        //     parallel. If they aren't, Sort will end up waiting for certain events that
        //     will never happen -- i.e. we will deadlock.
        //
        internal override TInputOutput[] Sort()
        {
            // Step 1. Accumulate this partition's worth of input.
            GrowingArray<TKey> sourceKeys = null;
            List<TInputOutput> sourceValues = null;

            BuildKeysFromSource(ref sourceKeys, ref sourceValues);

            Contract.Assert(sourceValues != null, "values weren't populated");
            Contract.Assert(sourceKeys != null, "keys weren't populated");

            // Step 2. Locally sort this partition's key indices in-place.
            QuickSortIndicesInPlace(sourceKeys, sourceValues, m_indexState);

            // Step 3. Enter into the merging phases, each separated by several barriers.
            if (m_partitionCount > 1)
            {
                // We only need to merge if there is more than 1 partition.
                MergeSortCooperatively();
            }
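
            // Per the class comment above, after the merge only the last remaining partition
            // (index 0) holds the final sorted output; the arrays returned by the other
            // partitions are intermediate buffers.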
            return m_sharedValues[m_partitionIndex];
        }

        //-----------------------------------------------------------------------------------
        // Generates a list of values and keys from the data source. After calling this,
        // the keys and values lists will be populated; each key at index i corresponds to
        // the value at index i in the other list.
        //
        // Notes:
        //     Should only be called once per sort helper.
        //
        private void BuildKeysFromSource(ref GrowingArray<TKey> keys, ref List<TInputOutput> values)
        {
            values = new List<TInputOutput>();

            // Enumerate the whole input set, generating a key set in the process.
            CancellationToken cancelToken = m_groupState.CancellationState.MergedCancellationToken;
            try
            {
                TInputOutput current = default(TInputOutput);
                TKey currentKey = default(TKey);
                bool hadNext = m_source.MoveNext(ref current, ref currentKey);

                if (keys == null)
                {
                    keys = new GrowingArray<TKey>();
                }

                if (hadNext)
                {
                    int i = 0;
                    do
                    {
                        if ((i++ & CancellationState.POLL_INTERVAL) == 0)
                            CancellationState.ThrowIfCanceled(cancelToken);
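
                        // (POLL_INTERVAL is used here as a bit mask, so assuming it has the
                        // form 2^k - 1, the token is polled once every POLL_INTERVAL + 1
                        // elements rather than on every iteration.)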

                        // Accumulate the keys and values so that we can sort them in a moment.
                        keys.Add(currentKey);
                        values.Add(current);
                    }
                    while (m_source.MoveNext(ref current, ref currentKey));
                }
            }
            finally
            {
                // The source enumerator is fully consumed at this point, so release it.
                m_source.Dispose();
            }
        }

        //-----------------------------------------------------------------------------------
        // Produces a list of indices and sorts them in place using a local sort.
        //
        // Notes:
        //     Each element in the indices array is an index which refers to an element in
        //     the key/value array. After calling this routine, the indices will be ordered
        //     such that the keys they refer to are in ascending or descending order,
        //     according to the sort criteria used.
        //
        private void QuickSortIndicesInPlace(GrowingArray<TKey> keys, List<TInputOutput> values, OrdinalIndexState ordinalIndexState)
        {
            Contract.Assert(keys != null);
            Contract.Assert(values != null);
            Contract.Assert(keys.Count == values.Count);

            // Generate a list of indices in forward order. We will sort them in a moment.
            int[] indices = new int[values.Count];
            for (int i = 0; i < indices.Length; i++)
            {
                indices[i] = i;
            }

            // Now sort the indices in place.
            if (indices.Length > 1
                && ordinalIndexState.IsWorseThan(OrdinalIndexState.Increasing))
            {
                QuickSort(0, indices.Length - 1, keys.InternalArray, indices, m_groupState.CancellationState.MergedCancellationToken);
            }
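
            // (If the ordinal index state is Increasing or better, the keys are already in
            // order and the local quick-sort pass can be skipped entirely.)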

            if (m_partitionCount == 1)
            {
                // If there is only one partition, we will produce the final value set now,
                // since there will be no merge afterward (which is where we usually do this).
                TInputOutput[] sortedValues = new TInputOutput[values.Count];
                for (int i = 0; i < indices.Length; i++)
                {
                    sortedValues[i] = values[indices[i]];
                }
                m_sharedValues[m_partitionIndex] = sortedValues;
            }
            else
            {
                // Otherwise, a merge will happen. Generate the shared data structures.
                m_sharedIndices[m_partitionIndex] = indices;
                m_sharedKeys[m_partitionIndex] = keys;
                m_sharedValues[m_partitionIndex] = new TInputOutput[values.Count];

                // Copy local structures to shared space.
                values.CopyTo(m_sharedValues[m_partitionIndex]);
            }
        }

        //-----------------------------------------------------------------------------------
        // Works cooperatively with other concurrent sort helpers to produce a final sorted
        // output list of data. Here is an overview of the algorithm used.
        //
        // During each phase, we must communicate with a partner task. As a simple
        // illustration, imagine we have 8 partitions (P=8), numbered 0-7. There will be
        // Log2(O)+1 phases (separated by barriers), where O is the next power of two greater
        // than or equal to P, in the sort operation:
        //
        //     phase=L:  [0][1]  [2][3]  [4][5]  [6][7]
        //     phase=0:  [0,1]   [2,3]   [4,5]   [6,7]
        //     phase=1:  [0,2]           [4,6]
        //     phase=2:  [0,4]
        //
        // During phase L, each partition locally sorts its data. Then, at each subsequent
        // phase in the logarithmic reduction, two partitions are paired together and cooperate
        // to accomplish a portion of the merge. The left one then goes on to choose another
        // partner in the next phase, and the right one exits. And so on, until the final
        // phase, when there is just one partition left (the 0th), which may then publish the
        // final output from the sort operation.
        //
        // Notice we mentioned rounding up to the next power of two when determining the number
        // of phases. Values of P which aren't powers of 2 are slightly problematic, because
        // they create a load imbalance in one of the partitions and heighten the depth of the
        // logarithmic tree. As an illustration, imagine this case:
        //
        //     phase=L:  [0][1]  [2][3]  [4]
        //     phase=0:  [0,1]   [2,3]   [4,X]  [X,X]
        //     phase=1:  [0,2]           [4,X]
        //     phase=2:  [0,4]
        //
        // Partition #4 in this example performs its local sort during phase L, but then has
        // nothing to do during phases 0 and 1. (I.e. it has nobody to merge with.) Only during
        // phase 2 does it resume work and help perform that phase's merge. This is modeled a
        // bit as though there were actually 8 partitions (the next power of two greater than
        // or equal to 5). This example was chosen as an extreme case of imbalance: we stall a
        // processor (the 5th) for two complete phases. If P = 6 or 7, the problem would not be
        // nearly so bad, but if P = 9, the last partition would stall for yet another phase
        // (and so on for every power of two boundary). We handle these cases, but note that an
        // overabundance of them will probably negatively impact speedups.
        //
        private void MergeSortCooperatively()
        {
            CancellationToken cancelToken = m_groupState.CancellationState.MergedCancellationToken;

            int phaseCount = m_sharedBarriers.GetLength(0);
            for (int phase = 0; phase < phaseCount; phase++)
            {
                bool isLastPhase = (phase == (phaseCount - 1));

                // Calculate our partner for this phase.
                int partnerIndex = ComputePartnerIndex(phase);

                // If we have a partner (see above for non power of 2 cases and why the index
                // returned might be out of bounds), we will coordinate with the partner to
                // produce the merged output.
                if (partnerIndex < m_partitionCount)
                {
                    // Cache references to our local data.
                    int[] myIndices = m_sharedIndices[m_partitionIndex];
                    GrowingArray<TKey> myKeys = m_sharedKeys[m_partitionIndex];
                    TKey[] myKeysArr = myKeys.InternalArray;
                    TInputOutput[] myValues = m_sharedValues[m_partitionIndex];

                    // First we must rendezvous with our merge partner so we know the previous sort
                    // and merge phase has been completed. By convention, we always use the left-most
                    // partner's barrier for this; all that matters is that both use the same one.
                    m_sharedBarriers[phase, Math.Min(m_partitionIndex, partnerIndex)].SignalAndWait(cancelToken);
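
                    // (Each barrier in the matrix was created with two participants -- this
                    // pair of partitions -- and the same cell [phase, left index] is signaled
                    // three times per phase: at this rendezvous, after the left partner
                    // publishes the merged buffers, and once both halves of the merge are done.)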

                    // Grab the two sorted inputs and then merge them cooperatively into one list. One
                    // worker merges from left-to-right until it has placed elements up to the half-way
                    // point, and the other worker does the same, but only from right-to-left.
                    if (m_partitionIndex < partnerIndex)
                    {
                        // Before moving on to the actual merge, the left-most partition will allocate data
                        // to hold the merged indices and key/value pairs.

                        // First, remember a copy of all of the partner's lists.
                        int[] rightIndices = m_sharedIndices[partnerIndex];
                        TKey[] rightKeys = m_sharedKeys[partnerIndex].InternalArray;
                        TInputOutput[] rightValues = m_sharedValues[partnerIndex];

                        // We copy our own items into the partner's cells (overwriting what was
                        // there) so that it can retrieve them after the barrier. This is an
                        // exchange operation.
                        m_sharedIndices[partnerIndex] = myIndices;
                        m_sharedKeys[partnerIndex] = myKeys;
                        m_sharedValues[partnerIndex] = myValues;

                        int leftCount = myValues.Length;
                        int rightCount = rightValues.Length;
                        int totalCount = leftCount + rightCount;

                        // Now allocate the lists into which the merged data will go. Share these
                        // with the other thread so that it can place data into them as well.
                        int[] mergedIndices = null;
                        TInputOutput[] mergedValues = new TInputOutput[totalCount];

                        // We only need to remember indices and keys if there are further merge
                        // phases to run; on the last phase, the values alone suffice.
                        if (!isLastPhase)
                        {
                            mergedIndices = new int[totalCount];
                        }

                        // Publish our newly allocated merged data structures.
                        m_sharedIndices[m_partitionIndex] = mergedIndices;
                        m_sharedKeys[m_partitionIndex] = myKeys;
                        m_sharedValues[m_partitionIndex] = mergedValues;

                        Contract.Assert(myKeysArr != null);

                        m_sharedBarriers[phase, m_partitionIndex].SignalAndWait(cancelToken);

                        // Merge the left half into the shared merged space. This is a normal merge sort with
                        // the caveat that we stop merging once we reach the half-way point (since our partner
                        // is doing the same for the right half). Note that during the last phase we only
                        // copy the values and not the indices or keys.
                        int m = (totalCount + 1) / 2;
                        int i = 0, j0 = 0, j1 = 0;
                        while (i < m)
                        {
                            if ((i & CancellationState.POLL_INTERVAL) == 0)
                                CancellationState.ThrowIfCanceled(cancelToken);

                            if (j0 < leftCount && (j1 >= rightCount ||
                                                   m_keyComparer.Compare(myKeysArr[myIndices[j0]],
                                                                         rightKeys[rightIndices[j1]]) <= 0))
                            {
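                                // (The <= 0 comparison keeps the merge stable: on equal keys,
                                // the element from the left partition is placed first.)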
                                if (isLastPhase)
                                {
                                    mergedValues[i] = myValues[myIndices[j0]];
                                }
                                else
                                {
                                    mergedIndices[i] = myIndices[j0];
                                }
                                j0++;
                            }
                            else
                            {
                                if (isLastPhase)
                                {
                                    mergedValues[i] = rightValues[rightIndices[j1]];
                                }
                                else
                                {
                                    mergedIndices[i] = leftCount + rightIndices[j1];
                                }
                                j1++;
                            }

                            i++;
                        }

                        // If it's not the last phase, we just bulk propagate the values; our
                        // keys are already in place in the published key array.
                        if (!isLastPhase && leftCount > 0)
                        {
                            Array.Copy(myValues, 0, mergedValues, 0, leftCount);
                        }

                        // And now just wait for the second half. We never reuse the same barrier across multiple
                        // phases, so we can always dispose of it when we wake up.
                        m_sharedBarriers[phase, m_partitionIndex].SignalAndWait(cancelToken);
                    }
                    else
                    {
                        // Wait for the other partition to allocate the shared data.
                        m_sharedBarriers[phase, partnerIndex].SignalAndWait(cancelToken);

                        // After the barrier, the other partition will have made two things available to us:
                        // (1) its own indices, keys, and values, stored in the cell that used to hold our data,
                        // and (2) the arrays into which merged data will go, stored in its shared array cells.
                        // We will snag references to all of these things.
                        int[] leftIndices = m_sharedIndices[m_partitionIndex];
                        TKey[] leftKeys = m_sharedKeys[m_partitionIndex].InternalArray;
                        TInputOutput[] leftValues = m_sharedValues[m_partitionIndex];
                        int[] mergedIndices = m_sharedIndices[partnerIndex];
                        GrowingArray<TKey> mergedKeys = m_sharedKeys[partnerIndex];
                        TInputOutput[] mergedValues = m_sharedValues[partnerIndex];

                        Contract.Assert(leftValues != null);
                        Contract.Assert(leftKeys != null);

                        int leftCount = leftValues.Length;
                        int rightCount = myValues.Length;
                        int totalCount = leftCount + rightCount;

                        // Merge the right half into the shared merged space. This is a normal merge sort with
                        // the caveat that we stop merging once we reach the half-way point (since our partner
                        // is doing the same for the left half). Note that during the last phase we only
                        // copy the values and not the indices or keys.
                        int m = (totalCount + 1) / 2;
                        int i = totalCount - 1, j0 = leftCount - 1, j1 = rightCount - 1;
                        while (i >= m)
                        {
                            if ((i & CancellationState.POLL_INTERVAL) == 0)
                                CancellationState.ThrowIfCanceled(cancelToken);

                            if (j0 >= 0 && (j1 < 0 ||
                                            m_keyComparer.Compare(leftKeys[leftIndices[j0]],
                                                                  myKeysArr[myIndices[j1]]) > 0))
                            {
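                                // (The strict > 0 here mirrors the <= 0 in the left half: on
                                // equal keys the right element lands later, keeping the merge
                                // stable and the two workers' outputs consistent.)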
                                if (isLastPhase)
                                {
                                    mergedValues[i] = leftValues[leftIndices[j0]];
                                }
                                else
                                {
                                    mergedIndices[i] = leftIndices[j0];
                                }
                                j0--;
                            }
                            else
                            {
                                if (isLastPhase)
                                {
                                    mergedValues[i] = myValues[myIndices[j1]];
                                }
                                else
                                {
                                    mergedIndices[i] = leftCount + myIndices[j1];
                                }
                                j1--;
                            }

                            i--;
                        }

                        // If it's not the last phase, we just bulk propagate the keys and values.
                        if (!isLastPhase && myValues.Length > 0)
                        {
                            mergedKeys.CopyFrom(myKeysArr, myValues.Length);
                            Array.Copy(myValues, 0, mergedValues, leftCount, myValues.Length);
                        }

                        // Wait for our partner to finish copying too.
                        m_sharedBarriers[phase, partnerIndex].SignalAndWait(cancelToken);

                        // Now the greater of the two partners can leave; it's done.
                        break;
                    }
                }
            }
        }

        //---------------------------------------------------------------------------------------
        // Computes our partner index given the logarithmic reduction algorithm specified above.
        //
        private int ComputePartnerIndex(int phase)
        {
            int offset = 1 << phase;
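
            // For example, with phase = 1 (offset = 2): partition 0 pairs with 2, partition 2
            // with 0, partition 4 with 6, and partition 6 with 4. A result that is >= the
            // partition count means "no partner this phase" (the non-power-of-2 case above).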
            return m_partitionIndex + ((m_partitionIndex % (offset * 2)) == 0 ? offset : -offset);
        }

        //---------------------------------------------------------------------------------------
        // Sort algorithm used to sort key/value lists. After this has been called, the indices
        // will have been placed in sorted order based on the keys provided.
        //
        private void QuickSort(int left, int right, TKey[] keys, int[] indices, CancellationToken cancelToken)
        {
            Contract.Assert(keys != null, "need a non-null keyset");
            Contract.Assert(keys.Length >= indices.Length);
            Contract.Assert(left <= right);
            Contract.Assert(0 <= left && left < keys.Length);
            Contract.Assert(0 <= right && right < keys.Length);

            // Cancellation check: only poll for intervals that are wider than so many items,
            // otherwise this test is relatively expensive compared to the work being performed.
            if (right - left > CancellationState.POLL_INTERVAL)
                CancellationState.ThrowIfCanceled(cancelToken);

            do
            {
                int i = left;
                int j = right;
                int pivot = indices[i + ((j - i) >> 1)];
                TKey pivotKey = keys[pivot];

                do
                {
                    while (m_keyComparer.Compare(keys[indices[i]], pivotKey) < 0) i++;
                    while (m_keyComparer.Compare(keys[indices[j]], pivotKey) > 0) j--;

                    Contract.Assert(i >= left && j <= right, "(i>=left && j<=right) sort failed - bogus IComparer?");

                    if (i > j)
                    {
                        break;
                    }

                    if (i < j)
                    {
                        // Swap the two indices; the keys and values themselves stay put.
                        int tmp = indices[i];
                        indices[i] = indices[j];
                        indices[j] = tmp;
                    }

                    i++;
                    j--;
                }
                while (i <= j);

                // Recurse on the smaller half and loop on the larger one, bounding stack depth.
                if (j - left <= right - i)
                {
                    if (left < j)
                    {
                        QuickSort(left, j, keys, indices, cancelToken);
                    }
                    left = i;
                }
                else
                {
                    if (i < right)
                    {
                        QuickSort(i, right, keys, indices, cancelToken);
                    }
                    right = j;
                }
            }
            while (left < right);
        }
    }
}