1 //---------------------------------------------------------------------
2 // <copyright file="ProjectionPruner.cs" company="Microsoft">
3 // Copyright (c) Microsoft Corporation. All rights reserved.
7 // @backupOwner Microsoft
8 //---------------------------------------------------------------------
11 using System.Collections.Generic;
12 //using System.Diagnostics; // Please use PlanCompiler.Assert instead of Debug.Assert in this class...
14 // It is fine to use Debug.Assert in cases where you assert an obvious thing that is supposed
15 // to prevent from simple mistakes during development (e.g. method argument validation
16 // in cases where it was you who created the variables or the variables had already been validated or
17 // in "else" clauses where due to code changes (e.g. adding a new value to an enum type) the default
18 // "else" block is chosen why the new condition should be treated separately). This kind of asserts are
19 // (can be) helpful when developing new code to avoid simple mistakes but have no or little value in
20 // the shipped product.
21 // PlanCompiler.Assert *MUST* be used to verify conditions in the trees. These would be assumptions
22 // about how the tree was built etc. - in these cases we probably want to throw an exception (this is
23 // what PlanCompiler.Assert does when the condition is not met) if either the assumption is not correct
24 // or the tree was built/rewritten not the way we thought it was.
25 // Use your judgment - if you rather remove an assert than ship it use Debug.Assert otherwise use
26 // PlanCompiler.Assert.
28 using System.Globalization;
32 using md = System.Data.Metadata.Edm;
33 using cqt = System.Data.Common.CommandTrees;
34 using System.Data.Query.InternalTrees;
36 namespace System.Data.Query.PlanCompiler
40 /// The ProjectionPruner module is responsible for eliminating unnecessary column
41 /// references (and other expressions) from the query.
43 /// Projection pruning logically operates in two passes - the first pass is a top-down
44 /// pass where information about all referenced columns and expressions is collected
45 /// (pushed down from a node to its children).
47 /// The second phase is a bottom-up phase, where each node (in response to the
48 /// information collected above) attempts to rid itself of unwanted columns and
51 /// The two phases can be combined into a single tree walk, where for each node, the
52 /// processing is on the lines of:
54 /// - compute and push information to children (top-down)
55 /// - process children
56 /// - eliminate unnecessary references from myself (bottom-up)
59 internal class ProjectionPruner : BasicOpVisitorOfNode
61 #region Nested Classes
63 /// This class tracks down the vars that are referenced in the column map
65 private class ColumnMapVarTracker : ColumnMapVisitor<VarVec>
67 #region public methods
69 /// Find all vars that were referenced in the column map. Looks for VarRefColumnMap
70 /// in the ColumnMap tree, and tracks those vars
72 /// NOTE: The "vec" parameter must be supplied by the caller. The caller is responsible for
73 /// clearing out this parameter (if necessary) before calling into this function
75 /// <param name="columnMap">the column map to traverse</param>
76 /// <param name="vec">the set of referenced columns</param>
77 internal static void FindVars(ColumnMap columnMap, VarVec vec)
79 ColumnMapVarTracker tracker = new ColumnMapVarTracker();
80 columnMap.Accept<VarVec>(tracker, vec);
87 /// Trivial constructor
89 private ColumnMapVarTracker() : base() { }
94 /// Handler for VarRefColumnMap. Simply adds the "var" to the set of referenced vars
96 /// <param name="columnMap">the current varRefColumnMap</param>
97 /// <param name="arg">the set of referenced vars so far</param>
98 internal override void Visit(VarRefColumnMap columnMap, VarVec arg)
100 arg.Set(columnMap.Var);
101 base.Visit(columnMap, arg);
107 #region private state
109 private PlanCompiler m_compilerState;
110 private Command m_command { get { return m_compilerState.Command; } }
111 private VarVec m_referencedVars; // the list of referenced vars in the query
118 /// Trivial private constructor
120 /// <param name="compilerState">current compiler state</param>
121 private ProjectionPruner(PlanCompiler compilerState)
123 m_compilerState = compilerState;
124 m_referencedVars = compilerState.Command.CreateVarVec();
129 #region Process Driver
132 /// Runs through the root node of the tree, and eliminates all
133 /// unreferenced expressions
135 /// <param name="compilerState">current compiler state</param>
136 internal static void Process(PlanCompiler compilerState)
138 compilerState.Command.Root = Process(compilerState, compilerState.Command.Root);
142 /// Runs through the given subtree, and eliminates all
143 /// unreferenced expressions
145 /// <param name="compilerState">current compiler state</param>
146 /// <param name="node">The node to be processed</param>
147 /// <returns>The processed, i.e. transformed node</returns>
148 internal static Node Process(PlanCompiler compilerState, Node node)
150 ProjectionPruner pruner = new ProjectionPruner(compilerState);
151 return pruner.Process(node);
155 /// The real driver of the pruning process. Simply invokes the visitor over the input node
157 /// <param name="node">The node to be processed</param>
158 /// <returns>The processed node</returns>
159 private Node Process(Node node)
161 return VisitNode(node);
169 /// Adds a reference to this Var
171 /// <param name="v"></param>
172 private void AddReference(Var v)
174 m_referencedVars.Set(v);
178 /// Adds a reference to each var in a set of Vars
180 /// <param name="varSet"></param>
181 private void AddReference(IEnumerable<Var> varSet)
183 foreach (Var v in varSet)
190 /// Is this Var referenced?
192 /// <param name="v"></param>
193 /// <returns></returns>
194 private bool IsReferenced(Var v)
196 return m_referencedVars.IsSet(v);
199 /// Is this var unreferenced? Duh
201 /// <param name="v"></param>
202 /// <returns></returns>
203 private bool IsUnreferenced(Var v)
205 return !IsReferenced(v);
209 /// Prunes a VarMap - gets rid of unreferenced vars from the VarMap inplace
210 /// Additionally, propagates var references to the inner vars
212 /// <param name="varMap"></param>
213 private void PruneVarMap(VarMap varMap)
215 List<Var> unreferencedVars = new List<Var>();
216 // build up a list of unreferenced vars
217 foreach (Var v in varMap.Keys)
219 if (!IsReferenced(v))
221 unreferencedVars.Add(v);
225 AddReference(varMap[v]);
228 // remove each of the corresponding entries from the varmap
229 foreach (Var v in unreferencedVars)
236 /// Prunes a varset - gets rid of unreferenced vars from the Varset in place
238 /// <param name="varSet">the varset to prune</param>
239 private void PruneVarSet(VarVec varSet)
241 varSet.And(m_referencedVars);
246 #region Visitor Helpers
249 /// Visits the children and recomputes the node info
251 /// <param name="n">The current node</param>
252 protected override void VisitChildren(Node n)
254 base.VisitChildren(n);
255 m_command.RecomputeNodeInfo(n);
259 /// Visits the children in reverse order and recomputes the node info
261 /// <param name="n">The current node</param>
262 protected override void VisitChildrenReverse(Node n)
264 base.VisitChildrenReverse(n);
265 m_command.RecomputeNodeInfo(n);
269 #region Visitor methods
271 #region AncillaryOp Visitors
276 /// Walks the children (VarDefOp), and looks for those whose Vars
277 /// have been referenced. Only those VarDefOps are visited - the
278 /// others are ignored.
280 /// At the end, a new list of children is created - with only those
281 /// VarDefOps that have been referenced
283 /// <param name="op">the varDefListOp</param>
284 /// <param name="n">corresponding node</param>
285 /// <returns>modified node</returns>
286 public override Node Visit(VarDefListOp op, Node n)
289 // NOTE: It would be nice to optimize this to only create a new node
290 // and new list, if we needed to eliminate some arguments, but
291 // I'm not sure that the effort to eliminate the allocations
292 // wouldn't be more expensive than the allocations themselves.
293 // It's something that we can consider if it shows up on the
296 // Get rid of all the children that we don't care about (ie)
297 // those VarDefOp's that haven't been referenced
298 List<Node> newChildren = new List<Node>();
299 foreach (Node chi in n.Children)
301 VarDefOp varDefOp = chi.Op as VarDefOp;
302 if (IsReferenced(varDefOp.Var))
304 newChildren.Add(VisitNode(chi));
307 return m_command.CreateNode(op, newChildren);
315 /// PhysicalProjectOp
317 /// Insist that all Vars in this are required
319 /// <param name="op"></param>
320 /// <param name="n"></param>
321 /// <returns></returns>
322 public override Node Visit(PhysicalProjectOp op, Node n)
324 if (n == m_command.Root)
327 // Walk the column map to find all the referenced vars
329 ColumnMapVarTracker.FindVars(op.ColumnMap, m_referencedVars);
330 op.Outputs.RemoveAll(IsUnreferenced);
334 AddReference(op.Outputs);
336 // then visit the children
345 /// Common handling for all NestOps.
347 /// <param name="op"></param>
348 /// <param name="n"></param>
349 /// <returns></returns>
350 protected override Node VisitNestOp(NestBaseOp op, Node n)
352 // Mark all vars as needed
353 AddReference(op.Outputs);
355 // visit children. Need to do some more actually - to indicate that all
356 // vars from the children are really required.
362 /// SingleStreamNestOp
364 /// Insist (for now) that all Vars are required
366 /// <param name="op"></param>
367 /// <param name="n"></param>
368 /// <returns></returns>
369 public override Node Visit(SingleStreamNestOp op, Node n)
371 AddReference(op.Discriminator);
372 return VisitNestOp(op, n);
376 /// MultiStreamNestOp
378 /// Insist (for now) that all Vars are required
380 /// <param name="op"></param>
381 /// <param name="n"></param>
382 /// <returns></returns>
383 public override Node Visit(MultiStreamNestOp op, Node n)
385 return VisitNestOp(op, n);
390 #region RelOp Visitors
395 /// Common handling for all ApplyOps. Visit the right child first to capture
396 /// any references to the left, and then visit the left child.
398 /// <param name="op"></param>
399 /// <param name="n">the apply op</param>
400 /// <returns>modified subtree</returns>
401 protected override Node VisitApplyOp(ApplyBaseOp op, Node n)
403 // visit the right child first, then the left
404 VisitChildrenReverse(n);
411 /// We remove all null and constant keys that are not referenced as long as
412 /// there is one key left. We add all remaining keys to the referenced list
413 /// and proceed to the inputs
415 /// <param name="op">the DistinctOp</param>
416 /// <param name="n">Current subtree</param>
417 /// <returns></returns>
418 public override Node Visit(DistinctOp op, Node n)
420 if (op.Keys.Count > 1 && n.Child0.Op.OpType == OpType.Project)
422 RemoveRedundantConstantKeys(op.Keys, ((ProjectOp)n.Child0.Op).Outputs, n.Child0.Child1);
424 AddReference(op.Keys); // mark all keys as referenced - nothing more to do
425 VisitChildren(n); // visit the children
432 /// An ElementOp that is still present when Projection Prunning is invoked can only get introduced
433 /// in the TransformationRules phase by transforming an apply operation into a scalar subquery.
434 /// Such ElementOp serves as root of a defining expression of a VarDefinitionOp node and
435 /// thus what it produces is useful.
437 /// <param name="op">the ElementOp</param>
438 /// <param name="n">Current subtree</param>
439 /// <returns></returns>
440 public override Node Visit(ElementOp op, Node n)
442 ExtendedNodeInfo nodeInfo = m_command.GetExtendedNodeInfo(n.Child0);
443 AddReference(nodeInfo.Definitions);
445 n.Child0 = VisitNode(n.Child0); // visit the child
446 m_command.RecomputeNodeInfo(n);
453 /// First visit the predicate (because that may contain references to
454 /// the relop input), and then visit the relop input. No additional
455 /// processing is required
457 /// <param name="op">the filterOp</param>
458 /// <param name="n">current node</param>
459 /// <returns></returns>
460 public override Node Visit(FilterOp op, Node n)
462 // visit the predicate first, and then teh relop input
463 VisitChildrenReverse(n);
470 /// First, we visit the vardeflist for aggregates and potentially group aggregates
471 /// as they may reference keys (including constant keys).
472 /// Then we remove all null and constant keys that are not referenced as long as
473 /// there is one key left. We add all remaining key columns to the referenced list.
474 /// Then we walk through the vardeflist for the keys; and finally process the relop input
475 /// Once we're done, we update the "Outputs" varset - to account for any
476 /// pruned vars. The "Keys" varset will not change
478 /// <param name="op">the groupbyOp</param>
479 /// <param name="n">current subtree</param>
480 /// <returns>modified subtree</returns>
481 protected override Node VisitGroupByOp(GroupByBaseOp op, Node n)
483 // DevDiv#322980: Visit the vardeflist for aggregates and potentially group aggregates before removing
484 // redundant constant keys. This is because they may depend on (reference) the keys
485 for (int i = n.Children.Count - 1; i >= 2; i--)
487 n.Children[i] = VisitNode(n.Children[i]);
490 //All constant and null keys that are not referenced can be removed
491 //as long as there is at least one key left.
492 if (op.Keys.Count > 1)
494 RemoveRedundantConstantKeys(op.Keys, op.Outputs, n.Child1);
497 AddReference(op.Keys); // all keys are referenced
500 n.Children[1] = VisitNode(n.Children[1]);
503 n.Children[0] = VisitNode(n.Children[0]);
505 PruneVarSet(op.Outputs); // remove unnecessary vars from the outputs
507 //SQLBUDT #543064: If there are no keys to start with
508 // and none of the aggregates is referenced, the GroupBy
509 // is equivalent to a SingleRowTableOp
510 if (op.Keys.Count == 0 && op.Outputs.Count == 0)
512 return m_command.CreateNode(m_command.CreateSingleRowTableOp());
515 m_command.RecomputeNodeInfo(n);
520 /// Helper method for removing redundant constant keys from GroupByOp and DistictOp.
521 /// It only examines the keys defined in the given varDefListNode.
522 /// It removes all constant and null keys that are not referenced elsewhere,
523 /// but ensuring that at least one key is left.
524 /// It should not be called with empty keyVec.
526 /// <param name="keyVec">The keys</param>
527 /// <param name="outputVec">The var vec that needs to be updated along with the keys</param>
528 /// <param name="varDefListNode">Var def list node for the keys </param>
529 private void RemoveRedundantConstantKeys(VarVec keyVec, VarVec outputVec, Node varDefListNode)
531 //Find all the keys that are nulls and constants
532 List<Node> constantKeys = varDefListNode.Children.Where(d => d.Op.OpType == OpType.VarDef
533 && PlanCompilerUtil.IsConstantBaseOp(d.Child0.Op.OpType)).ToList();
535 VarVec constantKeyVars = this.m_command.CreateVarVec(constantKeys.Select(d => ((VarDefOp)d.Op).Var));
537 //Get the list of unreferenced constant keys
538 constantKeyVars.Minus(m_referencedVars);
540 //Remove the unreferenced constant keys
541 keyVec.Minus(constantKeyVars);
542 outputVec.Minus(constantKeyVars);
544 varDefListNode.Children.RemoveAll(c => constantKeys.Contains(c) && constantKeyVars.IsSet(((VarDefOp)c.Op).Var));
546 //If no keys are left add one.
547 if (keyVec.Count == 0)
549 Node keyNode = constantKeys.First();
550 Var keyVar = ((VarDefOp)keyNode.Op).Var;
552 outputVec.Set(keyVar);
553 varDefListNode.Children.Add(keyNode);
558 /// First defer to default handling for groupby nodes
559 /// If all group aggregate vars are prunned out turn it into a GroupBy.
561 /// <param name="op"></param>
562 /// <param name="n"></param>
563 /// <returns></returns>
564 public override Node Visit(GroupByIntoOp op, Node n)
566 Node result = VisitGroupByOp(op, n);
568 //Transform the GroupByInto into a GroupBy if all group aggregate vars were prunned out
569 if (result.Op.OpType == OpType.GroupByInto && n.Child3.Children.Count == 0)
571 GroupByIntoOp newOp = (GroupByIntoOp)result.Op;
573 result = m_command.CreateNode(m_command.CreateGroupByOp(newOp.Keys, newOp.Outputs),
574 result.Child0, result.Child1, result.Child2);
582 /// Common handling for all join ops. For all joins (other than crossjoin),
583 /// we must first visit the predicate (to capture any references from it), and
584 /// then visit the relop inputs. The relop inputs can be visited in any order
585 /// because there can be no correlations between them
586 /// For crossjoins, we simply use the default processing - visit all children
587 /// ; there can be no correlations between the nodes anyway
589 /// <param name="op"></param>
590 /// <param name="n">Node for the join subtree</param>
591 /// <returns>modified subtree</returns>
592 protected override Node VisitJoinOp(JoinBaseOp op, Node n)
594 // Simply visit all children for a CrossJoin
595 if (n.Op.OpType == OpType.CrossJoin)
601 // For other joins, we first need to visit the predicate, and then the
603 // first visit the predicate
604 n.Child2 = VisitNode(n.Child2);
605 // then visit the 2 join inputs
606 n.Child0 = VisitNode(n.Child0);
607 n.Child1 = VisitNode(n.Child1);
608 m_command.RecomputeNodeInfo(n);
616 /// We visit the projections first (the VarDefListOp child), and then
617 /// the input (the RelOp child) - this reverse order is necessary, since
618 /// the projections need to be visited to determine if anything from
619 /// the input is really needed.
621 /// The VarDefListOp child will handle the removal of unnecessary VarDefOps.
622 /// On the way out, we then update our "Vars" property to reflect the Vars
623 /// that have been eliminated
625 /// <param name="op">the ProjectOp</param>
626 /// <param name="n">the current node</param>
627 /// <returns>modified subtree</returns>
628 public override Node Visit(ProjectOp op, Node n)
631 // Update my Vars - to remove "unreferenced" vars. Do this before visiting
632 // the children - the outputs of the ProjectOp are only consumed by upstream
633 // consumers, and if a Var has not yet been referenced, its not needed upstream
634 PruneVarSet(op.Outputs);
636 // first visit the computed expressions, then visit the input relop
637 VisitChildrenReverse(n);
639 // If there are no Vars left, then simply return my child - otherwise,
640 // return the current node
641 return op.Outputs.IsEmpty ? n.Child0 : n;
647 /// Update the list of referenced columns
649 /// <param name="op"></param>
650 /// <param name="n"></param>
651 /// <returns></returns>
652 public override Node Visit(ScanTableOp op, Node n)
654 PlanCompiler.Assert(!n.HasChild0, "scanTable with an input?"); // no more views
655 // update the list of referenced columns in the table
656 op.Table.ReferencedColumns.And(m_referencedVars);
657 m_command.RecomputeNodeInfo(n);
664 /// Common handling for all SetOps. We first identify the "output" vars
665 /// that are referenced, and mark the corresponding "input" vars as referenced
666 /// We then remove all unreferenced output Vars from the "Outputs" varset
667 /// as well as from the Varmaps.
668 /// Finally, we visit the children
670 /// <param name="op"></param>
671 /// <param name="n">current node</param>
672 /// <returns></returns>
673 protected override Node VisitSetOp(SetOp op, Node n)
675 // Prune the outputs varset, except for Intersect and Except, which require
676 // all their outputs to compare, so don't bother pruning them.
677 if (OpType.Intersect == op.OpType || OpType.Except == op.OpType)
679 AddReference(op.Outputs);
682 PruneVarSet(op.Outputs);
684 // Prune the varmaps. Identify which of the setOp vars have been
685 // referenced, and eliminate those entries that don't show up. Additionally
686 // mark all the other Vars as referenced
687 foreach (VarMap varMap in op.VarMap)
692 // Now visit the children
700 /// First visit the sort keys - no sort key can be eliminated.
701 /// Then process the vardeflist child (if there is one) that contains computed
702 /// vars, and finally process the relop input. As before, the computedvars
703 /// and sortkeys need to be processed before the relop input
705 /// <param name="op">the sortop</param>
706 /// <param name="n">the current subtree</param>
707 /// <returns>modified subtree</returns>
708 protected override Node VisitSortOp(SortBaseOp op, Node n)
710 // first visit the sort keys
711 foreach (InternalTrees.SortKey sk in op.Keys)
713 AddReference(sk.Var);
715 // next walk through all the computed expressions
718 n.Child1 = VisitNode(n.Child1);
720 // finally process the input
721 n.Child0 = VisitNode(n.Child0);
723 m_command.RecomputeNodeInfo(n);
730 /// Marks the unnestVar as referenced, and if there
731 /// is a child, visits the child.
733 /// <param name="op">the unnestOp</param>
734 /// <param name="n">current subtree</param>
735 /// <returns>modified subtree</returns>
736 public override Node Visit(UnnestOp op, Node n)
738 AddReference(op.Var);
739 VisitChildren(n); // visit my vardefop - defining the unnest var(if any)
745 #region ScalarOps Visitors
748 // The only ScalarOps that need special processing are
749 // * VarRefOp: we mark the corresponding Var as referenced
750 // * ExistsOp: We mark the (only) Var of the child ProjectOp as referenced
753 #region ScalarOps with special treatment
758 /// Mark the corresponding Var as "referenced"
760 /// <param name="op">the VarRefOp</param>
761 /// <param name="n">current node</param>
762 /// <returns></returns>
763 public override Node Visit(VarRefOp op, Node n)
765 AddReference(op.Var);
772 /// The child must be a ProjectOp - with exactly 1 var. Mark it as referenced
774 /// <param name="op">the ExistsOp</param>
775 /// <param name="n">the input node</param>
776 /// <returns></returns>
777 public override Node Visit(ExistsOp op, Node n)
779 // Ensure that the child is a projectOp, and has exactly one var. Mark
780 // that var as referenced always
781 ProjectOp projectOp = (ProjectOp)n.Child0.Op;
783 //It is enougth to reference the first output, this usually is a simple constant
784 AddReference(projectOp.Outputs.First);