1 //---------------------------------------------------------------------
2 // <copyright file="CellTreeSimplifier.cs" company="Microsoft">
3 // Copyright (c) Microsoft Corporation. All rights reserved.
7 // @backupOwner Microsoft
8 //---------------------------------------------------------------------
10 using System.Data.Common.Utils;
11 using System.Data.Mapping.ViewGeneration.Structures;
12 using System.Collections.Generic;
14 using System.Diagnostics;
15 using System.Data.Metadata.Edm;
18 namespace System.Data.Mapping.ViewGeneration
21 // This class simplifies an extent's view. Given a view, runs the TM/SP
22 // rules to remove unnecessary self-joins or self-unions
23 internal class CellTreeSimplifier : InternalBase
27 private ViewgenContext m_viewgenContext;
32 private CellTreeSimplifier(ViewgenContext context)
34 m_viewgenContext = context;
38 #region Exposed Methods
39 // effects: see CellTreeNode.Simplify below
40 internal static CellTreeNode MergeNodes(CellTreeNode rootNode)
42 CellTreeSimplifier simplifier = new CellTreeSimplifier(rootNode.ViewgenContext);
43 return simplifier.SimplifyTreeByMergingNodes(rootNode);
46 // effects: Simplifies the tree rooted at rootNode and returns a new
47 // tree -- it ensures that the returned tree has at most one node for
48 // any particular extent unless the tree has nodes of the same extent
49 // embedded two leaves below LASJ or LOJ, e.g., if we have a tree
50 // (where Ni indicates a node for extent i - one Ni can be different
52 // [N0 IJ N1] LASJ N0 --> This will not be simplified
53 // canBooleansOverlap indicates whether an original input cell
54 // contributes to multiple nodes in this tree, e.g., V1 IJ V2 UNION V2 IJ V3
55 private CellTreeNode SimplifyTreeByMergingNodes(CellTreeNode rootNode)
58 if (rootNode is LeafCellTreeNode)
59 { // View already simple!
62 Debug.Assert(rootNode.OpType == CellTreeOpType.LOJ || rootNode.OpType == CellTreeOpType.IJ ||
63 rootNode.OpType == CellTreeOpType.FOJ || rootNode.OpType == CellTreeOpType.Union ||
64 rootNode.OpType == CellTreeOpType.LASJ,
65 "Only handle these operations");
67 // Before we apply any rule, check if we can improve the opportunity to
69 rootNode = RestructureTreeForMerges(rootNode);
71 List<CellTreeNode> children = rootNode.Children;
72 Debug.Assert(children.Count > 0, "OpCellTreeNode has no children?");
75 for (int i = 0; i < children.Count; i++)
77 children[i] = SimplifyTreeByMergingNodes(children[i]);
80 // Essentially, we have a node with IJ, LOJ, U or FOJ type that
81 // has some children. Check if some of the children can be merged
82 // with one another using the corresponding TM/SP rule
84 // Ops such as IJ, Union and FOJ are associative, i.e., A op (B
85 // op C) is the same as (A op B) op C. This is not true for LOJ
87 bool isAssociativeOp = CellTreeNode.IsAssociativeOp(rootNode.OpType);
90 // Group all the leaf cells of an extent together so that we can
91 // later simply run through them without running nested loops
92 // We do not do this for LOJ/LASJ nodes since LOJ (LASJ) is not commutative
94 children = GroupLeafChildrenByExtent(children);
98 children = GroupNonAssociativeLeafChildren(children);
101 // childrenSet keeps track of the children that need to be procesed/partitioned
102 OpCellTreeNode newNode = new OpCellTreeNode(m_viewgenContext, rootNode.OpType);
103 CellTreeNode lastChild = null;
104 bool skipRest = false;
105 foreach (CellTreeNode child in children)
107 if (lastChild == null)
109 // First time in the loop. Just set lastChild
114 bool mergedOk = false;
115 // try to merge lastChild and child
116 if (false == skipRest && lastChild.OpType == CellTreeOpType.Leaf &&
117 child.OpType == CellTreeOpType.Leaf)
119 // Both are cell queries. Can try to merge them
120 // We do not add lastChild since it could merge
121 // further. It will be added in a later loop or outside the loop
122 mergedOk = TryMergeCellQueries(rootNode.OpType, ref lastChild, child);
125 if (false == mergedOk)
127 // No merge occurred. Simply add the previous child as it
128 // is (Note lastChild will be added in the next loop or if
129 // the loop finishes, outside the loop
130 newNode.Add(lastChild);
132 if (false == isAssociativeOp)
134 // LOJ is not associative:
135 // (P loj PA) loj PO != P loj (PA loj PO). The RHS does not have
136 // Persons who have orders but no addresses
142 newNode.Add(lastChild);
143 CellTreeNode result = newNode.AssociativeFlatten();
148 #region Private Methods
149 // effects: Restructure tree so that it is better positioned for merges
150 private CellTreeNode RestructureTreeForMerges(CellTreeNode rootNode)
152 List<CellTreeNode> children = rootNode.Children;
153 if (CellTreeNode.IsAssociativeOp(rootNode.OpType) == false || children.Count <= 1)
158 // If this node's operator is associative and each child's
159 // operator is also associative, check if there is a common set
160 // of leaf nodes across all grandchildren
162 Set<LeafCellTreeNode> commonGrandChildren = GetCommonGrandChildren(children);
163 if (commonGrandChildren == null)
168 CellTreeOpType commonChildOpType = children[0].OpType;
170 // We do have the structure that we are looking for
171 // (common op2 gc2) op1 (common op2 gc3) op1 (common op2 gc4) becomes
172 // common op2 (gc2 op1 gc3 op1 gc4)
173 // e.g., (A IJ B IJ X IJ Y) UNION (A IJ B IJ Y IJ Z) UNION (A IJ B IJ R IJ S)
174 // becomes A IJ B IJ ((X IJ Y) UNION (Y IJ Z) UNION (R IJ S))
176 // From each child in children, get the nodes other than commonGrandChildren - these are gc2, gc3, ...
177 // Each gc2 must be connected by op2 as before, i.e., ABC + ACD = A(BC + CD)
179 // All children must be OpCellTreeNodes!
180 List<OpCellTreeNode> newChildren = new List<OpCellTreeNode>(children.Count);
181 foreach (OpCellTreeNode child in children)
183 // Remove all children in child that belong to commonGrandChildren
184 // All grandChildren must be leaf nodes at this point
185 List<LeafCellTreeNode> newGrandChildren = new List<LeafCellTreeNode>(child.Children.Count);
186 foreach (LeafCellTreeNode grandChild in child.Children)
188 if (commonGrandChildren.Contains(grandChild) == false)
190 newGrandChildren.Add(grandChild);
193 // In the above example, child.OpType is IJ
194 Debug.Assert(child.OpType == commonChildOpType);
195 OpCellTreeNode newChild = new OpCellTreeNode(m_viewgenContext, child.OpType,
196 Helpers.AsSuperTypeList<LeafCellTreeNode, CellTreeNode>(newGrandChildren));
197 newChildren.Add(newChild);
199 // Connect gc2 op1 gc3 op1 gc4 - op1 is UNION in this
200 // ((X IJ Y) UNION (Y IJ Z) UNION (R IJ S))
201 // rootNode.Type is UNION
202 CellTreeNode remainingNodes = new OpCellTreeNode(m_viewgenContext, rootNode.OpType,
203 Helpers.AsSuperTypeList<OpCellTreeNode, CellTreeNode>(newChildren));
204 // Take the common grandchildren and connect via commonChildType
206 CellTreeNode commonNodes = new OpCellTreeNode(m_viewgenContext, commonChildOpType,
207 Helpers.AsSuperTypeList<LeafCellTreeNode, CellTreeNode>(commonGrandChildren));
209 // Connect both by commonChildType
210 CellTreeNode result = new OpCellTreeNode(m_viewgenContext, commonChildOpType,
211 new CellTreeNode[] { commonNodes, remainingNodes });
213 result = result.AssociativeFlatten();
217 // effects: Given a set of nodes, determines if all nodes are the exact same associative opType AND
218 // there are leaf children that are common across the children "nodes". If there are any,
219 // returns them. Else return null
220 private static Set<LeafCellTreeNode> GetCommonGrandChildren(List<CellTreeNode> nodes)
222 Set<LeafCellTreeNode> commonLeaves = null;
224 // We could make this general and apply recursively but we don't for now
226 // Look for a tree of the form: (common op2 gc2) op1 (common op2 gc3) op1 (common op2 gc4)
227 // e.g., (A IJ B IJ X IJ Y) UNION (A IJ B IJ Y IJ Z) UNION (A IJ B IJ R IJ S)
228 // Where op1 and op2 are associative and common, gc2 etc are leaf nodes
229 CellTreeOpType commonChildOpType = CellTreeOpType.Leaf;
231 foreach (CellTreeNode node in nodes)
233 OpCellTreeNode opNode = node as OpCellTreeNode;
238 Debug.Assert(opNode.OpType != CellTreeOpType.Leaf, "Leaf type for op cell node?");
239 // Now check for whether the op is associative and the same as the previous one
240 if (commonChildOpType == CellTreeOpType.Leaf)
242 commonChildOpType = opNode.OpType;
244 else if (CellTreeNode.IsAssociativeOp(opNode.OpType) == false || commonChildOpType != opNode.OpType)
249 // Make sure all the children are leaf children
250 Set<LeafCellTreeNode> nodeChildrenSet = new Set<LeafCellTreeNode>(LeafCellTreeNode.EqualityComparer);
251 foreach (CellTreeNode grandChild in opNode.Children)
253 LeafCellTreeNode leafGrandChild = grandChild as LeafCellTreeNode;
254 if (leafGrandChild == null)
258 nodeChildrenSet.Add(leafGrandChild);
261 if (commonLeaves == null)
263 commonLeaves = nodeChildrenSet;
267 commonLeaves.Intersect(nodeChildrenSet);
271 if (commonLeaves.Count == 0)
273 // No restructuring possible
278 // effects: Given a list of node, produces a new list in which all
279 // leaf nodes of the same extent are adjacent to each other. Non-leaf
280 // nodes are also adjacent to each other. CHANGE_Microsoft_IMPROVE: Merge with GroupByRightExtent
281 private static List<CellTreeNode> GroupLeafChildrenByExtent(List<CellTreeNode> nodes)
283 // Keep track of leaf cells for each extent
284 KeyToListMap<EntitySetBase, CellTreeNode> extentMap =
285 new KeyToListMap<EntitySetBase, CellTreeNode>(EqualityComparer<EntitySetBase>.Default);
287 List<CellTreeNode> newNodes = new List<CellTreeNode>();
288 foreach (CellTreeNode node in nodes)
290 LeafCellTreeNode leafNode = node as LeafCellTreeNode;
291 // All non-leaf nodes are added to the result now
292 // leaf nodes are added outside the loop
293 if (leafNode != null)
295 extentMap.Add(leafNode.LeftCellWrapper.RightCellQuery.Extent, leafNode);
302 // Go through the map and add the leaf children
303 newNodes.AddRange(extentMap.AllValues);
307 // effects: A restrictive version of GroupLeafChildrenByExtent --
308 // only for LASJ and LOJ nodes (works for LOJ only when A LOJ B LOJ C
309 // s.t., B and C are subsets of A -- in our case that is how LOJs are constructed
310 private static List<CellTreeNode> GroupNonAssociativeLeafChildren(List<CellTreeNode> nodes)
312 // Keep track of leaf cells for each extent ignoring the 0th child
313 KeyToListMap<EntitySetBase, CellTreeNode> extentMap =
314 new KeyToListMap<EntitySetBase, CellTreeNode>(EqualityComparer<EntitySetBase>.Default);
316 List<CellTreeNode> newNodes = new List<CellTreeNode>();
317 List<CellTreeNode> nonLeafNodes = new List<CellTreeNode>();
319 newNodes.Add(nodes[0]);
320 for (int i = 1; i < nodes.Count; i++)
322 CellTreeNode node = nodes[i];
323 LeafCellTreeNode leafNode = node as LeafCellTreeNode;
324 // All non-leaf nodes are added to the result now
325 // leaf nodes are added outside the loop
326 if (leafNode != null)
328 extentMap.Add(leafNode.LeftCellWrapper.RightCellQuery.Extent, leafNode);
332 nonLeafNodes.Add(node);
335 // Go through the map and add the leaf children
336 // If a group of nodes exists for the 0th node's extent -- place
338 LeafCellTreeNode firstNode = nodes[0] as LeafCellTreeNode;
339 if (firstNode != null)
341 EntitySetBase firstExtent = firstNode.LeftCellWrapper.RightCellQuery.Extent;
342 if (extentMap.ContainsKey(firstExtent))
344 newNodes.AddRange(extentMap.ListForKey(firstExtent));
345 // Remove this set from the map
346 extentMap.RemoveKey(firstExtent);
349 newNodes.AddRange(extentMap.AllValues);
350 newNodes.AddRange(nonLeafNodes);
354 // requires: node1 and node2 are two children of the same parent
355 // connected by opType
356 // effects: Given two cell tree nodes, node1 and node2, runs the
357 // TM/SP rule on them to merge them (if they belong to the same
358 // extent). Returns true if the merge succeeds
359 private bool TryMergeCellQueries(CellTreeOpType opType, ref CellTreeNode node1,
363 LeafCellTreeNode leaf1 = node1 as LeafCellTreeNode;
364 LeafCellTreeNode leaf2 = node2 as LeafCellTreeNode;
366 Debug.Assert(leaf1 != null, "Merge only possible on leaf nodes (1)");
367 Debug.Assert(leaf2 != null, "Merge only possible on leaf nodes (2)");
369 CellQuery mergedLeftCellQuery;
370 CellQuery mergedRightCellQuery;
371 if (!TryMergeTwoCellQueries(leaf1.LeftCellWrapper.RightCellQuery, leaf2.LeftCellWrapper.RightCellQuery, opType, m_viewgenContext.MemberMaps.RightDomainMap, out mergedRightCellQuery))
376 if (!TryMergeTwoCellQueries(leaf1.LeftCellWrapper.LeftCellQuery, leaf2.LeftCellWrapper.LeftCellQuery, opType, m_viewgenContext.MemberMaps.LeftDomainMap, out mergedLeftCellQuery))
381 // Create a temporary node and add the two children
382 // so that we can get the merged selectiondomains and attributes
383 // Note that temp.SelectionDomain below determines the domain
384 // based on the opType, e.g., for IJ, it intersects the
385 // multiconstants of all the children
386 OpCellTreeNode temp = new OpCellTreeNode(m_viewgenContext, opType);
389 // Note: We are losing the original cell number information here and the line number information
390 // But we will not raise any
392 // We do not create CellExpressions with LOJ, FOJ - canBooleansOverlap is true for validation
393 CellTreeOpType inputOpType = opType;
394 if (opType == CellTreeOpType.FOJ || opType == CellTreeOpType.LOJ)
396 inputOpType = CellTreeOpType.IJ;
399 LeftCellWrapper wrapper = new LeftCellWrapper(m_viewgenContext.ViewTarget, temp.Attributes,
400 temp.LeftFragmentQuery,
402 mergedRightCellQuery,
403 m_viewgenContext.MemberMaps,
404 leaf1.LeftCellWrapper.Cells.Concat(leaf2.LeftCellWrapper.Cells));
405 node1 = new LeafCellTreeNode(m_viewgenContext, wrapper, temp.RightFragmentQuery);
410 // effects: Merges query2 with this according to the TM/SP rules for opType and
411 // returns the merged result. canBooleansOverlap indicates whether the bools in this and query2 can overlap, i.e.
412 // the same cells may have contributed to query2 and this earlier in the merge process
413 internal bool TryMergeTwoCellQueries(CellQuery query1, CellQuery query2, CellTreeOpType opType,
414 MemberDomainMap memberDomainMap, out CellQuery mergedQuery)
418 // Initialize g1 and g2 according to the TM/SP rules for IJ, LOJ, Union, FOJ cases
419 BoolExpression g1 = null;
420 BoolExpression g2 = null;
423 case CellTreeOpType.IJ:
425 case CellTreeOpType.LOJ:
426 case CellTreeOpType.LASJ:
427 g2 = BoolExpression.True;
429 case CellTreeOpType.FOJ:
430 case CellTreeOpType.Union:
431 g1 = BoolExpression.True;
432 g2 = BoolExpression.True;
435 Debug.Fail("Unsupported operator");
439 Dictionary<MemberPath, MemberPath> remap =
440 new Dictionary<MemberPath, MemberPath>(MemberPath.EqualityComparer);
442 //Continue merging only if both queries are over the same source
444 if (!query1.Extent.Equals(query2.Extent))
450 newRoot = query1.SourceExtentMemberPath;
453 // Conjuncts for ANDing with the previous whereClauses
454 BoolExpression conjunct1 = BoolExpression.True;
455 BoolExpression conjunct2 = BoolExpression.True;
456 BoolExpression whereClause = null;
460 case CellTreeOpType.IJ:
461 // Project[D1, D2, A, B, C] Select[cond1 and cond2] (T)
462 // We simply merge the two lists of booleans -- no conjuct is added
463 // conjunct1 and conjunct2 don't change
465 // query1.WhereCaluse AND query2.WhereCaluse
466 Debug.Assert(g1 == null && g2 == null, "IJ does not affect g1 and g2");
467 whereClause = BoolExpression.CreateAnd(query1.WhereClause, query2.WhereClause);
470 case CellTreeOpType.LOJ:
471 // conjunct1 does not change since D1 remains as is
472 // Project[D1, (expr2 and cond2 and G2) as D2, A, B, C] Select[cond1] (T)
473 // D1 does not change. New d2 is the list of booleans expressions
474 // for query2 ANDed with g2 AND query2.WhereClause
475 Debug.Assert(g1 == null, "LOJ does not affect g1");
476 conjunct2 = BoolExpression.CreateAnd(query2.WhereClause, g2);
477 // Just query1's whereclause
478 whereClause = query1.WhereClause;
481 case CellTreeOpType.FOJ:
482 case CellTreeOpType.Union:
483 // Project[(expr1 and cond1 and G1) as D1, (expr2 and cond2 and G2) as D2, A, B, C] Select[cond1] (T)
484 // New D1 is a list -- newD1 = D1 AND query1.WhereClause AND g1
485 // New D1 is a list -- newD2 = D2 AND query2.WhereClause AND g2
486 conjunct1 = BoolExpression.CreateAnd(query1.WhereClause, g1);
487 conjunct2 = BoolExpression.CreateAnd(query2.WhereClause, g2);
489 // The new whereClause -- g1 AND query1.WhereCaluse OR g2 AND query2.WhereClause
490 whereClause = BoolExpression.CreateOr(BoolExpression.CreateAnd(query1.WhereClause, g1),
491 BoolExpression.CreateAnd(query2.WhereClause, g2));
494 case CellTreeOpType.LASJ:
495 // conjunct1 does not change since D1 remains as is
496 // Project[D1, (expr2 and cond2 and G2) as D2, A, B, C] Select[cond1] (T)
497 // D1 does not change. New d2 is the list of booleans expressions
498 // for query2 ANDed with g2 AND NOT query2.WhereClause
499 Debug.Assert(g1 == null, "LASJ does not affect g1");
500 conjunct2 = BoolExpression.CreateAnd(query2.WhereClause, g2);
501 whereClause = BoolExpression.CreateAnd(query1.WhereClause, BoolExpression.CreateNot(conjunct2));
504 Debug.Fail("Unsupported operator");
508 // Create the various remapped parts for the cell query --
509 // boolean expressions, merged slots, whereclause, duplicate
510 // elimination, join tree
511 List<BoolExpression> boolExprs =
512 MergeBoolExpressions(query1, query2, conjunct1, conjunct2, opType);
513 //BoolExpression.RemapBools(boolExprs, remap);
515 ProjectedSlot[] mergedSlots;
516 if (false == ProjectedSlot.TryMergeRemapSlots(query1.ProjectedSlots, query2.ProjectedSlots, out mergedSlots))
518 // merging failed because two different right slots go to same left slot
522 whereClause = whereClause.RemapBool(remap);
524 CellQuery.SelectDistinct elimDupl = MergeDupl(query1.SelectDistinctFlag, query2.SelectDistinctFlag);
526 whereClause.ExpensiveSimplify();
527 mergedQuery = new CellQuery(mergedSlots, whereClause,
528 boolExprs, elimDupl, newRoot);
532 // effects: Given two duplicate eliination choices, returns an OR of them
533 static private CellQuery.SelectDistinct MergeDupl(CellQuery.SelectDistinct d1, CellQuery.SelectDistinct d2)
535 if (d1 == CellQuery.SelectDistinct.Yes || d2 == CellQuery.SelectDistinct.Yes)
537 return CellQuery.SelectDistinct.Yes;
541 return CellQuery.SelectDistinct.No;
545 // requires: query1 has the same number of boolean expressions as
546 // query2. There should be no index i for which query1's bools[i] !=
547 // null and query2's bools[i] != null
548 // effects: Given two cellqueries query1 and query2, merges their
549 // boolean expressions while ANDING query1 bools with conjunct1 and
550 // query2's bools with conjunct2 and returns the result
551 static private List<BoolExpression>
552 MergeBoolExpressions(CellQuery query1, CellQuery query2,
553 BoolExpression conjunct1, BoolExpression conjunct2, CellTreeOpType opType)
556 List<BoolExpression> bools1 = query1.BoolVars;
557 List<BoolExpression> bools2 = query2.BoolVars;
559 // Add conjuncts to both sets if needed
560 if (false == conjunct1.IsTrue)
562 bools1 = BoolExpression.AddConjunctionToBools(bools1, conjunct1);
565 if (false == conjunct2.IsTrue)
567 bools2 = BoolExpression.AddConjunctionToBools(bools2, conjunct2);
571 Debug.Assert(bools1.Count == bools2.Count);
572 List<BoolExpression> bools = new List<BoolExpression>();
573 // Both bools1[i] and bools2[i] be null for some of the i's. When
574 // we merge two (leaf) cells (say), only one boolean each is set
575 // in it; the rest are all nulls. If the SP/TM rules have been
576 // applied, more than one boolean may be non-null in a cell query
577 for (int i = 0; i < bools1.Count; i++)
579 BoolExpression merged = null;
580 if (bools1[i] == null)
584 else if (bools2[i] == null)
590 if (opType == CellTreeOpType.IJ)
592 merged = BoolExpression.CreateAnd(bools1[i], bools2[i]);
594 else if (opType == CellTreeOpType.Union)
596 merged = BoolExpression.CreateOr(bools1[i], bools2[i]);
598 else if (opType == CellTreeOpType.LASJ)
600 merged = BoolExpression.CreateAnd(bools1[i],
601 BoolExpression.CreateNot(bools2[i]));
605 Debug.Fail("No other operation expected for boolean merge");
610 merged.ExpensiveSimplify();
619 #region String methods
620 internal override void ToCompactString(StringBuilder builder)
622 m_viewgenContext.MemberMaps.ProjectedSlotMap.ToCompactString(builder);