|
23 | 23 | s :- [s/Any]] |
24 | 24 | (remove (partial = e) s)) |
25 | 25 |
|
(s/defn find-start :- EPVPattern
  "Returns the pattern that a path should start with: the first element of
  patterns (in the order given) whose count in pattern-counts is the smallest.
  Patterns that have no entry in pattern-counts are not considered. If none of
  the patterns has a count then the first pattern is returned (nil when patterns
  is empty)."
  [pattern-counts :- {EPVPattern s/Num}
   patterns :- [EPVPattern]]
  ;; The result is always taken from patterns itself, never from the keys of
  ;; pattern-counts, so any metadata attached to the pattern is retained.
  (let [counted (filter #(contains? pattern-counts %) patterns)]
    (if (seq counted)
      ;; strict < keeps the earliest pattern when counts are tied
      (reduce (fn [best p]
                (if (< (get pattern-counts p) (get pattern-counts best))
                  p
                  best))
              counted)
      ;; nothing to compare: avoid reducing min over an empty collection
      (first patterns))))
26 | 36 |
|
(s/defn paths :- [[EPVPattern]]
  "Returns a seq of the paths through the constraints. Every path begins with the
  pattern selected by find-start, and each following pattern shares at least one
  variable with the patterns that precede it in the path.
  Patterns must form a group."
  ([patterns :- [EPVPattern]
    pattern-counts :- {EPVPattern s/Num}]
   (s/letfn [(remaining-paths :- [[EPVPattern]]
               [bound :- #{Symbol}
                rpatterns :- [EPVPattern]]
               (if (empty? rpatterns)
                 ;; nothing left to place: a single empty continuation
                 [[]]
                 (mapcat
                  (fn [candidate]
                    (let [vars (get-vars candidate)]
                      ;; a candidate may extend the path only if it shares a var with
                      ;; what is already bound (or nothing is bound at all);
                      ;; otherwise this yields nil, which mapcat drops
                      (when (or (empty? bound) (seq (set/intersection vars bound)))
                        (let [others (without candidate rpatterns)]
                          (if (empty? others)
                            ;; last pattern: the path ends here
                            [[candidate]]
                            ;; otherwise prepend the candidate to every way of
                            ;; ordering the patterns that are left
                            (for [tail (remaining-paths (into bound vars) others)]
                              (cons candidate tail)))))))
                  rpatterns)))]
     (let [start (find-start pattern-counts patterns)
           tails (remaining-paths (get-vars start) (without start patterns))
           all-paths (map #(cons start %) tails)]
       (assert (every? (comp (partial = (count patterns)) count) all-paths)
               (str "No valid paths through: " (vec patterns)))
       all-paths))))
53 | 69 |
|
54 | 70 |
|
;; Predicate alias for vector?. Presumably EPV patterns are represented as vectors - TODO confirm.
55 | 71 | (def epv-pattern? vector?) |
|
75 | 91 | (recur (into plan nxt-filters) bound patterns remaining-filters) |
76 | 92 | (recur (conj plan np) (into bound (get-vars np)) rp filters))))))) |
77 | 93 |
|
(s/defn first-group :- [(s/one [Pattern] "group") (s/one [Pattern] "remainder")]
  "Extracts one group from a sequence of patterns, seeded by the first pattern.
  A pattern belongs to the group when it shares at least one var with some other
  pattern already in the group. Returns a pair: the patterns in the group,
  followed by the patterns that were left over."
  [[fp & rp] :- [Pattern]]
  (let [;; One reduction step over a candidate pattern. The accumulator is a
        ;; triple of: the vars known so far; the patterns accepted into the
        ;; group; the patterns rejected in this pass.
        classify (fn [[known in out] candidate]
                   (let [candidate-vars (get-vars candidate)]
                     (if (empty? (set/intersection known candidate-vars))
                       [known in (conj out candidate)]
                       [(into known candidate-vars) (conj in candidate) out])))
        ;; One full pass: the previously rejected patterns are examined again,
        ;; this time against the enlarged set of known vars.
        rescan (fn [[known in out]]
                 (reduce classify [known in []] out))]
    ;; Start from the first pattern and repeat passes until a pass changes
    ;; nothing. The leading set of vars is dropped from the result.
    (rest (u/fixpoint rescan [(get-vars fp) [fp] rp]))))
| 116 | + |
(s/defn min-join-path :- [EPVPattern]
  "Calculates a plan that minimizes joins. A plan is the order in which to evaluate
  constraints and join them to the accumulated evaluated data.
  The patterns are split into groups of patterns that are connected through shared
  variables (see first-group). Within each group, the candidate paths are compared
  by their sequence of counts from count-map, and the lowest one is used. The
  ordered groups are then concatenated in the order in which they were found.
  An empty sequence of patterns gives an empty plan."
  [patterns :- [Pattern]
   count-map :- {EPVPattern s/Num}]
  (if (empty? patterns)
    ;; first-group always seeds a group with the first pattern, so it must not be
    ;; called when there are no patterns at all
    []
    (loop [[grp rmdr] (first-group patterns) ordered []]
      (let [all-ordered (->> (paths grp count-map)
                             ;; each path is keyed by the vector of its pattern counts
                             (sort-by (partial mapv count-map))
                             first
                             (concat ordered))] ;; TODO: order groups, rather than concat as found
        (if (empty? rmdr)
          all-ordered
          (recur (first-group rmdr) all-ordered))))))
90 | 132 |
|
91 | 133 | (s/defn user-plan :- [EPVPattern] |
92 | 134 | "Returns the original path specified by the user" |
|
0 commit comments