# M D B l o c a l ASYA KAMSKY LEAD KNOW-IT-ALL MONGODB, INC @asya999 #askAsya PIPELINE POWER DOING MORE WITH MONGODB AGGREGATION FRAMEWORK
APPLICATIONS & DATA STORE RETRIEVE find() & aggregate()
# M D B l o c a l OPTIONS FOR ANALYTICS pre-aggregate aggregate in	MongoDB aggregate	elsewhere
# M D B l o c a l pre-aggregate aggregate in	MongoDB aggregate	elsewhere OPTIONS FOR ANALYTICS
# M D B l o c a l pre-aggregate aggregate in	MongoDB aggregate	elsewhere OPTIONS FOR ANALYTICS
⏱⏱
# M D B l o c a l pre-aggregate aggregate in	MongoDB aggregate	elsewhere OPTIONS FOR ANALYTICS
# M D B l o c a l ANALYTICS = AGGREGATION pre-aggregate aggregate in	MongoDB aggregate	elsewhere
PIPELINE
ps ax |grep mongod |head	1 *nix	command	line	pipe PIPELINE
$match $group | $sort| Input stream {} {} {} {} Result {} {} ... PIPELINE MongoDB	document	pipeline
Stage	1 Stage	2 Stage	3 Stage	4 {} {} {} {} {} {} {} {} DATA PIPELINE {} {} {} {} {"$stage":{	...	}} START Collection View Special	stage STAGES
{title: "The Great Gatsby", language: "English", subjects: "Long Island"} {title: "The Great Gatsby", language: "English", subjects: "New York"} {title: "The Great Gatsby", language: "English", subjects: "1920s"} {title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, {"$match":{"language":"English"}} $match { _id:"Long Island", count: 1 }, $group { _id: "New York", count: 2 }, $unwind { _id: "1920s", count: 1 }, $sort $skip$limit $project {"$unwind":"$subjects"} {"$group":{"_id":"$subjects", "count":{"$sum:1}} { _id: "Harlem", count: 1 }, { _id: "Long Island", count: 1 }, { _id: "New York", count: 2 }, { _id: "1920s", count: 1 }, {title: "Open City", language: "English", subjects: [ "New York" "Harlem" ] } { title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, { title: "War and Peace", language: "Russian", subjects: [ "Russia", "War of 1812", "Napoleon"] }, { title: "Open City", language: "English", subjects: [ "New York", "Harlem" ] }, {title: "Open City", language: "English", subjects: "New York"} {title: "Open City", language: "English", subjects: "Harlem"} { _id: "Harlem", count: 1 }, {"$sort:{"count":-1} {"$limit":3} {"$project":...}
STAGES
Group	and Transform Aliases Special •Input •Output Reorder Transform Decrease Increase
{title: "The Great Gatsby", language: "English", subjects: "Long Island"} {title: "The Great Gatsby", language: "English", subjects: "New York"} {title: "The Great Gatsby", language: "English", subjects: "1920s"} {title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, {"$match":{"language":"English"}} $match { _id:"Long Island", count: 1 }, $group { _id: "New York", count: 2 }, $unwind { _id: "1920s", count: 1 }, $sort $skip$limit $project {"$unwind":"$subjects"} {"$group":{"_id":"$subjects", "count":{"$sum:1}} { _id: "Harlem", count: 1 }, { _id: "Long Island", count: 1 }, { _id: "New York", count: 2 }, { _id: "1920s", count: 1 }, {title: "Open City", language: "English", subjects: [ "New York" "Harlem" ] } { title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, { title: "War and Peace", language: "Russian", subjects: [ "Russia", "War of 1812", "Napoleon"] }, { title: "Open City", language: "English", subjects: [ "New York", "Harlem" ] }, {title: "Open City", language: "English", subjects: "New York"} {title: "Open City", language: "English", subjects: "Harlem"} { _id: "Harlem", count: 1 }, {"$sort:{"count":-1} {"$limit":3} {"$project":...}
# M D B l o c a l LET ME EXPLAIN...
db.books.aggregate([ {$match:{"language":"English"}}, {$unwind:"$subjects"}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor" : {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
db.books.aggregate([ {$match:{"language":"English"}}, {$unwind:"$subjects"}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor" : {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
db.books.aggregate([ {$match:{"language":"English"}}, {$unwind:"$subjects"}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor" : {"query" : { }, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
db.books.aggregate([ {$unwind:"$subjects"}, {$match:{"language":"English"}}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true})
db.books.aggregate([ {$unwind:"$subjects"}, {$match:{"language":"English"}}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor" : {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
db.books.aggregate([ {$unwind:"$subjects"}, {$match:{"language":"English","subjects":/^[ABC]/}}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor" : {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$match" : {"subjects" : {"$regex" : "^[ABC]"}}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}}}}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
db.books.aggregate([ {$unwind:"$subjects"}, {$match:{"language":"English","subjects":/^[ABC]/}}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor" : {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$match" : {"subjects" : {"$regex" : "^[ABC]"}}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}}}}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
#MDBTOUR STREAMING VS BLOCKING
{title: "The Great Gatsby", language: "English". subjects: "Long Island"} {title: "The Great Gatsby", language: "English", subjects: "New York"} {title: "The Great Gatsby", language: "English", subjects: "1920s"} {title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, {"$match":{"language":"English"}} $match { _id:"Long Island", count: 1 }, $group { _id: "New York", count: 2 }, $unwind { _id: "1920s", count: 1 }, $sort $skip$limit $project {"$unwind":"$subjects"} {"$group":{"_id":"$subjects", "count":{"$sum:1}} { _id: "Harlem", count: 1 }, { _id: "Long Island", count: 1 }, { _id: "New York", count: 2 }, { _id: "1920s", count: 1 }, {title: "Open City", language: "English", subjects: [ "New York" "Harlem" ] } { title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, { title: "War and Peace", language: "Russian", subjects: [ "Russia", "War of 1812", "Napoleon"] }, { title: "Open City", language: "English", subjects: [ "New York", "Harlem" ] }, {title: "Open City", language: "English", subjects: "New York"} {title: "Open City", language: "English", subjects: "Harlem"} { _id: "Harlem", count: 1 }, {"$sort:{"count":-1} {"$limit":3} {"$project":...} $group $sort 1
Group	and Transform Aliases Special •Input •Output Reorder Transform Decrease Increase $group $sort
#MDBTOUR STREAMING VS BLOCKING RESOURCE USE
INPUT STAGE RESULTSSTAGE STREAMING RESOURCE USE
Each	document	is	streamed	through	in	RAM STREAMING RESOURCE USE
INPUT STAGE RESULTSSTAGE BLOCKING RESOURCE USE
Sort	with	Limit: uses	sizeOfDoc *	Limit	+	sizeOfDoc Sort	without	Limit: uses	sizeOfAllDocs Group: uses	sizeOfDoc *	numberOfGroups +	sizeOfDoc BLOCKING RESOURCE USE
#MDBTOUR STAGES, EXPRESSIONS, OPERATORS, ACCUMULATORS, OH MY!
# M D B l o c a l EXPRESSIONS
# M D B l o c a l ARRAY EXPRESSIONS
# M D B l o c a l $arrayElemAt $concatArrays $indexOfArray $isArray $size $range $reverseArray ARRAY EXPRESSIONS $map $reduce $filter $slice $zip $in ...	plus	all	the	set	expressions
# M D B l o c a l $map $reduce $filter $slice $zip $in ...	plus	all	the	set	expressions $arrayElemAt $concatArrays $indexOfArray $isArray $size $range $reverseArray ARRAY EXPRESSIONS
# M D B l o c a l $map input: array output: array $filter input: array output: subset of array $reduce input: array output: anything you want ARRAY EXPRESSIONS
# M D B l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$map": { "input": "$arr", "in": "$$this" }} {"$map":{ "input": "$arr", "as":"eachElem", "in":{"b":"$$eachElem.a"} } } {"$map":{ "input": {"$range":[0,{"$size":"$arr"}]}, "as":"index", "in":{"c":{"$arrayElemAt":["$arr.a","$$index"]}} } } ARRAY EXPRESSIONS "a" 1 "a" 99 "a" 5 "a" 3 "b" 1 "b" 99 "b" 5 "b" 3 "c" 1 "c" 99 "c" 5 "c" 3 "a" 1 "a" 99 "a" 5 "a" 3 "a" 1 "a" 99 "a" 5 "a" 3 0 1 2 3 {"$map": { "input": "$arr", "as": "var", "in": "$$var" }} $map {"$map": { "input": "$arr.a", "as": "eachElem", "in": {"b":"$$eachElem"} }} 1 99 5 3
# M D B l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$filter": { "input": "$arr", "cond":{"$lt":["$$this.a",10]} }} {"$filter":{ "input": "$arr", "as":"elem", "cond":{"$lt":["$$elem.a",10]} } } ARRAY EXPRESSIONS "a" 1 "a" 99 "a" 5 "a" 3 $filter "a" 1 "a" 5 "a" 3
# M D B l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$reduce": { "input": "$arr", "initialValue": 0, "in": {$add:["$$value","$$this.a"]} } } ARRAY EXPRESSIONS "a" 1 "a" 99 "a" 5 "a" 3 $reduce 01100105108 "a" 1 "a" 99 "a" 5 "a" 3
# M D B l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$reduce": { "input": "$arr", "initialValue": 0, "in": {$add:["$$value","$$this.a"]} } } {"$reduce":{ "input": "$arr", "intialValue":[], "in":{"$concatArrays":[ [ "$$this" ], "$$value" ]} } } ARRAY EXPRESSIONS $reduce "a" 1 "a" 99 "a" 5 "a" 3 108 "a" 1 "a" 99 "a" 5 "a" 3
# M D B l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$reduce": { "input": "$arr", "initialValue": 0, "in": {$add:["$$value","$$this.a"]} } } {"$reduce":{ "input": "$arr", "intialValue":[], "in":{"$concatArrays":[ [ "$$this" ], "$$value" ]} } } ARRAY EXPRESSIONS $reduce "a" 1 "a" 99 "a" 5 "a" 3 []"a" 1"a" 99 "a" 1 "a" 5 "a" 99 "a" 1 "a" 3 "a" 5 "a" 99 "a" 1 108 "a" 1 "a" 99 "a" 5 "a" 3
# M D B l o c a l READABILITY TIP
# M D B l o c a l Functions for expressions reverseArray = function(input) { return {"$reduce":{ "input": input, "intialValue":[], "in":{"$concatArrays":[ [ "$$this" ], "$$value" ]} }}; }; db.c.aggregate([ {"$addFields":{ "revArray":reverseArray("$origArray") } } ]) ENCAPSULATE COMPLEXITY
# M D B l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS {children: [ {name:"Max", dob:"1994-12-01", dep:true}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]}
# M D B l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.aggregate([ {$addFields:{ numChildren:{$size:"$children"}, numDependents:{$size:{ $filter:{ input:"$children.dep", cond: "$$this" } }} }}, ... ])
# M D B l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.find({$expr:{$gt:["$a","$b"]}})
# M D B l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.find({$expr: { $lt:[ {$size:{$filter:{ input:"$children.dep", cond:"$$this" }}}, 2 }})
# M D B l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.find({$expr:{$gt:[ {$let:{ vars:{dobs:{$map:{ input:"$children.dob" in: {$year:{$dateFromString:{ dateString:"$$this" }}}}}}, in:{$subtract:[ {$max:"$$dobs"}, {$min:"$$dobs"} ]}}}, 10 ]}})
# M D B l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.update({$expr: {$anyElementTrue:{$map:{ input:"$children", in: {$and:[ {$lt:["$$this.dob","1997-01-22"]}, "$$this.dep" ]} }}}}, {$set:{ audit:true }} )
# M D B l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.createCollection("c", validator: {$expr: { <anything you can express> }})
# M D B l o c a l NETWORK SUSPECT ACTIVITY DETECTION
start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T05:28:13Z") }
{$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T08:54:04Z") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T09:01:11Z") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T09:04:59Z") } ]}
$sort$match $group $addFields $match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]} {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}},
$sort$match $group $addFields $match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]} {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
$sort$match $group $addFields $match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] }
$sort$match $group $addFields $match $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]}
$sort$match $group $addFields $match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $addFields $match $proje {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffIpNum:{$size:{$setUnion:"$ips.ip"}}}}, {$match:{diffIpNum:{$gt:1}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]}
$sort$match $group $match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $addFields $match $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{$expr:{$gt:[{$size:"$diffIps"},1]}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
$sort$match $group $match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $addFields $match $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
$sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
$sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
$sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
$sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
$sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
$sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
$sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
$sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
# M D B l o c a l understand stages • Best order for performance • Avoid unnecessary "blocking" • keep "streaming" • Maximize use of indexes • early stages get the index! • Liberally check explain() output POWERFUL AGGREGATIONS understand expressions • Schema manipulation • Array transformation • Use in find query filter, ... use functions • Readable, debug-able, reusable
# M D B l o c a l Better performance & optimizations More stages & expressions More options for output Compass helper for aggregate Unify different languages THE FUTURE OF AGGREGATION
https://github.com/asya999/mdbw17 https://github.com/asya999/mdbw17
# M D B l o c a l THANK YOU! https://github.com/asya999/mdbw17

Doing More with MongoDB Aggregation

  • 1.
    # M DB l o c a l ASYA KAMSKY LEAD KNOW-IT-ALL MONGODB, INC @asya999 #askAsya PIPELINE POWER DOING MORE WITH MONGODB AGGREGATION FRAMEWORK
  • 2.
  • 4.
    # M DB l o c a l OPTIONS FOR ANALYTICS pre-aggregate aggregate in MongoDB aggregate elsewhere
  • 5.
    # M DB l o c a l pre-aggregate aggregate in MongoDB aggregate elsewhere OPTIONS FOR ANALYTICS
  • 6.
    # M DB l o c a l pre-aggregate aggregate in MongoDB aggregate elsewhere OPTIONS FOR ANALYTICS
  • 10.
  • 11.
    # M DB l o c a l pre-aggregate aggregate in MongoDB aggregate elsewhere OPTIONS FOR ANALYTICS
  • 12.
    # M DB l o c a l ANALYTICS = AGGREGATION pre-aggregate aggregate in MongoDB aggregate elsewhere
  • 13.
  • 14.
    ps ax |grepmongod |head 1 *nix command line pipe PIPELINE
  • 15.
    $match $group |$sort| Input stream {} {} {} {} Result {} {} ... PIPELINE MongoDB document pipeline
  • 16.
    Stage 1 Stage 2 Stage 3Stage 4 {} {} {} {} {} {} {} {} DATA PIPELINE {} {} {} {} {"$stage":{ ... }} START Collection View Special stage STAGES
  • 17.
    {title: "The GreatGatsby", language: "English", subjects: "Long Island"} {title: "The Great Gatsby", language: "English", subjects: "New York"} {title: "The Great Gatsby", language: "English", subjects: "1920s"} {title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, {"$match":{"language":"English"}} $match { _id:"Long Island", count: 1 }, $group { _id: "New York", count: 2 }, $unwind { _id: "1920s", count: 1 }, $sort $skip$limit $project {"$unwind":"$subjects"} {"$group":{"_id":"$subjects", "count":{"$sum:1}} { _id: "Harlem", count: 1 }, { _id: "Long Island", count: 1 }, { _id: "New York", count: 2 }, { _id: "1920s", count: 1 }, {title: "Open City", language: "English", subjects: [ "New York" "Harlem" ] } { title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, { title: "War and Peace", language: "Russian", subjects: [ "Russia", "War of 1812", "Napoleon"] }, { title: "Open City", language: "English", subjects: [ "New York", "Harlem" ] }, {title: "Open City", language: "English", subjects: "New York"} {title: "Open City", language: "English", subjects: "Harlem"} { _id: "Harlem", count: 1 }, {"$sort:{"count":-1} {"$limit":3} {"$project":...}
  • 18.
  • 20.
  • 21.
    {title: "The GreatGatsby", language: "English", subjects: "Long Island"} {title: "The Great Gatsby", language: "English", subjects: "New York"} {title: "The Great Gatsby", language: "English", subjects: "1920s"} {title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, {"$match":{"language":"English"}} $match { _id:"Long Island", count: 1 }, $group { _id: "New York", count: 2 }, $unwind { _id: "1920s", count: 1 }, $sort $skip$limit $project {"$unwind":"$subjects"} {"$group":{"_id":"$subjects", "count":{"$sum:1}} { _id: "Harlem", count: 1 }, { _id: "Long Island", count: 1 }, { _id: "New York", count: 2 }, { _id: "1920s", count: 1 }, {title: "Open City", language: "English", subjects: [ "New York" "Harlem" ] } { title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, { title: "War and Peace", language: "Russian", subjects: [ "Russia", "War of 1812", "Napoleon"] }, { title: "Open City", language: "English", subjects: [ "New York", "Harlem" ] }, {title: "Open City", language: "English", subjects: "New York"} {title: "Open City", language: "English", subjects: "Harlem"} { _id: "Harlem", count: 1 }, {"$sort:{"count":-1} {"$limit":3} {"$project":...}
  • 22.
    # M DB l o c a l LET ME EXPLAIN...
  • 23.
    db.books.aggregate([ {$match:{"language":"English"}}, {$unwind:"$subjects"}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 24.
    db.books.aggregate([ {$match:{"language":"English"}}, {$unwind:"$subjects"}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 25.
    db.books.aggregate([ {$match:{"language":"English"}}, {$unwind:"$subjects"}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { }, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 26.
  • 27.
    db.books.aggregate([ {$unwind:"$subjects"}, {$match:{"language":"English"}}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 28.
    db.books.aggregate([ {$unwind:"$subjects"}, {$match:{"language":"English","subjects":/^[ABC]/}}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$match" : {"subjects" : {"$regex" : "^[ABC]"}}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}}}}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 29.
    db.books.aggregate([ {$unwind:"$subjects"}, {$match:{"language":"English","subjects":/^[ABC]/}}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$match" : {"subjects" : {"$regex" : "^[ABC]"}}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}}}}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 30.
  • 31.
    {title: "The GreatGatsby", language: "English". subjects: "Long Island"} {title: "The Great Gatsby", language: "English", subjects: "New York"} {title: "The Great Gatsby", language: "English", subjects: "1920s"} {title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, {"$match":{"language":"English"}} $match { _id:"Long Island", count: 1 }, $group { _id: "New York", count: 2 }, $unwind { _id: "1920s", count: 1 }, $sort $skip$limit $project {"$unwind":"$subjects"} {"$group":{"_id":"$subjects", "count":{"$sum:1}} { _id: "Harlem", count: 1 }, { _id: "Long Island", count: 1 }, { _id: "New York", count: 2 }, { _id: "1920s", count: 1 }, {title: "Open City", language: "English", subjects: [ "New York" "Harlem" ] } { title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, { title: "War and Peace", language: "Russian", subjects: [ "Russia", "War of 1812", "Napoleon"] }, { title: "Open City", language: "English", subjects: [ "New York", "Harlem" ] }, {title: "Open City", language: "English", subjects: "New York"} {title: "Open City", language: "English", subjects: "Harlem"} { _id: "Harlem", count: 1 }, {"$sort:{"count":-1} {"$limit":3} {"$project":...} $group $sort 1
  • 32.
  • 33.
  • 34.
  • 35.
  • 36.
  • 37.
  • 38.
  • 39.
    # M DB l o c a l EXPRESSIONS
  • 43.
    # M DB l o c a l ARRAY EXPRESSIONS
  • 44.
    # M DB l o c a l $arrayElemAt $concatArrays $indexOfArray $isArray $size $range $reverseArray ARRAY EXPRESSIONS $map $reduce $filter $slice $zip $in ... plus all the set expressions
  • 45.
    # M DB l o c a l $map $reduce $filter $slice $zip $in ... plus all the set expressions $arrayElemAt $concatArrays $indexOfArray $isArray $size $range $reverseArray ARRAY EXPRESSIONS
  • 46.
    # M DB l o c a l $map input: array output: array $filter input: array output: subset of array $reduce input: array output: anything you want ARRAY EXPRESSIONS
  • 47.
    # M DB l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$map": { "input": "$arr", "in": "$$this" }} {"$map":{ "input": "$arr", "as":"eachElem", "in":{"b":"$$eachElem.a"} } } {"$map":{ "input": {"$range":[0,{"$size":"$arr"}]}, "as":"index", "in":{"c":{"$arrayElemAt":["$arr.a","$$index"]}} } } ARRAY EXPRESSIONS "a" 1 "a" 99 "a" 5 "a" 3 "b" 1 "b" 99 "b" 5 "b" 3 "c" 1 "c" 99 "c" 5 "c" 3 "a" 1 "a" 99 "a" 5 "a" 3 "a" 1 "a" 99 "a" 5 "a" 3 0 1 2 3 {"$map": { "input": "$arr", "as": "var", "in": "$$var" }} $map {"$map": { "input": "$arr.a", "as": "eachElem", "in": {"b":"$$eachElem"} }} 1 99 5 3
  • 48.
    # M DB l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$filter": { "input": "$arr", "cond":{"$lt":["$$this.a",10]} }} {"$filter":{ "input": "$arr", "as":"elem", "cond":{"$lt":["$$elem.a",10]} } } ARRAY EXPRESSIONS "a" 1 "a" 99 "a" 5 "a" 3 $filter "a" 1 "a" 5 "a" 3
  • 49.
    # M DB l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$reduce": { "input": "$arr", "initialValue": 0, "in": {$add:["$$value","$$this.a"]} } } ARRAY EXPRESSIONS "a" 1 "a" 99 "a" 5 "a" 3 $reduce 01100105108 "a" 1 "a" 99 "a" 5 "a" 3
  • 50.
    # M DB l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$reduce": { "input": "$arr", "initialValue": 0, "in": {$add:["$$value","$$this.a"]} } } {"$reduce":{ "input": "$arr", "intialValue":[], "in":{"$concatArrays":[ [ "$$this" ], "$$value" ]} } } ARRAY EXPRESSIONS $reduce "a" 1 "a" 99 "a" 5 "a" 3 108 "a" 1 "a" 99 "a" 5 "a" 3
  • 51.
    # M DB l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$reduce": { "input": "$arr", "initialValue": 0, "in": {$add:["$$value","$$this.a"]} } } {"$reduce":{ "input": "$arr", "intialValue":[], "in":{"$concatArrays":[ [ "$$this" ], "$$value" ]} } } ARRAY EXPRESSIONS $reduce "a" 1 "a" 99 "a" 5 "a" 3 []"a" 1"a" 99 "a" 1 "a" 5 "a" 99 "a" 1 "a" 3 "a" 5 "a" 99 "a" 1 108 "a" 1 "a" 99 "a" 5 "a" 3
  • 52.
    # M DB l o c a l READABILITY TIP
  • 53.
    # M DB l o c a l Functions for expressions reverseArray = function(input) { return {"$reduce":{ "input": input, "intialValue":[], "in":{"$concatArrays":[ [ "$$this" ], "$$value" ]} }}; }; db.c.aggregate([ {"$addFields":{ "revArray":reverseArray("$origArray") } } ]) ENCAPSULATE COMPLEXITY
  • 54.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS {children: [ {name:"Max", dob:"1994-12-01", dep:true}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]}
  • 55.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.aggregate([ {$addFields:{ numChildren:{$size:"$children"}, numDependents:{$size:{ $filter:{ input:"$children.dep", cond: "$$this" } }} }}, ... ])
  • 56.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.find({$expr:{$gt:["$a","$b"]}})
  • 57.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.find({$expr: { $lt:[ {$size:{$filter:{ input:"$children.dep", cond:"$$this" }}}, 2 }})
  • 58.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.find({$expr:{$gt:[ {$let:{ vars:{dobs:{$map:{ input:"$children.dob" in: {$year:{$dateFromString:{ dateString:"$$this" }}}}}}, in:{$subtract:[ {$max:"$$dobs"}, {$min:"$$dobs"} ]}}}, 10 ]}})
  • 59.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.update({$expr: {$anyElementTrue:{$map:{ input:"$children", in: {$and:[ {$lt:["$$this.dob","1997-01-22"]}, "$$this.dep" ]} }}}}, {$set:{ audit:true }} )
  • 60.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.createCollection("c", validator: {$expr: { <anything you can express> }})
  • 61.
    # M DB l o c a l NETWORK SUSPECT ACTIVITY DETECTION
  • 62.
  • 63.
    {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, $sort$match $group start=ISODate("...") end=ISODate("...") { user:"303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T08:54:04Z") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T09:01:11Z") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T09:04:59Z") } ]}
  • 64.
    $sort$match $group $addFields$match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]} {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}},
  • 65.
    $sort$match $group $addFields$match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]} {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
  • 66.
    $sort$match $group $addFields$match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] }
  • 67.
    $sort$match $group $addFields$match $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]}
  • 68.
    $sort$match $group $addFields$match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $addFields $match $proje {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffIpNum:{$size:{$setUnion:"$ips.ip"}}}}, {$match:{diffIpNum:{$gt:1}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]}
  • 69.
    $sort$match $group $match start=ISODate("...") end=ISODate("...") { user:"303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $addFields $match $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{$expr:{$gt:[{$size:"$diffIps"},1]}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 70.
    $sort$match $group $match start=ISODate("...") end=ISODate("...") { user:"303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $addFields $match $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 71.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 72.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 73.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 74.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 75.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
  • 76.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
  • 77.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
  • 78.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
  • 79.
    # M DB l o c a l understand stages • Best order for performance • Avoid unnecessary "blocking" • keep "streaming" • Maximize use of indexes • early stages get the index! • Liberally check explain() output POWERFUL AGGREGATIONS understand expressions • Schema manipulation • Array transformation • Use in find query filter, ... use functions • Readable, debug-able, reusable
  • 80.
    # M DB l o c a l Better performance & optimizations More stages & expressions More options for output Compass helper for aggregate Unify different languages THE FUTURE OF AGGREGATION
  • 81.
  • 82.
    # M DB l o c a l THANK YOU! https://github.com/asya999/mdbw17