How to make a double grouping in MongoDB? -
I am getting familiar with MongoDB for the first time, and a question arose when grouping data.
Given the following data for 2 days:
// Sample data: six documents over two days (19 and 20 Dec 2012), one
// "accepted" and one "delivered" event for each of the tag sets
// ["aaaa"], ["bbbb"] and [].
db.test.insert({
  "_id" : ObjectId("13edebb315d8952400407343"),
  "create_at" : ISODate("2012-12-19T12:00:00.000Z"),
  "item" : { "tags" : [ "aaaa" ], "event" : "accepted" }
});
db.test.insert({
  "_id" : ObjectId("13edebb39e60c73800b35727"),
  "create_at" : ISODate("2012-12-19T12:05:00.000Z"),
  "item" : { "tags" : [ "aaaa" ], "event" : "delivered" }
});
db.test.insert({
  "_id" : ObjectId("13edebb315d8952400407344"),
  "create_at" : ISODate("2012-12-19T13:40:00.000Z"),
  "item" : { "tags" : [ "bbbb" ], "event" : "accepted" }
});
db.test.insert({
  "_id" : ObjectId("13edebb39e60c73800b35728"),
  "create_at" : ISODate("2012-12-19T13:45:00.000Z"),
  "item" : { "tags" : [ "bbbb" ], "event" : "delivered" }
});
db.test.insert({
  "_id" : ObjectId("13edebb315d8952400407345"),
  "create_at" : ISODate("2012-12-20T16:30:00.000Z"),
  "item" : { "tags" : [], "event" : "accepted" }
});
db.test.insert({
  "_id" : ObjectId("13edebb39e60c73800b35729"),
  "create_at" : ISODate("2012-12-20T16:35:00.000Z"),
  "item" : { "tags" : [], "event" : "delivered" }
});
The output needs to be the following result:
{
  "total_count": 6,
  "items": [
    { "total_count": 2, "created_at": "Wed, 19 Dec 2012 00:00:00 GMT", "tags": { "aaaa": 1, "bbbb": 1 }, "event": "sent" },
    { "total_count": 2, "created_at": "Wed, 19 Dec 2012 00:00:00 GMT", "tags": { "aaaa": 1, "bbbb": 1 }, "event": "delivered" },
    { "total_count": 1, "created_at": "Thu, 20 Dec 2012 00:00:00 GMT", "tags": {}, "event": "sent" },
    { "total_count": 1, "created_at": "Thu, 20 Dec 2012 00:00:00 GMT", "tags": {}, "event": "delivered" }
  ]
}
So far I have managed to form part of the necessary data with this request:
// Groups documents by event type and day of year and counts the documents in
// each group. Note that `tags` collects the distinct tag ARRAYS (not per-tag
// counts) and `created_at` keeps the first timestamp seen in the group, not
// the start of the day.
db.test.aggregate([
  {
    $group: {
      _id: { event: '$item.event', doy: { $dayOfYear: '$create_at' } },
      total_count: { $sum: 1 },
      created_at: { $first: '$create_at' },
      tags: { $addToSet: '$item.tags' }
    }
  },
  { $project: { total_count: 1, _id: 0, event: '$_id.event', created_at: 1, tags: 1 } }
])
But how do I get the necessary tag information, i.e. turn the arrays of tags into each tag with its count? And how do I make the date indicate the beginning of the day (00:00:00)?
wdberkeley
has made an analysis of the question. I'll add mine below:
- Elements (such as "aaaa") of tags become keys in the final sub-document. I'm not aware that the aggregation pipeline can do this.
- The output requires the form of
tags:{tag:count, ...}
, so the $unwind operator would have to be used in a pipeline-style operation. But the output requires that documents with empty tags still be kept, and the $unwind operator ignores these documents. - JS code is prohibited in pipeline operations, so a pipeline operation can't implement the date format (such as
"created_at" : "Thu, 20 Dec 2012 00:00:00 GMT"
).
Conclusion: use mapReduce instead of the aggregation pipeline. The following code passed in the mongo shell.
/**
 * Map phase: emits one record per document, keyed by event type and day.
 *
 * Runs with `this` bound to the current document; `emit` is provided by the
 * mapReduce runtime. The day is taken in UTC and zero-padded (YYYY-MM-DD) so
 * that it does not depend on the server's timezone and can be parsed back
 * reliably by `new Date(...)` in the finalize step.
 */
function map() {
    var date = this.create_at;
    var month = date.getUTCMonth() + 1; // getUTCMonth() is 0-based
    var day = date.getUTCDate();
    var datestr = date.getUTCFullYear() + "-" +
        (month < 10 ? "0" : "") + month + "-" +
        (day < 10 ? "0" : "") + day;

    // Count occurrences of each tag within this document. hasOwnProperty keeps
    // tags named like built-ins (e.g. "constructor") from reading inherited
    // members of the plain object used as the counter.
    var hasOwn = Object.prototype.hasOwnProperty;
    var tags = {};
    var tagstemp = this.item.tags;
    if (tagstemp != null) {
        for (var x = 0; x < tagstemp.length; x++) {
            var tag = tagstemp[x];
            tags[tag] = (hasOwn.call(tags, tag) ? tags[tag] : 0) + 1;
        }
    }

    emit({ event : this.item.event, datestr : datestr }, { total_count: 1, tags : tags });
}

/**
 * Reduce phase: merges the partial results emitted for one (event, day) key.
 *
 * @param {Object} key - the { event, datestr } key (unused here).
 * @param {Array} values - objects of the form { total_count, tags }.
 * @returns {Object} { total_count, tags } with document counts and per-tag
 *     counts summed over all values; same shape as the map output.
 */
function reduce(key, values) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var tags = {};
    var total_count = 0;
    values.forEach(function(value) {
        for (var tag in value.tags) {
            if (!hasOwn.call(value.tags, tag)) {
                continue;
            }
            tags[tag] = (hasOwn.call(tags, tag) ? tags[tag] : 0) + value.tags[tag];
        }
        total_count += value.total_count;
    });
    return { total_count: total_count, tags : tags };
}

/**
 * Finalize phase: copies the key fields into the reduced value.
 *
 * @param {Object} key - the { event, datestr } key.
 * @param {Object} reducevalue - the reduced { total_count, tags } value; it is
 *     modified in place.
 * @returns {Object} reducevalue with `create_at` (UTC string for midnight of
 *     the day) and `event` added.
 */
function finalhandle(key, reducevalue) {
    // A date-only ISO string is parsed as midnight UTC.
    reducevalue.create_at = new Date(key.datestr).toUTCString();
    reducevalue.event = key.event;
    return reducevalue;
}

// Run the job and print the combined result. The guard only matters when this
// file is loaded outside the mongo shell (where `db` does not exist); inside
// the shell the job runs unconditionally as before.
if (typeof db !== "undefined") {
    var mr = db.test.mapReduce(map, reduce, {finalize: finalhandle, out:{inline:1}});
    var total = 0;
    var items = [];
    mr.results.forEach(function(x) {
        items.push(x.value);
        total += x.value.total_count;
    });
    printjson({total_count: total, items: items});
}
Comments
Post a Comment