How to make a double grouping in MongoDB? -


I am getting familiar with MongoDB for the first time, and a question arose when grouping data.
Given data for 2 days:

// Sample data for the question: for each tag set there is one "accepted" and one
// "delivered" event. Two tag sets ("aaaa", "bbbb") fall on 2012-12-19 and one
// untagged pair falls on 2012-12-20. Timestamps are UTC.
// ObjectId / ISODate are mongo shell helpers and are case-sensitive.
db.test.insert({
    "_id" : ObjectId("13edebb315d8952400407343"),
    "create_at" : ISODate("2012-12-19T12:00:00.000Z"),
    "item" : {
        "tags" : [
            "aaaa"
        ],
        "event" : "accepted"
    }
});

db.test.insert({
    "_id" : ObjectId("13edebb39e60c73800b35727"),
    "create_at" : ISODate("2012-12-19T12:05:00.000Z"),
    "item" : {
        "tags" : [
            "aaaa"
        ],
        "event" : "delivered"
    }
});

db.test.insert({
    "_id" : ObjectId("13edebb315d8952400407344"),
    "create_at" : ISODate("2012-12-19T13:40:00.000Z"),
    "item" : {
        "tags" : [
            "bbbb"
        ],
        "event" : "accepted"
    }
});

db.test.insert({
    "_id" : ObjectId("13edebb39e60c73800b35728"),
    "create_at" : ISODate("2012-12-19T13:45:00.000Z"),
    "item" : {
        "tags" : [
            "bbbb"
        ],
        "event" : "delivered"
    }
});

db.test.insert({
    "_id" : ObjectId("13edebb315d8952400407345"),
    "create_at" : ISODate("2012-12-20T16:30:00.000Z"),
    "item" : {
        "tags" : [],
        "event" : "accepted"
    }
});

db.test.insert({
    "_id" : ObjectId("13edebb39e60c73800b35729"),
    "create_at" : ISODate("2012-12-20T16:35:00.000Z"),
    "item" : {
        "tags" : [],
        "event" : "delivered"
    }
});


The required output is:

{
  "total_count": 6,
  "items": [
    {
      "total_count": 2,
      "created_at": "Wed, 19 Dec 2012 00:00:00 GMT",
      "tags": {
        "aaaa": 1,
        "bbbb": 1
      },
      "event": "sent"
    },
    {
      "total_count": 2,
      "created_at": "Wed, 19 Dec 2012 00:00:00 GMT",
      "tags": {
        "aaaa": 1,
        "bbbb": 1
      },
      "event": "delivered"
    },
    {
      "total_count": 1,
      "created_at": "Thu, 20 Dec 2012 00:00:00 GMT",
      "tags": {},
      "event": "sent"
    },
    {
      "total_count": 1,
      "created_at": "Thu, 20 Dec 2012 00:00:00 GMT",
      "tags": {},
      "event": "delivered"
    }
  ]
}


So far I have managed to produce only part of the necessary data, with this request:

// Partial attempt from the question: groups documents by (event, day of year)
// and counts them. Operator names are case-sensitive ($dayOfYear, $addToSet).
db.test.aggregate([
    {
        $group: {
            _id: { event: '$item.event', doy: { $dayOfYear: '$create_at' } },
            total_count: { $sum: 1 },
            // Timestamp of the first document in the group, not midnight of that day.
            created_at: { $first: '$create_at' },
            // Collects the distinct tag *arrays* of the group, not a per-tag count.
            tags: { $addToSet: '$item.tags' }
        }
    },
    // Flatten the compound _id so that only the event name is exposed.
    { $project: { total_count: 1, _id: 0, event: '$_id.event', created_at: 1, tags: 1 } }
]);

But how do I get the necessary tag information, i.e. a map of each tag to its count instead of an array of tag arrays? And how do I make the date indicate the beginning of the day (00:00:00)?

wdberkeley made an analysis of the question. I add mine below:

  1. The elements (such as "aaaa") in tags must become keys in the final sub-document. I'm not aware that the aggregation pipeline can do this.
  2. The output requires the form tags: {tag: count, ...}, so the $unwind operator would have to be used in a pipeline-style solution. However, the output also requires that documents with empty tags be kept, and the $unwind operator ignores these documents.
  3. JS code is prohibited in pipeline operations, so a pipeline cannot implement the date format (such as "created_at" : "Thu, 20 Dec 2012 00:00:00 GMT").

Conclusion: use mapReduce instead of the aggregation pipeline. The following code passed in the mongo shell.

/**
 * Map step: emits one record per document, keyed by (event, UTC calendar day).
 *
 * Runs with `this` bound to the current document. The emitted value has the
 * same shape as the value returned by `reduce`, so the reduce step can be
 * re-applied to its own output.
 *
 * Everything the function needs is kept inside its body: mapReduce ships the
 * function source to the server, so shell-side helpers would not be visible.
 */
function map() {
    var date = this.create_at;
    // UTC getters keep the day bucket independent of the time zone the
    // JavaScript engine happens to run in.
    var datestr = date.getUTCFullYear() + "-" + (date.getUTCMonth() + 1) + "-"
            + date.getUTCDate();
    var tags = {};
    var tagstemp = this.item.tags;
    if (tagstemp != null) {
        for (var x = 0; x < tagstemp.length; x++) {
            var tag = tagstemp[x];
            // Own-property check so tags named like Object.prototype members
            // (e.g. "constructor") are counted correctly.
            var seen = Object.prototype.hasOwnProperty.call(tags, tag);
            tags[tag] = seen ? (tags[tag] + 1) : 1;
        }
    }
    emit({
        event : this.item.event,
        datestr : datestr
    }, {
        total_count : 1,
        tags : tags
    });
}

/**
 * Reduce step: merges the partial results of one (event, day) key.
 *
 * @param {Object} key - The {event, datestr} key emitted by `map` (unused).
 * @param {Array} values - Partial results of the form {total_count, tags}.
 * @returns {Object} The summed {total_count, tags} for the key.
 */
function reduce(key, values) {
    var tags = {};
    var total_count = 0;
    values.forEach(function(value) {
        for (var tag in value.tags) {
            var seen = Object.prototype.hasOwnProperty.call(tags, tag);
            tags[tag] = (seen ? tags[tag] : 0) + value.tags[tag];
        }
        total_count += value.total_count;
    });
    return {
        total_count : total_count,
        tags : tags
    };
}

/**
 * Finalize step: copies the key fields into the reduced value.
 *
 * @param {Object} key - The {event, datestr} key emitted by `map`.
 * @param {Object} reducevalue - The reduced {total_count, tags} value.
 * @returns {Object} `reducevalue` extended with `create_at` (UTC midnight of
 *     the day, formatted like "Wed, 19 Dec 2012 00:00:00 GMT") and `event`.
 */
function finalhandle(key, reducevalue) {
    // Build the date from its numeric parts instead of parsing the string:
    // parsing a non-zero-padded "Y-M-D" string is implementation-dependent.
    var parts = key.datestr.split("-");
    var midnight = new Date(Date.UTC(Number(parts[0]), Number(parts[1]) - 1,
            Number(parts[2])));
    reducevalue.create_at = midnight.toUTCString();
    reducevalue.event = key.event;
    return reducevalue;
}

/**
 * Runs the inline map-reduce on a collection and assembles the report.
 *
 * @param {Object} collection - A collection object exposing `mapReduce`.
 * @returns {Object} {total_count, items}, where items holds one finalized
 *     entry per (event, day) group.
 */
function summarize(collection) {
    var mr = collection.mapReduce(map, reduce, {finalize: finalhandle, out: {inline: 1}});

    var total = 0;
    var items = [];

    mr.results.forEach(function(x) {
        items.push(x.value);
        total += x.value.total_count;
    });

    return {total_count: total, items: items};
}

// Only run against the database when the shell's `db` global exists, so the
// functions above can also be loaded and exercised outside the mongo shell.
if (typeof db !== "undefined") {
    printjson(summarize(db.test));
}

Comments

Popular posts from this blog

javascript - Jquery show_hide, what to add in order to make the page scroll to the bottom of the hidden field once button is clicked -

python - Django-cities exits with "killed" -

python - How to get a widget position inside it's layout in Kivy? -