Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-1904

map-reduce produces different results for an indexed query

    • Type: Bug
    • Resolution: Done
    • Priority: Major - P3
    • 1.7.3
    • Affects Version/s: 1.6.2
    • Component/s: Index Maintenance
    • None
    • Environment:
      Ubuntu 10.0.4 on EC2
    • Minor Change
    • ALL

      I've been investigating some unusual numbers in my map-reduce results
      and made an interesting discovery. If the map-reduce query is on an
      indexed array field, and more than one value in the array matches the
      query, the document is mapped more than once. A simple example is below:
      // make sure the collection is empty
      > db.example.drop()
      true
      > db.example.save({ arr : [1, 2] })

      // just aggregate by _id
      > map = function() { emit(this._id, 1) }

      function () {
      emit(this._id, 1);
      }

      // count the values
      > reduce = function(k,vals) {
      ... var sum=0;
      ... for(var i in vals) sum += vals[i];
      ... return sum;
      ... }


      function (k, vals) {
      var sum = 0;
      for (var i in vals) {
      sum += vals[i];
      }
      return sum;
      }

      // the first M/R finds the document once & produces the correct count
      > res = db.example.mapReduce(map,reduce, { query : {} })

      {
      "result" : "tmp.mr.mapreduce_1286336126_70",
      "timeMillis" : 16,
      "counts" : { "input" : 1, "emit" : 1, "output" : 1 },
      "ok" : 1,
      }
      > db[res.result].find()

      { "_id" : ObjectId("4cabee4ac0f7095167a5ab62"), "value" : 1 }

      // the second query matches the array without an index, and still
      // produces expected results
      > res = db.example.mapReduce(map,reduce, { query : { arr: {$gte:0} } })

      {
      "result" : "tmp.mr.mapreduce_1286336141_71",
      "timeMillis" : 12,
      "counts" : { "input" : 1, "emit" : 1, "output" : 1 },
      "ok" : 1,
      }
      > db[res.result].find()

      { "_id" : ObjectId("4cabee4ac0f7095167a5ab62"), "value" : 1 }

      // now index on the array and run the exact same M/R - note that it
      // now has 2 inputs & 2 emits, and the count has doubled
      > db.example.ensureIndex({arr:1})
      > res = db.example.mapReduce(map,reduce, { query : { arr: {$gte:0} } })

      {
      "result" : "tmp.mr.mapreduce_1286336171_72",
      "timeMillis" : 15,
      "counts" : { "input" : 2, "emit" : 2, "output" : 1 },
      "ok" : 1,
      }
      > db[res.result].find()

      { "_id" : ObjectId("4cabee4ac0f7095167a5ab62"), "value" : 2 }

      This seems bad - is this expected behavior?

            Assignee:
            eliot Eliot Horowitz (Inactive)
            Reporter:
            iragsdale Ian Ragsdale
            Votes:
            1 Vote for this issue
            Watchers:
            2 Start watching this issue

              Created:
              Updated:
              Resolved: