MongoDB- Insert if it doesn't exist, else skip


Is it possible to insert in Mongo with condition;

//Pseudo code

Bulk Insert Item :

If Key exists
    Skip, don't throw error
If key does not exist
    Add item

If i do single inserts, it might return an error or insert in the collection, but is it possible in bulk ?

You have two real choices here depending on how you want to handle things:

  1. Use upsert functionality of MongoDB to essentially "lookup" if the key data exists. If not then you only pass in data to $setOnInsert and that will not touch anything else.

  2. Use "UnOrdered" operations in Bulk. The whole batch of updates will continue even if an error is returned, but the error report(s) are just that, and anything that is not an error will be comitted.

Whole example:

var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

var testSchema = new Schema({
  "_id": Number,
  "name": String
},{ "_id": false });

var Test = mongoose.model('Test',testSchema,'test');

mongoose.connect('mongodb://localhost/test');

var data = [
  { "_id": 1, "name": "One" },
  { "_id": 1, "name": "Another" },
  { "_id": 2, "name": "Two" }
];

async.series(
  [
    // Start fresh
    function(callback) {
      Test.remove({},callback);
    },

    // Ordered will fail on error. Upserts never fail!
    function(callback) {
      var bulk = Test.collection.initializeOrderedBulkOp();
      data.forEach(function(item) {
        bulk.find({ "_id": item._id }).upsert().updateOne({
          "$setOnInsert": { "name": item.name }
        });
      });
      bulk.execute(callback);
    },

    // All as expected
    function(callback) {
      Test.find().exec(function(err,docs) {
        console.log(docs)
        callback(err);
      });
    },


    // Start again
    function(callback) {
      Test.remove({},callback);
    },

    // Unordered will just continue on error and record an error
    function(callback) {
      var bulk = Test.collection.initializeUnorderedBulkOp();
      data.forEach(function(item) {
        bulk.insert(item);
      });
      bulk.execute(function(err,result) {
        callback(); // so what! Could not care about errors
      });
    },


    // Still processed the whole batch
    function(callback) {
      Test.find().exec(function(err,docs) {
        console.log(docs)
        callback(err);
      });
    }
  ],
  function(err) {
    if (err) throw err;
    mongoose.disconnect();
  }
);

Note that the "changed action" in current drivers is that the result response on .execute() will return an error object to be thrown, where previous releases did not do so with "Un-ordered" operations.

This makes it imperative that your code never relies on the err returned alone, and you should be inpspeting the returned result instead for the full classification of errors.

Nonetheless, when unordered then the batch continues until the end, no matter how many errors occur. Things that are not an error will be committed as normal.

This really comes down to "is sequence important". If so, then you need "Ordered" operations and you can only avoid duplicate keys by using "upserts". Otherwise use "unordered", but be aware of the error returns and what they actually mean.

Also, when using .collection to get the underlying collection object from the base driver to enable "Bulk" operations, then always be sure that either "some" mongoose method has always been called first.

Without that, there is no guaranteed connection to the database with the native driver methods as it is handled for the mongoose methods, so the operation will fail due to no connection.

The alternate to "firing" a mongoose method first, is to wrap your app logic in an event listener for the connection:

mongoose.connection.on("open",function(err) {
    // app logic in here
})

As has already been said, "insert if it doesn't already exist" can be achieved by using the update command with the upsert option set to true. Here's how to do that with the 3.x node.js driver:

let ops = [];
ops.push({ updateOne: { filter: {key:"value1"}, update: {} }, { upsert:true } });
ops.push({ updateOne: { filter: {key:"value2"}, update: { $set:{/*...*/} } }, { upsert:true } });
ops.push({ updateOne: { filter: {key:"value3"}, update: { { $setOnInsert:{/*...*/} } } }, { upsert:true } });
// < add more ops here >
await db.collection("my-collection").bulkWrite(ops, {ordered:false});

If the filter returns zero results, a new document will be created using the filter conditions and the $set updates (if any). If you use $setOnInsert, then the updates are only applied to new docs.

Posting this example because it would have been handy for my situation. More info in the docs for db.collection.bulkWrite.


Use setOnInsert

db.collection('collection').updateOne(
     { _id: data._id },
    { $setOnInsert: { ...data } },
    { upsert: true },
  )