getting started with mongodb and nodejs
TRANSCRIPT
Getting Started With MongoDB and Node.JS
Jay Runkel, Principal Solutions Architect — [email protected] — @jayrunkel
3
GridFS
Driver
GridFS API
doc.jpg (meta data)
doc.jpg (1)
doc.jpg (1)
doc.jpg (1)
fs.files
fs.chunks
doc.jpg
6
Goal Today
• Help you get started with MongoDB and Node.JS
• Assumption:
  – New to both technologies
  – Programming Experience
  – Database Experience
• Learn from my newbie confusion
7
Agenda
1. Why Node.JS and MongoDB?
2. Find and Insert
3. Node.JS async, event queue, flow control
4. Controlling multiple threads
  – Bulk insert
11
Documents are Rich Data Structures
{ first_name: ‘Paul’, surname: ‘Miller’, cell: ‘+447557505611’, city: ‘London’, location: [45.123,47.232], Profession: [banking, finance, trader], cars: [ { model: ‘Bentley’, year: 1973, value: 100000, … }, { model: ‘Rolls Royce’, year: 1965, value: 330000, … } ]}
Fields can contain an array of sub-documents
Fields
Typed field values
Fields can contain arrays
String
Number
Geo-Coordinates
Do More With Your Data
MongoDB
{ first_name: ‘Paul’, surname: ‘Miller’, city: ‘London’, location: [45.123,47.232], cars: [ { model: ‘Bentley’, year: 1973, value: 100000, … }, { model: ‘Rolls Royce’, year: 1965, value: 330000, … } ]}
Rich Queries: Find Paul’s cars
Find everybody in London with a car built between 1970 and 1980
Geospatial Find all of the car owners within 5km of Trafalgar Sq.
Text Search Find all the cars described as having leather seats
Aggregation Calculate the average value of Paul’s car collection
Map Reduce: What is the ownership pattern of colors by geography over time? (Is purple trending up in China?)
14
Sample Data
{"_id" : "20160101000000:UA7549","icao" : "UA7549","callsign" : "UA7549","ts" : ISODate("2016-01-01T05:00:00Z"),"events" : {
"a" : 1773,"b" : 258,"p" : [50,-60],"s" : 345,"t" : ISODate("2016-01-01T05:00:00Z"),"v" : 299
}}
15
Example: Find Query
• Find a flight status entry for United Airlines Flight 7549 – db.data.findOne({"callsign" : "UA7549"})
• Find an aircraft flying at less than 5000 feet – db.data.findOne({"events.a" : {$lt : 5000}})
• Set value of note field – db.data.update({"callsign" : "OY1949"},
{$set : {"note" : "spoke with captain"}})
17
The synchronous way
var MongoClient = require('mongodb').MongoClient;
var db = MongoClient.connect('mongodb://localhost:27017/adsb');var col = db.collection('data');var doc = col.findOne({"callsign" : "UA7549"});
console.log("Here is my doc: %j", doc);db.close();
18
The synchronous way
var MongoClient = require('mongodb').MongoClient;
var db = MongoClient.connect('mongodb://localhost:27017/adsb');var col = db.collection('data');var doc = col.findOne({"callsign" : "UA7549"});
console.log("Here is my doc: %j", doc);db.close();
19
It works this way in the mongo shell???
var col = db.getCollection("data");var doc = col.findOne({"callsign" : "HR9368"});printjson(doc);
22
Asynchronous Programming
time
Func1
Func1 IO
Func1Func2
Func2 IO
Func2
IO Processing runs on separate threads in parallel with main processing thread
23
Callbacks
col.findOne({"callsign" : "UA7549"}, function (err, doc) {assert.equal(null, err);console.log("Here is my doc: %j", doc);});
console.log("All done!");
• Execute findOne
• When it is done, call the callback function
• Callback function takes two arguments
  – err – contains the error message or null
  – doc – the result of the findOne call
• “All done!” will be printed before “Here is my doc…”
25
Event queue
From: http://www.slideshare.net/takingblue/talk-nodejs-andisomorphicjavascript
MongoDB
26
Event Queue/Call Stack
function find1234 () { col.findOne({_id: 1234}, fo1_cb); }
find1234();
col.updateMany({color: "blue"}, {$set : {w : 5}}, up1_cb);
col.insert({product: 1234, cost: 99}, ins1_cb);
console.log("done");
Call Stack Driver API
CallbackQueue
Event Loop
27
Event Queue/Call Stack
function find1234 () { col.findOne({_id: 1234}, fo1_cb); }
find1234();
col.updateMany({color: "blue"}, {$set : {w : 5}}, up1_cb);
col.insert({product: 1234, cost: 99}, ins1_cb);
console.log("done");
Call Stack Driver API
CallbackQueue
Event Loop
find1234()
28
Event Queue/Call Stack
function find1234 () { col.findOne({_id: 1234}, fo1_cb); }
find1234();
col.updateMany({color: "blue"}, {$set : {w : 5}}, up1_cb);
col.insert({product: 1234, cost: 99}, ins1_cb);
console.log("done");
Call Stack Driver API
CallbackQueue
Event Loop
find1234()
col.findOne()
col.findOne()
29
Event Queue/Call Stack
function find1234 () { col.findOne({_id: 1234}, fo1_cb); }
find1234();
col.updateMany({color: "blue"}, {$set : {w : 5}}, up1_cb);
col.insert({product: 1234, cost: 99}, ins1_cb);
console.log("done");
Call Stack Driver API
CallbackQueue
Event Loop
col.updateMany()
col.findOne()
col.updateMany()
30
Event Queue/Call Stack
function find1234 () { col.findOne({_id: 1234}, fo1_cb); }
find1234();
col.updateMany({color: "blue"}, {$set : {w : 5}}, up1_cb);
col.insert({product: 1234, cost: 99}, ins1_cb);
console.log("done");
Call Stack Driver API
CallbackQueue
Event Loop
col.insert()
col.insert()
col.updateMany()
fo1_cb
31
Event Queue/Call Stack
function find1234 () { col.findOne({_id: 1234}, fo1_cb); }
find1234();
col.updateMany({color: "blue"}, {$set : {w : 5}}, up1_cb);
col.insert({product: 1234, cost: 99}, ins1_cb);
console.log("done");
Call Stack Driver API
CallbackQueue
Event Loop
console.log()
col.updateMany()
fo1_cb ins1_cb
32
Event Queue/Call Stack
function find1234 () { col.findOne({_id: 1234}, fo1_cb); }
find1234();
col.updateMany({color: "blue"}, {$set : {w : 5}}, up1_cb);
col.insert({product: 1234, cost: 99}, ins1_cb);
console.log("done");
function fo1_cb () {…}
Call Stack Driver API
CallbackQueue
Event Loop
fo1_cb()
col.updateMany()
ins1_cb
33
Event Queue/Call Stack
Call Stack Driver API
CallbackQueue
Event Loop
ins1_cb()
up1_cb
function find1234 () { col.findOne({_id: 1234}, fo1_cb); }
find1234();
col.updateMany({color: "blue"}, {$set : {w : 5}}, up1_cb);
col.insert({product: 1234, cost: 99}, ins1_cb);
console.log("done");
function ins1_cb () {…}
34
Event Queue/Call Stack
Call Stack Driver API
CallbackQueue
Event Loop
up1_cb()
function find1234 () { col.findOne({_id: 1234}, fo1_cb); }
find1234();
col.updateMany({color: "blue"}, {$set : {w : 5}}, up1_cb);
col.insert({product: 1234, cost: 99}, ins1_cb);
console.log("done");
function up1_cb () {…}
36
MongoDB Asynchronous Queries
var MongoClient = require('mongodb').MongoClient, assert = require('assert');
MongoClient.connect('mongodb://localhost:27017/adsb', function (err, db) { assert.equal(null, err); var col = db.collection('data');
col.findOne({"callsign" : "UA7549"}, function (err, doc) { assert.equal(null, err);
console.log("Here is my doc: %j", doc);
db.close(); });});
37
Asynchronously
var MongoClient = require('mongodb').MongoClient, assert = require('assert');
MongoClient.connect('mongodb://localhost:27017/adsb', function (err, db) { assert.equal(null, err); var col = db.collection('data');
col.findOne({"callsign" : "UA7549"}, function (err, doc) { assert.equal(null, err);
console.log("Here is my doc: %j", doc);
db.close(); });});
callback
38
Asynchronously
var MongoClient = require('mongodb').MongoClient, assert = require('assert');
MongoClient.connect('mongodb://localhost:27017/adsb', function (err, db) { assert.equal(null, err); var col = db.collection('data');
col.findOne({"callsign" : "UA7549"}, function (err, doc) { assert.equal(null, err);
console.log("Here is my doc: %j", doc);
db.close(); });});
callback
39
This gets ugly fast
var MongoClient = require('mongodb').MongoClient, assert = require('assert');
var db = MongoClient.connect('mongodb://localhost:27017/adsb', function (err, db) { assert.equal(null, err); var col = db.collection('data');
col.findOne({"callsign" : "UA7549"}, function (err, doc) { assert.equal(null, err);
console.log("Here is my doc: %j", doc);
col.updateOne({"callsign" : "UA7549"}, {$set : {"note" : "Spoke with the pilot"}}, {}, function(err, result) {
assert.equal(null, err);
console.log("Note updated"); db.close();});
});});
40
Nested callbacks - yuck
async1(function(input, result1) { async2(function(result2) { async3(function(result3) { async4(function(result4) { async5(function(output) { // do something with output }); }); }); });})
42
With Flow Control - Step
Step ( function connectToMongoDB (err, db) {
MongoClient.connect('mongodb://localhost:27017/adsb', this); }, function findOneDoc(err, db) {
if (err) console.log("Connect: %j", err);
database = db; collection = db.collection('data');
collection.findOne(csQuery, {}, this); }, function updateDoc(err, doc) {
if (err) console.log("Find One: %j", err);
console.log("Here is my doc: %j", doc);
collection.updateOne(csQuery, {$inc : {"timesViewed" : 1}},{},this); }, function findOneDoc2 (err, result) {
if (err) console.log("Update error: %j", err);
collection.findOne(csQuery, {}, this); }, function closeConnection(err, doc) {
if (err) console.log("FindOne Doc: %j", err);
console.log("Note updated: %j", doc);
database.close(); });
43
With Flow Control
Step ( function connectToMongoDB (err, db) {
MongoClient.connect('mongodb://localhost:27017/adsb', this); }, function findOneDoc(err, db) {
if (err) console.log("Connect: %j", err);
database = db; collection = db.collection('data');
collection.findOne(csQuery, {}, this); }, function updateDoc(err, doc) {
if (err) console.log("Find One: %j", err);
console.log("Here is my doc: %j", doc);
collection.updateOne(csQuery, {$inc : {"timesViewed" : 1}},{},this); }, function findOneDoc2 (err, result) {
if (err) console.log("Update error: %j", err);
collection.findOne(csQuery, {}, this); }, function closeConnection(err, doc) {
if (err) console.log("FindOne Doc: %j", err);
console.log("Note updated: %j", doc);
database.close(); });
44
With Flow Control
Step ( function connectToMongoDB (err, db) {
MongoClient.connect('mongodb://localhost:27017/adsb', this); }, function findOneDoc(err, db) {
if (err) console.log("Connect: %j", err);
database = db; collection = db.collection('data');
collection.findOne(csQuery, {}, this); }, function updateDoc(err, doc) {
if (err) console.log("Find One: %j", err);
console.log("Here is my doc: %j", doc);
collection.updateOne(csQuery, {$inc : {"timesViewed" : 1}},{},this); }, function findOneDoc2 (err, result) {
if (err) console.log("Update error: %j", err);
collection.findOne(csQuery, {}, this); }, function closeConnection(err, doc) {
if (err) console.log("FindOne Doc: %j", err);
console.log("Note updated: %j", doc);
database.close(); });
45
You can also execute in parallel
Step ( function connectToMongoDB (err, db) {
MongoClient.connect('mongodb://localhost:27017/adsb', this); }, function executeParallel (err, db) {
if (err) console.log("Connect: %j", err); var collection = db.collection('data'); database = db;
collection.findOne(csQuery, {}, this.parallel()); collection.updateOne(csQuery, {$inc : {"timesViewed" : 1}}, {}, this.parallel()); collection.findOne(csQuery, {}, this.parallel());
}, function closeConnection(err, doc1, upResult, doc2) {
if (err) console.log("Error: %j", err); console.log("Here is doc1: %j", doc1); console.log("Incremented: %j", upResult); console.log("Here is doc2: %j", doc2); database.close();
});
47
Find Many - Cursor
• This works in the MongoShell
var col = db.getCollection("data");var cursor = col.find({"events.a" : {$gt : 5000}});
while (cursor.hasNext()) { printjson(cursor.next());}
48
Find Many - Cursor
• This works in the MongoShell
var col = db.getCollection("data");var cursor = col.find({"events.a" : {$gt : 5000}});
while (cursor.hasNext()) { printjson(cursor.next());}
• It does not work in Node.JS
• The MongoDB driver retrieves documents in batches from MongoDB
  – Retrieving a new batch is asynchronous
49
Find Many - Streams
MongoClient.connect("mongodb://localhost:27017/adsb", function (err, db) { var col = db.collection('data'); var stream = col.find({"events.a" : {$gt : 5000}}).stream();
stream.on('data', function(doc) { console.log("Doc: %j", doc);
});
stream.on('error', function (doc) { console.log("Query failed: %j", doc);
});
stream.on('end', function() { console.log("All data retrieved.");
db.close(); });});
50
Find Many - Streams
MongoClient.connect("mongodb://localhost:27017/adsb", function (err, db) { var col = db.collection('data'); var stream = col.find({"events.a" : {$gt : 5000}}).stream();
stream.on('data', function(doc) { console.log("Doc: %j", doc);
});
stream.on('error', function (doc) { console.log("Query failed: %j", doc);
});
stream.on('end', function() { console.log("All data retrieved.");
db.close(); });});
‘data’ callback invoked for each document
51
What about insert?
MongoClient.connect('mongodb://localhost:27017/adsb', function (err, db) { assert.equal(null, err); var col = db.collection('data');
col.insert({x: 1, y: 2, z: 3}, {}, function (err, result) {
assert.equal(null, err); console.log("Insert Complete"); db.close();
});});
52
What if I have to insert 100M documents?
MongoClient.connect('mongodb://localhost:27017/adsb', function (err, db) { assert.equal(null, err); var col = db.collection('data');
for (i = 1; i <= 100000000; i++) { col.insert({x: i, y: 2, z: 3},
{}, function (err, result) { assert.equal(null, err); console.log("Insert Complete"); });
}});
Let’s insert all 100,000,000 in parallel!!!!
53
What if I have to insert 100M documents?
MongoClient.connect('mongodb://localhost:27017/adsb', function (err, db) { assert.equal(null, err); var col = db.collection('data');
for (i = 1; i <= 100000000; i++) { col.insert({x: i, y: 2, z: 3},
{}, function (err, result) { assert.equal(null, err); console.log("Insert Complete"); db.close(); });
}});
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap out of memory
54
Event Queue/Call Stack
for (i = 1; i <= 100000000; i++) { col.insert({x: i, y: 2, z: 3},
{}, function (err,
result) { assert.equal(null,
err);
console.log("Insert C…"); });
}
Call Stack Driver API
CallbackQueue
Event Loop
55
Event Queue/Call Stack
for (i = 1; i <= 100000000; i++) { col.insert({x: i, y: 2, z: 3},
{}, function (err,
result) { assert.equal(null,
err);
console.log("Insert C…"); db.close();
});
}
Call Stack Driver API
CallbackQueue
Event Loop
col.insert()
col.insert()
col.insert()
col.insert()
col.insert()
col.insert()
col.insert()
col.insert()
col.insert()
56
Event Queue/Call Stack
for (i = 1; i <= 100000000; i++) { col.insert({x: i, y: 2, z: 3},
{}, function (err,
result) { assert.equal(null,
err);
console.log("Insert C…"); db.close();
});
}
Call Stack Driver API
CallbackQueue
Event Loop
col.insert()
col.insert()
col.insert()
col.insert()
col.insert()
col.insert()
col.insert()
col.insert()
col.insert()
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap out of memory
57
Let’s try 5 at a time
var DOCS_TO_INSERT = 1000;
var numInsertsRunning = 0; // number of insert threads
var insertCount = 0; // number of documents inserted so far
MongoClient.connect('mongodb://localhost:27017/adsb', function (err, db) { assert.equal(null, err); var col = db.collection('data');
for (i = 0; i < 5; i++) { ++numInsertsRunning; insertDocument(db, col, ++insertCount, i, function (err, result) { console.log("All ", DOCS_TO_INSERT, " documents inserted."); db.close(); });
}});
58
insertDocument function
function insertDocument (db, col, docNum, threadNum, callback) { col.insert({x: ++insertCount, y: 2, z: 3},
{}, function (err, result) {
assert.equal(null, err);
console.log("Thread: ", threadNum, " inserted doc: ", docNum, "."); if (insertCount < DOCS_TO_INSERT) insertDocument(db, col, ++insertCount, threadNum, callback); else if (numInsertsRunning == 1) { numInsertsRunning--; callback(null, true); } else numInsertsRunning--;
});}
59
InsertDocument Callback Logic
col.insert({x: ++insertCount, y: 2, z: 3}, function (err, result) {
}
Have all the documents been inserted?
Call insertDocument again
Are other inserts still running?
Do nothing & decrement running thread count
All inserts done: call the original callback
Yes Yes
No No
60
Bulk Inserts
• The previous example was for illustrative purposes
• MongoDB provides a bulk write API that provides better performance for bulk writes
• The bulk write API batches up writes
  – Batch writes with a single acknowledgement
• Use collection.bulkWrite
• Improve performance using multiple bulkWrite threads
  – Previous example will be identical
  – Replace collection.insert with collection.bulkWrite
61
Another word of caution
• All my examples established a MongoDB connection and then closed it
  – This was for illustrative purposes
• Don’t continuously open and close MongoDB connections.
• Open a connection once
  – Use that connection through the life of the program
  – Close it at the end
62
Summary
• Asynchronous vs synchronous programming
• Call stack, event loop, driver API
• Flow control
• Find, insert, update examples
• Managing multiple parallel threads
  – bulk insert example
• Learn from my mistakes and misconceptions
@jayrunkel
Github: https://github.com/jayrunkel/nodeJSWebJun2016