83462

Node.js callback with MongoDB update never returns although it updates DB

Question:

I am in the market for a new vehicle. Instead of repeatedly searching the dealerships' websites, I thought this would be an interesting and fun opportunity to learn a little Node and MongoDB, so I'm scraping my local dealerships' websites to grab the makes and models that I am interested in.

The problem that I am running into is that node won't terminate after my final callback has run through.

// Scraper script from the question: connects to MongoDB, fetches a listing page,
// follows the collected links, and upserts one document per detail page into the
// 'carsTest' collection. Reproduced as posted; the notes below mark the spots that
// matter for the reported symptom (the process never reaches the final handler).
var cheerio = require('cheerio');
var request = require('request');
var db = require('mongodb');
var S = require('string');
var log = require('console').log;
var async = require('async');
var links = []; // detail-page URLs collected by start(), consumed by getDetailInfo()
var website = 'http://www.yahoo.com';

async.series( [
    // NOTE: this task declares no callback parameter and never signals completion.
    function(){
        log('starting');
        db.connect('mongodb://127.0.0.1:27017/test', function(err, base){
            if(err) throw err;
            // Replaces the driver module held in `db` with the connected handle.
            db = base;
        });
    },
    // NOTE: this is a call expression, evaluated while the array literal is built;
    // the array element is the call's return value, not a task function.
    request(website, start)
    ],
    // Final handler: closes the connection. The question reports it is never invoked.
    function(){
        log('closing DB');
        db.close();
    });

// Response handler for the first page: collects links, then requests the
// remaining pages and starts the detail phase once every page has answered.
// `err` and `resp` are not used.
function start(err,resp,body){
    var $ = cheerio.load(body);
    var numPages = 2;
    $('.gbps').each(function(i,elem) {
        links.push('http://www.yahoo.com');
    });
    var pageURLS = [];
    for (var i = 2; i<=numPages; i++){ //create URLs for additional pages
        pageURLS[i-2] = website;
    }
    // Starts at 1 to count the page already loaded above.
    var pages = 1;
    log('getting page URLs');
    pageURLS.forEach(function(url, index, array){
        request(url, function(error,response,bodies) {
            pages++;
            var $ = cheerio.load(bodies);
            $('.tab').each(function(i,elem) {
                links.push('http://www.yahoo.com');
            });
            // Only the response that brings the counter up to numPages triggers the detail phase.
            if (pages == numPages){
                getDetailInfo();
            };
        });
    });
}

// Logs how many links were collected and issues one request per link.
function getDetailInfo(){
    log(links.length);
    links.forEach(function(link, index, array){
        request(link, doStuff);
    });
}

// Per-link response handler: logs a request error (without stopping) and
// forwards the response to the parser with addToDB as the continuation.
function doStuff(err, response, body){
    if(err){
        log(err);
    }
    parseDetailResponse(err,response,body, addToDB);
}

// Parses one detail page and passes a five-element array
// [picture, description, price, specs, makeAndModel] to `callback`.
// Most values are placeholders in this reduced example.
function parseDetailResponse(err,resp,body,callback){
    log('parsing');
    var $ = cheerio.load(body);
    // One {name, value} pair per child of '.specifications' (placeholder key/value here).
    var specs = $('.specifications').children().map(function(i, elem){
        var key = 'key';
        var value = 'value';
        var ret = {};
        ret [ 'name' ] = key;
        ret [ 'value' ] = value;
        return ret;
    });
    var makeAndModel = 'makeAndModel';
    callback(['picture url', 'vehicle description', 100, specs, makeAndModel]);
}

// Maps the first '.gbps' elements to year / make / model pairs by position.
// Not called anywhere in the code shown.
function getMakeAndModel(stuff){
    var $ = cheerio.load(stuff);
    // NOTE: `temp` is assigned without a declaration.
    temp = $('.gbps').map(function(i, elem){
        var ret = {};
        switch(i){
            case 0:
                ret['name'] = 'year';
                ret['value'] = $(this).text();
                break;
            case 1:
                ret['name'] = 'make';
                ret['value'] = $(this).text();
                break;
            case 2:
                ret['name'] = 'model';
                ret['value'] = $(this).text();
                break;
            case 3:
                ret['name'] = 'ignore';
                ret['value'] = $(this).text();
                break;
            default:
                ret['name'] = 'ignore';
                ret['value'] = 'ignore';
        }
        return ret;
    });
    return temp;
}

// Flattens the parsed name/value pairs into one object and upserts it, keyed by VIN.
// `arr` is the array built by parseDetailResponse.
function addToDB(arr){
    log('adding to DB');
    // NOTE: these five names are assigned without a declaration.
    pic = arr[0];
    description = arr[1];
    price = arr[2];
    specs = arr[3];
    makeAndModel = arr[4];
    var obj = {};
    for (var i = specs.length - 1; i >= 0; i--) {
        obj [specs[i].name] = specs[i].value;
    };
    // In this reduced example makeAndModel is the placeholder string, so this loop walks its characters.
    for (var i = makeAndModel.length - 1; i >= 0; i--){
        obj [makeAndModel[i].name] = makeAndModel[i].value;
    };
    db.collection('carsTest').update(
        {VIN: obj.VIN},
        {
            $set: {
                VIN: obj.VIN,
                make: obj.make,
                model: obj.model,
                year: obj.year,
                price: price,
                engine: obj.Engine,
                interior: obj.Interior,
                exterior: obj.Exterior,
                'model code': obj['Model Code'],
                'stock number': S(obj['Stock Number']).toInt(),
                transmission: obj.Transmission,
                mileage: obj.Mileage ? obj.Mileage : 0,
                description: description,
                picture: pic,
            }
        },
        {upsert: true, safe: true},
        // The update callback only rethrows errors; nothing is signalled on success.
        function(err,result){
            if(err){
                throw err;
            }
        });
    // Runs right after the update is issued, not after it completes.
    log('finished with this one!');
}

I've omitted and changed a fair amount to reduce this to a proof of concept, without a lot of error checking or anything, but even this version will add the document and then won't quit. Node just sits there, waiting for something to happen, and it never calls the final callback to close the DB and exit.

> db.carsTest.find().pretty() { "_id" : ObjectId("52139aa7c9b7a39e0f1eb61d"), "VIN" : null, "description" : "vehicle description", "engine" : null, "exterior" : null, "interior" : null, "make" : null, "mileage" : 0, "model" : null, "model code" : null, "picture" : "picture url", "price" : 100, "stock number" : NaN, "transmission" : null, "year" : null }

Answer1:

I think that you misunderstand how async.series works.

Your functions in async.series don't take a callback as an argument and they never call it. Also, request(website, start) is a function call, not a function: it runs immediately, and its return value is what ends up in the task array. That's probably why the async loop breaks. Try this:

// Runs the two setup steps in order. Every task receives a callback and must
// call it once it is done; async.series only moves on to the next task (and,
// after the last one, to the final handler) when that call happens.
async.series(
  [
    function (callback) { // <--- missing callback
      log('starting');
      db.connect('mongodb://127.0.0.1:27017/test', function (err, base) {
        // No connection exists yet, so there is nothing to clean up: fail fast.
        if (err) throw err;
        db = base;
        callback(); // <--- missing callback
      });
    },
    function (callback) { // <--- missing function with callback
      request(website, function (err, resp, body) {
        // start() takes over the callback and calls it when all of its work is done.
        start(err, resp, body, callback);
      });
    }
  ],
  // Final handler: receives the first error reported by a task, if any.
  function (err) {
    if (err) {
      // Report the failure instead of dropping it; the connection is still closed below.
      log(err);
    }
    log('closing DB');
    db.close();
  }
);

Note that I've added a callback argument when calling start. Thus you will have to refactor your code heavily so that every function accepts a callback which can be called at the end, when you know that all jobs are done. For example, you can add async.parallel inside start, and this function may look like this:

/**
 * Requests every URL in pageURLS in parallel and calls `callback` exactly once:
 * with the first error encountered, or without an error once all jobs finished.
 *
 * @param {?Error} err - error reported for the initial request, if any
 * @param {Object} resp - response of the initial request
 * @param {*} body - body of the initial request
 * @param {Function} callback - completion callback, called as callback(err)
 */
function start(err, resp, body, callback) {
  if (err) {
    // The initial request failed; report it instead of working on a missing body.
    return callback(err);
  }

  // some stuff happens here
  // (elided in the answer; pageURLS is expected to be defined by this point)
  var jobs = [];
  pageURLS.forEach(function (url, index, array) {
    jobs.push(function (clb) {
      request(url, function (error, response, bodies) {
        if (error) {
          // Hand the failure to async.parallel so it reaches the final handler.
          return clb(error);
        }
        // some stuff
        clb(); // <--- this refers to the local callback for the job
      });
    });
  });

  async.parallel(jobs, function (jobError) {
    // all jobs are done (or one of them failed), let's finalize everything
    callback(jobError);
  });
}

Recommend

  • I'm moving from C to C++ and I don't get this creation of a class
  • How to get a TypeSafe URL to a static resource in Yesod
  • retrieve instagram images- get access denied message
  • Android displaying images dynamically
  • Converting urls to links in a string using preg_replace()
  • Grunt cssmin / CleanCSS source map rebasing
  • vue-router and Express
  • How can I do a 301 redirect from http to https in Wildfly 8.2?
  • htaccess regex directory to variable
  • Django how to modify database records by template
  • Can I put a + sign in a folder with IIS?
  • Open hyperlink on click on an ggplot/plotly chart
  • Symfony2 redirect to https route fails (uses wrong port)
  • how to pass class attribute and value to markdown syntax
  • Receive list of elements in their visual order
  • How can I sync the html/session used between Django's 'Client' and Selenium's we
  • Is there any way to call saveCurrentTurnWithMatchData without sending a push notification?
  • WordPress > setting permalink option via script buggy?
  • Angular Bootstrap Carousel Slide Transition not working correctly
  • Extract All Possible Paths from Expression-Tree and evaluate them to hold TRUE
  • XSLT foreach repeating nodes to flat
  • Do I need to seed any random number generator before using EVP_PKEY_keygen of OpenSSL?
  • JqueryMobile Popup menu is not working
  • List images(01.png) and descriptions(01.txt) from directory
  • Silverlight DependencyProperty.SetCurrentValue Equivalent
  • D3 nodes and links from JSON with nested arrays of children
  • Repeat a vertical line on every page in Report Builder / SSRS
  • htaccess rewriting URLs with multiple forward slashes
  • Display Images one by one with next and previous functionality
  • Web-crawler for facebook in python
  • Comma separated Values
  • A cron job substitute?
  • Error creating VM instance in Google Compute Engine
  • Hits per day in Google Big Query
  • how does django model after text[] in postgresql [duplicate]
  • costura.fody for a dll that references another dll
  • Observable and ngFor in Angular 2
  • UserPrincipal.Current returns apppool on IIS
  • java string with new operator and a literal
  • How to push additional view controllers onto NavigationController but keep the TabBar?