Same job.id being processed by multiple cluster workers.
See original GitHub issueWhen using cluster to process jobs, I am seeing this issue intermittently. The same job is processed simultaneously by 2 or 3 different workers.
I have attached an image showing the job log printing out multiple pids and duplicate line items as the job is processed twice.
My code has the following directory structure: /jobs —> sendEmailToGroup.js —> sendEmail.js server.js
server.js
var debug = require('debug')('cluster')
, cluster = require('cluster')
, config = require('./config.js');
//--------------------------------------------------------------------------------------------
// cluster
//--------------------------------------------------------------------------------------------
if (cluster.isMaster) {
var clusterWorkerSize = 4; // set this manually. I am running on a Quadcore Mac mini
console.log("master pid %s", process.pid);
for (var i = 0; i < clusterWorkerSize; i++) {
cluster.fork();
}
}
else {
process._debugPort = 5858 + cluster.worker.id;
debug("worker %s pid %s debugPort %s", cluster.worker && cluster.worker.id, process.pid, process._debugPort);
// I run the server only on worker processes. It is my understanding that these worker processes share nothing, and that I should be able to create a kue server on each worker.
// I am wrapping Kue in express for future feature additions to this system.
//--------------------------------------------------------------------------------------------
// Express
//--------------------------------------------------------------------------------------------
var express = require('express');
var app = express();
app.set('port', config.PORT);
//--------------------------------------------------------------------------------------------
// Kue
//--------------------------------------------------------------------------------------------
var kue = require('kue');
var jobs = kue.createQueue({
prefix: config.KUE_TYPE
, redis: {
port: config.REDIS_PORT || 6379
, host: config.REDIS_HOST || '127.0.0.1'
}
});
// make sure each worker promotes jobs
// if this is my issue...I'm still confused as to how it would breaks things
var promote_interval = 100;
jobs.promote(promote_interval);
jobs.process('sendEmailToGroup', 1, require('./jobs/sendEmailToGroup'));
jobs.process('sendEmail', 20, require('./jobs/sendEmail'));
// simple express middleware
app.use(function(req, res, next){
kue.app.set('title', req.host);
next();
});
//
app.use(kue.app);
app.listen(app.get('port'));
}
sendEmailToGroup.js
var cluser = require('cluster')
, config = require('../config.js')
, dpd = require('../dpd-init.js')(config.DPD_ROOT)
, _ = require('lodash')
, moment = require('moment')
, kue = require('kue');
var jobs = kue.createQueue({
prefix: config.KUE_TYPE
, redis: {
port: config.REDIS_PORT || 6379
, host: config.REDIS_HOST || '127.0.0.1'
}
});
require('../parse-init.js');
exports = module.exports = (function(job, done){
var jobKey = "job_" + job.id;
job.log('start ' + jobKey + ' pid ' + process.pid);
var queryCompletion = 0;
var limit = 100;
var upperLimit = 10001;
var completion = upperLimit + limit;
var skip = 0;
var lastResultCount = limit;
var totalResultCount = 0;
var lastSent = moment().toDate();
if(job.data.lastSent) {
lastSent = moment(job.data.lastSent).toDate();
}
var query = new Parse.Query(User);
query.limit(limit);
query.equalTo('emailGroup', job.data.emailGroup);
query.exists("email");
query.notEqualTo('doNotSendEmail', true);
query.descending("createdAt");
promiseWhile(function() {
if(lastResultCount >= limit && lastResultCount != 0 && skip < upperLimit){
return true;
}
else {
return false;
}
},
function() {
query.lessThan("createdAt", lastSent);
var mainPromise = new Parse.Promise();
mainPromise = query.find().then(function(results){
lastResultCount = results.length;
totalResultCount += lastResultCount;
skip += limit;
if (lastResultCount < limit && upperLimit > totalResultCount){
completion = totalResultCount;
}
job.progress(queryCompletion, completion);
var promises = [];
_.each(results, function(user) {
var promise = new Parse.Promise();
promises.push(promise);
if(user.get('email') !== undefined){
var userId = user.id;
var userCreatedAt = user.createdAt;
lastSent = moment(userCreatedAt).toDate();
var toName = user.get('kFullNameKey') || user.get('username');
var toEmail = user.get('email');
var emailGroup = user.get('emailGroup');
var sendEmail = jobs.create('sendEmail', {
title: '' + job.data.emailType + ': ' + job.data.typeObjectId,
emailType: job.data.emailType,
typeObjectId: job.data.typeObjectId,
template: job.data.template,
userId: userId,
toName: toName,
toEmail: toEmail,
fromName: job.data.fromName,
fromEmail: job.data.fromEmail,
subject: job.data.subject,
emailGroup: emailGroup,
emailScheduleId: job.data.emailScheduleId,
userCreatedAt: userCreatedAt,
user: user
}).attempts(2).save();
promise.resolve();
return promise;
}
});
job.log("Retrieved " + results.length + " Users" + " / " + totalResultCount + " Total");
return Parse.Promise.when(promises);
}, function(error) {
//fail the job on error
job.log("" + error.message);
done(error);
});
return mainPromise;
}).then(function() {
job.log('job done');
done();
}, function(error) {
});
});
Any ideas (other than “don’t use Parse”) would be extremely helpful.
Issue Analytics
- State:
- Created 9 years ago
- Comments:12
Top Results From Across the Web
Running a job on the cluster | South Dakota State University
We see there are a few jobs running, some on just a single node, some on multiple nodes. In this case, all jobs...
Read more >Job processing / Batch processing multiple clusters
Besides this, it is possible to work on the same directories (e.g. clusters) on multiple computers which share the work automatically.
Read more >Create, run, and manage Databricks Jobs
Learn how to create, run, schedule, and manage workflows in the Databricks Jobs UI.
Read more >System-directed cluster picking - Supply Chain Management
Cluster picking is a piece picking process that lets you pick ... item for multiple work orders by visiting the pick location only...
Read more >Cluster | Node.js v19.3.0 Documentation
Clusters of Node.js processes can be used to run multiple instances of Node.js ... Each new worker is given its own unique id,...
Read more >Top Related Medium Post
No results found
Top Related StackOverflow Question
No results found
Troubleshoot Live Code
Lightrun enables developers to add logs, metrics and snapshots to live code - no restarts or redeploys required.
Start FreeTop Related Reddit Thread
No results found
Top Related Hackernoon Post
No results found
Top Related Tweet
No results found
Top Related Dev.to Post
No results found
Top Related Hashnode Post
No results found
Top GitHub Comments
Same problem. Several cluster workers get the same message.
Code (simplified):
Is threre any solution?
@kimballfrank have you found the solution?