Failure writing a large amount of data to Excel even when using streams
I am using exceljs with streams, but it still fails to export a large amount of data due to a heap out-of-memory error. My database table contains around 3 million records which I am trying to write into an Excel file. Any help would be much appreciated. Below is the code and a screenshot of the error.
const QueryHelper = require('../../utils/query-helper');
const stream = require('stream');
const Excel = require('exceljs');

/**
 * Provides a repository for `AuditTrail`.
 * @memberof features.audit
 */
class AuditTrailRepository {
  constructor(context) {
    /**
     * @type {base.db.DbDriver}
     */
    this.connection = context.connection;
    /**
     * @type {string}
     */
    this.tableName = 'AuditTrail';
    /**
     * @type {ServerResponse}
     */
    this.response = context.response;
    /**
     * Object of the QueryHelper class.
     * @type {utils.db.QueryHelper}
     */
    this.sqlForAuditTrail = new QueryHelper(this.connection.tables.auditTrail);
  }

  /**
   * Export all AuditTrail data.
   */
  async exportAuditTrail() {
    const query = this.sqlForAuditTrail.select().build().toParam();
    const readableStream = new stream.Readable({
      objectMode: true,
      read(size) {}
    });
    // This method just pushes JSON objects into the readable stream as it
    // receives streamed rows from the database (see the sketch after this module).
    this.connection.runQueryParamStream(query.text, query.values, true, {
      readableStream: readableStream,
      objectMode: true,
      counter: 0
    });
    const columns = [
      { header: 'Id', key: 'Id' },
      { header: 'UserId', key: 'UserId' },
      { header: 'Date', key: 'Date' },
      { header: 'Object', key: 'Object' },
      { header: 'ObjectId', key: 'ObjectId' },
      { header: 'Action', key: 'Action' },
      { header: 'Message', key: 'Message' },
      { header: 'Data', key: 'Data' },
      { header: 'UserMessage', key: 'UserMessage' }
    ];
    this.response.writeHead(200, {
      'Content-disposition': 'attachment; filename="AuditTrail.xlsx"'
    });
    const options = {
      stream: this.response,
      useStyles: true,
      useSharedStrings: true
    };
    const workbook = new Excel.stream.xlsx.WorkbookWriter(options);
    const worksheet = workbook.addWorksheet('AuditTrail');
    worksheet.columns = columns;
    readableStream.pipe(new ExcelTransform({
      writableObjectMode: true,
      readableObjectMode: false,
      workbook: workbook,
      worksheet: worksheet
    }));
  }
}

class ExcelTransform extends stream.Transform {
  constructor(options) {
    super(options);
    const self = this;
    self.workbook = options.workbook;
    self.workbook.stream.on('readable', function () {
      const chunk = self.workbook.stream.read();
      self.push(chunk);
    });
    self.workbook.stream.on('error', function (error) {
      console.log('::::Error:::', error);
    });
    self.workbook.stream.on('close', function () {
      console.log(':::::End::::', 'ended');
    });
    self.worksheet = options.worksheet;
  }

  _transform(data, encoding, callback) {
    // Commit each row immediately so it can be flushed to the output stream.
    this.worksheet.addRow(data).commit();
    callback();
  }

  _flush(callback) {
    // Finalize the workbook once all rows have been written.
    this.workbook.commit();
    callback();
  }
}

module.exports = AuditTrailRepository;
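For context, the issue does not show runQueryParamStream itself. Below is a minimal sketch, under assumed semantics, of how such a helper could feed the Readable while honoring backpressure; createRowStream, dbDriver, and queryStream are illustrative names, not the project's actual API. With a no-op read() like the one above, the return value of push() is never consulted, so unconsumed rows can accumulate on the heap if the Excel writer falls behind.

const { Readable } = require('stream');

// Hypothetical sketch only: the real runQueryParamStream is not shown in
// the issue, and queryStream is an assumed driver method that emits row
// objects. The point is the backpressure handshake between push() and read().
function createRowStream(dbDriver, text, values) {
  let dbStream = null;

  const readable = new Readable({
    objectMode: true,
    read() {
      // The consumer wants more rows: resume the paused database stream.
      if (dbStream) dbStream.resume();
    }
  });

  dbStream = dbDriver.queryStream(text, values);
  dbStream.on('data', (row) => {
    // push() returns false when the internal buffer is full; pausing the
    // source until read() fires again keeps memory usage bounded.
    if (!readable.push(row)) {
      dbStream.pause();
    }
  });
  dbStream.on('end', () => readable.push(null)); // signal end-of-data
  dbStream.on('error', (err) => readable.destroy(err));

  return readable;
}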
Top GitHub Comments
We have resolved the issue with large amounts of data with this: the comment in the example code is only my assumption, but the timeout really does the trick. We call the timeout every 10th row for performance reasons; if you want the best memory usage, call it on every single row.
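The example snippet from that comment is not reproduced on this page. What follows is a minimal sketch of the pattern it describes, adapted to the _transform from the issue above; the class name is illustrative and the every-10th-row cadence follows the comment.

const { Transform } = require('stream');

// Illustrative adaptation of the timeout trick, not the commenter's code.
class ThrottledExcelTransform extends Transform {
  constructor(options) {
    super(options);
    this.workbook = options.workbook;
    this.worksheet = options.worksheet;
    this.rowCount = 0;
  }

  _transform(row, encoding, callback) {
    this.worksheet.addRow(row).commit();
    this.rowCount += 1;
    if (this.rowCount % 10 === 0) {
      // Yield to the event loop so exceljs can flush buffered output and
      // committed rows can be garbage-collected. Yielding on every row
      // minimizes memory use at some cost in throughput.
      setTimeout(callback, 0);
    } else {
      callback();
    }
  }

  _flush(callback) {
    // Finalize the workbook once all rows have been committed.
    this.workbook.commit();
    callback();
  }
}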
My team is also experiencing this issue. In our case we only have around 30,000 rows, but heavy styling creates lots of overhead.
Commenting to subscribe. If anyone else has ideas for heavy styling overhead, please let me know.