IBM Cloud

IBM Cloud Object Storage Nodejs上传2GB大文件出错

我们可以使用 IBM Cloud Object Storage 保存文件。IBM Cloud Object Storage 提供了一系列 SDK(例如 Java、Node.js 等),方便我们操作存储。

官方文档:https://cloud.ibm.com/docs/cloud-object-storage/libraries?topic=cloud-object-storage-node

下面是官方给的Nodejs上传大文件的Sample代码:

这段代码在实际使用时,上传2GB以上的大文件会出错。原因是 fs.readFile 会把整个文件一次性读入内存,它本身不支持2GB以上的文件;文件过大时读取会失败,甚至造成内存溢出。

/**
 * Uploads a local file to a bucket as a multipart upload, one part at a time.
 *
 * The whole file is loaded with fs.readFile and then sliced into 5 MB parts.
 * NOTE: fs.readFile reads the entire file into memory and does not support
 * files of 2 GB or more, so this function is only suitable for smaller files.
 *
 * Failures are logged, not thrown: if any step fails after the multipart
 * upload was created, the upload is cancelled via cancelMultiPartUpload and
 * the error is written to console.error.
 *
 * @param {string} bucketName - Bucket that receives the object.
 * @param {string} itemName - Object key to create.
 * @param {string} filePath - Path of the local file to upload.
 * @returns {Promise<void>|undefined} undefined when filePath does not exist;
 *     otherwise a promise that settles once the upload has completed or has
 *     been cancelled. The promise does not reject.
 */
function multiPartUpload(bucketName, itemName, filePath) {
    if (!fs.existsSync(filePath)) {
        log.error(new Error(`The file \'${filePath}\' does not exist or is not accessible.`));
        return;
    }

    console.log(`Starting multi-part upload for ${itemName} to bucket: ${bucketName}`);

    // min 5MB part
    const partSize = 1024 * 1024 * 5;

    // Stays null until the service has issued an upload id; only then is there
    // something to cancel on failure.
    let uploadID = null;

    // Adapts the callback-style fs.readFile so a read error rejects instead of
    // being silently ignored.
    const readWholeFile = () => new Promise((resolve, reject) => {
        fs.readFile(filePath, (err, fileData) => {
            if (err) {
                reject(err);
                return;
            }
            resolve(fileData);
        });
    });

    const uploadAllParts = async () => {
        const created = await cos.createMultipartUpload({
            Bucket: bucketName,
            Key: itemName
        }).promise();
        uploadID = created.UploadId;

        const fileData = await readWholeFile();
        const partCount = Math.ceil(fileData.length / partSize);
        const parts = [];

        // Parts are uploaded strictly one after another; part numbers start at 1.
        for (let partNum = 1; partNum <= partCount; partNum++) {
            const start = (partNum - 1) * partSize;
            const end = Math.min(start + partSize, fileData.length);

            console.log(`Uploading to ${itemName} (part ${partNum} of ${partCount})`);

            const uploaded = await cos.uploadPart({
                Body: fileData.subarray(start, end),
                Bucket: bucketName,
                Key: itemName,
                PartNumber: partNum,
                UploadId: uploadID
            }).promise();
            parts.push({ETag: uploaded.ETag, PartNumber: partNum});
        }

        await cos.completeMultipartUpload({
            Bucket: bucketName,
            Key: itemName,
            MultipartUpload: {
                Parts: parts
            },
            UploadId: uploadID
        }).promise();

        // Logged only after completion has actually succeeded.
        console.log(`Upload of all ${partCount} parts of ${itemName} successful.`);
    };

    return uploadAllParts().catch((e) => {
        const cancelling = uploadID === null
            ? undefined
            : cancelMultiPartUpload(bucketName, itemName, uploadID);
        console.error(`ERROR: ${e.code} - ${e.message}\n`);
        return cancelling;
    });
}

/**
 * Asks the object-storage client to abort a multipart upload and reports the
 * outcome on the console.
 *
 * @param {string} bucketName - Bucket the upload was started in.
 * @param {string} itemName - Object key of the upload.
 * @param {string} uploadID - Id of the multipart upload to abort.
 * @returns {Promise<void>} Settles after the outcome has been logged; a failed
 *     abort is logged with console.error rather than propagated.
 */
function cancelMultiPartUpload(bucketName, itemName, uploadID) {
    const abortParams = {
        Bucket: bucketName,
        Key: itemName,
        UploadId: uploadID
    };

    const reportAborted = () => {
        console.log(`Multi-part upload aborted for ${itemName}`);
    };

    const reportFailure = (err) => {
        console.error(`ERROR: ${err.code} - ${err.message}\n`);
    };

    return cos.abortMultipartUpload(abortParams)
        .promise()
        .then(reportAborted)
        .catch(reportFailure);
}

下面是修改后的代码:用 fs.openSync 打开文件,再用 fs.readSync 按分片读取,代替了一次性把整个文件读入内存的 fs.readFile。

/**
 * Uploads a local file to environment.bucketName as a multipart upload and
 * deletes the local file once the upload has completed.
 *
 * The file is never loaded into memory as a whole: it is opened with
 * fs.openSync and each 40 MB part is read on demand with fs.readSync, which is
 * what makes files of 2 GB and more uploadable.
 *
 * If any step fails after the multipart upload was created, the upload is
 * aborted on the service and the local file is left in place.
 *
 * @param {string} itemName - Object key to create.
 * @param {string} filePath - Path of the local file to upload.
 * @returns {Promise<string>} Resolves with itemName after the upload has
 *     completed and the local file has been deleted; rejects with itemName
 *     (the error itself is written to console.error) on any failure.
 */
function uploadItem(itemName, filePath) {
    // 40 MB per part
    const partSize = 1024 * 1024 * 40;

    // Reads exactly `length` bytes starting at `position`. fs.readSync may
    // return fewer bytes than requested, so keep reading until the buffer is full.
    const readPart = (fd, length, position) => {
        const buffer = Buffer.alloc(length);
        let filled = 0;
        while (filled < length) {
            const bytesRead = fs.readSync(fd, buffer, filled, length - filled, position + filled);
            if (bytesRead === 0) {
                throw new Error(`Unexpected end of file while reading '${filePath}'`);
            }
            filled += bytesRead;
        }
        return buffer;
    };

    const transfer = async () => {
        if (!fs.existsSync(filePath)) {
            throw new Error(`The file '${filePath}' does not exist or is not accessible.`);
        }

        // fs.statSync provides the file size without reading the file.
        const { size } = fs.statSync(filePath);
        const partCount = Math.ceil(size / partSize);

        // fs.openSync gives a descriptor that is read from part by part.
        const fd = fs.openSync(filePath, 'r');
        let uploadID = null;

        try {
            const created = await cos.createMultipartUpload({
                Bucket: environment.bucketName,
                Key: itemName,
            }).promise();
            uploadID = created.UploadId;

            const parts = [];
            for (let partNum = 1; partNum <= partCount; partNum++) {
                // Byte offset where this part starts, and how many bytes it holds.
                const start = (partNum - 1) * partSize;
                const length = Math.min(partSize, size - start);

                const uploaded = await cos.uploadPart({
                    Body: readPart(fd, length, start),
                    Bucket: environment.bucketName,
                    Key: itemName,
                    PartNumber: partNum,
                    UploadId: uploadID,
                }).promise();
                parts.push({ETag: uploaded.ETag, PartNumber: partNum});
            }

            await cos.completeMultipartUpload({
                Bucket: environment.bucketName,
                Key: itemName,
                MultipartUpload: {
                    Parts: parts,
                },
                UploadId: uploadID,
            }).promise();
        } catch (err) {
            // Abort the unfinished upload on the service; a failed abort is
            // logged but must not hide the original error.
            if (uploadID !== null) {
                await cos.abortMultipartUpload({
                    Bucket: environment.bucketName,
                    Key: itemName,
                    UploadId: uploadID,
                }).promise().catch((abortErr) => {
                    console.error(`ERROR: failed to abort multi-part upload for ${itemName}:`, abortErr);
                });
            }
            throw err;
        } finally {
            // Close the descriptor exactly once, on success and on failure.
            fs.closeSync(fd);
        }

        // Delete the local file only after the upload is known to be complete.
        fs.unlinkSync(filePath);
    };

    return transfer().then(
        () => itemName,
        (err) => {
            console.error(`ERROR: failed to upload ${itemName}:`, err);
            // Callers identify the failed item by its name, so the rejection
            // value stays itemName.
            return Promise.reject(itemName);
        }
    );
}