在 Node.js 中我必须读取文件夹中的文件,并为每个文件获取文件处理程序信息,这是我使用的最简单的实现fs.readdir
:
FileServer.prototype.listLocal = function (params) {
var self = this;
var options = {
limit: 100,
desc: 1
};
// override defaults
for (var attrname in params) { options[attrname] = params[attrname]; }
// media path is the media folder
var mediaDir = path.join(self._options.mediaDir, path.sep);
return new Promise((resolve, reject) => {
fs.readdir(mediaDir, (error, results) => {
if (error) {
self.logger.error("FileServer.list error:%s", error);
return reject(error);
} else { // list files
// cut to max files
results = results.slice(0, options.limit);
// filter default ext
results = results.filter(item => {
return (item.indexOf('.mp3') > -1);
});
// format meta data
results = results.map(file => {
var filePath = path.join(self._options.mediaDir, path.sep, file);
var item = {
name: file,
path: filePath
};
const fd = fs.openSync(filePath, 'r');
var fstat = fs.fstatSync(fd);
// file size in bytes
item.size = fstat.size;
item.sizehr = self.formatSizeUnits(fstat.size);
// "Birth Time" Time of file creation. Set once when the file is created.
item.birthtime = fstat.birthtime;
// "Modified Time" Time when file data last modified.
item.mtime = fstat.mtime;
// "Access Time" Time when file data last accessed.
item.atime = fstat.atime;
item.timestamp = new Date(item.mtime).getTime();
item.media_id = path.basename(filePath, '.mp3');
fs.closeSync(fd);//close file
return item;
});
if (options.desc) { // sort by most recent
results.sort(function (a, b) {
return b.timestamp - a.timestamp;
});
} else { // sort by older
results.sort(function (a, b) {
return a.timestamp - b.timestamp;
});
}
return resolve(results);
}
})
});
}
这样对于每个文件我都会得到一个项目数组
{
"name": "sample121.mp3",
"path": "/data/sample121.mp3",
"size": 5751405,
"sizehr": "5.4850 MB",
"birthtime": "2018-10-08T15:26:08.397Z",
"mtime": "2018-10-08T15:26:11.650Z",
"atime": "2018-10-10T09:01:48.534Z",
"timestamp": 1539012371650,
"media_id": "sample121"
}
也就是说,问题是 Node.js 已知fs.readdir
当要列出的文件夹包含大量文件(例如从一万到十万甚至更多)时,可能会冻结节点 I/O 循环。
这是一个已知问题 - 请参阅here https://github.com/tagspaces/tagspaces/issues/738了解更多信息。
也有改善计划fs.readdir
在某种程度上,比如流媒体 - 请参阅here https://github.com/nodejs/node/issues/583对这个。
与此同时,我正在寻找类似的补丁,因为我的文件夹非常大。
由于问题是事件循环被冻结,有人提出了一个解决方案process.nextTick
,我在这里合奏了
FileServer.prototype.listLocalNextTick = function (params) {
var self = this;
var options = {
limit: 100,
desc: 1
};
// override defaults
for (var attrname in params) { options[attrname] = params[attrname]; }
// media path is the media folder
var mediaDir = path.join(self._options.mediaDir, path.sep);
return new Promise((resolve, reject) => {
var AsyncArrayProcessor = function (inArray, inEntryProcessingFunction) {
var elemNum = 0;
var arrLen = inArray.length;
var ArrayIterator = function () {
inEntryProcessingFunction(inArray[elemNum]);
elemNum++;
if (elemNum < arrLen) process.nextTick(ArrayIterator);
}
if (elemNum < arrLen) process.nextTick(ArrayIterator);
}
fs.readdir(mediaDir, function (error, results) {
if (error) {
self.logger.error("FileServer.list error:%s", error);
return reject(error);
}
// cut to max files
results = results.slice(0, options.limit);
// filter default ext
results = results.filter(item => {
return (item.indexOf('.mp3') > -1);
});
var ProcessDirectoryEntry = function (file) {
// This may be as complex as you may fit in a single event loop
var filePath = path.join(self._options.mediaDir, path.sep, file);
var item = {
name: file,
path: filePath
};
const fd = fs.openSync(filePath, 'r');
var fstat = fs.fstatSync(fd);
// file size in bytes
item.size = fstat.size;
item.sizehr = self.formatSizeUnits(fstat.size);
// "Birth Time" Time of file creation. Set once when the file is created.
item.birthtime = fstat.birthtime;
// "Modified Time" Time when file data last modified.
item.mtime = fstat.mtime;
// "Access Time" Time when file data last accessed.
item.atime = fstat.atime;
item.timestamp = new Date(item.mtime).getTime();
item.media_id = path.basename(filePath, '.mp3');
// map to file item
file = item;
}//ProcessDirectoryEntry
// LP: fs.readdir() callback is finished, event loop continues...
AsyncArrayProcessor(results, ProcessDirectoryEntry);
if (options.desc) { // sort by most recent
results.sort(function (a, b) {
return b.timestamp - a.timestamp;
});
} else { // sort by older
results.sort(function (a, b) {
return a.timestamp - b.timestamp;
});
}
return resolve(results);
});
});
}//listLocalNextTick
这似乎避免了原来的问题,但我无法再将文件列表映射到我之前使用文件处理程序执行的项目,因为在运行时AsyncArrayProcessor
在文件列表上,因此ProcessDirectoryEntry
在每个文件条目上的异步性质process.nextTick
导致我无法取回results
数组修改如上listLocal
我刚刚进行迭代的函数array.map
of the results
大批。
如何打补丁listLocalNextTick
表现得像listLocal
但保留process.nextTick
方法?
[UPDATE]
根据建议的解决方案,这是迄今为止最好的实现:
/**
* Scan files in directory
* @param {String} needle
* @param {object} options
* @returns {nodeStream}
*/
scanDirStream : function(needle,params) {
var options = {
type: 'f',
name: '*'
};
for (var attrname in params) { options[attrname] = params[attrname]; }
return new Promise((resolve, reject) => {
var opt=[needle];
for (var k in options) {
var v = options[k];
if (!Util.empty(v)) {
opt.push('-' + k);
opt.push(v);
}
};
var data='';
var listing = spawn('find',opt)
listing.stdout.on('data', _data => {
var buff=Buffer.from(_data, 'utf-8').toString();
if(buff!='') data+=buff;
})
listing.stderr.on('data', error => {
return reject(Buffer.from(error, 'utf-8').toString());
});
listing.on('close', (code) => {
var res = data.split('\n');
return resolve(res);
});
});
使用示例:
scanDirStream(mediaRoot,{
name: '*.mp3'
})
.then(results => {
console.info("files:%d", results);
})
.catch(error => {
console.error("error %s", error);
});
最终可以修改为在每次添加一个刻度回调stdout.on
在监听目录中获取新文件时发出事件。