我正在构建一个网页,用于记录用户设备中的音频,并将其发送到 Microsoft 的认知语音服务以进行语音到文本的转换。到目前为止,我已经能够创建和播放用 JavaScript 制作的 .ogg 文件,但我需要获取 .wav 格式的文件。
斑点类型audio/wav
不能依赖,因为并非所有浏览器都支持它(至少我的浏览器不支持)。 Blob 被发送到 Django 服务器并由其存储。当我尝试使用 PySoundFile 打开这些文件时,出现错误:File contains data in an unknown format
。这些斑点是用new Blob(chunks, { type: 'audio/ogg; codecs=opus' })
并保存使用django.db.FileField https://docs.djangoproject.com/en/2.2/ref/models/fields/#django.db.models.FileField。 Blob 块来自MediaRecorder.ondataavailable
.
更新:
我放弃使用媒体记录器 https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder并选择了脚本处理器节点 https://developer.mozilla.org/en-US/docs/Web/API/ScriptProcessorNode反而。同样,Firefox 可以工作,但 Chrome 不能。 Chrome 似乎在音频末尾获取了一小部分,并在音频长度内重复了这一部分。这是我使用的代码,它基于 Matt Diamond 的工作github.com/mattdiamond/Recorderjs https://github.com/mattdiamond/Recorderjs。使用他的作品的演示可以在webaudiodemos.appspot.com/AudioRecorder/index.html https://webaudiodemos.appspot.com/AudioRecorder/index.html,它对我来说适用于 Firefox 和 Chrome。另外,我原来的代码在一个类中,但我不想包含整个类。如果我在翻译中犯了任何语法错误,我深表歉意。
let recBuffers = [[], []];
let recLength = 0;
let numChannels = 2;
let listening = false;
let timeout = null;
let constraints = {
audio: true
};
let failedToGetUserMedia = false;
if (navigator.getUserMedia) {
navigator.getUserMedia(constraints, (stream) => {
init(stream);
}, (err) => {
alert('Unable to access audio.\n\n' + err);
console.log('The following error occurred: ' + err);
failedToGetUserMedia = true;
});
}
else if (navigator.mediaDevices.getUserMedia) {
navigator.mediaDevices.getUserMedia(constraints).then((stream) => {
init(stream);
}).catch((err) => {
alert('Unable to access audio.\n\n' + err);
console.log('The following error occurred: ' + err);
failedToGetUserMedia = true;
});
}
else failedToGetUserMedia = true;
function beginRecording() {
recBuffers = [[], []];
recLength = 0;
listening = true;
timeout = setTimeout(() => {
endRecording();
}, maxTime);
}
function endRecording() {
clearTimeout(timeout);
timeout = null;
exportWAV();
}
function init(stream) {
let audioContext = new AudioContext();
let source = audioContext.createMediaStreamSource(stream);
let context = source.context;
let node = (context.createScriptProcessor || context.createJavaScriptNode).call(context, 4096, numChannels, numChannels);
node.onaudioprocess = (e) => {
if (!listening) return;
for (var i = 0; i < numChannels; i++) {
recBuffers[i].push(e.inputBuffer.getChannelData(i));
}
recLength += recBuffers[0][0].length;
}
source.connect(node);
node.connect(context.destination);
}
function mergeBuffers(buffers, len) {
let result = new Float32Array(len);
let offset = 0;
for (var i = 0; i < buffers.length; i++) {
result.set(buffers[i], offset);
offset += buffers[i].length;
}
return result;
}
function interleave(inputL, inputR) {
let len = inputL.length + inputR.length;
let result = new Float32Array(len);
let index = 0;
let inputIndex = 0;
while (index < len) {
result[index++] = inputL[inputIndex];
result[index++] = inputR[inputIndex];
inputIndex++;
}
return result;
}
function exportWAV() {
let buffers = [];
for (var i = 0; i < numChannels; i++) {
buffers.push(mergeBuffers(recBuffers[i], recLength));
}
let interleaved = numChannels == 2 ? interleave(buffers[0], buffers[1]) : buffers[0];
let dataView = encodeWAV(interleaved);
let blob = new Blob([ dataView ], { type: 'audio/wav' });
blob.name = Math.floor((new Date()).getTime() / 1000) + '.wav';
listening = false;
return blob;
}
function floatTo16BitPCM(output, offset, input){
for (var i = 0; i < input.length; i++, offset+=2){
var s = Math.max(-1, Math.min(1, input[i]));
output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
}
}
function writeString(view, offset, string){
for (var i = 0; i < string.length; i++) {
view.setUint8(offset + i, string.charCodeAt(i));
}
}
function encodeWAV(samples){
var buffer = new ArrayBuffer(44 + samples.length * 2);
var view = new DataView(buffer);
/* RIFF identifier */
writeString(view, 0, 'RIFF');
/* file length */
view.setUint32(4, 36 + samples.length * 2, true);
/* RIFF type */
writeString(view, 8, 'WAVE');
/* format chunk identifier */
writeString(view, 12, 'fmt ');
/* format chunk length */
view.setUint32(16, 16, true);
/* sample format (raw) */
view.setUint16(20, 1, true);
/* channel count */
view.setUint16(22, numChannels, true);
/* sample rate */
view.setUint32(24, context.sampleRate, true);
/* byte rate (sample rate * block align) */
view.setUint32(28, context.sampleRate * 4, true);
/* block align (channel count * bytes per sample) */
view.setUint16(32, numChannels * 2, true);
/* bits per sample */
view.setUint16(34, 16, true);
/* data chunk identifier */
writeString(view, 36, 'data');
/* data chunk length */
view.setUint32(40, samples.length * 2, true);
floatTo16BitPCM(view, 44, samples);
return view;
}
if (!failedToGetUserMedia) beginRecording();
更新:
我已经确认,当 Chrome 缓冲区的值作为 Firefox 上交错的输入提供时,输出与 Chrome 的输出相同。这表明 Chrome 没有用正确的值填充 recBuffers。事实上,当我在 Chrome 上查看 recBuffers 时,每个通道都充满了交替列表。例如:
recBuffers = [[
[2, 3],
[7, 1],
[2, 3],
[7, 1],
[2, 3],
[7, 1],
[2, 3],
[7, 1],
[2, 3],
[7, 1]
], [
[5, 4],
[6, 8],
[5, 4],
[6, 8],
[5, 4],
[6, 8],
[5, 4],
[6, 8],
[5, 4],
[6, 8]
]]
当然,实际值是不同的。这只是一个例子来说明这一点。