Problem saving audioBuffer to file - whisper api

Hi, how can I convert the audioBuffer into e.g. a WAV file? I want to use Whisper for speech recognition. This is my code:
const fs = require("fs");
const tmp = require("tmp");

const tmpFile = await new Promise((resolve, reject) => {
  tmp.file({ postfix: ".ogg" }, (err, path, fd, cleanup) => {
    if (err) {
      reject(err);
      return;
    }

    fs.writeFile(path, msg.audioBuffer, (err) => {
      if (err) {
        reject(err);
        return;
      }

      resolve({ path, cleanup });
    });
  });
});
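For reference, giving raw PCM a .ogg extension doesn't create a valid Ogg container; a player needs a real header. Below is a minimal sketch that wraps the buffer in a standard 44-byte WAV header instead, assuming the buffer is 16-bit little-endian PCM; the sample rate and channel count here are assumptions that have to match the decoder's actual settings.

function pcmToWav(pcm, sampleRate = 48000, channels = 2) {
  const bitDepth = 16; // assumes 16-bit little-endian PCM
  const byteRate = sampleRate * channels * (bitDepth / 8);
  const blockAlign = channels * (bitDepth / 8);
  const header = Buffer.alloc(44);
  header.write("RIFF", 0);
  header.writeUInt32LE(36 + pcm.length, 4); // total file size minus 8 bytes
  header.write("WAVE", 8);
  header.write("fmt ", 12);
  header.writeUInt32LE(16, 16); // fmt chunk size for plain PCM
  header.writeUInt16LE(1, 20); // audio format 1 = uncompressed PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitDepth, 34);
  header.write("data", 36);
  header.writeUInt32LE(pcm.length, 40);
  return Buffer.concat([header, pcm]);
}

// usage: write a playable file instead of a bare PCM dump
// fs.writeFile(path, pcmToWav(msg.audioBuffer), ...)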
The file is written successfully, but I can't play it (unrecognized file format). I also tried to post the audioBuffer directly, but that doesn't work either.
async function resolveSpeechWithWhisper(audioBuffer) {
  const transcription = await openai.audio.transcriptions.create({
    file: audioBuffer,
    model: "whisper-1",
    response_format: "verbose_json",
    timestamp_granularities: ["word"],
  });
}
error: BadRequestError: 400 Could not parse multipart form
    at APIError.generate (/home/git/discord-speech-recognition/examples/simpleBot/node_modules/openai/error.js:44:20)
    at OpenAI.makeStatusError (/home/git/discord-speech-recognition/examples/simpleBot/node_modules/openai/core.js:263:33)
    at OpenAI.makeRequest (/home/git/discord-speech-recognition/examples/simpleBot/node_modules/openai/core.js:306:30)
    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
    at async Object.resolveSpeechWithWhisper [as speechRecognition] (/home/git/discord-speech-recognition/examples/simpleBot/index.js:20:25) {
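That 400 is consistent with passing a bare Buffer as file: the SDK needs a file-like object with a name to build the multipart form. A minimal sketch of two ways that should satisfy it, reusing the hypothetical pcmToWav helper sketched above (toFile is the v4 SDK's upload wrapper; the filename is just a label whose extension tells the API the container type):

const { toFile } = require("openai");

async function resolveSpeechWithWhisper(audioBuffer) {
  // Option A: wrap the in-memory buffer, no temp file needed
  const file = await toFile(pcmToWav(audioBuffer), "audio.wav");
  // Option B: stream the temp file written earlier
  // const file = fs.createReadStream(tmpFile.path);

  const transcription = await openai.audio.transcriptions.create({
    file,
    model: "whisper-1",
    response_format: "verbose_json",
    timestamp_granularities: ["word"],
  });

  // discord-speech-recognition appears to use the resolved value as
  // msg.content, so return the recognized text
  return transcription.text;
}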
"@discordjs/opus": "^0.8.0",
"@discordjs/voice": "^0.11.0",
"discord-speech-recognition": "^3.4.1",
"discord.js": "^14.3.0",
"dotenv": "^16.4.5",
"openai": "^4.29.2",
"tmp": "^0.2.3",
"tweetnacl": "^1.0.3"
"@discordjs/opus": "^0.8.0",
"@discordjs/voice": "^0.11.0",
"discord-speech-recognition": "^3.4.1",
"discord.js": "^14.3.0",
"dotenv": "^16.4.5",
"openai": "^4.29.2",
"tmp": "^0.2.3",
"tweetnacl": "^1.0.3"
Can anyone help me with this? Thanks
5 Replies
d.js toolkit · 3mo ago
- What's your exact discord.js (npm list discord.js) and node (node -v) version?
- Not a discord.js issue? Check out #other-js-ts.
- Consider reading #how-to-get-help to improve your question!
- Explain what exactly your issue is.
- Post the full error stack trace, not just the top part!
- Show your code!
- Issue solved? Press the button!
- Marked as resolved by OP
jaemil · 3mo ago
node: v20.11.0. I'm using the SpeechEvents.speech event.
client.on(SpeechEvents.speech, async (msg) => {
  // If bot didn't recognize speech, content will be empty
  // if (!msg.content) return;

  console.log("msg", msg);

  console.log("audioBuffer type:", typeof msg.audioBuffer);
  console.log("audioBuffer:", msg.audioBuffer);
});
const client = new Client({
  intents: [
    GatewayIntentBits.GuildVoiceStates,
    GatewayIntentBits.GuildMessages,
    GatewayIntentBits.Guilds,
    GatewayIntentBits.MessageContent,
  ],
});

addSpeechEvent(client, {
  speechRecognition: resolveSpeechWithWhisper,
  ignoreBots: true,
});
jaemil · 3mo ago
GitHub: Rei-x/discord-speech-recognition - Speech to text extension for discord.js
https://github.com/Rei-x/discord-speech-recognition
jaemil · 3mo ago
const opusStream = receiver.subscribe(userId, {
  end: {
    behavior: EndBehaviorType.AfterSilence,
    duration: 300,
  },
});

const bufferData: Uint8Array[] = [];
opusStream
  .pipe(new OpusDecodingStream())
  .on("data", (data: Uint8Array) => {
    bufferData.push(data);
  });

opusStream.on("end", async () => {
  const voiceMessage = await createVoiceMessage({
    client,
    bufferData,
    user,
    connection,
    speechOptions,
  });

  if (voiceMessage) client.emit(SpeechEvents.speech, voiceMessage);
});
This is how audioBuffer is created: https://github.com/Rei-x/discord-speech-recognition/blob/main/src/bot/events/speech.ts
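Worth noting from that file: the decoded chunks are raw PCM frames with no container header, which would explain why the .ogg dump was unplayable. Assuming createVoiceMessage simply concatenates them, audioBuffer amounts to:

// a sketch of what msg.audioBuffer presumably is, per speech.ts:
// decoded Opus frames joined into one headerless PCM buffer
const audioBuffer = Buffer.concat(bufferData);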
jaemil · 3mo ago
Thank you so much, it did work! Now I only need to double the sample rate from 48000 to 96000; I don't know why the recorded audio is in slow motion.
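Doubling the declared sample rate masks a channel mismatch rather than fixing it: stereo PCM read as mono contains twice as many samples per second of audio, so it plays at half speed. Assuming the decoder outputs 48 kHz stereo, here is a sketch of two cleaner fixes, again using the hypothetical pcmToWav helper from above:

// Fix 1: declare both channels in the WAV header
const wav = pcmToWav(msg.audioBuffer, 48000, 2);

// Fix 2: downmix interleaved 16-bit stereo to mono (Whisper handles mono fine)
function stereoToMono(pcm) {
  // assumes whole 4-byte stereo frames of 16-bit little-endian samples
  const mono = Buffer.alloc(pcm.length / 2);
  for (let i = 0; i < pcm.length / 4; i++) {
    const left = pcm.readInt16LE(i * 4);
    const right = pcm.readInt16LE(i * 4 + 2);
    mono.writeInt16LE(Math.round((left + right) / 2), i * 2);
  }
  return mono;
}
const monoWav = pcmToWav(stereoToMono(msg.audioBuffer), 48000, 1);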