You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

189 lines
5.8 KiB

import { Response } from 'express';
import { AuthRequest } from '../types/index.js';
import { db } from '../config/database.js';
import { generateSpeech } from '../services/elevenlabs.service.js';
/** One year, in seconds; used as the Cache-Control max-age for audio responses. */
const PRONUNCIATION_MAX_AGE_SECONDS = 31536000;

/** Maps an audio format name to its MIME type; anything unrecognised is served as MP3. */
function pronunciationContentType(format: string): string {
  switch (format) {
    case 'wav':
      return 'audio/wav';
    case 'ogg':
      return 'audio/ogg';
    default:
      return 'audio/mpeg';
  }
}

/** Sets the headers shared by the cached and the freshly generated audio responses. */
function setPronunciationHeaders(res: Response, format: string, byteLength: number): void {
  res.setHeader('Content-Type', pronunciationContentType(format));
  res.setHeader('Content-Length', byteLength);
  res.setHeader('Cache-Control', `public, max-age=${PRONUNCIATION_MAX_AGE_SECONDS}`);
}

/** Converts the value read from the `audio_data` column into a Buffer. */
function pronunciationBlobToBuffer(value: unknown): Buffer {
  if (value instanceof Uint8Array) {
    return Buffer.from(value);
  }
  if (value instanceof ArrayBuffer) {
    return Buffer.from(value);
  }
  // Last resort: let Buffer.from interpret whatever the driver returned.
  // It throws for unsupported values (e.g. null), which the caller reports as a 500.
  return Buffer.from(value as string);
}

/** Returns the `message` of a thrown value, or '' when it has no string message. */
function thrownMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === 'object' && error !== null && 'message' in error) {
    const { message } = error as { message?: unknown };
    return typeof message === 'string' ? message : '';
  }
  return '';
}

/**
 * Get pronunciation audio for a word.
 *
 * Looks the word up by `req.params.wordId`, serves the audio stored in
 * `word_pronunciations` when a row exists for the word and voice, and otherwise
 * calls `generateSpeech`, responds with the generated audio and stores it in
 * the cache without waiting for the write to finish.
 *
 * Responses:
 * - 200 with the audio bytes and a one-year public Cache-Control header
 * - 400 `INVALID_WORD_ID` when the id is not a positive decimal integer
 * - 404 `WORD_NOT_FOUND` when no word has that id
 * - 503 `ELEVENLABS_NOT_CONFIGURED` / 504 `SPEECH_GENERATION_TIMEOUT` /
 *   500 `SPEECH_GENERATION_ERROR` when speech generation fails
 * - 500 `PRONOUNCE_WORD_ERROR` for any other failure
 *
 * @param req - Request whose `params.wordId` identifies the word.
 * @param res - Response used to send either the audio or a JSON error body.
 */
export async function pronounceWord(req: AuthRequest, res: Response) {
  try {
    // Accept only plain decimal digits: parseInt alone would silently read
    // "12abc" as 12 and "1.5" as 1.
    const rawWordId = req.params.wordId;
    const wordId = /^\d+$/.test(rawWordId) ? Number.parseInt(rawWordId, 10) : NaN;
    if (!Number.isSafeInteger(wordId) || wordId <= 0) {
      return res.status(400).json({
        success: false,
        error: {
          code: 'INVALID_WORD_ID',
          message: 'Invalid word ID'
        }
      });
    }

    // Get word text from database
    const wordResult = await db.execute({
      sql: 'SELECT word FROM words WHERE id = ?',
      args: [wordId]
    });
    const wordRow = wordResult.rows[0];
    if (!wordRow) {
      return res.status(404).json({
        success: false,
        error: {
          code: 'WORD_NOT_FOUND',
          message: 'Word not found'
        }
      });
    }
    const wordText = wordRow.word as string;
    const voiceId = '1FSm04EkRXraU6SyzoLr'; // Can be made configurable later

    // Check cache first
    const cacheResult = await db.execute({
      sql: 'SELECT audio_data, audio_format FROM word_pronunciations WHERE word_id = ? AND voice_id = ?',
      args: [wordId, voiceId]
    });
    const cachedRow = cacheResult.rows[0];
    if (cachedRow) {
      const audioBuffer = pronunciationBlobToBuffer(cachedRow.audio_data);
      setPronunciationHeaders(res, cachedRow.audio_format as string, audioBuffer.length);
      res.send(audioBuffer);
      return;
    }

    // Not cached - generate speech using ElevenLabs
    try {
      const { audio, format } = await generateSpeech(wordText, voiceId);

      // Store in cache in the background so the response is not delayed.
      // A failed cache write is logged and must not fail the request.
      void db.execute({
        sql: `
          INSERT INTO word_pronunciations (word_id, voice_id, audio_data, audio_format)
          VALUES (?, ?, ?, ?)
          ON CONFLICT(word_id, voice_id) DO UPDATE SET
            audio_data = excluded.audio_data,
            audio_format = excluded.audio_format,
            created_at = CURRENT_TIMESTAMP
        `,
        args: [wordId, voiceId, audio, format]
      }).catch((err: unknown) => {
        console.error('Error caching pronunciation:', err);
      });

      // Return audio immediately
      setPronunciationHeaders(res, format, audio.length);
      res.send(audio);
      return;
    } catch (error: unknown) {
      console.error('Error generating speech:', error);
      if (res.headersSent) {
        // The audio response already started; a JSON error can no longer be sent.
        return;
      }

      // Thrown values without a string message fall through to the generic error.
      const message = thrownMessage(error);

      // If ElevenLabs fails and API key is not configured, return helpful error
      if (message.includes('not configured')) {
        return res.status(503).json({
          success: false,
          error: {
            code: 'ELEVENLABS_NOT_CONFIGURED',
            message: 'Text-to-speech is not configured. Please set ELEVENLABS_API_KEY environment variable.'
          }
        });
      }

      // Handle timeout errors
      if (message.includes('timed out')) {
        return res.status(504).json({
          success: false,
          error: {
            code: 'SPEECH_GENERATION_TIMEOUT',
            message: 'Speech generation timed out. Please try again.'
          }
        });
      }

      return res.status(500).json({
        success: false,
        error: {
          code: 'SPEECH_GENERATION_ERROR',
          message: message || 'Failed to generate speech pronunciation'
        }
      });
    }
  } catch (error: unknown) {
    console.error('Pronounce word error:', error);
    if (res.headersSent) {
      return;
    }
    res.status(500).json({
      success: false,
      error: {
        code: 'PRONOUNCE_WORD_ERROR',
        message: 'Error generating word pronunciation'
      }
    });
    return;
  }
}
/**
 * Empties the pronunciation cache by deleting every row of
 * `word_pronunciations`, so later requests have to regenerate their audio.
 *
 * Responds with 401 `UNAUTHORIZED` when the request carries no `userId`,
 * with a success body containing the number of deleted rows otherwise, and
 * with 500 `CLEAR_CACHE_ERROR` if anything throws.
 *
 * NOTE(review): intended to be admin only, but this handler checks just that
 * `req.userId` is set; presumably the admin restriction is enforced by
 * routing/middleware. Confirm against the route definition.
 *
 * @param req - Request; only `userId` is read.
 * @param res - Response used for the JSON result.
 */
export async function clearPronunciationsCache(req: AuthRequest, res: Response) {
  try {
    const isAuthenticated = Boolean(req.userId);

    if (isAuthenticated) {
      // Unconditional delete: the whole cache table is wiped.
      const deletion = await db.execute({
        sql: 'DELETE FROM word_pronunciations',
        args: []
      });
      const deletedCount = deletion.rowsAffected || 0;

      res.json({
        success: true,
        data: {
          message: 'Pronunciation cache cleared successfully',
          deletedCount
        }
      });
      return;
    }

    const unauthorized = {
      success: false,
      error: {
        code: 'UNAUTHORIZED',
        message: 'Authentication required'
      }
    };
    return res.status(401).json(unauthorized);
  } catch (failure: unknown) {
    console.error('Clear pronunciations cache error:', failure);

    const serverError = {
      success: false,
      error: {
        code: 'CLEAR_CACHE_ERROR',
        message: 'Error clearing pronunciation cache'
      }
    };
    res.status(500).json(serverError);
  }
}