9 changed files with 409 additions and 14 deletions
@@ -0,0 +1,3 @@
-- Clear all cached pronunciations
-- This will force regeneration with new model/settings on next play
DELETE FROM word_pronunciations;
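The migration only clears rows; for context, the cache queries in the controller below assume a word_pronunciations table keyed by (word_id, voice_id) with an audio BLOB, a format column, and a created_at timestamp. A minimal sketch of that shape, using the same libsql db.execute helper the controller uses — the real CREATE TABLE migration is in one of the other changed files and its column types may differ:

import { db } from '../config/database.js';

// Sketch only: the shape implied by the SELECT/INSERT in speechSounds.controller.ts.
// Column types and the foreign key are inferred, not copied from the actual migration.
await db.execute(`
  CREATE TABLE IF NOT EXISTS word_pronunciations (
    word_id INTEGER NOT NULL REFERENCES words(id),
    voice_id TEXT NOT NULL,
    audio_data BLOB NOT NULL,
    audio_format TEXT NOT NULL DEFAULT 'mp3',
    created_at TEXT DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (word_id, voice_id)
  )
`);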
@@ -0,0 +1,174 @@
import { Response } from 'express';
import { AuthRequest } from '../types/index.js';
import { db } from '../config/database.js';
import { generateSpeech } from '../services/elevenlabs.service.js';

/**
 * Get pronunciation audio for a word
 * Checks cache first, then calls ElevenLabs API if not cached
 */
export async function pronounceWord(req: AuthRequest, res: Response) {
  try {
    const wordId = parseInt(req.params.wordId);

    if (!wordId || isNaN(wordId)) {
      return res.status(400).json({
        success: false,
        error: {
          code: 'INVALID_WORD_ID',
          message: 'Invalid word ID'
        }
      });
    }

    // Get word text from database
    const wordResult = await db.execute({
      sql: 'SELECT word FROM words WHERE id = ?',
      args: [wordId]
    });

    if (!wordResult.rows.length) {
      return res.status(404).json({
        success: false,
        error: {
          code: 'WORD_NOT_FOUND',
          message: 'Word not found'
        }
      });
    }

    const wordText = wordResult.rows[0].word as string;
    const voiceId = '1FSm04EkRXraU6SyzoLr'; // Can be made configurable later

    // Check cache first
    const cacheResult = await db.execute({
      sql: 'SELECT audio_data, audio_format FROM word_pronunciations WHERE word_id = ? AND voice_id = ?',
      args: [wordId, voiceId]
    });

    if (cacheResult.rows.length > 0) {
      // Return cached audio
      const audioDataValue = cacheResult.rows[0].audio_data;
      const audioFormat = cacheResult.rows[0].audio_format as string;

      // Convert database BLOB to Buffer
      let audioBuffer: Buffer;
      if (audioDataValue instanceof Uint8Array) {
        audioBuffer = Buffer.from(audioDataValue);
      } else if (audioDataValue instanceof ArrayBuffer) {
        audioBuffer = Buffer.from(audioDataValue);
      } else {
        // Fallback: convert to string then to buffer
        audioBuffer = Buffer.from(audioDataValue as any);
      }

      const contentType = audioFormat === 'mp3' ? 'audio/mpeg' :
                          audioFormat === 'wav' ? 'audio/wav' :
                          audioFormat === 'ogg' ? 'audio/ogg' : 'audio/mpeg';

      res.setHeader('Content-Type', contentType);
      res.setHeader('Content-Length', audioBuffer.length);
      res.setHeader('Cache-Control', 'public, max-age=31536000'); // Cache for 1 year
      res.send(audioBuffer);
      return;
    }

    // Not cached - generate speech using ElevenLabs
    try {
      const { audio, format } = await generateSpeech(wordText, voiceId);

      // Store in cache
      await db.execute({
        sql: `
          INSERT INTO word_pronunciations (word_id, voice_id, audio_data, audio_format)
          VALUES (?, ?, ?, ?)
          ON CONFLICT(word_id, voice_id) DO UPDATE SET
            audio_data = excluded.audio_data,
            audio_format = excluded.audio_format,
            created_at = CURRENT_TIMESTAMP
        `,
        args: [wordId, voiceId, audio, format]
      });

      // Return audio
      const contentType = format === 'mp3' ? 'audio/mpeg' :
                          format === 'wav' ? 'audio/wav' :
                          format === 'ogg' ? 'audio/ogg' : 'audio/mpeg';

      res.setHeader('Content-Type', contentType);
      res.setHeader('Content-Length', audio.length);
      res.setHeader('Cache-Control', 'public, max-age=31536000'); // Cache for 1 year
      res.send(audio);
    } catch (error: any) {
      console.error('Error generating speech:', error);

      // If ElevenLabs fails and API key is not configured, return helpful error
      if (error.message.includes('not configured')) {
        return res.status(503).json({
          success: false,
          error: {
            code: 'ELEVENLABS_NOT_CONFIGURED',
            message: 'Text-to-speech is not configured. Please set ELEVENLABS_API_KEY environment variable.'
          }
        });
      }

      return res.status(500).json({
        success: false,
        error: {
          code: 'SPEECH_GENERATION_ERROR',
          message: 'Failed to generate speech pronunciation'
        }
      });
    }
  } catch (error: any) {
    console.error('Pronounce word error:', error);
    res.status(500).json({
      success: false,
      error: {
        code: 'PRONOUNCE_WORD_ERROR',
        message: 'Error generating word pronunciation'
      }
    });
  }
}

/**
 * Clear all cached pronunciations (admin only)
 * Forces regeneration with current model/settings
 */
export async function clearPronunciationsCache(req: AuthRequest, res: Response) {
  try {
    if (!req.userId) {
      return res.status(401).json({
        success: false,
        error: {
          code: 'UNAUTHORIZED',
          message: 'Authentication required'
        }
      });
    }

    // Delete all cached pronunciations
    const result = await db.execute({
      sql: 'DELETE FROM word_pronunciations'
    });

    res.json({
      success: true,
      data: {
        message: 'Pronunciation cache cleared successfully',
        deletedCount: result.rowsAffected || 0
      }
    });
  } catch (error: any) {
    console.error('Clear pronunciations cache error:', error);
    res.status(500).json({
      success: false,
      error: {
        code: 'CLEAR_CACHE_ERROR',
        message: 'Error clearing pronunciation cache'
      }
    });
  }
}
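The format-to-MIME-type ternary appears twice in the controller (cached and freshly generated paths). If it ever needs to be shared, one possible extraction is a small helper like the sketch below; this is not part of the PR, just an illustration of the same mapping with the same audio/mpeg default.

// Possible shared helper (not in this PR): maps a stored audio format to a MIME type,
// defaulting to 'audio/mpeg' exactly as the inline ternaries do.
function contentTypeFor(format: string): string {
  switch (format) {
    case 'wav': return 'audio/wav';
    case 'ogg': return 'audio/ogg';
    case 'mp3':
    default:    return 'audio/mpeg';
  }
}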
@@ -0,0 +1,14 @@
import { Router } from 'express';
import { pronounceWord, clearPronunciationsCache } from '../controllers/speechSounds.controller.js';
import { authMiddleware } from '../middleware/auth.js';
import { adminMiddleware } from '../middleware/admin.js';

const router = Router();

// Public route - no authentication required for pronunciation
router.get('/pronounce/:wordId', pronounceWord);

// Admin route - clear pronunciation cache
router.delete('/cache', authMiddleware, adminMiddleware, clearPronunciationsCache);

export default router;
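The router is presumably mounted under a /speech-sounds prefix, since the frontend helper further down requests `${API_BASE_URL}/speech-sounds/pronounce/${wordId}`. A sketch of that wiring — the app file, import path, and exact mount prefix are assumptions inferred from that URL, not shown in this diff:

// Sketch of the assumed mounting in the main Express app (path and filename are guesses).
import express from 'express';
import speechSoundsRoutes from './routes/speechSounds.routes.js';

const app = express();
app.use('/api/speech-sounds', speechSoundsRoutes);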
@@ -0,0 +1,74 @@
import { env } from '../config/env.js';

const ELEVENLABS_API_URL = 'https://api.elevenlabs.io/v1';
const DEFAULT_VOICE_ID = '21m00Tcm4TlvDq8ikWAM'; // Rachel - a friendly, clear voice

export interface ElevenLabsResponse {
  audio: Buffer;
  format: string;
}

/**
 * Generate speech audio using ElevenLabs API
 * @param text The text to convert to speech
 * @param voiceId Optional voice ID (defaults to Rachel)
 * @returns Audio buffer and format
 */
export async function generateSpeech(
  text: string,
  voiceId: string = DEFAULT_VOICE_ID
): Promise<ElevenLabsResponse> {
  if (!env.elevenLabsApiKey) {
    throw new Error('ElevenLabs API key is not configured');
  }

  if (!text || text.trim().length === 0) {
    throw new Error('Text cannot be empty');
  }

  try {
    const response = await fetch(`${ELEVENLABS_API_URL}/text-to-speech/${voiceId}`, {
      method: 'POST',
      headers: {
        'Accept': 'audio/mpeg',
        'Content-Type': 'application/json',
        'xi-api-key': env.elevenLabsApiKey
      },
      body: JSON.stringify({
        text: text.trim(),
        model_id: 'eleven_turbo_v2_5'
        // No voice_settings - uses the voice's default settings from ElevenLabs
      })
    });

    if (!response.ok) {
      const errorText = await response.text();
      console.error('ElevenLabs API error:', response.status, errorText);
      throw new Error(`ElevenLabs API error: ${response.status} - ${errorText}`);
    }

    const audioBuffer = Buffer.from(await response.arrayBuffer());
    const contentType = response.headers.get('content-type') || 'audio/mpeg';

    // Determine format from content type
    let format = 'mp3';
    if (contentType.includes('mp3')) {
      format = 'mp3';
    } else if (contentType.includes('wav')) {
      format = 'wav';
    } else if (contentType.includes('ogg')) {
      format = 'ogg';
    }

    return {
      audio: audioBuffer,
      format
    };
  } catch (error: any) {
    if (error.message.includes('ElevenLabs API error')) {
      throw error;
    }
    console.error('Error calling ElevenLabs API:', error);
    throw new Error(`Failed to generate speech: ${error.message}`);
  }
}
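The service reads the key from env.elevenLabsApiKey, and the controller's 503 response points users at an ELEVENLABS_API_KEY environment variable. The corresponding entry in config/env.ts presumably looks roughly like this sketch; the real env module isn't shown in this section and likely exposes more fields and validation:

// config/env.ts (sketch): how elevenLabsApiKey is presumably sourced.
export const env = {
  elevenLabsApiKey: process.env.ELEVENLABS_API_KEY || ''
};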
@@ -0,0 +1,47 @@
const API_BASE_URL = import.meta.env.VITE_API_URL || 'http://localhost:8080/api';

/**
 * Play pronunciation audio for a word
 * @param wordId The ID of the word to pronounce
 * @returns Promise that resolves when audio starts playing
 */
export async function playWordPronunciation(wordId: number): Promise<void> {
  return new Promise((resolve, reject) => {
    const audio = new Audio(`${API_BASE_URL}/speech-sounds/pronounce/${wordId}`);

    audio.onloadeddata = () => {
      audio.play().then(() => {
        resolve();
      }).catch((error) => {
        console.error('Error playing audio:', error);
        reject(new Error('Failed to play audio'));
      });
    };

    audio.onerror = (error) => {
      console.error('Audio loading error:', error);
      reject(new Error('Failed to load audio'));
    };

    // Handle audio end
    audio.onended = () => {
      // Clean up
    };
  });
}

/**
 * Check if audio is available for a word (without playing it)
 * @param wordId The ID of the word to check
 * @returns Promise that resolves to true if audio is available
 */
export async function checkAudioAvailability(wordId: number): Promise<boolean> {
  try {
    const response = await fetch(`${API_BASE_URL}/speech-sounds/pronounce/${wordId}`, {
      method: 'HEAD'
    });
    return response.ok;
  } catch (error) {
    return false;
  }
}