Please bookmark this page to avoid losing your image tool!

Image Text To Speech Converter

(Free & Supports Bulk Upload)

Drag & drop your images here or

The result will appear here...
You can edit the JavaScript code below to customize the image tool.
/**
 * Recognizes text from an image using Tesseract.js OCR and reads it aloud
 * using the browser's Web Speech API.
 *
 * The Tesseract.js library is loaded from a CDN on first use (when
 * `window.Tesseract` is not already defined). The function never rejects:
 * any failure is logged to the console and reported inside the returned element.
 *
 * @param {Image} originalImg The source image object to process.
 * @param {string} [lang='eng'] The language for OCR and speech. Uses Tesseract language codes (e.g., 'eng', 'rus', 'fra', 'deu').
 * @param {number} [rate=1] The speaking rate, clamped to 0.1-10. Zero or non-numeric values fall back to 1.
 * @param {number} [pitch=1] The speaking pitch, clamped to 0-2 (0 is honoured). Blank or non-numeric values fall back to 1.
 * @param {string} [voiceName=''] The exact name of a system voice to use (e.g., "Google US English"). If empty or not found, a default voice is used.
 * @returns {Promise<HTMLDivElement>} A promise that resolves to a div element containing the status and recognized text.
 */
async function processImage(originalImg, lang = 'eng', rate = 1, pitch = 1, voiceName = '') {
    // How long to wait for the browser to publish its voice list before
    // speaking with the default voice instead.
    const VOICE_LOAD_TIMEOUT_MS = 2000;

    // Create UI elements to display status and results
    const container = document.createElement('div');
    const statusEl = document.createElement('p');
    const textEl = document.createElement('pre');

    // Basic styling for the returned element
    container.style.fontFamily = 'sans-serif';
    container.style.padding = '10px';
    container.style.border = '1px solid #ddd';
    container.style.borderRadius = '5px';

    statusEl.style.fontWeight = 'bold';
    statusEl.style.margin = '0 0 10px 0';

    textEl.style.whiteSpace = 'pre-wrap';
    textEl.style.wordWrap = 'break-word';
    textEl.style.padding = '10px';
    textEl.style.border = '1px solid #ccc';
    textEl.style.backgroundColor = '#f9f9f9';
    textEl.style.minHeight = '50px';
    textEl.textContent = 'Initializing...';

    container.appendChild(statusEl);
    container.appendChild(textEl);

    const updateStatus = (message) => {
        statusEl.textContent = message;
    };

    // Dynamically loads the Tesseract.js script unless it is already present.
    const loadTesseract = () => {
        return new Promise((resolve, reject) => {
            if (window.Tesseract) {
                resolve();
                return;
            }
            updateStatus('Initializing OCR engine...');
            const script = document.createElement('script');
            script.src = 'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js';
            script.onload = () => resolve();
            script.onerror = () => reject(new Error('Failed to load Tesseract.js library.'));
            document.head.appendChild(script);
        });
    };

    // Converts the pitch argument to a number in [0, 2]. A plain `value || 1`
    // would discard the legitimate minimum pitch of 0, so "unset" inputs are
    // detected explicitly instead.
    const resolvePitch = (value) => {
        if (value === null || value === undefined || String(value).trim() === '') {
            return 1;
        }
        const numeric = Number(value);
        if (!Number.isFinite(numeric)) {
            return 1;
        }
        return Math.max(0, Math.min(2, numeric));
    };

    // Resolves with the available voices. Some browsers populate the list
    // asynchronously and some never fire `voiceschanged`, so a timeout
    // guarantees this promise always settles.
    const loadVoices = () => new Promise((resolve) => {
        const synth = window.speechSynthesis;
        const initialVoices = synth.getVoices();
        if (initialVoices.length > 0) {
            resolve(initialVoices);
            return;
        }

        const canListen = typeof synth.addEventListener === 'function';
        let timerId = null;
        const finish = () => {
            clearTimeout(timerId);
            if (canListen) {
                synth.removeEventListener('voiceschanged', finish);
            }
            resolve(synth.getVoices());
        };

        timerId = setTimeout(finish, VOICE_LOAD_TIMEOUT_MS);
        if (canListen) {
            // addEventListener avoids clobbering any page-level onvoiceschanged handler.
            synth.addEventListener('voiceschanged', finish, { once: true });
        }
    });

    try {
        // Step 1: Ensure Tesseract.js is loaded
        await loadTesseract();

        // Step 2: Perform OCR
        updateStatus(`Creating OCR worker for language: ${lang}`);
        const worker = await Tesseract.createWorker(lang, 1, {
            logger: (m) => {
                if (m.status === 'recognizing text') {
                    updateStatus(`Recognizing text... ${Math.round(m.progress * 100)}%`);
                }
            }
        });

        let recognized;
        try {
            recognized = await worker.recognize(originalImg);
        } finally {
            // Always release the worker, even when recognition fails.
            await worker.terminate();
        }

        const rawText = recognized && recognized.data ? recognized.data.text : '';
        const text = typeof rawText === 'string' ? rawText : '';
        // OCR of an image without text often yields only whitespace/newlines.
        const hasText = text.trim() !== '';

        textEl.textContent = hasText ? text : 'No text could be recognized.';

        if (!hasText) {
            updateStatus('Recognition complete. No text found.');
            return container;
        }

        // Step 3: Use Web Speech API for Text-to-Speech
        if ('speechSynthesis' in window) {
            updateStatus('Preparing to speak...');

            const utterance = new SpeechSynthesisUtterance(text);

            // Map common Tesseract lang codes to BCP 47 tags for better speech compatibility
            const langMap = {
                'eng': 'en-US', 'rus': 'ru-RU', 'deu': 'de-DE',
                'fra': 'fr-FR', 'spa': 'es-ES', 'ita': 'it-IT',
                'jpn': 'ja-JP', 'kor': 'ko-KR', 'chi_sim': 'zh-CN'
            };
            utterance.lang = langMap[lang] || lang;

            // Rate: 0 is outside the valid range, so it (like NaN) falls back to 1.
            utterance.rate = Math.max(0.1, Math.min(10, Number(rate) || 1));
            utterance.pitch = resolvePitch(pitch);

            const voices = await loadVoices();
            if (voiceName) {
                const selectedVoice = voices.find((voice) => voice.name === voiceName);
                if (selectedVoice) {
                    utterance.voice = selectedVoice;
                } else {
                    console.warn(`Voice "${voiceName}" not found. Using default voice for language.`);
                }
            }

            utterance.onstart = () => updateStatus('Speaking...');
            utterance.onend = () => updateStatus('Finished.');
            utterance.onerror = (e) => updateStatus(`Error during speech synthesis: ${e.error}`);

            // Cancel any prior speech and speak the new utterance
            window.speechSynthesis.cancel();
            window.speechSynthesis.speak(utterance);

        } else {
            updateStatus('Text-to-Speech is not supported by your browser.');
        }

    } catch (error) {
        console.error('OCR/TTS Error:', error);
        updateStatus(`An error occurred: ${error.message}`);
        textEl.textContent = 'Failed to process the image.';
    }

    return container;
}

Free Image Tool Creator

Can't find the image tool you're looking for?
Create one based on your own needs now!

Description

The Image Text To Speech Converter is a tool that allows users to extract text from images and have it read aloud. Utilizing Optical Character Recognition (OCR) through Tesseract.js, the tool can recognize text in various languages and employs the Web Speech API for spoken output. This tool is useful for individuals with visual impairments who need assistance accessing printed material, for language learners who want to hear text pronounced, or for anyone wanting to convert written content into an auditory format. It supports customizable voice options and speech rates to enhance user experience.

Leave a Reply

Your email address will not be published. Required fields are marked *