You can edit the JavaScript code below to customize the image tool.
Apply Changes
/**
 * Extracts text from an image with OCR, translates it line by line, and renders the
 * original/translated pairs in an Excel-like two-column HTML table.
 *
 * The OCR engine (tesseract.js) and the translation model (transformers.js, NLLB-200)
 * are imported dynamically from a CDN, so the first run can take a long time.
 * Language codes are validated BEFORE anything is downloaded, so an unsupported
 * code fails immediately instead of after fetching the libraries.
 *
 * Processing errors are not thrown to the caller: they are logged to the console and
 * shown (in red) inside the returned element.
 *
 * @param {Image} originalImg The original image object to process.
 * @param {string} [sourceLang='eng'] The language of the text in the image.
 *   Supported codes: 'eng' (English), 'rus' (Russian), 'deu' (German),
 *   'spa' (Spanish), 'fra' (French), 'jpn' (Japanese), 'chi_sim' (Chinese Simplified).
 * @param {string} [targetLang='rus'] The language to translate the text into.
 *   Uses the same supported codes as sourceLang.
 * @returns {Promise<HTMLElement>} A promise that resolves to a div element. While
 *   processing, the div holds a status paragraph; at the end it holds either the
 *   results table, a "no text found" message, or an error message.
 */
async function processImage(originalImg, sourceLang = 'eng', targetLang = 'rus') {
  const wrapper = document.createElement('div');
  const statusElement = document.createElement('p');
  statusElement.style.fontFamily = 'Arial, sans-serif';
  statusElement.style.padding = '10px';
  wrapper.appendChild(statusElement);

  const updateStatus = (text) => {
    statusElement.textContent = text;
    console.log(text); // Also log to console for debugging
  };

  // Maps the user-facing language codes to the codes required by Tesseract and by the
  // NLLB translation model. A Map (rather than a plain object) is used so that
  // caller-supplied keys such as 'toString' can never resolve to inherited members.
  const languages = new Map([
    ['eng', { tesseract: 'eng', nllb: 'eng_Latn' }],
    ['rus', { tesseract: 'rus', nllb: 'rus_Cyrl' }],
    ['deu', { tesseract: 'deu', nllb: 'deu_Latn' }],
    ['spa', { tesseract: 'spa', nllb: 'spa_Latn' }],
    ['fra', { tesseract: 'fra', nllb: 'fra_Latn' }],
    ['jpn', { tesseract: 'jpn', nllb: 'jpn_Jpan' }],
    ['chi_sim', { tesseract: 'chi_sim', nllb: 'zho_Hans' }],
  ]);

  try {
    // Validate first: no point downloading large libraries for an unsupported pair.
    const source = languages.get(sourceLang);
    const target = languages.get(targetLang);
    if (!source || !target) {
      throw new Error(`Unsupported language pair. Supported codes are: ${[...languages.keys()].join(', ')}.`);
    }

    updateStatus('Loading libraries...');
    // The two libraries are independent, so fetch them in parallel.
    const [{ createWorker }, { pipeline, env }] = await Promise.all([
      import('https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.esm.min.js'),
      import('https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.1'),
    ]);

    // Configure transformers.js for web usage
    env.allowLocalModels = false;
    env.backends.onnx.wasm.numThreads = 1;

    // 1. OCR (Optical Character Recognition)
    updateStatus('Initializing OCR engine...');
    const worker = await createWorker(source.tesseract, 1, {
      logger: (m) => {
        if (m.status === 'recognizing text') {
          updateStatus(`OCR: Recognizing text (${Math.round(m.progress * 100)}%)...`);
        } else {
          updateStatus(`OCR: ${m.status}...`);
        }
      },
    });

    let originalLines;
    try {
      const { data } = await worker.recognize(originalImg);
      originalLines = data.lines
        .map((line) => line.text.trim())
        .filter((text) => text);
    } finally {
      // Always release the worker, even when recognition fails.
      await worker.terminate();
    }

    if (originalLines.length === 0) {
      updateStatus('No text was found in the image.');
      return wrapper;
    }
    updateStatus(`Found ${originalLines.length} lines of text. Initializing translator...`);

    // 2. Translation
    const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M', {
      progress_callback: ({ status, file, progress }) => {
        // Only 'progress' events carry a numeric percentage; other events
        // would otherwise be rendered as "undefined (NaN%)".
        if (status === 'progress' && Number.isFinite(progress)) {
          updateStatus(`Loading translation model: ${file} (${Math.round(progress)}%)`);
        }
      },
    });

    let translatedLines;
    try {
      updateStatus('Translating text...');
      const translatedOutput = await translator(originalLines, {
        src_lang: source.nllb,
        tgt_lang: target.nllb,
      });
      translatedLines = translatedOutput.map((t) => t.translation_text);
    } finally {
      // Free the model's inference sessions once translation is done (or failed).
      await translator.dispose();
    }
    updateStatus('Translation complete. Generating table...');

    // 3. Generate Excel-like HTML table
    const style = document.createElement('style');
    style.textContent = `
.excel-style-table {
border-collapse: collapse;
width: 100%;
font-family: Arial, sans-serif;
border: 1px solid #ccc;
box-shadow: 0 2px 3px rgba(0,0,0,0.1);
}
.excel-style-table th, .excel-style-table td {
border: 1px solid #ddd;
padding: 10px;
text-align: left;
}
.excel-style-table th {
background-color: #f2f2f2;
font-weight: bold;
color: #333;
}
.excel-style-table tr:nth-child(even) {
background-color: #f9f9f9;
}
.excel-style-table tr:hover {
background-color: #f1f1f1;
}
`;

    const table = document.createElement('table');
    table.className = 'excel-style-table';

    // Header
    const headerRow = table.createTHead().insertRow();
    const headerLabels = [
      `Original Text (${sourceLang.toUpperCase()})`,
      `Translated Text (${targetLang.toUpperCase()})`,
    ];
    for (const label of headerLabels) {
      const th = document.createElement('th');
      th.textContent = label;
      headerRow.appendChild(th);
    }

    // Body: one row per recognized line, paired with its translation by index.
    const tbody = table.createTBody();
    originalLines.forEach((originalText, i) => {
      const row = tbody.insertRow();
      row.insertCell().textContent = originalText;
      row.insertCell().textContent = translatedLines[i] ?? '';
    });

    // Finalize the output element: replace the status message with the table.
    wrapper.replaceChildren(style, table);
    return wrapper;
  } catch (error) {
    console.error('An error occurred during image processing:', error);
    // Thrown values are not guaranteed to be Error instances.
    const message = error instanceof Error ? error.message : String(error);
    statusElement.textContent = `Error: ${message}`;
    statusElement.style.color = 'red';
    return wrapper;
  }
}
Apply Changes