Please bookmark this page to avoid losing your image tool!

Image Text Box Scanner And Web Interface URL Identifier

(Free & Supports Bulk Upload)

Drag & drop your images here or

The result will appear here...
You can edit the JavaScript code below to customize the image tool.
/**
 * Scans an image for text with Tesseract.js OCR, highlights every recognized word that
 * looks like a web URL or domain on a canvas copy of the image, and lists the unique
 * matches as clickable links.
 *
 * The wrapper element is returned immediately; OCR runs asynchronously and updates the
 * wrapper's status/results panels (and the canvas) in place when it finishes or fails.
 *
 * @param {HTMLImageElement|HTMLCanvasElement} originalImg - Image source to scan; it is
 *   painted with `drawImage`, so presumably any canvas image source with dimensions works.
 * @param {string} [lang="eng+rus"] - Language code(s) passed to `Tesseract.recognize`.
 * @param {string} [highlightColor="rgba(50, 205, 50, 0.4)"] - Fill colour of URL boxes.
 * @param {string} [strokeColor="#ff0000"] - Outline colour of URL boxes.
 * @returns {HTMLDivElement} Wrapper holding the canvas, a status panel and a results panel.
 */
function processImage(originalImg, lang = "eng+rus", highlightColor = "rgba(50, 205, 50, 0.4)", strokeColor = "#ff0000") {
    const TESSERACT_SRC = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js';

    // Regex for matching basic URL structures and domains
    const urlRegex = /^(?:https?:\/\/)?(?:www\.)?[-a-z0-9@:%._+~#=]{1,256}\.[a-z]{2,10}\b(?:[-a-z0-9@:%_+.~#?&//=]*)$/i;
    // Common file extensions that would otherwise be mistaken for domains
    const nonWebExtensions = /\.(jpg|jpeg|png|gif|bmp|pdf|doc|docx|xls|xlsx|ppt|pptx|zip|rar|exe|txt|csv)$/i;

    // Creates an element and applies a set of inline styles to it.
    const createStyled = (tag, styles) => {
        const el = document.createElement(tag);
        Object.assign(el.style, styles);
        return el;
    };

    // Returns the cleaned URL/domain contained in an OCR word, or null if it is not one.
    const extractUrl = (text) => {
        if (!text || !text.includes('.')) return null;
        if (!/[a-zA-Z]/.test(text)) return null;

        // Strip punctuation that OCR tends to glue onto either end of a word
        const cleanText = text.replace(/^[^\w]+|[^\w]+$/g, '');
        if (nonWebExtensions.test(cleanText)) return null;

        return urlRegex.test(cleanText) ? cleanText : null;
    };

    // Injects the Tesseract.js script tag; rejects with a real Error so the UI can show a message.
    const loadTesseract = () => new Promise((resolve, reject) => {
        const script = document.createElement('script');
        script.src = TESSERACT_SRC;
        script.onload = resolve;
        script.onerror = () => reject(new Error('Failed to load the Tesseract.js OCR library.'));
        document.head.appendChild(script);
    });

    // Main container wrapper
    const wrapper = createStyled('div', {
        fontFamily: 'Arial, sans-serif',
        display: 'flex',
        flexDirection: 'column',
        gap: '15px',
    });

    // Canvas container
    const canvasContainer = createStyled('div', {
        position: 'relative',
        maxWidth: '100%',
        overflow: 'auto',
        border: '1px solid #ccc',
        borderRadius: '4px',
    });

    // Size the canvas from the intrinsic image size: drawImage(img, 0, 0) paints at
    // intrinsic size, so using the rendered width/height would crop a resized <img>.
    // Sources without natural* properties (e.g. a canvas) fall back to width/height.
    const canvas = createStyled('canvas', {
        maxWidth: '100%',
        height: 'auto',
        display: 'block',
    });
    canvas.width = originalImg.naturalWidth || originalImg.width;
    canvas.height = originalImg.naturalHeight || originalImg.height;

    const ctx = canvas.getContext('2d');
    ctx.drawImage(originalImg, 0, 0);

    canvasContainer.appendChild(canvas);
    wrapper.appendChild(canvasContainer);

    // Loading/status message panel
    const statusPanel = createStyled('div', {
        padding: '12px',
        backgroundColor: '#e6f7ff',
        border: '1px solid #91d5ff',
        borderRadius: '4px',
        color: '#0050b3',
    });
    statusPanel.textContent = 'Initializing Text Scanner...';
    wrapper.appendChild(statusPanel);

    // Results panel (hidden until the scan finishes)
    const resultsPanel = createStyled('div', {
        display: 'none',
        padding: '15px',
        backgroundColor: '#f6ffed',
        border: '1px solid #b7eb8f',
        borderRadius: '4px',
    });
    wrapper.appendChild(resultsPanel);

    // Perform OCR and URL extraction asynchronously
    (async () => {
        try {
            // Dynamically load Tesseract.js if not available
            if (!window.Tesseract) {
                await loadTesseract();
            }

            statusPanel.textContent = 'Scanning image for text and web URLs... 0%';

            // Run OCR with logging to update progress
            const { data } = await window.Tesseract.recognize(canvas, lang, {
                logger: (m) => {
                    if (m.status === 'recognizing text') {
                        statusPanel.textContent = `Scanning image for text and web URLs... ${Math.round(m.progress * 100)}%`;
                    } else {
                        statusPanel.textContent = `Status: ${m.status}...`;
                    }
                },
            });

            // A result without a word list is treated as "no text found" rather than a crash
            const words = data && Array.isArray(data.words) ? data.words : [];
            const foundUrls = [];

            // Box styling is identical for every match, so set it once
            ctx.fillStyle = highlightColor;
            ctx.strokeStyle = strokeColor;
            ctx.lineWidth = Math.max(2, Math.floor(canvas.width / 500));

            // Scan extracted words, identify URLs, and draw bounding boxes
            for (const word of words) {
                const url = extractUrl(word.text);
                if (url === null) continue;

                foundUrls.push(url);

                const { x0, y0, x1, y1 } = word.bbox;
                ctx.fillRect(x0, y0, x1 - x0, y1 - y0);
                ctx.strokeRect(x0, y0, x1 - x0, y1 - y0);
            }

            const title = createStyled('h3', { marginTop: '0', color: '#389e0d' });
            title.textContent = 'Identified Web Interfaces / Domains:';
            resultsPanel.appendChild(title);

            if (foundUrls.length > 0) {
                const ul = createStyled('ul', { marginBottom: '0' });

                for (const url of new Set(foundUrls)) {
                    const li = createStyled('li', { marginBottom: '8px' });

                    const a = createStyled('a', {
                        color: '#096dd9',
                        textDecoration: 'none',
                        fontWeight: 'bold',
                    });
                    a.href = /^https?:\/\//i.test(url) ? url : `https://${url}`;
                    a.target = '_blank';
                    // The link target comes from OCR'd (untrusted) text: never hand the
                    // opened page a window.opener reference or a referrer.
                    a.rel = 'noopener noreferrer';
                    a.textContent = url;

                    // Simple hover effect
                    a.onmouseover = () => { a.style.textDecoration = 'underline'; };
                    a.onmouseout = () => { a.style.textDecoration = 'none'; };

                    li.appendChild(a);
                    ul.appendChild(li);
                }

                resultsPanel.appendChild(ul);
            } else {
                const p = createStyled('p', { marginBottom: '0' });
                p.textContent = 'No web URLs or domains were identified in this image.';
                resultsPanel.appendChild(p);
            }

            // Only swap panels once the results are fully built, so any failure above
            // is still reported in a status panel that is attached to the wrapper.
            wrapper.removeChild(statusPanel);
            resultsPanel.style.display = 'block';
        } catch (error) {
            // Rejections are not always Error objects (they can be plain strings),
            // so fall back to the stringified value instead of printing "undefined".
            const detail = error && error.message ? error.message : String(error);

            statusPanel.style.backgroundColor = '#fff2f0';
            statusPanel.style.border = '1px solid #ffccc7';
            statusPanel.style.color = '#cf1322';
            statusPanel.textContent = `Error during image scanning: ${detail}`;
        }
    })();

    // The wrapper is returned right away; the async scan above fills it in later
    return wrapper;
}

Free Image Tool Creator

Can't find the image tool you're looking for?
Create one based on your own needs now!

Description

This tool uses Optical Character Recognition (OCR) to scan images for text and specifically identify web URLs or domain names. Once a scan is complete, it visually highlights the detected web addresses directly on the image and provides a clickable list of all identified links. It is useful for extracting website information from screenshots, posters, business cards, or any digital media where web addresses are embedded as text.

Leave a Reply

Your email address will not be published. Required fields are marked *