Please bookmark this page to avoid losing your image tool!

Audio To Spectrogram Image Generator

(Free & Supports Bulk Upload)

Drag & drop your audio files here or

The result will appear here...
You can edit the JavaScript code below to customize the image tool.
/**
 * Generates a spectrogram image from an audio source using the Web Audio API and Canvas.
 * This function is async and returns a Promise that resolves to a Canvas element.
 *
 * @param {Image} originalImg - This parameter is unused for this tool, but required by the function signature.
 * @param {string} audioUrl - URL to an audio file. Use 'default' to generate a test chirp signal.
 * @param {number} fftSize - The size of the FFT window. Must be a power of 2 (e.g., 1024, 2048). Higher values give more frequency resolution but less time resolution.
 * @param {number} hopLength - The number of audio samples to step forward for each FFT. Typically fftSize / 4.
 * @param {string} colormapName - The color scheme for the spectrogram. Supported values: 'viridis', 'inferno', 'plasma', 'grayscale'.
 * @param {number} showAxes - Set to 1 to draw time and frequency axes, 0 to hide them.
 * @returns {Promise<HTMLCanvasElement>} A canvas element containing the spectrogram image,
 *     or a small canvas with a rendered error message on recoverable failures.
 * @throws {Error} If fftSize is not a power of 2 or hopLength is not a positive integer.
 */
async function processImage(originalImg, audioUrl = 'default', fftSize = 1024, hopLength = 256, colormapName = 'viridis', showAxes = 1) {

    // --- Helper Functions and Data ---

    /**
     * Builds a small canvas displaying red error text, so recoverable failures
     * still resolve to a drawable result instead of rejecting the pipeline.
     * @param {...string} lines - Message lines to render, one per row.
     * @returns {HTMLCanvasElement}
     */
    const makeErrorCanvas = (...lines) => {
        const errCanvas = document.createElement('canvas');
        errCanvas.width = 400;
        errCanvas.height = 100;
        const errCtx = errCanvas.getContext('2d');
        errCtx.font = '14px sans-serif';
        errCtx.fillStyle = '#cc0000';
        lines.forEach((line, i) => errCtx.fillText(line, 10, 40 + i * 20));
        return errCanvas;
    };

    /**
     * In-place Radix-2 Cooley-Tukey FFT.
     * @param {Float32Array} real - Real part of the complex array (overwritten with the result).
     * @param {Float32Array} imag - Imaginary part of the complex array (overwritten with the result).
     */
    const fft = (real, imag) => {
        const n = real.length;
        if (n === 0) return;
        if ((n & (n - 1)) !== 0) throw new Error("Input length must be a power of 2.");

        // Bit-reversal permutation
        for (let i = 1, j = 0; i < n; i++) {
            let bit = n >> 1;
            while (j >= bit) {
                j -= bit;
                bit >>= 1;
            }
            j += bit;
            if (i < j) {
                [real[i], real[j]] = [real[j], real[i]];
                [imag[i], imag[j]] = [imag[j], imag[i]];
            }
        }

        // Cooley-Tukey butterflies; twiddle factor advanced by complex multiplication.
        for (let len = 2; len <= n; len <<= 1) {
            const halfLen = len >> 1;
            const angle = -2 * Math.PI / len;
            const w_real = Math.cos(angle);
            const w_imag = Math.sin(angle);
            for (let i = 0; i < n; i += len) {
                let t_real = 1;
                let t_imag = 0;
                for (let j = 0; j < halfLen; j++) {
                    const u_real = real[i + j];
                    const u_imag = imag[i + j];
                    const v_real = real[i + j + halfLen] * t_real - imag[i + j + halfLen] * t_imag;
                    const v_imag = real[i + j + halfLen] * t_imag + imag[i + j + halfLen] * t_real;
                    real[i + j] = u_real + v_real;
                    imag[i + j] = u_imag + v_imag;
                    real[i + j + halfLen] = u_real - v_real;
                    imag[i + j + halfLen] = u_imag - v_imag;

                    const next_t_real = t_real * w_real - t_imag * w_imag;
                    t_imag = t_real * w_imag + t_imag * w_real;
                    t_real = next_t_real;
                }
            }
        }
    };

    /**
     * Perceptually uniform colormap control points ([r, g, b] stops, low → high).
     */
    const colormaps = {
        viridis: [ [68, 1, 84], [72, 40, 120], [62, 74, 137], [49, 104, 142], [38, 130, 142], [31, 158, 137], [53, 183, 121], [109, 205, 89], [180, 222, 44], [253, 231, 37] ],
        inferno: [ [0, 0, 4], [39, 15, 101], [92, 28, 105], [144, 47, 85], [191, 71, 57], [229, 107, 28], [252, 164, 9], [249, 229, 90], [252, 255, 255] ],
        plasma: [ [13, 8, 135], [71, 1, 158], [122, 1, 156], [168, 38, 126], [208, 77, 89], [239, 120, 52], [248, 171, 36], [231, 225, 90], [240, 249, 255] ],
        grayscale: [[0, 0, 0], [255, 255, 255]]
    };

    /**
     * Maps a normalized value (0-1) to an [r, g, b] color by linear interpolation
     * between the colormap's control points. Non-finite inputs (e.g. NaN from a
     * degenerate dB range) map to the bottom of the scale instead of crashing.
     */
    const getColorRgb = (value, mapName) => {
        if (!Number.isFinite(value)) value = 0; // guard NaN; ±Infinity is handled by the clamp
        value = Math.max(0, Math.min(1, value));
        const cmap = colormaps[mapName] || colormaps.viridis;
        const scaledValue = value * (cmap.length - 1);
        const i = Math.floor(scaledValue);
        const j = Math.ceil(scaledValue);
        const frac = scaledValue - i;
        const c1 = cmap[i];
        const c2 = cmap[j];
        const r = Math.round(c1[0] + (c2[0] - c1[0]) * frac);
        const g = Math.round(c1[1] + (c2[1] - c1[1]) * frac);
        const b = Math.round(c1[2] + (c2[2] - c1[2]) * frac);
        return [r, g, b];
    };

    // --- Main Logic ---

    // 1. Validate parameters
    if (!Number.isInteger(fftSize) || fftSize <= 0 || (fftSize & (fftSize - 1)) !== 0) {
        throw new Error("fftSize must be a power of 2.");
    }
    if (!Number.isInteger(hopLength) || hopLength < 1) {
        throw new Error("hopLength must be a positive integer.");
    }

    // 2. Get audio data: either a synthesized test chirp or a fetched file.
    const audioCtx = new (window.AudioContext || window.webkitAudioContext)();
    let audioBuffer;

    if (audioUrl === 'default') {
        // Synthesize a 2-second linear chirp sweeping 200 Hz → sampleRate / 8.
        const duration = 2;
        const sampleRate = audioCtx.sampleRate;
        const length = duration * sampleRate;
        audioBuffer = audioCtx.createBuffer(1, length, sampleRate);
        const data = audioBuffer.getChannelData(0);
        const f0 = 200;
        const f1 = sampleRate / 8;
        const k = (f1 - f0) / duration; // chirp rate in Hz per second
        for (let i = 0; i < length; i++) {
            const t = i / sampleRate;
            // Instantaneous phase of a linear chirp: 2π·(f0·t + (k/2)·t²).
            const phase = 2 * Math.PI * (f0 * t + (k / 2) * t * t);
            data[i] = Math.sin(phase) * 0.5;
        }
    } else {
        try {
            const response = await fetch(audioUrl);
            // fetch() does not reject on HTTP errors — check the status explicitly.
            if (!response.ok) throw new Error(`HTTP ${response.status}`);
            const arrayBuffer = await response.arrayBuffer();
            audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
        } catch (e) {
            console.error("Error processing audio:", e);
            audioCtx.close().catch(() => {}); // best-effort cleanup
            return makeErrorCanvas(
                'Error: Failed to load or decode audio file.',
                'Please check the URL and CORS policy.'
            );
        }
    }

    // The decoded AudioBuffer is independent of the context; close the context
    // now to release its resources (browsers cap the number of live contexts).
    audioCtx.close().catch(() => {});

    // 3. Perform Short-Time Fourier Transform (STFT)
    const pcmData = audioBuffer.getChannelData(0);
    const numFrames = Math.floor((pcmData.length - fftSize) / hopLength) + 1;
    if (numFrames < 1) {
        // Audio shorter than one FFT window: createImageData(0, …) would throw.
        return makeErrorCanvas(
            'Error: Audio is too short for the chosen fftSize.',
            `Need at least ${fftSize} samples, got ${pcmData.length}.`
        );
    }
    const numBins = fftSize / 2;
    const spectrogram = [];
    // Hann window tapers each frame to reduce spectral leakage.
    const hannWindow = new Float32Array(fftSize);
    for (let i = 0; i < fftSize; i++) {
        hannWindow[i] = 0.5 * (1 - Math.cos(2 * Math.PI * i / (fftSize - 1)));
    }

    let minDb = Infinity;
    let maxDb = -Infinity;

    for (let frame = 0; frame < numFrames; frame++) {
        const start = frame * hopLength;
        // The windowed frame doubles as the FFT's real input (transformed in place).
        const real = new Float32Array(fftSize);
        for (let j = 0; j < fftSize; j++) real[j] = pcmData[start + j] * hannWindow[j];
        const imag = new Float32Array(fftSize);
        fft(real, imag);

        const frameMagnitudes = new Float32Array(numBins);
        for (let j = 0; j < numBins; j++) {
            const mag = Math.hypot(real[j], imag[j]);
            const db = 20 * Math.log10(mag); // -Infinity for silent bins
            frameMagnitudes[j] = db;
            if (db > -Infinity) {
                if (db < minDb) minDb = db;
                if (db > maxDb) maxDb = db;
            }
        }
        spectrogram.push(frameMagnitudes);
    }

    // Normalize the display to at most a 90 dB dynamic range below the peak.
    // Completely silent audio never updates maxDb; fall back to a fixed range
    // so the normalization below stays finite.
    const effectiveMaxDb = maxDb === -Infinity ? 0 : maxDb;
    const effectiveMinDb = maxDb === -Infinity ? -90 : Math.max(minDb, maxDb - 90);

    // 4. Draw Spectrogram on Canvas (1 frame = 1 px wide, 1 bin = 1 px tall).
    const axisMargin = showAxes
        ? { top: 20, right: 10, bottom: 40, left: 60 }
        : { top: 0, right: 0, bottom: 0, left: 0 };
    const canvas = document.createElement('canvas');
    canvas.width = numFrames + axisMargin.left + axisMargin.right;
    canvas.height = numBins + axisMargin.top + axisMargin.bottom;
    const ctx = canvas.getContext('2d');

    // Fill background with the colormap's lowest color. fillStyle requires a
    // CSS color string — a bare "r,g,b" list is invalid and silently ignored,
    // which previously left the background black regardless of colormap.
    ctx.fillStyle = `rgb(${getColorRgb(0, colormapName).join(',')})`;
    ctx.fillRect(0, 0, canvas.width, canvas.height);

    // Create and draw image data for the spectrogram plot
    const imageData = ctx.createImageData(numFrames, numBins);
    const dbSpan = effectiveMaxDb - effectiveMinDb;
    for (let x = 0; x < numFrames; x++) {
        for (let y = 0; y < numBins; y++) {
            const normalized = (spectrogram[x][y] - effectiveMinDb) / dbSpan;
            const [r, g, b] = getColorRgb(normalized, colormapName);
            // Flip vertically so low frequencies render at the bottom.
            const pixelIndex = (x + (numBins - 1 - y) * numFrames) * 4;
            imageData.data[pixelIndex] = r;
            imageData.data[pixelIndex + 1] = g;
            imageData.data[pixelIndex + 2] = b;
            imageData.data[pixelIndex + 3] = 255;
        }
    }
    ctx.putImageData(imageData, axisMargin.left, axisMargin.top);

    // 5. Optionally Draw Axes
    if (showAxes) {
        ctx.fillStyle = 'white';
        ctx.font = '12px sans-serif';
        ctx.textAlign = 'center';
        ctx.textBaseline = 'middle';

        // Time Axis (X): one tick per second; each STFT frame is one pixel wide.
        ctx.fillText('Time (s)', axisMargin.left + numFrames / 2, canvas.height - 15);
        const timeStep = 1; // 1 second ticks
        const pixelsPerSecond = audioBuffer.sampleRate / hopLength;
        for (let t = 0; t <= audioBuffer.duration; t += timeStep) {
            const x = axisMargin.left + t * pixelsPerSecond;
            if (x > axisMargin.left + numFrames) break;
            ctx.fillText(t.toFixed(0), x, axisMargin.top + numBins + 10);
            ctx.fillRect(x, axisMargin.top + numBins, 1, 5);
        }

        // Frequency Axis (Y): rotated label plus ticks every 5 kHz up to Nyquist.
        ctx.save();
        ctx.translate(15, axisMargin.top + numBins / 2);
        ctx.rotate(-Math.PI / 2);
        ctx.fillText('Frequency (kHz)', 0, 0);
        ctx.restore();
        const maxFreq = audioBuffer.sampleRate / 2;
        const freqStep = 5000; // 5 kHz ticks
        for (let f = 0; f <= maxFreq; f += freqStep) {
            const y = canvas.height - axisMargin.bottom - (f / maxFreq) * numBins;
            if (y < axisMargin.top) break;
            ctx.textAlign = 'right';
            ctx.fillText((f / 1000).toFixed(1), axisMargin.left - 10, y);
            ctx.fillRect(axisMargin.left - 5, y, 5, 1);
        }
    }

    return canvas;
}

Free Image Tool Creator

Can't find the image tool you're looking for?
Create one based on your own needs now!

Description

The Audio To Spectrogram Image Generator is a web tool that transforms audio files into visually informative spectrogram images. By processing the audio data, it produces graphics that display the frequency content of the audio signal over time, effectively translating sound into a visual format. This tool can be beneficial for sound analysis, music visualization, educational purposes in acoustics, or for audio engineers and musicians who wish to study the characteristics of audio signals more deeply. Users can customize their output with different color maps and display options to suit their needs.

Leave a Reply

Your email address will not be published. Required fields are marked *