You can edit the below JavaScript code to customize the image tool.
Apply Changes
/**
* Generates a spectrogram image from an audio source using the Web Audio API and Canvas.
* This function is async and returns a Promise that resolves to a Canvas element.
*
* @param {Image} originalImg - This parameter is unused for this tool, but required by the function signature.
* @param {string} audioUrl - URL to an audio file. Use 'default' to generate a test chirp signal.
* @param {number} fftSize - The size of the FFT window. Must be a power of 2 (e.g., 1024, 2048). Higher values give more frequency resolution but less time resolution.
* @param {number} hopLength - The number of audio samples to step forward for each FFT. Typically fftSize / 4.
* @param {string} colormapName - The color scheme for the spectrogram. Supported values: 'viridis', 'inferno', 'plasma', 'grayscale'.
* @param {number} showAxes - Set to 1 to draw time and frequency axes, 0 to hide them.
* @returns {Promise<HTMLCanvasElement>} A canvas element containing the spectrogram image.
*/
async function processImage(originalImg, audioUrl = 'default', fftSize = 1024, hopLength = 256, colormapName = 'viridis', showAxes = 1) {
// --- Helper Functions and Data ---
/**
 * In-place iterative radix-2 Cooley-Tukey FFT (forward transform).
 * Overwrites both arrays with the transform result.
 * @param {Float32Array|number[]} real - Real components; mutated in place.
 * @param {Float32Array|number[]} imag - Imaginary components; mutated in place.
 * @throws {Error} If the array length is not a power of 2.
 */
const fft = (real, imag) => {
const size = real.length;
if (size === 0) return;
if ((size & (size - 1)) !== 0) throw new Error("Input length must be a power of 2.");
// Reorder into bit-reversed index order so the butterfly passes
// below can operate in place.
let rev = 0;
for (let idx = 1; idx < size; idx++) {
let half = size >> 1;
while (rev >= half) {
rev -= half;
half >>= 1;
}
rev += half;
if (idx < rev) {
const tmpR = real[idx];
real[idx] = real[rev];
real[rev] = tmpR;
const tmpI = imag[idx];
imag[idx] = imag[rev];
imag[rev] = tmpI;
}
}
// Butterfly passes: merge sub-transforms of width `span`, doubling
// the width each pass until it covers the whole array.
for (let span = 2; span <= size; span <<= 1) {
const half = span >> 1;
const theta = -2 * Math.PI / span;
const stepR = Math.cos(theta);
const stepI = Math.sin(theta);
for (let base = 0; base < size; base += span) {
// Twiddle factor starts at 1 and is advanced by exp(-2πi/span)
// on each butterfly within the group.
let twR = 1;
let twI = 0;
for (let k = 0; k < half; k++) {
const top = base + k;
const bot = top + half;
const aR = real[top];
const aI = imag[top];
const bR = real[bot] * twR - imag[bot] * twI;
const bI = real[bot] * twI + imag[bot] * twR;
real[top] = aR + bR;
imag[top] = aI + bI;
real[bot] = aR - bR;
imag[bot] = aI - bI;
const nextTwR = twR * stepR - twI * stepI;
twI = twR * stepI + twI * stepR;
twR = nextTwR;
}
}
}
};
/**
 * Perceptually uniform colormap control points (RGB, 0-255), evenly
 * spaced from low intensity (index 0) to high intensity (last index).
 */
const colormaps = {
viridis: [ [68, 1, 84], [72, 40, 120], [62, 74, 137], [49, 104, 142], [38, 130, 142], [31, 158, 137], [53, 183, 121], [109, 205, 89], [180, 222, 44], [253, 231, 37] ],
inferno: [ [0, 0, 4], [39, 15, 101], [92, 28, 105], [144, 47, 85], [191, 71, 57], [229, 107, 28], [252, 164, 9], [249, 229, 90], [252, 255, 255] ],
plasma: [ [13, 8, 135], [71, 1, 158], [122, 1, 156], [168, 38, 126], [208, 77, 89], [239, 120, 52], [248, 171, 36], [231, 225, 90], [240, 249, 255] ],
grayscale: [[0, 0, 0], [255, 255, 255]]
};
/**
 * Maps a normalized value (0-1) to an RGB color by linear interpolation
 * between adjacent colormap control points.
 * @param {number} value - Normalized intensity; clamped to [0, 1]. NaN is treated as 0.
 * @param {string} mapName - Colormap key; unknown names fall back to 'viridis'.
 * @returns {number[]} [r, g, b] with each channel an integer in 0-255.
 */
const getColorRgb = (value, mapName) => {
// A fully silent signal can normalize to NaN upstream; render it as the
// colormap floor instead of crashing on cmap[NaN] below.
if (Number.isNaN(value)) value = 0;
value = Math.max(0, Math.min(1, value)); // Clamp value
const cmap = colormaps[mapName] || colormaps.viridis;
const scaledValue = value * (cmap.length - 1);
const i = Math.floor(scaledValue);
const j = Math.ceil(scaledValue);
const frac = scaledValue - i;
const c1 = cmap[i];
const c2 = cmap[j];
const r = Math.round(c1[0] + (c2[0] - c1[0]) * frac);
const g = Math.round(c1[1] + (c2[1] - c1[1]) * frac);
const b = Math.round(c1[2] + (c2[2] - c1[2]) * frac);
return [r, g, b];
};
// --- Main Logic ---
// 1. Validate parameters
if ((fftSize & (fftSize - 1)) !== 0 || fftSize === 0) {
throw new Error("fftSize must be a power of 2.");
}
// 2. Get Audio Data
const audioCtx = new (window.AudioContext || window.webkitAudioContext)();
let audioBuffer;
if (audioUrl === 'default') {
const duration = 2; const sampleRate = audioCtx.sampleRate; const length = duration * sampleRate;
audioBuffer = audioCtx.createBuffer(1, length, sampleRate);
const data = audioBuffer.getChannelData(0);
const f0 = 200, f1 = sampleRate / 8;
const k = (f1 - f0) / duration;
for (let i = 0; i < length; i++) {
const t = i / sampleRate;
const phase = 2 * Math.PI * (f0 * t + (k / 2) * t * t);
data[i] = Math.sin(phase) * 0.5;
}
} else {
try {
const response = await fetch(audioUrl);
const arrayBuffer = await response.arrayBuffer();
audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
} catch (e) {
console.error("Error processing audio:", e);
const errCanvas = document.createElement('canvas'); errCanvas.width = 400; errCanvas.height = 100;
const errCtx = errCanvas.getContext('2d');
errCtx.font = '14px sans-serif'; errCtx.fillStyle = '#cc0000';
errCtx.fillText('Error: Failed to load or decode audio file.', 10, 40);
errCtx.fillText('Please check the URL and CORS policy.', 10, 60);
return errCanvas;
}
}
// 3. Perform Short-Time Fourier Transform (STFT)
const pcmData = audioBuffer.getChannelData(0);
const numFrames = Math.floor((pcmData.length - fftSize) / hopLength) + 1;
const numBins = fftSize / 2;
const spectrogram = [];
const hannWindow = new Float32Array(fftSize).map((_, i) => 0.5 * (1 - Math.cos(2 * Math.PI * i / (fftSize - 1))));
let minDb = Infinity, maxDb = -Infinity;
for (let i = 0; i < numFrames; i++) {
const start = i * hopLength;
const frameData = new Float32Array(fftSize);
for(let j=0; j < fftSize; j++) frameData[j] = pcmData[start + j] * hannWindow[j];
const real = Array.from(frameData);
const imag = new Array(fftSize).fill(0);
fft(real, imag);
const frameMagnitudes = new Float32Array(numBins);
for (let j = 0; j < numBins; j++) {
const mag = Math.sqrt(real[j] ** 2 + imag[j] ** 2);
const db = 20 * Math.log10(mag);
frameMagnitudes[j] = db;
if (db > -Infinity) {
if (db < minDb) minDb = db;
if (db > maxDb) maxDb = db;
}
}
spectrogram.push(frameMagnitudes);
}
const dbRange = maxDb - minDb;
const effectiveMaxDb = maxDb;
const effectiveMinDb = Math.max(minDb, maxDb - 90); // Use a 90dB dynamic range for visualization
// 4. Draw Spectrogram on Canvas
const axisMargin = showAxes ? { top: 20, right: 10, bottom: 40, left: 60 } : { top: 0, right: 0, bottom: 0, left: 0 };
const canvas = document.createElement('canvas');
canvas.width = numFrames + axisMargin.left + axisMargin.right;
canvas.height = numBins + axisMargin.top + axisMargin.bottom;
const ctx = canvas.getContext('2d');
// Fill background
ctx.fillStyle = getColorRgb(0, colormapName).join(',');
ctx.fillRect(0, 0, canvas.width, canvas.height);
// Create and draw image data for the spectrogram plot
const imageData = ctx.createImageData(numFrames, numBins);
for (let x = 0; x < numFrames; x++) {
for (let y = 0; y < numBins; y++) {
const db = spectrogram[x][y];
const normalized = (db - effectiveMinDb) / (effectiveMaxDb - effectiveMinDb);
const color = getColorRgb(normalized, colormapName);
const pixelIndex = (x + (numBins - 1 - y) * numFrames) * 4;
imageData.data[pixelIndex] = color[0];
imageData.data[pixelIndex + 1] = color[1];
imageData.data[pixelIndex + 2] = color[2];
imageData.data[pixelIndex + 3] = 255;
}
}
ctx.putImageData(imageData, axisMargin.left, axisMargin.top);
// 5. Optionally Draw Axes
if (showAxes) {
ctx.fillStyle = 'white';
ctx.font = '12px sans-serif';
ctx.textAlign = 'center';
ctx.textBaseline = 'middle';
// Time Axis (X)
ctx.fillText('Time (s)', axisMargin.left + numFrames / 2, canvas.height - 15);
const timeStep = 1; // 1 second ticks
const pixelsPerSecond = audioBuffer.sampleRate / hopLength;
for (let t = 0; t <= audioBuffer.duration; t += timeStep) {
const x = axisMargin.left + t * pixelsPerSecond;
if(x > axisMargin.left + numFrames) break;
ctx.fillText(t.toFixed(0), x, axisMargin.top + numBins + 10);
ctx.fillRect(x, axisMargin.top + numBins, 1, 5);
}
// Frequency Axis (Y)
ctx.save();
ctx.translate(15, axisMargin.top + numBins / 2);
ctx.rotate(-Math.PI / 2);
ctx.fillText('Frequency (kHz)', 0, 0);
ctx.restore();
const maxFreq = audioBuffer.sampleRate / 2;
const freqStep = 5000; // 5 kHz ticks
for (let f = 0; f <= maxFreq; f += freqStep) {
const y = canvas.height - axisMargin.bottom - (f / maxFreq) * numBins;
if (y < axisMargin.top) break;
ctx.textAlign = 'right';
ctx.fillText((f / 1000).toFixed(1), axisMargin.left - 10, y);
ctx.fillRect(axisMargin.left - 5, y, 5, 1);
}
}
return canvas;
}
Apply Changes