Please bookmark this page to avoid losing your image tool!

Image Verb Identification Tool

(Free & Supports Bulk Upload)

The result will appear here...

JavaScript Code (For Advanced Users)

You can edit the JavaScript code below to customize the image tool.

/**
 * Identifies human poses in an image and attempts to label the primary action (verb).
 * This function uses TensorFlow.js and the MoveNet model for pose detection.
 * It dynamically loads the necessary scripts from a CDN.
 *
 * The image is drawn onto a new canvas; for every detected pose whose score reaches
 * the threshold, the skeleton is drawn and, when a rule matches and the nose is
 * confidently detected, the verb is written in upper case above the head.
 *
 * @param {HTMLImageElement} originalImg The original image element to process.
 * @param {number} confidenceThreshold The minimum confidence score (0.0 to 1.0) for a pose to be considered.
 * @param {string} accentColor A CSS color string for drawing the skeleton and text.
 * @returns {Promise<HTMLCanvasElement>} A promise that resolves with a new canvas element showing the image with identified actions.
 * @throws {Error} If a dependency script fails to load or does not become available in time.
 */
async function processImage(originalImg, confidenceThreshold = 0.4, accentColor = '#00FFFF') {
    // Upper bound on how long to wait for script tags inserted by an earlier call.
    const SCRIPT_TIMEOUT_MS = 30000;
    const POLL_INTERVAL_MS = 100;

    /**
     * Dynamically loads a script and returns a promise that resolves when it's loaded.
     * @param {string} url The URL of the script to load.
     * @returns {Promise<void>}
     */
    const loadScript = (url) => {
        return new Promise((resolve, reject) => {
            // A tag for this URL already exists (e.g. from a previous run): its load
            // event may already have fired, so wait for the library globals instead.
            if (document.querySelector(`script[src="${url}"]`)) {
                const librariesReady = () => Boolean(window.tf && window.poseDetection);
                if (librariesReady()) {
                    resolve();
                    return;
                }
                const deadline = Date.now() + SCRIPT_TIMEOUT_MS;
                const timer = setInterval(() => {
                    if (librariesReady()) {
                        clearInterval(timer);
                        resolve();
                    } else if (Date.now() > deadline) {
                        // Never poll forever: a tag whose download failed would hang the tool.
                        clearInterval(timer);
                        reject(new Error(`Timed out waiting for script to load: ${url}`));
                    }
                }, POLL_INTERVAL_MS);
                return;
            }

            const script = document.createElement('script');
            script.src = url;
            // Dynamically inserted scripts are async by default and may execute in any
            // order; async=false makes them execute in insertion order, so tf-core is
            // guaranteed to run before the bundles that depend on it.
            script.async = false;
            script.onload = () => resolve();
            script.onerror = () => {
                // Remove the dead tag so a later retry inserts a fresh one.
                script.remove();
                reject(new Error(`Failed to load script: ${url}`));
            };
            document.head.appendChild(script);
        });
    };

    /**
     * Loads the required TensorFlow.js and pose-detection models.
     * All tags are inserted up front (downloads run in parallel) and execute in
     * the order listed here.
     */
    const loadDependencies = async () => {
        await Promise.all([
            loadScript('https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-core@4.17.0/dist/tf-core.min.js'),
            loadScript('https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-converter@4.17.0/dist/tf-converter.min.js'),
            loadScript('https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-webgl@4.17.0/dist/tf-backend-webgl.min.js'),
            loadScript('https://cdn.jsdelivr.net/npm/@tensorflow-models/pose-detection@2.1.3/dist/pose-detection.min.js')
        ]);
        await tf.setBackend('webgl');
        await tf.ready();
    };

    /**
     * Infers a verb based on the relative positions of body keypoints.
     * Coordinates are image coordinates: a smaller y means higher up in the image.
     * Rules are evaluated from most to least specific; the first match wins.
     * @param {Object.<string, {x: number, y: number, score: number}>} keypointsMap A map of keypoint names to their data.
     * @returns {string|null} The inferred verb string or null if no specific action is detected.
     */
    const inferVerb = (keypointsMap) => {
        // Missing keypoints get score 0 so they can never satisfy a rule.
        const get = (name) => keypointsMap[name] || {
            x: 0,
            y: 0,
            score: 0
        };
        const isConfident = (keypoint) => keypoint.score > confidenceThreshold;
        const isAbove = (upper, lower) => isConfident(upper) && isConfident(lower) && upper.y < lower.y;

        const nose = get('nose');
        const leftWrist = get('left_wrist');
        const rightWrist = get('right_wrist');
        const leftShoulder = get('left_shoulder');
        const rightShoulder = get('right_shoulder');

        // Rule for "Reaching" or "Stretching": both wrists above the nose.
        // Checked before "Waving" because such a pose also has a wrist above a shoulder.
        if (isAbove(leftWrist, nose) && isAbove(rightWrist, nose)) {
            return 'Reaching';
        }

        // Rule for "Waving": either wrist above the shoulder on the same side.
        if (isAbove(leftWrist, leftShoulder) || isAbove(rightWrist, rightShoulder)) {
            return 'Waving';
        }

        // Rule for "Kicking": either ankle above the hip on the same side.
        const leftAnkle = get('left_ankle');
        const leftHip = get('left_hip');
        const rightAnkle = get('right_ankle');
        const rightHip = get('right_hip');

        if (isAbove(leftAnkle, leftHip) || isAbove(rightAnkle, rightHip)) {
            return 'Kicking';
        }

        // Rule for "Squatting" or "Sitting": hip and knee at nearly the same height,
        // i.e. their vertical gap is under 30% of the torso length on that side.
        // The torso length is taken as an absolute value; the signed shoulder-minus-hip
        // difference is negative for an upright person and would never match.
        const isSquattingSide = (shoulder, hip, knee) => {
            if (!isConfident(shoulder) || !isConfident(hip) || !isConfident(knee)) {
                return false;
            }
            const torsoLength = Math.abs(shoulder.y - hip.y);
            return Math.abs(hip.y - knee.y) < torsoLength * 0.3;
        };
        const leftKnee = get('left_knee');
        const rightKnee = get('right_knee');
        if (isSquattingSide(leftShoulder, leftHip, leftKnee) ||
            isSquattingSide(rightShoulder, rightHip, rightKnee)) {
            return 'Squatting';
        }

        return null;
    };

    /**
     * Draws the skeleton lines of one pose between confidently detected keypoints.
     * @param {CanvasRenderingContext2D} ctx The 2D context to draw on.
     * @param {Array<{x: number, y: number, score: number}>} keypoints The pose keypoints in model order.
     */
    const drawSkeleton = (ctx, keypoints) => {
        // getAdjacentPairs returns [indexA, indexB] pairs into the keypoints array.
        const pairs = poseDetection.util.getAdjacentPairs(poseDetection.SupportedModels.MoveNet);
        ctx.lineWidth = 3;
        ctx.strokeStyle = accentColor;
        for (const [startIndex, endIndex] of pairs) {
            const start = keypoints[startIndex];
            const end = keypoints[endIndex];
            if (!start || !end) {
                continue;
            }
            if (!(start.score >= confidenceThreshold) || !(end.score >= confidenceThreshold)) {
                continue;
            }
            ctx.beginPath();
            ctx.moveTo(start.x, start.y);
            ctx.lineTo(end.x, end.y);
            ctx.stroke();
        }
    };

    /**
     * Writes the verb in upper case above the nose, outlined in black.
     * @param {CanvasRenderingContext2D} ctx The 2D context to draw on.
     * @param {HTMLCanvasElement} canvas The canvas, used to scale the font.
     * @param {string} verb The verb to draw.
     * @param {{x: number, y: number}} nose The nose keypoint used as the anchor.
     */
    const drawLabel = (ctx, canvas, verb, nose) => {
        const fontSize = Math.max(24, Math.min(canvas.width, canvas.height) / 20);
        ctx.font = `bold ${fontSize}px 'Arial'`;
        ctx.fillStyle = accentColor;
        ctx.textAlign = 'center';
        ctx.strokeStyle = 'black';
        ctx.lineWidth = 4;
        const textX = nose.x;
        // y is the text baseline: keep it at least one font size down so the label
        // stays inside the canvas when the head is near the top edge.
        const textY = Math.max(fontSize, nose.y - 30);
        const label = verb.toUpperCase();
        ctx.strokeText(label, textX, textY);
        ctx.fillText(label, textX, textY);
    };

    // Main execution starts here
    await loadDependencies();

    const canvas = document.createElement('canvas');
    const ctx = canvas.getContext('2d');
    canvas.width = originalImg.naturalWidth;
    canvas.height = originalImg.naturalHeight;
    ctx.drawImage(originalImg, 0, 0);

    const detectorConfig = {
        modelType: poseDetection.movenet.modelType.SINGLEPOSE_LIGHTNING
    };
    const detector = await poseDetection.createDetector(poseDetection.SupportedModels.MoveNet, detectorConfig);

    let poses;
    try {
        // Estimate on the canvas (natural-size pixels, no overlay drawn yet) so the
        // keypoint coordinates share the pixel space we draw in, even if the <img>
        // element itself is displayed at a different size.
        poses = await detector.estimatePoses(canvas);
    } finally {
        // Release the model's resources even when estimation fails.
        detector.dispose();
    }

    for (const pose of poses) {
        if (!(pose.score >= confidenceThreshold)) {
            continue;
        }

        // Convert keypoints array to a more accessible map
        const keypointsMap = pose.keypoints.reduce((acc, point) => {
            acc[point.name] = point;
            return acc;
        }, {});

        // Skeleton first, label second, so the lines never cover the text.
        drawSkeleton(ctx, pose.keypoints);

        const verb = inferVerb(keypointsMap);
        const nose = keypointsMap['nose'];
        if (verb && nose && nose.score > confidenceThreshold) {
            drawLabel(ctx, canvas, verb, nose);
        }
    }

    return canvas;
}

Free Image Tool Creator

Can't find the image tool you're looking for?
Create one based on your own needs now!

Create a custom image tool

Description

The Image Verb Identification Tool analyzes human poses in images to identify and label primary actions or verbs based on the detected poses. Utilizing advanced pose detection technology, the tool can assess various activities such as waving, reaching, kicking, and squatting. This tool is invaluable for a variety of applications, including sports analysis, dance choreography, physical therapy assessments, and interactive media development, where understanding the actions of individuals within images is essential.

JavaScript Code (For Advanced Users)

Free Image Tool Creator

Description

Leave a Reply Cancel reply

Other Image Tools: