// Browser-only video-analysis demo: samples frames from an uploaded video and
// captions them with the FastVLM model via Transformers.js (WebGPU or WASM).
import {
  AutoProcessor,
  AutoModelForImageTextToText,
  RawImage,
  TextStreamer,
} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2';

// Module-level state shared across handlers.
let processor = null;        // AutoProcessor instance, lazily loaded
let model = null;            // AutoModelForImageTextToText instance, lazily loaded
let currentVideo = null;     // File object for the currently selected video
let frameDescriptions = [];  // { frame, time, description } per analyzed frame

/**
 * Detect WebGPU support and reflect it in the UI.
 * When unavailable, falls back to (and locks) the WASM backend selector.
 * @returns {Promise<boolean>} true when `navigator.gpu` exists.
 */
async function checkWebGPUSupport() {
  const statusEl = document.getElementById('webgpuStatus');
  const supported = 'gpu' in navigator;
  if (supported) {
    statusEl.textContent = '✅ Available';
    statusEl.style.color = '#10b981';
  } else {
    statusEl.textContent = '❌ Not Available';
    statusEl.style.color = '#ef4444';
    document.getElementById('deviceSelect').value = 'wasm';
    document.getElementById('deviceSelect').disabled = true;
  }
  return supported;
}

/**
 * Lazily download and initialize the processor and model.
 * @returns {Promise<boolean>} true on success; false on failure (error shown in UI).
 */
async function initializeModel() {
  const device = document.getElementById('deviceSelect').value;
  updateStatus('Loading AI model...');
  try {
    const modelId = "onnx-community/FastVLM-0.5B-ONNX";
    processor = await AutoProcessor.from_pretrained(modelId);
    const modelOptions = {
      // Mixed quantization keeps the download small while preserving quality.
      dtype: {
        embed_tokens: "fp16",
        vision_encoder: "q4",
        decoder_model_merged: "q4",
      },
    };
    if (device === 'webgpu') {
      modelOptions.device = 'webgpu';
    }
    model = await AutoModelForImageTextToText.from_pretrained(modelId, modelOptions);
    updateStatus('Model loaded successfully!');
    return true;
  } catch (error) {
    console.error('Model initialization error:', error);
    // FIX: the original literal contained a raw newline inside a single-quoted
    // string (a syntax error); rejoined onto one line.
    showError('Failed to load AI model. Please try again.');
    return false;
  }
}

/**
 * Extract `numFrames` evenly spaced frames from a video file.
 * @param {File} videoFile - video selected by the user.
 * @param {number} [numFrames=4] - how many frames to sample.
 * @returns {Promise<Array<{image: RawImage, preview: string, time: number}>>}
 */
async function extractFramesFromVideo(videoFile, numFrames = 4) {
  return new Promise((resolve, reject) => {
    const video = document.createElement('video');
    const canvas = document.createElement('canvas');
    const ctx = canvas.getContext('2d');
    video.src = URL.createObjectURL(videoFile);

    video.addEventListener('loadedmetadata', async () => {
      // The handler is async: without this try/catch a failure inside it would
      // reject nothing and leave the outer promise pending forever.
      try {
        const duration = video.duration;
        const frameInterval = duration / numFrames;
        const frames = [];

        // FIX: scale proportionally. The original clamped width and height
        // independently to 1024 and then drew the frame stretched to fill the
        // canvas, distorting any non-square video.
        const scale = Math.min(1, 1024 / Math.max(video.videoWidth, video.videoHeight));
        canvas.width = Math.round(video.videoWidth * scale);
        canvas.height = Math.round(video.videoHeight * scale);

        for (let i = 0; i < numFrames; i++) {
          const currentTime = i * frameInterval;
          // FIX: register the 'seeked' listener BEFORE assigning currentTime;
          // the original attached it afterwards, racing against a fast seek.
          const seeked = new Promise((r) => {
            video.addEventListener('seeked', () => r(), { once: true });
          });
          video.currentTime = currentTime;
          await seeked;

          ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
          const blob = await new Promise((r) => canvas.toBlob(r, 'image/png'));
          const image = await RawImage.fromBlob(blob);
          // Keep a compact JPEG data-URL for the on-page thumbnail.
          const previewUrl = canvas.toDataURL('image/jpeg', 0.8);
          frames.push({ image, preview: previewUrl, time: currentTime });
          updateProgress((i + 1) / numFrames * 30, `Extracting frame ${i + 1}/${numFrames}`);
        }

        URL.revokeObjectURL(video.src);
        resolve(frames);
      } catch (err) {
        URL.revokeObjectURL(video.src);
        reject(err);
      }
    });

    video.addEventListener('error', () => {
      URL.revokeObjectURL(video.src);
      reject(new Error('Failed to load video'));
    });
  });
}

/**
 * End-to-end pipeline: ensure the model is loaded, sample frames, caption each
 * frame with the VLM (streaming partial text into its card), then generate an
 * overall summary from the per-frame descriptions.
 */
async function processVideo() {
  const analyzeBtn = document.getElementById('analyzeBtn');
  const progressSection = document.getElementById('progressSection');
  const resultsSection = document.getElementById('resultsSection');

  analyzeBtn.disabled = true;
  analyzeBtn.querySelector('.spinner').classList.remove('hidden');
  analyzeBtn.querySelector('.btn-text').textContent = 'Processing...';
  progressSection.classList.remove('hidden');
  resultsSection.classList.add('hidden');
  frameDescriptions = [];

  try {
    // Initialize model if not already loaded.
    if (!model || !processor) {
      if (!await initializeModel()) {
        throw new Error('Model initialization failed');
      }
    }

    // FIX: parseInt without a radix; always pass 10.
    const numFrames = Number.parseInt(document.getElementById('frameCount').value, 10);
    updateProgress(0, 'Extracting frames from video...');
    const frames = await extractFramesFromVideo(currentVideo, numFrames);

    const framesGrid = document.getElementById('framesGrid');
    framesGrid.innerHTML = '';

    for (let i = 0; i < frames.length; i++) {
      updateProgress(30 + (i / frames.length * 50), `Analyzing frame ${i + 1}/${frames.length}`);

      // NOTE(review): the original card markup was garbled in this file; the
      // structure below is reconstructed so the '.frame-description' selector
      // used further down still resolves — confirm class names against the
      // page stylesheet.
      const frameCard = document.createElement('div');
      frameCard.className = 'frame-card';
      frameCard.innerHTML = `
        <img class="frame-image" src="${frames[i].preview}" alt="Frame ${i + 1}">
        <div class="frame-header">
          <span class="frame-title">Frame ${i + 1}</span>
          <span class="frame-time">${formatTime(frames[i].time)}</span>
        </div>
        <div class="frame-description">Analyzing...</div>
      `;
      framesGrid.appendChild(frameCard);

      // Build the chat-style prompt for this frame.
      const messages = [
        {
          role: "user",
          content: `Describe what's happening in this frame of the video in detail.`,
        },
      ];
      const prompt = processor.apply_chat_template(messages, {
        add_generation_prompt: true,
      });
      const inputs = await processor(frames[i].image, prompt, {
        add_special_tokens: false,
      });

      let generatedText = '';
      try {
        const outputs = await model.generate({
          ...inputs,
          max_new_tokens: 256,
          do_sample: false,
          streamer: new TextStreamer(processor.tokenizer, {
            skip_prompt: true,
            skip_special_tokens: false,
            callback_function: (text) => {
              generatedText += text;
              // textContent (not innerHTML): model output must not be parsed
              // as markup.
              frameCard.querySelector('.frame-description').textContent = generatedText;
            },
          }),
        });

        // Decode only the newly generated tokens (skip the prompt).
        const decoded = processor.batch_decode(
          outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
          { skip_special_tokens: true },
        );
        frameDescriptions.push({
          frame: i + 1,
          time: frames[i].time,
          description: decoded[0] || generatedText,
        });
      } catch (frameError) {
        // A single bad frame should not abort the whole run.
        console.error(`Error processing frame ${i + 1}:`, frameError);
        frameDescriptions.push({
          frame: i + 1,
          time: frames[i].time,
          description: 'Failed to analyze this frame',
        });
        frameCard.querySelector('.frame-description').textContent = 'Failed to analyze this frame';
      }
    }

    // Generate overall summary from the per-frame descriptions.
    updateProgress(80, 'Generating video summary...');
    const summaryCard = document.getElementById('summaryCard');
    const summaryContent = document.getElementById('summaryContent');

    if (frameDescriptions.length > 0) {
      const summaryMessages = [
        {
          role: "user",
          content: `Based on what you see in this video frame and knowing that the video contains the following sequence: ${frameDescriptions.map(f => `Frame ${f.frame}: ${f.description}`).join('; ')}. Provide a comprehensive summary of what the entire video is about.`,
        },
      ];
      const summaryPrompt = processor.apply_chat_template(summaryMessages, {
        add_generation_prompt: true,
      });
      // The model requires an image input; reuse the last frame for context.
      const summaryInputs = await processor(frames[frames.length - 1].image, summaryPrompt, {
        add_special_tokens: false,
      });

      let summaryText = '';
      await model.generate({
        ...summaryInputs,
        max_new_tokens: 512,
        do_sample: false,
        streamer: new TextStreamer(processor.tokenizer, {
          skip_prompt: true,
          skip_special_tokens: false,
          callback_function: (text) => {
            summaryText += text;
            // textContent for the same injection-safety reason as above.
            summaryContent.textContent = summaryText;
            summaryCard.classList.remove('hidden');
          },
        }),
      });
    }

    updateProgress(100, 'Analysis complete!');
    resultsSection.classList.remove('hidden');
    progressSection.classList.add('hidden');
  } catch (error) {
    console.error('Processing error:', error);
    showError(`Failed to process video: ${error.message}`);
  } finally {
    // Always restore the button, even on failure.
    analyzeBtn.disabled = false;
    analyzeBtn.querySelector('.spinner').classList.add('hidden');
    analyzeBtn.querySelector('.btn-text').textContent = 'Analyze Video';
  }
}

/** Format a duration in seconds as "m:ss". */
function formatTime(seconds) {
  const mins = Math.floor(seconds / 60);
  const secs = Math.floor(seconds % 60);
  return `${mins}:${secs.toString().padStart(2, '0')}`;
}

/** Update the progress bar width, percentage label, and status line. */
function updateProgress(percent, status) {
  document.getElementById('progressFill').style.width = `${percent}%`;
  document.getElementById('progressText').textContent = `${Math.round(percent)}%`;
  document.getElementById('currentStatus').textContent = status;
}

/** Update only the status line. */
function updateStatus(message) {
  document.getElementById('currentStatus').textContent = message;
}

/** Show the error banner with `message` and hide the progress section. */
function showError(message) {
  document.getElementById('errorMessage').textContent = message;
  document.getElementById('errorSection').classList.remove('hidden');
  document.getElementById('progressSection').classList.add('hidden');
}

/** Download the frame descriptions and summary as a JSON file. */
function downloadResults() {
  const results = {
    timestamp: new Date().toISOString(),
    // FIX: guard against a download triggered after resetApp() cleared the video.
    video: currentVideo?.name ?? 'unknown',
    frames: frameDescriptions,
    summary: document.getElementById('summaryContent').textContent,
  };
  const blob = new Blob([JSON.stringify(results, null, 2)], { type: 'application/json' });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = `video-analysis-${Date.now()}.json`;
  a.click();
  URL.revokeObjectURL(url);
}

/** Reset the UI and module state to their initial condition. */
function resetApp() {
  document.getElementById('videoInput').value = '';
  document.getElementById('videoInfo').innerHTML = '';
  document.getElementById('videoPreview').classList.add('hidden');
  document.getElementById('analyzeBtn').classList.add('hidden');
  document.getElementById('progressSection').classList.add('hidden');
  document.getElementById('resultsSection').classList.add('hidden');
  document.getElementById('errorSection').classList.add('hidden');
  currentVideo = null;
  frameDescriptions = [];
}

// Wire up the file picker: show file info + a preview and enable analysis.
document.getElementById('videoInput').addEventListener('change', (e) => {
  const file = e.target.files[0];
  if (file && file.type.startsWith('video/')) {
    currentVideo = file;

    // textContent — a filename is user input and must not be parsed as HTML.
    // (The original markup for this line was garbled in extraction.)
    const videoInfo = document.getElementById('videoInfo');
    videoInfo.textContent = `${file.name} — ${(file.size / 1024 / 1024).toFixed(2)} MB`;

    const videoPreview = document.getElementById('videoPreview');
    const videoElement = document.getElementById('videoElement');
    // FIX: revoke the previous object URL so repeated selections don't leak.
    if (videoElement.src) {
      URL.revokeObjectURL(videoElement.src);
    }
    videoElement.src = URL.createObjectURL(file);
    videoPreview.classList.remove('hidden');

    document.getElementById('analyzeBtn').classList.remove('hidden');
    document.getElementById('analyzeBtn').disabled = false;
  }
});

document.getElementById('analyzeBtn').addEventListener('click', processVideo);

// Kick off the backend-capability check on load.
checkWebGPUSupport();