import {
AutoProcessor,
AutoModelForImageTextToText,
RawImage,
TextStreamer,
} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2';
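// Module-level state shared by the UI handlers below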
let processor = null;
let model = null;
let currentVideo = null;
let frameDescriptions = [];
// Check WebGPU support
async function checkWebGPUSupport() {
const statusEl = document.getElementById('webgpuStatus');
if ('gpu' in navigator) {
statusEl.textContent = '✅ Available';
statusEl.style.color = '#10b981';
return true;
} else {
statusEl.textContent = '❌ Not Available';
statusEl.style.color = '#ef4444';
document.getElementById('deviceSelect').value = 'wasm';
document.getElementById('deviceSelect').disabled = true;
return false;
}
}
// Initialize the model
async function initializeModel() {
const device = document.getElementById('deviceSelect').value;
updateStatus('Loading AI model...');
try {
const model_id = "onnx-community/FastVLM-0.5B-ONNX";
processor = await AutoProcessor.from_pretrained(model_id);
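// Mixed precision: fp16 token embeddings with 4-bit (q4) quantized vision encoder and decoder to reduce download size and memory use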
const modelOptions = {
dtype: {
embed_tokens: "fp16",
vision_encoder: "q4",
decoder_model_merged: "q4",
}
};
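// Run on WebGPU when selected; otherwise Transformers.js falls back to its default WASM (CPU) backend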
if (device === 'webgpu') {
modelOptions.device = 'webgpu';
}
model = await AutoModelForImageTextToText.from_pretrained(model_id, modelOptions);
updateStatus('Model loaded successfully!');
return true;
} catch (error) {
console.error('Model initialization error:', error);
showError('Failed to load AI model. Please try again.');
return false;
}
}
// Extract frames from video
async function extractFramesFromVideo(videoFile, numFrames = 4) {
return new Promise((resolve, reject) => {
const video = document.createElement('video');
const canvas = document.createElement('canvas');
const ctx = canvas.getContext('2d');
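// Off-screen <video> and <canvas> elements, used only for grabbing frames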
video.src = URL.createObjectURL(videoFile);
video.addEventListener('loadedmetadata', async () => {
const duration = video.duration;
const frameInterval = duration / numFrames;
const frames = [];
const frameTimes = [];
// Downscale so the longest side is at most 1024px while preserving aspect ratio
const scale = Math.min(1, 1024 / Math.max(video.videoWidth, video.videoHeight));
canvas.width = Math.round(video.videoWidth * scale);
canvas.height = Math.round(video.videoHeight * scale);
for (let i = 0; i < numFrames; i++) {
const currentTime = i * frameInterval;
frameTimes.push(currentTime);
// Attach the 'seeked' listener before seeking so a fast seek is not missed
await new Promise((resolve) => {
    video.addEventListener('seeked', resolve, { once: true });
    video.currentTime = currentTime;
});
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
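// Convert the canvas contents into a RawImage the processor can consume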
const blob = await new Promise(r => canvas.toBlob(r, 'image/png'));
const image = await RawImage.fromBlob(blob);
// Store frame preview
const previewUrl = canvas.toDataURL('image/jpeg', 0.8);
frames.push({ image, preview: previewUrl, time: currentTime });
updateProgress((i + 1) / numFrames * 30, `Extracting frame ${i + 1}/${numFrames}`);
}
URL.revokeObjectURL(video.src);
resolve(frames);
});
video.addEventListener('error', () => {
URL.revokeObjectURL(video.src);
reject(new Error('Failed to load video'));
});
});
}
// Process video
async function processVideo() {
const analyzeBtn = document.getElementById('analyzeBtn');
const progressSection = document.getElementById('progressSection');
const resultsSection = document.getElementById('resultsSection');
analyzeBtn.disabled = true;
analyzeBtn.querySelector('.spinner').classList.remove('hidden');
analyzeBtn.querySelector('.btn-text').textContent = 'Processing...';
progressSection.classList.remove('hidden');
resultsSection.classList.add('hidden');
frameDescriptions = [];
try {
// Initialize model if not already loaded
if (!model || !processor) {
if (!await initializeModel()) {
throw new Error('Model initialization failed');
}
}
// Extract frames
const numFrames = parseInt(document.getElementById('frameCount').value);
updateProgress(0, 'Extracting frames from video...');
const frames = await extractFramesFromVideo(currentVideo, numFrames);
// Display frame previews
const framesGrid = document.getElementById('framesGrid');
framesGrid.innerHTML = '';
// Process each frame
for (let i = 0; i < frames.length; i++) {
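// Frame analysis advances the progress bar from 30% toward 80%; the rest is reserved for the summary step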
updateProgress(30 + (i / frames.length * 50), `Analyzing frame ${i + 1}/${frames.length}`);
// Create frame card
const frameCard = document.createElement('div');
frameCard.className = 'frame-card';
frameCard.innerHTML = `
    <img src="${frames[i].preview}" alt="Frame ${i + 1}">
    <div class="frame-time">Frame ${i + 1} (${formatTime(frames[i].time)})</div>
    <div class="frame-description">Analyzing...</div>
`;
framesGrid.appendChild(frameCard);
try {
    // Build the chat-style prompt for this frame
    const messages = [
        { role: "user", content: "<image>Describe what you see in this frame in detail." },
    ];
    const prompt = processor.apply_chat_template(messages, {
        add_generation_prompt: true,
    });
    const inputs = await processor(frames[i].image, prompt, {
        add_special_tokens: false,
    });
    // Generate a description, streaming tokens into the frame card as they arrive
    let generatedText = '';
    const outputs = await model.generate({
        ...inputs,
        max_new_tokens: 512,
        do_sample: false,
        streamer: new TextStreamer(processor.tokenizer, {
            skip_prompt: true,
            skip_special_tokens: true,
            callback_function: (text) => {
                generatedText += text;
                frameCard.querySelector('.frame-description').innerHTML = `${generatedText}`;
            },
        }),
    });
    // Decode output
    const decoded = processor.batch_decode(
        outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
        { skip_special_tokens: true },
    );
    frameDescriptions.push({
        frame: i + 1,
        time: frames[i].time,
        description: decoded[0] || generatedText
    });
} catch (frameError) {
    console.error(`Error processing frame ${i + 1}:`, frameError);
    frameDescriptions.push({
        frame: i + 1,
        time: frames[i].time,
        description: 'Failed to analyze this frame'
    });
    frameCard.querySelector('.frame-description').innerHTML = `Failed to analyze this frame`;
}
}
// Generate overall summary
updateProgress(80, 'Generating video summary...');
const summaryCard = document.getElementById('summaryCard');
const summaryContent = document.getElementById('summaryContent');
// Create a summary based on the frame descriptions
if (frameDescriptions.length > 0) {
    const summaryMessages = [
        {
            role: "user",
            content: `<image>Here are descriptions of ${frameDescriptions.length} frames sampled from a video:\n\n${frameDescriptions.map(f => `Frame ${f.frame} (${formatTime(f.time)}): ${f.description}`).join('\n')}\n\nWrite a short summary of what happens in the video.`,
        },
    ];
    const summaryPrompt = processor.apply_chat_template(summaryMessages, {
        add_generation_prompt: true,
    });
    // The processor expects an image alongside the prompt, so reuse the first extracted frame
    const summaryInputs = await processor(frames[0].image, summaryPrompt, {
        add_special_tokens: false,
    });
    // Stream the summary into the summary card as it is generated
    let summaryText = '';
    await model.generate({
        ...summaryInputs,
        max_new_tokens: 512,
        do_sample: false,
        streamer: new TextStreamer(processor.tokenizer, {
            skip_prompt: true,
            skip_special_tokens: true,
            callback_function: (text) => {
                summaryText += text;
                summaryContent.innerHTML = `${summaryText}`;
                summaryCard.classList.remove('hidden');
            },
        }),
    });
}
updateProgress(100, 'Analysis complete!');
// Show results
resultsSection.classList.remove('hidden');
progressSection.classList.add('hidden');
} catch (error) {
    console.error('Processing error:', error);
    showError(`Failed to process video: ${error.message}`);
} finally {
    analyzeBtn.disabled = false;
    analyzeBtn.querySelector('.spinner').classList.add('hidden');
    analyzeBtn.querySelector('.btn-text').textContent = 'Analyze Video';
}
}
// Utility functions
function formatTime(seconds) {
    const mins = Math.floor(seconds / 60);
    const secs = Math.floor(seconds % 60);
    return `${mins}:${secs.toString().padStart(2, '0')}`;
}
function updateProgress(percent, status) {
    document.getElementById('progressFill').style.width = `${percent}%`;
    document.getElementById('progressText').textContent = `${Math.round(percent)}%`;
    document.getElementById('currentStatus').textContent = status;
}
function updateStatus(message) {
    document.getElementById('currentStatus').textContent = message;
}
function showError(message) {
    document.getElementById('errorMessage').textContent = message;
    document.getElementById('errorSection').classList.remove('hidden');
    document.getElementById('progressSection').classList.add('hidden');
}
function downloadResults() {
    const results = {
        timestamp: new Date().toISOString(),
        video: currentVideo.name,
        frames: frameDescriptions,
        summary: document.getElementById('summaryContent').textContent
    };
    const blob = new Blob([JSON.stringify(results, null, 2)], { type: 'application/json' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = `video-analysis-${Date.now()}.json`;
    a.click();
    URL.revokeObjectURL(url);
}
function resetApp() {
    document.getElementById('videoInput').value = '';
    document.getElementById('videoInfo').innerHTML = '';
    document.getElementById('videoPreview').classList.add('hidden');
    document.getElementById('analyzeBtn').classList.add('hidden');
    document.getElementById('progressSection').classList.add('hidden');
    document.getElementById('resultsSection').classList.add('hidden');
    document.getElementById('errorSection').classList.add('hidden');
    currentVideo = null;
    frameDescriptions = [];
}
// Event listeners
document.getElementById('videoInput').addEventListener('change', (e) => {
    const file = e.target.files[0];
    if (file && file.type.startsWith('video/')) {
        currentVideo = file;
        // Display video info
        const videoInfo = document.getElementById('videoInfo');
        videoInfo.innerHTML = `