Spaces:
Running
Running
| import { | |
| AutoProcessor, | |
| AutoModelForImageTextToText, | |
| RawImage, | |
| TextStreamer, | |
| } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2'; | |
// --- Module-level state ---
let processor = null;        // AutoProcessor instance, lazily created by initializeModel()
let model = null;            // AutoModelForImageTextToText instance, lazily created by initializeModel()
let currentVideo = null;     // File object for the currently selected video (null until chosen)
let frameDescriptions = [];  // [{ frame, time, description }] accumulated by processVideo()
// Check WebGPU support, update the status badge, and lock the device
// selector to WASM when WebGPU is unusable.
// Returns true only when a real WebGPU adapter is obtainable.
async function checkWebGPUSupport() {
    const statusEl = document.getElementById('webgpuStatus');
    // 'gpu' in navigator only proves the API surface exists; requestAdapter()
    // can still resolve to null (blocklisted GPU, headless, etc.), so probe
    // for an actual adapter before reporting support.
    let supported = false;
    if ('gpu' in navigator) {
        try {
            supported = (await navigator.gpu.requestAdapter()) !== null;
        } catch {
            supported = false;
        }
    }
    if (supported) {
        statusEl.textContent = '✅ Available';
        statusEl.style.color = '#10b981';
        return true;
    }
    statusEl.textContent = '❌ Not Available';
    statusEl.style.color = '#ef4444';
    document.getElementById('deviceSelect').value = 'wasm';
    document.getElementById('deviceSelect').disabled = true;
    return false;
}
// Load the FastVLM processor and model for the device selected in the UI.
// Populates the module-level `processor` and `model`.
// Returns true on success, false (after surfacing an error) on failure.
async function initializeModel() {
    const selectedDevice = document.getElementById('deviceSelect').value;
    updateStatus('Loading AI model...');
    try {
        const modelId = "onnx-community/FastVLM-0.5B-ONNX";
        processor = await AutoProcessor.from_pretrained(modelId);
        // Quantization: fp16 token embeddings, 4-bit vision encoder + decoder.
        const loadOptions = {
            dtype: {
                embed_tokens: "fp16",
                vision_encoder: "q4",
                decoder_model_merged: "q4",
            },
        };
        if (selectedDevice === 'webgpu') {
            loadOptions.device = 'webgpu';
        }
        model = await AutoModelForImageTextToText.from_pretrained(modelId, loadOptions);
        updateStatus('Model loaded successfully!');
        return true;
    } catch (error) {
        console.error('Model initialization error:', error);
        showError('Failed to load AI model. Please try again.');
        return false;
    }
}
// Extract `numFrames` evenly spaced frames from a video File.
// Resolves to [{ image: RawImage, preview: dataURL string, time: seconds }].
// Fix: errors thrown inside the async 'loadedmetadata' handler (seek/draw/
// RawImage failures) previously escaped the executor and left the returned
// promise pending forever; they now reject it, and the object URL is always
// released.
async function extractFramesFromVideo(videoFile, numFrames = 4) {
    return new Promise((resolve, reject) => {
        const video = document.createElement('video');
        const canvas = document.createElement('canvas');
        const ctx = canvas.getContext('2d');
        video.src = URL.createObjectURL(videoFile);
        video.addEventListener('loadedmetadata', async () => {
            try {
                const duration = video.duration;
                const frameInterval = duration / numFrames;
                const frames = [];
                // Cap dimensions so very large videos don't blow up memory
                // or inference time.
                canvas.width = Math.min(video.videoWidth, 1024);
                canvas.height = Math.min(video.videoHeight, 1024);
                for (let i = 0; i < numFrames; i++) {
                    const currentTime = i * frameInterval;
                    video.currentTime = currentTime;
                    // Wait until the seek completes before drawing.
                    await new Promise(r => {
                        video.addEventListener('seeked', () => r(), { once: true });
                    });
                    ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
                    const blob = await new Promise(r => canvas.toBlob(r, 'image/png'));
                    if (!blob) {
                        // toBlob yields null on failure (e.g. tainted canvas).
                        throw new Error('Failed to capture frame');
                    }
                    const image = await RawImage.fromBlob(blob);
                    // Store frame preview (JPEG keeps the data URL small).
                    const previewUrl = canvas.toDataURL('image/jpeg', 0.8);
                    frames.push({ image, preview: previewUrl, time: currentTime });
                    // Frame extraction occupies the first 30% of the progress bar.
                    updateProgress((i + 1) / numFrames * 30, `Extracting frame ${i + 1}/${numFrames}`);
                }
                resolve(frames);
            } catch (err) {
                reject(err);
            } finally {
                URL.revokeObjectURL(video.src);
            }
        });
        video.addEventListener('error', () => {
            URL.revokeObjectURL(video.src);
            reject(new Error('Failed to load video'));
        });
    });
}
// Orchestrate the full analysis pipeline: lazily load the model, extract
// frames, caption each frame with the VLM, then generate an overall summary.
// Drives the progress/result UI and always restores the analyze button.
// Fix: streamed model output was injected with innerHTML (XSS risk since the
// generated text is untrusted); it is now rendered via textContent.
async function processVideo() {
    const analyzeBtn = document.getElementById('analyzeBtn');
    const progressSection = document.getElementById('progressSection');
    const resultsSection = document.getElementById('resultsSection');
    analyzeBtn.disabled = true;
    analyzeBtn.querySelector('.spinner').classList.remove('hidden');
    analyzeBtn.querySelector('.btn-text').textContent = 'Processing...';
    progressSection.classList.remove('hidden');
    resultsSection.classList.add('hidden');
    frameDescriptions = [];
    try {
        // Initialize model if not already loaded
        if (!model || !processor) {
            if (!await initializeModel()) {
                throw new Error('Model initialization failed');
            }
        }
        // Extract frames (0-30% of the progress bar)
        const numFrames = parseInt(document.getElementById('frameCount').value, 10);
        updateProgress(0, 'Extracting frames from video...');
        const frames = await extractFramesFromVideo(currentVideo, numFrames);
        // Display frame previews
        const framesGrid = document.getElementById('framesGrid');
        framesGrid.innerHTML = '';
        // Process each frame (30-80% of the progress bar)
        for (let i = 0; i < frames.length; i++) {
            updateProgress(30 + (i / frames.length * 50), `Analyzing frame ${i + 1}/${frames.length}`);
            // Create frame card. This template only interpolates values we
            // generated ourselves (data: URL, index, formatted time), so it
            // is safe to assign as HTML.
            const frameCard = document.createElement('div');
            frameCard.className = 'frame-card';
            frameCard.innerHTML = `
                <img src="${frames[i].preview}" alt="Frame ${i + 1}">
                <div class="frame-info">
                    <h4>Frame ${i + 1}</h4>
                    <span class="frame-time">${formatTime(frames[i].time)}</span>
                </div>
                <div class="frame-description">
                    <div class="loading-dots">Analyzing...</div>
                </div>
            `;
            framesGrid.appendChild(frameCard);
            const descriptionEl = frameCard.querySelector('.frame-description');
            // Render model output as plain text — it is untrusted and must
            // never be parsed as HTML.
            const renderDescription = (text, color = null) => {
                const p = document.createElement('p');
                if (color) p.style.color = color;
                p.textContent = text;
                descriptionEl.replaceChildren(p);
            };
            // Prepare prompt
            const messages = [
                {
                    role: "user",
                    content: `<image>Describe what's happening in this frame of the video in detail.`,
                },
            ];
            const prompt = processor.apply_chat_template(messages, {
                add_generation_prompt: true,
            });
            // Prepare inputs
            const inputs = await processor(frames[i].image, prompt, {
                add_special_tokens: false,
            });
            // Generate output, streaming partial text into the card
            let generatedText = '';
            try {
                const outputs = await model.generate({
                    ...inputs,
                    max_new_tokens: 256,
                    do_sample: false,
                    streamer: new TextStreamer(processor.tokenizer, {
                        skip_prompt: true,
                        skip_special_tokens: false,
                        callback_function: (text) => {
                            generatedText += text;
                            renderDescription(generatedText);
                        },
                    }),
                });
                // Decode only the newly generated tokens (slice off the prompt)
                const decoded = processor.batch_decode(
                    outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
                    { skip_special_tokens: true },
                );
                frameDescriptions.push({
                    frame: i + 1,
                    time: frames[i].time,
                    description: decoded[0] || generatedText
                });
            } catch (frameError) {
                // One bad frame must not abort the whole run.
                console.error(`Error processing frame ${i + 1}:`, frameError);
                frameDescriptions.push({
                    frame: i + 1,
                    time: frames[i].time,
                    description: 'Failed to analyze this frame'
                });
                renderDescription('Failed to analyze this frame', '#ef4444');
            }
        }
        // Generate overall summary (80-100%)
        updateProgress(80, 'Generating video summary...');
        const summaryCard = document.getElementById('summaryCard');
        const summaryContent = document.getElementById('summaryContent');
        // Create a summary based on the frame descriptions
        if (frameDescriptions.length > 0) {
            const summaryMessages = [
                {
                    role: "user",
                    content: `<image>Based on what you see in this video frame and knowing that the video contains the following sequence: ${frameDescriptions.map(f => `Frame ${f.frame}: ${f.description}`).join('; ')}. Provide a comprehensive summary of what the entire video is about.`,
                },
            ];
            const summaryPrompt = processor.apply_chat_template(summaryMessages, {
                add_generation_prompt: true,
            });
            // Use the last frame's image for context
            const summaryInputs = await processor(frames[frames.length - 1].image, summaryPrompt, {
                add_special_tokens: false,
            });
            let summaryText = '';
            await model.generate({
                ...summaryInputs,
                max_new_tokens: 512,
                do_sample: false,
                streamer: new TextStreamer(processor.tokenizer, {
                    skip_prompt: true,
                    skip_special_tokens: false,
                    callback_function: (text) => {
                        summaryText += text;
                        // Untrusted model output: render as text, not HTML.
                        const p = document.createElement('p');
                        p.textContent = summaryText;
                        summaryContent.replaceChildren(p);
                        summaryCard.classList.remove('hidden');
                    },
                }),
            });
        }
        updateProgress(100, 'Analysis complete!');
        // Show results
        resultsSection.classList.remove('hidden');
        progressSection.classList.add('hidden');
    } catch (error) {
        console.error('Processing error:', error);
        showError(`Failed to process video: ${error.message}`);
    } finally {
        analyzeBtn.disabled = false;
        analyzeBtn.querySelector('.spinner').classList.add('hidden');
        analyzeBtn.querySelector('.btn-text').textContent = 'Analyze Video';
    }
}
// Utility functions
// Format a duration in seconds as "m:ss" (e.g. 75.4 -> "1:15").
function formatTime(seconds) {
    const totalSecs = Math.floor(seconds);
    const minutes = Math.floor(totalSecs / 60);
    const remainder = String(totalSecs % 60).padStart(2, '0');
    return `${minutes}:${remainder}`;
}
// Update the progress bar width, the rounded percent label, and the
// current-status line.
function updateProgress(percent, status) {
    const fill = document.getElementById('progressFill');
    const label = document.getElementById('progressText');
    const statusLine = document.getElementById('currentStatus');
    fill.style.width = `${percent}%`;
    label.textContent = `${Math.round(percent)}%`;
    statusLine.textContent = status;
}
// Show a status message in the progress area.
function updateStatus(message) {
    const statusLine = document.getElementById('currentStatus');
    statusLine.textContent = message;
}
// Surface an error message to the user and hide the in-progress UI.
function showError(message) {
    const errorLabel = document.getElementById('errorMessage');
    errorLabel.textContent = message;
    document.getElementById('errorSection').classList.remove('hidden');
    document.getElementById('progressSection').classList.add('hidden');
}
// Download the analysis (frame captions + summary) as pretty-printed JSON.
// Fixes: previously threw if invoked after resetApp() nulled currentVideo;
// the anchor is now attached to the document (Firefox ignores click() on a
// detached anchor) and removed afterwards.
function downloadResults() {
    const results = {
        timestamp: new Date().toISOString(),
        // Guard: currentVideo may be null after a reset.
        video: currentVideo?.name ?? null,
        frames: frameDescriptions,
        summary: document.getElementById('summaryContent').textContent
    };
    const blob = new Blob([JSON.stringify(results, null, 2)], { type: 'application/json' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = `video-analysis-${Date.now()}.json`;
    document.body.appendChild(a);
    a.click();
    a.remove();
    URL.revokeObjectURL(url);
}
// Return the UI to its initial state and clear module-level state.
function resetApp() {
    document.getElementById('videoInput').value = '';
    document.getElementById('videoInfo').innerHTML = '';
    // Hide every section that only makes sense with a video selected.
    const sectionsToHide = [
        'videoPreview',
        'analyzeBtn',
        'progressSection',
        'resultsSection',
        'errorSection',
    ];
    for (const id of sectionsToHide) {
        document.getElementById(id).classList.add('hidden');
    }
    currentVideo = null;
    frameDescriptions = [];
}
// Event listeners
// Handle video selection: validate the MIME type, show file metadata and a
// preview, and enable analysis.
// Fixes: the user-controlled file name was interpolated into innerHTML (it is
// now rendered via textContent), and the previous preview object URL is
// revoked so repeated selections don't leak memory.
document.getElementById('videoInput').addEventListener('change', (e) => {
    const file = e.target.files[0];
    if (file && file.type.startsWith('video/')) {
        currentVideo = file;
        // Display video info. Built from DOM nodes because file.name is
        // user-controlled and must not be parsed as HTML.
        const videoInfo = document.getElementById('videoInfo');
        const infoRow = document.createElement('div');
        infoRow.className = 'file-info';
        const nameSpan = document.createElement('span');
        nameSpan.className = 'file-name';
        nameSpan.textContent = file.name;
        const sizeSpan = document.createElement('span');
        sizeSpan.className = 'file-size';
        sizeSpan.textContent = `${(file.size / 1024 / 1024).toFixed(2)} MB`;
        infoRow.append(nameSpan, sizeSpan);
        videoInfo.replaceChildren(infoRow);
        // Show video preview, releasing any previously created object URL.
        const videoPreview = document.getElementById('videoPreview');
        const videoElement = document.getElementById('videoElement');
        if (videoElement.src) {
            URL.revokeObjectURL(videoElement.src);
        }
        videoElement.src = URL.createObjectURL(file);
        videoPreview.classList.remove('hidden');
        // Show analyze button
        document.getElementById('analyzeBtn').classList.remove('hidden');
        document.getElementById('analyzeBtn').disabled = false;
    }
});
// Wire the analyze button to the processing pipeline.
document.getElementById('analyzeBtn').addEventListener('click', processVideo);
// Initialize
// Probe WebGPU availability once at startup (fire-and-forget; the function
// updates the UI itself).
checkWebGPUSupport();