tylermullen committed on
Commit
1afc395
·
verified ·
1 Parent(s): 41ace04

Upload 2 files

Browse files
Files changed (2) hide show
  1. Notice +1 -0
  2. index.js +385 -0
Notice ADDED
@@ -0,0 +1 @@
 
 
1
+ Gemma is provided under and subject to the Gemma Terms of Use found at ai.google.dev/gemma/terms
index.js ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2025 The MediaPipe Authors.
2
+
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ // -------------------------------------------------------------------------- //
16
+
17
+ import {oauthLoginUrl, oauthHandleRedirectIfPresent} from "@huggingface/hub";
18
+ import {FilesetResolver, LlmInference} from '@mediapipe/tasks-genai';
19
+
// --- DOM Element References ---
const webcamElement = document.getElementById('webcam');
const statusMessageElement = document.getElementById('status-message');
const responseContainer = document.getElementById('response-container');
const promptInputElement = document.getElementById('prompt-input');
const recordButton = document.getElementById('record-button');
const sendButton = document.getElementById('send-button');
const recordButtonIcon = recordButton.querySelector('i');
const loaderOverlay = document.getElementById('loader-overlay');
const progressBarFill = document.getElementById('progress-bar-fill');
const signInMessage = document.getElementById('sign-in-message');
const loaderMessage = document.getElementById('loader-message');

// --- State Management ---
let isRecording = false;   // true while audio capture is active
let isLoading = false;     // true while a model query is in flight
let mediaRecorder = null;  // created once mic/camera access is granted
let audioChunks = [];      // audio data accumulated during a recording

// --- Model-specific constants ---
// The demo defaults to Gemma 3n E4B. Users on a more limited device can
// switch to the smaller E2B variant by appending '?e2b' to the URL.
const useE4b = !new URL(window.location.href).searchParams.has('e2b');
const cacheFileName = useE4b ? '3n_e4b' : '3n_e2b';
const remoteFileUrl = useE4b
    ? 'https://huggingface.co/google/gemma-3n-E4B-it-litert-lm/resolve/main/gemma-3n-E4B-it-int4-Web.litertlm'
    : 'https://huggingface.co/google/gemma-3n-E2B-it-litert-lm/resolve/main/gemma-3n-E2B-it-int4-Web.litertlm';
// Model size in bytes for reliable progress indication; just hard-coded for now
const modelSize = useE4b ? 4274978816 : 3038117888;
58
+ // --- Core Functions ---
59
+
/**
 * Updates the progress bar's width.
 * @param {number} percentage The progress percentage (0-100).
 */
function updateProgressBar(percentage) {
  if (!progressBarFill) {
    return;  // Progress bar element not present; nothing to update.
  }
  progressBarFill.style.width = `${percentage}%`;
}
69
+
/** Handle to the MediaPipe LLM inference task, set once initialization ends. */
let llmInference;

/**
 * Initializes our local LLM from a StreamReader.
 * @param {*} modelReader Reader streaming the .litertlm model bytes.
 */
async function initLlm(modelReader) {
  console.log('Initializing LLM');
  loaderMessage.textContent = "Initializing model...";

  // There are no granular progress updates for this final step, but it is
  // relatively short (<10s on a decent laptop), so jump the bar to 90%.
  // TODO: It'd look nicer to have this go from 0 to 100% instead.
  updateProgressBar(90);
  const fileset = await FilesetResolver.forGenAiTasks(
      'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai/wasm');
  try {
    llmInference = await LlmInference.createFromOptions(fileset, {
      baseOptions: {modelAssetBuffer: modelReader},
      maxTokens: 2048,
      maxNumImages: 1,
      supportAudio: true,
    });

    // Loading has fully finished: fade out the loader, then enable the demo
    // controls once the fade transition completes.
    loaderOverlay.style.opacity = '0';
    const enableControls = () => {
      loaderOverlay.style.display = 'none';
      promptInputElement.disabled = false;
      sendButton.disabled = false;
      recordButton.disabled = false;
    };
    setTimeout(enableControls, 300);
  } catch (error) {
    console.error('Failed to initialize the LLM', error);
    loaderOverlay.style.display = 'none'; // Hide loader on error
  }
}
105
+
/**
 * Replaces our demo with a sign-in button for HuggingFace.
 *
 * Hides the loader and main UI, reveals the sign-in button and message,
 * wires the button to start the OAuth flow, and clears any stale token.
 */
function requireSignIn() {
  // Use style.display rather than overwriting the whole inline `style`
  // attribute (the original `el.style = "display:none"` clobbers every other
  // inline style on the element, and is inconsistent with the rest of this
  // file, which reads/writes style.display).
  document.getElementById('loader-overlay').style.display = 'none';
  document.getElementById('main-container').style.display = 'none';
  document.getElementById('signin').style.removeProperty('display');
  document.getElementById('sign-in-message').style.removeProperty('display');
  document.getElementById('signin').onclick = async function() {
    // prompt=consent to re-trigger the consent screen instead of silently redirecting
    window.location.href =
        (await oauthLoginUrl({scopes: window.huggingface.variables.OAUTH_SCOPES})) +
        '&prompt=consent';
  };
  // clear old oauth, if any
  localStorage.removeItem('oauth');
}
121
+
/**
 * Utility function to show progress while we load from remote file into local
 * cache.
 *
 * Effectively "await readableStream.pipeTo(writableStream)", but reports
 * download progress (scaled to 0-90%) via updateProgressBar along the way.
 * On a read/write failure the writer is aborted and the error rethrown;
 * on success the writer is closed.
 */
async function pipeStreamAndReportProgress(readableStream, writableStream) {
  const source = readableStream.getReader();
  const sink = writableStream.getWriter();
  let totalBytes = 0;
  let lastReportedPercent = 0;
  let aborted = false;
  try {
    for (;;) {
      const {done, value} = await source.read();
      if (done) {
        break;
      }
      if (!value) {
        continue;
      }
      totalBytes += value.length;
      // Scale to 90% so the final init step (initLlm) owns the last stretch.
      const percent = Math.round(totalBytes / modelSize * 90);
      if (percent > lastReportedPercent) {
        lastReportedPercent = percent;
        updateProgressBar(lastReportedPercent);
      }
      await sink.write(value);
    }
  } catch (error) {
    console.error('Error while piping stream:', error);
    // Abort the writer if there's an error
    aborted = true;
    await sink.abort(error);
    throw error;
  } finally {
    // Release the reader lock
    source.releaseLock();
    // Close the writer only if the stream wasn't aborted
    if (!aborted) {
      console.log('Closing the writer, and hence the stream');
      await sink.close();
    }
  }
}
166
+
/**
 * Loads the LLM file from either cache or OAuth-guarded remote download.
 *
 * Fast path: if a complete copy of the model already sits in the Origin
 * Private File System (OPFS) cache under `cacheFileName`, stream it straight
 * into initLlm(). Any cache problem (missing entry, wrong size) throws into
 * the catch below, which handles the slow path: authenticate with
 * HuggingFace OAuth, download the model, cache it, then initialize.
 */
async function loadLlm() {
  let opfs = await navigator.storage.getDirectory();
  // If we can load the model from cache, then do so.
  try {
    const fileHandle = await opfs.getFileHandle(cacheFileName);
    // Check to make sure size is as expected, and not a partially-downloaded
    // or corrupt file.
    console.log('Model found in cache; checking size.');
    const file = await fileHandle.getFile();
    console.log('File size is: ', file.size);
    if (file.size !== modelSize) {
      console.error('Cached model had unexpected size. Redownloading.');
      // Thrown (not returned) so the shared catch below treats "badly cached"
      // exactly like "not cached": re-download.
      throw new Error('Unexpected cached model size');
    }
    console.log('Model found in cache of expected size, reusing.');
    const fileReader = file.stream().getReader();
    await initLlm(fileReader);
  } catch {
    // Otherwise, we need to download remotely, which requires oauth.
    console.log('Model not found in cache: oauth and download required.');
    // We first remove from cache, in case model file is corrupted/partial.
    try {
      await opfs.removeEntry(cacheFileName);
    } catch {}  // best-effort: the entry may simply not exist
    // Prefer a previously persisted OAuth result; if the stored JSON is
    // unparsable, discard it and fall back to the redirect handler.
    let oauthResult = localStorage.getItem("oauth");
    if (oauthResult) {
      try {
        oauthResult = JSON.parse(oauthResult);
      } catch {
        oauthResult = null;
      }
    }
    // If we just returned from the HuggingFace login flow, this parses the
    // OAuth parameters out of the current URL.
    oauthResult ||= await oauthHandleRedirectIfPresent();
    // If we have successful oauth from one of the methods above, download from
    // remote.
    if (oauthResult?.accessToken) {
      localStorage.setItem("oauth", JSON.stringify(oauthResult));
      const modelUrl = remoteFileUrl;
      const oauthHeaders = {
        "Authorization": `Bearer ${oauthResult.accessToken}`
      };

      const response = await fetch(modelUrl, {headers: oauthHeaders});
      if (response.ok) {
        // NOTE(review): response.body is a ReadableStream, not a Promise, so
        // this `await` is a harmless no-op wrapper.
        const responseStream = await response.body;
        // Cache locally, so we can avoid this next time.
        const fileHandle =
            await opfs.getFileHandle(cacheFileName, {create: true});
        const writeStream = await fileHandle.createWritable();
        await pipeStreamAndReportProgress(responseStream, writeStream);
        console.log('Model written to cache!');
        const file = await fileHandle.getFile();
        const fileReader = file.stream().getReader();
        await initLlm(fileReader);
      } else {
        // Typically an auth/permission failure: the user must sign in and/or
        // accept the Gemma license on HuggingFace first.
        console.error('Model fetch encountered error. Likely requires sign-in or Gemma license acknowledgement.');
        requireSignIn();
      }
    } else {
      // No successful oauth, so replace our demo with a HuggingFace sign-in button.
      console.log('No oauth detected. Requiring sign-in.');
      requireSignIn();
    }
  }
}
235
+
// Object URL of the most recent audio recording; revoked before being replaced
// so old recordings don't leak.
let audioUrl = undefined;

/**
 * Initializes the webcam and microphone.
 *
 * Requests camera+mic access, routes the stream into the webcam <video>
 * element, loads the LLM, and wires up a MediaRecorder whose onstop handler
 * sends the captured audio (with the current webcam frame) as a query.
 * On failure, shows a permissions error message instead.
 */
async function initMedia() {
  // Disable controls on startup; initLlm() re-enables them once the model
  // has finished loading.
  promptInputElement.disabled = true;
  sendButton.disabled = true;
  recordButton.disabled = true;

  try {
    const stream = await navigator.mediaDevices.getUserMedia({
      video: true,
      audio: true,
    });
    webcamElement.srcObject = stream;
    // Swap the placeholder status text for the live video feed.
    statusMessageElement.style.display = 'none';
    webcamElement.style.display = 'block';

    // Model download/initialization happens only after media access is
    // granted, so a permission denial skips the (large) download entirely.
    await loadLlm();

    // Set up MediaRecorder for audio
    mediaRecorder = new MediaRecorder(stream);
    mediaRecorder.ondataavailable = (event) => {
      audioChunks.push(event.data);
    };
    mediaRecorder.onstop = () => {
      // We process the audio here, and free the previous one, if any
      const blob = new Blob(audioChunks, {type: 'audio/webm'});
      if (audioUrl) window.URL.revokeObjectURL(audioUrl);
      audioUrl = window.URL.createObjectURL(blob);
      audioChunks = [];

      // The object wrapper marks this prompt as audio (vs. plain text);
      // sendQuery() places it directly into the multimodal query array.
      sendQuery({audioSource: audioUrl});
    };
  } catch (error) {
    console.error('Error accessing media devices.', error);
    audioUrl = undefined;
    statusMessageElement.textContent =
        'Error: Could not access camera or microphone. Please check permissions.';
    loaderOverlay.style.display = 'none'; // Hide loader on error
  }
}
279
+
/**
 * Toggles the audio recording state.
 *
 * Starts/stops the MediaRecorder (when it exists and is in the matching
 * state) and updates the record button, its icon, and the prompt placeholder
 * to reflect the new state. Stopping triggers mediaRecorder.onstop, which
 * sends the captured audio as a query.
 */
function toggleRecording() {
  isRecording = !isRecording;
  if (isRecording) {
    if (mediaRecorder?.state === 'inactive') {
      mediaRecorder.start();
    }
    recordButton.classList.add('recording');
  } else {
    if (mediaRecorder?.state === 'recording') {
      mediaRecorder.stop();
    }
    recordButton.classList.remove('recording');
  }
  if (recordButtonIcon) {
    recordButtonIcon.className =
        isRecording ? 'fa-solid fa-stop' : 'fa-solid fa-microphone';
  }
  promptInputElement.placeholder = isRecording
      ? 'Recording... Press stop when done.'
      : 'Ask a question about what you see...';
}
305
+
/**
 * Sends a text prompt with webcam frame to the Gemma 3n model.
 * @returns {Promise<void>} Resolves when the model response has finished.
 */
async function sendTextQuery() {
  const prompt = promptInputElement.value.trim();
  // Return the promise (the original fired-and-forgot it) so callers can
  // await completion and rejections are not dropped as a floating promise.
  return sendQuery(prompt);
}
313
+
/**
 * Sends the user's prompt (text or audio) with webcam frame to the Gemma 3n model.
 *
 * Streams partial results into the response container as they arrive, and
 * disables the controls for the duration of the query.
 * @param {string|{audioSource: string}} prompt Trimmed text, or an
 *     `{audioSource}` object produced by the MediaRecorder onstop handler.
 */
async function sendQuery(prompt) {
  // Ignore empty prompts and re-entrant calls while a query is in flight.
  if (!prompt || isLoading) {
    return;
  }

  setLoading(true);

  try {
    // Gemma 3n multimodal turn: user prompt + current webcam frame, then an
    // opened model turn for the generated answer.
    const query = [
      '<ctrl99>user\n',
      prompt, // audio or text
      // BUG FIX: was `webcam`, an implicit global that only resolved via the
      // browser's named-element access on window (id="webcam"). Use the
      // explicitly declared element reference instead.
      {imageSource: webcamElement},
      '<ctrl100>\n<ctrl99>model\n'
    ];
    let resultSoFar = '';
    await llmInference.generateResponse(query, (newText, isDone) => {
      resultSoFar += newText;
      updateResponse(resultSoFar);
    });
    promptInputElement.value = '';
  } catch (error) {
    console.error('Error running Gemma 3n on query.', error);
    updateResponse(
      `Error: Could not get a response. ${error instanceof Error ? error.message : String(error)}`,
    );
  } finally {
    setLoading(false);
  }
}
346
+
/**
 * Updates the response container with new content.
 * @param {string} text The text to display.
 */
function updateResponse(text) {
  // Build the paragraph first; textContent (vs. innerHTML) keeps model
  // output from being interpreted as markup.
  const paragraph = document.createElement('p');
  paragraph.textContent = text;
  responseContainer.classList.remove('thinking');
  responseContainer.innerHTML = '';
  responseContainer.appendChild(paragraph);
}
358
+
/**
 * Sets the loading state of the UI.
 * @param {boolean} loading - True if loading, false otherwise.
 */
function setLoading(loading) {
  isLoading = loading;
  // All interactive controls share the same enabled/disabled state.
  for (const control of [promptInputElement, sendButton, recordButton]) {
    control.disabled = loading;
  }

  if (loading) {
    responseContainer.classList.add('thinking');
    responseContainer.innerHTML = '<p>Processing...</p>';
  }
}
374
+
// --- Event Listeners ---
recordButton.addEventListener('click', toggleRecording);
sendButton.addEventListener('click', sendTextQuery);
// Pressing Enter in the prompt box submits, same as clicking Send.
promptInputElement.addEventListener('keydown', (event) => {
  if (event.key === 'Enter') {
    sendTextQuery();
  }
});

// --- Initialization ---
// Media access, model download, and recorder wiring all start once the DOM
// is ready (see initMedia).
document.addEventListener('DOMContentLoaded', initMedia);