Skip to content

Commit 2bae512

Browse files
author
mcarbonell
committed
fix: use Chrome AI for audio transcription in extension
1 parent 11e60d1 commit 2bae512

File tree

3 files changed

+384
-57
lines changed

3 files changed

+384
-57
lines changed

build/shared/chrome-ai-apis.js

Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
// chrome-ai-apis.js - Unified wrapper for all Chrome Built-in AI APIs
2+
// Supports: Prompt, Summarizer, Translator, Language Detector, Writer, Rewriter, Proofreader
3+
4+
/**
 * Unified wrapper around Chrome's built-in AI globals (LanguageModel,
 * Summarizer, Translator, LanguageDetector, Writer, Rewriter, Proofreader).
 *
 * Every API instance is created lazily on first use and cached in
 * `this.apis` until `destroy()` is called. Availability is detected through
 * `self`, so the detection itself does not depend on `window`.
 */
class ChromeAI {
  constructor() {
    // One cached instance per API; null means "not created yet".
    this.apis = ChromeAI.emptyApis();
    // Options the cached prompt session was created with. Needed to know
    // which media types the session accepts and to rebuild it faithfully.
    this.promptOptions = null;
  }

  /**
   * Build the empty per-API cache.
   * @returns {Object} Map of API name to null.
   */
  static emptyApis() {
    return {
      prompt: null,
      summarizer: null,
      translator: null,
      detector: null,
      writer: null,
      rewriter: null,
      proofreader: null,
    };
  }

  // ====================
  // AVAILABILITY CHECKS
  // ====================

  /**
   * Report which built-in AI globals are exposed in the current context.
   * Only checks that the global exists; it does not check model readiness.
   * @returns {Promise<Object>} Map of API name to boolean.
   */
  async checkAll() {
    return {
      prompt: 'LanguageModel' in self,
      summarizer: 'Summarizer' in self,
      translator: 'Translator' in self,
      detector: 'LanguageDetector' in self,
      writer: 'Writer' in self,
      rewriter: 'Rewriter' in self,
      proofreader: 'Proofreader' in self,
    };
  }

  // ====================
  // PROMPT API
  // ====================

  /**
   * Create a prompt session and cache it, replacing any cached one.
   *
   * @param {Object} [options]
   * @param {number} [options.temperature] Defaults to the model default.
   * @param {number} [options.topK] Defaults to the model default.
   * @param {string} [options.systemPrompt] Sent as the initial system prompt.
   * @param {boolean} [options.supportsImages] Accept image inputs.
   * @param {boolean} [options.supportsAudio] Accept audio inputs.
   * @returns {Promise<Object>} The new session.
   * @throws {Error} If the LanguageModel global is not available.
   */
  async createPromptSession(options = {}) {
    if (!('LanguageModel' in self)) {
      throw new Error('Prompt API not available');
    }

    // NOTE(review): params() is believed to resolve to null when the model is
    // unavailable; `?.` keeps that case from failing with a TypeError here.
    const defaults = await LanguageModel.params();
    const config = {
      temperature: options.temperature ?? defaults?.defaultTemperature,
      topK: options.topK ?? defaults?.defaultTopK,
      initialPrompts: options.systemPrompt
        ? [{ role: 'system', content: options.systemPrompt }]
        : [],
    };

    // Collect every requested modality. Assigning expectedInputs once per
    // flag would let the audio flag silently discard the image flag.
    const inputTypes = [];
    if (options.supportsImages) inputTypes.push('image');
    if (options.supportsAudio) inputTypes.push('audio');
    if (inputTypes.length > 0) {
      config.expectedInputs = inputTypes.map((type) => ({ type }));
    }

    this.apis.prompt = await LanguageModel.create(config);
    this.promptOptions = { ...options };
    return this.apis.prompt;
  }

  /**
   * Return a cached prompt session able to take the given media, creating or
   * rebuilding it when necessary.
   *
   * A rebuilt session keeps the previous creation options (system prompt,
   * temperature, topK) and adds the missing modality. The previous session is
   * destroyed, so its conversation history is lost.
   *
   * @param {*} media Media payload, or a falsy value for text-only prompts.
   * @param {string} mediaType 'audio' selects audio; anything else, images.
   * @returns {Promise<Object>} A usable session.
   */
  async ensurePromptSession(media, mediaType) {
    const needsAudio = Boolean(media) && mediaType === 'audio';
    const needsImages = Boolean(media) && mediaType !== 'audio';

    const current = this.apis.prompt;
    const currentOptions = current ? this.promptOptions ?? {} : {};

    if (current) {
      const audioOk = !needsAudio || Boolean(currentOptions.supportsAudio);
      const imagesOk = !needsImages || Boolean(currentOptions.supportsImages);
      if (audioOk && imagesOk) return current;
    }

    const session = await this.createPromptSession({
      ...currentOptions,
      supportsImages: Boolean(currentOptions.supportsImages) || needsImages,
      supportsAudio: Boolean(currentOptions.supportsAudio) || needsAudio,
    });

    // Release the replaced session only after the new one exists, so a failed
    // creation leaves the old session cached and usable.
    if (current && typeof current.destroy === 'function') current.destroy();
    return session;
  }

  /**
   * Build the single user message that carries text plus one media part.
   * @param {string} text
   * @param {*} media
   * @param {string} mediaType
   * @returns {Array<Object>} Message list for `session.append`.
   */
  static buildMediaMessage(text, media, mediaType) {
    return [{
      role: 'user',
      content: [
        { type: 'text', value: text },
        { type: mediaType, value: media },
      ],
    }];
  }

  /**
   * Send a prompt and return the complete response.
   *
   * @param {string} text Prompt text.
   * @param {*} [media] Optional media payload to send with the text.
   * @param {string} [mediaType] Content type of `media` ('image' or 'audio').
   * @returns {Promise<string>} Model response.
   */
  async prompt(text, media = null, mediaType = 'image') {
    const session = await this.ensurePromptSession(media, mediaType);

    if (!media) {
      // Text only
      return await session.prompt(text);
    }

    // Multimodal: append text + media first, then prompt with empty input.
    await session.append(ChromeAI.buildMediaMessage(text, media, mediaType));
    return await session.prompt('');
  }

  /**
   * Send a prompt and yield the response incrementally.
   *
   * Yields only the newly produced text, whether the underlying stream emits
   * deltas or cumulative snapshots of the whole response.
   *
   * @param {string} text Prompt text.
   * @param {*} [media] Optional media payload to send with the text.
   * @param {string} [mediaType] Content type of `media` ('image' or 'audio').
   * @yields {string} Next piece of the response.
   */
  async *promptStreaming(text, media = null, mediaType = 'image') {
    const session = await this.ensurePromptSession(media, mediaType);

    let input = text;
    if (media) {
      // Multimodal: append text + media first, then stream with empty input.
      await session.append(ChromeAI.buildMediaMessage(text, media, mediaType));
      input = '';
    }

    const stream = await session.promptStreaming(input);
    let previousChunk = '';

    for await (const chunk of stream) {
      // A cumulative snapshot is strictly longer than the previous one and
      // starts with it. Requiring the extra length keeps a repeated delta
      // ("ha", "ha") from being mistaken for a snapshot and dropped.
      const isSnapshot = chunk.length > previousChunk.length
        && chunk.startsWith(previousChunk);
      yield isSnapshot ? chunk.slice(previousChunk.length) : chunk;
      previousChunk = chunk;
    }
  }

  // ====================
  // SUMMARIZER API
  // ====================

  /**
   * Create a summarizer and cache it.
   *
   * Defaults: type 'tldr', format 'plain-text', length 'medium', and an
   * output language taken from the document's `lang` attribute ('en' when it
   * is empty or there is no document). `options` override the defaults.
   *
   * @param {Object} [options] Passed to `Summarizer.create`.
   * @returns {Promise<Object>} The new summarizer.
   * @throws {Error} If the Summarizer global is not available.
   */
  async createSummarizer(options = {}) {
    if (!('Summarizer' in self)) {
      throw new Error('Summarizer API not available');
    }

    // `document` does not exist in worker contexts; fall back instead of
    // failing with a ReferenceError.
    const pageLang = typeof document !== 'undefined'
      ? document.documentElement?.lang
      : '';
    const defaults = {
      type: 'tldr',
      format: 'plain-text',
      length: 'medium',
      outputLanguage: pageLang || 'en',
    };

    this.apis.summarizer = await Summarizer.create({ ...defaults, ...options });
    return this.apis.summarizer;
  }

  /**
   * Summarize text with the cached summarizer.
   * @param {string} text
   * @param {Object} [options] Used only when the summarizer has to be created;
   *   ignored once one is cached.
   * @returns {Promise<string>} Summary.
   */
  async summarize(text, options = {}) {
    if (!this.apis.summarizer) await this.createSummarizer(options);
    return await this.apis.summarizer.summarize(text);
  }

  /**
   * Streaming variant of `summarize`.
   * @param {string} text
   * @param {Object} [options] Used only when the summarizer has to be created.
   * @yields {string} Chunks exactly as emitted by the summarizer stream.
   */
  async *summarizeStreaming(text, options = {}) {
    if (!this.apis.summarizer) await this.createSummarizer(options);
    const stream = await this.apis.summarizer.summarizeStreaming(text);

    for await (const chunk of stream) {
      yield chunk;
    }
  }

  // ====================
  // TRANSLATOR API
  // ====================

  /**
   * Create a translator for one language pair and cache it.
   * @param {string} sourceLanguage
   * @param {string} targetLanguage
   * @returns {Promise<Object>} The new translator.
   * @throws {Error} If the Translator global is not available.
   */
  async createTranslator(sourceLanguage, targetLanguage) {
    if (!('Translator' in self)) {
      throw new Error('Translator API not available');
    }

    this.apis.translator = await Translator.create({
      sourceLanguage,
      targetLanguage,
    });
    return this.apis.translator;
  }

  /**
   * Translate text, reusing the cached translator when the language pair is
   * unchanged. When the pair changes, a new translator is created and the
   * replaced one is destroyed.
   *
   * @param {string} text
   * @param {string} [sourceLanguage]
   * @param {string} [targetLanguage]
   * @returns {Promise<string>} Translated text.
   */
  async translate(text, sourceLanguage = 'en', targetLanguage = 'es') {
    const current = this.apis.translator;
    const reusable = Boolean(current)
      && current.sourceLanguage === sourceLanguage
      && current.targetLanguage === targetLanguage;

    if (!reusable) {
      await this.createTranslator(sourceLanguage, targetLanguage);
      // Release the replaced translator only after the new one exists.
      if (current && typeof current.destroy === 'function') current.destroy();
    }

    return await this.apis.translator.translate(text);
  }

  // ====================
  // LANGUAGE DETECTOR API
  // ====================

  /**
   * Create a language detector and cache it.
   * @returns {Promise<Object>} The new detector.
   * @throws {Error} If the LanguageDetector global is not available.
   */
  async createDetector() {
    if (!('LanguageDetector' in self)) {
      throw new Error('Language Detector API not available');
    }

    this.apis.detector = await LanguageDetector.create();
    return this.apis.detector;
  }

  /**
   * Detect the language of a text.
   * @param {string} text
   * @returns {Promise<Object|null>} `{ language, confidence, all }` built from
   *   the first detection result, or null when there are no results.
   */
  async detectLanguage(text) {
    if (!this.apis.detector) await this.createDetector();
    const results = await this.apis.detector.detect(text);

    if (!results || results.length === 0) return null;

    // The first entry is treated as the most likely language.
    const [best] = results;
    return {
      language: best.detectedLanguage,
      confidence: best.confidence,
      all: results,
    };
  }

  // ====================
  // WRITER API
  // ====================

  /**
   * Create a writer and cache it.
   * @param {Object} [options] Passed to `Writer.create`.
   * @returns {Promise<Object>} The new writer.
   * @throws {Error} If the Writer global is not available.
   */
  async createWriter(options = {}) {
    if (!('Writer' in self)) {
      throw new Error('Writer API not available');
    }

    this.apis.writer = await Writer.create(options);
    return this.apis.writer;
  }

  /**
   * Generate text for a writing task.
   * @param {string} prompt
   * @param {Object} [options] Used only when the writer has to be created.
   * @returns {Promise<string>} Generated text.
   */
  async write(prompt, options = {}) {
    if (!this.apis.writer) await this.createWriter(options);
    return await this.apis.writer.write(prompt);
  }

  /**
   * Streaming variant of `write`.
   * @param {string} prompt
   * @param {Object} [options] Used only when the writer has to be created.
   * @yields {string} Chunks exactly as emitted by the writer stream.
   */
  async *writeStreaming(prompt, options = {}) {
    if (!this.apis.writer) await this.createWriter(options);
    const stream = await this.apis.writer.writeStreaming(prompt);

    for await (const chunk of stream) {
      yield chunk;
    }
  }

  // ====================
  // REWRITER API
  // ====================

  /**
   * Create a rewriter and cache it.
   * @param {Object} [options] Passed to `Rewriter.create`.
   * @returns {Promise<Object>} The new rewriter.
   * @throws {Error} If the Rewriter global is not available.
   */
  async createRewriter(options = {}) {
    if (!('Rewriter' in self)) {
      throw new Error('Rewriter API not available');
    }

    this.apis.rewriter = await Rewriter.create(options);
    return this.apis.rewriter;
  }

  /**
   * Rewrite text. Unlike `write`, `options` go to the per-call `rewrite`, and
   * the rewriter itself is created with default options.
   * @param {string} text
   * @param {Object} [options] Passed to `rewriter.rewrite`.
   * @returns {Promise<string>} Rewritten text.
   */
  async rewrite(text, options = {}) {
    if (!this.apis.rewriter) await this.createRewriter();
    return await this.apis.rewriter.rewrite(text, options);
  }

  /**
   * Streaming variant of `rewrite`.
   * @param {string} text
   * @param {Object} [options] Passed to `rewriter.rewriteStreaming`.
   * @yields {string} Chunks exactly as emitted by the rewriter stream.
   */
  async *rewriteStreaming(text, options = {}) {
    if (!this.apis.rewriter) await this.createRewriter();
    const stream = await this.apis.rewriter.rewriteStreaming(text, options);

    for await (const chunk of stream) {
      yield chunk;
    }
  }

  // ====================
  // PROOFREADER API
  // ====================

  /**
   * Create a proofreader and cache it.
   * @param {Object} [options] Passed to `Proofreader.create`.
   * @returns {Promise<Object>} The new proofreader.
   * @throws {Error} If the Proofreader global is not available.
   */
  async createProofreader(options = {}) {
    if (!('Proofreader' in self)) {
      throw new Error('Proofreader API not available');
    }

    this.apis.proofreader = await Proofreader.create(options);
    return this.apis.proofreader;
  }

  /**
   * Proofread text with the cached proofreader.
   * @param {string} text
   * @returns {Promise<*>} Whatever `proofreader.proofread` resolves to.
   */
  async proofread(text) {
    if (!this.apis.proofreader) await this.createProofreader();
    return await this.apis.proofreader.proofread(text);
  }

  // ====================
  // CLEANUP
  // ====================

  /**
   * Destroy cached API instances and clear their cache slots.
   *
   * @param {string|null} [apiName] Key of `this.apis` to destroy. A falsy
   *   value destroys everything. Names that are not cache keys are ignored.
   */
  destroy(apiName = null) {
    const names = apiName ? [apiName] : Object.keys(this.apis);

    for (const name of names) {
      // Own keys only, so inherited names such as 'toString' are never
      // touched.
      if (!Object.prototype.hasOwnProperty.call(this.apis, name)) continue;

      const instance = this.apis[name];
      // Clear the slot first so the reference is dropped even when the
      // instance has no destroy() or destroy() throws.
      this.apis[name] = null;
      if (name === 'prompt') this.promptOptions = null;

      if (instance && typeof instance.destroy === 'function') {
        instance.destroy();
      }
    }
  }
}
313+
314+
// Export for both module and global usage.
// CommonJS consumers get the class as the module's export.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = ChromeAI;
}

// Attach the class to the global object. `self` is used rather than `window`
// because it is the same object as `window` in a page and also exists in
// worker contexts, matching the `in self` feature checks inside the class.
if (typeof self !== 'undefined') {
  self.ChromeAI = ChromeAI;
}

extension/tools/ai/audio-transcription.html

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<head>
44
<meta charset="UTF-8">
55
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6-
<title>Transcripción de Audio con IA - FastTools</title>
6+
<title>Transcripción de Audio - FastTools Extension</title>
77
<style>
88
* {
99
margin: 0;
@@ -161,9 +161,8 @@
161161
<h1>🎤 Transcripción de Audio con IA</h1>
162162
<p class="subtitle">Transcribe archivos de audio a texto usando IA</p>
163163

164-
<div class="notice">
165-
<strong>⚠️ Nota:</strong> Esta herramienta requiere Google Gemini API.
166-
Los archivos de audio se procesan localmente y solo se envía el audio a Gemini para transcripción.
164+
<div class="notice" id="serviceStatus" style="background: #d1ecf1; border-color: #bee5eb;">
165+
<strong>🔄 Verificando disponibilidad...</strong>
167166
</div>
168167

169168
<div class="upload-area" id="uploadArea">
@@ -201,7 +200,7 @@ <h3>📝 Transcripción</h3>
201200
</div>
202201
</div>
203202

204-
<script src="../../shared/gemini-api.js"></script>
203+
<script src="../../shared/chrome-ai-apis.js"></script>
205204
<script src="audio-transcription.js"></script>
206205
</body>
207206
</html>

0 commit comments

Comments
 (0)