
Usage

Features and usage examples for @built-in-ai/transformers-js with AI SDK v6

Basic Text Generation

Streaming Text

import { streamText } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";

const result = streamText({
  model: transformersJS("HuggingFaceTB/SmolLM2-360M-Instruct"),
  prompt: "Invent a new holiday and describe its traditions.",
});

for await (const textPart of result.textStream) {
  console.log(textPart);
}

Non-streaming Text

import { generateText } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";

const result = await generateText({
  model: transformersJS("HuggingFaceTB/SmolLM2-360M-Instruct"),
  prompt: "Invent a new holiday and describe its traditions.",
});

console.log(result.text);

Text Embeddings

Generate text embeddings using Transformers.js embedding models:

import { embed, embedMany } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";

// Single embedding
const { embedding, usage } = await embed({
  model: transformersJS.embedding("Supabase/gte-small"),
  value: "Hello, world!",
});

// Multiple embeddings
const { embeddings, usage: batchUsage } = await embedMany({
  model: transformersJS.embedding("Supabase/gte-small"),
  values: ["Hello", "World", "AI"],
});
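
Embeddings can be compared with the AI SDK's cosineSimilarity helper; a quick sketch (the example values are illustrative):

import { cosineSimilarity, embedMany } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";

const { embeddings } = await embedMany({
  model: transformersJS.embedding("Supabase/gte-small"),
  values: ["cat", "kitten"],
});

// Values close to 1 indicate semantically similar texts
console.log(cosineSimilarity(embeddings[0], embeddings[1]));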

Download Progress Tracking

The first time a model is used, its weights must be downloaded. Track download progress to improve the UX:

import { streamText } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";

const model = transformersJS("HuggingFaceTB/SmolLM2-360M-Instruct");
const availability = await model.availability();

if (availability === "unavailable") {
  console.log("Browser doesn't support Transformers.js");
  return;
}

if (availability === "downloadable") {
  await model.createSessionWithProgress(({ progress }) => {
    console.log(`Download progress: ${Math.round(progress * 100)}%`);
  });
}

// Model is ready
const result = streamText({
  model,
  prompt: "Invent a new holiday and describe its traditions.",
});

Vision Models

Process images with vision-capable models:

import { streamText } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";

const result = streamText({
  model: transformersJS("HuggingFaceTB/SmolVLM-256M-Instruct", {
    isVisionModel: true,
    device: "webgpu",
  }),
  messages: [
    {
      role: "user",
      content: [
        { type: "text", text: "Describe this image" },
        { type: "image", image: someImageBlobOrUrl },
      ],
    },
  ],
});

for await (const chunk of result.textStream) {
  console.log(chunk);
}

Audio Transcription

Transcribe audio using Whisper models:

import { experimental_transcribe as transcribe } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";

const transcript = await transcribe({
  model: transformersJS.transcription("Xenova/whisper-base"),
  audio: audioFile, // e.g. a Uint8Array, ArrayBuffer, or URL to an audio file
});

console.log(transcript.text);
console.log(transcript.segments); // Array of segments with timestamps

Tool Calling

For best tool calling results, use reasoning models like Qwen3.

The transformersJS model supports tool calling with multi-step execution:

import { streamText, tool, stepCountIs } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";
import { z } from "zod";

const result = streamText({
  model: transformersJS("onnx-community/Qwen3-0.6B-ONNX"),
  messages: [{ role: "user", content: "What's the weather in San Francisco?" }],
  tools: {
    weather: tool({
      description: "Get the weather in a location",
      inputSchema: z.object({
        location: z.string().describe("The location to get the weather for"),
      }),
      }),
      execute: async ({ location }) => ({
        location,
        temperature: 72 + Math.floor(Math.random() * 21) - 10,
      }),
    }),
  },
  stopWhen: stepCountIs(5), // multiple steps
});
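
As in the earlier examples, read the text stream to surface the model's replies across the tool-call steps:

for await (const textPart of result.textStream) {
  console.log(textPart);
}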

It also supports tool execution approval via the needsApproval tool option; see the sketch below.
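
A minimal sketch of an approval-gated tool, assuming the AI SDK v6 needsApproval option (the tool name and prompt are illustrative; the approval response itself is sent from your UI or caller):

import { streamText, tool, stepCountIs } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";
import { z } from "zod";

const result = streamText({
  model: transformersJS("onnx-community/Qwen3-0.6B-ONNX"),
  messages: [{ role: "user", content: "Delete draft.txt" }],
  tools: {
    deleteFile: tool({
      description: "Delete a file by path",
      inputSchema: z.object({ path: z.string() }),
      // Execution pauses until the caller approves this tool call
      needsApproval: true,
      execute: async ({ path }) => ({ deleted: path }),
    }),
  },
  stopWhen: stepCountIs(5),
});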

Tool Calling with Structured Output

import { Output, ToolLoopAgent, tool } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";
import { z } from "zod";

const agent = new ToolLoopAgent({
  model: transformersJS("onnx-community/Qwen3-0.6B-ONNX"),
  tools: {
    weather: tool({
      description: "Get the weather in a location",
      inputSchema: z.object({ city: z.string() }),
      execute: async ({ city }) => {
        // Illustrative stub; replace with a real weather lookup
        return { city, temperature: 72, condition: "sunny" };
      },
    }),
  },
  output: Output.object({
    schema: z.object({
      summary: z.string(),
      temperature: z.number(),
      recommendation: z.string(),
    }),
  }),
});

const { output } = await agent.generate({
  prompt: "What is the weather in San Francisco and what should I wear?",
});

Structured Output

Generate structured JSON output with schema validation:

Using generateText

import { generateText, Output } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";
import { z } from "zod";

const { output } = await generateText({
  model: transformersJS("onnx-community/Qwen3-0.6B-ONNX"),
  output: Output.object({
    schema: z.object({
      recipe: z.object({
        name: z.string(),
        ingredients: z.array(
          z.object({ name: z.string(), amount: z.string() }),
        ),
        steps: z.array(z.string()),
      }),
    }),
  }),
  prompt: "Generate a lasagna recipe.",
});

Using streamText

import { streamText, Output } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";
import { z } from "zod";

const { partialOutputStream } = streamText({
  model: transformersJS("onnx-community/Qwen3-0.6B-ONNX"),
  output: Output.object({
    schema: z.object({
      recipe: z.object({
        name: z.string(),
        ingredients: z.array(
          z.object({ name: z.string(), amount: z.string() }),
        ),
        steps: z.array(z.string()),
      }),
    }),
  }),
  prompt: "Generate a lasagna recipe.",
});
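
Read the partial objects as they arrive:

for await (const partialRecipe of partialOutputStream) {
  console.log(partialRecipe);
}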

Web Worker Usage

For better performance, run models off the main thread:

1. Create worker.ts

import { TransformersJSWorkerHandler } from "@built-in-ai/transformers-js";

const handler = new TransformersJSWorkerHandler();
self.onmessage = (msg: MessageEvent) => {
  handler.onmessage(msg);
};

2. Use the worker

import { streamText } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";

const model = transformersJS("HuggingFaceTB/SmolLM2-360M-Instruct", {
  worker: new Worker(new URL("./worker.ts", import.meta.url), {
    type: "module",
  }),
  device: "webgpu",
});

const result = streamText({
  model,
  messages: [{ role: "user", content: "Hello!" }],
});

Server-side Inference

Transformers.js can also run these models on the server, for example in a Next.js API route:

// In a Next.js API route (app/api/chat/route.ts)
import { streamText } from "ai";
import { transformersJS } from "@built-in-ai/transformers-js";

export async function POST(req: Request) {
  const { messages } = await req.json();

  const model = transformersJS("HuggingFaceTB/SmolLM2-135M-Instruct");

  const result = streamText({
    model,
    messages,
    temperature: 0.7,
  });

  return result.toUIMessageStreamResponse();
}
