Skip to content

Multimodal

llmist supports image and speech generation with automatic cost tracking.

// Request one HD 1024x1024 image from DALL-E 3.
const result = await client.image.generate({
  model: 'dall-e-3',
  prompt: 'A 1990s desktop computer with flying toasters as a screensaver',
  size: '1024x1024',
  quality: 'hd',
});

// The response carries the generated images plus the cost of the call.
const firstImage = result.images[0];
console.log('Image URL:', firstImage.url);
console.log('Cost:', result.cost);
| Model | Provider | Quality | Sizes |
| --- | --- | --- | --- |
| dall-e-3 | OpenAI | standard, hd | 1024x1024, 1024x1792, 1792x1024 |
| dall-e-2 | OpenAI | - | 256x256, 512x512, 1024x1024 |
| gpt-image-1 | OpenAI | low, medium, high | 1024x1024, 1024x1536, 1536x1024 |
// Ask for the image inline as base64 instead of a hosted URL.
const result = await client.image.generate({
  model: 'dall-e-3',
  prompt: '8-bit pixel art of a floppy disk with legs running away',
  responseFormat: 'b64_json',
});

// Guard instead of asserting with `!`: if the payload is missing, fail with a
// clear message rather than an opaque TypeError from Buffer.from(undefined).
const encodedImage = result.images[0]?.b64Json;
if (!encodedImage) {
  throw new Error('Image response did not include base64 data (b64Json)');
}

// Decode the base64 payload and write the raw bytes to disk.
const buffer = Buffer.from(encodedImage, 'base64');
fs.writeFileSync('floppy.png', buffer);
// Synthesize speech with the HD text-to-speech model using the "nova" voice.
const result = await client.speech.generate({
  model: 'tts-1-hd',
  input: 'Hello, welcome to llmist!',
  voice: 'nova',
});

// Wrap the returned audio in a Buffer and persist it as an MP3 file.
// NOTE(review): assumes result.audio is binary data accepted by Buffer.from — confirm.
const audioBytes = Buffer.from(result.audio);
fs.writeFileSync('output.mp3', audioBytes);

console.log('Cost:', result.cost);
| Model | Provider | Voices |
| --- | --- | --- |
| tts-1 | OpenAI | alloy, echo, fable, onyx, nova, shimmer |
| tts-1-hd | OpenAI | alloy, echo, fable, onyx, nova, shimmer |
Terminal window
# Image: generate from a text prompt (-m picks the model, -o names the output file)
bunx @llmist/cli image "A Windows 95 error dialog that says 'Success'" -m dall-e-3 -o success.png
# Speech: synthesize the given text (--voice picks the voice, -o names the output file)
bunx @llmist/cli speech "You've got mail!" -m tts-1 --voice nova -o aol.mp3
/**
 * Gadget that generates a 90s-style screensaver image in one of a fixed set
 * of styles, using the image API exposed on the execution context.
 */
class ScreenSaverGenerator extends Gadget({
  description: 'Generates a 90s-style screensaver image',
  schema: z.object({
    style: z.enum(['flying-toasters', '3d-pipes', 'starfield', 'maze']),
  }),
}) {
  /**
   * Generates the screensaver image for the requested style.
   *
   * @param params - Validated gadget parameters; `style` selects the screensaver theme.
   * @param ctx - Execution context providing the `llmist` client used for image generation.
   * @returns The URL of the first generated image, or a fallback message when no URL is present.
   * @throws Error when no execution context is supplied.
   */
  async execute(params: this['params'], ctx?: ExecutionContext): Promise<string> {
    // `ctx` is optional in the signature, so narrow it explicitly instead of
    // asserting with `!` and failing later with an opaque TypeError.
    if (!ctx) {
      throw new Error('ScreenSaverGenerator requires an execution context to generate images');
    }

    const result = await ctx.llmist.image.generate({
      model: 'dall-e-3',
      prompt: `A ${params.style} screensaver in the style of Windows 95`,
    });

    // Cost is automatically tracked!
    return result.images[0]?.url ?? 'Screensaver generated';
  }
}