Realtime Models

TrueFoundry offers a secure and efficient gateway for integrating Large Language Models (LLMs) into your applications, including realtime models hosted by different providers.

Add Realtime Models to Gateway

Adding a realtime model works the same way as adding any other model to the gateway. Follow these steps -

  1. Navigate to Integrations.
  2. Click on "Add New Integration".
  3. Select the provider from the list.
  4. Enter the required API keys and access credentials.

Once completed, your models will be ready for use in both the playground and your code.

Get Started using the WebSocket API

import asyncio
import json
import websockets

BASE_URL = "wss://llm-gateway.truefoundry.com/api/llm/api/inference/openai/v1/realtime?model=openai-main/realtime-preview"

headers = {
    "Authorization": f"Bearer Enter your API Key here",
}

async def connect():
    async with websockets.connect(BASE_URL, additional_headers=headers) as ws:
        print("WebSocket connection established!")

        # Can follow the openai realtime api docs
        # https://platform.openai.com/docs/api-reference/realtime

        # Update session
        session_update = {
            "type": "session.update",
            "session": {
                "modalities": ["text"],
                "model": "openai-main/realtime-preview",
            },
        }
        await ws.send(json.dumps(session_update))
        print("Session updated!")

        # Send user message
        message = {
            "type": "conversation.item.create",
            "item": {
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": "Say hello!"}],
            },
        }
        await ws.send(json.dumps(message))
        print("Conversation item created!")

        # Request response
        response_request = {"type": "response.create"}
        await ws.send(json.dumps(response_request))
        print("Response requested!")

        # Read responses
        async for msg in ws:
            event = json.loads(msg)

            if event.get("type") == "response.text.delta":
                print(event["delta"], end="", flush=True)

            elif event.get("type") == "response.text.done":
                print("
Response completed.")

            elif event.get("type") == "response.done":
                print("Session complete, closing connection.")
                break

asyncio.run(connect())
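
The example above requests text only. The same WebSocket session can also return spoken audio by enabling the audio modality. The sketch below is a minimal, untested variation of the script above, assuming the standard OpenAI Realtime session options (`voice`, `output_audio_format`) and the `response.audio.delta` event, which carries base64-encoded PCM16 chunks; it saves the reply to a raw PCM file.

import asyncio
import base64
import json
import websockets

BASE_URL = "wss://llm-gateway.truefoundry.com/api/llm/api/inference/openai/v1/realtime?model=openai-main/realtime-preview"

headers = {
    "Authorization": "Bearer Enter your API Key here",
}

async def speak():
    async with websockets.connect(BASE_URL, additional_headers=headers) as ws:
        # Enable audio output; "alloy" and "pcm16" are standard OpenAI Realtime
        # session options, not TrueFoundry-specific settings.
        await ws.send(json.dumps({
            "type": "session.update",
            "session": {
                "modalities": ["text", "audio"],
                "voice": "alloy",
                "output_audio_format": "pcm16",
            },
        }))

        # Same user message and response request as in the text-only example
        await ws.send(json.dumps({
            "type": "conversation.item.create",
            "item": {
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": "Say hello!"}],
            },
        }))
        await ws.send(json.dumps({"type": "response.create"}))

        audio = bytearray()
        async for msg in ws:
            event = json.loads(msg)
            if event.get("type") == "response.audio.delta":
                # Each delta is a base64-encoded chunk of raw PCM16 audio.
                audio.extend(base64.b64decode(event["delta"]))
            elif event.get("type") == "response.done":
                break

        with open("reply.pcm", "wb") as f:
            f.write(audio)

asyncio.run(speak())

The resulting reply.pcm file is headerless 16-bit mono PCM (24 kHz in OpenAI's default configuration), so it can be played with a tool that accepts raw PCM input, e.g. `ffplay -f s16le -ar 24000 -ac 1 reply.pcm`.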


Get Started using NodeJS


// Requires: yarn add ws @types/ws
const { OpenAIRealtimeWS } = require('openai/beta/realtime/ws');
const OpenAI = require('openai');

const openai = new OpenAI({
  apiKey: 'Enter your API Key here',
  baseURL: 'wss://llm-gateway.truefoundry.com/api/llm/api/inference/openai/v1',
});

const rt = new OpenAIRealtimeWS(
  { model: 'openai-main/realtime-preview', options: {} },
  openai
);

// Access the underlying WebSocket instance
rt.socket.on('open', () => {
  console.log('Connection opened!');

  // Can follow the openai realtime api docs
  // https://platform.openai.com/docs/api-reference/realtime
  rt.send({
    type: 'session.update',
    session: {
      modalities: ['text'],
      model: 'openai-main/realtime-preview',
    },
  });

  console.log('Session updated!');

  rt.send({
    type: 'conversation.item.create',
    item: {
      type: 'message',
      role: 'user',
      content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }],
    },
  });

  console.log('Conversation item created!');

  rt.send({ type: 'response.create' });

  console.log('Response created!');
});

rt.on('error', (err) => {
  console.error('WebSocket Error:', err);
});

rt.on('session.created', (event) => {
  console.log('session created!', event.session);
  console.log();
});

rt.on('response.text.delta', (event) => process.stdout.write(event.delta));
rt.on('response.text.done', () => console.log());
rt.on('response.done', () => rt.close());

rt.socket.on('close', () => console.log('\nConnection closed!'));



Get Started using OpenAI Library


import asyncio
from openai import AsyncOpenAI

async def main():
    client = AsyncOpenAI(
        api_key="Enter your API Key here",
        base_url="wss://llm-gateway.truefoundry.com/api/llm/api/inference/openai/v1"
    )

    try:
        async with client.beta.realtime.connect(model="openai-main/realtime-preview") as connection:
            # Can follow the openai realtime api docs
            # https://platform.openai.com/docs/api-reference/realtime

            print("WebSocket connection established!")

            # Update session
            await connection.session.update(session={'modalities': ['text']})
            print("Session updated!")

            # Send user message
            await connection.conversation.item.create(
                item={
                    "type": "message",
                    "role": "user",
                    "content": [{"type": "input_text", "text": "Say hello!"}],
                }
            )
            print("Conversation item created!")

            # Request response
            await connection.response.create()
            print("Response requested!")

            # Read responses
            async for event in connection:
                if event.type == 'response.text.delta':
                    print(event.delta, flush=True, end="")

                elif event.type == 'response.text.done':
                    print("
Response completed.")

                elif event.type == "response.done":
                    print("Session complete, closing connection.")
                    break

    except Exception as e:
        print(f"Error: {e}")

asyncio.run(main())
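
Because the connection stays open, you can hold a multi-turn conversation by sending further items on the same connection. The sketch below is a minimal, untested loop built from the same calls used above; the prompt list and the `chat` function name are illustrative only.

import asyncio
from openai import AsyncOpenAI

async def chat():
    client = AsyncOpenAI(
        api_key="Enter your API Key here",
        base_url="wss://llm-gateway.truefoundry.com/api/llm/api/inference/openai/v1"
    )

    # Illustrative prompts for a multi-turn exchange over one realtime session.
    prompts = ["Say hello!", "Now say goodbye!"]

    async with client.beta.realtime.connect(model="openai-main/realtime-preview") as connection:
        await connection.session.update(session={"modalities": ["text"]})

        for prompt in prompts:
            # Add the next user turn and request a response, as in the example above.
            await connection.conversation.item.create(
                item={
                    "type": "message",
                    "role": "user",
                    "content": [{"type": "input_text", "text": prompt}],
                }
            )
            await connection.response.create()

            # Stream this turn's text, then move on to the next prompt.
            async for event in connection:
                if event.type == "response.text.delta":
                    print(event.delta, end="", flush=True)
                elif event.type == "response.done":
                    print()
                    break

asyncio.run(chat())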


Get Started using Curl


# we're using websocat for this example, but you can use any websocket client
websocat "wss://llm-gateway.truefoundry.com/api/llm/api/inference/openai/v1/realtime?model=openai-main/realtime-preview" \
  -H 'Authorization: Bearer Enter your API Key here'
# once connected, you can send your messages as you would with OpenAI's Realtime API
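
In its default text mode, websocat sends each line typed on stdin as a text message, so you can drive the session by pasting the same JSON events used in the examples above, one event per line:

# 1. Configure the session for text output
{"type": "session.update", "session": {"modalities": ["text"]}}
# 2. Add a user message to the conversation
{"type": "conversation.item.create", "item": {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "Say hello!"}]}}
# 3. Ask the model to respond; text arrives as response.text.delta events
{"type": "response.create"}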

What’s Next

Request logging is coming soon.