# Source code for kani.ext.realtime.models

import abc
import uuid
from typing import Annotated, Literal

from pydantic import BaseModel, Field


# ===== client =====
# ---- session.update ----
class AudioTranscriptionConfig(BaseModel):
    """Input-audio transcription settings.

    Used as the ``input_audio_transcription`` value of ``SessionConfig``.
    The only active field is the name of the transcription model.
    """

    # enabled: bool = True
    model: str = "whisper-1"
class TurnDetectionConfig(BaseModel):
    """Turn-detection settings; every field has a default.

    Used as the ``turn_detection`` value of ``SessionConfig``.
    """

    type: str = "server_vad"
    threshold: float = 0.5
    # Both durations are expressed in milliseconds, per the ``_ms`` suffix.
    prefix_padding_ms: int = 300
    silence_duration_ms: int = 500
# ---- session.update ----
# ---- response.create ----
class FunctionDefinition(BaseModel):
    """Description of one callable tool, as listed in ``ResponseConfig.tools``."""

    type: str = "function"
    name: str
    description: str
    # Free-form mapping; presumably a JSON Schema describing the function's
    # arguments -- confirm against the code that builds these definitions.
    parameters: dict
class ResponseConfig(BaseModel):
    """Response-generation options; also the base class of ``SessionConfig``.

    Every field has a default, so ``ResponseConfig()`` is a valid configuration.
    """

    # List literals are safe as defaults here: pydantic copies non-hashable
    # defaults for each new instance instead of sharing one object.
    modalities: list[str] = ["text", "audio"]
    instructions: str = ""
    voice: Literal["alloy", "echo", "shimmer"] = "alloy"
    output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] = "pcm16"
    tools: list[FunctionDefinition] = []
    # One of the three known modes, or any other string.
    tool_choice: Literal["auto", "none", "required"] | str = "auto"
    temperature: float = 0.8
    # max_output_tokens: int | Literal["inf"] | None = "inf"
class SessionConfig(ResponseConfig):
    """Session-level configuration: ``ResponseConfig`` plus input-audio options.

    Transcription and turn detection each default to a default-constructed
    config object, and each may instead be set to ``None``.
    """

    input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] = "pcm16"
    input_audio_transcription: AudioTranscriptionConfig | None = AudioTranscriptionConfig()
    turn_detection: TurnDetectionConfig | None = TurnDetectionConfig()
# ---- conversation.item.create ----
class TextContentPart(BaseModel):
    """Text content of a message.

    ``type`` is ``"input_text"`` (the default) or ``"text"``; it is also the
    discriminator used by the ``ContentPart`` union.
    """

    type: Literal["input_text", "text"] = "input_text"
    text: str
class AudioContentPart(BaseModel):
    """Audio content of a message, with an optional transcript.

    ``type`` is ``"input_audio"`` (the default) or ``"audio"``; it is also the
    discriminator used by the ``ContentPart`` union.
    """

    type: Literal["input_audio", "audio"] = "input_audio"
    # repr=False keeps this field out of the model's repr(); presumably it
    # holds encoded audio data that would flood logs -- confirm the encoding.
    audio: str | None = Field(default=None, repr=False)
    # No default is declared, so under pydantic v2 the key must be supplied
    # explicitly, even though its value may be None.
    transcript: str | None
# Either kind of content part; pydantic picks the concrete model from the
# value of the ``type`` field.
ContentPart = Annotated[
    TextContentPart | AudioContentPart,
    Field(discriminator="type"),
]
class ConversationItemBase(BaseModel, abc.ABC):
    """Fields shared by every conversation item.

    The concrete item classes in this module narrow ``type`` to a single
    literal, which lets it act as the discriminator of ``ConversationItem``.
    """

    # When no id is passed, a random UUID4 rendered as a hex string is generated.
    id: str = Field(default_factory=lambda: uuid.uuid4().hex)
    type: str
    status: Literal["completed", "in_progress", "incomplete"] = "completed"
class MessageConversationItem(ConversationItemBase):
    """A ``"message"`` item: a required role plus a list of content parts."""

    type: Literal["message"] = "message"
    role: Literal["user", "assistant", "system"]
    # Defaults to an empty list; each element is a text or audio content part.
    content: list[ContentPart] = []
class FunctionCallConversationItem(ConversationItemBase):
    """A ``"function_call"`` item: call id, function name and arguments."""

    type: Literal["function_call"] = "function_call"
    call_id: str
    name: str
    # Kept as a plain string; presumably JSON-encoded arguments -- confirm
    # against the code that consumes this item.
    arguments: str
class FunctionCallOutputConversationItem(ConversationItemBase):
    """A ``"function_call_output"`` item carrying a function's output string."""

    type: Literal["function_call_output"] = "function_call_output"
    # NOTE(review): unlike FunctionCallConversationItem, no ``call_id`` field
    # is declared here -- confirm whether the output is meant to be linked to
    # its originating call.
    output: str
# Any conversation item; pydantic picks the concrete model from the value of
# the ``type`` field.
ConversationItem = Annotated[
    MessageConversationItem | FunctionCallConversationItem | FunctionCallOutputConversationItem,
    Field(discriminator="type"),
]


# ===== server =====
# ---- error ----
class ErrorDetails(BaseModel):
    """Payload of a server ``error`` event.

    ``type`` and ``message`` are required; the remaining fields default to
    ``None``.
    """

    type: str
    code: str | None = None
    message: str
    param: str | None = None
    event_id: str | None = None
# ---- session.created ----
# ---- session.updated ----
class SessionDetails(SessionConfig):
    """A session as reported by the server: ``SessionConfig`` plus identity.

    Adds the session ``id`` and an ``object`` tag that must equal
    ``"realtime.session"``.
    """

    id: str
    object: Literal["realtime.session"]
# ---- conversation.created ----
class ConversationDetails(BaseModel):
    """Conversation identity: an ``id`` and an ``object`` tag.

    ``object`` must equal ``"realtime.conversation"``.
    """

    id: str
    object: Literal["realtime.conversation"]
# ---- response.created ----
# ---- response.done ----
class UsageDetails(BaseModel):
    """Token counts attached to a response; all three fields are required."""

    total_tokens: int
    input_tokens: int
    output_tokens: int
class RealtimeResponse(BaseModel):
    """A response object: identity, status, output items and token usage.

    ``object`` must equal ``"realtime.response"``.
    """

    id: str
    object: Literal["realtime.response"]
    status: Literal["in_progress", "completed", "cancelled", "failed", "incomplete"]
    # No default is declared for status_details or usage, so under pydantic v2
    # both keys must be present, although either value may be None.
    status_details: dict | None
    # Defaults to an empty list of conversation items.
    output: list[ConversationItem] = []
    usage: UsageDetails | None
# ---- rate_limits.updated ----
class RateLimitInfo(BaseModel):
    """One named rate limit: its limit, what remains, and time until reset."""

    name: str
    limit: int
    remaining: int
    # A float number of seconds, per the field name.
    reset_seconds: float