Data Models
MonoLLM provides several data models for representing requests, responses, and configuration.
Core Models
LLMResponse - a complete response from a generation call, carrying the content, model, provider, optional usage statistics, and optional thinking output
StreamingResponse - the async-iterable object returned by a streaming generation call
StreamChunk - a single incremental piece of a streaming response, with content and optional usage
Configuration Models
RequestConfig - per-request settings such as model, temperature, max_tokens, stream, show_thinking, and metadata
Message - a single chat message with a role (system, user, or assistant) and content
Usage - token usage statistics (prompt_tokens, completion_tokens, total_tokens)
Provider Models
ProviderInfo - metadata describing a configured provider
ModelInfo - metadata describing an individual model offered by a provider
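ProviderInfo and ModelInfo are returned by the client's discovery APIs rather than constructed directly. The sketch below is a rough illustration only: the list_providers() method and the name and models attributes are assumptions that do not appear elsewhere on this page, so check the client reference for the actual discovery call.

# Hedged sketch: list_providers() and the .name / .models attributes are
# assumptions, not confirmed MonoLLM API.
from monollm import UnifiedLLMClient

async def show_catalog():
    async with UnifiedLLMClient() as client:
        for provider in client.list_providers():    # hypothetical call
            print(f"Provider: {provider.name}")     # ProviderInfo
            for model in provider.models:           # list of ModelInfo
                print(f"  Model: {model.name}")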
Usage Examples
Creating a Request Configuration
from monollm import RequestConfig
# Basic configuration
config = RequestConfig(
    model="qwen-plus",
    temperature=0.7,
    max_tokens=1000
)

# Advanced configuration
config = RequestConfig(
    model="qwq-32b",
    temperature=0.1,
    max_tokens=2000,
    stream=True,
    show_thinking=True,
    metadata={"user_id": "123", "session_id": "abc"}
)
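The stream=True flag in the advanced configuration pairs with the client's generate_stream method shown later on this page. The routing sketch below assumes RequestConfig exposes its constructor arguments as attributes; the config.stream attribute access is not demonstrated elsewhere on this page.

# Sketch: choose the call path from the config. The config.stream
# attribute access is an assumption.
async def run(client, prompt, config):
    if config.stream:
        streaming_response = await client.generate_stream(prompt, config)
        async for chunk in streaming_response:
            if chunk.content:
                print(chunk.content, end="", flush=True)
    else:
        response = await client.generate(prompt, config)
        print(response.content)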
Creating Messages
from monollm import Message
# System message
system_msg = Message(
    role="system",
    content="You are a helpful assistant."
)

# User message
user_msg = Message(
    role="user",
    content="What is Python?"
)

# Assistant message
assistant_msg = Message(
    role="assistant",
    content="Python is a programming language..."
)
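Messages are normally assembled into an ordered conversation list, with the assistant's reply appended after each turn. Note that the Working with Responses example below passes a plain string prompt; passing a list of Message objects to generate, as sketched here, is an assumption about the client's signature.

# Hedged sketch: assumes client.generate() also accepts a list of
# Message objects in place of a plain string prompt.
conversation = [
    Message(role="system", content="You are a helpful assistant."),
    Message(role="user", content="What is Python?"),
]

async def ask(client, config):
    response = await client.generate(conversation, config)  # assumed overload
    # Keep the reply so the next turn carries the full history
    conversation.append(Message(role="assistant", content=response.content))
    return response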
Working with Responses
from monollm import UnifiedLLMClient, RequestConfig
async def example():
    async with UnifiedLLMClient() as client:
        config = RequestConfig(model="qwen-plus")
        response = await client.generate("Hello", config)

        # Access response data
        print(f"Content: {response.content}")
        print(f"Model: {response.model}")
        print(f"Provider: {response.provider}")

        if response.usage:
            print(f"Input tokens: {response.usage.prompt_tokens}")
            print(f"Output tokens: {response.usage.completion_tokens}")
            print(f"Total tokens: {response.usage.total_tokens}")

        if response.thinking:
            print(f"Thinking: {response.thinking}")
Streaming Responses
async def streaming_example():
    async with UnifiedLLMClient() as client:
        config = RequestConfig(model="qwen-plus", stream=True)

        streaming_response = await client.generate_stream("Tell me a story", config)
        async for chunk in streaming_response:
            if chunk.content:
                print(chunk.content, end="", flush=True)

            # Access chunk metadata
            if chunk.usage:
                print(f"Tokens so far: {chunk.usage.total_tokens}")