Python SDK

The official Python SDK for BlockRun.

Installation

pip install blockrun-llm

Quick Start

from blockrun_llm import LLMClient

client = LLMClient()
response = client.chat("openai/gpt-4o", "Hello!")
print(response)

Configuration

Environment Variables

Variable             Description
BLOCKRUN_WALLET_KEY  Your EVM wallet private key
BLOCKRUN_API_URL     API endpoint (default: https://api.blockrun.ai)
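
These are normally exported in your shell before running your program. For a quick check, you can also set them in-process before constructing the client; the key below is a placeholder, shown only for illustration:

import os
from blockrun_llm import LLMClient

# Normally exported in your shell; set here only to illustrate pickup.
os.environ["BLOCKRUN_WALLET_KEY"] = "0x..."  # placeholder private key
os.environ["BLOCKRUN_API_URL"] = "https://api.blockrun.ai"  # optional override

client = LLMClient()  # reads both variables from the environment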

Client Options

from blockrun_llm import LLMClient

client = LLMClient(
    private_key="0x...",           # Wallet key (or use env var)
    api_url="https://api.blockrun.ai",  # Optional
    timeout=60.0                   # Request timeout in seconds
)

Methods

chat(model, prompt, **options)

Simple one-line chat interface.

response = client.chat(
    "openai/gpt-4o",
    "Explain quantum computing",
    system="You are a physics teacher.",  # Optional system prompt
    max_tokens=500,                        # Optional max output
    temperature=0.7                        # Optional temperature
)

Returns: str - The assistant's response text

chat_completion(model, messages, **options)

Full OpenAI-compatible chat completion.

messages = [
    {"role": "system", "content": "You are helpful."},
    {"role": "user", "content": "What is 2+2?"}
]

result = client.chat_completion(
    "openai/gpt-4o",
    messages,
    max_tokens=100,
    temperature=0.7,
    top_p=0.9
)

print(result.choices[0].message.content)
print(f"Tokens used: {result.usage.total_tokens}")

Returns: ChatResponse - the full completion object (see Response Types below)

list_models()

Get available models with pricing.

models = client.list_models()
for model in models:
    print(f"{model['id']}: ${model['inputPrice']}/M")

get_wallet_address()

Get the wallet address the client uses for payment.

address = client.get_wallet_address()
print(f"Paying from: {address}")

Async Client

For async/await usage:

import asyncio
from blockrun_llm import AsyncLLMClient

async def main():
    async with AsyncLLMClient() as client:
        # Single request
        response = await client.chat("openai/gpt-4o", "Hello!")
        print(response)

        # Concurrent requests
        tasks = [
            client.chat("openai/gpt-4o", "What is 2+2?"),
            client.chat("anthropic/claude-sonnet-4", "What is 3+3?"),
        ]
        responses = await asyncio.gather(*tasks)
        for r in responses:
            print(r)

asyncio.run(main())
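
asyncio.gather fires every request at once; for larger batches you may want to bound concurrency. A sketch using asyncio.Semaphore (the limit of 5 is arbitrary, and nothing here is specific to BlockRun):

import asyncio
from blockrun_llm import AsyncLLMClient

async def bounded_chat(client, sem, model, prompt):
    # Each request holds a semaphore slot while in flight
    async with sem:
        return await client.chat(model, prompt)

async def main():
    sem = asyncio.Semaphore(5)  # at most 5 concurrent requests
    async with AsyncLLMClient() as client:
        prompts = [f"Summarize item {i}" for i in range(20)]
        tasks = [bounded_chat(client, sem, "openai/gpt-4o", p) for p in prompts]
        responses = await asyncio.gather(*tasks)
        print(f"Received {len(responses)} responses")

asyncio.run(main())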

Error Handling

from blockrun_llm import LLMClient, APIError, PaymentError

client = LLMClient()

try:
    response = client.chat("openai/gpt-4o", "Hello!")
except PaymentError as e:
    print(f"Payment failed: {e}")
    # Check your USDC balance
except APIError as e:
    print(f"API error ({e.status_code}): {e}")
    print(f"Details: {e.response}")

Response Types

ChatResponse

from typing import List

class ChatResponse:
    id: str
    object: str
    created: int
    model: str
    choices: List[ChatChoice]
    usage: ChatUsage

class ChatChoice:
    index: int
    message: ChatMessage
    finish_reason: str

class ChatMessage:
    role: str
    content: str

class ChatUsage:
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
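
For logging or cost tracking, it can help to flatten a response into a plain dict. A small sketch; summarize is a hypothetical helper, not part of the SDK:

from blockrun_llm import LLMClient

def summarize(resp) -> dict:
    # Flatten the fields shown above for structured logging
    return {
        "model": resp.model,
        "finish_reason": resp.choices[0].finish_reason,
        "text": resp.choices[0].message.content,
        "prompt_tokens": resp.usage.prompt_tokens,
        "completion_tokens": resp.usage.completion_tokens,
        "total_tokens": resp.usage.total_tokens,
    }

client = LLMClient()
result = client.chat_completion("openai/gpt-4o", [{"role": "user", "content": "Hi"}])
print(summarize(result))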

Examples

Multi-turn Conversation

from blockrun_llm import LLMClient

client = LLMClient()
messages = [
    {"role": "system", "content": "You are a helpful assistant."}
]

while True:
    user_input = input("You: ")
    if user_input.lower() == "quit":
        break

    messages.append({"role": "user", "content": user_input})
    result = client.chat_completion("openai/gpt-4o", messages)

    assistant_message = result.choices[0].message.content
    messages.append({"role": "assistant", "content": assistant_message})

    print(f"Assistant: {assistant_message}")

Code Generation

from blockrun_llm import LLMClient

client = LLMClient()

code = client.chat(
    "anthropic/claude-sonnet-4",
    "Write a Python function to calculate fibonacci numbers",
    system="You are an expert Python developer. Return only code, no explanations."
)

print(code)
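
Even with a "return only code" system prompt, models sometimes wrap their output in markdown fences. A defensive post-processing sketch (purely client-side, not an SDK feature):

def strip_fences(text: str) -> str:
    # Drop a leading ```python (or ```) line and a trailing ``` line, if present
    lines = text.strip().splitlines()
    if lines and lines[0].startswith("```"):
        lines = lines[1:]
    if lines and lines[-1].startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines)

print(strip_fences(code))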