# Caching Responses
Caching LLM responses can significantly improve performance and reduce costs. Instructor supports several caching strategies to suit different needs.
```python
import functools
import instructor
from openai import OpenAI
from pydantic import BaseModel

# Initialize the client with instructor
client = instructor.from_openai(OpenAI())


# Define a simple model
class User(BaseModel):
    name: str
    age: int


# Simple in-memory caching with functools.cache
@functools.cache
def extract_user(text: str) -> User:
    """Extract user information with in-memory caching."""
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        response_model=User,
        messages=[
            {
                "role": "user",
                "content": f"Extract user information from: {text}"
            }
        ],
    )


# Example usage
user1 = extract_user("John is 30 years old")
print(user1)

# This call will use the cached result (no API call)
user2 = extract_user("John is 30 years old")
print(user2)
```
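`functools.cache` is an unbounded, in-process `lru_cache`, so you can verify that the second call never reached the API by inspecting its statistics. A minimal sketch, continuing the example above:

```python
# One miss (first call) and one hit (second, identical call)
print(extract_user.cache_info())  # e.g. CacheInfo(hits=1, misses=1, maxsize=None, currsize=1)

# Drop all cached entries if you need to force fresh API calls
extract_user.cache_clear()
```

Keep in mind that this cache lives only in the current process and is keyed on exact argument values, so it disappears when the script exits.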
For persistent caching across sessions, you can use disk-based caching:
```python
import functools
import inspect
import instructor
import diskcache
from openai import OpenAI
from pydantic import BaseModel

# Initialize the client with instructor
client = instructor.from_openai(OpenAI())

# Initialize disk cache
cache = diskcache.Cache('./my_cache_directory')


# Define a caching decorator for Pydantic models
def instructor_cache(func):
    """Cache a function that returns a Pydantic model."""
    return_type = inspect.signature(func).return_annotation
    if not issubclass(return_type, BaseModel):
        raise ValueError("The return type must be a Pydantic model")

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Create a cache key from the function name and arguments
        key = f"{func.__name__}-{functools._make_key(args, kwargs, typed=False)}"

        # Check if result is already cached
        if (cached := cache.get(key)) is not None:
            # Deserialize from JSON based on the return type
            return return_type.model_validate_json(cached)

        # Call the function and cache its result
        result = func(*args, **kwargs)
        serialized_result = result.model_dump_json()
        cache.set(key, serialized_result)
        return result

    return wrapper


# Define a model
class Product(BaseModel):
    name: str
    price: float
    category: str


# Use the caching decorator
@instructor_cache
def extract_product(text: str) -> Product:
    """Extract product information with disk caching."""
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        response_model=Product,
        messages=[
            {
                "role": "user",
                "content": f"Extract product information from: {text}"
            }
        ],
    )


# Example usage
product = extract_product("iPhone 14 Pro costs $999 and is in the smartphones category")
print(product)
```
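Because each result is serialized to JSON on disk, it survives process restarts: re-running the script returns the cached `Product` without another API call. A quick sanity check, continuing the snippet above:

```python
# This call is served from ./my_cache_directory rather than the API
product_again = extract_product(
    "iPhone 14 Pro costs $999 and is in the smartphones category"
)
print(product_again)

# diskcache.Cache supports len(), so you can confirm entries were written
print(f"cached entries: {len(cache)}")
```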
For distributed systems, Redis caching is a great option:
```python
import redis
import functools
import inspect
import instructor
from openai import OpenAI
from pydantic import BaseModel

# Initialize the client with instructor
client = instructor.from_openai(OpenAI())

# Initialize Redis cache
cache = redis.Redis(host="localhost", port=6379, db=0)


# Define a caching decorator for Redis
def redis_cache(func):
    """Cache a function that returns a Pydantic model using Redis."""
    return_type = inspect.signature(func).return_annotation
    if not issubclass(return_type, BaseModel):
        raise ValueError("The return type must be a Pydantic model")

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Create a cache key from the function name and arguments
        key = f"{func.__name__}-{functools._make_key(args, kwargs, typed=False)}"

        # Check if result is already cached
        if (cached := cache.get(key)) is not None:
            # Deserialize from JSON based on the return type
            return return_type.model_validate_json(cached)

        # Call the function and cache its result
        result = func(*args, **kwargs)
        serialized_result = result.model_dump_json()
        cache.set(key, serialized_result)
        return result

    return wrapper
```
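The decorator is applied exactly like the disk-cache version. A minimal usage sketch, continuing the snippet above and assuming a Redis server is reachable at localhost:6379 (the `Product` model mirrors the earlier disk-cache example):

```python
# Define a model for the extracted data (same shape as the disk-cache example)
class Product(BaseModel):
    name: str
    price: float
    category: str


# Use the Redis caching decorator
@redis_cache
def extract_product(text: str) -> Product:
    """Extract product information with Redis caching."""
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        response_model=Product,
        messages=[
            {
                "role": "user",
                "content": f"Extract product information from: {text}"
            }
        ],
    )


# The first call hits the API; subsequent identical calls, from any process
# sharing this Redis instance, return the cached, deserialized Product
product = extract_product("iPhone 14 Pro costs $999 and is in the smartphones category")
print(product)
```

Because the cache lives in Redis rather than on the local filesystem, multiple workers can share the same cached responses.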
## Running the Example

First, install Instructor and the dependencies used in the examples above:

```bash
$ pip install instructor openai pydantic diskcache redis
```

Then run the Python script:

```bash
$ python caching-responses.py
```