Introduction

Free-form text is great for chat, but production applications need structured data. You need JSON for APIs, objects for code, and consistent formats for downstream processing.

The challenge: LLMs are trained to generate natural language, not valid JSON. Getting reliable structured outputs requires the right techniques.

The Structured Output Spectrum

From least to most reliable:

Method	Reliability	Flexibility	Provider Support
Prompt engineering	Low	High	All
JSON mode	Medium	Medium	Most
Function calling	High	Medium	Most
Structured outputs	Very High	Medium	OpenAI, Anthropic
Constrained decoding	Highest	Low	Outlines, LMQL

Method 1: Prompt Engineering

The simplest approach sometimes works:

def extract_with_prompt(text: str) -> dict:
    """Ask the model for name/age/occupation as JSON and parse the reply.

    The prompt only *requests* JSON; nothing enforces it, so ``json.loads``
    raises when the reply is not a bare JSON document.
    """
    instructions = f"""Extract the following information from the text as JSON:
    - name (string)
    - age (integer)
    - occupation (string)

    Text: {text}

    Return ONLY valid JSON, no other text:"""

    # Hope for the best: the raw completion goes straight into the parser.
    return json.loads(llm.generate(instructions))

Problems:

Model might add markdown code blocks
Extra explanatory text
Invalid JSON syntax
Missing or extra fields

Method 2: JSON Mode

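Most providers offer a JSON mode or a close equivalent. The OpenAI example below uses a dedicated response_format flag; the Anthropic example relies on a prompt that spells out the expected JSON shape: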

OpenAI

import json

from openai import OpenAI

client = OpenAI()

# JSON mode: response_format asks for a JSON object, but the field names
# exist only in the system prompt -- nothing in this request enforces them.
response = client.chat.completions.create(
    model="gpt-4o",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": "Extract data as JSON with fields: name, age, occupation"},
        {"role": "user", "content": "John is a 30-year-old software engineer."}
    ]
)

# The message content is a JSON string; decode it into a dict.
data = json.loads(response.choices[0].message.content)
# {"name": "John", "age": 30, "occupation": "software engineer"}

Anthropic

import json

from anthropic import Anthropic

client = Anthropic()

# No JSON-specific request option is used here: the expected shape is spelled
# out in the prompt, which ends with "JSON:" to cue the model.
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": """Extract as JSON: {"name": str, "age": int, "occupation": str}

        Text: John is a 30-year-old software engineer.

        JSON:"""}
    ]
)

# json.loads raises if the reply contains anything besides a JSON document.
data = json.loads(response.content[0].text)

Better, but:

Schema not enforced
Can still get wrong types
Might miss required fields

Method 3: Function Calling / Tool Use

Define your schema as a function:

OpenAI Function Calling

from openai import OpenAI
import json

client = OpenAI()

# JSON Schema describing the arguments the model has to supply.
person_parameters = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "description": "Person's full name"},
        "age": {"type": "integer", "description": "Person's age in years"},
        "occupation": {"type": "string", "description": "Person's job title"},
    },
    "required": ["name", "age", "occupation"],
}

# The schema is wrapped in a function definition and offered as the only tool.
extract_person_tool = {
    "type": "function",
    "function": {
        "name": "extract_person",
        "description": "Extract person information from text",
        "parameters": person_parameters,
    },
}
tools = [extract_person_tool]

# tool_choice names extract_person explicitly instead of leaving the choice
# to the model.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "John is a 30-year-old software engineer."}],
    tools=tools,
    tool_choice={"type": "function", "function": {"name": "extract_person"}},
)

# The arguments arrive as a JSON string on the first tool call.
message = response.choices[0].message
tool_call = message.tool_calls[0]
data = json.loads(tool_call.function.arguments)

Anthropic Tool Use

from anthropic import Anthropic

client = Anthropic()

# Tool definition: input_schema is the JSON Schema for the extracted fields.
tools = [{
    "name": "extract_person",
    "description": "Extract person information from text",
    "input_schema": {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
            "occupation": {"type": "string"}
        },
        "required": ["name", "age", "occupation"]
    }
}]

response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    tools=tools,
    # Require the extract_person tool, mirroring the forced tool_choice in the
    # OpenAI example; otherwise the model may answer in plain text.
    tool_choice={"type": "tool", "name": "extract_person"},
    messages=[{"role": "user", "content": "John is a 30-year-old software engineer."}]
)

# Find tool use block. `data` is None (rather than an undefined name) if the
# response contains no tool_use block.
data = next(
    (block.input for block in response.content if block.type == "tool_use"),
    None,
)

Method 4: Native Structured Outputs

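The most reliable option for hosted APIs, when available. (Constrained decoding, covered in Method 6, ranks higher still but targets local models.)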

OpenAI Structured Outputs

from openai import OpenAI
from pydantic import BaseModel

client = OpenAI()


# Target schema; handed to the API as response_format below.
class Person(BaseModel):
    name: str
    age: int
    occupation: str


messages = [{"role": "user", "content": "John is a 30-year-old software engineer."}]

response = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    response_format=Person,
    messages=messages,
)

# `.parsed` carries the reply already converted to the schema class.
message = response.choices[0].message
person = message.parsed
# Person(name='John', age=30, occupation='software engineer')

Complex Nested Schemas

from pydantic import BaseModel, Field
from typing import List, Optional
from enum import Enum


# Closed set of priority labels; the str mixin keeps the values plain strings.
class Priority(str, Enum):
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"


# A single unit of work inside a project.
class Task(BaseModel):
    title: str
    description: str
    priority: Priority
    estimated_hours: float = Field(ge=0)


# Top-level object requested from the model; nests a list of Task.
class Project(BaseModel):
    name: str
    tasks: List[Task]
    deadline: Optional[str] = None


# `client` is not created in this snippet; it is expected to be in scope already.
request = {
    "role": "user",
    "content": "Create a project plan for building a REST API with 3 tasks",
}

response = client.beta.chat.completions.parse(
    model="gpt-4o",
    response_format=Project,
    messages=[request],
)

project = response.choices[0].message.parsed
for task in project.tasks:
    level = task.priority.value
    print(f"- {task.title} ({level}): {task.estimated_hours}h")

Method 5: Instructor Library

Works with multiple providers:

import instructor
from openai import OpenAI
from pydantic import BaseModel

# Wrap the OpenAI client so that create() accepts a response_model.
client = instructor.from_openai(OpenAI())


# Schema the reply is parsed into.
class Person(BaseModel):
    name: str
    age: int
    occupation: str


conversation = [{"role": "user", "content": "John is a 30-year-old engineer."}]

# Parsing (and retrying) is handled by the wrapped client; the call returns
# a Person rather than a raw completion.
person = client.chat.completions.create(
    model="gpt-4o",
    response_model=Person,
    messages=conversation,
)

print(person.name)  # John

With Anthropic

import instructor
from anthropic import Anthropic

# Same idea as the OpenAI variant, wrapping the Anthropic client instead.
client = instructor.from_anthropic(Anthropic())

conversation = [{"role": "user", "content": "John is a 30-year-old engineer."}]

# `Person` is not defined in this snippet; it is expected to be in scope already.
person = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    response_model=Person,
    messages=conversation,
)

Retries and Validation

from typing import List

from pydantic import BaseModel, field_validator


# Schema for a drafted email; `recipients` is checked by the validator below.
class Email(BaseModel):
    subject: str
    body: str
    recipients: List[str]

    @field_validator("recipients")
    @classmethod
    def validate_emails(cls, v):
        """Reject the list if any entry lacks an "@"; otherwise return it unchanged."""
        for email in v:
            if "@" not in email:
                raise ValueError(f"Invalid email: {email}")
        return v


# `client` is not created in this snippet; the call shape (chat.completions
# with gpt-4o) matches an Instructor-wrapped OpenAI client.
# Instructor automatically retries if validation fails
email = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Draft an email to john@example.com about the meeting"}],
    response_model=Email,
    max_retries=3,  # Retry if validation fails
)

Method 6: Constrained Decoding

For maximum reliability with local models:

Outlines

import json

import outlines

model = outlines.models.transformers("mistralai/Mistral-7B-v0.1")

# Define schema
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "occupation": {"type": "string"}
    },
    "required": ["name", "age", "occupation"]
}

# generate.json takes a Pydantic model, a callable, or a JSON Schema *string*,
# so the dict is serialised before being handed over.
generator = outlines.generate.json(model, json.dumps(schema))

result = generator("Extract from: John is a 30-year-old engineer")
# Guaranteed valid JSON matching schema

With Pydantic

from pydantic import BaseModel
import outlines

# Schema for the generated object, expressed as a Pydantic model instead of a
# raw JSON Schema.
class Person(BaseModel):
    name: str
    age: int
    occupation: str

# `model` is not defined in this snippet; presumably the one loaded in the
# preceding Outlines example -- confirm when running this standalone.
generator = outlines.generate.json(model, Person)
person = generator("John is a 30-year-old engineer")

Handling Edge Cases

Missing Information

from pydantic import BaseModel
from typing import Optional

# Only `name` is mandatory; the other fields default to None so the model is
# not forced to invent a value the text never states.
class Person(BaseModel):
    name: str
    age: Optional[int] = None  # Might not be mentioned
    occupation: Optional[str] = None

# Add instructions: tell the model explicitly how to represent missing data.
prompt = """Extract person info. Use null for missing fields.
Text: John works in tech.
"""

# Illustrative result (no call is made in this snippet):
# Result: Person(name='John', age=None, occupation='tech')

Ambiguous Data

from pydantic import BaseModel, Field
from typing import List

# One extracted value together with how sure the model is about it.
class Extraction(BaseModel):
    value: str
    confidence: float = Field(ge=0, le=1)  # constrained to the range 0..1
    alternatives: List[str] = []  # other candidate readings; empty by default

# Every field is an Extraction, so each carries its own confidence score.
class Person(BaseModel):
    name: Extraction
    age: Extraction

# Prompt for uncertainty
prompt = """Extract person info with confidence scores.
If uncertain, provide alternatives.
Text: Jon or John is around 30.
"""

# Illustrative result (no call is made in this snippet).
# Result shows uncertainty
# Person(
#   name=Extraction(value='John', confidence=0.6, alternatives=['Jon']),
#   age=Extraction(value='30', confidence=0.8, alternatives=['29', '31'])
# )

Lists and Multiple Entities

from typing import List, Optional

from pydantic import BaseModel


# One person mentioned in the text.
class Person(BaseModel):
    name: str
    # Defaults to None so a reply that omits `age` still validates; without a
    # default, Optional[int] is a required (nullable) field.
    age: Optional[int] = None


# Wrapper object so a single response can carry any number of people.
class ExtractionResult(BaseModel):
    people: List[Person]


prompt = """Extract all people mentioned.
Text: John (30) and Mary work together. Bob joined recently.
"""

# Illustrative result (no call is made in this snippet).
# Result: ExtractionResult(people=[
#   Person(name='John', age=30),
#   Person(name='Mary', age=None),
#   Person(name='Bob', age=None)
# ])

Real-World Patterns

Entity Extraction Pipeline

from pydantic import BaseModel
from typing import List, Optional
import instructor
from openai import OpenAI

client = instructor.from_openai(OpenAI())


# One extracted mention; start/end are the character positions the system
# prompt asks the model to report.
class Entity(BaseModel):
    text: str
    type: str  # PERSON, ORG, LOCATION, etc.
    start: int
    end: int


# Full response object: every entity plus the text they were taken from.
class ExtractionResult(BaseModel):
    entities: List[Entity]
    raw_text: str


def extract_entities(text: str) -> ExtractionResult:
    """Send ``text`` to the model and return its entities as an ExtractionResult."""
    # The continuation lines keep their original indentation because it is
    # part of the prompt string.
    system_prompt = """Extract named entities.
            Include character positions for each entity.
            Types: PERSON, ORG, LOCATION, DATE, MONEY"""
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    return client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        response_model=ExtractionResult,
    )


result = extract_entities("Apple CEO Tim Cook announced...")
for entity in result.entities:
    span = f"[{entity.start}:{entity.end}]"
    print(f"{entity.type}: {entity.text} {span}")

Classification with Reasoning

from enum import Enum

from pydantic import BaseModel, Field


# Closed set of sentiment labels the model may choose from.
class Sentiment(str, Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"


# Label plus the model's stated confidence and its explanation.
class Classification(BaseModel):
    sentiment: Sentiment
    # Bounded to 0..1 because it is rendered with the `%` format below, and
    # to match the bounded confidence field used in the ambiguous-data schema.
    confidence: float = Field(ge=0, le=1)
    reasoning: str


def classify_sentiment(text: str) -> Classification:
    """Classify ``text`` and return the label, confidence and reasoning.

    Relies on a module-level ``client`` that accepts ``response_model``
    (an Instructor-wrapped client), which this snippet does not create.
    """
    return client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "Classify sentiment and explain your reasoning."},
            {"role": "user", "content": text}
        ],
        response_model=Classification,
    )


result = classify_sentiment("The product is okay but shipping was slow")
print(f"{result.sentiment.value} ({result.confidence:.0%}): {result.reasoning}")

Data Transformation

# Input-side schema; not referenced by the code in this snippet.
class RawRecord(BaseModel):
    text: str


# Output schema; the formats in the comments are requested via the system
# prompt, not enforced by the model class.
class NormalizedRecord(BaseModel):
    name: str
    phone: str  # Normalized format: +1-XXX-XXX-XXXX
    email: str  # Lowercase


def normalize_contact(raw: str) -> NormalizedRecord:
    """Ask the model to rewrite a free-form contact string as a NormalizedRecord."""
    # The continuation lines keep their original indentation because it is
    # part of the prompt string.
    formatting_rules = """Normalize contact information.
            Phone: +1-XXX-XXX-XXXX format
            Email: lowercase
            Name: Title Case"""
    conversation = [
        {"role": "system", "content": formatting_rules},
        {"role": "user", "content": raw},
    ]
    return client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        response_model=NormalizedRecord,
    )


result = normalize_contact("JOHN DOE, john.doe@GMAIL.COM, (555) 123-4567")
# Expected shape of the result:
# NormalizedRecord(
#   name='John Doe',
#   phone='+1-555-123-4567',
#   email='john.doe@gmail.com'
# )

Best Practices

Use Pydantic for schemas: Type hints and validation built-in
Make fields Optional when appropriate: Don't force hallucination
Add Field descriptions: Helps the model understand intent
Implement retries: Structured outputs can still fail
Validate outputs: Don't trust, verify
Log failures: Track which schemas cause problems
Start simple: Add complexity only when needed

Conclusion

Structured outputs have evolved from hacky prompt engineering to first-class features. Use native structured outputs when available, Instructor for cross-provider compatibility, and constrained decoding for maximum reliability.

The key is matching your reliability requirements to the right technique and always validating the output before trusting it.