Working with JSON and REST APIs

Introduction

JSON (JavaScript Object Notation) has become the universal language of web APIs. It's lightweight, human-readable, and maps naturally onto Python data structures such as dicts and lists. When combined with REST API patterns, JSON enables powerful, scalable web services that can handle millions of requests.

In this lesson, we'll master JSON parsing and serialization, explore REST API conventions, and build robust clients that can handle complex data structures and real-world API scenarios.

JSON in Python

Python's json module provides everything you need to work with JSON data. It's part of the standard library, so no installation is required.

import json

JSON Data Types

JSON supports a limited set of data types that map directly to Python:

JSON Type	Python Type	Example
string	`str`	`"hello"`
number	`int`/`float`	`42`, `3.14`
boolean	`bool`	`true`, `false`
null	`None`	`null`
array	`list`	`[1, 2, 3]`
object	`dict`	`{"key": "value"}`

Parsing JSON (Deserialization)

import json

# The JSON document starts out as ordinary text
json_string = '{"name": "Alice", "age": 30, "city": "New York"}'

# json.loads() ("load string") decodes the text; a JSON object becomes a dict
data = json.loads(json_string)
name, kind = data['name'], type(data)
print(name)  # "Alice"
print(kind)  # <class 'dict'>

Serializing to JSON (Serialization)

import json

# Plain Python data: dict, str, int, list and bool all have JSON equivalents
person = dict(
    name="Bob",
    age=25,
    hobbies=["reading", "coding"],
    is_student=True,
)

# json.dumps() ("dump string") encodes the object as compact JSON text
json_str = json.dumps(person)
print(json_str)
# {"name": "Bob", "age": 25, "hobbies": ["reading", "coding"], "is_student": true}

# indent=2 spreads the same document over several indented lines
pretty_json = json.dumps(person, indent=2)
print(pretty_json)

Working with Files

# Write JSON to file
# An explicit encoding keeps the file readable on every platform instead of
# depending on the locale's default text encoding.
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(person, f, indent=2)

# Read JSON from file (same encoding as it was written with)
with open('data.json', 'r', encoding='utf-8') as f:
    loaded_data = json.load(f)

# The round trip through the file yields an equal object
print(loaded_data == person)  # True

Handling Complex Data

import json
from datetime import datetime

# Custom object serialization
class Person:
    """Small record type that converts to and from a JSON-friendly dict."""

    def __init__(self, name, age):
        self.name, self.age = name, age

    def to_dict(self):
        """Return the fields as a plain dict that json.dumps() can encode."""
        return dict(name=self.name, age=self.age)

    @classmethod
    def from_dict(cls, data):
        """Build an instance from a mapping holding 'name' and 'age' keys."""
        name = data['name']
        age = data['age']
        return cls(name, age)

# Serialize: object -> dict -> JSON text
person = Person("Charlie", 35)
person_dict = person.to_dict()
json_data = json.dumps(person_dict)
print(json_data)  # {"name": "Charlie", "age": 35}

# Deserialize: JSON text -> dict -> object
parsed = json.loads(json_data)
person_obj = Person.from_dict(parsed)
print(person_obj.name)  # "Charlie"

REST API Patterns

REST APIs follow consistent patterns for resource management. Let's explore the most common patterns you'll encounter.

CRUD Operations

import requests

class RESTClient:
    """Minimal JSON client for one REST collection.

    ``base_url`` is the collection URL (for example ``.../posts``); single
    resources are addressed as ``<base_url>/<resource_id>``. Every method calls
    ``raise_for_status()``, so 4xx/5xx responses surface as exceptions.
    """

    def __init__(self, base_url):
        # Strip trailing slashes so joining with an id never produces "//".
        self.base_url = base_url.rstrip('/')
        # One session for every call: shared headers and connection reuse.
        self.session = requests.Session()
        self.session.headers.update({
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        })

    def _url(self, resource_id=None):
        """Return the collection URL, or the item URL when an id is given."""
        # Test for None/'' rather than truthiness so that id 0 stays addressable.
        if resource_id is None or resource_id == '':
            return self.base_url
        return f"{self.base_url}/{resource_id}"

    @staticmethod
    def _decode(response):
        """Raise on HTTP errors, then return the JSON body (None when empty)."""
        response.raise_for_status()
        # A 204 or otherwise empty body has nothing to decode.
        if response.status_code == 204 or not response.content:
            return None
        return response.json()

    def get(self, resource_id=None, **params):
        """GET - Read resource(s); keyword arguments become query parameters."""
        response = self.session.get(self._url(resource_id), params=params)
        return self._decode(response)

    def post(self, data, resource_id=None):
        """POST - Create new resource from ``data`` (sent as a JSON body)."""
        response = self.session.post(self._url(resource_id), json=data)
        return self._decode(response)

    def put(self, resource_id, data):
        """PUT - Update/replace resource"""
        response = self.session.put(f"{self.base_url}/{resource_id}", json=data)
        return self._decode(response)

    def patch(self, resource_id, data):
        """PATCH - Partial update"""
        response = self.session.patch(f"{self.base_url}/{resource_id}", json=data)
        return self._decode(response)

    def delete(self, resource_id):
        """DELETE - Remove resource; returns True when the server accepted it."""
        response = self.session.delete(f"{self.base_url}/{resource_id}")
        response.raise_for_status()
        # Any non-error status means success; servers answer DELETE with
        # 200, 202 or 204, so checking for 204 alone reports false failures.
        return True

# Usage with JSONPlaceholder API
client = RESTClient('https://jsonplaceholder.typicode.com/posts')

# GET all posts
posts = client.get()
print(f"Found {len(posts)} posts")

# GET specific post
post = client.get(1)
print(f"Post 1 title: {post['title']}")

# CREATE new post
new_post = {
    "title": "My New Post",
    "body": "This is the content",
    "userId": 1
}
created = client.post(new_post)
print(f"Created post with ID: {created['id']}")

# UPDATE post
# JSONPlaceholder only simulates writes, so the post "created" above is not
# stored and cannot be updated afterwards; update the existing post fetched
# earlier instead. PUT replaces the whole resource, so the full representation
# is sent with the change applied (use client.patch for partial updates).
updated_data = {**post, "title": "Updated Title"}
updated = client.put(post['id'], updated_data)
print(f"Updated title: {updated['title']}")

Nested Resources

class NestedRESTClient:
    """Client for parent/child resource URLs such as /users/1/posts."""

    def __init__(self, base_url):
        # Strip trailing slashes so the joined URL never contains "//".
        self.base_url = base_url.rstrip('/')

    def get_nested(self, parent_resource, parent_id, child_resource=None, child_id=None):
        """Get nested resources like /users/1/posts or /users/1/posts/2

        The URL is built from the base URL, the parent resource and id, and -
        when given - the child resource and child id. Returns the decoded JSON
        body; HTTP error statuses raise via ``raise_for_status()``.
        """
        url_parts = [self.base_url, parent_resource, str(parent_id)]

        if child_resource:
            url_parts.append(child_resource)
        # Compare against None/'' instead of truthiness: 0 is a valid id and
        # must not be silently dropped from the URL.
        if child_id is not None and child_id != '':
            url_parts.append(str(child_id))

        url = '/'.join(url_parts)
        response = requests.get(url)
        response.raise_for_status()
        return response.json()

# Usage: a client rooted at the API's base URL (not at a single collection)
client = NestedRESTClient('https://jsonplaceholder.typicode.com')

# Get user's posts: requests <base>/users/1/posts
user_posts = client.get_nested('users', 1, 'posts')
print(f"User 1 has {len(user_posts)} posts")

# Get specific post by user: requests <base>/users/1/posts/1
specific_post = client.get_nested('users', 1, 'posts', 1)
print(f"Post title: {specific_post['title']}")

Query Parameters and Filtering

def _query_value(value):
    """Render one filter value as query-string text (booleans as true/false)."""
    if isinstance(value, bool):
        # str(True) would send "True", which JSON-style APIs do not recognise.
        return 'true' if value else 'false'
    return str(value)


def search_resources(base_url, resource, **filters):
    """Search resources with query parameters

    Each keyword argument becomes a query parameter on ``<base_url>/<resource>``.
    A list or tuple value repeats the key once per item; a value of ``None``
    means "no filter" and is left out. Returns the decoded JSON body and raises
    via ``raise_for_status()`` on HTTP error statuses.
    """
    url = f"{base_url}/{resource}"

    # Convert filters to query parameters
    params = {}
    for key, value in filters.items():
        if value is None:
            # Sending the literal text "None" would filter on a bogus value.
            continue
        if isinstance(value, (list, tuple)):
            # Handle multiple values for same key
            params[key] = [_query_value(item) for item in value if item is not None]
        else:
            params[key] = _query_value(value)

    response = requests.get(url, params=params)
    response.raise_for_status()
    return response.json()

# Usage: every keyword argument below is forwarded as a query parameter
base_url = 'https://jsonplaceholder.typicode.com'

# Search posts by user (sends userId=1)
user_posts = search_resources(base_url, 'posts', userId=1)
print(f"User 1 posts: {len(user_posts)}")

# Search with multiple filters (sends userId=1 and _limit=3 together)
filtered_posts = search_resources(base_url, 'posts', userId=1, _limit=3)
print(f"Limited results: {len(filtered_posts)}")

Handling API Responses

Response Structure Patterns

def parse_api_response(response):
    """Parse different API response patterns

    ``response`` only needs a ``json()`` method. The result always carries a
    ``"pattern"`` key naming the shape that was recognised:

    * ``"invalid_json"`` - the body could not be decoded (``"error"`` is set)
    * ``"wrapped"``      - a dict with a ``data`` key (plus optional ``meta``)
    * ``"error"``        - a dict with an ``error`` or ``message`` key
    * ``"direct"``       - any other dict, or a list
    * ``"unknown"``      - a bare scalar (string, number, boolean, null)
    """
    try:
        data = response.json()
    except ValueError:
        return {"error": "Invalid JSON response", "pattern": "invalid_json"}

    # The specific dict shapes must be tested before the generic "direct"
    # fallback, otherwise they can never be reached.
    if isinstance(data, dict):
        # Pattern 2: Wrapped response
        if 'data' in data:
            return {
                "data": data['data'],
                "meta": data.get('meta', {}),
                "pattern": "wrapped"
            }

        # Pattern 3: Error response
        if 'error' in data or 'message' in data:
            return {
                "error": data.get('error') or data.get('message'),
                "code": data.get('code'),
                "pattern": "error"
            }

        # Pattern 1: Direct data (a single resource)
        return {"data": data, "pattern": "direct"}

    # Pattern 1: Direct data (a collection). The list is never searched for
    # marker keys; its elements are payload, not envelope fields.
    if isinstance(data, list):
        return {"data": data, "pattern": "direct"}

    return {"data": data, "pattern": "unknown"}

# Test different response patterns
responses = [
    requests.get('https://jsonplaceholder.typicode.com/posts/1'),  # A single resource object
    requests.get('https://httpbin.org/json'),  # A sample JSON document
]

for resp in responses:
    parsed = parse_api_response(resp)
    # .get() because a result describing an undecodable body may carry no pattern
    print(f"Pattern: {parsed.get('pattern', 'invalid')}")
    payload = parsed.get('data')
    # Only dict payloads have keys; list payloads are skipped here
    if isinstance(payload, dict):
        print(f"Data keys: {list(payload.keys())[:3]}")

Pagination Handling

def fetch_paginated_data(base_url, endpoint, **params):
    """Fetch all pages of paginated API data

    Requests ``<base_url>/<endpoint>`` with ``_page`` = 1, 2, ... until a page
    comes back empty, and returns the concatenated items. Extra keyword
    arguments are sent as query parameters on every request. A caller-supplied
    ``_limit`` sets the page size (default 10); ``_page`` is always controlled
    by this function. At most 100 pages are fetched.
    """
    url = f"{base_url}/{endpoint}"
    # Honour the caller's page size instead of silently overwriting it.
    page_size = params.get('_limit', 10)
    max_pages = 100  # Safety cap to prevent infinite loops

    all_data = []
    # One session for the whole crawl so the connection is reused across pages.
    with requests.Session() as session:
        for page in range(1, max_pages + 1):
            # Add pagination parameters
            request_params = {**params, '_page': page, '_limit': page_size}

            response = session.get(url, params=request_params)
            response.raise_for_status()

            data = response.json()

            # An empty page means there is no more data
            if not data:
                break

            all_data.extend(data)

    return all_data

# Usage: collect every page of /posts into one list, then report its size
posts = fetch_paginated_data('https://jsonplaceholder.typicode.com', 'posts')
print(f"Total posts fetched: {len(posts)}")

Rate Limiting

import time

class RateLimitedClient:
    """HTTP client that keeps itself under a requests-per-minute budget.

    Timestamps of recent requests are kept in ``requests_made``; a request is
    allowed while fewer than ``requests_per_minute`` of them fall inside the
    last 60 seconds.
    """

    def __init__(self, requests_per_minute=60):
        """Create a limiter.

        Raises:
            ValueError: if ``requests_per_minute`` is below 1 (no request could
                ever be allowed, and there would be nothing to wait for).
        """
        if requests_per_minute < 1:
            raise ValueError("requests_per_minute must be at least 1")
        self.requests_per_minute = requests_per_minute
        self.requests_made = []

    def can_make_request(self):
        """Check if we can make another request"""
        # Monotonic clock: unaffected by system clock adjustments, which would
        # otherwise corrupt the interval arithmetic.
        now = time.monotonic()

        # Remove old requests outside the time window
        self.requests_made = [t for t in self.requests_made
                              if now - t < 60]  # 60 seconds = 1 minute

        return len(self.requests_made) < self.requests_per_minute

    def make_request(self, method, url, **kwargs):
        """Make a rate-limited request

        Blocks (sleeping) until the budget allows another request, then sends
        it with ``requests.request`` and returns the response.
        """
        # Loop rather than a single check: re-verify the budget after sleeping
        # instead of assuming the wait was long enough.
        while not self.can_make_request():
            # The oldest timestamp is the next one to leave the window.
            wait_time = 60 - (time.monotonic() - self.requests_made[0])
            if wait_time > 0:
                print(f"Rate limit reached. Waiting {wait_time:.1f} seconds...")
                time.sleep(wait_time)

        response = requests.request(method, url, **kwargs)
        self.requests_made.append(time.monotonic())

        return response

# Usage: a budget of 30 requests per 60-second window
client = RateLimitedClient(requests_per_minute=30)

# 35 calls exceed the budget, so make_request sleeps once the window is full
for i in range(35):  # Try to make more requests than allowed
    response = client.make_request('GET', 'https://httpbin.org/get')
    print(f"Request {i+1}: {response.status_code}")

Data Validation and Transformation

Schema Validation

def validate_user_data(user_data):
    """Validate user data against expected schema

    Checks that ``name``, ``email`` and ``age`` are present, that ``name`` is a
    string, that ``email`` is a string shaped like an address, and that ``age``
    is a non-negative integer (booleans are rejected).

    Returns:
        A list of human-readable error messages; empty when the data is valid.
    """
    import re

    required_fields = ['name', 'email', 'age']

    # Check required fields
    errors = [
        f"Missing required field: {field}"
        for field in required_fields
        if field not in user_data
    ]

    # Validate data types
    if 'name' in user_data and not isinstance(user_data['name'], str):
        errors.append("Name must be a string")

    if 'email' in user_data:
        email = user_data['email']
        email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
        # Type-check first: the regex functions raise TypeError on non-strings.
        # fullmatch (unlike match with "$") also rejects a trailing newline.
        if not isinstance(email, str) or not re.fullmatch(email_pattern, email):
            errors.append("Invalid email format")

    if 'age' in user_data:
        age = user_data['age']
        # bool is a subclass of int, so True/False must be excluded explicitly.
        if isinstance(age, bool) or not isinstance(age, int) or age < 0:
            errors.append("Age must be a non-negative integer")

    return errors

# Sample records: one complete, one with bad values, one with missing fields
test_users = [
    {"name": "Alice", "email": "alice@example.com", "age": 25},
    {"name": "Bob", "email": "invalid-email", "age": "thirty"},
    {"email": "charlie@example.com"}  # Missing name and age
]

for user in test_users:
    errors = validate_user_data(user)
    if not errors:
        print(f"✓ {user['name']} data is valid")
        continue
    # .get() because the record being reported may be the one lacking a name
    print(f"Validation errors for {user.get('name', 'Unknown')}:")
    for error in errors:
        print(f"  - {error}")

Data Transformation

def _apply_operation(raw_data, source_spec):
    """Evaluate one dict-style transformation spec against ``raw_data``."""
    operation = source_spec.get('operation')
    value = raw_data.get(source_spec.get('field'))

    if operation == 'compare':
        threshold = source_spec.get('threshold')
        # Compare on "is None", not truthiness: 0 is a legitimate value.
        if value is None or threshold is None:
            return value
        return value >= threshold

    # The string operations only apply to non-empty values; anything else
    # (missing field, empty string) is passed through unchanged.
    if not value:
        return value
    if operation == 'uppercase':
        return value.upper()
    if operation == 'lowercase':
        return value.lower()
    if operation == 'split':
        return value.split(source_spec.get('delimiter', ' '))

    # Unknown operation: keep the source value as it is.
    return value


def transform_api_data(raw_data, transformations):
    """Transform API data according to specifications

    ``transformations`` maps each target field to one of:

    * a string - copy that source field (``None`` when it is missing);
    * a dict with ``operation`` and ``field`` - apply ``uppercase``,
      ``lowercase``, ``split`` (optional ``delimiter``, default a space) or
      ``compare`` (True when the value is >= ``threshold``); unknown
      operations copy the source value unchanged;
    * anything else - used as a literal value.

    Returns a new dict; ``raw_data`` is not modified.
    """
    transformed = {}

    for target_field, source_spec in transformations.items():
        if isinstance(source_spec, str):
            # Simple field mapping
            transformed[target_field] = raw_data.get(source_spec)
        elif isinstance(source_spec, dict):
            # Complex transformation
            transformed[target_field] = _apply_operation(raw_data, source_spec)
        else:
            # Direct value
            transformed[target_field] = source_spec

    return transformed

# Sample record standing in for a decoded API response
api_data = {
    "name": "John Doe",
    "email": "john@example.com",
    "age": 30
}

# Transformation spec: target field -> source field name or operation spec
transform_spec = {
    'full_name': 'name',
    'email_upper': {'operation': 'uppercase', 'field': 'email'},
    'name_parts': {'operation': 'split', 'field': 'name', 'delimiter': ' '},
    'is_adult': {'operation': 'compare', 'field': 'age', 'threshold': 18}
}

# Run the transformation and list every resulting field
transformed = transform_api_data(api_data, transform_spec)
print("Transformed data:")
for key, value in transformed.items():
    print(f"  {key}: {value}")

Error Handling and Resilience

Robust API Client

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

class ResilientAPIClient:
    """JSON API client with automatic retries and uniform error reporting.

    Transient failures (HTTP 429/500/502/503/504) are retried with backoff by
    the transport adapter. Every failure that ``request`` handles is re-raised
    as ``ValueError`` with the original exception chained as its cause.
    """

    def __init__(self, base_url, max_retries=3, backoff_factor=0.3, timeout=10):
        """Configure the session.

        Args:
            base_url: API root; a trailing slash is ignored.
            max_retries: total retry attempts for retryable failures.
            backoff_factor: multiplier for the delay between retries.
            timeout: default per-request timeout in seconds; callers can
                override it per call with ``request(..., timeout=...)``.
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout

        # Configure retry strategy
        retry_strategy = Retry(
            total=max_retries,
            status_forcelist=[429, 500, 502, 503, 504],
            backoff_factor=backoff_factor,
            # Hand back the last response once retries are exhausted instead of
            # raising a retry error, so request() can report the real status.
            raise_on_status=False
        )

        # Create session with retry adapter
        self.session = requests.Session()
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

        self.session.headers.update({
            'User-Agent': 'ResilientAPIClient/1.0',
            'Accept': 'application/json'
        })

    def request(self, method, endpoint, **kwargs):
        """Make a resilient API request

        Args:
            method: HTTP verb, e.g. ``'GET'``.
            endpoint: path relative to ``base_url`` (leading slash optional).
            **kwargs: passed through to ``Session.request``.

        Returns:
            The decoded JSON body, or ``None`` when the response has no body.

        Raises:
            ValueError: for HTTP error statuses, timeouts, connection
                failures, other request errors and undecodable bodies.
        """
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        # Without a timeout a stalled server would block the call forever.
        kwargs.setdefault('timeout', self.timeout)

        try:
            response = self.session.request(method, url, **kwargs)
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            # Handle specific HTTP errors. Read the response from the exception:
            # the local name is unbound if the request call itself raised.
            failed = e.response
            status = getattr(failed, 'status_code', None)
            known_errors = {
                401: "Authentication failed",
                403: "Access forbidden",
                404: "Resource not found",
                429: "Rate limit exceeded",
            }
            if status in known_errors:
                raise ValueError(known_errors[status]) from e
            detail = failed.text if failed is not None else str(e)
            raise ValueError(f"HTTP {status}: {detail}") from e
        except requests.exceptions.Timeout as e:
            # Checked before ConnectionError: a connect timeout is both, and
            # "timed out" is the more precise description.
            raise ValueError("Request timed out") from e
        except requests.exceptions.ConnectionError as e:
            raise ValueError("Connection failed - check network connectivity") from e
        except requests.exceptions.RequestException as e:
            # Anything else the transport reports (too many redirects, ...).
            raise ValueError(f"Request failed: {e}") from e

        # A 204 or otherwise empty body has nothing to decode.
        if response.status_code == 204 or not response.content:
            return None

        try:
            return response.json()
        except ValueError as e:
            # JSON parsing error
            raise ValueError(f"Invalid JSON response: {str(e)}") from e

# Usage: every failure that client.request() handles is raised as ValueError,
# so a single except clause covers them
client = ResilientAPIClient('https://jsonplaceholder.typicode.com')

try:
    # This should work: keyword arguments such as params are passed to the session
    posts = client.request('GET', '/posts', params={'_limit': 2})
    print(f"Retrieved {len(posts)} posts")
    
    # This will fail with 404 (left commented out so the example runs cleanly)
    # client.request('GET', '/nonexistent')
    
except ValueError as e:
    print(f"API Error: {e}")

Real-World API Integration

GitHub API Client

import os
import requests

class GitHubAPI:
    """Thin wrapper around a few GitHub REST endpoints.

    Each method returns the decoded JSON body and lets ``raise_for_status()``
    raise on HTTP error statuses.
    """

    def __init__(self, token=None):
        self.base_url = 'https://api.github.com'
        self.session = requests.Session()

        # The Accept header is always sent; Authorization only with a token.
        self.session.headers.update({'Accept': 'application/vnd.github.v3+json'})
        if token:
            self.session.headers.update({'Authorization': f'token {token}'})

    def _call(self, verb, url, **options):
        """Send one request through the session and return its JSON body."""
        send = getattr(self.session, verb)
        reply = send(url, **options)
        reply.raise_for_status()
        return reply.json()

    def get_user(self, username):
        """Get user information"""
        return self._call('get', f'{self.base_url}/users/{username}')

    def get_repos(self, username, **params):
        """Get user's repositories (keyword arguments become query parameters)"""
        endpoint = f'{self.base_url}/users/{username}/repos'
        return self._call('get', endpoint, params=params)

    def create_issue(self, owner, repo, title, body):
        """Create a new issue"""
        endpoint = f'{self.base_url}/repos/{owner}/{repo}/issues'
        payload = {'title': title, 'body': body}
        return self._call('post', endpoint, json=payload)

# Usage: without a token the client sends no Authorization header
github = GitHubAPI()  # No token for public data

try:
    # Get user info (fields are read from the decoded JSON object)
    user = github.get_user('octocat')
    print(f"User: {user['name']} ({user['login']})")
    print(f"Followers: {user['followers']}")
    
    # Get repositories; sort and per_page are sent as query parameters
    repos = github.get_repos('octocat', sort='updated', per_page=3)
    print(f"Recent repos: {[repo['name'] for repo in repos]}")
    
# raise_for_status() failures and network errors share this base class
except requests.exceptions.RequestException as e:
    print(f"GitHub API error: {e}")

Key Points to Remember

JSON is the standard data format for REST APIs; parse it with json.loads() and produce it with json.dumps()
REST APIs follow consistent patterns: GET for reading, POST for creating, PUT for replacing, PATCH for partial updates, DELETE for removing
Response handling requires checking status codes and parsing JSON appropriately
Pagination is common in APIs and requires handling multiple requests
Rate limiting protects APIs and requires client-side throttling
Error handling should be comprehensive, covering network issues, HTTP errors, and data validation
Authentication is crucial for accessing protected API resources
Data validation ensures API responses match expected schemas

You've now mastered the fundamentals of working with JSON and REST APIs. In our final lesson, we'll build complete API client libraries with proper error handling, authentication, and production-ready features that you can use in real applications.