- Master JSON parsing and serialization in Python
- Work with REST API patterns and conventions
- Handle complex JSON structures and nested data
- Implement proper data validation and error handling
Working with JSON and REST APIs
Introduction
JSON (JavaScript Object Notation) has become the universal language of web APIs. It's lightweight, human-readable, and maps perfectly to Python data structures. When combined with REST API patterns, JSON enables powerful, scalable web services that can handle millions of requests.
In this lesson, we'll master JSON parsing and serialization, explore REST API conventions, and build robust clients that can handle complex data structures and real-world API scenarios.
JSON in Python
Python's json module provides everything you need to work with JSON data. It's part of the standard library, so no installation is required.
import json
JSON Data Types
JSON supports a limited set of data types that map directly to Python:
| JSON Type | Python Type | Example |
|---|---|---|
| string | str | "hello" |
| number | int/float | 42, 3.14 |
| boolean | bool | true, false |
| null | None | null |
| array | list | [1, 2, 3] |
| object | dict | {"key": "value"} |
Parsing JSON (Deserialization)
import json

# A JSON document held in an ordinary Python string.
json_string = '{"name": "Alice", "age": 30, "city": "New York"}'

# json.loads ("load string") decodes JSON text into Python objects;
# a top-level JSON object comes back as a dict.
data = json.loads(json_string)

print(data["name"])  # "Alice"
print(type(data))  # <class 'dict'>
Serializing to JSON (Serialization)
import json

# A dict built only from JSON-compatible values.
person = {
    "name": "Bob",
    "age": 25,
    "hobbies": ["reading", "coding"],
    "is_student": True,
}

# Compact single-line encoding; Python's True is written as JSON true.
json_str = json.dumps(person)
print(json_str)
# {"name": "Bob", "age": 25, "hobbies": ["reading", "coding"], "is_student": true}

# indent=2 spreads the output over several lines for human readers.
pretty_json = json.dumps(person, indent=2)
print(pretty_json)
Working with Files
# Write JSON to file.
# The encoding is spelled out because open()'s default text encoding
# depends on the platform locale, while JSON files are conventionally UTF-8.
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(person, f, indent=2)

# Read JSON from file (json.load reads from a file object, json.loads from a string).
with open('data.json', 'r', encoding='utf-8') as f:
    loaded_data = json.load(f)

print(loaded_data == person)  # True
Handling Complex Data
import json
from datetime import datetime
# Custom object serialization
class Person:
    """A person record that converts to and from a plain, JSON-ready dict."""

    def __init__(self, name, age):
        """Store the person's name and age."""
        self.name = name
        self.age = age

    def to_dict(self):
        """Return a dict with 'name' and 'age' that json.dumps can encode."""
        return {"name": self.name, "age": self.age}

    @classmethod
    def from_dict(cls, data):
        """Build an instance from a mapping with 'name' and 'age' keys.

        Raises:
            KeyError: if either key is missing from ``data``.
        """
        return cls(data['name'], data['age'])
# Serialize custom object: convert it to a plain dict first, then encode that.
person = Person("Charlie", 35)
person_as_dict = person.to_dict()
json_data = json.dumps(person_as_dict)
print(json_data)  # {"name": "Charlie", "age": 35}

# Deserialize: decode the text to a dict, then rebuild the object from it.
parsed = json.loads(json_data)
person_obj = Person.from_dict(parsed)
print(person_obj.name)  # "Charlie"
REST API Patterns
REST APIs follow consistent patterns for resource management. Let's explore the most common patterns you'll encounter.
CRUD Operations
import requests
class RESTClient:
    """JSON client for a single REST collection endpoint.

    ``base_url`` is the collection URL (for example ``.../posts``); a single
    resource is addressed as ``{base_url}/{resource_id}``. Every method calls
    ``raise_for_status()``, so 4xx/5xx responses surface as
    ``requests.HTTPError``.
    """

    def __init__(self, base_url):
        """Store the collection URL and open a session with JSON headers."""
        # Strip trailing slashes so joining with an id never produces "//".
        self.base_url = base_url.rstrip('/')
        self.session = requests.Session()
        self.session.headers.update({
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        })

    def _url(self, resource_id=None):
        """Return the collection URL, or the item URL when an id is given."""
        # Only None / '' mean "no id". A numeric id of 0 must still address
        # an item rather than silently falling back to the whole collection.
        if resource_id is None or resource_id == '':
            return self.base_url
        return f"{self.base_url}/{resource_id}"

    def get(self, resource_id=None, **params):
        """GET - Read one resource (id given) or the collection (no id).

        Extra keyword arguments are sent as query-string parameters.
        Returns the decoded JSON body.
        """
        response = self.session.get(self._url(resource_id), params=params)
        response.raise_for_status()
        return response.json()

    def post(self, data, resource_id=None):
        """POST - Create a new resource from ``data``; returns the JSON body."""
        response = self.session.post(self._url(resource_id), json=data)
        response.raise_for_status()
        return response.json()

    def put(self, resource_id, data):
        """PUT - Update/replace the resource; returns the JSON body."""
        url = f"{self.base_url}/{resource_id}"
        response = self.session.put(url, json=data)
        response.raise_for_status()
        return response.json()

    def patch(self, resource_id, data):
        """PATCH - Partially update the resource; returns the JSON body."""
        url = f"{self.base_url}/{resource_id}"
        response = self.session.patch(url, json=data)
        response.raise_for_status()
        return response.json()

    def delete(self, resource_id):
        """DELETE - Remove the resource; returns True when the call succeeded.

        Servers answer a successful DELETE with 200, 202 or 204, so success
        is judged by the response being non-error, not by an exact 204.
        """
        url = f"{self.base_url}/{resource_id}"
        response = self.session.delete(url)
        response.raise_for_status()
        return response.ok
# Usage with JSONPlaceholder API
client = RESTClient('https://jsonplaceholder.typicode.com/posts')

# GET all posts
posts = client.get()
print(f"Found {len(posts)} posts")

# GET specific post
post = client.get(1)
print(f"Post 1 title: {post['title']}")

# CREATE new post
new_post = {
    "title": "My New Post",
    "body": "This is the content",
    "userId": 1
}
created = client.post(new_post)
print(f"Created post with ID: {created['id']}")

# UPDATE post
# JSONPlaceholder only simulates writes: the post created above is not
# stored on the server, so a PUT to created['id'] is not expected to succeed.
# Update a post that already exists instead.
updated_data = {"title": "Updated Title"}
updated = client.put(1, updated_data)
print(f"Updated title: {updated['title']}")
Nested Resources
class NestedRESTClient:
    """Client for parent/child REST routes such as ``/users/1/posts``."""

    def __init__(self, base_url):
        """Store the API root, without any trailing slash."""
        self.base_url = base_url.rstrip('/')

    def get_nested(self, parent_resource, parent_id, child_resource=None, child_id=None):
        """Get nested resources like /users/1/posts or /users/1/posts/2

        Without ``child_resource`` the parent itself is fetched; ``child_id``
        is only used when ``child_resource`` is given. Returns the decoded
        JSON body and raises ``requests.HTTPError`` on a 4xx/5xx response.
        """
        url_parts = [self.base_url, parent_resource, str(parent_id)]
        if child_resource:
            url_parts.append(child_resource)
            # Compare against None so that a child id of 0 is not dropped.
            if child_id is not None:
                url_parts.append(str(child_id))
        url = '/'.join(url_parts)
        response = requests.get(url)
        response.raise_for_status()
        return response.json()
# Usage
client = NestedRESTClient('https://jsonplaceholder.typicode.com')

# Get user's posts  ->  /users/1/posts
user_posts = client.get_nested('users', 1, child_resource='posts')
print(f"User 1 has {len(user_posts)} posts")

# Get specific post by user  ->  /users/1/posts/1
specific_post = client.get_nested('users', 1, child_resource='posts', child_id=1)
print(f"Post title: {specific_post['title']}")
Query Parameters and Filtering
def search_resources(base_url, resource, **filters):
    """Search resources with query parameters

    Each keyword argument becomes a query parameter on
    ``{base_url}/{resource}``. List values are passed through unchanged so
    that requests repeats the key once per element; every other value is
    converted with ``str()``. Returns the decoded JSON body and raises
    ``requests.HTTPError`` on a 4xx/5xx response.
    """
    url = f"{base_url}/{resource}"
    # Convert filters to query parameters
    params = {
        key: value if isinstance(value, list) else str(value)
        for key, value in filters.items()
    }
    response = requests.get(url, params=params)
    response.raise_for_status()
    return response.json()
# Usage
base_url = 'https://jsonplaceholder.typicode.com'

# Search posts by user  ->  /posts?userId=1
user_posts = search_resources(base_url, resource='posts', userId=1)
print(f"User 1 posts: {len(user_posts)}")

# Search with multiple filters  ->  /posts?userId=1&_limit=3
filtered_posts = search_resources(base_url, resource='posts', userId=1, _limit=3)
print(f"Limited results: {len(filtered_posts)}")
Handling API Responses
Response Structure Patterns
def parse_api_response(response):
    """Parse different API response patterns

    ``response`` only needs a ``json()`` method that raises ``ValueError``
    on a malformed body. The result always carries a ``"pattern"`` key:

    * ``"invalid"`` - body was not JSON; ``"error"`` holds a message.
    * ``"error"``   - a dict with an ``error`` key, or with ``message`` and
      no ``data``; ``"error"`` and ``"code"`` are extracted.
    * ``"wrapped"`` - a dict with a ``data`` key; ``"data"`` and ``"meta"``
      are extracted.
    * ``"direct"``  - any other dict, or a list; returned under ``"data"``.
    * ``"unknown"`` - any other JSON value (string, number, null, ...).
    """
    try:
        data = response.json()
    except ValueError:
        return {"error": "Invalid JSON response", "pattern": "invalid"}

    # A list is always plain data; key checks only make sense on dicts
    # ("'error' in some_list" would test the elements, not keys).
    if isinstance(data, list):
        return {"data": data, "pattern": "direct"}
    if not isinstance(data, dict):
        return {"data": data, "pattern": "unknown"}

    # The specific shapes are tested before the catch-all "direct" case;
    # otherwise every dict without an 'error' key would be reported as direct
    # and the wrapped / message-only branches could never be reached.
    if 'error' in data or ('message' in data and 'data' not in data):
        return {
            "error": data.get('error') or data.get('message'),
            "code": data.get('code'),
            "pattern": "error"
        }
    if 'data' in data:
        return {
            "data": data['data'],
            "meta": data.get('meta', {}),
            "pattern": "wrapped"
        }
    return {"data": data, "pattern": "direct"}
# Test different response patterns
responses = [
    requests.get('https://jsonplaceholder.typicode.com/posts/1'),  # Direct data
    requests.get('https://httpbin.org/json'),  # Sample JSON document
]
for resp in responses:
    parsed = parse_api_response(resp)
    # .get(): a result describing an unparseable body may carry no 'pattern'.
    print(f"Pattern: {parsed.get('pattern', 'invalid')}")
    payload = parsed.get('data')
    # Only dict payloads have keys to preview; list payloads are skipped.
    if isinstance(payload, dict):
        print(f"Data keys: {list(payload.keys())[:3]}")
Pagination Handling
def fetch_paginated_data(base_url, endpoint, **params):
    """Fetch all pages of paginated API data

    Requests ``{base_url}/{endpoint}`` with ``_page`` = 1, 2, ... and
    concatenates the pages until an empty page comes back. Extra keyword
    arguments are sent as query parameters on every request; a
    caller-supplied ``_limit`` sets the page size (default 10), while
    ``_page`` is always controlled by this function.

    Returns the combined list of items. Raises ``requests.HTTPError`` on a
    4xx/5xx response.
    """
    max_pages = 100  # Safety cap to prevent infinite loops
    request_params = dict(params)
    # Respect the caller's page size instead of silently overwriting it.
    request_params.setdefault('_limit', 10)

    all_data = []
    for page in range(1, max_pages + 1):
        request_params['_page'] = page
        response = requests.get(f"{base_url}/{endpoint}", params=request_params)
        response.raise_for_status()
        data = response.json()
        # An empty page means there is no more data.
        if not data:
            break
        all_data.extend(data)
    return all_data
# Usage: collect every page of /posts, then report how many items came back.
posts = fetch_paginated_data(
    base_url='https://jsonplaceholder.typicode.com',
    endpoint='posts',
)
print(f"Total posts fetched: {len(posts)}")
Rate Limiting
import time
class RateLimitedClient:
    """HTTP client that throttles itself to N requests per 60-second window.

    ``requests_made`` holds the ``time.time()`` timestamp of each request
    sent within the current window, oldest first.
    """

    def __init__(self, requests_per_minute=60):
        """Set the per-minute budget.

        Raises:
            ValueError: if ``requests_per_minute`` is not positive (a budget
                of zero could never be satisfied and used to fail later with
                an IndexError on the empty timestamp list).
        """
        if requests_per_minute <= 0:
            raise ValueError("requests_per_minute must be a positive number")
        self.requests_per_minute = requests_per_minute
        self.requests_made = []

    def can_make_request(self):
        """Check if we can make another request"""
        now = time.time()
        # Remove old requests outside the time window
        self.requests_made = [t for t in self.requests_made
                              if now - t < 60]  # 60 seconds = 1 minute
        return len(self.requests_made) < self.requests_per_minute

    def make_request(self, method, url, **kwargs):
        """Make a rate-limited request

        Blocks (sleeping) until the window has room, then forwards
        ``method``, ``url`` and ``kwargs`` to ``requests.request`` and
        returns its response.
        """
        # Re-check after every sleep instead of assuming one wait is enough.
        while not self.can_make_request():
            # Time until the oldest request leaves the window. Clamped at 0:
            # the clock advances between the check above and this line, and
            # time.sleep() rejects negative values.
            wait_time = max(0.0, 60 - (time.time() - self.requests_made[0]))
            print(f"Rate limit reached. Waiting {wait_time:.1f} seconds...")
            time.sleep(wait_time)
        response = requests.request(method, url, **kwargs)
        self.requests_made.append(time.time())
        return response
# Usage
client = RateLimitedClient(requests_per_minute=30)
# Deliberately exceed the 30-request budget: once it is used up, the client
# pauses until the oldest request leaves the 60-second window.
for i in range(35):  # Try to make more requests than allowed
    response = client.make_request('GET', 'https://httpbin.org/get')
    print(f"Request {i+1}: {response.status_code}")
Data Validation and Transformation
Schema Validation
def validate_user_data(user_data):
    """Validate user data against expected schema

    Checks that ``name``, ``email`` and ``age`` are present, that ``name``
    is a string, that ``email`` is a string matching a simple address
    pattern, and that ``age`` is a non-negative integer (booleans are
    rejected even though ``bool`` is a subclass of ``int``).

    Returns a list of human-readable error messages; empty when valid.
    """
    import re

    required_fields = ['name', 'email', 'age']
    # Check required fields
    errors = [f"Missing required field: {field}"
              for field in required_fields if field not in user_data]

    # Validate data types
    if 'name' in user_data and not isinstance(user_data['name'], str):
        errors.append("Name must be a string")

    if 'email' in user_data:
        email = user_data['email']
        email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
        # Non-strings (None, numbers, ...) are reported as invalid instead of
        # crashing the regex call. fullmatch also rejects a trailing newline,
        # which '$' alone would let through.
        if not isinstance(email, str) or not re.fullmatch(email_pattern, email):
            errors.append("Invalid email format")

    if 'age' in user_data:
        age = user_data['age']
        if isinstance(age, bool) or not isinstance(age, int) or age < 0:
            errors.append("Age must be a positive integer")

    return errors
# Test validation
test_users = [
    {"name": "Alice", "email": "alice@example.com", "age": 25},
    {"name": "Bob", "email": "invalid-email", "age": "thirty"},
    {"email": "charlie@example.com"}  # Missing name and age
]
for user in test_users:
    errors = validate_user_data(user)
    if errors:
        # .get(): the record being reported may be the one missing 'name'.
        print(f"Validation errors for {user.get('name', 'Unknown')}:")
        for error in errors:
            print(f" - {error}")
    else:
        print(f"✓ {user['name']} data is valid")
Data Transformation
def _apply_operation(value, spec):
    """Apply the single operation described by ``spec`` to ``value``."""
    operation = spec.get('operation')

    # String operations only apply to non-empty strings; anything else
    # (None, numbers, '') is passed through unchanged instead of crashing.
    if isinstance(value, str) and value:
        if operation == 'uppercase':
            return value.upper()
        if operation == 'lowercase':
            return value.lower()
        if operation == 'split':
            return value.split(spec.get('delimiter', ' '))

    if operation == 'compare':
        threshold = spec.get('threshold')
        # Without both operands there is nothing to compare.
        if value is not None and threshold is not None:
            return value >= threshold

    return value


def transform_api_data(raw_data, transformations):
    """Transform API data according to specifications

    ``transformations`` maps each output field to one of:

    * a string - copy that field from ``raw_data`` (None when absent);
    * a dict with ``field`` and ``operation`` - read ``field`` and apply
      ``uppercase``, ``lowercase``, ``split`` (optional ``delimiter``,
      default a space) or ``compare`` (True when the value is >=
      ``threshold``); unknown operations copy the value unchanged;
    * anything else - used directly as a constant value.

    Returns a new dict; ``raw_data`` is not modified.
    """
    transformed = {}
    for target_field, source_spec in transformations.items():
        if isinstance(source_spec, str):
            # Simple field mapping
            transformed[target_field] = raw_data.get(source_spec)
        elif isinstance(source_spec, dict):
            # Complex transformation
            value = raw_data.get(source_spec.get('field'))
            transformed[target_field] = _apply_operation(value, source_spec)
        else:
            # Direct value
            transformed[target_field] = source_spec
    return transformed
# Define transformations: each key is an output field, each value says where
# its data comes from (a source field name, or an operation on a field).
transform_spec = {
    'full_name': 'name',
    'email_upper': {'operation': 'uppercase', 'field': 'email'},
    'name_parts': {'operation': 'split', 'field': 'name', 'delimiter': ' '},
    'is_adult': {'operation': 'compare', 'field': 'age', 'threshold': 18}
}

# Test transformation
api_data = {
    "name": "John Doe",
    "email": "john@example.com",
    "age": 30
}
transformed = transform_api_data(api_data, transform_spec)
print("Transformed data:")
for key, value in transformed.items():
    print(f" {key}: {value}")
Error Handling and Resilience
Robust API Client
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
class ResilientAPIClient:
    """API client with automatic retries that reports failures as ValueError.

    Requests that fail with 429/500/502/503/504 are retried with exponential
    backoff. Every failure (HTTP error status, network problem, malformed
    body) is re-raised as ``ValueError`` with the original exception chained
    as ``__cause__``, so callers need a single ``except ValueError``.
    """

    # Fixed messages for the HTTP statuses that get a dedicated explanation.
    _STATUS_MESSAGES = {
        401: "Authentication failed",
        403: "Access forbidden",
        404: "Resource not found",
        429: "Rate limit exceeded",
    }

    def __init__(self, base_url, max_retries=3, backoff_factor=0.3):
        """Create a session whose HTTP and HTTPS adapters retry failed calls.

        Args:
            base_url: API root; trailing slashes are stripped.
            max_retries: total number of retries per request.
            backoff_factor: multiplier for the delay between retries.
        """
        self.base_url = base_url.rstrip('/')
        # Configure retry strategy
        retry_strategy = Retry(
            total=max_retries,
            status_forcelist=[429, 500, 502, 503, 504],
            backoff_factor=backoff_factor,
            # Once retries are exhausted, hand back the last response rather
            # than raising a retry error, so request() can translate its
            # status code like any other HTTP failure.
            raise_on_status=False,
        )
        # Create session with retry adapter
        self.session = requests.Session()
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)
        self.session.headers.update({
            'User-Agent': 'ResilientAPIClient/1.0',
            'Accept': 'application/json'
        })

    def request(self, method, endpoint, **kwargs):
        """Make a resilient API request

        Args:
            method: HTTP method name, e.g. ``'GET'``.
            endpoint: path relative to ``base_url``; a leading slash is optional.
            **kwargs: forwarded to ``requests.Session.request``.

        Returns:
            The decoded JSON body, or None for a 204 No Content response.

        Raises:
            ValueError: on an HTTP error status, a network failure, or a body
                that is not valid JSON.
        """
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        try:
            response = self.session.request(method, url, **kwargs)
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            # Handle specific HTTP errors
            status = response.status_code
            message = self._STATUS_MESSAGES.get(status)
            if message is None:
                message = f"HTTP {status}: {response.text}"
            raise ValueError(message) from e
        except requests.exceptions.ConnectionError as e:
            raise ValueError("Connection failed - check network connectivity") from e
        except requests.exceptions.Timeout as e:
            raise ValueError("Request timed out") from e
        except requests.exceptions.RequestException as e:
            # Anything else requests can raise (exhausted retries, too many
            # redirects, invalid URL, ...) still honours the ValueError contract.
            raise ValueError(f"Request failed: {e}") from e

        # 204 has no body by definition; do not report that as bad JSON.
        if response.status_code == 204:
            return None
        try:
            return response.json()
        except ValueError as e:
            # JSON parsing error
            raise ValueError(f"Invalid JSON response: {str(e)}") from e
# Usage
client = ResilientAPIClient('https://jsonplaceholder.typicode.com')
try:
    # This should work
    posts = client.request('GET', '/posts', params={'_limit': 2})
    print(f"Retrieved {len(posts)} posts")
    # This will fail with 404
    # client.request('GET', '/nonexistent')
except ValueError as e:
    # The client reports its failures as ValueError.
    print(f"API Error: {e}")
Real-World API Integration
GitHub API Client
import os
import requests
class GitHubAPI:
    """Small client for a few GitHub REST API v3 endpoints.

    Every method returns the decoded JSON body and raises
    ``requests.HTTPError`` on a 4xx/5xx response.
    """

    def __init__(self, token=None):
        """Open a session; send ``token`` as the Authorization header if given."""
        self.base_url = 'https://api.github.com'
        self.session = requests.Session()
        # The Accept header is sent either way; only Authorization is optional.
        headers = {'Accept': 'application/vnd.github.v3+json'}
        # Set authentication if token provided
        if token:
            headers['Authorization'] = f'token {token}'
        self.session.headers.update(headers)

    def get_user(self, username):
        """Get user information"""
        response = self.session.get(f'{self.base_url}/users/{username}')
        response.raise_for_status()
        return response.json()

    def get_repos(self, username, **params):
        """Get user's repositories

        Extra keyword arguments (e.g. ``sort``, ``per_page``) are sent as
        query-string parameters.
        """
        url = f'{self.base_url}/users/{username}/repos'
        response = self.session.get(url, params=params)
        response.raise_for_status()
        return response.json()

    def create_issue(self, owner, repo, title, body):
        """Create a new issue in ``owner/repo`` with the given title and body."""
        url = f'{self.base_url}/repos/{owner}/{repo}/issues'
        data = {'title': title, 'body': body}
        response = self.session.post(url, json=data)
        response.raise_for_status()
        return response.json()
# Usage
github = GitHubAPI()  # No token for public data
try:
    # Get user info
    user = github.get_user('octocat')
    print(f"User: {user['name']} ({user['login']})")
    print(f"Followers: {user['followers']}")
    # Get repositories
    repos = github.get_repos('octocat', sort='updated', per_page=3)
    print(f"Recent repos: {[repo['name'] for repo in repos]}")
except requests.exceptions.RequestException as e:
    # RequestException is the base class of the errors raised by requests,
    # including the HTTPError produced by raise_for_status().
    print(f"GitHub API error: {e}")
Key Points to Remember
- JSON is the standard data format for REST APIs, easily parsed with `json.loads()` and serialized with `json.dumps()`
- REST APIs follow consistent patterns: GET for reading, POST for creating, PUT for updating, DELETE for removing
- Response handling requires checking status codes and parsing JSON appropriately
- Pagination is common in APIs and requires handling multiple requests
- Rate limiting protects APIs and requires client-side throttling
- Error handling should be comprehensive, covering network issues, HTTP errors, and data validation
- Authentication is crucial for accessing protected API resources
- Data validation ensures API responses match expected schemas
You've now mastered the fundamentals of working with JSON and REST APIs. In our final lesson, we'll build complete API client libraries with proper error handling, authentication, and production-ready features that you can use in real applications.
