CSV and JSON Files: Structured Data Formats
Welcome to structured data formats! CSV and JSON are like organized filing systems for your data. CSV is great for tabular data, while JSON handles complex nested structures.
CSV Files - Tabular Data
What is CSV?
CSV (Comma-Separated Values) is like a spreadsheet in text form:
- Each line is a row
- Values are separated by commas
- First row often contains column headers
Name,Age,City
Alice,25,New York
Bob,30,London
Charlie,35,Paris
Reading CSV Files
import csv
# Basic CSV reading
with open("data.csv", "r") as file:
csv_reader = csv.reader(file)
for row in csv_reader:
print(row)
Reading CSV with Headers
import csv
with open("people.csv", "r") as file:
csv_reader = csv.reader(file)
# Read header row
headers = next(csv_reader)
print(f"Headers: {headers}")
# Read data rows
for row in csv_reader:
name, age, city = row
print(f"{name} is {age} years old and lives in {city}")
CSV with Dictionaries
import csv
# Reading as dictionaries (better for headers)
with open("people.csv", "r") as file:
csv_reader = csv.DictReader(file)
for row in csv_reader:
print(f"Name: {row['Name']}")
print(f"Age: {row['Age']}")
print(f"City: {row['City']}")
print("-" * 20)
Writing CSV Files
Basic CSV Writing
import csv
# Sample data
data = [
["Name", "Age", "City"],
["Alice", "25", "New York"],
["Bob", "30", "London"],
["Charlie", "35", "Paris"]
]
with open("output.csv", "w", newline="") as file:
csv_writer = csv.writer(file)
# Write all rows
for row in data:
csv_writer.writerow(row)
print("CSV file 'output.csv' created!")
Writing CSV with Dictionaries
import csv
# Data as dictionaries
people = [
{"name": "Alice", "age": 25, "city": "New York"},
{"name": "Bob", "age": 30, "city": "London"},
{"name": "Charlie", "age": 35, "city": "Paris"}
]
with open("people_dict.csv", "w", newline="") as file:
# Specify field names
fieldnames = ["name", "age", "city"]
writer = csv.DictWriter(file, fieldnames=fieldnames)
# Write header
writer.writeheader()
# Write data
for person in people:
writer.writerow(person)
print("Dictionary CSV file created!")
JSON Files - Structured Data
What is JSON?
JSON (JavaScript Object Notation) is like Python data structures in text form:
- Objects become dictionaries
{"key": "value"} - Arrays become lists
[1, 2, 3] - Supports nested structures
- Human-readable and machine-friendly
Writing JSON Files
import json
# Python data
user_data = {
"name": "Alice",
"age": 25,
"hobbies": ["reading", "coding", "gaming"],
"address": {
"street": "123 Main St",
"city": "New York",
"zipcode": "10001"
},
"is_active": True
}
# Write to JSON file
with open("user.json", "w") as file:
json.dump(user_data, file, indent=2)
print("JSON file 'user.json' created!")
Reading JSON Files
import json
# Read from JSON file
with open("user.json", "r") as file:
loaded_data = json.load(file)
print("Loaded data:")
print(f"Name: {loaded_data['name']}")
print(f"Age: {loaded_data['age']}")
print(f"Hobbies: {loaded_data['hobbies']}")
print(f"City: {loaded_data['address']['city']}")
print(f"Active: {loaded_data['is_active']}")
JSON Formatting Options
import json
data = {"name": "Alice", "scores": [85, 92, 88], "active": True}
# Compact JSON (no extra spaces)
compact = json.dumps(data)
print("Compact:", compact)
# Pretty-printed JSON (readable)
pretty = json.dumps(data, indent=2)
print("Pretty:")
print(pretty)
# Sorted keys
sorted_json = json.dumps(data, indent=2, sort_keys=True)
print("Sorted keys:")
print(sorted_json)
Real-World CSV Examples
Example 1: Student Grade Tracker
import csv
def add_student(filename, name, grades):
"""Add a student with grades to CSV file."""
# Calculate average
average = sum(grades) / len(grades)
# Determine letter grade
if average >= 90:
letter = "A"
elif average >= 80:
letter = "B"
elif average >= 70:
letter = "C"
elif average >= 60:
letter = "D"
else:
letter = "F"
# Prepare row data
row = [name] + grades + [round(average, 2), letter]
# Append to CSV
with open(filename, "a", newline="") as file:
writer = csv.writer(file)
writer.writerow(row)
print(f"Student {name} added with grade {letter}")
def view_students(filename):
"""Display all students from CSV file."""
try:
with open(filename, "r") as file:
reader = csv.reader(file)
students = list(reader)
if not students:
print("No students found.")
return
print("Student Report:")
print("-" * 60)
print("15")
print("-" * 60)
for student in students:
if len(student) >= 6: # name + 3 grades + avg + letter
name = student[0]
grades = student[1:4]
avg = student[4]
letter = student[5]
print("15")
except FileNotFoundError:
print(f"File '{filename}' not found.")
# Create CSV with headers
with open("students.csv", "w", newline="") as file:
writer = csv.writer(file)
writer.writerow(["Name", "Math", "Science", "English", "Average", "Grade"])
# Add students
add_student("students.csv", "Alice", [95, 92, 88])
add_student("students.csv", "Bob", [78, 85, 90])
add_student("students.csv", "Charlie", [88, 76, 82])
view_students("students.csv")
Example 2: Expense Tracker
import csv
from datetime import datetime
def add_expense(filename, description, amount, category):
"""Add an expense to the tracker."""
date = datetime.now().strftime("%Y-%m-%d")
row = [date, description, amount, category]
with open(filename, "a", newline="") as file:
writer = csv.writer(file)
writer.writerow(row)
print(f"Expense added: {description} - ${amount}")
def calculate_totals(filename):
"""Calculate expense totals by category."""
try:
with open(filename, "r") as file:
reader = csv.DictReader(file)
totals = {}
total_amount = 0
for row in reader:
category = row["category"]
amount = float(row["amount"])
totals[category] = totals.get(category, 0) + amount
total_amount += amount
return totals, total_amount
except FileNotFoundError:
return {}, 0
def generate_report(filename):
"""Generate an expense report."""
totals, total_amount = calculate_totals(filename)
if not totals:
print("No expenses found.")
return
print("Expense Report")
print("=" * 40)
print(f"Total Expenses: ${total_amount:.2f}")
print()
print("By Category:")
for category, amount in sorted(totals.items()):
percentage = (amount / total_amount) * 100
print("15")
# Create expense file with headers
with open("expenses.csv", "w", newline="") as file:
writer = csv.writer(file)
writer.writerow(["date", "description", "amount", "category"])
# Add expenses
add_expense("expenses.csv", "Coffee", 4.50, "Food")
add_expense("expenses.csv", "Gas", 45.00, "Transportation")
add_expense("expenses.csv", "Movie tickets", 24.00, "Entertainment")
add_expense("expenses.csv", "Groceries", 67.50, "Food")
add_expense("expenses.csv", "Bus pass", 60.00, "Transportation")
generate_report("expenses.csv")
Real-World JSON Examples
Example 1: Configuration Manager
import json
import os
class ConfigManager:
def __init__(self, config_file="config.json"):
self.config_file = config_file
self.defaults = {
"app_name": "MyApp",
"version": "1.0.0",
"settings": {
"theme": "light",
"language": "en",
"notifications": True,
"auto_save": False
},
"window": {
"width": 800,
"height": 600,
"maximized": False
}
}
self.config = self.load_config()
def load_config(self):
"""Load configuration from file."""
if os.path.exists(self.config_file):
try:
with open(self.config_file, "r") as file:
return json.load(file)
except json.JSONDecodeError:
print("Warning: Config file corrupted. Using defaults.")
return self.defaults.copy()
else:
# Create config file with defaults
self.save_config()
return self.defaults.copy()
def save_config(self):
"""Save current configuration to file."""
with open(self.config_file, "w") as file:
json.dump(self.config, file, indent=2)
def get(self, key_path, default=None):
"""Get a configuration value using dot notation."""
keys = key_path.split(".")
value = self.config
try:
for key in keys:
value = value[key]
return value
except (KeyError, TypeError):
return default
def set(self, key_path, value):
"""Set a configuration value using dot notation."""
keys = key_path.split(".")
config = self.config
# Navigate to the parent of the target key
for key in keys[:-1]:
if key not in config:
config[key] = {}
config = config[key]
# Set the value
config[keys[-1]] = value
self.save_config()
def reset(self):
"""Reset configuration to defaults."""
self.config = self.defaults.copy()
self.save_config()
# Usage
config = ConfigManager()
print(f"App name: {config.get('app_name')}")
print(f"Theme: {config.get('settings.theme')}")
print(f"Window width: {config.get('window.width')}")
# Change settings
config.set("settings.theme", "dark")
config.set("window.maximized", True)
config.set("settings.notifications", False)
print(f"\nUpdated theme: {config.get('settings.theme')}")
print(f"Window maximized: {config.get('window.maximized')}")
Example 2: Task Manager
import json
import os
from datetime import datetime
class TaskManager:
def __init__(self, data_file="tasks.json"):
self.data_file = data_file
self.tasks = self.load_tasks()
def load_tasks(self):
"""Load tasks from JSON file."""
if os.path.exists(self.data_file):
try:
with open(self.data_file, "r") as file:
return json.load(file)
except json.JSONDecodeError:
print("Warning: Task file corrupted. Starting fresh.")
return []
return []
def save_tasks(self):
"""Save tasks to JSON file."""
with open(self.data_file, "w") as file:
json.dump(self.tasks, file, indent=2)
def add_task(self, title, description="", priority="medium"):
"""Add a new task."""
task = {
"id": len(self.tasks) + 1,
"title": title,
"description": description,
"priority": priority,
"completed": False,
"created": datetime.now().isoformat(),
"completed_date": None
}
self.tasks.append(task)
self.save_tasks()
print(f"Task '{title}' added successfully!")
def complete_task(self, task_id):
"""Mark a task as completed."""
for task in self.tasks:
if task["id"] == task_id and not task["completed"]:
task["completed"] = True
task["completed_date"] = datetime.now().isoformat()
self.save_tasks()
print(f"Task '{task['title']}' marked as completed!")
return True
print(f"Task with ID {task_id} not found or already completed.")
return False
def list_tasks(self, show_completed=False):
"""List all tasks."""
if not self.tasks:
print("No tasks found.")
return
print("Task List:")
print("-" * 60)
for task in self.tasks:
if not show_completed and task["completed"]:
continue
status = "✓" if task["completed"] else "☐"
priority_icon = {"high": "🔴", "medium": "🟡", "low": "🟢"}.get(task["priority"], "⚪")
print("2")
print("-" * 60)
total = len(self.tasks)
completed = sum(1 for task in self.tasks if task["completed"])
print(f"Total: {total}, Completed: {completed}, Pending: {total - completed}")
def get_task_stats(self):
"""Get task statistics."""
if not self.tasks:
return {"total": 0, "completed": 0, "pending": 0, "completion_rate": 0}
total = len(self.tasks)
completed = sum(1 for task in self.tasks if task["completed"])
pending = total - completed
completion_rate = (completed / total * 100) if total > 0 else 0
return {
"total": total,
"completed": completed,
"pending": pending,
"completion_rate": round(completion_rate, 1)
}
# Usage
manager = TaskManager()
manager.add_task("Learn Python", "Complete Python mastery course", "high")
manager.add_task("Exercise", "Go for a 30-minute run", "medium")
manager.add_task("Read book", "Read 20 pages of current book", "low")
manager.list_tasks()
manager.complete_task(2) # Complete exercise
print("\nAfter completing exercise:")
manager.list_tasks()
stats = manager.get_task_stats()
print(f"\nStats: {stats['completed']}/{stats['total']} tasks completed ({stats['completion_rate']}%)")
Converting Between CSV and JSON
CSV to JSON
import csv
import json
def csv_to_json(csv_file, json_file):
"""Convert CSV file to JSON."""
data = []
with open(csv_file, "r") as file:
csv_reader = csv.DictReader(file)
for row in csv_reader:
data.append(row)
with open(json_file, "w") as file:
json.dump(data, file, indent=2)
print(f"Converted {csv_file} to {json_file}")
# Usage
csv_to_json("people.csv", "people.json")
JSON to CSV
import csv
import json
def json_to_csv(json_file, csv_file):
"""Convert JSON file to CSV."""
with open(json_file, "r") as file:
data = json.load(file)
if not data:
print("No data found in JSON file.")
return
# Get field names from first item
fieldnames = list(data[0].keys())
with open(csv_file, "w", newline="") as file:
writer = csv.DictWriter(file, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(data)
print(f"Converted {json_file} to {csv_file}")
# Usage
json_to_csv("people.json", "people_from_json.csv")
Best Practices
CSV Best Practices
import csv
# ✅ Use newline='' to avoid extra blank lines on Windows
with open("data.csv", "w", newline="") as file:
writer = csv.writer(file)
writer.writerow(["Name", "Age"])
# ✅ Handle commas and quotes in data
writer.writerow(['John "Johnny" Doe', 30]) # Quotes are handled automatically
# ✅ Use csv.DictReader/DictWriter for clarity
with open("data.csv", "r") as file:
reader = csv.DictReader(file)
for row in reader:
print(row["Name"], row["Age"])
JSON Best Practices
import json
# ✅ Use indent for readability in development
data = {"name": "Alice", "scores": [85, 92]}
with open("data.json", "w") as file:
json.dump(data, file, indent=2)
# ✅ Use sort_keys for consistent output
json.dump(data, file, indent=2, sort_keys=True)
# ✅ Handle datetime objects
import datetime
data["timestamp"] = datetime.datetime.now().isoformat()
json.dump(data, file, indent=2) # datetime objects need to be converted to strings
Practice Exercises
Exercise 1: CSV Grade Analyzer
Create a grade analyzer that:
- Reads student grades from CSV
- Calculates averages and letter grades
- Generates summary reports
- Saves results back to CSV
Exercise 2: JSON Todo App
Build a todo application that:
- Stores tasks in JSON format
- Add, complete, and delete tasks
- Filter tasks by status/priority
- Save/load todo list
Exercise 3: CSV to JSON Converter
Create a converter that:
- Reads CSV files with headers
- Converts to JSON format
- Handles different data types
- Preserves data integrity
Exercise 4: Configuration System
Build a configuration manager that:
- Loads settings from JSON
- Provides get/set methods
- Validates configuration values
- Saves changes automatically
Exercise 5: Data Exporter
Create a data export system that:
- Exports data to both CSV and JSON
- Handles nested data structures
- Provides different export options
- Generates metadata about exports
Summary
CSV and JSON enable structured data exchange:
CSV (Comma-Separated Values):
- Tabular data format
csv.reader()for readingcsv.writer()for writingcsv.DictReader/DictWriterfor headers
JSON (JavaScript Object Notation):
- Hierarchical data format
json.load()for readingjson.dump()for writing- Supports nested structures
Use CSV for:
- Spreadsheet-like data
- Simple tabular structures
- Data exchange with Excel
Use JSON for:
- Complex nested data
- Configuration files
- API data exchange
- Python-like data structures
Best Practices:
- Handle file encoding properly
- Use appropriate formatting
- Validate data when loading
- Handle errors gracefully
Next: Error Handling - making programs robust! 🛡️