Data Visualization: Creating Charts with Matplotlib

Welcome to Data Visualization! Think of visualization as storytelling with data - you transform numbers into pictures that make complex information instantly understandable. A good chart can reveal insights that numbers alone never would.

Why Visualization Matters

Humans are visual creatures - a trend or outlier that is hard to spot in a list of numbers is usually obvious at a glance in a chart:

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Numbers alone are hard to understand
data = [85, 92, 78, 96, 88, 91, 83, 89]
print("Raw data:", data)
print("Mean:", np.mean(data))
print("Std:", np.std(data))  # np.std defaults to the population std (ddof=0)

# Visualization makes patterns obvious.
# Plot against explicit week numbers (1..8): without an x argument matplotlib
# uses the 0-based list index, which would label the first week as "week 0".
weeks = list(range(1, len(data) + 1))

plt.figure(figsize=(10, 6))
plt.plot(weeks, data, marker='o', linewidth=2, markersize=8)
plt.title('Student Scores Over Time', fontsize=16, fontweight='bold')
plt.xlabel('Week', fontsize=12)
plt.ylabel('Score', fontsize=12)
plt.xticks(weeks)  # one tick per week, no fractional weeks
plt.grid(True, alpha=0.3)
plt.ylim(70, 100)
plt.show()

Matplotlib Basics

Figure and Axes

import matplotlib.pyplot as plt

# Data first: five points on the line y = 2x
x = list(range(1, 6))
y = [2 * value for value in x]

# One figure holding a single axes; all drawing goes through `ax`
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y)

# Title, axis labels and a faint grid
ax.set_title('Simple Line Plot', fontsize=16, fontweight='bold')
ax.set_xlabel('X Values', fontsize=12)
ax.set_ylabel('Y Values', fontsize=12)
ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

Multiple Subplots

# Four curves sampled on the same 100-point grid over [0, 10]
x = np.linspace(0, 10, 100)
y1 = np.sin(x)
y2 = np.cos(x)
y3 = np.sin(x) * np.cos(x)
y4 = np.exp(-x/10)

# 2x2 grid of axes sharing one figure-level title
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
fig.suptitle('Multiple Plots Example', fontsize=16, fontweight='bold')

# One row per panel: (target axes, curve, format string, legend label, title)
panels = [
    (axes[0, 0], y1, 'b-', 'sin(x)', 'Sine Wave'),
    (axes[0, 1], y2, 'r-', 'cos(x)', 'Cosine Wave'),
    (axes[1, 0], y3, 'g-', 'sin(x)*cos(x)', 'Product'),
    (axes[1, 1], y4, 'm-', 'exp(-x/10)', 'Exponential Decay'),
]

for panel_ax, curve, fmt, curve_label, panel_title in panels:
    panel_ax.plot(x, curve, fmt, label=curve_label)
    panel_ax.set_title(panel_title)
    panel_ax.legend()

fig.tight_layout()
plt.show()

Line Plots

Basic Line Plot

# Simulated series: 30 daily dates and a running total of random
# increments (mean 1000, std 200) offset by 5000
dates = pd.date_range('2023-01-01', periods=30, freq='D')
daily_increments = np.random.normal(1000, 200, 30)
sales = daily_increments.cumsum() + 5000

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(dates, sales, linewidth=2, marker='o', markersize=4, alpha=0.7)

ax.set_title('Daily Sales Trend', fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Sales ($)', fontsize=12)
ax.grid(True, alpha=0.3)

# Rotate and right-align the date tick labels so they do not overlap
fig.autofmt_xdate()

fig.tight_layout()
plt.show()

Multiple Lines

# Monthly sales for three products over the same six months
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']
product_a = [100, 120, 140, 160, 180, 200]
product_b = [80, 100, 120, 140, 130, 150]
product_c = [60, 80, 100, 90, 110, 130]

# (legend label, values, format string) for each line
series = [
    ('Product A', product_a, 'b-o'),
    ('Product B', product_b, 'r-s'),
    ('Product C', product_c, 'g-^'),
]

fig, ax = plt.subplots(figsize=(12, 6))

for series_label, values, fmt in series:
    ax.plot(months, values, fmt, linewidth=3, markersize=8,
            label=series_label, alpha=0.8)

ax.set_title('Product Sales Comparison', fontsize=16, fontweight='bold')
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Sales ($)', fontsize=12)
ax.legend(fontsize=12)
ax.grid(True, alpha=0.3)

# Print each value just above its marker. String categories on the x-axis
# sit at positions 0, 1, 2, ..., so the enumerate index is the x coordinate.
for position, month_values in enumerate(zip(product_a, product_b, product_c)):
    for value in month_values:
        ax.text(position, value+5, f'{value}', ha='center', va='bottom', fontsize=10)

fig.tight_layout()
plt.show()

Bar Charts

Vertical Bar Chart

# Sales figures per product category, with one colour per category
categories = ['Electronics', 'Clothing', 'Books', 'Home', 'Sports']
sales = [45000, 32000, 18000, 28000, 22000]
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(categories, sales, color=colors, alpha=0.8, width=0.6)

ax.set_title('Sales by Category', fontsize=16, fontweight='bold')
ax.set_xlabel('Category', fontsize=12)
ax.set_ylabel('Sales ($)', fontsize=12)
ax.grid(True, alpha=0.3, axis='y')  # horizontal grid lines only

# Write the formatted amount centred just above each bar
for bar, value in zip(bars, sales):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 500
    ax.text(label_x, label_y, f'${value:,}',
            ha='center', va='bottom', fontsize=11, fontweight='bold')

fig.tight_layout()
plt.show()

Horizontal Bar Chart

# Sort data for horizontal bars (ascending, so the largest bar ends up on top).
# Reuses `categories`, `sales` and `colors` from the vertical bar chart example.
sorted_indices = np.argsort(sales)
categories_sorted = np.array(categories)[sorted_indices]
sales_sorted = np.array(sales)[sorted_indices]
# Reorder the palette with the same indices so every category keeps the colour
# it had in the unsorted chart; passing `colors` as-is would recolour them.
colors_sorted = [colors[index] for index in sorted_indices]

plt.figure(figsize=(10, 6))
bars = plt.barh(categories_sorted, sales_sorted, color=colors_sorted, alpha=0.8)

plt.title('Sales by Category (Sorted)', fontsize=16, fontweight='bold')
plt.xlabel('Sales ($)', fontsize=12)
plt.ylabel('Category', fontsize=12)
plt.grid(True, alpha=0.3, axis='x')  # vertical grid lines only

# Add value labels just to the right of each bar, vertically centred
for bar, value in zip(bars, sales_sorted):
    plt.text(bar.get_width() + 500, bar.get_y() + bar.get_height()/2, 
             f'${value:,}', ha='left', va='center', fontsize=11, fontweight='bold')

plt.tight_layout()
plt.show()

Grouped Bar Chart

# Quarterly sales for two products, drawn as side-by-side bars
quarters = ['Q1', 'Q2', 'Q3', 'Q4']
product_a = [12000, 15000, 18000, 14000]
product_b = [10000, 13000, 16000, 12000]

# Group centres at 0..3; each bar is `width` wide and shifted half a width
# left or right of its group centre
x = np.arange(len(quarters))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
bars1 = ax.bar(x - width/2, product_a, width, label='Product A',
               color='#1f77b4', alpha=0.8)
bars2 = ax.bar(x + width/2, product_b, width, label='Product B',
               color='#ff7f0e', alpha=0.8)

ax.set_title('Quarterly Sales Comparison', fontsize=16, fontweight='bold')
ax.set_xlabel('Quarter', fontsize=12)
ax.set_ylabel('Sales ($)', fontsize=12)
ax.set_xticks(x)
ax.set_xticklabels(quarters)
ax.legend(fontsize=12)
ax.grid(True, alpha=0.3, axis='y')

# Label every bar with its amount, Product A bars first, then Product B
labelled_bars = list(zip(bars1, product_a)) + list(zip(bars2, product_b))
for bar, value in labelled_bars:
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 200,
            f'${value:,}', ha='center', va='bottom', fontsize=10)

fig.tight_layout()
plt.show()

Scatter Plots

Basic Scatter Plot

# Synthetic data: sales is twice the advertising spend plus Gaussian noise.
# The fixed seed makes the example reproducible.
np.random.seed(42)
advertising = np.random.uniform(1000, 10000, 50)
sales = advertising * 2 + np.random.normal(0, 1000, 50)

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(advertising, sales, alpha=0.7, s=50, c='blue', edgecolors='black')

ax.set_title('Advertising Spend vs Sales', fontsize=16, fontweight='bold')
ax.set_xlabel('Advertising Spend ($)', fontsize=12)
ax.set_ylabel('Sales ($)', fontsize=12)
ax.grid(True, alpha=0.3)

# Least-squares straight line (degree-1 polynomial) through the points
z = np.polyfit(advertising, sales, 1)
p = np.poly1d(z)
trend_values = p(advertising)
ax.plot(advertising, trend_values, "r--", alpha=0.8, linewidth=2, label='Trend')

ax.legend()
fig.tight_layout()
plt.show()

Bubble Chart

# Three values per region: x position, y position and bubble size
regions = ['North', 'South', 'East', 'West', 'Central']
x_pos = np.random.uniform(0, 100, 5)  # Market share
y_pos = np.random.uniform(0, 100, 5)  # Growth rate
sizes = np.random.uniform(20, 100, 5)  # Revenue (bubble size)

fig, ax = plt.subplots(figsize=(10, 6))
# `sizes` drives both the marker area (scaled by 10) and the colour
scatter = ax.scatter(x_pos, y_pos, s=sizes*10, c=sizes, cmap='viridis',
                     alpha=0.7, edgecolors='black')

# Name each bubble, nudged 5 points up and right of its centre
for region, share, growth in zip(regions, x_pos, y_pos):
    ax.annotate(region, (share, growth),
                xytext=(5, 5), textcoords='offset points',
                fontsize=10, fontweight='bold')

ax.set_title('Market Analysis: Share vs Growth', fontsize=16, fontweight='bold')
ax.set_xlabel('Market Share (%)', fontsize=12)
ax.set_ylabel('Growth Rate (%)', fontsize=12)
ax.grid(True, alpha=0.3)

# Colour scale for the bubbles
fig.colorbar(scatter, ax=ax, label='Revenue Scale')

fig.tight_layout()
plt.show()

Histograms and Distributions

Basic Histogram

# 200 simulated exam scores (mean 75, std 15), clipped into the 0-100 range
np.random.seed(42)
scores = np.clip(np.random.normal(75, 15, 200), 0, 100)

fig, ax = plt.subplots(figsize=(10, 6))
n, bins, patches = ax.hist(scores, bins=20, alpha=0.7, color='skyblue',
                           edgecolor='black', linewidth=1)

ax.set_title('Exam Scores Distribution', fontsize=16, fontweight='bold')
ax.set_xlabel('Score', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
ax.grid(True, alpha=0.3, axis='y')

# Dashed vertical line at the mean, named in the legend
mean_score = np.mean(scores)
ax.axvline(mean_score, color='red', linestyle='--', linewidth=2,
           label=f'Mean: {mean_score:.1f}')
ax.legend()

# Summary box in the top-left corner; transAxes means (0, 0) is the
# bottom-left and (1, 1) the top-right of the axes, whatever the data range
std_score = np.std(scores)
ax.text(0.02, 0.98, f'Mean: {mean_score:.1f}\nStd: {std_score:.1f}\nN: {len(scores)}',
        transform=ax.transAxes, verticalalignment='top',
        bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

fig.tight_layout()
plt.show()

Multiple Histograms

# Compare score distributions
class_a_scores = np.random.normal(78, 12, 100)
class_b_scores = np.random.normal(72, 15, 100)
class_c_scores = np.random.normal(80, 10, 100)

# Use one set of bin edges for all three classes. With bins=15 passed to each
# call separately, every histogram would get its own edges (based on its own
# min/max), so bar heights could not be compared between classes.
all_class_scores = np.concatenate([class_a_scores, class_b_scores, class_c_scores])
shared_bins = np.linspace(all_class_scores.min(), all_class_scores.max(), 16)  # 16 edges = 15 bins

plt.figure(figsize=(12, 6))

# alpha=0.5 keeps the histograms drawn first visible underneath the later ones
for class_label, class_scores, class_color in [
    ('Class A', class_a_scores, 'blue'),
    ('Class B', class_b_scores, 'red'),
    ('Class C', class_c_scores, 'green'),
]:
    plt.hist(class_scores, bins=shared_bins, alpha=0.5, label=class_label,
             color=class_color, edgecolor='black')

plt.title('Score Distributions by Class', fontsize=16, fontweight='bold')
plt.xlabel('Score', fontsize=12)
plt.ylabel('Frequency', fontsize=12)
plt.legend()
plt.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

Box Plots

Single Box Plot

# Test scores by subject
math_scores = np.random.normal(75, 10, 50)
science_scores = np.random.normal(78, 12, 50)
english_scores = np.random.normal(82, 8, 50)

# Single source of truth for the box order and its labels
subjects = ['Math', 'Science', 'English']
scores_data = [math_scores, science_scores, english_scores]
# boxplot draws the boxes at x = 1, 2, ..., N by default
positions = list(range(1, len(subjects) + 1))

plt.figure(figsize=(10, 6))
box = plt.boxplot(scores_data,
                 patch_artist=True,  # filled boxes, needed for facecolor
                 boxprops=dict(facecolor='lightblue', color='blue'),
                 medianprops=dict(color='red', linewidth=2),
                 whiskerprops=dict(color='blue'),
                 capprops=dict(color='blue'),
                 flierprops=dict(marker='o', markerfacecolor='red', markersize=5))

# Label the boxes through xticks rather than boxplot's `labels=` keyword:
# that keyword was renamed `tick_labels` in Matplotlib 3.9 and the old
# spelling is deprecated, while xticks works on every version.
plt.xticks(positions, subjects)

plt.title('Test Scores by Subject', fontsize=16, fontweight='bold')
plt.ylabel('Score', fontsize=12)
plt.grid(True, alpha=0.3, axis='y')

# Add statistics: print each subject's mean just above its highest score.
# The loop variable is deliberately not called `scores`, so it does not
# overwrite the `scores` array from the histogram example.
for position, subject_scores in zip(positions, scores_data):
    plt.text(position, np.max(subject_scores)+2, f'Mean: {np.mean(subject_scores):.1f}', 
             ha='center', va='bottom', fontsize=10)

plt.tight_layout()
plt.show()

Multiple Box Plots

# Scores by grade level (30 simulated students per grade)
grade_9 = np.random.normal(70, 15, 30)
grade_10 = np.random.normal(75, 12, 30)
grade_11 = np.random.normal(78, 10, 30)
grade_12 = np.random.normal(82, 8, 30)

all_scores = [grade_9, grade_10, grade_11, grade_12]
labels = ['9th Grade', '10th Grade', '11th Grade', '12th Grade']

plt.figure(figsize=(12, 6))
# patch_artist=True makes the boxes filled patches so they can be coloured below
box = plt.boxplot(all_scores, patch_artist=True)

# Label the boxes through xticks rather than boxplot's `labels=` keyword:
# that keyword was renamed `tick_labels` in Matplotlib 3.9 and the old
# spelling is deprecated. Boxes are drawn at x = 1, 2, ..., N by default.
plt.xticks(list(range(1, len(labels) + 1)), labels)

# Color each box differently
colors = ['lightblue', 'lightgreen', 'lightyellow', 'lightcoral']
for patch, color in zip(box['boxes'], colors):
    patch.set_facecolor(color)

plt.title('Test Scores by Grade Level', fontsize=16, fontweight='bold')
plt.ylabel('Score', fontsize=12)
plt.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

Pie Charts

Basic Pie Chart

# Market share per company, in percent (the five values add up to 100)
companies = ['Company A', 'Company B', 'Company C', 'Company D', 'Others']
market_share = [35, 25, 20, 12, 8]
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']
explode = [0.1, 0, 0, 0, 0]  # Pull only the first slice out of the pie

fig, ax = plt.subplots(figsize=(10, 8))
ax.pie(
    market_share,
    labels=companies,
    colors=colors,
    autopct='%1.1f%%',  # print each slice's percentage with one decimal
    startangle=90,
    explode=explode,
    shadow=True,
    textprops={'fontsize': 12},
)

ax.set_title('Market Share Distribution', fontsize=16, fontweight='bold')
ax.axis('equal')  # Same scale on both axes so the pie is a circle

fig.tight_layout()
plt.show()

Donut Chart

# Create donut chart
# Reuses `market_share`, `companies` and `colors` from the basic pie chart example.
plt.figure(figsize=(10, 8))

# Outer pie; pctdistance=0.85 moves the percentage labels outward so they
# sit on the ring instead of under the white centre
plt.pie(market_share, labels=companies, colors=colors, autopct='%1.1f%%',
        startangle=90, pctdistance=0.85)

# Inner circle: a white disc of radius 0.70 covers the middle of the pie
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
plt.gca().add_artist(centre_circle)

plt.title('Market Share Distribution (Donut)', fontsize=16, fontweight='bold')
plt.axis('equal')

# Add total in center. market_share holds percentages, so the total is a
# percentage too (labelling it as "$...M" would misreport it as revenue).
total = sum(market_share)
plt.text(0, 0, f'Total\n{total}%', ha='center', va='center', 
         fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

Advanced Visualizations

Heatmap

# Correlation matrix
# Each subject column is drawn independently, so off-diagonal correlations
# come out close to zero; the diagonal is always 1.
np.random.seed(42)
data = pd.DataFrame({
    'Math': np.random.normal(75, 10, 100),
    'Science': np.random.normal(78, 12, 100),
    'English': np.random.normal(82, 8, 100),
    'History': np.random.normal(70, 15, 100),
    'Art': np.random.normal(85, 5, 100)
})

correlation_matrix = data.corr()
subject_names = correlation_matrix.columns
tick_positions = range(len(subject_names))

plt.figure(figsize=(8, 6))
# Pin the colour scale to the full correlation range [-1, 1]. Without
# vmin/vmax the scale is fitted to the data, so the neutral midpoint of the
# diverging 'coolwarm' map would not correspond to a correlation of 0.
heatmap = plt.imshow(correlation_matrix, cmap='coolwarm', aspect='auto',
                     vmin=-1, vmax=1)

plt.colorbar(heatmap, label='Correlation')
plt.xticks(tick_positions, subject_names, rotation=45)
plt.yticks(tick_positions, subject_names)

# Add correlation values. imshow puts row i at y = i and column j at x = j.
# Weak correlations sit on pale cells where white text is unreadable, so use
# black there and white only on the strongly coloured cells.
for i, row_values in enumerate(correlation_matrix.values):
    for j, value in enumerate(row_values):
        text_color = 'white' if abs(value) > 0.5 else 'black'
        plt.text(j, i, f'{value:.2f}',
                 ha='center', va='center', color=text_color, fontweight='bold')

plt.title('Subject Correlation Matrix', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

Time Series with Multiple Y-Axes

# Simulated series over 100 days: price is a running total of random
# increments offset by 1000, volume is independent noise around one million
dates = pd.date_range('2023-01-01', periods=100, freq='D')
price = np.random.normal(100, 5, 100).cumsum() + 1000
volume = np.random.normal(1000000, 200000, 100)

# One colour per series, reused for the line/bars, the axis label and the ticks
price_color = 'tab:blue'
volume_color = 'tab:red'

fig, ax1 = plt.subplots(figsize=(12, 6))

# Left y-axis: price as a line
ax1.plot(dates, price, color=price_color, linewidth=2)
ax1.set_xlabel('Date')
ax1.set_ylabel('Stock Price ($)', color=price_color)
ax1.tick_params(axis='y', labelcolor=price_color)

# Right y-axis: volume as translucent bars on a second axes sharing the x-axis
ax2 = ax1.twinx()
ax2.bar(dates, volume, color=volume_color, alpha=0.3, width=1)
ax2.set_ylabel('Volume', color=volume_color)
ax2.tick_params(axis='y', labelcolor=volume_color)

# The title is attached to the twin axes (the most recently created one)
ax2.set_title('Stock Price and Volume Over Time', fontsize=16, fontweight='bold')
fig.autofmt_xdate()
fig.tight_layout()
plt.show()

Practical Examples

Example 1: Sales Dashboard

# Sample data for the four dashboard panels
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']
sales = [45000, 52000, 48000, 61000, 55000, 67000]
categories = ['Electronics', 'Clothing', 'Books', 'Home']
category_sales = [180000, 120000, 80000, 100000]
regions = ['North', 'South', 'East', 'West']
region_performance = [35, 28, 22, 15]

# 2x2 dashboard; give each panel a readable name instead of indexing axes[r, c]
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Sales Performance Dashboard', fontsize=16, fontweight='bold')
(trend_ax, category_ax), (region_ax, hist_ax) = axes

# Top-left: monthly sales as a line
trend_ax.plot(months, sales, 'b-o', linewidth=3, markersize=8)
trend_ax.set_title('Monthly Sales Trend', fontweight='bold')
trend_ax.set_ylabel('Sales ($)')
trend_ax.grid(True, alpha=0.3)

# Top-right: sales per category as bars with the amount printed above each
bars = category_ax.bar(categories, category_sales, color='skyblue', alpha=0.8)
category_ax.set_title('Sales by Category', fontweight='bold')
category_ax.set_ylabel('Sales ($)')
category_ax.tick_params(axis='x', rotation=45)

for bar, value in zip(bars, category_sales):
    label_x = bar.get_x() + bar.get_width()/2
    category_ax.text(label_x, bar.get_height() + 2000,
                     f'${value:,}', ha='center', va='bottom')

# Bottom-left: share per region as a pie
region_ax.pie(region_performance, labels=regions, autopct='%1.1f%%',
              colors=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'])
region_ax.set_title('Regional Performance', fontweight='bold')

# Bottom-right: histogram of 1000 simulated daily sales with the mean marked
sales_daily = np.random.normal(5000, 1000, 1000)
mean_daily = np.mean(sales_daily)
hist_ax.hist(sales_daily, bins=30, alpha=0.7, color='green', edgecolor='black')
hist_ax.set_title('Daily Sales Distribution', fontweight='bold')
hist_ax.set_xlabel('Daily Sales ($)')
hist_ax.set_ylabel('Frequency')
hist_ax.axvline(mean_daily, color='red', linestyle='--',
                label=f'Mean: ${mean_daily:.0f}')
hist_ax.legend()

fig.tight_layout()
plt.show()

Example 2: Student Performance Analysis

# Student performance data: 50 simulated students, four subject scores each
students = pd.DataFrame({
    'name': [f'Student_{i}' for i in range(1, 51)],
    'math': np.random.normal(75, 15, 50),
    'science': np.random.normal(78, 12, 50),
    'english': np.random.normal(82, 10, 50),
    'history': np.random.normal(70, 18, 50)
})

subjects = ['math', 'science', 'english', 'history']

# Clip scores to 0-100 (the normal draws can fall outside the valid range)
for col in subjects:
    students[col] = np.clip(students[col], 0, 100)

# Calculate each student's average across the four subjects
students['average'] = students[subjects].mean(axis=1)

# Create comprehensive visualization: 2 rows x 3 columns = 6 panels, which is
# exactly one bar chart + four subject histograms + one overall histogram.
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
fig.suptitle('Student Performance Analysis', fontsize=16, fontweight='bold')

# Walk the grid row by row so every panel is used exactly once:
#   [0,0] averages   [0,1] math      [0,2] science
#   [1,0] english    [1,1] history   [1,2] overall
# (Offsetting the column index instead would put the last subject on the same
# axes as the overall histogram and leave one panel empty.)
panels = axes.ravel()
averages_ax = panels[0]
subject_axes = panels[1:5]
overall_ax = panels[5]

# 1. Subject-wise averages
avg_scores = [students[sub].mean() for sub in subjects]
averages_ax.bar(subjects, avg_scores, color=['blue', 'green', 'red', 'orange'], alpha=0.7)
averages_ax.set_title('Average Scores by Subject')
averages_ax.set_ylabel('Average Score')
averages_ax.set_ylim(0, 100)

# 2. Score distributions, one histogram per subject with its mean marked
for subject_ax, subject in zip(subject_axes, subjects):
    subject_ax.hist(students[subject], bins=10, alpha=0.7, edgecolor='black')
    subject_ax.set_title(f'{subject.title()} Distribution')
    subject_ax.set_xlabel('Score')
    subject_ax.set_ylabel('Frequency')
    subject_ax.axvline(students[subject].mean(), color='red', linestyle='--')

# 3. Overall performance distribution
overall_mean = students['average'].mean()
overall_ax.hist(students['average'], bins=10, alpha=0.7, color='purple', edgecolor='black')
overall_ax.set_title('Overall Average Distribution')
overall_ax.set_xlabel('Average Score')
overall_ax.set_ylabel('Frequency')
overall_ax.axvline(overall_mean, color='red', linestyle='--',
                   label=f'Mean: {overall_mean:.1f}')
overall_ax.legend()

plt.tight_layout()
plt.show()

# Math score against science score, one point per row of `students`
math_scores_col = students['math']
science_scores_col = students['science']

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(math_scores_col, science_scores_col, alpha=0.7, s=50)
ax.set_title('Math vs Science Performance', fontsize=16, fontweight='bold')
ax.set_xlabel('Math Score')
ax.set_ylabel('Science Score')
ax.grid(True, alpha=0.3)

# Least-squares straight line (degree-1 polynomial) through the points
z = np.polyfit(math_scores_col, science_scores_col, 1)
p = np.poly1d(z)
ax.plot(math_scores_col, p(math_scores_col), "r--", alpha=0.8)

# Correlation coefficient between the two columns, shown in a box near the
# top-left corner; transAxes places it relative to the axes, not the data
corr = math_scores_col.corr(science_scores_col)
ax.text(0.05, 0.95, f'Correlation: {corr:.3f}',
        transform=ax.transAxes, fontsize=12,
        bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

fig.tight_layout()
plt.show()

Best Practices

1. Choose the Right Chart Type

# For showing trends over time: Line chart
# For comparing categories: Bar chart
# For showing distributions: Histogram
# For showing relationships: Scatter plot
# For showing parts of a whole: Pie chart
# For comparing distributions across groups: Box plot

2. Use Color Effectively

# Use colorblind-friendly palettes.
# These five colours come from the Okabe-Ito palette, which was designed to
# stay distinguishable under the common forms of colour-vision deficiency.
# (Matplotlib's default colour cycle contains a red/green pair that is hard
# to tell apart for many readers.)
colors = ['#0072B2', '#E69F00', '#009E73', '#D55E00', '#CC79A7']

# Use consistent colors for categories: fix the mapping once and look colours
# up by name, so a category keeps its colour in every chart even when the
# data is sorted or filtered.
category_colors = {
    'Electronics': '#0072B2',
    'Clothing': '#E69F00',
    'Books': '#009E73',
    'Home': '#D55E00'
}

3. Add Context and Annotations

# Always include titles, labels, and legends
plt.title('Clear, Descriptive Title', fontsize=16, fontweight='bold')
plt.xlabel('X-axis label with units')
plt.ylabel('Y-axis label with units')

# Add reference lines
threshold = 80  # example value - replace with the cut-off that matters for your data
plt.axhline(y=threshold, color='red', linestyle='--', label='Threshold')

# Build the legend only after every labelled artist has been added;
# a legend created earlier would not list the threshold line.
plt.legend()

# Add text annotations (coordinates are in data units)
note_x, note_y = 0.5, threshold  # example position - replace with the point to highlight
plt.text(note_x, note_y, 'Important point', ha='center', va='bottom')

4. Make it Publication-Ready

# Set figure size appropriately
plt.figure(figsize=(12, 8))

# ... draw your chart here ...

# Remove unnecessary elements (the top and right border lines)
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Save last, once all styling is applied - savefig writes the figure as it
# looks at the moment of the call. Use high DPI for crisp images;
# bbox_inches='tight' trims the empty margin around the chart.
plt.savefig('chart.png', dpi=300, bbox_inches='tight')

Practice Exercises

Exercise 1: Basic Charts

Create visualizations for:

Line chart of monthly website traffic
Bar chart of product sales by category
Scatter plot of height vs weight
Histogram of exam scores

Exercise 2: Comparative Analysis

Create a dashboard showing:

Sales comparison between two years
Performance metrics for different departments
Customer satisfaction by region
Product ratings distribution

Exercise 3: Time Series Analysis

Visualize:

Stock price movement over time
Temperature variations throughout the year
Website traffic patterns
Sales seasonality

Exercise 4: Statistical Visualizations

Create charts for:

Correlation matrix heatmap
Box plots for multiple groups
Q-Q plots for normality testing
Confidence intervals visualization

Exercise 5: Interactive Dashboard

Build a comprehensive dashboard with:

Multiple chart types
Consistent styling
Clear titles and labels
Data insights and annotations
Professional appearance

Summary

Data visualization transforms data into insights:

Chart Types:

# Quick-reference templates: the argument names below (x, y, categories,
# values, data) stand in for your own data.

# Line plot for trends
plt.plot(x, y)

# Bar chart for comparisons
plt.bar(categories, values)

# Scatter plot for relationships
plt.scatter(x, y)

# Histogram for distributions
plt.hist(data, bins=30)

# Box plot for statistics
plt.boxplot(data)

Best Practices:

# Quick-reference template: apply these calls to a chart you have already drawn.

# Clear titles and labels
plt.title('Descriptive Title', fontsize=16, fontweight='bold')
plt.xlabel('X-axis label')
plt.ylabel('Y-axis label')

# Professional styling
plt.grid(True, alpha=0.3)  # faint grid
plt.legend()  # lists only artists that were created with a label=...
plt.tight_layout()

# Save high-quality images
plt.savefig('chart.png', dpi=300, bbox_inches='tight')

Key Concepts:

Choosing appropriate chart types
Effective use of color and styling
Adding context with annotations
Creating publication-ready visualizations
Building comprehensive dashboards

Next: Statistical Analysis - Basic statistics and correlations! 📈

Popular Topics

Categories