Skip to content

Data Classes

Less boilerplate, more Pythonic data classes.

Data classes are a feature in Python that provides a decorator and functions for automatically adding special methods to user-defined classes. They’re particularly useful for classes that primarily store data, significantly reducing boilerplate code.

Without dataclasses, you need to manually implement several methods:

without_dataclass.py
class User:
    """Hand-written equivalent of a small data class holding user details."""

    def __init__(self, username: str, email: str, age: int):
        self.username = username
        self.email = email
        self.age = age

    def __repr__(self):
        # !r quotes and escapes the strings the way a dataclass repr does.
        return (
            f"User(username={self.username!r}, email={self.email!r}, "
            f"age={self.age})"
        )

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) lets Python try the
        # other operand's __eq__ before falling back to "not equal".
        if not isinstance(other, User):
            return NotImplemented
        return (self.username, self.email, self.age) == (
            other.username,
            other.email,
            other.age,
        )

    def __hash__(self):
        # Hash the same fields __eq__ compares so equal users hash equally.
        return hash((self.username, self.email, self.age))


# Lots of boilerplate code!

With dataclasses, this becomes much simpler:

with_dataclass.py
from dataclasses import dataclass


@dataclass
class User:
    """User record; the decorator writes the special methods for us."""

    username: str
    email: str
    age: int


# Automatically provides:
# - __init__
# - __repr__
# - __eq__
# - And more!
# Note: with the default settings (eq=True, frozen=False) __hash__ is set to
# None, so instances are unhashable unless you pass frozen=True or
# unsafe_hash=True.
basic_dataclass.py
from dataclasses import dataclass


@dataclass
class Point:
    """A point described by its x and y coordinates."""

    x: float
    y: float


# Usage
p1 = Point(1.0, 2.0)
p2 = Point(1.0, 2.0)
print(p1)  # Point(x=1.0, y=2.0) - automatic __repr__
print(p1 == p2)  # True - automatic __eq__

The @dataclass decorator automatically generates:

  • __init__ - Constructor
  • __repr__ - String representation
  • __eq__ - Equality comparison
  • __hash__ - Hash function (only when frozen=True or unsafe_hash=True; by default __hash__ is set to None, so instances are unhashable)
generated_methods.py
from dataclasses import dataclass


@dataclass
class Product:
    """Product with a name, a price and a stock flag."""

    name: str
    price: float
    in_stock: bool = True  # Default value


# __init__ is generated
product = Product("Laptop", 999.99)
print(product)  # Product(name='Laptop', price=999.99, in_stock=True)

# __eq__ is generated
p1 = Product("Mouse", 29.99)
p2 = Product("Mouse", 29.99)
print(p1 == p2)  # True

# __repr__ is generated
print(repr(product))  # Product(name='Laptop', price=999.99, in_stock=True)

You can provide default values for fields:

default_values.py
from dataclasses import dataclass


@dataclass
class Product:
    """Product whose trailing fields fall back to defaults when omitted."""

    name: str
    price: float
    in_stock: bool = True  # Default value
    discount: float = 0.0  # Default value


# Can create with or without defaults
product1 = Product("Laptop", 999.99)
print(product1)  # Product(name='Laptop', price=999.99, in_stock=True, discount=0.0)

product2 = Product("Mouse", 29.99, False, 0.1)
print(product2)  # Product(name='Mouse', price=29.99, in_stock=False, discount=0.1)

Making a dataclass frozen makes instances immutable:

frozen_dataclass.py
from dataclasses import dataclass


@dataclass(frozen=True)
class ImmutableUser:
    """User record whose fields cannot be reassigned after creation."""

    username: str
    email: str
    age: int


user = ImmutableUser("alice", "alice@example.com", 30)
print(user)  # ImmutableUser(username='alice', email='alice@example.com', age=30)
# user.age = 31  # FrozenInstanceError - cannot modify frozen dataclass

Pass order=True to generate the comparison operators (<, >, <=, >=); instances are compared as tuples of their fields, in definition order:

ordering.py
from dataclasses import dataclass


@dataclass(order=True)
class Point:
    """Point that sorts by x first and then by y."""

    x: int
    y: int


p1 = Point(1, 2)
p2 = Point(2, 3)
p3 = Point(1, 2)
print(p1 < p2)  # True - compares (1,2) < (2,3)
print(p1 <= p3)  # True - compares (1,2) <= (1,2)
print(p2 > p1)  # True - compares (2,3) > (1,2)

Use __post_init__ to perform additional processing after initialization:

post_init.py
from dataclasses import dataclass


@dataclass
class Employee:
    """Employee whose net salary is derived from salary and tax rate."""

    name: str
    salary: float
    tax_rate: float = 0.2
    # Will be calculated. Any value passed to __init__ is overwritten in
    # __post_init__; declare it with field(init=False) if it should not be
    # an __init__ parameter at all.
    net_salary: float = 0.0

    def __post_init__(self):
        """Called after __init__"""
        self.net_salary = self.salary * (1 - self.tax_rate)


employee = Employee("Alice", 50000.0, tax_rate=0.25)
print(employee.net_salary)  # 37500.0 (calculated automatically)

For mutable default values (like lists or dicts), use field(default_factory=...) so that every instance gets its own fresh object:

default_factory.py
from dataclasses import dataclass, field


@dataclass
class ShoppingCart:
    """Cart belonging to one customer, holding a list of item names."""

    customer_name: str
    items: list = field(default_factory=list)  # Mutable default

    def add_item(self, item: str):
        """Append one item to this cart's own list."""
        self.items.append(item)


# Each instance gets its own list
cart1 = ShoppingCart("Alice")
cart2 = ShoppingCart("Bob")
cart1.add_item("Laptop")
cart2.add_item("Mouse")
print(cart1.items)  # ['Laptop']
print(cart2.items)  # ['Mouse'] - separate lists!

Without default_factory (WRONG - dataclasses rejects a list, dict or set default with a ValueError when the class is created):

wrong_mutable_default.py
from dataclasses import dataclass

# @dataclass refuses a list, dict or set as a field default: the decorator
# raises ValueError while the class is being created, so the class never
# exists and the "one list shared by every instance" bug cannot occur.
try:

    @dataclass
    class BadCart:
        items: list = []  # WRONG! Rejected by @dataclass

except ValueError as error:
    # Keep the message in a module-level name; `error` itself is unbound
    # again as soon as the except block ends.
    mutable_default_error = str(error)
    print(mutable_default_error)
    # mutable default <class 'list'> for field items is not allowed: use default_factory

With default_factory (CORRECT):

correct_mutable_default.py
from dataclasses import dataclass, field


@dataclass
class GoodCart:
    """Cart whose item list is created fresh for every instance."""

    items: list = field(default_factory=list)  # Correct!


cart1 = GoodCart()
cart2 = GoodCart()
cart1.items.append("Item1")
print(cart2.items)  # [] - Separate lists!
ecommerce_dataclasses.py
from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Optional


@dataclass
class Address:
    """Postal address an order ships to."""

    street: str
    city: str
    state: str
    zip_code: str


@dataclass
class Product:
    """Catalogue entry identified by its SKU."""

    sku: str
    name: str
    price: float
    in_stock: bool = True
    tags: List[str] = field(default_factory=list)


@dataclass
class OrderItem:
    """One order line: a product and how many units of it."""

    product: Product
    quantity: int

    @property
    def total(self) -> float:
        """Price of this line (unit price times quantity)."""
        return self.product.price * self.quantity


@dataclass
class Order:
    """Customer order made up of order items."""

    order_id: str
    customer_name: str
    items: List[OrderItem] = field(default_factory=list)
    # None until an address is supplied, hence Optional.
    shipping_address: Optional[Address] = None
    order_date: datetime = field(default_factory=datetime.now)
    status: str = "pending"

    @property
    def total(self) -> float:
        """Sum of all line totals, computed from the current items.

        Deriving the value on access (like OrderItem.total) keeps it correct
        however ``items`` is modified, instead of caching a copy that has to
        be refreshed after every change.
        """
        return sum(item.total for item in self.items)

    def add_item(self, product: Product, quantity: int):
        """Add item to order"""
        self.items.append(OrderItem(product, quantity))


# Usage
laptop = Product("LAP-001", "Laptop", 999.99, tags=["electronics", "computers"])
mouse = Product("MOU-001", "Mouse", 29.99, tags=["electronics", "accessories"])
address = Address("123 Main St", "San Francisco", "CA", "94102")

order = Order("ORD-001", "Alice", shipping_address=address)
order.add_item(laptop, 1)
order.add_item(mouse, 2)

print(order)
# Order(order_id='ORD-001', customer_name='Alice',
# items=[OrderItem(...), OrderItem(...)],
# shipping_address=Address(...),
# order_date=datetime(...), status='pending')
print(f"Total: ${order.total:.2f}")  # Total: $1059.97

You can customize fields using the field() function:

field_customization.py
from dataclasses import dataclass, field


@dataclass
class User:
    """User record demonstrating per-field options set through field()."""

    username: str
    email: str
    # Fields without a default must come before fields with one; otherwise
    # @dataclass raises TypeError when the class is created.
    _password: str = field(repr=False)  # Don't show in repr
    age: int = field(default=18)  # Default value
    tags: list = field(default_factory=list)  # Mutable default

    def __post_init__(self):
        """Reject a negative age right after the generated __init__ runs."""
        if self.age < 0:
            raise ValueError("Age cannot be negative")


user = User("alice", "alice@example.com", age=25, _password="secret")
print(user)  # User(username='alice', email='alice@example.com', age=25, tags=[])
# Password not shown in repr due to repr=False
Diagram