Day 1 - Part 3: Deep Learning with Flexibility
Master 2 (203) in Financial Markets, Paris Dauphine - PSL University
2025-10-31
Installing PyTorch:
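A typical pip install (the exact command depends on your OS and CUDA version; use the selector on pytorch.org to get the right one):
pip install torch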
Verify installation:
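For example, check that the library imports and whether a GPU is visible:
import torch
print(torch.__version__)           # installed PyTorch version
print(torch.cuda.is_available())   # True if a CUDA-capable GPU is detected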
Tensors are the fundamental data structure:
import torch
import numpy as np
# Create tensors
x = torch.tensor([1, 2, 3, 4, 5])
y = torch.zeros(3, 4)
z = torch.ones(2, 3)
random = torch.randn(2, 3) # Normal distribution
# From numpy
arr = np.array([1, 2, 3])
tensor_from_numpy = torch.from_numpy(arr)
# To numpy
numpy_array = tensor_from_numpy.numpy()
print(f"Tensor shape: {x.shape}")
print(f"Tensor dtype: {x.dtype}")# Basic operations
a = torch.tensor([1.0, 2.0, 3.0])
b = torch.tensor([4.0, 5.0, 6.0])
# Element-wise operations
c = a + b
d = a * b
e = torch.sin(a)
# Matrix operations
A = torch.randn(3, 4)
B = torch.randn(4, 5)
C = torch.mm(A, B) # Matrix multiplication
# Reshaping
x = torch.randn(12)
y = x.view(3, 4) # Reshape to 3x4
z = x.view(-1, 2) # Reshape to (?, 2)
print(f"Result shape: {C.shape}")# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
# Move tensors to GPU
x = torch.randn(1000, 1000)
x_gpu = x.to(device)
# Or create directly on GPU
y_gpu = torch.randn(1000, 1000, device=device)
# Compute on GPU
z_gpu = torch.mm(x_gpu, y_gpu)
# Move back to CPU
z_cpu = z_gpu.to('cpu')
Automatic differentiation (autograd) is PyTorch's most powerful feature:
import torch
# Create tensor with gradient tracking
x = torch.tensor([2.0], requires_grad=True)
# Define function: y = x^3 + 2x^2 + 5
y = x**3 + 2*x**2 + 5
# Compute gradients (dy/dx = 3x^2 + 4x)
y.backward()
print(f"x = {x.item()}")
print(f"y = {y.item()}")
print(f"dy/dx = {x.grad.item()}") # Should be 3(4) + 4(2) = 20Useful for optimization and understanding functions:
import torch
import matplotlib.pyplot as plt
# Define function: f(x) = sin(x) + x^2/10
x = torch.linspace(-5, 5, 100, requires_grad=True)
f = torch.sin(x) + x**2 / 10
# Compute gradient
f.sum().backward() # Need scalar for backward()
df_dx = x.grad
# Plotting
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(x.detach().numpy(), f.detach().numpy())
plt.title('Function f(x)')
plt.subplot(1, 2, 2)
plt.plot(x.detach().numpy(), df_dx.numpy())
plt.title("Derivative f'(x)")
plt.show()
Using the torch.nn module:
import torch
import torch.nn as nn
# Define a simple neural network
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x
# Create model
model = SimpleNet(input_size=10, hidden_size=64, output_size=1)
print(model)
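As a quick sanity check (an illustrative sketch; dummy_input is just a placeholder name), pass a random batch through the network and inspect the output shape:
# Forward a dummy batch of 5 samples with 10 features each
dummy_input = torch.randn(5, 10)
output = model(dummy_input)
print(output.shape)  # torch.Size([5, 1])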
Approximating a non-linear function:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
# Generate data
X = np.linspace(-5, 5, 1000).reshape(-1, 1)
y = np.sin(X) + np.cos(2*X) + np.random.randn(1000, 1) * 0.1
# Convert to tensors
X_tensor = torch.FloatTensor(X)
y_tensor = torch.FloatTensor(y)
# Split data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
X_tensor, y_tensor, test_size=0.2, random_state=42
)
class RegressionNet(nn.Module):
    def __init__(self):
        super(RegressionNet, self).__init__()
        self.fc1 = nn.Linear(1, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        return x
# Create model
model = RegressionNet()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training loop
num_epochs = 100
losses = []
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    # Backward pass and optimization
    optimizer.zero_grad()  # Clear gradients
    loss.backward()        # Compute gradients
    optimizer.step()       # Update weights
    losses.append(loss.item())
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
# Evaluate
model.eval()
with torch.no_grad():
    y_pred = model(X_test)
    test_loss = criterion(y_pred, y_test)
print(f'Test Loss: {test_loss.item():.4f}')
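To visualise the fit, one option (a minimal sketch reusing the tensors defined above) is to plot the test predictions against the noisy targets:
import matplotlib.pyplot as plt
plt.scatter(X_test.numpy(), y_test.numpy(), s=5, label='True')
plt.scatter(X_test.numpy(), y_pred.numpy(), s=5, color='red', label='Predicted')
plt.legend()
plt.title('Test set: predictions vs targets')
plt.show()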
Creating a classifier:
from sklearn.datasets import make_moons
from sklearn.preprocessing import StandardScaler
# Generate data
X, y = make_moons(n_samples=1000, noise=0.1, random_state=42)
# Scale data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Convert to tensors
X_tensor = torch.FloatTensor(X_scaled)
y_tensor = torch.FloatTensor(y).view(-1, 1)
# Split
X_train, X_test, y_train, y_test = train_test_split(
X_tensor, y_tensor, test_size=0.2, random_state=42
)
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(2, 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.sigmoid(self.fc3(x))
        return x
model = Classifier()
criterion = nn.BCELoss()  # Binary Cross Entropy
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training
num_epochs = 100
train_losses = []
train_accuracies = []
for epoch in range(num_epochs):
    model.train()
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Calculate accuracy
    predictions = (outputs > 0.5).float()
    accuracy = (predictions == y_train).float().mean()
    train_losses.append(loss.item())
    train_accuracies.append(accuracy.item())
    if (epoch + 1) % 20 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Acc: {accuracy.item():.4f}')
# Evaluate on test set
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    test_predictions = (test_outputs > 0.5).float()
    test_accuracy = (test_predictions == y_test).float().mean()
print(f'Test Accuracy: {test_accuracy.item():.4f}')
# Confusion matrix
from sklearn.metrics import confusion_matrix, classification_report
y_pred_np = test_predictions.numpy()
y_test_np = y_test.numpy()
print("Confusion Matrix:")
print(confusion_matrix(y_test_np, y_pred_np))
print("\nClassification Report:")
print(classification_report(y_test_np, y_pred_np))
Efficient data loading:
from torch.utils.data import TensorDataset, DataLoader
# Create dataset
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
# Create data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# Training with batches
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
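Evaluation follows the same batched pattern; a minimal sketch that accumulates accuracy over test_loader:
# Batched evaluation: count correct predictions over the test loader
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X)
        predictions = (outputs > 0.5).float()
        correct += (predictions == batch_y).sum().item()
        total += batch_y.size(0)
print(f'Test Accuracy: {correct / total:.4f}')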
# Define custom loss
class CustomLoss(nn.Module):
    def __init__(self):
        super(CustomLoss, self).__init__()
    def forward(self, predictions, targets):
        # Example: MSE + L1 regularization
        mse = torch.mean((predictions - targets)**2)
        l1 = torch.mean(torch.abs(predictions - targets))
        return mse + 0.1 * l1
# Use custom loss
custom_criterion = CustomLoss()
loss = custom_criterion(outputs, y_train)
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
# Step LR: reduce LR every 30 epochs
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
# Reduce on plateau: reduce when metric stops improving
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
patience=10, verbose=True)
# In training loop
for epoch in range(num_epochs):
    # ... training code ...
    # Step the scheduler
    scheduler.step()           # For StepLR
    # scheduler.step(val_loss) # For ReduceLROnPlateau
Custom gradient computation:
import torch
def compute_gradient(func, x_val):
"""Compute gradient of function at x_val"""
x = torch.tensor([x_val], requires_grad=True)
y = func(x)
y.backward()
return x.grad.item()
# Example: gradient of x^2 at x=3
def f(x):
return x**2
grad = compute_gradient(f, 3.0)
print(f"Gradient of x^2 at x=3: {grad}") # Should be 6
# Example: gradient of sin(x) at x=0
grad = compute_gradient(lambda x: torch.sin(x), 0.0)
print(f"Gradient of sin(x) at x=0: {grad}") # Should be 1Using gradients to find minimum:
import torch
import torch.optim as optim
# Function to minimize: f(x) = (x-3)^2 + 5
x = torch.tensor([0.0], requires_grad=True)
optimizer = optim.SGD([x], lr=0.1)
# Optimization loop
for i in range(100):
    optimizer.zero_grad()
    # Compute function value
    y = (x - 3)**2 + 5
    # Compute gradient and update
    y.backward()
    optimizer.step()
    if i % 20 == 0:
        print(f'Iter {i}: x={x.item():.4f}, f(x)={y.item():.4f}')
print(f'Minimum at x={x.item():.4f}')  # Should be close to 3
# Save model
torch.save(model.state_dict(), 'model_weights.pth')
# Save entire model
torch.save(model, 'complete_model.pth')
# Save checkpoint (for resuming training)
checkpoint = {
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': loss
}
torch.save(checkpoint, 'checkpoint.pth')
# Load model
model = Classifier()
model.load_state_dict(torch.load('model_weights.pth'))
model.eval()
# Load checkpoint
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
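If the weights were saved on a GPU machine and must be loaded on a CPU-only machine, map_location remaps the storage (a common pattern, sketched here):
# Load GPU-trained weights on a CPU-only machine
state_dict = torch.load('model_weights.pth', map_location='cpu')
model.load_state_dict(state_dict)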
Strengths:
Considerations:
Tensors and models must be moved to the target device explicitly with .to(device).
✅ Use for:
❌ Not ideal for:
Resources: