Day 1 - Part 2: Neural Networks Made Easy
Master 2 (203) in Financial Markets, Paris Dauphine - PSL University
2025-10-31
We’ll use a Conda environment. Activate it first, then install with pip or conda as noted.
Windows (CPU):
macOS Apple Silicon (M1/M2):
macOS Intel without AVX (fallback using Keras 3 + PyTorch backend):
Verify (portable):
import os
import platform
# Use the torch backend on macOS by default; otherwise TensorFlow
if 'KERAS_BACKEND' not in os.environ:
    os.environ['KERAS_BACKEND'] = 'torch' if platform.system() == 'Darwin' else 'tensorflow'
import keras
from keras import layers
# Confirm the installed version and the selected backend
print("Keras", keras.__version__, "- backend:", keras.backend.backend())
# Core modules (portable)
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import Adam, SGD
from keras.losses import MeanSquaredError
from keras.metrics import Accuracy
from keras.callbacks import EarlyStopping
Sequential API (simpler):
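A minimal sketch of the Sequential API, with illustrative layer sizes (the 10-feature input is only an example):
# Sequential API: layers are stacked in order
from keras.models import Sequential
from keras.layers import Dense
seq_model = Sequential([
    Dense(32, activation='relu', input_shape=(10,)),  # hidden layer
    Dense(1, activation='sigmoid')                    # binary output
])
seq_model.summary()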
Functional API (more flexible):
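The same toy model written with the Functional API (a sketch; explicit input and output tensors make multi-input, multi-output, and shared-layer models possible):
# Functional API: build the graph by calling layers on tensors
from keras import Input, Model
from keras.layers import Dense
inputs = Input(shape=(10,))
x = Dense(32, activation='relu')(inputs)
outputs = Dense(1, activation='sigmoid')(x)
func_model = Model(inputs=inputs, outputs=outputs)
func_model.summary()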
Both APIs produce a model that is trained with fit().
Approximating a non-linear function:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
# Generate data: y = sin(x) + cos(2*x)
X = np.linspace(-5, 5, 1000).reshape(-1, 1)
y = np.sin(X) + np.cos(2 * X) + np.random.randn(1000, 1) * 0.1
# Split data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)
# Create model
model = keras.Sequential([
layers.Dense(64, activation='relu', input_shape=(1,)),
layers.Dense(64, activation='relu'),
layers.Dense(32, activation='relu'),
layers.Dense(1) # Output layer (no activation for regression)
])
# Compile model
model.compile(
optimizer='adam',
loss='mse', # Mean Squared Error
metrics=['mae'] # Mean Absolute Error
)
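For reference, the string shortcuts map to Keras objects; an equivalent, more verbose compile call would be:
# Same configuration with explicit optimizer/loss/metric objects
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.MeanAbsoluteError()]
)
Note that with the explicit metric object the history key becomes 'mean_absolute_error' instead of 'mae'.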
# View model architecture
model.summary()
# Train model
history = model.fit(
X_train, y_train,
epochs=100,
batch_size=32,
validation_split=0.2, # Use 20% of training data for validation
verbose=1
)
# Evaluate on test set
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f"Test MAE: {test_mae:.4f}")
# Make predictions
y_pred = model.predict(X_test)
import matplotlib.pyplot as plt
# Plot training history
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)
plt.subplot(1, 2, 2)
plt.plot(history.history['mae'], label='Training MAE')
plt.plot(history.history['val_mae'], label='Validation MAE')
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
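It also helps to look at the fit itself; a sketch that overlays the network's predictions on the noisy test targets:
# Compare predictions with the noisy targets (sorted by x for a clean curve)
order = np.argsort(X_test[:, 0])
plt.scatter(X_test, y_test, s=8, alpha=0.4, label='Noisy targets')
plt.plot(X_test[order], y_pred[order], color='red', label='NN prediction')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()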
Creating a 2D classification problem:
from sklearn.datasets import make_moons
# Generate moon-shaped data (non-linearly separable)
X, y = make_moons(n_samples=1000, noise=0.1, random_state=42)
# Split data
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)
# Normalize features (important for neural networks!)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Create classification model
classifier = keras.Sequential([
layers.Dense(32, activation='relu', input_shape=(2,)),
layers.Dropout(0.3), # Regularization
layers.Dense(16, activation='relu'),
layers.Dropout(0.2),
layers.Dense(1, activation='sigmoid') # Binary classification
])
# Compile
classifier.compile(
optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy']
)
classifier.summary()
# Train with early stopping
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(
monitor='val_loss',
patience=10,
restore_best_weights=True
)
history = classifier.fit(
X_train, y_train,
epochs=100,
batch_size=32,
validation_split=0.2,
callbacks=[early_stop],
verbose=1
)
# Evaluate
test_loss, test_acc = classifier.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")# Predict probabilities
y_proba = classifier.predict(X_test)
# Convert to binary predictions (threshold = 0.5)
y_pred = (y_proba > 0.5).astype(int)
# Evaluation metrics
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))
# Confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(f"Confusion Matrix:\n{cm}")Classifying into 3 classes:
Classifying into 3 classes:
from sklearn.datasets import make_blobs
# Generate 3-class data
X, y = make_blobs(
n_samples=1000, centers=3, n_features=2,
cluster_std=1.5, random_state=42
)
# Split and scale
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Create multi-class classifier
multi_clf = keras.Sequential([
layers.Dense(64, activation='relu', input_shape=(2,)),
layers.Dense(32, activation='relu'),
layers.Dense(3, activation='softmax') # 3 classes
])
# Compile
multi_clf.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy', # For integer labels
metrics=['accuracy']
)
# Train
history = multi_clf.fit(
X_train, y_train,
epochs=50,
batch_size=32,
validation_split=0.2,
verbose=0
)
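With softmax outputs and sparse_categorical_crossentropy, class predictions come from the arg-max over the predicted probabilities; a quick check (sketch):
# Evaluate, then turn softmax probabilities into class labels
import numpy as np
test_loss, test_acc = multi_clf.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_acc:.4f}")
proba = multi_clf.predict(X_test, verbose=0)  # shape (n_samples, 3)
y_pred = np.argmax(proba, axis=1)             # integer class per sample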
More control over training:
import tensorflow as tf
# Define optimizer and loss
optimizer = keras.optimizers.Adam(learning_rate=0.001)
loss_fn = keras.losses.SparseCategoricalCrossentropy()
# Training step
@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        predictions = multi_clf(x, training=True)
        loss = loss_fn(y, predictions)
    gradients = tape.gradient(loss, multi_clf.trainable_variables)
    optimizer.apply_gradients(zip(gradients, multi_clf.trainable_variables))
    return loss
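A sketch of how this step could be driven by hand, assuming the training data is batched with tf.data (batch size and number of epochs are illustrative):
# Manual training loop over mini-batches
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(1024).batch(32)
for epoch in range(10):
    for x_batch, y_batch in train_ds:
        loss = train_step(x_batch, y_batch)
    print(f"Epoch {epoch + 1}: last batch loss = {float(loss):.4f}")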
from tensorflow.keras.callbacks import (
    EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
)
callbacks = [
# Stop if validation loss doesn't improve
EarlyStopping(monitor='val_loss', patience=10,
restore_best_weights=True),
# Save best model
ModelCheckpoint('best_model.h5', save_best_only=True),
# Reduce learning rate when plateauing
ReduceLROnPlateau(monitor='val_loss', factor=0.5,
patience=5, min_lr=1e-7)
]
model.fit(X_train, y_train, epochs=100,
validation_split=0.2, callbacks=callbacks)
# Common activation functions
model = keras.Sequential([
layers.Dense(64, activation='relu'), # ReLU: max(0, x)
layers.Dense(64, activation='tanh'), # Tanh: [-1, 1]
layers.Dense(64, activation='sigmoid'), # Sigmoid: [0, 1]
layers.Dense(64, activation='elu'), # ELU: smooth ReLU
layers.Dense(64, activation='selu'), # SELU: self-normalizing
layers.Dense(64, activation='swish'), # Swish: x * sigmoid(x)
])
# For output layers:
# Regression: no activation or 'linear'
# Binary classification: 'sigmoid'
# Multi-class classification: 'softmax'
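To see how these functions differ, one can plot a few of them over a range of inputs (a sketch; any subset of the names above works):
# Visualize some activation functions
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
x = np.linspace(-4, 4, 200)
for name in ['relu', 'tanh', 'sigmoid', 'elu']:
    fn = keras.activations.get(name)
    plt.plot(x, fn(x).numpy(), label=name)
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()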
from tensorflow.keras import regularizers
# L1, L2, and Dropout regularization
model = keras.Sequential([
layers.Dense(64, activation='relu',
kernel_regularizer=regularizers.l2(0.01)),
layers.Dropout(0.5), # Drop 50% of neurons
layers.Dense(32, activation='relu',
kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.01)),
layers.Dropout(0.3),
layers.Dense(1, activation='sigmoid')
])
# Different optimizers
optimizers_list = [
keras.optimizers.SGD(learning_rate=0.01), # Stochastic Gradient Descent
keras.optimizers.SGD(learning_rate=0.01, momentum=0.9), # SGD with momentum
keras.optimizers.RMSprop(learning_rate=0.001), # RMSprop
keras.optimizers.Adam(learning_rate=0.001), # Adam (most popular)
keras.optimizers.Adamax(learning_rate=0.001), # Adamax
keras.optimizers.Nadam(learning_rate=0.001), # Nadam
]
# Adam is usually the best default choice
model.compile(
optimizer=keras.optimizers.Adam(learning_rate=0.001),
loss='mse'
)
# Save entire model
model.save('my_model.h5')
model.save('my_model.keras') # Keras format (recommended)
# Save only weights
model.save_weights('model_weights.h5')
# Load model
loaded_model = keras.models.load_model('my_model.keras')
# Load weights into existing model
model.load_weights('model_weights.h5')
# Make predictions with loaded model
predictions = loaded_model.predict(X_test)
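A quick sanity check (sketch, assuming model and X_test from above) that saving and loading round-trips the weights:
# The reloaded model should reproduce the original model's outputs
import numpy as np
assert np.allclose(model.predict(X_test, verbose=0),
                   loaded_model.predict(X_test, verbose=0), atol=1e-6)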
✅ Use for:
⚠️ Consider alternatives when:
Resources: