Source code for phitodeep.layers.base

import numpy as np

from ..optimization.initialization import Initializer, InitType, He


class Layer:
    """
    Base class for all layers in the network.

    Concrete layers are expected to override ``forward``, ``backward`` and
    ``copy``; the versions defined here only raise ``NotImplementedError``.

    Attributes:
        name: Label of the layer; it is interpolated into the error messages
            raised by the unimplemented methods.
        cache: Dict created empty here. Subclasses in this module store the
            forward-pass input in it for use during the backward pass.
        grads: Dict created empty here. Subclasses in this module store
            parameter gradients in it.
        initializer: The ``Initializer`` instance handed to the constructor.
    """

    # NOTE: the default ``Initializer()`` is built once, when this ``def`` is
    # evaluated, so every layer relying on the default shares that instance.
    def __init__(self, name: str, initializer: Initializer = Initializer()) -> None:
        self.name = name
        self.initializer = initializer
        # Fresh dicts per instance so layers never share state through them.
        self.cache = {}
        self.grads = {}

    def forward(self, X: np.ndarray):
        """
        Forward pass through the block.

        Args:
            X: input array for this layer

        Raises:
            NotImplementedError: always; subclasses must override this method.
        """
        message = f"Block '{self.name}' must implement forward method"
        raise NotImplementedError(message)

    def backward(self, dL_dZ):
        """
        Backward pass through the block.

        Args:
            dL_dZ: gradient of loss w.r.t. output of this block

        Returns:
            dL_dX: gradient of loss w.r.t. input (to pass to previous layer)

        Raises:
            NotImplementedError: always; subclasses must override this method.
        """
        message = f"Block '{self.name}' must implement backward method"
        raise NotImplementedError(message)

    def copy(self):
        """
        Return a copy of this layer.

        Raises:
            NotImplementedError: always; subclasses must override this method.
        """
        message = f"Block '{self.name}' must implement copy method"
        raise NotImplementedError(message)
class Flatten(Layer):
    """
    Flattens the input tensor into a 2D tensor.

    The leading (batch) axis is kept and all remaining axes are collapsed
    into one. The layer has no trainable parameters of its own.
    """

    def __init__(self):
        # Registers the layer under the fixed name "flatten" with a He
        # initializer, exactly as the base constructor expects.
        super().__init__("flatten", He())

    def forward(self, X: np.ndarray):
        """
        X: (batch_size, ...) -> (batch_size, product of the remaining dims)

        The input is stored in ``self.cache["X"]`` so that ``backward`` can
        recover its original shape.
        """
        self.cache["X"] = X
        batch_size = X.shape[0]
        return X.reshape(batch_size, -1)

    def backward(self, dL_dZ):
        """
        dL_dZ: reshaped back to the shape of the input cached by ``forward``.

        Raises ``KeyError`` if ``forward`` has not populated the cache yet.
        """
        input_shape = self.cache["X"].shape
        return dL_dZ.reshape(input_shape)

    def copy(self):
        """
        Return a new ``Flatten`` whose cache is a shallow copy of this one's
        (the cached array object itself is shared, not duplicated).
        """
        clone = Flatten()
        clone.cache = self.cache.copy()
        return clone
class Dense(Layer):
    """
    Fully connected layer computing ``np.dot(X, W) + b``.

    Attributes:
        input_size: Number of input features (rows of ``W``).
        output_size: Number of output features (columns of ``W``, length of ``b``).
        W: Weight matrix of shape ``(input_size, output_size)``.
        b: Bias vector of shape ``(output_size,)``, initialised to zeros.
    """

    # NOTE: the default ``He()`` is built once, when this ``def`` is evaluated,
    # so every Dense layer relying on the default shares that instance.
    def __init__(self, input_size: int, output_size: int, initializer: Initializer = He()):
        """
        Args:
            input_size: number of input features
            output_size: number of output features
            initializer: supplies ``init_type`` (selects the sampling
                distribution) and ``get_scale`` (factor applied to the samples)
        """
        # The base constructor stores the initializer and creates the empty
        # ``cache`` and ``grads`` dicts.
        super().__init__("dense", initializer)
        self.input_size = input_size
        self.output_size = output_size

        # Initialize weights and biases
        rng = np.random.default_rng()
        shape = (input_size, output_size)
        if initializer.init_type == InitType.NORMAL:
            weights = rng.normal(size=shape)
        else:
            # NOTE(review): with no bounds given, rng.uniform samples from
            # [0, 1), so the unscaled weights are all non-negative. Confirm
            # whether a symmetric range (e.g. low=-1, high=1) was intended.
            weights = rng.uniform(size=shape)
        self.W = weights * initializer.get_scale(weights)
        self.b = np.zeros(output_size)

    def forward(self, X: np.ndarray):
        """
        X: (batch_size, input_size) -> (batch_size, output_size)

        The input is stored in ``self.cache["X"]`` for use by ``backward``.
        """
        self.cache["X"] = X
        Z = np.dot(X, self.W) + self.b
        return Z

    def backward(self, dL_dZ):
        """
        Backpropagate through Dense layer.

        Stores the parameter gradients in ``self.grads["W"]`` and
        ``self.grads["b"]``; both are divided by the batch size.

        Args:
            dL_dZ: (batch_size, output_size) - gradient of loss w.r.t. output

        Returns:
            dL_dX: (batch_size, input_size) - gradient to pass to previous layer

        Raises:
            KeyError: if ``forward`` has not populated the cache yet.
        """
        X = self.cache["X"]
        m = X.shape[0]  # batch size

        # Gradient w.r.t. weights: (1/m) * X^T @ dL_dZ
        self.grads["W"] = np.dot(X.T, dL_dZ) / m
        # Gradient w.r.t. bias: (1/m) * sum(dL_dZ)
        self.grads["b"] = np.sum(dL_dZ, axis=0) / m

        # Gradient w.r.t. input: dL_dZ @ W^T
        dL_dX = np.dot(dL_dZ, self.W.T)
        return dL_dX

    def copy(self):
        """
        Return an independent copy of this layer.

        The clone is built with this layer's own initializer (previously the
        initializer was dropped and the clone fell back to the default), then
        its parameters, gradients and cache are overwritten with copies of
        this layer's arrays so the two layers share no array storage.
        """
        new_layer = Dense(self.input_size, self.output_size, self.initializer)
        new_layer.W = self.W.copy()
        new_layer.b = self.b.copy()
        new_layer.grads = {k: v.copy() for k, v in self.grads.items()}
        new_layer.cache = {k: v.copy() for k, v in self.cache.items()}
        return new_layer