Source code for phitodeep.layers.base

import numpy as np

from ..optimization.initialization import Initializer, InitType, He



[docs]
class Layer:
    """
    Base class for all layers in the network.

    Subclasses are expected to override ``forward``, ``backward`` and
    ``copy``; the versions defined here only raise ``NotImplementedError``.

    Attributes set by ``__init__``:
        name: label for the layer, used in the error messages below.
        cache: dict, initially empty, available for values a subclass wants
            to keep between its forward and backward passes.
        grads: dict, initially empty, available for parameter gradients.
        initializer: the initializer object associated with this layer.
    """

    def __init__(self, name: str, initializer: "Initializer | None" = None) -> None:
        """
        Args:
            name: label for the layer.
            initializer: initializer to attach to the layer. When omitted,
                a new ``Initializer()`` is created for this instance.
        """
        self.name = name
        self.cache = {}
        self.grads = {}
        # Build the default here rather than in the signature: a call in a
        # default argument is evaluated once, when the function is defined,
        # and the resulting object would be shared by every layer.
        self.initializer = Initializer() if initializer is None else initializer

    def forward(self, X: np.ndarray):
        """
        Forward pass through the block.

        Args:
            X: input to this block

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError(f"Block '{self.name}' must implement forward method")

    def backward(self, dL_dZ):
        """
        Backward pass through the block.

        Args:
            dL_dZ: gradient of loss w.r.t. output of this block

        Returns:
            dL_dX: gradient of loss w.r.t. input (to pass to previous layer)

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError(f"Block '{self.name}' must implement backward method")

    def copy(self):
        """
        Return a copy of this block.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError(f"Block '{self.name}' must implement copy method")





[docs]
class Flatten(Layer):
    """
    Flattens the input tensor into a 2D tensor.

    The first axis is kept as-is and all remaining axes are merged into one.
    The layer has no trainable parameters.
    """

    def __init__(self):
        super().__init__("flatten", He())

    def forward(self, X: np.ndarray) -> np.ndarray:
        """
        X: (batch_size, d1, d2, ...) -> (batch_size, d1 * d2 * ...)

        The input is stored in ``self.cache["X"]`` so that ``backward`` can
        restore the original shape.
        """
        self.cache["X"] = X
        # Compute the flattened width explicitly instead of passing -1:
        # NumPy cannot infer a -1 dimension for a zero-size array, so an
        # empty batch would otherwise raise ValueError.
        n_features = int(np.prod(X.shape[1:]))
        return X.reshape(X.shape[0], n_features)

    def backward(self, dL_dZ):
        """
        dL_dZ: (batch_size, d1 * d2 * ...) -> (batch_size, d1, d2, ...)

        Reshapes the incoming gradient to the shape of the input seen by the
        most recent ``forward`` call. Raises ``KeyError`` if ``forward`` has
        not been called yet, since nothing is cached.
        """
        original_shape = self.cache["X"].shape
        return dL_dZ.reshape(original_shape)

    def copy(self):
        """
        Return a new ``Flatten`` with its own cache dict.

        The dict is copied shallowly, so cached arrays are shared with this
        layer rather than duplicated.
        """
        new_layer = Flatten()
        new_layer.cache = self.cache.copy()
        return new_layer





[docs]
class Dense(Layer):
    """
    Fully connected layer.

    Computes ``Z = X . W + b`` with ``W`` of shape (input_size, output_size)
    and ``b`` of shape (output_size,).
    """

    def __init__(self, input_size: int, output_size: int, initializer: "Initializer | None" = None):
        """
        Args:
            input_size: number of input features.
            output_size: number of output features.
            initializer: controls the weight distribution and scale. When
                omitted, a new ``He()`` is created for this layer.
        """
        # Create the default per instance; a call in the signature would be
        # evaluated once at definition time and shared by all Dense layers.
        if initializer is None:
            initializer = He()
        # Layer.__init__ already sets up empty ``cache`` and ``grads`` dicts.
        super().__init__("dense", initializer)
        self.input_size = input_size
        self.output_size = output_size

        # Initialize weights and biases
        rng = np.random.default_rng()
        shape = (input_size, output_size)
        if initializer.init_type == InitType.NORMAL:
            weights = rng.normal(size=shape)
        else:
            # NOTE(review): Generator.uniform defaults to [0, 1), so every raw
            # weight is non-negative before scaling - confirm this is what
            # the initializer's get_scale expects.
            weights = rng.uniform(size=shape)
        self.W = weights * initializer.get_scale(weights)
        self.b = np.zeros(output_size)

    def forward(self, X: np.ndarray) -> np.ndarray:
        """
        X: (batch_size, input_size) -> (batch_size, output_size)

        The input is stored in ``self.cache["X"]`` for use by ``backward``.
        """
        self.cache["X"] = X
        return np.dot(X, self.W) + self.b

    def backward(self, dL_dZ):
        """
        Backpropagate through Dense layer.

        Stores the parameter gradients in ``self.grads["W"]`` and
        ``self.grads["b"]``, both divided by the batch size. The returned
        input gradient is not divided by the batch size.

        Args:
            dL_dZ: (batch_size, output_size) - gradient of loss w.r.t. output

        Returns:
            dL_dX: (batch_size, input_size) - gradient to pass to previous layer
        """
        X = self.cache["X"]
        batch_size = X.shape[0]

        # Gradient w.r.t. weights: (1/m) * X^T @ dL_dZ
        self.grads["W"] = np.dot(X.T, dL_dZ) / batch_size

        # Gradient w.r.t. bias: (1/m) * sum(dL_dZ)
        self.grads["b"] = np.sum(dL_dZ, axis=0) / batch_size

        # Gradient w.r.t. input: dL_dZ @ W^T
        return np.dot(dL_dZ, self.W.T)

    def copy(self):
        """
        Return a new ``Dense`` layer with copies of the weights, bias,
        gradients and cached arrays of this one.

        The initializer object itself is passed through, not duplicated.
        """
        # Pass the initializer along so the copy keeps this layer's
        # configuration instead of falling back to the default.
        new_layer = Dense(self.input_size, self.output_size, self.initializer)
        # The constructor draws fresh random weights; replace them with
        # copies of the current parameters.
        new_layer.W = self.W.copy()
        new_layer.b = self.b.copy()
        new_layer.grads = {k: v.copy() for k, v in self.grads.items()}
        new_layer.cache = {k: v.copy() for k, v in self.cache.items()}
        return new_layer