Source code for phitodeep.layers.activation

import numpy as np

from .base import Layer



[docs]
class ReLu(Layer):
    """Rectified linear unit activation: ``max(0, x)`` applied element-wise."""

    def __init__(self) -> None:
        super().__init__("relu")

    def forward(self, X):
        """Return ``X`` with every negative entry replaced by zero.

        The raw input is stored under ``self.cache["X"]`` so that
        ``backward`` can rebuild the activation mask.
        """
        self.cache["X"] = X
        activated = np.maximum(0, X)
        return activated

    def backward(self, dL_dZ):
        """Propagate the upstream gradient ``dL_dZ`` through the ReLU.

        The local derivative is 1 where the cached input is strictly
        positive and 0 everywhere else (including exactly at zero).
        """
        positive_mask = self.cache["X"] > 0
        return dL_dZ * positive_mask.astype(float)

    def copy(self):
        """Return a new ``ReLu`` whose cache is ``self.cache.copy()``."""
        duplicate = ReLu()
        duplicate.cache = self.cache.copy()
        return duplicate




[docs]
class LeakyReLu(Layer):
    """Leaky ReLU activation: ``x`` for ``x > 0``, ``alpha * x`` otherwise.

    Args:
        alpha: Slope applied to non-positive inputs (default ``0.01``).
    """

    def __init__(self, alpha=0.01) -> None:
        super().__init__("leaky_relu")
        self.alpha = alpha

    def forward(self, X):
        """Apply the leaky ReLU element-wise and cache ``X`` for ``backward``."""
        self.cache["X"] = X
        return np.where(X > 0, X, self.alpha * X)

    def backward(self, dL_dZ):
        """Propagate the upstream gradient ``dL_dZ`` through the activation.

        The local derivative is 1 where the cached input is strictly
        positive and ``alpha`` elsewhere.
        """
        X = self.cache["X"]
        local_grad = np.where(X > 0, 1, self.alpha)
        return dL_dZ * local_grad

    def copy(self):
        """Return a new ``LeakyReLu`` with the same ``alpha`` and a copied cache."""
        # Pass alpha explicitly: constructing with the default would silently
        # reset a custom slope to 0.01 on the copy.
        new_layer = LeakyReLu(self.alpha)
        new_layer.cache = self.cache.copy()
        return new_layer




[docs]
class GELU(Layer):
    """GELU activation using the tanh approximation.

    Computes ``0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + c * x**3)))`` where
    ``c`` is ``self.constant`` (``0.044715``).
    """

    def __init__(self) -> None:
        super().__init__("gelu")
        self.constant = 0.044715

    def forward(self, X):
        """Apply the approximate GELU element-wise.

        Caches the input as ``self.cache["X"]`` and the tanh term as
        ``self.cache["t"]`` for use in ``backward``.
        """
        self.cache["X"] = X
        inner = np.sqrt(2.0 / np.pi) * (X + self.constant * X ** 3)
        # np.tanh saturates cleanly to +/-1; an exp-based quotient would
        # overflow to inf/inf = NaN for large |inner|.
        t = np.tanh(inner)
        self.cache["t"] = t
        return 0.5 * X * (1 + t)

    def backward(self, dL_dZ):
        """Propagate the upstream gradient ``dL_dZ`` through the GELU.

        With ``u = sqrt(2 / pi) * (x + c * x**3)`` and ``t = tanh(u)``::

            d/dx [0.5 * x * (1 + t)]
                = 0.5 * (1 + t) + 0.5 * x * (1 - t**2) * du/dx
            du/dx = sqrt(2 / pi) * (1 + 3 * c * x**2)
        """
        t = self.cache["t"]
        X = self.cache["X"]
        # Chain-rule factor of the tanh argument; it scales only the second
        # term of the product rule, and the power is x**2 (derivative of x**3).
        d_inner = np.sqrt(2.0 / np.pi) * (1 + 3 * self.constant * X ** 2)
        local_grad = 0.5 * (1 + t) + 0.5 * X * (1 - t ** 2) * d_inner
        return dL_dZ * local_grad

    def copy(self):
        """Return a new ``GELU`` whose cache is ``self.cache.copy()``."""
        new_layer = GELU()
        new_layer.cache = self.cache.copy()
        return new_layer




[docs]
class Swish(Layer):
    """Swish activation: ``x * sigmoid(x)`` applied element-wise."""

    def __init__(self) -> None:
        super().__init__("swish")

    def forward(self, X):
        """Return ``X * sigmoid(X)``.

        Both the input (``self.cache["X"]``) and the sigmoid gate
        (``self.cache["Z"]``) are stored for ``backward``.
        """
        self.cache["X"] = X
        gate = 1 / (1 + np.exp(-X))
        self.cache["Z"] = gate
        return X * gate

    def backward(self, dL_dZ):
        """Propagate the upstream gradient ``dL_dZ`` through Swish.

        With ``s = sigmoid(x)`` the local derivative is
        ``s + x * s * (1 - s)``.
        """
        inputs = self.cache["X"]
        gate = self.cache["Z"]
        local_grad = gate + inputs * gate * (1 - gate)
        return dL_dZ * local_grad

    def copy(self):
        """Return a new ``Swish`` whose cache is ``self.cache.copy()``."""
        duplicate = Swish()
        duplicate.cache = self.cache.copy()
        return duplicate




[docs]
class Sigmoid(Layer):
    """Logistic sigmoid activation: ``1 / (1 + exp(-x))`` element-wise."""

    def __init__(self) -> None:
        super().__init__("sigmoid")

    def forward(self, X):
        """Return ``sigmoid(X)``.

        Stores the input as ``self.cache["X"]`` and the output as
        ``self.cache["Z"]``; ``backward`` only needs the latter.
        """
        cache = self.cache
        cache["X"] = X
        cache["Z"] = 1 / (1 + np.exp(-X))
        return cache["Z"]

    def backward(self, dL_dZ):
        """Propagate the upstream gradient ``dL_dZ`` through the sigmoid.

        The cached ``Z`` already holds ``sigmoid(X)``, so the local
        derivative is simply ``Z * (1 - Z)``.
        """
        activated = self.cache["Z"]
        complement = 1 - activated
        return dL_dZ * activated * complement

    def copy(self):
        """Return a new ``Sigmoid`` whose cache is ``self.cache.copy()``."""
        duplicate = Sigmoid()
        duplicate.cache = self.cache.copy()
        return duplicate





[docs]
class Tanh(Layer):
    """Hyperbolic tangent activation applied element-wise."""

    def __init__(self) -> None:
        super().__init__("tanh")

    def forward(self, X):
        """Return ``tanh(X)``.

        Stores the input as ``self.cache["X"]`` and the output as
        ``self.cache["Z"]``; ``backward`` only needs the latter.
        """
        self.cache["X"] = X
        # Use np.tanh instead of (e^x - e^-x) / (e^x + e^-x): the explicit
        # quotient overflows to inf / inf = NaN once |x| exceeds ~710,
        # whereas np.tanh saturates to +/-1.
        self.cache["Z"] = np.tanh(X)
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """Propagate the upstream gradient ``dL_dZ`` through tanh.

        The cached ``Z`` already holds ``tanh(X)``, so the local derivative
        is ``1 - Z**2``.
        """
        Z = self.cache["Z"]
        dL_dX = dL_dZ * (1 - Z ** 2)
        return dL_dX

    def copy(self):
        """Return a new ``Tanh`` whose cache is ``self.cache.copy()``."""
        new_layer = Tanh()
        new_layer.cache = self.cache.copy()
        return new_layer





[docs]
class Softmax(Layer):
    """Softmax activation.

    Inputs with fewer than two dimensions are normalised over all
    elements; otherwise normalisation runs along axis 1.
    """

    def __init__(self) -> None:
        super().__init__("softmax")

    def forward(self, X):
        """Return the softmax of ``X``.

        The maximum along the normalisation axis is subtracted before
        exponentiating so the largest exponent is ``exp(0) = 1``, which
        avoids overflow. Stores the input as ``self.cache["X"]`` and the
        result as ``self.cache["Z"]``.
        """
        self.cache["X"] = X
        axis = None if X.ndim < 2 else 1
        max_a = np.max(X, axis=axis, keepdims=True)

        # Exponentiate once and reuse the array for both the numerator and
        # the normalising sum.
        shifted_exp = np.exp(X - max_a)
        divisor = np.sum(shifted_exp, axis=axis, keepdims=True)

        self.cache["Z"] = shifted_exp / divisor
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """
        Backpropagate through Softmax activation.

        When paired with CategoricalCrossEntropy, the combined gradient
        (y_pred - one_hot(y_true)) / N is computed entirely in the loss,
        so this layer is a straight pass-through.

        NOTE: no softmax Jacobian is applied here, so the returned value is
        only the true input gradient under that pairing.
        """
        return dL_dZ

    def copy(self):
        """Return a new ``Softmax`` whose cache is ``self.cache.copy()``."""
        new_layer = Softmax()
        new_layer.cache = self.cache.copy()
        return new_layer





[docs]
class ELU(Layer):
    """Exponential linear unit: ``x`` for ``x > 0``, ``alpha * (exp(x) - 1)`` otherwise.

    Args:
        alpha: Scale of the negative branch (default ``1.0``); kept on the
            instance as ``alpha_activation``.
    """

    def __init__(self, alpha=1.0) -> None:
        super().__init__("elu")
        self.alpha_activation = alpha

    def forward(self, X):
        """Apply the ELU element-wise.

        Stores the input as ``self.cache["X"]`` and the output as
        ``self.cache["Z"]``.
        """
        cache = self.cache
        cache["X"] = X
        negative_branch = self.alpha_activation * (np.exp(X) - 1)
        cache["Z"] = np.where(X > 0, X, negative_branch)
        return cache["Z"]

    def backward(self, dL_dZ):
        """Propagate the upstream gradient ``dL_dZ`` through the ELU.

        The local derivative is 1 where the cached input is strictly
        positive and ``alpha * exp(x)`` elsewhere.
        """
        inputs = self.cache["X"]
        negative_slope = self.alpha_activation * np.exp(inputs)
        local_grad = np.where(inputs > 0, 1.0, negative_slope)
        return dL_dZ * local_grad

    def copy(self):
        """Return a new ``ELU`` with the same alpha and a copied cache."""
        duplicate = ELU(self.alpha_activation)
        duplicate.cache = self.cache.copy()
        return duplicate