Source code for phitodeep.layers.activation

import numpy as np

from .base import Layer


class ReLu(Layer):
    """Rectified linear unit activation: element-wise ``max(0, x)``."""

    def __init__(self) -> None:
        super().__init__("relu")

    def forward(self, X):
        """Remember the input in ``self.cache["X"]`` and zero out negatives."""
        self.cache["X"] = X
        return np.maximum(0, X)

    def backward(self, dL_dZ):
        """Route the upstream gradient through the ReLU.

        The local derivative is 1 where the cached input was strictly
        positive and 0 everywhere else (including exactly at 0).
        """
        # Boolean mask converted to float so it scales the gradient directly.
        positive_mask = (self.cache["X"] > 0).astype(float)
        return dL_dZ * positive_mask

    def copy(self):
        """Return a new ``ReLu`` carrying a shallow copy of this layer's cache."""
        clone = ReLu()
        clone.cache = self.cache.copy()
        return clone
class LeakyReLu(Layer):
    """Leaky ReLU activation: ``x`` for ``x > 0``, otherwise ``alpha * x``."""

    def __init__(self, alpha=0.01) -> None:
        """Create the layer.

        Args:
            alpha: Slope applied to non-positive inputs.
        """
        super().__init__("leaky_relu")
        self.alpha = alpha

    def forward(self, X):
        """Cache the input under ``"X"`` and apply the leaky rectifier."""
        self.cache["X"] = X
        return np.where(X > 0, X, self.alpha * X)

    def backward(self, dL_dZ):
        """Scale the upstream gradient by the local slope.

        The slope is 1 where the cached input was strictly positive and
        ``alpha`` elsewhere.
        """
        X = self.cache["X"]
        return dL_dZ * np.where(X > 0, 1, self.alpha)

    def copy(self):
        """Return a new ``LeakyReLu`` with the same ``alpha`` and a shallow
        copy of the cache."""
        # Pass alpha through; constructing with the default would silently
        # reset a custom slope on the copy.
        new_layer = LeakyReLu(self.alpha)
        new_layer.cache = self.cache.copy()
        return new_layer
class GELU(Layer):
    """GELU activation using the tanh approximation.

    ``gelu(x) = 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + c * x**3)))``
    with ``c = self.constant``.
    """

    def __init__(self) -> None:
        super().__init__("gelu")
        # Cubic coefficient of the tanh approximation.
        self.constant = 0.044715

    def forward(self, X):
        """Apply the activation, caching ``X`` and the tanh term ``t``."""
        self.cache["X"] = X
        inner = np.sqrt(2.0 / np.pi) * (X + self.constant * X ** 3)
        # np.tanh saturates cleanly to +/-1 for large |inner|; an
        # exp-ratio formulation overflows to inf/inf = NaN there.
        t = np.tanh(inner)
        self.cache["t"] = t
        return 0.5 * X * (1 + t)

    def backward(self, dL_dZ):
        """Multiply the upstream gradient by d gelu / dX.

        With ``u = sqrt(2 / pi) * (X + c * X**3)`` and ``t = tanh(u)``::

            d gelu / dX = 0.5 * (1 + t) + 0.5 * X * (1 - t**2) * du/dX
            du/dX       = sqrt(2 / pi) * (1 + 3 * c * X**2)
        """
        t = self.cache["t"]
        X = self.cache["X"]
        # Chain-rule factor of the tanh argument: note the X**2, and that it
        # multiplies only the second term, not the 0.5 * (1 + t) term.
        d_inner = np.sqrt(2.0 / np.pi) * (1 + 3 * self.constant * X ** 2)
        d_gelu = 0.5 * (1 + t) + 0.5 * X * (1 - t ** 2) * d_inner
        return dL_dZ * d_gelu

    def copy(self):
        """Return a new ``GELU`` carrying a shallow copy of this layer's cache."""
        new_layer = GELU()
        new_layer.cache = self.cache.copy()
        return new_layer
class Swish(Layer):
    """Swish activation: ``x * sigmoid(x)``."""

    def __init__(self) -> None:
        super().__init__("swish")

    def forward(self, X):
        """Cache the input (``"X"``) and its sigmoid (``"Z"``); return their product."""
        self.cache["X"] = X
        gate = 1 / (1 + np.exp(-X))
        self.cache["Z"] = gate
        return X * gate

    def backward(self, dL_dZ):
        """Scale the upstream gradient by the Swish derivative.

        With ``Z = sigmoid(X)`` the local derivative is
        ``Z + X * Z * (1 - Z)``.
        """
        X, Z = self.cache["X"], self.cache["Z"]
        local_grad = Z + X * Z * (1 - Z)
        return dL_dZ * local_grad

    def copy(self):
        """Return a new ``Swish`` carrying a shallow copy of this layer's cache."""
        clone = Swish()
        clone.cache = self.cache.copy()
        return clone
class Sigmoid(Layer):
    """Logistic sigmoid activation: ``1 / (1 + exp(-x))``."""

    def __init__(self) -> None:
        super().__init__("sigmoid")

    def forward(self, X):
        """Cache the input (``"X"``) and the activation (``"Z"``); return the activation."""
        self.cache["X"] = X
        activated = 1 / (1 + np.exp(-X))
        self.cache["Z"] = activated
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """
        Backpropagate through Sigmoid activation.

        ``Z`` is the cached forward output, so the local derivative is
        ``Z * (1 - Z)``.
        """
        Z = self.cache["Z"]
        scaled = dL_dZ * Z
        return scaled * (1 - Z)

    def copy(self):
        """Return a new ``Sigmoid`` carrying a shallow copy of this layer's cache."""
        clone = Sigmoid()
        clone.cache = self.cache.copy()
        return clone
class Tanh(Layer):
    """Hyperbolic tangent activation."""

    def __init__(self) -> None:
        super().__init__("tanh")

    def forward(self, X):
        """Cache the input (``"X"``) and ``tanh(X)`` (``"Z"``); return ``tanh(X)``."""
        self.cache["X"] = X
        # np.tanh saturates to +/-1 for large |X|. The explicit
        # (e^x - e^-x) / (e^x + e^-x) ratio overflows to inf / inf = NaN
        # once exp(|X|) exceeds the float range.
        self.cache["Z"] = np.tanh(X)
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """
        Backpropagate through Tanh activation.

        ``Z`` is the cached forward output ``tanh(X)``, so the local
        derivative is ``1 - Z**2``.
        """
        Z = self.cache["Z"]
        return dL_dZ * (1 - Z**2)

    def copy(self):
        """Return a new ``Tanh`` carrying a shallow copy of this layer's cache."""
        new_layer = Tanh()
        new_layer.cache = self.cache.copy()
        return new_layer
class Softmax(Layer):
    """Softmax activation with max-subtraction before exponentiating."""

    def __init__(self) -> None:
        super().__init__("softmax")

    def forward(self, X):
        """Compute softmax of ``X`` and cache input (``"X"``) and output (``"Z"``).

        Normalisation runs over axis 1 when ``X`` has two or more
        dimensions, and over all elements otherwise.
        """
        self.cache["X"] = X
        axis = 1 if X.ndim >= 2 else None
        # Subtract the max along the reduction axis before exp.
        shifted = X - np.max(X, axis=axis, keepdims=True)
        exps = np.exp(shifted)
        self.cache["Z"] = exps / np.sum(exps, axis=axis, keepdims=True)
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """
        Backpropagate through Softmax activation.

        This layer is a straight pass-through: when paired with
        CategoricalCrossEntropy, the combined gradient
        (y_pred - one_hot(y_true)) / N is computed entirely in the loss.
        """
        return dL_dZ

    def copy(self):
        """Return a new ``Softmax`` carrying a shallow copy of this layer's cache."""
        clone = Softmax()
        clone.cache = self.cache.copy()
        return clone
class ELU(Layer):
    """Exponential linear unit: ``x`` for ``x > 0``, else ``alpha * (exp(x) - 1)``."""

    def __init__(self, alpha=1.0) -> None:
        """Create the layer.

        Args:
            alpha: Scale of the exponential branch used for non-positive
                inputs; stored as ``self.alpha_activation``.
        """
        super().__init__("elu")
        self.alpha_activation = alpha

    def forward(self, X):
        """Cache the input (``"X"``) and the activation (``"Z"``); return the activation."""
        self.cache["X"] = X
        negative_branch = self.alpha_activation * (np.exp(X) - 1)
        self.cache["Z"] = np.where(X > 0, X, negative_branch)
        return self.cache["Z"]

    def backward(self, dL_dZ):
        """
        Backpropagate through ELU activation.

        Local derivative: 1 where the cached input is strictly positive,
        ``alpha * exp(X)`` elsewhere.
        """
        X = self.cache["X"]
        slope = np.where(X > 0, 1.0, self.alpha_activation * np.exp(X))
        return dL_dZ * slope

    def copy(self):
        """Return a new ``ELU`` with the same alpha and a shallow copy of the cache."""
        clone = ELU(self.alpha_activation)
        clone.cache = self.cache.copy()
        return clone