From 51126f9feb5a0dbb5cbb25db4799f735a3a92741 Mon Sep 17 00:00:00 2001
From: Soumya Snigdha Kundu
Date: Fri, 9 Jan 2026 11:09:09 +0000
Subject: [PATCH 1/2] Fix GEGLU docstring: Sigmoid -> GELU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The docstring incorrectly stated GEGLU uses Sigmoid, but the code
correctly uses GELU. Per the original paper (Shazeer, 2020):

- GLU uses Sigmoid: GLU(x) = σ(xW) ⊗ xV
- GEGLU uses GELU: GEGLU(x) = GELU(xW) ⊗ xV

Reference: https://arxiv.org/abs/2002.05202

Signed-off-by: Soumya Snigdha Kundu
---
 monai/networks/blocks/activation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/networks/blocks/activation.py b/monai/networks/blocks/activation.py
index 1e5e979dff..a182e5d47e 100644
--- a/monai/networks/blocks/activation.py
+++ b/monai/networks/blocks/activation.py
@@ -168,7 +168,7 @@ class GEGLU(nn.Module):
     r"""Applies the element-wise function:
 
     .. math::
-        \text{GEGLU}(x) = x_1 * \text{Sigmoid}(x_2)
+        \text{GEGLU}(x) = x_1 * \text{GELU}(x_2)
 
     where :math:`x_1` and :math:`x_2` are split from the input tensor along the last dimension.
 

From f5a63ce7c0e2a806f3488e755c53caff5633fcef Mon Sep 17 00:00:00 2001
From: Soumya Snigdha Kundu
Date: Wed, 14 Jan 2026 10:15:03 +0000
Subject: [PATCH 2/2] docs: add Examples section to GEGLU docstring

Add an example showing usage of GEGLU to align the docstring style with
the other activation classes. No functional changes.

Signed-off-by: Soumya Snigdha Kundu
---
 monai/networks/blocks/activation.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/monai/networks/blocks/activation.py b/monai/networks/blocks/activation.py
index a182e5d47e..98990abfe7 100644
--- a/monai/networks/blocks/activation.py
+++ b/monai/networks/blocks/activation.py
@@ -177,6 +177,14 @@ class GEGLU(nn.Module):
     Shape:
         - Input: :math:`(N, *, 2 * D)`
         - Output: :math:`(N, *, D)`, where `*` means, any number of additional dimensions
+
+    Examples::
+
+        >>> import torch
+        >>> from monai.networks.layers.factories import Act
+        >>> m = Act['geglu']()
+        >>> input = torch.randn(2, 8)  # last dim must be even
+        >>> output = m(input)
     """
 
     def forward(self, input: torch.Tensor):
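
Aside for reviewers (not part of the patches above): as a quick sanity check on
the corrected formula, the standalone snippet below computes
GEGLU(x) = x_1 * GELU(x_2) by hand. The chunk-based split and the helper name
geglu_reference are assumptions consistent with the docstring's Shape section,
not copied from MONAI's forward().

    import torch
    import torch.nn.functional as F

    def geglu_reference(x: torch.Tensor) -> torch.Tensor:
        # Hypothetical reference, assumed from the docstring (not MONAI's code):
        # split the even-sized last dimension into halves x1, x2, gate with GELU.
        x1, x2 = x.chunk(2, dim=-1)
        return x1 * F.gelu(x2)

    x = torch.randn(2, 8)   # last dim must be even
    out = geglu_reference(x)
    print(out.shape)        # torch.Size([2, 4]): (N, 2*D) -> (N, D)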