Skip to content

Transformer

rydberggpt.models.transformer

layers

DecoderLayer

Bases: Module

Decoder is made of self-attn, src-attn, and feed forward.

Parameters:

Name Type Description Default
size int

The input size. (d_model)

required
self_attn MultiheadAttention

The self-attention module.

required
src_attn MultiheadAttention

The source-attention module.

required
feed_forward PositionwiseFeedForward

The feed forward module.

required
dropout float

The dropout rate.

required
Source code in src/rydberggpt/models/transformer/layers.py
class DecoderLayer(nn.Module):
    """
    A single transformer decoder block: causally-masked self-attention,
    cross-attention over the encoder memory, and a position-wise feed-forward
    network, each wrapped in a pre-norm residual connection.

    Args:
        size (int): The input size. (d_model)
        self_attn (nn.MultiheadAttention): The self-attention module.
        src_attn (nn.MultiheadAttention): The source-attention module.
        feed_forward (PositionwiseFeedForward): The feed forward module.
        dropout (float): The dropout rate.
    """

    def __init__(
        self,
        size: int,
        self_attn: nn.MultiheadAttention,
        src_attn: nn.MultiheadAttention,
        feed_forward: PositionwiseFeedForward,
        dropout: float,
    ):
        super(DecoderLayer, self).__init__()
        self.size = size
        self.self_attn = self_attn
        self.src_attn = src_attn
        self.feed_forward = feed_forward
        # Three residual/norm wrappers: self-attn, src-attn, feed-forward.
        self.sublayer = clones(SublayerConnection(size, dropout), 3)

    def forward(
        self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor
    ) -> torch.Tensor:
        """
        Run the decoder block.

        Args:
            x (torch.Tensor): The input tensor.
            memory (torch.Tensor): The memory tensor (encoder output).
            batch_mask (torch.Tensor): The mask tensor for batches.

        Returns:
            (torch.Tensor): The output tensor.
        """
        seq_len = x.shape[-2]
        # True strictly above the diagonal: position i may not attend to j > i.
        causal_attn_mask = torch.triu(
            torch.ones(seq_len, seq_len, dtype=torch.bool, device=x.device),
            diagonal=1,
        )
        # nn.MultiheadAttention expects True where keys should be ignored.
        batch_key_mask = torch.logical_not(batch_mask)

        x = self.sublayer[0](
            x, lambda t: self.self_attn(t, t, t, attn_mask=causal_attn_mask)[0]
        )
        x = self.sublayer[1](
            x,
            lambda t: self.src_attn(
                t, memory, memory, key_padding_mask=batch_key_mask
            )[0],
        )
        return self.sublayer[2](x, self.feed_forward)
forward(x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor

Compute the forward pass through the decoder.

Parameters:

Name Type Description Default
x Tensor

The input tensor.

required
memory Tensor

The memory tensor.

required
batch_mask Tensor

The mask tensor for batches.

required

Returns:

Type Description
Tensor

The output tensor.

Source code in src/rydberggpt/models/transformer/layers.py
def forward(
    self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor
) -> torch.Tensor:
    """
    Run the decoder block: causally-masked self-attention, cross-attention
    over the encoder memory, then the feed-forward sublayer.

    Args:
        x (torch.Tensor): The input tensor.
        memory (torch.Tensor): The memory tensor.
        batch_mask (torch.Tensor): The mask tensor for batches.

    Returns:
        (torch.Tensor): The output tensor.
    """
    seq_len = x.shape[-2]
    # True strictly above the diagonal: position i may not attend to j > i.
    causal_attn_mask = torch.triu(
        torch.ones(seq_len, seq_len, dtype=torch.bool, device=x.device),
        diagonal=1,
    )
    # nn.MultiheadAttention expects True where keys should be ignored.
    batch_key_mask = torch.logical_not(batch_mask)

    x = self.sublayer[0](
        x, lambda t: self.self_attn(t, t, t, attn_mask=causal_attn_mask)[0]
    )
    x = self.sublayer[1](
        x,
        lambda t: self.src_attn(
            t, memory, memory, key_padding_mask=batch_key_mask
        )[0],
    )
    return self.sublayer[2](x, self.feed_forward)

EncoderLayer

Bases: Module

Encoder is made up of self-attn and feed forward.

Parameters:

Name Type Description Default
size int

The input size. (d_model)

required
self_attn MultiheadAttention

The self-attention module.

required
feed_forward PositionwiseFeedForward

The feed forward module.

required
dropout float

The dropout rate.

required
Source code in src/rydberggpt/models/transformer/layers.py
class EncoderLayer(nn.Module):
    """
    A single transformer encoder block: self-attention over the sequence
    followed by a position-wise feed-forward network, each wrapped in a
    pre-norm residual connection.

    Args:
        size (int): The input size. (d_model)
        self_attn (nn.MultiheadAttention): The self-attention module.
        feed_forward (PositionwiseFeedForward): The feed forward module.
        dropout (float): The dropout rate.
    """

    def __init__(
        self,
        size: int,
        self_attn: nn.MultiheadAttention,
        feed_forward: PositionwiseFeedForward,
        dropout: float,
    ):
        super(EncoderLayer, self).__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        # Two residual/norm wrappers: self-attention and feed-forward.
        self.sublayer = clones(SublayerConnection(size, dropout), 2)
        self.size = size

    def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:
        """
        Run the encoder block.

        Args:
            x (torch.Tensor): The input tensor.
            batch_mask (torch.Tensor): The mask tensor for batches.

        Returns:
            (torch.Tensor): The output tensor.
        """
        # nn.MultiheadAttention expects True where keys should be ignored.
        batch_key_mask = torch.logical_not(batch_mask)

        # nan_to_num scrubs NaNs that attention can produce for rows whose
        # keys are all masked out.
        x = self.sublayer[0](
            x,
            lambda t: torch.nan_to_num(
                self.self_attn(t, t, t, key_padding_mask=batch_key_mask)[0]
            ),
        )
        return self.sublayer[1](x, self.feed_forward)
forward(x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor

Compute the forward pass through the encoder.

Parameters:

Name Type Description Default
x Tensor

The input tensor.

required
batch_mask Tensor

The mask tensor for batches.

required

Returns:

Type Description
Tensor

The output tensor.

Source code in src/rydberggpt/models/transformer/layers.py
def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:
    """
    Run the encoder block: masked self-attention followed by the
    feed-forward sublayer.

    Args:
        x (torch.Tensor): The input tensor.
        batch_mask (torch.Tensor): The mask tensor for batches.

    Returns:
        (torch.Tensor): The output tensor.
    """
    # nn.MultiheadAttention expects True where keys should be ignored.
    batch_key_mask = torch.logical_not(batch_mask)

    # nan_to_num scrubs NaNs that attention can produce for rows whose
    # keys are all masked out.
    x = self.sublayer[0](
        x,
        lambda t: torch.nan_to_num(
            self.self_attn(t, t, t, key_padding_mask=batch_key_mask)[0]
        ),
    )
    return self.sublayer[1](x, self.feed_forward)

models

Decoder

Bases: Module

The core of the transformer, which consists of a stack of decoder layers.

Source code in src/rydberggpt/models/transformer/models.py
class Decoder(nn.Module):
    """
    The core of the transformer: a stack of identical decoder layers
    followed by a final layer norm.
    """

    def __init__(self, layer: nn.Module, n_layers: int):
        """
        Build the decoder stack.

        Args:
            layer (nn.Module): A single instance of the decoder layer to be cloned.
            n_layers (int): The number of decoder layers in the stack.
        """
        super(Decoder, self).__init__()
        # n_layers independent copies of the prototype layer, plus a final norm.
        self.layers = clones(layer, n_layers)
        self.norm = nn.LayerNorm(layer.size)

    def forward(
        self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor
    ) -> torch.Tensor:
        """
        Pass the (masked) input through every decoder layer, then normalize.

        Args:
            x (torch.Tensor): The input tensor to the decoder of shape (batch_size, seq_length, d_model).
            memory (torch.Tensor): The memory tensor, typically the output of the encoder.
            batch_mask (torch.Tensor): The mask tensor for batches.

        Returns:
            (torch.Tensor): The output tensor of shape (batch_size, seq_length, d_model).
        """
        out = x
        for decoder_layer in self.layers:
            out = decoder_layer(out, memory, batch_mask=batch_mask)
        return self.norm(out)  # [batch_size, seq_len, d_model]
__init__(layer: nn.Module, n_layers: int)

Initialize the Decoder class.

Parameters:

Name Type Description Default
layer Module

A single instance of the decoder layer to be cloned.

required
n_layers int

The number of decoder layers in the stack.

required
Source code in src/rydberggpt/models/transformer/models.py
def __init__(self, layer: nn.Module, n_layers: int):
    """
    Build the decoder stack.

    Args:
        layer (nn.Module): A single instance of the decoder layer to be cloned.
        n_layers (int): The number of decoder layers in the stack.
    """
    super(Decoder, self).__init__()
    # n_layers independent copies of the prototype layer, plus a final norm
    # sized to the layer's model dimension.
    self.layers = clones(layer, n_layers)
    self.norm = nn.LayerNorm(layer.size)
forward(x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor

Pass the (masked) input through all layers of the decoder.

Parameters:

Name Type Description Default
x Tensor

The input tensor to the decoder of shape (batch_size, seq_length, d_model).

required
memory Tensor

The memory tensor, typically the output of the encoder.

required
batch_mask Tensor

The mask tensor for batches.

required

Returns:

Type Description
Tensor

The output tensor after passing through all layers of the decoder of shape (batch_size, seq_length, d_model).

Source code in src/rydberggpt/models/transformer/models.py
def forward(
    self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor
) -> torch.Tensor:
    """
    Pass the (masked) input through every decoder layer, then normalize.

    Args:
        x (torch.Tensor): The input tensor to the decoder of shape (batch_size, seq_length, d_model).
        memory (torch.Tensor): The memory tensor, typically the output of the encoder.
        batch_mask (torch.Tensor): The mask tensor for batches.

    Returns:
        (torch.Tensor): The output tensor of shape (batch_size, seq_length, d_model).
    """
    out = x
    for decoder_layer in self.layers:
        out = decoder_layer(out, memory, batch_mask=batch_mask)
    return self.norm(out)  # [batch_size, seq_len, d_model]

Encoder

Bases: Module

The core encoder, which consists of a stack of N layers.

Source code in src/rydberggpt/models/transformer/models.py
class Encoder(nn.Module):
    """
    The core encoder: a stack of N identical layers followed by a final layer norm.
    """

    def __init__(self, layer: nn.Module, N: int):
        """
        Build the encoder stack.

        Args:
            layer (nn.Module): A single instance of the encoder layer to be cloned.
            N (int): The number of encoder layers in the stack.
        """
        super(Encoder, self).__init__()
        # N independent copies of the prototype layer, plus a final norm.
        self.layers = clones(layer, N)
        self.norm = nn.LayerNorm(layer.size)

    def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:
        """
        Pass the input through every layer in turn, then normalize.

        Args:
            x (torch.Tensor): The input tensor to the encoder of shape (batch_size, seq_length, d_model).
            batch_mask (torch.Tensor): The mask tensor for batches.

        Returns:
            (torch.Tensor): The output tensor, with the same shape as the input.
        """
        out = x
        for encoder_layer in self.layers:
            out = encoder_layer(out, batch_mask=batch_mask)
        return self.norm(out)  # [batch_size, seq_length, d_model]
__init__(layer: nn.Module, N: int)

Initialize the Encoder class.

Parameters:

Name Type Description Default
layer Module

A single instance of the encoder layer to be cloned.

required
N int

The number of encoder layers in the stack.

required
Source code in src/rydberggpt/models/transformer/models.py
def __init__(self, layer: nn.Module, N: int):
    """
    Build the encoder stack.

    Args:
        layer (nn.Module): A single instance of the encoder layer to be cloned.
        N (int): The number of encoder layers in the stack.
    """
    super(Encoder, self).__init__()
    # N independent copies of the prototype layer, plus a final norm
    # sized to the layer's model dimension.
    self.layers = clones(layer, N)
    self.norm = nn.LayerNorm(layer.size)
forward(x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor

Pass the input through each layer in turn.

Parameters:

Name Type Description Default
x Tensor

The input tensor to the encoder of shape (batch_size, seq_length, d_model).

required
batch_mask Tensor

The mask tensor for batches.

required

Returns:

Type Description
Tensor

The output tensor after passing through all layers of the encoder, with the same shape as the input tensor (batch_size, seq_length, d_model).

Source code in src/rydberggpt/models/transformer/models.py
def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:
    """
    Pass the input through every encoder layer, then normalize.

    Args:
        x (torch.Tensor): The input tensor to the encoder of shape (batch_size, seq_length, d_model).
        batch_mask (torch.Tensor): The mask tensor for batches.

    Returns:
        (torch.Tensor): The output tensor, with the same shape as the input.
    """
    out = x
    for encoder_layer in self.layers:
        out = encoder_layer(out, batch_mask=batch_mask)
    return self.norm(out)  # [batch_size, seq_length, d_model]

EncoderDecoder

Bases: LightningModule

A standard Encoder-Decoder architecture. Base for this and many other models.

Source code in src/rydberggpt/models/transformer/models.py
class EncoderDecoder(pl.LightningModule):
    """
    A standard Encoder-Decoder architecture. Base for this and many other models.
    """

    def __init__(
        self,
        encoder: nn.Module,
        decoder: nn.Module,
        src_embed: nn.Module,
        tgt_embed: nn.Module,
        generator: nn.Module,
    ):
        """
        Initialize the EncoderDecoder class.

        Args:
            encoder (nn.Module): The encoder module.
            decoder (nn.Module): The decoder module.
            src_embed (nn.Module): The source embedding module; returns the
                embedded source together with its batch mask.
            tgt_embed (nn.Module): The target embedding module.
            generator (nn.Module): The generator module.
        """
        super(EncoderDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.src_embed = src_embed
        self.tgt_embed = tgt_embed
        self.generator = generator

    def forward(self, tgt: torch.Tensor, src: torch.Tensor) -> torch.Tensor:
        """
        Take in and process masked src and target sequences.

        Args:
            tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).
            src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).

        Returns:
            (torch.Tensor): The output tensor after passing through the encoder-decoder architecture,
                          with shape (batch_size, tgt_seq_length, d_model).
        """
        memory, batch_mask = self.encode(src)
        return self.decode(tgt, memory, batch_mask)

    def encode(self, src: torch.Tensor) -> "tuple[torch.Tensor, torch.Tensor]":
        """
        Encode the source tensor.

        Args:
            src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).

        Returns:
            (tuple[torch.Tensor, torch.Tensor]): The encoded tensor of shape
                (batch_size, src_seq_length, d_model_tgt) together with the
                batch mask produced by the source embedding.
        """
        # src_embed yields both the embedded source and its batch mask.
        x, batch_mask = self.src_embed(src)
        return self.encoder(x, batch_mask=batch_mask), batch_mask

    def decode(
        self, tgt: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor
    ) -> torch.Tensor:
        """
        Decode the target tensor using the memory tensor.

        Args:
            tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).
            memory (torch.Tensor): The memory tensor of shape (batch_size, src_seq_length, d_model).
            batch_mask (torch.Tensor): The mask tensor for batches, as returned by encode().

        Returns:
            (torch.Tensor): The decoded tensor of shape (batch_size, tgt_seq_length, d_model).
        """
        return self.decoder(self.tgt_embed(tgt), memory, batch_mask=batch_mask)
__init__(encoder: nn.Module, decoder: nn.Module, src_embed: nn.Module, tgt_embed: nn.Module, generator: nn.Module)

Initialize the EncoderDecoder class.

Parameters:

Name Type Description Default
encoder Module

The encoder module.

required
decoder Module

The decoder module.

required
src_embed Module

The source embedding module.

required
tgt_embed Module

The target embedding module.

required
generator Module

The generator module.

required
Source code in src/rydberggpt/models/transformer/models.py
def __init__(
    self,
    encoder: nn.Module,
    decoder: nn.Module,
    src_embed: nn.Module,
    tgt_embed: nn.Module,
    generator: nn.Module,
):
    """
    Initialize the EncoderDecoder class.

    Args:
        encoder (nn.Module): The encoder module.
        decoder (nn.Module): The decoder module.
        src_embed (nn.Module): The source embedding module.
        tgt_embed (nn.Module): The target embedding module.
        generator (nn.Module): The generator module.
    """
    super(EncoderDecoder, self).__init__()
    self.encoder = encoder
    self.decoder = decoder
    self.src_embed = src_embed
    self.tgt_embed = tgt_embed
    self.generator = generator
decode(tgt: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor

Decode the target tensor using the memory tensor.

Parameters:

Name Type Description Default
tgt Tensor

The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).

required
memory Tensor

The memory tensor of shape (batch_size, src_seq_length, d_model).

required
batch_mask Tensor

The mask tensor for batches.

required

Returns:

Type Description
Tensor

The decoded tensor of shape (batch_size, tgt_seq_length, d_model).

Source code in src/rydberggpt/models/transformer/models.py
def decode(
    self, tgt: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor
) -> torch.Tensor:
    """
    Decode the target tensor using the memory tensor.

    Args:
        tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).
        memory (torch.Tensor): The memory tensor of shape (batch_size, src_seq_length, d_model).
        batch_mask (torch.Tensor): The mask tensor for batches, as returned by encode().

    Returns:
        (torch.Tensor): The decoded tensor of shape (batch_size, tgt_seq_length, d_model).
    """
    return self.decoder(self.tgt_embed(tgt), memory, batch_mask=batch_mask)
encode(src: torch.Tensor) -> torch.Tensor

Encode the source tensor.

Parameters:

Name Type Description Default
src Tensor

The source tensor of shape (batch_size, src_seq_length, d_model_src).

required

Returns:

Type Description
Tuple[Tensor, Tensor]

The encoded tensor of shape (batch_size, src_seq_length, d_model_tgt), together with the batch mask.

Source code in src/rydberggpt/models/transformer/models.py
def encode(self, src: torch.Tensor) -> "tuple[torch.Tensor, torch.Tensor]":
    """
    Encode the source tensor.

    Args:
        src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).

    Returns:
        (tuple[torch.Tensor, torch.Tensor]): The encoded tensor of shape
            (batch_size, src_seq_length, d_model_tgt) together with the
            batch mask produced by the source embedding.
    """
    # src_embed yields both the embedded source and its batch mask.
    x, batch_mask = self.src_embed(src)
    return self.encoder(x, batch_mask=batch_mask), batch_mask
forward(tgt: torch.Tensor, src: torch.Tensor) -> torch.Tensor

Take in and process masked src and target sequences.

Parameters:

Name Type Description Default
tgt Tensor

The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).

required
src Tensor

The source tensor of shape (batch_size, src_seq_length, d_model_src).

required

Returns:

Type Description
Tensor

The output tensor after passing through the encoder-decoder architecture, with shape (batch_size, tgt_seq_length, d_model).

Source code in src/rydberggpt/models/transformer/models.py
def forward(self, tgt: torch.Tensor, src: torch.Tensor) -> torch.Tensor:
    """
    Take in and process masked src and target sequences.

    Args:
        tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).
        src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).

    Returns:
        (torch.Tensor): The output tensor of shape (batch_size, tgt_seq_length, d_model).
    """
    # Encode the source, then condition decoding on the resulting memory.
    memory, batch_mask = self.encode(src)
    return self.decode(tgt, memory, batch_mask)

Generator

Bases: Module

Linear + softmax layer for generation step. vocab_size for Rydberg is 2.

Source code in src/rydberggpt/models/transformer/models.py
class Generator(nn.Module):
    """
    Linear + softmax layer for generation step. vocab_size for Rydberg is 2.
    """

    def __init__(self, d_model: int, vocab_size: int):
        """
        Initialize the Generator class.

        Args:
            d_model (int): The dimension of the input features (i.e., the last dimension of the input tensor).
            vocab_size (int): The size of the vocabulary, which determines the last dimension of the output tensor.
        """
        super(Generator, self).__init__()
        self.proj = nn.Linear(d_model, vocab_size)  # [batch_size, seq_len, vocab_size]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Compute the forward pass of the Generator.

        Args:
            x (torch.Tensor): The input tensor of shape (batch_size, seq_length, d_model).

        Returns:
            (torch.Tensor): The output tensor of shape (batch_size, seq_length, vocab_size),
                          with log-softmax applied along the last dimension.
        """
        # NOTE: a former "+ 1e-10" offset on the logits was removed —
        # log_softmax is invariant under adding a constant to every logit,
        # so the offset had no effect.
        return F.log_softmax(self.proj(x), dim=-1)  # [batch_size, seq_len, vocab_size]
__init__(d_model: int, vocab_size: int)

Initialize the Generator class.

Parameters:

Name Type Description Default
d_model int

The dimension of the input features (i.e., the last dimension of the input tensor).

required
vocab_size int

The size of the vocabulary, which determines the last dimension of the output tensor.

required
Source code in src/rydberggpt/models/transformer/models.py
def __init__(self, d_model: int, vocab_size: int):
    """
    Initialize the Generator class.

    Args:
        d_model (int): The dimension of the input features (i.e., the last dimension of the input tensor).
        vocab_size (int): The size of the vocabulary, which determines the last dimension of the output tensor.
    """
    super(Generator, self).__init__()
    # Projects [batch_size, seq_len, d_model] -> [batch_size, seq_len, vocab_size].
    self.proj = nn.Linear(d_model, vocab_size)
forward(x: torch.Tensor) -> torch.Tensor

Compute the forward pass of the Generator.

Parameters:

Name Type Description Default
x Tensor

The input tensor of shape (batch_size, seq_length, d_model).

required

Returns:

Type Description
Tensor

The output tensor of shape (batch_size, seq_length, vocab_size), with log-softmax applied along the last dimension.

Source code in src/rydberggpt/models/transformer/models.py
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """
    Compute the forward pass of the Generator.

    Args:
        x (torch.Tensor): The input tensor of shape (batch_size, seq_length, d_model).

    Returns:
        (torch.Tensor): The output tensor of shape (batch_size, seq_length, vocab_size),
                      with log-softmax applied along the last dimension.
    """
    # NOTE: a former "+ 1e-10" offset on the logits was removed —
    # log_softmax is invariant under adding a constant to every logit,
    # so the offset had no effect.
    return F.log_softmax(self.proj(x), dim=-1)  # [batch_size, seq_len, vocab_size]

modules

Embeddings

Bases: Module

The embedding layer.

Parameters:

Name Type Description Default
d_model int

The embedding size.

required
vocab_size int

The vocabulary size.

required
Source code in src/rydberggpt/models/transformer/modules.py
class Embeddings(nn.Module):
    """
    Input embedding layer: a linear map from vocabulary space to model
    space, scaled by sqrt(d_model).

    Args:
        d_model (int): The embedding size.
        vocab_size (int): The vocabulary size.
    """

    def __init__(self, d_model: int, vocab_size: int):
        super(Embeddings, self).__init__()
        self.lut = nn.Linear(vocab_size, d_model)
        self.d_model = d_model

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Embed x and rescale by sqrt(d_model).

        Parameters:
            x (torch.Tensor): The input tensor.

        Returns:
            (torch.Tensor): The embedded, rescaled tensor.
        """
        scale = math.sqrt(self.d_model)
        return self.lut(x) * scale
forward(x: torch.Tensor) -> torch.Tensor

Compute the forward pass through the module.

Parameters:

Name Type Description Default
x Tensor

The input tensor.

required

Returns:

Type Description
Tensor

The output tensor.

Source code in src/rydberggpt/models/transformer/modules.py
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """
    Embed x and rescale by sqrt(d_model).

    Parameters:
        x (torch.Tensor): The input tensor.

    Returns:
        (torch.Tensor): The embedded, rescaled tensor.
    """
    scale = math.sqrt(self.d_model)
    return self.lut(x) * scale

PositionalEncoding

Bases: Module

Implement the PE function.

Source code in src/rydberggpt/models/transformer/modules.py
class PositionalEncoding(nn.Module):
    """
    Implement the sinusoidal positional-encoding (PE) function.

    Args:
        d_model (int): The model/embedding size (must be even, since sin/cos
            pairs are interleaved across the feature dimension).
        dropout (float): Dropout rate applied after adding the encoding.
        max_len (int, optional): Maximum supported sequence length. Defaults to 5000.
    """

    def __init__(self, d_model: int, dropout: float, max_len: int = 5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)  # even feature dims: sine
        pe[:, 1::2] = torch.cos(position * div_term)  # odd feature dims: cosine
        pe = pe.unsqueeze(0)  # [1, max_len, d_model] to broadcast over the batch
        # Stored as a buffer: saved with the module but not a learnable parameter.
        self.register_buffer("pe", pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Add the positional encoding to x and apply dropout.

        Args:
            x (torch.Tensor): Input of shape (batch_size, seq_length, d_model).

        Returns:
            (torch.Tensor): Tensor of the same shape as x.
        """
        # A former ".requires_grad_(False)" on the buffer slice was removed:
        # registered buffers never require grad, so the call was a no-op.
        x = x + self.pe[:, : x.size(1)]
        return self.dropout(x)

PositionwiseFeedForward

Bases: Module

A two-layer feed-forward network.

Parameters:

Name Type Description Default
d_model int

The input size.

required
d_ff int

The hidden size.

required
dropout float

The dropout rate. Defaults to 0.1.

0.1
Source code in src/rydberggpt/models/transformer/modules.py
class PositionwiseFeedForward(nn.Module):
    """
    A two-layer feed-forward network applied position-wise:
    w_2(dropout(relu(w_1(x)))).

    Args:
        d_model (int): The input size.
        d_ff (int): The hidden size.
        dropout (float, optional): The dropout rate. Defaults to 0.1.
    """

    def __init__(self, d_model: int, d_ff: int, dropout: float = 0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Apply the feed-forward network.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            (torch.Tensor): The output tensor, same shape as x.
        """
        hidden = F.relu(self.w_1(x))
        return self.w_2(self.dropout(hidden))
forward(x: torch.Tensor) -> torch.Tensor

Compute the forward pass through the module.

Parameters:

Name Type Description Default
x Tensor

The input tensor.

required

Returns:

Type Description
Tensor

The output tensor.

Source code in src/rydberggpt/models/transformer/modules.py
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """
    Apply the position-wise feed-forward network: w_2(dropout(relu(w_1(x)))).

    Args:
        x (torch.Tensor): The input tensor.

    Returns:
        (torch.Tensor): The output tensor, same shape as x.
    """
    hidden = F.relu(self.w_1(x))
    return self.w_2(self.dropout(hidden))

SublayerConnection

Bases: Module

This module implements a residual connection followed by a layer norm.

Parameters:

Name Type Description Default
size int

The input size.

required
dropout float

The dropout rate.

required
Source code in src/rydberggpt/models/transformer/modules.py
class SublayerConnection(nn.Module):
    """
    Pre-norm residual wrapper: x + dropout(sublayer(layer_norm(x))).

    Args:
        size (int): The input size.
        dropout (float): The dropout rate.
    """

    def __init__(self, size: int, dropout: float):
        super(SublayerConnection, self).__init__()
        self.layer_norm = nn.LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor, sublayer: nn.Module) -> torch.Tensor:
        """
        Apply sublayer to the normalized input and add the residual.

        Args:
            x (torch.Tensor): The input tensor.
            sublayer (nn.Module): The sublayer module.

        Returns:
            (torch.Tensor): The output tensor.
        """
        # NOTE For GPT2 the authors moved Layer normalization (Ba et al., 2016)
        # to the input of each sub-block.
        # see Sec. 2.3 https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf
        normed = self.layer_norm(x)
        return x + self.dropout(sublayer(normed))
forward(x: torch.Tensor, sublayer: nn.Module) -> torch.Tensor

Compute the forward pass through the module.

Parameters:

Name Type Description Default
x Tensor

The input tensor.

required
sublayer Module

The sublayer module.

required

Returns:

Type Description
Tensor

The output tensor.

Source code in src/rydberggpt/models/transformer/modules.py
def forward(self, x: torch.Tensor, sublayer: nn.Module) -> torch.Tensor:
    """
    Compute the forward pass through the module.

    Args:
        x (torch.Tensor): The input tensor.
        sublayer (nn.Module): The sublayer module.

    Returns:
        (torch.Tensor): The output tensor.
    """
    # NOTE For GPT2 the authors moved Layer normalization (Ba et al., 2016)
    # to the input of each sub-block.
    # see Sec. 2.3 https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf
    return x + self.dropout(sublayer(self.layer_norm(x)))

utils

clones(module: nn.Module, n_clones: int)

helper function which produces n_clones copies of a layer

Source code in src/rydberggpt/models/transformer/utils.py
def clones(module: nn.Module, n_clones: int):
    """Return an nn.ModuleList holding n_clones independent deep copies of module."""
    copies = (copy.deepcopy(module) for _ in range(n_clones))
    return nn.ModuleList(copies)

flattened_snake_flip(x: torch.Tensor, Lx: int, Ly: int) -> torch.Tensor

Implements a "snake" flip which reorders the flattened 2D tensor into snake order.

Parameters:

Name Type Description Default
x Tensor

The tensor to apply the snake flip to, dimensions should be [..., Ly * Lx].

required
Lx int

The extent of the fast (row) dimension of the underlying 2D grid.

required
Ly int

The extent of the slow (column) dimension of the underlying 2D grid.

required

Returns:

Type Description
Tensor

The "snake" flipped tensor, dimensions will be [..., Ly * Lx].

Source code in src/rydberggpt/models/transformer/utils.py
def flattened_snake_flip(x: torch.Tensor, Lx: int, Ly: int) -> torch.Tensor:
    """
    Implements a "snake" flip which reorders the flattened 2D tensor into snake order.

    Args:
        x (torch.Tensor): The tensor to apply the snake flip to, dimensions should be [..., Ly * Lx].
        Lx (int): The extent of the fast (row) dimension of the underlying 2D grid.
        Ly (int): The extent of the slow (column) dimension of the underlying 2D grid.

    Returns:
        (torch.Tensor): The "snake" flipped tensor, dimensions will be [..., Ly * Lx].
    """
    # Unflatten to [..., Ly, Lx], snake-flip the rows, then flatten back.
    return snake_flip(x.reshape(*x.shape[:-1], Ly, Lx)).reshape(*x.shape[:-1], -1)

snake_flip(x: torch.Tensor) -> torch.Tensor

Implements a "snake" flip which reorders the 2D tensor into snake order when flattened.

Parameters:

Name Type Description Default
x Tensor

The tensor to apply the snake flip to, dimensions should be [..., Ly, Lx].

required

Returns:

Type Description
Tensor

The "snake" flipped tensor, dimensions will be [..., Ly, Lx].

Source code in src/rydberggpt/models/transformer/utils.py
def snake_flip(x: torch.Tensor) -> torch.Tensor:
    """
    Implements a "snake" flip which reorders the 2D tensor into snake order when flattened.

    Every odd-indexed row (along the second-to-last axis) is reversed, so that
    flattening the result traverses the grid boustrophedon ("snake") style.

    Args:
        x (torch.Tensor): The tensor to apply the snake flip to, dimensions should be [..., Ly, Lx].

    Returns:
        (torch.Tensor): The "snake" flipped tensor, dimensions will be [..., Ly, Lx].

    Raises:
        TypeError: If x is not a torch.Tensor.
    """
    if not isinstance(x, torch.Tensor):
        raise TypeError("Function only supports torch.Tensor")

    flipped = x.clone()
    # Reverse every second row, starting from row 1; the input is untouched.
    for row in range(1, flipped.shape[-2], 2):
        flipped[..., row, :] = flipped[..., row, :].flip(dims=(-1,))

    return flipped