Architectures
Common
import torch
import torch.nn as nn


class NeuralNet(nn.Module):
    def __init__(self, init_data, hidden_layers):
        # init network architecture (filled in by the variants below)
        pass

    def forward(self, x):
        return self.network(self.reshape(x)).squeeze()

    # Helper functions
    def predict_logits(self, X):
        return self.forward(X)

    def predict_proba(self, X):
        # softmax over the class dimension, not the batch dimension
        return torch.softmax(self.predict_logits(X), dim=-1)

    def predict_from_proba(self, proba):
        return proba.argmax(dim=-1)

    def predict(self, X):
        return self.predict_from_proba(self.predict_proba(X))
One-vs-Rest Classifier
from torch.utils.data import DataLoader


class NeuralNet(nn.Module):
    def __init__(self, init_data, hidden_layers):
        super().__init__()
        # infer input/output sizes from a single batch
        for x, y in DataLoader(init_data):
            self.input_size = x.shape[-1]
            self.output_size = y.shape[-1]
            break
        output_layer = nn.LazyLinear(self.output_size)  # output layer: one unit per class
        layers = (
            # [input_layer] +
            hidden_layers +
            [output_layer]
        )
        self.network = nn.Sequential(*layers)
        # init lazy layers with the batch captured above
        self.forward(x)

    def reshape(self, x):
        # batch_size, no_of_channels, width, height
        return x.view(x.shape[0], 1, x.shape[1], x.shape[2])

    def forward(self, x):
        return self.network(self.reshape(x)).squeeze()
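A quick smoke test of the shape inference; the dataset and hidden layers below are made up for illustration:

from torch.utils.data import TensorDataset

# hypothetical data: eight 28x28 "images", one-hot labels over 3 classes
X = torch.randn(8, 28, 28)
y = torch.nn.functional.one_hot(torch.randint(0, 3, (8,)), num_classes=3).float()
init_data = TensorDataset(X, y)

hidden_layers = [nn.Flatten(), nn.LazyLinear(16), nn.ReLU()]
model = NeuralNet(init_data, hidden_layers)

print(model(torch.randn(4, 28, 28)).shape)  # torch.Size([4, 3])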
One vs Rest with \(k-1\) Classifiers
- Advantage: saves compute when the pre-output layer is wide, since the final linear layer needs one output unit (and its incoming weights) fewer; see the derivation below
- Disadvantage: the forward pass is harder to read, because the last logit is derived rather than predicted
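The trick rests on the probabilities summing to one: given log-probabilities \(\ell_i = \log p_i\) for the first \(k-1\) classes, the last one is fully determined, which is what forward computes below:

\[
p_k = 1 - \sum_{i=1}^{k-1} p_i,
\qquad
\ell_k = \log p_k = \log\Bigl(1 - \sum_{i=1}^{k-1} e^{\ell_i}\Bigr).
\]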
class NeuralNet(nn.Module):
    def __init__(self, init_data, hidden_layers):
        super().__init__()
        # infer input/output sizes from a single batch
        for x, y in DataLoader(init_data):
            self.input_size = x.shape[-1]
            self.output_size = y.shape[-1]
            break
        output_layer = nn.LazyLinear(self.output_size - 1)  # output layer: one unit fewer than the number of classes
        layers = (
            # [input_layer] +
            hidden_layers +
            [output_layer]
        )
        self.network = nn.Sequential(*layers)
        # init lazy layers with the batch captured above
        self.forward(x)

    def reshape(self, x):
        # batch_size, no_of_channels, width, height
        return x.view(x.shape[0], 1, x.shape[1], x.shape[2])

    def forward(self, x):
        # the network predicts log-probabilities for the first k-1 classes
        logits_except_last = self.network(self.reshape(x)).squeeze()
        # derive the last logit from the sum-to-one constraint,
        # summing over the class dimension only
        logit_last = torch.log(1 - torch.exp(logits_except_last).sum(dim=-1, keepdim=True))
        return torch.cat((logits_except_last, logit_last), dim=-1)
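One caveat: an untrained network's outputs are not guaranteed to be valid log-probabilities, so the residual `1 - exp(...).sum()` can be zero or negative early in training and the log becomes undefined. A minimal sketch of a guard; the `eps` clamp is an added assumption, not part of the original:

def safe_last_logit(logits_except_last, eps=1e-6):
    # clamp the leftover probability mass so the log stays finite
    residual = 1 - torch.exp(logits_except_last).sum(dim=-1, keepdim=True)
    return torch.log(torch.clamp(residual, min=eps))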
Testing the logic
# Given logits for the first two classes
probs_except_last = torch.tensor([0.1, 0.2])
logits_except_last = probs_except_last.log()
# Compute the logit for the last class
logit_last = torch.log(1 - torch.exp(logits_except_last).sum())
# Combine all logits
logits = torch.cat((logits_except_last, logit_last.view(-1)))
# Compute softmax probabilities
probabilities = torch.softmax(logits, dim=0)
# Inspect the results; the probabilities should sum to 1
print(f"{probs_except_last = }")
print(f"{logits_except_last = }")
print()
print(f"{probabilities = }")
print(f"{logits = }")
probs_except_last = tensor([0.1000, 0.2000])
logits_except_last = tensor([-2.3026, -1.6094])
probabilities = tensor([0.1000, 0.2000, 0.7000])
logits = tensor([-2.3026, -1.6094, -0.3567])
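As a stricter check than eyeballing the printout (an added sanity assertion, not in the original output):

assert torch.allclose(probabilities, torch.tensor([0.1, 0.2, 0.7]), atol=1e-4)
assert torch.isclose(probabilities.sum(), torch.tensor(1.0))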