import math
from typing import Callable

import torch
import torch.nn as nn

from modules.helper_ML_functions import find_device
from modules.helper_functions_tsp import cost_fn_tensor
def estimate_cost_fn_gradient(my_input: torch.Tensor,
                              output: torch.Tensor,
                              cost_fn: Callable[[list], int]) -> torch.Tensor:
    """Estimate the gradient of the cost function by flipping each bit in turn
    and calculating the resulting difference in the cost.

    Parameters
    ----------
    my_input : torch.Tensor
        The input tensor of bits.
    output : torch.Tensor
        A precalculated run of the cost function on ``my_input``,
        passed in for performance reasons.
    cost_fn : Callable[[list], int]
        The cost function to be used.

    Returns
    -------
    torch.Tensor
        The estimated gradient, with the same shape as ``my_input``.
    """
    device = find_device()
    gradient_est = torch.zeros_like(my_input)  # Initialize with zeros for clarity
    dim0 = my_input.size(0)
    dim1 = my_input.size(1)
    my_input_clone = my_input.clone()  # Clone once, modify in-place
    for i in range(dim0):
        for j in range(dim1):
            old_bit = my_input[i, j]
            sign = 2 * old_bit - 1  # Convert to -1 or 1
            my_input_clone[i, j] = 1 - old_bit
            new_output = cost_fn_tensor(my_input_clone[[i]], cost_fn).to(device)
            gradient_est[i, j] = (output[i] - new_output) / sign
            my_input_clone[i, j] = old_bit  # Restore the bit before the next flip
    return gradient_est
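# Illustrative sketch of the bit-flip estimator on a toy cost (assumptions: the toy
# `count_ones` cost is not part of this module, and `cost_fn_tensor` applies `cost_fn`
# row-wise, returning one cost per row):
#
#     bits = torch.tensor([[1.0, 0.0, 1.0]])
#     count_ones = lambda b: int(sum(b))                 # toy cost: number of 1-bits
#     base_cost = cost_fn_tensor(bits, count_ones)       # precomputed baseline cost
#     grad = estimate_cost_fn_gradient(bits, base_cost, count_ones)
#     # Each grad[0, j] is the one-sided finite difference d(cost)/d(bit_j);
#     # for this toy cost every entry is +1, since raising any bit adds one to the count.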
class CostFunction(torch.autograd.Function):
    """A custom autograd function to calculate the cost function and estimate the gradient."""
    @staticmethod
    def forward(ctx, input, cost_fn):
        # Save the gradient for the backward pass
        device = find_device()
        output = cost_fn_tensor(input, cost_fn).to(device)
        gradient_est = estimate_cost_fn_gradient(input, output, cost_fn)
        ctx.grad = gradient_est
        return output
    @staticmethod
    def backward(ctx, grad_output):
        # Read the gradient from the forward pass
        grad_cost_fn = None
        return ctx.grad, grad_cost_fn
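# Illustrative sketch of using the custom autograd Function directly (the toy
# `count_ones` cost is an assumption, not part of this module):
#
#     bits = torch.tensor([[1.0, 0.0, 1.0]], requires_grad=True)
#     count_ones = lambda b: int(sum(b))
#     cost = CostFunction.apply(bits, count_ones)   # forward pass: evaluate the cost
#     cost.sum().backward()                         # backward pass: bit-flip estimate
#     # bits.grad now holds the gradient produced by estimate_cost_fn_gradient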
class MySine(nn.Module):
    """A sine activation symmetric about 0.5."""
    def __init__(self):
        super(MySine, self).__init__()  # Initialize parent class
        self.register_buffer("pi", torch.tensor(math.pi))
    def forward(self, x):
        return 0.5 * (1 + torch.sin((x - 0.5) * self.pi))
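# Worked check of the activation: 0.5 * (1 + sin((x - 0.5) * pi)) maps
# x = 0 -> 0, x = 0.5 -> 0.5 and x = 1 -> 1, so outputs stay in [0, 1]:
#
#     act = MySine()
#     act(torch.tensor([0.0, 0.5, 1.0]))   # approximately tensor([0.0, 0.5, 1.0])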
class Sample_Binary(nn.Module):
    """Sample binary values from probabilities in the forward pass,
    with a straight-through (identity) gradient in the backward pass."""
    def __init__(self):
        super(Sample_Binary, self).__init__()  # Initialize parent class
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        sampled = (torch.rand_like(x) < x).float()  # Bernoulli sample: 0/1 values as float
        output = sampled.to(torch.int)
        return x + (output - x).detach()  # Straight-through: sampled value forward, identity gradient backward
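# Illustrative sketch of the straight-through behaviour: the forward value is the
# random 0/1 sample, while gradients flow back as if the layer were the identity:
#
#     probs = torch.tensor([0.2, 0.9], requires_grad=True)
#     bits = Sample_Binary()(probs)     # e.g. tensor([0., 1.]) (random draw)
#     bits.sum().backward()
#     probs.grad                        # tensor([1., 1.]) -- identity gradient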
class BinaryToCost(nn.Module):
    """Convert a bit string to a cost in the forward pass; estimate the gradient in the backward pass."""
    def __init__(self, cost_fn: Callable[[list], int]):
        super(BinaryToCost, self).__init__()  # Initialize parent class
        self.cost_fn = cost_fn
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Calculate the cost in the forward pass."""
        x = CostFunction.apply(x, self.cost_fn)
        return x
class MyModel(nn.Module):
    """A simple feedforward neural network model for the TSP."""
    def __init__(self, sdl, cost_fn: Callable[[list], int]):
        """Initialize the model"""
        super(MyModel, self).__init__()
        self.bits = sdl.qubits
        self.layers = sdl.layers
        self.std_dev = sdl.std_dev
        self.cost_fn = cost_fn
        self.hot_start = sdl.hot_start
        self.mode = sdl.mode
        self.gradient_type = sdl.gradient_type
        if self.mode in [8, 9]:
            self.activation = MySine()
        elif self.mode in [18, 19]:
            self.activation = nn.Sigmoid()
        else:
            raise Exception(f'Mode {self.mode} is not supported')
        self._build_layers()
    def _init_weights(self, fc, first_layer: bool = False):
        """Helper: initialize weights depending on hot_start mode"""
        with torch.no_grad():
            if not self.hot_start:
                # Xavier initialization for sigmoid, SIREN for sine
                if self.gradient_type in ['SGD+X', 'Adam+X']:
                    if isinstance(self.activation, nn.Sigmoid):
                        gain = nn.init.calculate_gain("sigmoid")
                        nn.init.xavier_uniform_(fc.weight, gain=gain)
                        if fc.bias is not None:
                            nn.init.zeros_(fc.bias)
                    elif isinstance(self.activation, MySine):
                        # Sitzmann et al. 2020 (SIREN)
                        if first_layer:
                            # First layer: uniform(-1/num_inputs, 1/num_inputs)
                            fc.weight.uniform_(-1 / self.bits, 1 / self.bits)
                            if fc.bias is not None:
                                fc.bias.zero_()
                        else:
                            # Hidden layers: uniform(-sqrt(6 / num_inputs)/ω, sqrt(6 / num_inputs)/ω)
                            # Default ω₀ = 30 in the SIREN paper
                            w0 = 30.0
                            bound = (6 / self.bits) ** 0.5 / w0
                            fc.weight.uniform_(-bound, bound)
                            if fc.bias is not None:
                                fc.bias.uniform_(-bound, bound)
                    else:
                        raise Exception(f'Activation function {self.activation} not supported for SGD+X')
            else:
                fc.weight.copy_(torch.eye(self.bits))
                fc.weight.add_(torch.normal(mean=0.0, std=self.std_dev, size=fc.weight.shape))
                fc.bias.copy_(torch.normal(mean=0.0, std=self.std_dev, size=fc.bias.shape))
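    # Worked example of the SIREN hidden-layer bound above (illustrative, assuming
    # sdl.qubits = 64): bound = sqrt(6 / 64) / 30 ≈ 0.0102, so hidden weights and
    # biases start in roughly (-0.0102, 0.0102).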
    def _build_layers(self):
        """Create layers fc1..fcN and act1..actN."""
        for i in range(1, self.layers + 1):
            fc = nn.Linear(in_features=self.bits, out_features=self.bits)
            self._init_weights(fc, first_layer=(i == 1))
            setattr(self, f"fc{i}", fc)
            setattr(self, f"act{i}", self.activation)
        self.sample = Sample_Binary()
        self.cost = BinaryToCost(self.cost_fn)
    def forward(self, x):
        """Define the forward pass"""
        for i in range(1, self.layers + 1):
            # Iterate through the layers and build the forward pass
            fc = getattr(self, f'fc{i}')
            act = getattr(self, f'act{i}')
            x = fc(x)
            x = act(x)
        x = self.sample(x)
        x = self.cost(x)
        return x
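# Illustrative usage sketch (assumptions: `sdl` can be any settings object exposing the
# attributes read in __init__, and the toy cost below stands in for a real TSP cost):
#
#     from types import SimpleNamespace
#
#     sdl = SimpleNamespace(qubits=4, layers=2, std_dev=0.1, hot_start=False,
#                           mode=18, gradient_type='Adam+X')
#     model = MyModel(sdl, lambda b: int(sum(b)))
#     x = torch.rand(1, sdl.qubits)    # one batch row of input probabilities
#     cost = model(x)                  # linear/activation stack, binary sampling, cost
#     cost.sum().backward()            # gradients via the bit-flip estimate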