import torch
import torch.nn as nn
import torch.nn.functional as F


class VGGish(nn.Module):
    """PyTorch implementation of the VGGish model with a sigmoid output head.

    Adapted from: https://github.com/harritaylor/torch-vggish

    The following modifications were made relative to that implementation:
      (i)   correction for the missing ReLU layers,
      (ii)  correction for the improperly formatted data when transitioning
            from NHWC --> NCHW in the fully-connected layers, and
      (iii) correction for flattening in the fully-connected layers.

    The first fully-connected layer expects ``512 * 31 * 4`` input features.
    Because the convolutional stack halves both spatial dimensions four
    times, the input must reduce to a 31 x 4 feature map (for example a
    ``1 x 496 x 64`` input per sample).

    Args:
        params: Configuration object; only its ``num_class`` attribute is
            read, and it sets the size of the output layer.
    """

    def __init__(self, params):
        super().__init__()

        # NOTE: layer order and Sequential indices are part of the
        # state_dict key layout ("features.0.weight", ...); do not reorder.
        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(1, 64, 3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, stride=2),
            # Block 2
            nn.Conv2d(64, 128, 3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, stride=2),
            # Block 3
            nn.Conv2d(128, 256, 3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, stride=2),
            # Block 4
            nn.Conv2d(256, 512, 3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, stride=2),
        )

        # Embedding head: flattened (31 x 4 x 512) feature map -> 128-d vector.
        self.fc = nn.Sequential(
            nn.Linear(512 * 31 * 4, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 128),
            nn.ReLU(inplace=True),
        )

        # Classification layer on top of the 128-d embedding.
        self.final_fc = nn.Linear(128, params.num_class, bias=True)

    def forward(self, x):
        """Compute per-class sigmoid scores for a batch of inputs.

        Args:
            x: 4-D tensor laid out as ``(N, 1, H, W)`` whose ``H`` and ``W``
                reduce to 31 and 4 after the four 2x2 max-pooling stages.
                ``N`` may be zero.

        Returns:
            Tensor of shape ``(N, params.num_class)`` with values in
            ``[0, 1]`` (element-wise sigmoid of the final linear layer).
        """
        x = self.features(x)

        # Move channels last (N, C, H, W) -> (N, H, W, C) so the flattened
        # feature order is H, W, C.
        # NOTE(review): presumably this matches the layout of the original
        # TensorFlow VGGish weights -- confirm against the checkpoint used.
        x = x.permute(0, 2, 3, 1)

        # torch.flatten computes the flattened size explicitly, so unlike
        # ``view(N, -1)`` it also works for an empty batch (N == 0), where
        # ``-1`` cannot be inferred. It copies when the permuted tensor is
        # not viewable, so no explicit ``.contiguous()`` is needed.
        x = torch.flatten(x, start_dim=1)

        x = self.fc(x)
        x = self.final_fc(x)
        return torch.sigmoid(x)