import torch import torch.nn as nn import torch.nn.functional as F import torchaudio import boto3 import os from six import BytesIO import numpy as np s3 = boto3.client('s3') class NetM3(nn.Module): def __init__(self, num_classes=2): super(NetM3, self).__init__() self.conv1 = nn.Conv1d(1, 128, 80, 4) self.bn1 = nn.BatchNorm1d(128) self.pool1 = nn.MaxPool1d(4) self.conv2 = nn.Conv1d(128, 128, 3) self.bn2 = nn.BatchNorm1d(128) self.pool2 = nn.MaxPool1d(4) self.conv3 = nn.Conv1d(128, 256, 3) self.bn3 = nn.BatchNorm1d(256) self.pool3 = nn.MaxPool1d(4) self.conv4 = nn.Conv1d(256, 512, 3) self.bn4 = nn.BatchNorm1d(512) self.pool4 = nn.MaxPool1d(4) self.avgPool = nn.AvgPool1d(30) # input should be 512x30 so this outputs a 512x1 self.fc1 = nn.Linear(512, num_classes) def forward(self, x): x = self.conv1(x) x = F.relu(self.bn1(x)) x = self.pool1(x) x = self.conv2(x) x = F.relu(self.bn2(x)) x = self.pool2(x) x = self.conv3(x) x = F.relu(self.bn3(x)) x = self.pool3(x) x = self.conv4(x) x = F.relu(self.bn4(x)) x = self.pool4(x) x = self.avgPool(x) x = x.permute(0, 2, 1) # change the 512x1 to 1x512 x = self.fc1(x) return F.log_softmax(x, dim=2) # Output: torch.Size([N, 1, num_classes]) def model_fn(model_dir): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = NetM3() if torch.cuda.device_count() > 1: print("Gpu count: {}".format(torch.cuda.device_count())) model = nn.DataParallel(model) with open(os.path.join(model_dir, "model.pth"), "rb") as f: model.load_state_dict(torch.load(f)) return model.to(device) def input_fn(request_body, request_content_type): if request_content_type == 'text/csv': new_sr=8000 audio_len=20 sampling_ratio=5 tmp=request_body[5:] bucket=tmp[:tmp.index('/')] print("bucket: {}".format(bucket)) obj=tmp[tmp.index('/')+1:] print("object: {}".format(obj)) s3.download_file(bucket, obj, '/audioinput.wav') print("audio input file size: {}".format(os.path.getsize('/audioinput.wav'))) waveform, sample_rate = torchaudio.load('/audioinput.wav') waveform = torchaudio.transforms.Resample(sample_rate, new_sr)(waveform[0, :].view(1, -1)) const_len = new_sr * audio_len tempData = torch.zeros([1, const_len]) if waveform.shape[1] < const_len: tempData[0, : waveform.shape[1]] = waveform[:] else: tempData[0, :] = waveform[0, :const_len] sound = tempData tempData = torch.zeros([1, const_len]) if sound.shape[1] < const_len: tempData[0, : sound.shape[1]] = sound[:] else: tempData[0, :] = sound[0, :const_len] sound = tempData new_const_len = const_len // sampling_ratio soundFormatted = torch.zeros([1, 1, new_const_len]) soundFormatted[0, 0, :] = sound[0, ::5] return soundFormatted elif request_content_type in ['application/x-npy', 'application/python-pickle']: return torch.tensor(np.load(BytesIO(request_body), allow_pickle=True)) else: print("unknown request content type: {}".format(request_content_type)) return request_body def predict_fn(input_data, model): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model.to(device) model.eval() with torch.no_grad(): output = model(input_data.to(device)) output = output.permute(1, 0, 2)[0] pred_prob = np.exp( output.cpu().detach().numpy()[:,1] ) return pred_prob if __name__ == "__main__": model = NetM3() print("Test 1:") data = torch.ones([1, 1, 32000]) print("input:", data.shape) output = model(data) print("output:", output.shape) print("Test 2:") datapath = Path("/home/ec2-user/SageMaker/Coswara-Data") csvpath = datapath / "breathing-deep-metadata.csv" dataset = CoswareDataset( csv_path=csvpath, file_path=datapath, new_sr=8000, audio_len=20, sampling_ratio=5, ) # Check 1st samples data, label = dataset[0] data = data.unsqueeze(0) print("input:", data.shape) output = model(data) print("output:", output.shape)