# Labeling and Modeling DICOM images using Amazon SageMaker

This notebook walks through the output of a SageMaker Ground Truth job for labeling and annotating DICOM images. The output manifest file of the DICOM labeling job contains references to the labels and annotations made on the DICOM images, and it is saved in an S3 bucket. This notebook demonstrates how to use the output manifest file from the SageMaker Ground Truth job to build a model.

## Get the output manifest file

In [None]:
import boto3
import os

JOBNAME = '' #Replace it with the labeling job name
REGION = '' #Replace it with the job region
client = boto3.client('sagemaker',region_name=REGION)
response = client.describe_labeling_job(LabelingJobName= JOBNAME)
file = response['LabelingJobOutput']['OutputDatasetS3Uri']
output_manifest = os.path.basename(file)
!aws s3 cp $file ./

## Read manifest file

In [None]:
import pandas as pd
import numpy as np
import json

# Filled by extract_label, one entry per manifest row.
extracted_labels = []

# lines=True: the manifest is read as one JSON record per line.
output_manifest = pd.read_json("output-manifest.manifest", lines=True)
def extract_label(x):
    """Record the label chosen by the first worker for one manifest row.

    ``x`` is indexed like a mapping and must contain the nested
    'dicom-label-job' annotation structure. The label found there is appended
    to the module-level ``extracted_labels`` list; nothing is returned.
    """
    first_worker = x['dicom-label-job']['annotationsFromAllWorkers'][0]
    annotation_content = first_worker['annotationData']['content']
    extracted_labels.append(annotation_content['labels']['label'])

In [None]:
# Start from an empty list so that re-running this cell does not append a
# second copy of every label (which would no longer match the row count).
extracted_labels.clear()
output_manifest.apply(extract_label, axis=1)

In [None]:
# Attach the collected labels to the manifest as a new 'label' column.
output_manifest['label'] = extracted_labels

In [None]:
# Keep only the columns needed downstream by dropping the raw annotation
# columns from the manifest.
df = output_manifest.drop(
    columns=['labels', 'dicom-label-job', 'dicom-label-job-metadata'],
)

## Load DICOM images

In [None]:
import boto3
import pydicom
from pydicom.filebase import DicomBytesIO
from pydicom.filereader import dcmread
from PIL import Image

# Shared S3 client used by get_dicom_image to download each DICOM object.
s3 = boto3.client('s3')
# Filled by get_dicom_image with one PIL image per processed row.
images = []

def get_dicom_image(x):
    """Download the DICOM object referenced by a manifest row and keep it as an image.

    Parameters
    ----------
    x : mapping-like (for example a DataFrame row)
        Must provide a 'source-ref' entry holding an S3 URI of the form
        ``s3://<bucket>/<key>``.

    The object is fetched from S3, parsed with pydicom, converted to a PIL
    image from its pixel array, and appended to the module-level ``images``
    list. Nothing is returned.
    """
    # "s3://bucket/some/key" splits into ['s3:', '', 'bucket', 'some/key'].
    # Capping the split at three cuts keeps the key, which may itself contain
    # '/', in one piece instead of re-joining it by hand.
    uri_parts = x['source-ref'].split("/", 3)
    bucket = uri_parts[2]
    key = uri_parts[3] if len(uri_parts) > 3 else ""

    fileobj = s3.get_object(Bucket=bucket, Key=key)
    dicom_bytes = DicomBytesIO(fileobj['Body'].read())
    ds = dcmread(dicom_bytes)
    images.append(Image.fromarray(ds.pixel_array))

In [None]:
# Start from an empty list so that re-running this cell does not download and
# append every image a second time (the image count must match the row count).
images.clear()
df.apply(get_dicom_image, axis=1)

In [None]:
# Filled by imresize, one array per resized image.
resized_images = []


def imresize(arr, size, resample):
    """Resize an image array and store the result.

    ``arr`` is converted to a PIL image, resized to ``size`` using the
    ``resample`` filter, converted back to a NumPy array, and appended to the
    module-level ``resized_images`` list. Nothing is returned.
    """
    resized = Image.fromarray(arr).resize(size, resample)
    resized_images.append(np.array(resized))

In [None]:
# Bring every loaded image to 224x224 (the model's input size) using
# nearest-neighbour resampling.
for pil_image in images:
    imresize(np.array(pil_image), (224, 224), resample=Image.NEAREST)

In [None]:
# Stack the resized images into one array and add a trailing channel axis of
# size 1; -1 lets NumPy work out the number of images.
resized_images = np.reshape(np.array(resized_images), (-1, 224, 224, 1))

## Train model using GroundTruth Labels

In [None]:
# Features are the image array; targets are the Ground Truth labels.
X, y = np.array(resized_images), df['label']

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import class_weight
# Replace the label values with integer class ids. The fitted encoder is kept
# so the ids can be mapped back to the original labels later.
label_encoder = LabelEncoder()
label_encoder.fit(y)
y = label_encoder.transform(y)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for validation; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# `classes` and `y` are passed by keyword: current scikit-learn releases
# reject them as positional arguments, and older releases accept the keywords.
train_classes = np.unique(y_train)
balanced_weights = class_weight.compute_class_weight(
    'balanced', classes=train_classes, y=y_train
)

# Keras takes `class_weight` as a {class index: weight} mapping, so pair each
# class id with its weight instead of handing over the bare array.
cls_weight = {
    int(cls): float(weight)
    for cls, weight in zip(train_classes, balanced_weights)
}

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Augmentation: small random rotations, zooms and shifts only. All
# normalisation, whitening and flipping options are explicitly switched off.
datagen = ImageDataGenerator(
    # geometric jitter
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=False,
    vertical_flip=False,
    # normalisation / whitening (disabled)
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

datagen.fit(x_train)

In [None]:
# Import from `keras.utils`: the `keras.utils.np_utils` module path is not
# available in current Keras releases, while this path works in old and new.
from keras.utils import to_categorical

# Width of the one-hot vectors; must equal the number of units in the model's
# final softmax layer.
NUM_CLASSES = 14

y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

In [None]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras import backend as K

# Small CNN: two 5x5 convolution stages with batch normalisation, one max-pool,
# then a dense head ending in a 14-way softmax.
model = tf.keras.models.Sequential([
    # Convolutional feature extractor on 224x224 single-channel input.
    tf.keras.layers.Conv2D(32, kernel_size=(5, 5), activation='relu', input_shape=(224, 224, 1)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.40),
    tf.keras.layers.Conv2D(64, (5, 5), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    # Classifier head.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.40),
    tf.keras.layers.Dense(14, activation='softmax'),
])

In [None]:

# `learning_rate` replaces the deprecated `lr` spelling. `epsilon` and `decay`
# are no longer passed: the original values (None / 0.0) only asked for the
# library defaults, and `decay` is rejected by newer Keras optimizers.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    amsgrad=False,
)
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
from keras.callbacks import ReduceLROnPlateau

# Halve the learning rate when validation accuracy has not improved for 10
# epochs. `min_lr` must lie below the optimizer's starting rate (0.001):
# with min_lr equal to the starting rate the callback can never lower it.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=10,
    verbose=1,
    factor=0.5,
    min_lr=0.00001,
)

In [None]:
# NOTE: this compiles the model again, so the optimizer instance configured in
# the earlier compile call is replaced by a default-configured 'adam'.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
epochs = 50
batch_size = 10

# One epoch is one pass over the training set. Round up so a final partial
# batch is included and Keras receives an integer step count, not a float.
steps_per_epoch = int(np.ceil(x_train.shape[0] / batch_size))

# `Model.fit` accepts the augmenting generator directly; `fit_generator` is
# deprecated. The generator uses the same `batch_size` variable that the step
# count is derived from, so the two cannot drift apart.
history = model.fit(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_data=(x_test, y_test),
    class_weight=cls_weight,
    callbacks=[learning_rate_reduction],
    verbose=1,
)