#!/usr/bin/env python import argparse import dcmstack from glob import glob import pydicom import nibabel as nib import numpy as np import sys import os import json import time import logging from nilearn import plotting import matplotlib.pyplot as plt import radiomics_utils as utils logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--subject', type=str, default='R01-003', help='Subject ID (default: R01-003)') parser.add_argument('--feature_store_name', type=str, default='nsclc-radiogenomics-imaging-feature-group', help='SageMaker Feature Store Group Name (default: nsclc-radiogenomics-imaging-feature-group)') parser.add_argument('--offline_store_s3uri', type=str, help='SageMaker Feature Offline Store S3 URI Example: s3://multimodal-image-data-processed/nsclc-radiogenomics-multimodal-imaging-featurestore.') args = parser.parse_args() data_dir = '/opt/ml/processing/input/' output_dir = '/opt/ml/processing/output/' # assume one subject comes in, # we need to find out where the CT dicom files are # and segmentation file logger.info('Locating DICOM files') jsons = glob(os.path.join(data_dir, '*', '*', '*.json')) logger.info('%d jsons found.' % len(jsons)) print('%d jsons found.' % len(jsons)) valid_jsons = [] file_info = [] seg_string = ['3D Slicer segmentation result', 'ePAD Generated DSO'] for i, jsonfile in enumerate(jsons): with open(jsonfile) as f: aa = json.load(f) if seg_string[0].lower() in aa['Total'][-1].lower() or seg_string[1].lower() in aa['Total'][-1].lower(): logger.debug(aa['Total'][-3:]) print(aa['Total'][-3:]) valid_jsons.append(jsonfile) file_info.append([aa['StudyUID'], aa['Date'], aa['SeriesUID'], jsonfile]) if len(valid_jsons) > 1: raise Exception('there are more than one segmentation for this patient') print(file_info) src_dcms = glob(os.path.join(data_dir, file_info[0][0], file_info[0][1], '*', '*dcm')) src_seg_dcm = [i for i in src_dcms if file_info[0][2] in i] logging.info(src_seg_dcm) src_dcms.remove(src_seg_dcm[0]) #print(src_dcms) print('# of src_dcms: %d' % len(src_dcms)) print('# of src_seg_dcm: %d' % len(src_seg_dcm)) # work with CT scan and load as a Nifti image logger.info('Creating nifti images from DICOM files') stacks = dcmstack.parse_and_stack(src_dcms) stack = list(stacks.values())[0] nii = stack.to_nifti() img = nii.get_fdata() # work with CT segmentation file, load as a numpy array and create a Nifti image dcm = pydicom.dcmread(src_seg_dcm[0]) n_frames_seg = int(dcm.NumberOfFrames) seg = dcm.pixel_array # reorient the seg array seg = np.fliplr(seg.T) # if seg and img don't have the same dimension, pad the images if img.shape != seg.shape: # read all dicoms and parse out the instance number and slice location # assuming the files are from R01-098 onwards with ePAD Generated DSO d_sort_instance_number=[] for tmp_dcm_fname in src_dcms: tmp_dcm = pydicom.dcmread(tmp_dcm_fname) d_sort_instance_number.append((int(tmp_dcm[0x0020, 0x0013].value), tmp_dcm[0x0020, 0x0032].value)) d_sort_instance_number = sorted(d_sort_instance_number, key=lambda aa: aa[0]) patient_img_position_first = dcm[0x5200, 0x9230][0][0x0020, 0x9113][0]['ImagePositionPatient'].value patient_img_position_last = dcm[0x5200, 0x9230][-1][0x0020, 0x9113][0]['ImagePositionPatient'].value slice_instance_number_1 = [i for i, j in d_sort_instance_number if j == patient_img_position_first][0] slice_instance_number_2 = [i for i, j in d_sort_instance_number if j == patient_img_position_last][0] top_slice_instance_number = min(slice_instance_number_1, slice_instance_number_2) # logger.debug(np.nonzero(seg.sum(axis=1).sum(axis=1))[0]) tmp_seg = np.zeros_like(img) starting_index = img.shape[-1] - top_slice_instance_number - n_frames_seg # the seg and the image is flipped and need to locate from bottom. ending_index = starting_index + n_frames_seg tmp_seg[:, :, starting_index:ending_index] = seg seg = tmp_seg seg_nii = nib.Nifti1Image(seg, nii.affine, header = nii.header) # save some viz logger.info('Saving files.') prefix = '%s_%s' % (args.subject, file_info[0][1]) f1 = plt.figure(figsize=(16,6)) g1 = plotting.plot_roi(seg_nii, bg_img = nii, figure = f1, alpha = 0.4, title = 'Lung CT with segmentation') g1.savefig(os.path.join(output_dir, 'PNG', '%s_ortho-view.png' % prefix), dpi = 150) f2 = plt.figure(figsize=(16,6)) g2 = plotting.plot_roi(seg_nii, bg_img = nii, figure = f2, alpha = 0.4, title = 'Lung CT with segmentation', display_mode='z', cut_coords=4) g2.savefig(os.path.join(output_dir, 'PNG', '%s_z-view.png' % prefix), dpi = 150) # save images imageName = os.path.join(output_dir, 'CT-Nifti', '%s.nii.gz' % prefix) maskName = os.path.join(output_dir, 'CT-SEG', '%s.nii.gz' % prefix) nii.to_filename(imageName) seg_nii.to_filename(maskName) # compute radiomic features logging.info('Computing radiomic features') df = utils.compute_features(imageName, maskName) # format dataframe for feature store record_id_column = 'Subject' event_time_column = 'EventTime' df[record_id_column] = args.subject current_time_sec = float(round(time.time())) df[event_time_column] = current_time_sec df['ScanDate'] = file_info[0][1] utils.cast_object_to_string(df) # check if feature store exists feature_group = utils.check_feature_group(args.feature_store_name) if not feature_group: feature_group = utils.create_feature_group(args.feature_store_name, df, args.offline_store_s3uri, record_id = record_id_column, event_time = event_time_column, enable_online_store = True) # ingest features into a FeatureStore feature_group.ingest(data_frame=df, max_workers=1, wait=True) print('Processing done for %s' % prefix) logging.info('Processing done for %s' % prefix)