This notebook tests the output of the bioimage-search image-preprocessing service.
It assumes the "standard" organization of both the BBBC-021 dataset and the test output.

In [None]:
!pip install shortuuid

In [None]:
!pip install s3fs

In [None]:
# Print the working directory: the relative sys.path entries used for the
# project imports in this notebook are resolved against it.
!pwd

In [None]:
import boto3
import sys
import pandas as pd
import numpy as np
import s3fs
# path setup for SageMaker Studio
sys.path.insert(0, "../../../../bioimage-search/datasets/bbbc-021/scripts")
import bbbc021common as bb
sys.path.insert(0, "../../../../bioimage-search/main/src/common")
import bioimageimage as bi
from PIL import Image
from skimage.exposure import histogram
import matplotlib.pyplot as plt
import math

In [None]:
# S3 client; getBbbcMipByDapiKey uses it to download the source images.
s3c = boto3.client('s3')

In [None]:
# s3fs filesystem handle; anon=False requests authenticated (non-anonymous) access.
# NOTE(review): s3f is not referenced by any other visible cell — confirm it is still needed.
s3f = s3fs.S3FileSystem(anon=False)

In [None]:
# Bucket the BBBC-021 source images are read from.
# NOTE(review): both names look deployment-specific — adjust for your own stacks.
BBBC021_SOURCE_BUCKET='bioimagesearchbbbc021stack-bbbc021bucket544c3e64-10ecnwo51127'
# Bucket the ROI test output (ROI/...-roi.npy and ROI/...-roi.json) is read from.
BIOIMAGE_SEARCH_TEST_BUCKET='bioimagesearchbasestack-bioimagesearchtestbucket3-djdwcbvul5zb'

In [None]:
# Name of the BBBC-021 plate inspected in this notebook.
plateName = 'Week10_40111'

In [None]:
# Plate-info object (from bbbc021common) for plateName in the source bucket.
Week10_40111_Info = bb.Bbbc021PlateInfo(BBBC021_SOURCE_BUCKET, plateName)

In [None]:
# DAPI image keys of the plate; entries are later used as S3 object keys.
dapiFileList = Week10_40111_Info.getDapiFileList()

We use the DAPI channel as the index key for the full set of channels

In [None]:
# Use the first DAPI image in the list as the test case for the rest of the notebook.
testImageKey = dapiFileList[0]

In [None]:
# Display the selected DAPI key.
testImageKey

Load the source images and generate their MIP

In [None]:
def getBbbcImageFilesByDapiKey(dapiImageKey, plateInfo):
    """Collect the per-channel file keys that belong to one DAPI image.

    Args:
        dapiImageKey: Key of the DAPI-channel image; it doubles as the
            lookup key for the other channels.
        plateInfo: Object offering getActinFileByDapi() and
            getTubulinFileByDapi() lookups.

    Returns:
        dict with the keys 'dapi', 'actin' and 'tubulin', each mapped to
        the corresponding file key.
    """
    return {
        'dapi': dapiImageKey,
        'actin': plateInfo.getActinFileByDapi(dapiImageKey),
        'tubulin': plateInfo.getTubulinFileByDapi(dapiImageKey),
    }

In [None]:
# Channel file keys (dapi / actin / tubulin) for the test image.
t1 = getBbbcImageFilesByDapiKey(testImageKey, Week10_40111_Info)

In [None]:
# Display the channel-key mapping.
t1

In [None]:
def getBbbcMipByDapiKey(dapiImageKey, bucket, plateInfo):
    """Build the MIP for the channel set identified by a DAPI image key.

    The DAPI, tubulin and actin images (in that order) are downloaded from
    S3 with the module-level client `s3c`, stacked into one array,
    normalized with the bioimageimage helpers and combined by bi.calcMip.

    Args:
        dapiImageKey: S3 key of the DAPI image; the other channel keys are
            looked up from it.
        bucket: Name of the S3 bucket holding the source images.
        plateInfo: Plate-info object passed on to getBbbcImageFilesByDapiKey.

    Returns:
        Whatever bi.calcMip returns for the normalized channel stack.
    """
    channelKeys = getBbbcImageFilesByDapiKey(dapiImageKey, plateInfo)
    orderedKeys = [channelKeys[name] for name in ('dapi', 'tubulin', 'actin')]

    planes = []
    for s3Key in orderedKeys:
        body = s3c.get_object(Bucket=bucket, Key=s3Key)['Body']
        planes.append(np.array(Image.open(body)))
    stack = bi.normImageData(np.array(planes))

    # Scratch plane, refilled with each channel's background average in turn.
    background = np.zeros(shape=stack[0].shape, dtype=stack.dtype)
    channelCount = stack.shape[0]
    for channelIndex in range(channelCount):
        plane = stack[channelIndex]
        # 100-bin histogram; 0.20 is the cutoff argument handed to
        # bi.findHistCutoff (exact meaning is defined in bioimageimage).
        planeHistogram = histogram(plane, 100)
        cutoff = bi.findHistCutoff(planeHistogram, 0.20)
        background.fill(bi.findCutoffAvg(plane, cutoff))
        # Return value is unused, so this presumably normalizes `plane`
        # in place — TODO confirm in bioimageimage.
        bi.normalizeChannel(background, plane)

    return bi.calcMip(stack, bi.getColors(channelCount))

In [None]:
# MIP of the test image, built from the images in the source bucket.
mip = getBbbcMipByDapiKey(testImageKey, BBBC021_SOURCE_BUCKET, Week10_40111_Info)

In [None]:
# Inspect the dimensions of the MIP array.
mip.shape

In [None]:
def showNdFloatArrImage(ndarr):
    """Render an array as one large image with the axis ticks hidden.

    The array is converted with PIL's Image.fromarray; the PIL image's
    size and format attributes are printed before the image is drawn.

    Args:
        ndarr: Array accepted by PIL.Image.fromarray.
    """
    plt.figure(figsize=(20, 15))
    plt.subplot(1, 1, 1)
    for hideTicks in (plt.xticks, plt.yticks):
        hideTicks([])
    pilImage = Image.fromarray(ndarr)
    # Note: the value printed after "shape=" is PIL's size attribute.
    print("shape=", pilImage.size)
    print("format=", pilImage.format)
    plt.imshow(pilImage)
    plt.show()

In [None]:
# Show the unannotated MIP.
showNdFloatArrImage(mip)

Next, we load the ROIs and show outlines:

ROI compute:

 {bucket}/ROI/{DAPI image key without its extension}-roi.npy (contains normalized multichannel ROI data ready for training)
 {bucket}/ROI/{DAPI image key without its extension}-roi.json (contains list of ROI coordinates wrt raw image, ordered wrt the npy file)

In [None]:
def loadROIDataByDapiKey(dapiImageKey, bucket):
    """Fetch the ROI array and the ROI metadata written for one DAPI image.

    Both S3 keys share the stem 'ROI/' + dapiImageKey minus its last four
    characters + '-roi'; the array uses the suffix '.npy' and the metadata
    the suffix '.json'.

    Args:
        dapiImageKey: Key of the DAPI image. Its last four characters are
            dropped (assumes a four-character suffix such as '.tif' —
            TODO confirm against the keys the service writes).
        bucket: Name of the S3 bucket holding the ROI output.

    Returns:
        Tuple (roiData, roiInfo): the result of bi.getNumpyArrayFromS3 and
        the result of bi.loadJsonObjectFromS3.
    """
    keyStem = 'ROI/' + dapiImageKey[:-4] + '-roi'
    # NOTE(review): the two helpers take (bucket, key) and (key, bucket)
    # respectively — confirm both orders against bioimageimage.
    roiArray = bi.getNumpyArrayFromS3(bucket, keyStem + '.npy')
    roiMeta = bi.loadJsonObjectFromS3(keyStem + '.json', bucket)
    return roiArray, roiMeta

In [None]:
# ROI array and ROI metadata for the test image, read from the test bucket.
roiData, roiInfo = loadROIDataByDapiKey(testImageKey, BIOIMAGE_SEARCH_TEST_BUCKET)

In [None]:
# Display the ROI metadata ('roisize' and the 'roi' coordinate list are used in later cells).
roiInfo

In [None]:
def roiAnnotate2DDataXY(data, roiInfo, color, index=-1):
    """Draw rectangular ROI outlines into a 2-D image, in place.

    Each ROI covers rows y .. y+roisize.y-1 and columns x .. x+roisize.x-1,
    the same region that is cropped with data[y:y+roiY, x:x+roiX]. The
    outline is drawn on the outermost pixels of that region, so an ROI that
    touches the bottom or right border of the image is handled without
    indexing past the end of the data.

    Args:
        data: Image indexable as data[row][col] (nested lists or an
            ndarray); it is modified in place.
        roiInfo: dict with 'roisize' ({'x': width, 'y': height}) and 'roi'
            (sequence of {'x': left, 'y': top} dicts).
        color: Value assigned to every outline pixel.
        index: Position in roiInfo['roi'] of the only ROI to outline; any
            negative value (the default) outlines all ROIs.

    Returns:
        None.
    """
    roiWidth = roiInfo['roisize']['x']
    roiHeight = roiInfo['roisize']['y']
    if roiWidth <= 0 or roiHeight <= 0:
        # A degenerate ROI size has no outline to draw.
        return

    for roiIndex, roi in enumerate(roiInfo['roi']):
        if index >= 0 and roiIndex != index:
            continue
        left, top = roi['x'], roi['y']
        # Last column/row that still belongs to the ROI (inclusive), hence
        # the -1: left+roiWidth / top+roiHeight are already outside it.
        right = left + roiWidth - 1
        bottom = top + roiHeight - 1
        for col in range(left, right + 1):
            data[top][col] = color
            data[bottom][col] = color
        for row in range(top, bottom + 1):
            data[row][left] = color
            data[row][right] = color

In [None]:
# Annotate a copy: roiAnnotate2DDataXY writes into the array it is given,
# and the unmodified mip is still needed for the ROI crops in later cells.
mip2=np.copy(mip)

In [None]:
# Outline only the first ROI (index 0) with the color [255.0, 0.0, 0.0]
# (red, assuming the last axis of the MIP is RGB — TODO confirm).
roiAnnotate2DDataXY(mip2, roiInfo, [255.0, 0.0, 0.0], 0)

In [None]:
# Show the MIP copy with the ROI outline drawn in.
showNdFloatArrImage(mip2)

In [None]:
def displayRoiImages(mip, roiInfo):
    """Show every ROI crop of the MIP in one figure, ten crops per row.

    Prints the number of ROIs, then draws each crop
    mip[y:y+roisize.y, x:x+roisize.x] in its own subplot, labelled with
    the ROI's position in roiInfo['roi'].

    Args:
        mip: Image array that supports 2-D slicing [rows, cols].
        roiInfo: dict with 'roisize' ({'x': width, 'y': height}) and 'roi'
            (sequence of {'x': left, 'y': top} dicts).
    """
    plt.figure(figsize=(25, 35))
    cropWidth = roiInfo['roisize']['x']
    cropHeight = roiInfo['roisize']['y']
    rois = roiInfo['roi']
    roiCount = len(rois)
    gridCols = 10
    gridRows = math.ceil(roiCount / gridCols)
    print("Count=", roiCount)
    for position, roi in enumerate(rois):
        left, top = roi['x'], roi['y']
        cropImage = Image.fromarray(mip[top:top + cropHeight, left:left + cropWidth])
        plt.subplot(gridRows, gridCols, position + 1)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(cropImage)
        plt.xlabel(position)

In [None]:
# Grid of all ROI crops taken from the unannotated MIP.
displayRoiImages(mip, roiInfo)

In [None]:
def displayRoiChannelImages(mip, roiInfo, roiData):
    """Show, for each ROI, its MIP crop next to one image per ROI channel.

    One figure is produced per ROI: the first panel is the crop
    mip[y:y+roisize.y, x:x+roisize.x], followed by one panel for every
    channel of roiData[i], each rendered through bi.calcMip(channel,
    bi.colors). All panels of an ROI share one row and are labelled with
    the ROI's position in roiInfo['roi'].

    Every figure is shown and then closed before the next one is created,
    so open figures do not pile up when there are many ROIs. Nothing is
    drawn (and roiData is not touched) when roiInfo['roi'] is empty.

    Args:
        mip: Image array that supports 2-D slicing [rows, cols].
        roiInfo: dict with 'roisize' ({'x': width, 'y': height}) and 'roi'
            (sequence of {'x': left, 'y': top} dicts).
        roiData: Indexable by ROI position, in the same order as
            roiInfo['roi']; roiData[i] must expose .shape and be indexable
            by channel (presumably laid out as (channel, ...) — TODO
            confirm against the -roi.npy writer).

    Returns:
        None.
    """
    roiWidth = roiInfo['roisize']['x']
    roiHeight = roiInfo['roisize']['y']

    for roiIndex, roi in enumerate(roiInfo['roi']):
        chanData = roiData[roiIndex]
        channelCount = chanData.shape[0]
        # One panel for the MIP crop plus one per channel, on a single row.
        panelCount = channelCount + 1

        fig = plt.figure(figsize=(25, 35))

        # Panel 1: crop of the MIP.
        left, top = roi['x'], roi['y']
        mipCrop = Image.fromarray(mip[top:top + roiHeight, left:left + roiWidth])
        plt.subplot(1, panelCount, 1)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(mipCrop)
        plt.xlabel(roiIndex)

        # Panels 2..n: the individual channels of the stored ROI data.
        for channelIndex in range(channelCount):
            channelMip = bi.calcMip(chanData[channelIndex], bi.colors)
            channelImage = Image.fromarray(channelMip)
            plt.subplot(1, panelCount, channelIndex + 2)
            plt.xticks([])
            plt.yticks([])
            plt.imshow(channelImage)
            plt.xlabel(roiIndex)

        # Render now and release the figure; otherwise every ROI leaves an
        # open figure behind until the cell finishes.
        plt.show()
        plt.close(fig)

In [None]:
# Per-ROI comparison: MIP crop next to each channel of the stored ROI data.
displayRoiChannelImages(mip, roiInfo, roiData)