{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Bioimage-search image-preprocessing output test\n",
    "\n",
    "This notebook tests the output of the bioimage-search image-preprocessing service.\n",
    "It assumes the \"standard\" organization of both the BBBC-021 dataset and the test output."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install shortuuid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install s3fs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "import os\n",
    "import sys\n",
    "\n",
    "import boto3\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import s3fs\n",
    "from PIL import Image\n",
    "from skimage.exposure import histogram\n",
    "\n",
    "# path setup for SageMaker Studio\n",
    "sys.path.insert(0, \"../../../../bioimage-search/datasets/bbbc-021/scripts\")\n",
    "import bbbc021common as bb\n",
    "sys.path.insert(0, \"../../../../bioimage-search/main/src/common\")\n",
    "import bioimageimage as bi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s3c = boto3.client('s3')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s3f = s3fs.S3FileSystem(anon=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BBBC021_SOURCE_BUCKET='bioimagesearchbbbc021stack-bbbc021bucket544c3e64-10ecnwo51127'\n",
    "BIOIMAGE_SEARCH_TEST_BUCKET='bioimagesearchbasestack-bioimagesearchtestbucket3-djdwcbvul5zb'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plateName = 'Week10_40111'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Week10_40111_Info = bb.Bbbc021PlateInfo(BBBC021_SOURCE_BUCKET, plateName)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dapiFileList = Week10_40111_Info.getDapiFileList()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We use the DAPI channel as the index key for the full set of channels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "testImageKey = dapiFileList[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "testImageKey"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load the source images and generate their MIP."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getBbbcImageFilesByDapiKey(dapiImageKey, plateInfo):\n",
    "    \"\"\"Return the image keys of all three channels for one DAPI image key.\n",
    "\n",
    "    Args:\n",
    "        dapiImageKey: key of the DAPI channel image, used as the index key.\n",
    "        plateInfo: plate object providing getActinFileByDapi() and\n",
    "            getTubulinFileByDapi().\n",
    "\n",
    "    Returns:\n",
    "        dict with the keys 'dapi', 'actin' and 'tubulin'.\n",
    "    \"\"\"\n",
    "    return {\n",
    "        'dapi': dapiImageKey,\n",
    "        'actin': plateInfo.getActinFileByDapi(dapiImageKey),\n",
    "        'tubulin': plateInfo.getTubulinFileByDapi(dapiImageKey),\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "testImageFileKeys = getBbbcImageFilesByDapiKey(testImageKey, Week10_40111_Info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "testImageFileKeys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getBbbcMipByDapiKey(dapiImageKey, bucket, plateInfo):\n",
    "    \"\"\"Build the MIP image for the field of view identified by its DAPI key.\n",
    "\n",
    "    The channel images are read from S3 with the notebook-level ``s3c``\n",
    "    client in the order dapi, tubulin, actin, processed with the\n",
    "    ``bioimageimage`` helpers and combined by ``bi.calcMip``.\n",
    "\n",
    "    Args:\n",
    "        dapiImageKey: key of the DAPI channel image.\n",
    "        bucket: name of the S3 bucket holding the source images.\n",
    "        plateInfo: plate object used to look up the other channel keys.\n",
    "\n",
    "    Returns:\n",
    "        The value returned by ``bi.calcMip`` for the processed channels.\n",
    "    \"\"\"\n",
    "    fileKeys = getBbbcImageFilesByDapiKey(dapiImageKey, plateInfo)\n",
    "    channels = []\n",
    "    for channelName in ('dapi', 'tubulin', 'actin'):\n",
    "        body = s3c.get_object(Bucket=bucket, Key=fileKeys[channelName])['Body']\n",
    "        try:\n",
    "            # np.array() forces the pixel data to be read before the stream is closed.\n",
    "            channels.append(np.array(Image.open(body)))\n",
    "        finally:\n",
    "            body.close()\n",
    "    input_data = bi.normImageData(np.array(channels))\n",
    "\n",
    "    # Scratch image reused for every channel; refilled with that channel's bavg value.\n",
    "    bavgFill = np.zeros(shape=input_data[0].shape, dtype=input_data.dtype)\n",
    "    for channelData in input_data:\n",
    "        h1 = histogram(channelData, 100)\n",
    "        bcut = bi.findHistCutoff(h1, 0.20)\n",
    "        bavg = bi.findCutoffAvg(channelData, bcut)\n",
    "        bavgFill.fill(bavg)\n",
    "        # NOTE(review): the return value is unused, so this presumably updates\n",
    "        # channelData in place - confirm against bioimageimage.\n",
    "        bi.normalizeChannel(bavgFill, channelData)\n",
    "\n",
    "    ca = bi.getColors(input_data.shape[0])\n",
    "    return bi.calcMip(input_data, ca)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mip = getBbbcMipByDapiKey(testImageKey, BBBC021_SOURCE_BUCKET, Week10_40111_Info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mip.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def showNdFloatArrImage(ndarr):\n",
    "    \"\"\"Show an image array as one large figure without axis ticks.\n",
    "\n",
    "    The array is converted with ``Image.fromarray``; the resulting image\n",
    "    size and format are printed before the figure is shown.\n",
    "\n",
    "    Args:\n",
    "        ndarr: image array accepted by ``PIL.Image.fromarray``.\n",
    "    \"\"\"\n",
    "    im = Image.fromarray(ndarr)\n",
    "    print(\"shape=\", im.size)\n",
    "    print(\"format=\", im.format)\n",
    "    fig, ax = plt.subplots(figsize=(20, 15))\n",
    "    ax.set_xticks([])\n",
    "    ax.set_yticks([])\n",
    "    ax.imshow(im)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "showNdFloatArrImage(mip)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Next, we load the ROIs and show their outlines.\n",
    "\n",
    "ROI compute output (keys in the test bucket):\n",
    "\n",
    "    ROI/<image-key-without-extension>-roi.npy (contains normalized multichannel ROI data ready for training)\n",
    "    ROI/<image-key-without-extension>-roi.json (contains list of ROI coordinates wrt raw image, ordered wrt the npy file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def loadROIDataByDapiKey(dapiImageKey, bucket):\n",
    "    \"\"\"Load the ROI pixel data and ROI description for one image.\n",
    "\n",
    "    Args:\n",
    "        dapiImageKey: key of the DAPI channel image the ROIs belong to.\n",
    "        bucket: name of the S3 bucket holding the ROI output.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(roiData, roiInfo)``: the array stored under\n",
    "        ``ROI/<key without extension>-roi.npy`` and the object stored under\n",
    "        ``ROI/<key without extension>-roi.json``.\n",
    "    \"\"\"\n",
    "    # splitext removes the extension whatever its length; the previous [:-4]\n",
    "    # slice was only correct for three-character extensions such as '.tif'.\n",
    "    roiKeyPrefix = 'ROI/' + os.path.splitext(dapiImageKey)[0] + '-roi'\n",
    "    roiDataKey = roiKeyPrefix + '.npy'\n",
    "    roiJsonKey = roiKeyPrefix + '.json'\n",
    "    # NOTE(review): the two helpers take (bucket, key) and (key, bucket)\n",
    "    # respectively - verify the argument order against bioimageimage.\n",
    "    roiData = bi.getNumpyArrayFromS3(bucket, roiDataKey)\n",
    "    roiInfo = bi.loadJsonObjectFromS3(roiJsonKey, bucket)\n",
    "    return roiData, roiInfo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roiData, roiInfo = loadROIDataByDapiKey(testImageKey, BIOIMAGE_SEARCH_TEST_BUCKET)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roiInfo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def roiAnnotate2DDataXY(data, roiInfo, color, index=-1):\n",
    "    \"\"\"Draw rectangular ROI outlines onto an image array, in place.\n",
    "\n",
    "    The outline is drawn on the first and last row/column that belong to the\n",
    "    ROI, i.e. on the border of ``data[y:y+roiY, x:x+roiX]``.\n",
    "\n",
    "    Args:\n",
    "        data: numpy image array indexed [row, column]; modified in place.\n",
    "        roiInfo: dict with 'roisize' ({'x': width, 'y': height}) and 'roi'\n",
    "            (list of {'x': ..., 'y': ...} top-left corners).\n",
    "        color: value assigned to every outline pixel.\n",
    "        index: position in roiInfo['roi'] of the single ROI to outline; a\n",
    "            negative value (the default) outlines every ROI.\n",
    "    \"\"\"\n",
    "    roiX = roiInfo['roisize']['x']\n",
    "    roiY = roiInfo['roisize']['y']\n",
    "    height, width = data.shape[0], data.shape[1]\n",
    "    for i, roi in enumerate(roiInfo['roi']):\n",
    "        if index >= 0 and i != index:\n",
    "            continue\n",
    "        x0, y0 = roi['x'], roi['y']\n",
    "        # Last column/row inside the ROI, clamped to the image so that an ROI\n",
    "        # touching the right or bottom border cannot index past the array.\n",
    "        x1 = min(x0 + roiX, width) - 1\n",
    "        y1 = min(y0 + roiY, height) - 1\n",
    "        data[y0, x0:x1 + 1] = color\n",
    "        data[y1, x0:x1 + 1] = color\n",
    "        data[y0:y1 + 1, x0] = color\n",
    "        data[y0:y1 + 1, x1] = color"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mipAnnotated = np.copy(mip)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roiAnnotate2DDataXY(mipAnnotated, roiInfo, [255.0, 0.0, 0.0], 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "showNdFloatArrImage(mipAnnotated)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def displayRoiImages(mip, roiInfo):\n",
    "    \"\"\"Show every ROI cropped from the MIP image in a grid, ten per row.\n",
    "\n",
    "    Prints the number of ROIs; each panel is labelled with the ROI's position\n",
    "    in roiInfo['roi'].\n",
    "\n",
    "    Args:\n",
    "        mip: image array indexed [row, column].\n",
    "        roiInfo: dict with 'roisize' ({'x': width, 'y': height}) and 'roi'\n",
    "            (list of {'x': ..., 'y': ...} top-left corners).\n",
    "    \"\"\"\n",
    "    roiX = roiInfo['roisize']['x']\n",
    "    roiY = roiInfo['roisize']['y']\n",
    "    roiArr = roiInfo['roi']\n",
    "    roiCount = len(roiArr)\n",
    "    print(\"Count=\", roiCount)\n",
    "    if roiCount == 0:\n",
    "        # Nothing to draw; avoid creating an empty figure.\n",
    "        return\n",
    "    columns = 10\n",
    "    rows = math.ceil(roiCount / columns)\n",
    "    plt.figure(figsize=(25, 35))\n",
    "    for i, roi in enumerate(roiArr):\n",
    "        x0, y0 = roi['x'], roi['y']\n",
    "        plt.subplot(rows, columns, i + 1)\n",
    "        plt.xticks([])\n",
    "        plt.yticks([])\n",
    "        plt.imshow(Image.fromarray(mip[y0:y0 + roiY, x0:x0 + roiX]))\n",
    "        plt.xlabel(i)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
"source": [ "displayRoiImages(mip, roiInfo)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def displayRoiChannelImages(mip, roiInfo, roiData):\n", " #plt.figure(figsize=(25,600))\n", " roiSize = roiInfo['roisize']\n", " roiX=roiSize['x']\n", " roiY=roiSize['y']\n", " roiArr = roiInfo['roi']\n", " i=0\n", " #s=0\n", " #l =len(roiArr)\n", " w = roiData[0].shape[0] + 1\n", " spx=w\n", " #spy=l\n", " for roi in roiArr:\n", " s=0\n", " plt.figure(figsize=(25,35))\n", " l = 1\n", " spy = l\n", " # Mip\n", " x0=roi['x']\n", " y0=roi['y']\n", " x1 = x0 + roiX\n", " y1 = y0 + roiY\n", " mipData=mip[y0:y1,x0:x1]\n", " im=Image.fromarray(mipData)\n", " plt.subplot(spy,spx,s+1)\n", " plt.xticks([])\n", " plt.yticks([])\n", " plt.imshow(im)\n", " plt.xlabel(i)\n", " s+=1\n", " chanData=roiData[i]\n", " for c in range(chanData.shape[0]):\n", " chan=chanData[c]\n", " mc = bi.calcMip(chan, bi.colors)\n", " ci=Image.fromarray(mc)\n", " plt.subplot(spy,spx,s+1)\n", " plt.xticks([])\n", " plt.yticks([])\n", " plt.imshow(ci)\n", " plt.xlabel(i)\n", " s+=1\n", " i+=1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "displayRoiChannelImages(mip, roiInfo, roiData)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "instance_type": "ml.t3.medium", "kernelspec": { "display_name": "Python 3 (Data Science)", "language": "python", "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }