{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook tests the output of the bioimage-search image-preprocessing service.\n",
    "It assumes the \"standard\" organization of both the BBBC-021 dataset and the test output."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install shortuuid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install s3fs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "import sys\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import s3fs\n",
    "# path setup for SageMaker Studio\n",
    "sys.path.insert(0, \"../../../../bioimage-search/datasets/bbbc-021/scripts\")\n",
    "import bbbc021common as bb\n",
    "sys.path.insert(0, \"../../../../bioimage-search/main/src/common\")\n",
    "import bioimageimage as bi\n",
    "from PIL import Image\n",
    "from skimage.exposure import histogram\n",
    "import matplotlib.pyplot as plt\n",
    "import math"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s3c = boto3.client('s3')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s3f = s3fs.S3FileSystem(anon=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BBBC021_SOURCE_BUCKET='bioimagesearchbbbc021stack-bbbc021bucket544c3e64-10ecnwo51127'\n",
    "BIOIMAGE_SEARCH_TEST_BUCKET='bioimagesearchbasestack-bioimagesearchtestbucket3-djdwcbvul5zb'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plateName = 'Week10_40111'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Week10_40111_Info = bb.Bbbc021PlateInfo(BBBC021_SOURCE_BUCKET, plateName)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dapiFileList = Week10_40111_Info.getDapiFileList()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We use the DAPI channel as the index key for the full set of channels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "testImageKey = dapiFileList[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "testImageKey"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load generate MIP of source images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getBbbcImageFilesByDapiKey(dapiImageKey, plateInfo):\n",
    "    fileKeys={}\n",
    "    fileKeys['dapi']=dapiImageKey\n",
    "    fileKeys['actin']=plateInfo.getActinFileByDapi(dapiImageKey)\n",
    "    fileKeys['tubulin']=plateInfo.getTubulinFileByDapi(dapiImageKey)\n",
    "    return fileKeys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "t1 = getBbbcImageFilesByDapiKey(testImageKey, Week10_40111_Info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "t1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getBbbcMipByDapiKey(dapiImageKey, bucket, plateInfo):\n",
    "    input_arr=[]\n",
    "    fileKeys = getBbbcImageFilesByDapiKey(dapiImageKey, plateInfo)\n",
    "    fileArr = []\n",
    "    fileArr.append(fileKeys['dapi'])\n",
    "    fileArr.append(fileKeys['tubulin'])\n",
    "    fileArr.append(fileKeys['actin'])\n",
    "    for key in fileArr:\n",
    "        fileObject = s3c.get_object(Bucket=bucket, Key=key)\n",
    "        file_stream = fileObject['Body']\n",
    "        im = Image.open(file_stream)\n",
    "        pix = np.array(im)\n",
    "        input_arr.append(pix)\n",
    "    input_data = np.array(input_arr)\n",
    "    input_data = bi.normImageData(input_data)\n",
    "\n",
    "    bavgFill = np.zeros(shape=input_data[0].shape, dtype=input_data.dtype)\n",
    "    for c in range(input_data.shape[0]):\n",
    "        channelData = input_data[c]\n",
    "        h1 = histogram(channelData, 100)\n",
    "        bcut = bi.findHistCutoff(h1, 0.20)\n",
    "        bavg = bi.findCutoffAvg(channelData, bcut)\n",
    "        bavgFill.fill(bavg)\n",
    "        bi.normalizeChannel(bavgFill, channelData)\n",
    "        \n",
    "    ca = bi.getColors(input_data.shape[0])\n",
    "    mip = bi.calcMip(input_data, ca)\n",
    "    return mip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mip = getBbbcMipByDapiKey(testImageKey, BBBC021_SOURCE_BUCKET, Week10_40111_Info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mip.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def showNdFloatArrImage(ndarr):\n",
    "    plt.figure(figsize=(20,15))\n",
    "    plt.subplot(1,1,1)\n",
    "    plt.xticks([])\n",
    "    plt.yticks([])\n",
    "    im=Image.fromarray(ndarr)\n",
    "    print(\"shape=\", im.size)\n",
    "    print(\"format=\", im.format)\n",
    "    ip=plt.imshow(im)\n",
    "    #plt.xlabel(0)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "showNdFloatArrImage(mip)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Next, we load the ROIs and show outlines:\n",
    "\n",
    "ROI compute:\n",
    "\n",
    "    <output bucket>/ROI/<Plate>/<raw DAPI tif prefix>-roi.npy (contains normalized multichannel ROI data ready for training)\n",
    "    <output bucket>/ROI/<Plate>/<raw DAPI tif prefix>-roi.json (contains list of ROI coordinates wrt raw image, ordered wrt the npy file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def loadROIDataByDapiKey(dapiImageKey, bucket):\n",
    "    roiKeyPrefix = 'ROI/' + dapiImageKey[:-4] + '-roi'\n",
    "    roiDataKey = roiKeyPrefix + '.npy'\n",
    "    roiJsonKey = roiKeyPrefix + '.json'\n",
    "    roiData = bi.getNumpyArrayFromS3(bucket, roiDataKey)\n",
    "    roiInfo = bi.loadJsonObjectFromS3(roiJsonKey, bucket)\n",
    "    return roiData, roiInfo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roiData, roiInfo = loadROIDataByDapiKey(testImageKey, BIOIMAGE_SEARCH_TEST_BUCKET)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roiInfo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def roiAnnotate2DDataXY(data, roiInfo, color, index=-1):\n",
    "    roiSize = roiInfo['roisize']\n",
    "    roiX=roiSize['x']\n",
    "    roiY=roiSize['y']\n",
    "    roiArr = roiInfo['roi']\n",
    "    i=0\n",
    "    for roi in roiArr:\n",
    "        if index<0 or i==index:\n",
    "            x=roi['x']\n",
    "            y=roi['y']\n",
    "            for xi in range(roiX):\n",
    "                data[y][x+xi]=color\n",
    "                data[y+roiY][x+xi]=color\n",
    "            for yi in range(roiY):\n",
    "                data[y+yi][x]=color\n",
    "                data[y+yi][x+roiX]=color\n",
    "        i+=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mip2=np.copy(mip)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roiAnnotate2DDataXY(mip2, roiInfo, [255.0, 0.0, 0.0], 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "showNdFloatArrImage(mip2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def displayRoiImages(mip, roiInfo):\n",
    "    plt.figure(figsize=(25,35))\n",
    "    roiSize = roiInfo['roisize']\n",
    "    roiX=roiSize['x']\n",
    "    roiY=roiSize['y']\n",
    "    roiArr = roiInfo['roi']\n",
    "    i=0\n",
    "    l =len(roiArr)\n",
    "    spx=10\n",
    "    spy=math.ceil(l/spx)\n",
    "    print(\"Count=\", l)\n",
    "    for roi in roiArr:\n",
    "        x0=roi['x']\n",
    "        y0=roi['y']\n",
    "        x1 = x0 + roiX\n",
    "        y1 = y0 + roiY\n",
    "        mipData=mip[y0:y1,x0:x1]\n",
    "        im=Image.fromarray(mipData)\n",
    "        plt.subplot(spy,spx,i+1)\n",
    "        plt.xticks([])\n",
    "        plt.yticks([])\n",
    "        plt.imshow(im)\n",
    "        plt.xlabel(i)\n",
    "        i+=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "displayRoiImages(mip, roiInfo)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def displayRoiChannelImages(mip, roiInfo, roiData):\n",
    "    #plt.figure(figsize=(25,600))\n",
    "    roiSize = roiInfo['roisize']\n",
    "    roiX=roiSize['x']\n",
    "    roiY=roiSize['y']\n",
    "    roiArr = roiInfo['roi']\n",
    "    i=0\n",
    "    #s=0\n",
    "    #l =len(roiArr)\n",
    "    w = roiData[0].shape[0] + 1\n",
    "    spx=w\n",
    "    #spy=l\n",
    "    for roi in roiArr:\n",
    "        s=0\n",
    "        plt.figure(figsize=(25,35))\n",
    "        l = 1\n",
    "        spy = l\n",
    "        # Mip\n",
    "        x0=roi['x']\n",
    "        y0=roi['y']\n",
    "        x1 = x0 + roiX\n",
    "        y1 = y0 + roiY\n",
    "        mipData=mip[y0:y1,x0:x1]\n",
    "        im=Image.fromarray(mipData)\n",
    "        plt.subplot(spy,spx,s+1)\n",
    "        plt.xticks([])\n",
    "        plt.yticks([])\n",
    "        plt.imshow(im)\n",
    "        plt.xlabel(i)\n",
    "        s+=1\n",
    "        chanData=roiData[i]\n",
    "        for c in range(chanData.shape[0]):\n",
    "            chan=chanData[c]\n",
    "            mc = bi.calcMip(chan, bi.colors)\n",
    "            ci=Image.fromarray(mc)\n",
    "            plt.subplot(spy,spx,s+1)\n",
    "            plt.xticks([])\n",
    "            plt.yticks([])\n",
    "            plt.imshow(ci)\n",
    "            plt.xlabel(i)\n",
    "            s+=1\n",
    "        i+=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "displayRoiChannelImages(mip, roiInfo, roiData)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}