import sys
import argparse
import boto3
from pathlib import Path
import bbbc021common as bb

s3c = boto3.client('s3')

sys.path.insert(0, "../../../cli/bioims/src")
import bioims

parser = argparse.ArgumentParser()

parser.add_argument('--bbbc021-bucket', type=str, required=True, help='bbbc021 bucket')
parser.add_argument('--bioims-resource-bucket', type=str, required=True, help='resource bucket')
parser.add_argument('--embeddingName', type=str, required=True, help='embedding name')

args = parser.parse_args()

BBBC021_BUCKET = args.bbbc021_bucket
BIOIMS_INPUT_BUCKET = args.bioims_resource_bucket
EMBEDDING = args.embeddingName

image_df, moa_df = bb.Bbbc021PlateInfoByDF.getDataFrames(BBBC021_BUCKET)
compound_moa_map = bb.Bbbc021PlateInfoByDF.getCompoundMoaMapFromDf(moa_df)

# We need to go from imageId->ImageSourceId->compound->moa
# 'Image_FileName_DAPI[:-4]' serves as the ImageSourceId
sourceCompoundMap={}
for i in range(len(image_df.index)):
    r = image_df.iloc[i]
    imageSourceId = r['Image_FileName_DAPI'][:-4]
    compound = r['Image_Metadata_Compound']
    sourceCompoundMap[imageSourceId]=compound

bbbc021ImageCount = len(image_df.index)
print("BBBC-021 image count={}".format(bbbc021ImageCount))

#imagesRemovedByCompound={}
moaDict={}
i=0
for k, v in compound_moa_map.items():
    print("i={} key={} value={}".format(i,k,v))
    moaDict[v]=True
#    removedList = []
#    imagesRemovedByCompound[k]=removedList
    i+=1
    
imageClient = bioims.client('image-management')
trainingConfigurationClient = bioims.client('training-configuration')
tagClient = bioims.client('tag')

embeddingInfo = trainingConfigurationClient.getEmbeddingInfo(EMBEDDING)
print(embeddingInfo)
width = embeddingInfo['inputWidth']
height = embeddingInfo['inputHeight']
depth = embeddingInfo['inputDepth']
channels = embeddingInfo['inputChannels']

print("list compatible plates: width={} height={} depth={} channels={}".format(width, height, depth, channels))
plateList = imageClient.listCompatiblePlates(width, height, depth, channels)
pl=len(plateList)
print("found {} compatible plates".format(pl))

tagList = tagClient.getAllTags()
tagIdMap={}
for tagInfo in tagList:
    print("{} {}".format(tagInfo['id'], tagInfo['tagValue']))
    tagIdMap[tagInfo['tagValue']] = tagInfo['id']
    
def cleanLabel(label):
    c1 = "".join(label.split())
    c2 = c1.replace('/','-')
    return c2
    
def getBatchTagFromPlateSourceId(psi):
    ca = psi.split('_')
    return "batch:" + ca[0]

for i, pi in enumerate(plateList):
    plateId = pi['plateId']
    print("Plate {} {}".format(i, plateId))
    imageList = imageClient.getImagesByPlateId(plateId)
    for imageItem in imageList:
        image = imageItem['Item']
        imageId = image['imageId']
        imageSourceId = image['imageSourceId']
        tagList = []
        if 'plateSourceId' in image:
            plateSourceId = image['plateSourceId']
            batchTag = getBatchTagFromPlateSourceId(plateSourceId)
            batchTagId = tagIdMap[batchTag]
            tagList.append(batchTagId)
        if imageSourceId in sourceCompoundMap:
            imageCompound = cleanLabel(sourceCompoundMap[imageSourceId])
            compoundTag = "compound:" + imageCompound
            if compoundTag in tagIdMap:
                compoundId = tagIdMap[compoundTag]
                print("{} {} {}".format(imageId, compoundTag, compoundId))
                tagList.append(compoundId)
                if 'trainCategory' in image and 'trainLabel' in image:
                    trainCategory = image['trainCategory']
                    trainLabel = image['trainLabel']
                    if trainCategory=='moa' and trainLabel in moaDict:
                        moa = cleanLabel(trainLabel)
                        moaTag = "moa:" + moa
                        moaId = tagIdMap[moaTag]
                        print("{} {} {}".format(imageId, moaTag, moaId))
                        tagList.append(moaId)
        if len(tagList)>0:
            imageClient.updateImageTags(imageId, tagList)