3 NbL@sddlZddlmZddlZddlmZGdddejjZ dWddZ dXd d Z dYd d Z dZddZ d[ddZddZddZd\ddZd]ddZddZejdd^fdd Zejdd_fd!d"Zd#d$Zd`d%d&Zejddafd'd Zejddbfd(d"ZGd)d*d*ejjZddlZdd+lmZdd,lmZdd-lmZGd.d/d/eZdcd1d2Z ddd3d4Z!ddl"jZdd5l#m$Z$ddlZded6d7Z%dfd9d:Z&Gd;d<dd>ejZ(Gd?d@d@ejZ)GdAdBdBejZ*GdCdDdDejZ+GdEdFdFejZ,GdGdHdHe,Z-GdIdJdJe,Z.dKdLZ/ddMlm0Z0ddlZddNl1m2Z3GdOdPdPejjZ4GdQdRdRejjZ5GdSdTdTejjZ6GdUdVdVejjZ7dS)gN)structural_similarity) get_shapecs(eZdZd fdd Zd d d ZZS) PerceptualLossnet-linalexrgbNFTcs@tt|j||_||_t|_|jj||||||jddS)N)modelnetuse_gpu colorspace model_pathspatial)superr__init__r r DistModelr initialize)selfrr r r r r ) __class__G/home/ec2-user/SageMaker/lama/saicinpainting/evaluation/losses/lpips.pyrs zPerceptualLoss.__init__cCs(|rd|d}d|d}|j||S)a8 Pred and target are Variables. If normalize is True, assumes the images are between [0,1] and then scales them between [-1,+1] If normalize is False, assumes the images are already between [-1,+1] Inputs pred and target are Nx3xHxW Output pytorch Variable N long )r)rpredtarget normalizerrrforwards   zPerceptualLoss.forward)rrrNFT)T)__name__ __module__ __qualname__rr __classcell__rr)rrrs r绽|=cCs&tjtj|dddd}|||S)NrrT)dimkeepdim)torchsqrtsum)Zin_featepsZ norm_factorrrrnormalize_tensor-sr'o@cCsdtj||||dS)Ng?r)npmean)p0p1rangerrrl22sr.cCs,dtj|dtjd|d|dS)N rg?)r)log10r*)r+r,peakrrrpsnr6sr2cCsdt|||dddS)NrT) data_rangeZ multichannelg@)Z compare_ssim)r+r,r-rrrdssim:sr4FcCsJddlm}|j|}|rF|dddddfd|dddddf<|S)Nr)color2)skimager5rgb2lab)Zin_imgZ mean_centr5img_labrrrr8>s   ,r8cCs|djjjjdS)Nrrr)rrr)cpufloatnumpy transpose)Z tensor_objrrr tensor2npFsr>cCs*tj|ddddddtjfjdS)Nrrr)r?rrr)r#Tensorr)newaxisr=)Znp_objrrr np2tensorKsrBTcCsddlm}t|}|j|}|rN|dddddfd|dddddf<|r| r|dddddfd|dddddf<|d}t|S)Nr)r5r6gY@)r7r5 tensor2imr8rB) image_tensorto_normZmc_onlyr5imgr9rrrtensor2tensorlabPs  , ,rGcCsddlm}ddl}|jdt|d}|dddddfd|dddddf<dtj|j|jddd}|r|j |jd }d tj ||d d }t tj |d dddddtj f}t||fSt|SdS)Nr)r5ignoregY@r6go@r;ruint8g?g@)atolr)axis)r7r5warningsfilterwarningsr>r)clipZlab2rgbastyper8iscloserBprodrA im2tensor)Z lab_tensorZ return_inbndr5rLlabZrgb_backZlab_backmaskrrrtensorlab2tensor_s   ,& rUcCsddlm}|j|dS)Nr)r5go@)r7r5r8)inputr5rrrr8rs g?g@cCs2|djjj}tj|d||}|j|S)Nrrr)rrr)r:r;r<r)r=rO)rDimtypecentfactor image_numpyrrrrCwsrCcCs2tj|||ddddddtjfjdS)Nr?rrr)r?rrr)r#r@r)rAr=)imagerWrXrYrrrrR}s rRcCs"|jjjddddddfS)Nr)datar:r<)Z vector_tensorrrr tensor2vecsr]c Cs|rXd}xtjdddD]:}tj||kdkr4d}ntj|||k}||d}qWntjdg|dgf}tjdg|dgf}x:t|jddd D]$}tj||d||||d<qWtj|dd|dd kd}tj||d||||d}|S) z ap = voc_ap(rec, prec, [use_07_metric]) Compute VOC AP given precision and recall. If use_07_metric is true, uses the VOC 07 11 point method (default:False). gg?g?rg&@g?rNr^) r)aranger%max concatenater-sizemaximumwhere) recprecZ use_07_metricaptpZmrecZmpreirrrvoc_aps$"&rkcCs2|djjj}tj|d||}|j|S)Nrrr)rrr)r:r;r<r)r=rO)rDrWrXrYrZrrrrCscCs2tj|||ddddddtjfjdS)Nr?rrr)r?rrr)r#r@r)rAr=)r[rWrXrYrrrrRs cseZdZfddZddZdddZdd Zd d Zd d ZddZ ddZ ddZ ddZ ddZ ddZdd ZdddZZS) BaseModelcstjdS)N)rr)r)rrrrszBaseModel.__init__cCsdS)Nrlr)rrrrnameszBaseModel.nameTcCs ||_dS)N)r )rr rrrrszBaseModel.initializecCsdS)Nr)rrrrrszBaseModel.forwardcCsdS)Nr)rrrrget_image_pathsszBaseModel.get_image_pathscCsdS)Nr)rrrroptimize_parametersszBaseModel.optimize_parameterscCs|jS)N)rV)rrrrget_current_visualsszBaseModel.get_current_visualscCsiS)Nr)rrrrget_current_errorsszBaseModel.get_current_errorscCsdS)Nr)rlabelrrrsaveszBaseModel.savecCs.d||f}tjj||}tj|j|dS)Nz %s_net_%s.pth)ospathjoinr#rs state_dict)rnetworkru network_label epoch_label save_filename save_pathrrr save_networks zBaseModel.save_networkcCs@d||f}tjj|j|}td||jtj|dddS)Nz %s_net_%s.pthzLoading network from %sr:) map_location)rtrurvsave_dirprintload_state_dictr#load)rrxryrzr{r|rrr load_networks  zBaseModel.load_networkcCsdS)Nrrrrrupdate_learning_rateszBaseModel.update_learning_ratecCs|jS)N)Z image_paths)rrrrrnsFcCs:tjtjj|jd|tjtjj|jd|gdddS)NZ done_flagz%i)fmt)r)rsrtrurvrsavetxt)rflagrrr save_doneszBaseModel.save_done)T)F)rrrrrmrrrnrorprqrsr}rrrrrr)rrrls  rl) OrderedDict)zoom)tqdmc @sxeZdZddZd$d d Zd%ddZddZddZddZddZ ddZ ddZ ddZ ddZ d d!Zd"d#ZdS)&rcCs|jS)N) model_name)rrrrrmszDistModel.namenet-linrLabFNT-C6??0.1c Cstj||d||_||_| |_| |_d||f|_|jdkrt|||d| | dd|_tdd}|dkrd dl }t j j t j j t j jtd d d d d |d }| s|jjtj|f|ddnn|jdkrt||dd|_nR|jdkrt||d|_d|_n0|jdkrt||d|_d|_ntd|jt|jj|_|jrt|_|jt|jjj7_| |_| |_tjj|j| | dfd|_n |jj |rt!dt"|jt!ddS) aJ INPUTS model - ['net-lin'] for linearly calibrated network ['net'] for off-the-shelf network ['L2'] for L2 distance in Lab colorspace ['SSIM'] for ssim in RGB colorspace net - ['squeeze','alex','vgg'] model_path - if None, will look in weights/[NET_NAME].pth colorspace - ['Lab','RGB'] colorspace to use for L2 and SSIM use_gpu - bool - whether or not to use a GPU printNet - bool - whether or not to print network architecture out spatial - bool - whether to output an array containing varying distances across spatial dimensions spatial_shape - if given, output spatial shape. if None then spatial shape is determined automatically via spatial_factor (see below). spatial_factor - if given, specifies upsampling factor relative to the largest spatial extent of a convolutional layer. if None then resized to size of input images. spatial_order - spline order of filter for upsampling in spatial mode, by default 1 (bilinear). is_train - bool - [True] for training mode lr - float - initial learning rate beta1 - float - initial momentum term for adam version - 0.1 for latest, 0.0 was original (with a bug) )r z%s [%s]znet-linT) pnet_rand pnet_tune pnet_type use_dropoutr versionlpipsr:)r~Nrz..modelsZ lpips_modelsz.pthF)strictr )rrrL2r.)r r DSSIMr4SSIMssimzModel [%s] not recognized.g+?)lrbetasz----------- Networks initialized -------------z/-----------------------------------------------)rr.)rr4rr)#rlrrr is_trainr rPNetLindictinspectrtruabspathrvdirname__file__rr#rrr ValueErrorlist parametersZtrainable_parametersBCERankingLossrankLossrold_lroptimAdam optimizer_netevalr print_network)rrr r rrr r ZprintNetr rrbeta1rkwrrrrrsJ  (     zDistModel.initializecCs|j|||dS)z Function computes the distance between image patches in0 and in1 INPUTS in0, in1 - torch.Tensor object of shape Nx3xXxY - image patch scaled to [-1,1] OUTPUT computed distances between in0 and in1 ) retPerLayer)r )rin0in1rrrrrJszDistModel.forwardcCs0|j|jj|j|jj|jdS)N) forward_trainr zero_gradbackward_trainstep clamp_weights)rrrrroUs   zDistModel.optimize_parameterscCsBx<|jjD].}t|dr |jdkr tj|jjdd|j_q WdS)Nweightrr)min)rr)r moduleshasattr kernel_sizer#clamprr\)rmodulerrrr\szDistModel.clamp_weightscCs,|d|_|d|_|d|_|d|_dS)Nrefr+r,judge)Z input_refZinput_p0Zinput_p1 input_judge)rr\rrr set_inputas   zDistModel.set_inputcCsds td||j|j|_||j|j|_|j|j|j|j|_t d|jj |jj |_ |j |j|j|j dd|_|jS)NFz5We shoud've not get here when using LPIPS as a metricg?g@)AssertionErrorvar_refvar_p0d0var_p1d1compute_accuracyracc_rVariableviewrbZ var_judger loss_total)rrrrrqs zDistModel.forward_traincCstj|jjdS)N)r#r*rbackward)rrrrrszDistModel.backward_traincCs>||kjjjj}|jjj}||d|d|S)z) d0, d1 are Variables, judge is a Tensor r)r:r\r<flatten)rrrrZd1_lt_d0Z judge_perrrrrszDistModel.compute_accuracycCsJtd|jjjjfd|jfg}x"|jD]}tj||||<q,W|S)Nrr) rrr\r:r<rkeysr)r*)rZretDictkeyrrrrqs zDistModel.get_current_errorscCsd|jjjd}t|jj}t|jj}t|jj}t|||dgdd}t|||dgdd}t|||dgdd}td|fd|fd|fgS) Nrrr)orderrr+r,)rr\rbrCrrrr)rZ zoom_factorZref_imgZp0_imgZp1_imgZ ref_img_visZ p0_img_visZ p1_img_visrrrrps   zDistModel.get_current_visualscCsF|jr|j|jj|d|n|j|j|d||j|jj|d|dS)Nrank)r r}r rr)rrurrrrrrsszDistModel.savecCsL|j|}|j|}x|jjD] }||d<qWtdt|j|f||_dS)Nrzupdate lr [%s] decay: %f -> %f)rrr param_groupsrtype)rZ nepoch_decayZlrdr param_grouprrrrs    zDistModel.update_learning_rate) rrrFFNTFFFrrr)F)rrrrmrrrorrrrrrqrprsrrrrrrs  K  rrcCsg}g}g}xt|j|dD]p}|||d|djjjjj7}|||d|djjjjj7}||djjjj7}qWtj|}tj|}tj|}||kd|||k|||kd}tj |t ||||dfS) a Function computes Two Alternative Forced Choice (2AFC) score using distance function 'func' in dataset 'data_loader' INPUTS data_loader - CustomDatasetDataLoader object - contains a TwoAFCDataset inside func - callable distance function - calling d=func(in0,in1) should take 2 pytorch tensors with shape Nx3xXxY, and return numpy array of length N OUTPUTS [0] - 2AFC score in [0,1], fraction of time func agrees with human evaluators [1] - dictionary with following elements d0s,d1s - N arrays containing distances between reference patch to perturbed patches gts - N array in [0,1], preferred patch selected by human evaluators (closer to "0" for left patch p0, "1" for right patch p1, "0.6" means 60pct people preferred right patch, 40pct preferred left) scores - N array in [0,1], corresponding to what percentage function agreed with humans CONSTS N - number of test triplets in data_loader )descrr+r,rg?g?)d0sd1sgtsscores) r load_datar\r:r<rtolistr)arrayr*r) data_loaderfuncrmrrrr\rrrrscore_2afc_datasets((    (rcCsg}g}xXt|j|dD]D}|||d|djjjj7}||djjjj7}qWtj|}tj|}tj |}||}||} tj | } tj d| } tj | | } | | | } | | | }t || }|t ||dfS)a Function computes JND score using distance function 'func' in dataset 'data_loader' INPUTS data_loader - CustomDatasetDataLoader object - contains a JNDDataset inside func - callable distance function - calling d=func(in0,in1) should take 2 pytorch tensors with shape Nx3xXxY, and return pytorch array of length N OUTPUTS [0] - JND score in [0,1], mAP score (area under precision-recall curve) [1] - dictionary with following elements ds - N array containing distances between two patches shown to human evaluator sames - N array containing fraction of people who thought the two patches were identical CONSTS N - number of test triplets in data_loader )rr+r,samer)dssames)rrr\r:r<rrr)rargsortcumsumr%rkr)rrrmrrr\rZ sorted_indsZ ds_sortedZ sames_sortedZTPsZFPsZFNsZprecsrecsscorerrrscore_jnd_datasets"$        r)rcCs|jddg|dS)Nrr?)r")r*)in_tensr"rrrspatial_average sr@cCs*|jd}d||}tj|ddd|S)Nrg?bilinearF) scale_factormode align_corners)shapennUpsample)rout_HZin_Hrrrrupsample s  rcs(eZdZd fdd Zd ddZZS) rvggFT0.1c stt|j||_||_||_||_||_||_t |_ |jdkrZt }dddddg|_ nB|jdkrzt }ddd ddg|_ n"|jd krt}dddd d ddg|_ t|j |_||j |jd |_|rt|j d |d |_t|j d|d |_t|j d|d |_t|j d|d |_t|j d|d |_|j|j|j|j|jg|_|jd krt|j d|d |_t|j d|d |_|j|j|jg7_dS)Nrvgg16rririsqueeze) pretrained requires_gradr)rrrr?)rr)rrrrrrr rr ScalingLayer scaling_layerrZchnsalexnet squeezenetlenLr NetLinLayerZlin0Zlin1Zlin2Zlin3Zlin4linsZlin5Zlin6) rrrrrr rrZnet_type)rrrrs<     zPNetLin.__init__csbjdkrjj|fn|f\}}j|j|}}iii}} xLtjD]>} t|| t|| || <| | <|| | | d| <q\WjrjrȇfddtjD} nfddtjD} n<jrfddtjD} nfddtjD} | d} x"td jD]} | | | 7} q6W|rZ| | fS| SdS) Nz0.1rcs.g|]&}tj|j|jddqS)r)r)rrrr).0kk)diffsrrrr Hsz#PNetLin.forward..cs(g|] }tj|j|ddqS)T)r")rrr)rr)rrrrrJscs,g|]$}t|jdddjddqS)rT)r!r"r)r)rr%r)rr)rrrrrMscs&g|]}t|jdddddqS)rT)r!r")r")rr%)rr)rrrrOsrr)rrr r-r r'rr )rrrrZ in0_inputZ in1_inputZouts0Zouts1Zfeats0Zfeats1rresvallr)rrrrr;s( "zPNetLin.forward)rFFTFrT)F)rrrrrrrr)rrrs$rcs$eZdZfddZddZZS)rcsbtt|j|jdtjd d d gdddddf|jdtjdddgdddddfdS) NshiftgQ?gI +?gMb?scalegZd;O?gy&1?g?gQgI +gMbȿ)rrrregister_bufferr#r@)r)rrrr\s(zScalingLayer.__init__cCs||j|jS)N)rr)rinprrrraszScalingLayer.forward)rrrrrrrr)rrr[s rcs"eZdZdZdfdd ZZS)r z- A single linear layer which does a 1x1 conv rFc sLtt|j|rtjgng}|tj||dddddg7}tj||_dS)NrrF)stridepaddingbias)rr rrDropoutConv2d Sequentialr)rZchn_inZchn_outrlayers)rrrrhszNetLinLayer.__init__)rF)rrr__doc__rrrr)rrr esr cs,eZdZdZd fdd Zd ddZZS) Dist2LogitLayerzc takes 2 distances, puts through fc layers, spits out value between [0,1] (if use_sigmoid is True) Tc stt|jtjd|dddddg}|tjddg7}|tj||dddddg7}|tjddg7}|tj|ddddddg7}|r|tjg7}tj||_dS)NrrrT)rrrg?) rr"rrr LeakyReLUSigmoidrr)rchn_midZ use_sigmoidr )rrrrsszDist2LogitLayer.__init__皙?c Cs2|jtj||||||||||fddS)Nr)r!)rr#cat)rrrr&rrrrszDist2LogitLayer.forward)r#T)r')rrrr!rrrrr)rrr"ps r"cs&eZdZdfdd ZddZZS)rr#cs*tt|jt|d|_tjj|_dS)N)r&) rrrr"r r#rBCELossloss)rr&)rrrrs zBCERankingLoss.__init__cCs(|dd}|j|||_|j|j|S)Ng?g@)r logitr*)rrrrperrrrrs zBCERankingLoss.forward)r#)rrrrrrrr)rrrsrcseZdZdfdd ZZS)FakeNetTrcstt|j||_||_dS)N)rr-rr r )rr r )rrrrszFakeNet.__init__)Tr)rrrrrrr)rrr-sr-c@seZdZdddZdS)rNc Cs|jddkst|jdkrz|j\}}}}tjtjtj||dddj|d||ddj|dd|ddj|}|S|jdkrttt|j dd tt|j dd d d j d }t tj |f} | SdS) NrrRGBr)r!r?rF)rEgY@)r-r;) rbrr r#r*rr.r>rGr\rOrr@) rrrrNCXYvalueret_varrrrrs : z L2.forward)N)rrrrrrrrrsrc@seZdZdddZdS)rNcCs|jddkst|jdkrHtdt|jdt|jddjd}n:|jdkrttt|jd d tt|jd d d djd}t t j |f}|S) Nrrr.g?go@)r-r;rF)rEgY@) rbrr r4rCr\rOr>rGrr#r@)rrrrr3r4rrrrs * z DSSIM.forward)N)rrrrrrrrrsrcCs<d}x|jD]}||j7}qWtd|td|dS)NrZNetworkzTotal number of parameters: %d)rnumelr)r Z num_paramsparamrrrrs  r) namedtuple)rcs&eZdZdfdd ZddZZS)r FTcstt|jtj|dj}tjj|_ tjj|_ tjj|_ tjj|_ tjj|_ tjj|_tjj|_d|_x&tdD]}|j jt|||qWx(tddD]}|j jt|||qWx(tddD]}|j jt|||qWx(tddD]}|j jt|||qWx*tddD]}|j jt|||q(Wx*tddD]}|jjt|||qTWx*tdd D]}|jjt|||qW|sx|jD] }d |_qWdS) N)rrrr/ F)rr rtv squeezenet1_1featuresr#rrslice1slice2slice3slice4slice5slice6slice7N_slicesr- add_modulestrrr)rrrZpretrained_featuresxr6)rrrrs6       zsqueezenet.__init__c Cs|j|}|}|j|}|}|j|}|}|j|}|}|j|}|}|j|}|}|j|}|} tddddddddg} | ||||||| } | S) NZSqueezeOutputsrelu1relu2relu3relu4relu5relu6Zrelu7)r@rArBrCrDrErFr7) rr1hh_relu1h_relu2h_relu3h_relu4h_relu5Zh_relu6Zh_relu7 vgg_outputsoutrrrrs"       zsqueezenet.forward)FT)rrrrrrrr)rrr sr cs&eZdZdfdd ZddZZS)r FTcsTtt|jtj|dj}tjj|_tjj|_ tjj|_ tjj|_ tjj|_ d|_ x&tdD]}|jjt|||qhWx(tddD]}|j jt|||qWx(tddD]}|j jt|||qWx(tddD]}|j jt|||qWx*tddD]}|j jt|||qW|sPx|jD] }d|_q@WdS)N)rrrr9r/r;F)rr rr=r?r#rrr@rArBrCrDrGr-rHrIrr)rrrZalexnet_pretrained_featuresrJr6)rrrrs*     zalexnet.__init__c Csn|j|}|}|j|}|}|j|}|}|j|}|}|j|}|}tddddddg}||||||} | S)NZAlexnetOutputsrKrLrMrNrO)r@rArBrCrDr7) rr1rQrRrSrTrUrVZalexnet_outputsrXrrrrs     zalexnet.forward)FT)rrrrrrrr)rrr sr cs&eZdZdfdd ZddZZS)rFTcsTtt|jtj|dj}tjj|_tjj|_ tjj|_ tjj|_ tjj|_ d|_ x&tdD]}|jjt|||qhWx(tddD]}|j jt|||qWx(tddD]}|j jt|||qWx(tddD]}|j jt|||qWx*tddD]}|j jt|||qW|sPx|jD] }d|_q@WdS) N)rrr F)rrrr=r?r#rrr@rArBrCrDrGr-rHrIrr)rrrZvgg_pretrained_featuresrJr6)rrrr)s*     zvgg16.__init__c Csn|j|}|}|j|}|}|j|}|}|j|}|}|j|}|}tddddddg}||||||} | S)NZ VggOutputsZrelu1_2Zrelu2_2Zrelu3_3Zrelu4_3Zrelu5_3)r@rArBrCrDr7) rr1rQZ h_relu1_2Z h_relu2_2Z h_relu3_3Z h_relu4_3Z h_relu5_3rWrXrrrr@s     z vgg16.forward)FT)rrrrrrrr)rrr(srcs&eZdZdfdd ZddZZS) resnetFTcstt|j|dkr&tj|d|_n^|dkr>tj|d|_nF|dkrVtj|d|_n.|dkrntj|d|_n|dkrtj |d|_d|_ |jj |_ |jj |_ |jj |_ |jj|_|jj|_|jj|_|jj|_|jj|_dS)Nr^)r"r6er)rr]rr=resnet18r resnet34resnet50 resnet101 resnet152rGconv1bn1relumaxpoollayer1layer2layer3layer4)rrrnum)rrrrRs(       zresnet.__init__c Cs|j|}|j|}|j|}|}|j|}|j|}|}|j|}|}|j|}|}|j|}|}tddddddg}||||||} | S)NZOutputsrKconv2conv3conv4conv5) rgrhrirjrkrlrmrnr7) rr1rQrRZh_conv2Zh_conv3Zh_conv4Zh_conv5outputsrXrrrris         zresnet.forward)FTr^)rrrrrrrr)rrr]Qsr])r )r()r()r()F)TF)Fg_@g_@)Fg_@g_@)r)r)T)r)8r<r)Zskimage.metricsrr#saicinpainting.utilsrrModulerr'r.r2r4r8r>rBrGrUrIrCrRr]rkrlrt collectionsr scipy.ndimagerrrrrtorch.nnZtorch.autogradrrrrrr r"rr-rrrr7 torchvisionrr=r r rr]rrrr sd          " 8   = $ ,    F    3))