#!/bin/bash
#
#	Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
#	SPDX-License-Identifier: MIT-0
#
# script to summarize test results

# a pipeline should report failure when any of its stages fails
set -o pipefail

# Abort the script if the command that ran immediately before this call failed.
# Must be invoked directly after the command being checked, because it reads
# that command's exit status from $?.
#   $1 - message to show when the previous command failed
# On failure prints "ERROR:  <message> (<status>)" on stdout and exits with
# that status; on success prints nothing and returns 0.
function bailOnError {
	# capture the status before any other command overwrites $?
	result=$?
	[[ $result -eq 0 ]] && return 0

	printf 'ERROR:  %s (%s)\n' "$1" "$result"
	exit "$result"
}

# dependencies
# Resolve every external tool up front so a missing one is reported before any
# work is done.  Each lookup is followed immediately by bailOnError, which
# inspects the exit status of that lookup.  `command -v` is a shell builtin, so
# the lookups themselves do not depend on an external `which` being installed.
BC=$(command -v bc)
bailOnError "We need bc to continue"
GREP=$(command -v grep)
bailOnError "We need grep to continue"
CUT=$(command -v cut)
# this must be a call, not an assignment, or a missing cut goes unnoticed
bailOnError "We need cut to continue"
WC=$(command -v wc)
bailOnError "We need wc to continue"
JQ=$(command -v jq)
bailOnError "We need jq to continue"
HEAD=$(command -v head)
bailOnError "We need head to continue"
AWK=$(command -v awk)
bailOnError "We need awk to continue"

# the file to summarize is taken from the first command line argument
resultsFile="$1"

# no argument supplied
[[ -n "${resultsFile}" ]] || {
	echo "ERROR:  Please specify a file name to analyze"
	exit 1
}

# an argument was supplied, but it does not name something we can read
[[ -r "${resultsFile}" ]] || {
	echo "ERROR:  Could not find file to summarize (${resultsFile})"
	exit 1
}

echo "Summary from ${resultsFile}..."

# Print the number of lines in the results file that match a pattern.
#   $1 - basic regular expression handed to grep
# Uses grep -c so the count comes straight from grep (0 when nothing matches)
# rather than from a wc stage, which some implementations pad with blanks.
function countMatches {
	"$GREP" -c -e "$1" -- "${resultsFile}"
}

# Print a count as a percentage of a total, to three decimal places.
#   $1 - count
#   $2 - total
# Prints 0 when the total is zero or empty, instead of asking bc to divide by
# zero (which produces a runtime error and an empty percentage).
function percentOf {
	if [[ "${2:-0}" -eq 0 ]]
	then
		echo 0
		return 0
	fi
	# bc truncates
	echo "scale=3; (${1} * 100)  / ${2}" | "$BC" -l
}

# lines starting with '+' are counted as successes, lines starting with '-' as failures
totalTests=$(countMatches '^[-+]')
successfulTests=$(countMatches '^+')
failedTests=$(countMatches '^-')
# failures broken down by the message recorded at the start of the JSON payload
missedUtterances=$(countMatches '^-  Sequence \[.*\] FAILED! ~ {"message":"Missed utterance')
misclassifiedIntents=$(countMatches '^-  Sequence \[.*\] FAILED! ~ {"message":"Intent name did not match')
#TODO:  missedSlots=
incorrectSlotValues=$(countMatches '^-  Sequence \[.*\] FAILED! ~ {"message":"Slot value .* did not match')

echo "${totalTests} Total tests"
echo "${successfulTests} Successful ($(percentOf "${successfulTests}" "${totalTests}")%)"
echo "${failedTests} Failed ($(percentOf "${failedTests}" "${totalTests}")%)"

echo "${missedUtterances} Missed utterances ($(percentOf "${missedUtterances}" "${totalTests}")%)"
echo "${misclassifiedIntents} Misclassified intents ($(percentOf "${misclassifiedIntents}" "${totalTests}")%)"

echo "${incorrectSlotValues} Incorrect slot values ($(percentOf "${incorrectSlotValues}" "${totalTests}")%)"

# calculations

# Print a number as written by bc with its leading zero restored: bc writes
# values below one as ".833333"; anything else is passed through unchanged
# (so 1.000000 is not turned into 01.000000).
#   $1 - number as printed by bc
function withLeadingZero {
	case "$1" in
		.*) printf '0%s\n' "$1" ;;
		*)  printf '%s\n' "$1" ;;
	esac
}

# Print numerator / denominator to six decimal places, or 0 when the
# denominator is zero or empty (bc is not invoked in that case).
#   $1 - numerator (integer)
#   $2 - denominator (integer)
function safeRatio {
	if [[ "${2:-0}" -eq 0 ]]
	then
		echo 0
		return 0
	fi
	withLeadingZero "$(echo "scale=6; (${1}) / (${2})" | "$BC" -l)"
}

# successes whose first step expected an intent
tp=$("$GREP" -e '^+' -- "${resultsFile}" | "$CUT" -d '~' -f 2- | "$JQ" 'select(.sequence.sequence[0].postConditions.intentName != null) | .sequence.name' - | "$WC" -l)
# successes whose first step expected no intent
tn=$("$GREP" -e '^+' -- "${resultsFile}" | "$CUT" -d '~' -f 2- | "$JQ" 'select(.sequence.sequence[0].postConditions.intentName == null) | .sequence.name' - | "$WC" -l)
# failures whose first step expected no intent
fp=$("$GREP" -e '^-' -- "${resultsFile}" | "$CUT" -d '~' -f 2- | "$JQ" 'select(.sequence.sequence[0].postConditions.intentName == null) | .sequence.name' - | "$WC" -l)
# failures reported as a missed utterance
fn=$("$GREP" -c -e '^-.*Missed utterance' -- "${resultsFile}")
# failures reported as an intent name mismatch where an intent was expected
mm=$("$GREP" -e '^-' -- "${resultsFile}" | "$CUT" -d '~' -f 2- | "$GREP" -e 'Intent name did not match' | "$JQ" 'select(.sequence.sequence[0].postConditions.intentName != null) | .sequence.name' - | "$WC" -l)

# normalize the counts to plain integers: drops any blank padding and turns an
# empty count into 0
tp=$(( tp + 0 )); tn=$(( tn + 0 )); fp=$(( fp + 0 )); fn=$(( fn + 0 )); mm=$(( mm + 0 ))

# each metric is computed whenever its own denominator is non-zero, so accuracy
# is still reported when there are true negatives but no true positives
ac=$(safeRatio $(( tp + tn )) $(( tp + tn + fp + fn + mm )))
pr=$(safeRatio "$tp" $(( tp + fp + mm )))
re=$(safeRatio "$tp" $(( tp + fn + mm )))
fm=0
# precision and recall are both non-zero exactly when tp is, which keeps the
# F-measure denominator away from zero
if [[ $tp -gt 0 ]]
then
	fm=$(withLeadingZero "$(echo "scale=6; (2 * $pr * $re) / ($pr + $re)" | "$BC" -l)")
fi

echo ""
echo "Intent Classification Metrics"
echo "${tp} True positives (correctly classified an utterance that was not 'out of domain')"
echo "${tn} True negatives (correctly classified an utterance that was 'out of domain')"
echo "${fp} False positives (incorrectly classified an utterance that was 'out of domain' as some intent)"
echo "${fn} False negatives (incorrectly classified an utterance (as 'out of domain') that was not 'out of domain')"
echo "${mm} Mismatches (incorrectly classified an utterance as some other (not 'out of domain') intent than it was)"
echo "${ac} Accuracy - (tp + tn) / (tp + fp + tn + fn + mm)"
echo "${pr} Precision - tp / (tp + fp + mm)"
echo "${re} Recall - tp / (tp + fn + mm)"
echo "${fm} F-measure - (2 * precision * recall) / (precision + recall)"

# Print the JSON payload of every results line that matches a pattern.
#   $1 - basic regular expression handed to grep
# The payload is everything after the first '~' on the line.  Taking fields
# "2-" rather than a fixed field keeps payloads intact when the JSON itself
# contains a '~'.
function failurePayloads {
	"$GREP" -e "$1" -- "${resultsFile}" | "$CUT" -d '~' -f 2-
}

# Print example messages (indented by two spaces) for the failures matching a
# pattern.
#   $1 - basic regular expression selecting the failure lines
#   $2 - number of matching failures, as counted earlier in the script
# With more than 2000 failures a random sample of roughly 1% is taken and at
# most 40 lines are shown; otherwise the first 20 are shown.
function showSampledMessages {
	local pattern=$1
	local count=${2:-0}

	if [[ $count -gt 2000 ]]
	then
		failurePayloads "$pattern" | "$JQ" -r '"  " + .message' | \
			"$AWK" 'BEGIN {srand()} !/^$/ { if (rand() <= .01) print $0}' | \
			"$HEAD" -n 40
	else
		failurePayloads "$pattern" | "$JQ" -r '"  " + .message' | "$HEAD" -n 20
	fi
}

echo ""
echo "Examples of missed utterances"
showSampledMessages '^-  Sequence \[.*\] FAILED! ~ {"message":"Missed utterance' "$missedUtterances"

echo ""
echo "Examples of misclassified intents"
showSampledMessages '^-  Sequence \[.*\] FAILED! ~ {"message":"Intent name did not match' "$misclassifiedIntents"

echo ""
echo "Examples of incorrect slot values"
showSampledMessages '^-  Sequence \[.*\] FAILED! ~ {"message":"Slot value .* did not match' "$incorrectSlotValues"

echo ""
echo "Examples of incorrect dialog states (which may be missed slots)"
# one line per utterance in the sequence, together with the response message
failurePayloads '^-  Sequence \[.*\] FAILED! ~ {"message":"Dialog state did not match (actual)' | \
	"$JQ" -r '"  " + .message + " for [" + .sequence.sequence[].utterance + "] response [" +  .lexResponse.message + "]"' | \
	"$HEAD" -n 20

if [[ $fp -gt 0 ]]
then
	echo ""
	echo "Examples of false positives"
	# failures whose first step expected no intent
	failurePayloads '^-' | \
		"$JQ" -r 'select(.sequence.sequence[0].postConditions.intentName == null) | .message + " for [" + .sequence.sequence[].utterance + "]"' - | \
		"$HEAD" -n 20
fi