"""Utility functions for the high confidence stopping rule. This stopping criterion operates by estimating the amount of missing optima, and stops once the estimated mass of missing optima is within an acceptable range, given some confidence. """ from __future__ import division from collections import Counter from math import log, sqrt def num_one_occurrences(observed_obj_vals, tolerance): """ Determines the number of optima that have only been observed once. Needed to estimate missing mass of optima. """ obj_value_distribution = Counter(observed_obj_vals) sorted_histogram = list(sorted(obj_value_distribution.items())) if tolerance == 0: return sum(1 for _, count in sorted_histogram if count == 1) else: # Need to apply a tolerance to each value num_obj_vals_only_observed_once = 0 for i, tup in enumerate(sorted_histogram): obj_val, count = tup if count == 1: # look at previous and next elements to make sure that they are # not within the tolerance if (i > 0 and obj_val - sorted_histogram[i - 1][0] <= tolerance): continue if (i < len(sorted_histogram) - 1 and sorted_histogram[i + 1][0] - obj_val <= tolerance): continue num_obj_vals_only_observed_once += 1 return num_obj_vals_only_observed_once def should_stop(solutions, stopping_mass, stopping_delta, tolerance): """ Determines if the missing mass of unseen local optima is acceptable based on the High Confidence stopping rule. """ f = num_one_occurrences(solutions, tolerance) n = len(solutions) if n == 0: return False # Do not stop if no solutions have been found. d = stopping_delta c = stopping_mass confidence = f / n + (2 * sqrt(2) + sqrt(3) ) * sqrt(log(3 / d) / n) return confidence < c