#  ___________________________________________________________________________
#
#  Pyomo: Python Optimization Modeling Objects
#  Copyright 2017 National Technology and Engineering Solutions of Sandia, LLC
#  Under the terms of Contract DE-NA0003525 with National Technology and
#  Engineering Solutions of Sandia, LLC, the U.S. Government retains certain
#  rights in this software.
#  This software is distributed under the 3-clause BSD License.
#  ___________________________________________________________________________

import re
import importlib as im
import logging
import types
import json
from itertools import combinations

from pyomo.common.dependencies import (
    attempt_import,
    numpy as np, numpy_available,
    pandas as pd, pandas_available,
    scipy, scipy_available,
)

import pyomo.environ as pyo
import pyomo.pysp.util.rapper as st
from pyomo.pysp.scenariotree import tree_structure
from pyomo.pysp.scenariotree.tree_structure_model import (
    CreateAbstractScenarioTreeModel
)
from pyomo.opt import SolverFactory
from pyomo.environ import Block, ComponentUID

import pyomo.contrib.parmest.mpi_utils as mpiu
import pyomo.contrib.parmest.ipopt_solver_wrapper as ipopt_solver_wrapper
from pyomo.contrib.parmest.graphics import (fit_rect_dist,
                                            fit_mvn_dist,
                                            fit_kde_dist)

parmest_available = numpy_available & pandas_available & scipy_available

inverse_reduced_hessian, inverse_reduced_hessian_available = attempt_import(
    'pyomo.contrib.interior_point.inverse_reduced_hessian')

logger = logging.getLogger(__name__)

__version__ = 0.1


#=============================================
def _object_from_string(instance, vstr):
    """
    Create a Pyomo object from a string; it is attached to instance
    args:
        instance: a concrete pyomo model
        vstr: a particular Var or Param (e.g. "pp.Keq_a[2]")
    output:
        the object
    NOTE: We need to deal with blocks
          and with indexes that might really be strings or ints
    """
    # pull off the index
    l = vstr.find('[')
    if l == -1:
        indexstr = None
        basestr = vstr
    else:
        r = vstr.find(']')
        indexstr = vstr[l+1:r]
        basestr = vstr[:l]
    # get the blocks and the name
    parts = basestr.split('.')
    name = parts[-1]
    retval = instance
    for i in range(len(parts)-1):
        retval = getattr(retval, parts[i])
    retval = getattr(retval, name)
    if indexstr is None:
        return retval
    # dlw jan 2018: TBD improve index handling... multiple indexes, e.g.
    try:
        indexstr = int(indexstr)  # hack...
    except:
        pass
    return retval[indexstr]

#=============================================
def _ef_ROOT_node_Object_from_string(efinstance, vstr):
    """
    Wrapper for _object_from_string for PySP extensive forms
    but only for Vars at the node named RootNode.
    DLW April 2018: needs work to be generalized.
    """
    efvstr = "MASTER_BLEND_VAR_RootNode["+vstr+"]"
    return _object_from_string(efinstance, efvstr)

#=============================================
###def _build_compdatalists(model, complist):
    # March 2018: not used
    """
    Convert a list of names of pyomo components (Var and Param)
    into two lists of so-called data objects found on model.

    args:
        model: ConcreteModel
        complist: pyo.Var and pyo.Param names in model
    return:
        vardatalist: a list of Vardata objects (perhaps empty)
        paramdatalist: a list of Paramdata objects or (perhaps empty)
    """
    """
    vardatalist = list()
    paramdatalist = list()

    if complist is None:
        raise RuntimeError("Internal: complist cannot be empty")
    # TBD: require a list (even if it there is only a single element
    
    for comp in complist:
        c = getattr(model, comp)
        if c.is_indexed() and isinstance(c, pyo.Var):
            vardatalist.extend([c[i] for i in sorted(c.keys())])
        elif isinstance(c, pyo.Var):
            vardatalist.append(c)
        elif c.is_indexed() and isinstance(c, pyo.Param):
            paramdatalist.extend([c[i] for i in sorted(c.keys())])
        elif isinstance(c, pyo.Param):
            paramdatalist.append(c)
        else:
            raise RuntimeError("Invalid component list entry= "+\
                               (str(c)) + " Expecting Param or Var")
    
    return vardatalist, paramdatalist
    """    

def _pysp_instance_creation_callback(scenario_tree_model,
                                    scenario_name, node_names):
    """
    This is going to be called by PySP and it will call into
    the user's model's callback.

    Parameters:
    -----------
    scenario_tree_model: `pysp scenario tree`
        Standard pysp scenario tree, but with things tacked on:
        `CallbackModule` : `str` or `types.ModuleType`
        `CallbackFunction`: `str` or `callable`
        NOTE: if CallbackFunction is callable, you don't need a module.
    scenario_name: `str`
         `cb_data`: optional to pass through to user's callback function
        Scenario name should end with a number
    node_names: `None`
        Not used here 

    Returns:
    --------
    instance: `ConcreteModel`
        instantiated scenario

    Note:
    ----
    There is flexibility both in how the function is passed and its signature.
    """
    scen_num_str = re.compile(r'(\d+)$').search(scenario_name).group(1)
    scen_num = int(scen_num_str)
    basename = scenario_name[:-len(scen_num_str)] # to reconstruct name
    
    # The module and callback function names need to have been on tacked on the tree.
    # We allow a lot of flexibility in these things.
    if not hasattr(scenario_tree_model, "CallbackFunction"):
        raise RuntimeError(\
            "Internal Error: tree needs callback in parmest callback function")
    elif callable(scenario_tree_model.CallbackFunction):
        callback = scenario_tree_model.CallbackFunction
    else:
        cb_name = scenario_tree_model.CallbackFunction

        if not hasattr(scenario_tree_model, "CallbackModule"):
            raise RuntimeError(\
                "Internal Error: tree needs CallbackModule in parmest callback")
        else:
            modname = scenario_tree_model.CallbackModule

        if isinstance(modname, str):
            cb_module = im.import_module(modname, package=None)
        elif isinstance(modname, types.ModuleType):
            cb_module = modname
        else:
            print("Internal Error: bad CallbackModule")
            raise

        try:
            callback = getattr(cb_module, cb_name)
        except:
            print("Error getting function="+cb_name+" from module="+str(modname))
            raise
    
    if hasattr(scenario_tree_model, "BootList"):
        bootlist = scenario_tree_model.BootList
        #print("debug in callback: using bootlist=",str(bootlist))
        # assuming bootlist itself is zero based
        exp_num = bootlist[scen_num]
    else:
        exp_num = scen_num

    scen_name = basename + str(exp_num)

    cb_data = scenario_tree_model.cb_data # cb_data might be None.

    # at least three signatures are supported. The first is preferred
    try:
        instance = callback(experiment_number = exp_num, cb_data = cb_data)
    except TypeError:
        try:
            instance = callback(scenario_tree_model, scen_name, node_names)
        except TypeError:  # deprecated signature?
            try:
                instance = callback(scen_name, node_names)
            except:
                print("Failed to create instance using callback; TypeError+")
                raise
        except:
            print("Failed to create instance using callback.")
            raise

    if hasattr(scenario_tree_model, "ThetaVals"):
        thetavals = scenario_tree_model.ThetaVals

        # dlw august 2018: see mea code for more general theta
        for vstr in thetavals:
            object = _object_from_string(instance, vstr)
            if thetavals[vstr] is not None:
                #print("Fixing",vstr,"at",str(thetavals[vstr]))
                object.fix(thetavals[vstr])
            else:
                #print("Freeing",vstr)
                object.fixed = False

    return instance

#=============================================
def _treemaker(scenlist):
    """
    Makes a scenario tree (avoids dependence on daps)
    
    Parameters
    ---------- 
    scenlist (list of `int`): experiment (i.e. scenario) numbers

    Returns
    -------
    a `ConcreteModel` that is the scenario tree
    """

    num_scenarios = len(scenlist)
    m = CreateAbstractScenarioTreeModel().create_instance()
    m.Stages.add('Stage1')
    m.Stages.add('Stage2')
    m.Nodes.add('RootNode')
    for i in scenlist:
        m.Nodes.add('LeafNode_Experiment'+str(i))
        m.Scenarios.add('Experiment'+str(i))
    m.NodeStage['RootNode'] = 'Stage1'
    m.ConditionalProbability['RootNode'] = 1.0
    for node in m.Nodes:
        if node != 'RootNode':
            m.NodeStage[node] = 'Stage2'
            m.Children['RootNode'].add(node)
            m.Children[node].clear()
            m.ConditionalProbability[node] = 1.0/num_scenarios
            m.ScenarioLeafNode[node.replace('LeafNode_','')] = node

    return m

    
def group_data(data, groupby_column_name, use_mean=None):
    """
    Group data by scenario
    
    Parameters
    ----------
    data: DataFrame
        Data
    groupby_column_name: strings
        Name of data column which contains scenario numbers
    use_mean: list of column names or None, optional
        Name of data columns which should be reduced to a single value per 
        scenario by taking the mean
        
    Returns
    ----------
    grouped_data: list of dictionaries
        Grouped data
    """
    grouped_data = []
    for exp_num, group in data.groupby(data[groupby_column_name]):
        d = {}
        for col in group.columns:
            if col in use_mean:
                d[col] = group[col].mean()
            else:
                d[col] = list(group[col])
        grouped_data.append(d)

    return grouped_data


class _SecondStateCostExpr(object):
    """
    Class to pass objective expression into the Pyomo model
    """
    def __init__(self, ssc_function, data):
        self._ssc_function = ssc_function
        self._data = data
    def __call__(self, model):
        return self._ssc_function(model, self._data)


class Estimator(object):
    """
    Parameter estimation class

    Parameters
    ----------
    model_function: function
        Function that generates an instance of the Pyomo model using 'data' 
        as the input argument
    data: pandas DataFrame, list of dictionaries, or list of json file names
        Data that is used to build an instance of the Pyomo model and build 
        the objective function
    theta_names: list of strings
        List of Var names to estimate
    obj_function: function, optional
        Function used to formulate parameter estimation objective, generally
        sum of squared error between measurements and model variables.  
        If no function is specified, the model is used 
        "as is" and should be defined with a "FirstStateCost" and 
        "SecondStageCost" expression that are used to build an objective 
        for pysp.
    tee: bool, optional
        Indicates that ef solver output should be teed
    diagnostic_mode: bool, optional
        If True, print diagnostics from the solver
    solver_options: dict, optional
        Provides options to the solver (also the name of an attribute)
    """
    def __init__(self, model_function, data, theta_names, obj_function=None, 
                 tee=False, diagnostic_mode=False, solver_options=None):
        
        self.model_function = model_function
        self.callback_data = data

        if len(theta_names) == 0:
            self.theta_names = ['parmest_dummy_var']
        else:
            self.theta_names = theta_names 
            
        self.obj_function = obj_function 
        self.tee = tee
        self.diagnostic_mode = diagnostic_mode
        self.solver_options = solver_options
        
        self._second_stage_cost_exp = "SecondStageCost"
        self._numbers_list = list(range(len(data)))


    def _create_parmest_model(self, data):
        """
        Modify the Pyomo model for parameter estimation
        """
        from pyomo.core import Objective
        
        model = self.model_function(data)
        
        if (len(self.theta_names) == 1) and (self.theta_names[0] == 'parmest_dummy_var'):
            model.parmest_dummy_var = pyo.Var(initialize = 1.0)
            
        for i, theta in enumerate(self.theta_names):
            # First, leverage the parser in ComponentUID to locate the
            # component.  If that fails, fall back on the original
            # (insecure) use of 'eval'
            var_cuid = ComponentUID(theta)
            var_validate = var_cuid.find_component_on(model)
            if var_validate is None:
                logger.warning(
                    "theta_name[%s] (%s) was not found on the model",
                    (i, theta))
            else:
                try:
                    # If the component that was found is not a variable,
                    # this will generate an exception (and the warning
                    # in the 'except')
                    var_validate.fixed = False
                    # We want to standardize on the CUID string
                    # representation (which is what PySP will use
                    # internally)
                    self.theta_names[i] = repr(var_cuid)
                except:
                    logger.warning(theta + ' is not a variable')
        
        if self.obj_function:
            for obj in model.component_objects(Objective):
                obj.deactivate()
        
            def FirstStageCost_rule(model):
                return 0
            model.FirstStageCost = pyo.Expression(rule=FirstStageCost_rule)
            model.SecondStageCost = pyo.Expression(rule=_SecondStateCostExpr(self.obj_function, data))
            
            def TotalCost_rule(model):
                return model.FirstStageCost + model.SecondStageCost
            model.Total_Cost_Objective = pyo.Objective(rule=TotalCost_rule, sense=pyo.minimize)
        
        self.parmest_model = model
        
        return model
    
    
    def _instance_creation_callback(self, experiment_number=None, cb_data=None):
        
        # DataFrame
        if isinstance(cb_data, pd.DataFrame):
            # Keep single experiments in a Dataframe (not a Series)
            exp_data = cb_data.loc[experiment_number,:].to_frame().transpose() 
        
        # List of dictionaries OR list of json file names
        elif isinstance(cb_data, list):
            exp_data = cb_data[experiment_number]
            if isinstance(exp_data, dict):
                pass
            if isinstance(exp_data, str):
                try:
                    with open(exp_data,'r') as infile:
                        exp_data = json.load(infile)
                except:
                    print('Unexpected data format')
                    return
        else:
            print('Unexpected data format')
            return
        model = self._create_parmest_model(exp_data)
        
        return model
    

    def _Q_opt(self, ThetaVals=None, solver="ef_ipopt",
               return_values=[], bootlist=None, calc_cov=False):
        """
        Set up all thetas as first stage Vars, return resulting theta
        values as well as the objective function value.

        NOTE: If thetavals is present it will be attached to the
        scenario tree so it can be used by the scenario creation
        callback.  Side note (feb 2018, dlw): if you later decide to
        construct the tree just once and reuse it, then remember to
        remove thetavals from it when none is desired.
        """
        
        assert(solver != "k_aug" or ThetaVals == None)
        # Create a tree with dummy scenarios (callback will supply when needed).
        # Which names to use (i.e., numbers) depends on if it is for bootstrap.
        # (Bootstrap scenarios will use indirection through the bootlist)
        if bootlist is None:
            tree_model = _treemaker(self._numbers_list)
        else:
            tree_model = _treemaker(range(len(self._numbers_list)))
        stage1 = tree_model.Stages[1]
        stage2 = tree_model.Stages[2]
        tree_model.StageVariables[stage1] = self.theta_names
        tree_model.StageVariables[stage2] = []
        tree_model.StageCost[stage1] = "FirstStageCost"
        tree_model.StageCost[stage2] = "SecondStageCost"

        # Now attach things to the tree_model to pass them to the callback
        tree_model.CallbackModule = None
        tree_model.CallbackFunction = self._instance_creation_callback
        if ThetaVals is not None:
            tree_model.ThetaVals = ThetaVals
        if bootlist is not None:
            tree_model.BootList = bootlist
        tree_model.cb_data = self.callback_data  # None is OK

        try:
            # For structured models, it is important that we use the
            # updated version of the CUID representation.  PySP (for
            # backwards compatibility reasons, and so a ton of tests
            # don't have to be updated) still defaults to the old
            # representation.
            _cuidver = tree_structure.CUID_repr_version
            tree_structure.CUID_repr_version = 2
            stsolver = st.StochSolver(
                fsfile = "pyomo.contrib.parmest.parmest",
                fsfct = "_pysp_instance_creation_callback",
                tree_model = tree_model
            )
        finally:
            tree_structure.CUID_repr_version = _cuidver
                
        # Solve the extensive form with ipopt
        if solver == "ef_ipopt":
        
            # Generate the extensive form of the stochastic program using pysp
            self.ef_instance = stsolver.make_ef()

            # need_gap is a holdover from solve_ef in rapper.py. Would we ever want
            # need_gap = True with parmest?
            need_gap = False
            
            assert not (need_gap and self.calc_cov), "Calculating both the gap and reduced hessian (covariance) is not currently supported."

            if not calc_cov:
                # Do not calculate the reduced hessian

                solver = SolverFactory('ipopt')
                if self.solver_options is not None:
                    for key in self.solver_options:
                        solver.options[key] = self.solver_options[key]

                if need_gap:
                    solve_result = solver.solve(self.ef_instance, tee = self.tee, load_solutions=False)
                    if len(solve_result.solution) > 0:
                        absgap = solve_result.solution(0).gap
                    else:
                        absgap = None
                    self.ef_instance.solutions.load_from(solve_result)
                else:
                    solve_result = solver.solve(self.ef_instance, tee = self.tee)

            # The import error will be raised when we attempt to use
            # inv_reduced_hessian_barrier below.
            #
            #elif not asl_available:
            #    raise ImportError("parmest requires ASL to calculate the "
            #                      "covariance matrix with solver 'ipopt'")
            else:
                # parmest makes the fitted parameters stage 1 variables
                # thus we need to convert from var names (string) to 
                # Pyomo vars
                ind_vars = []
                for v in self.theta_names:

                    #ind_vars.append(eval('ef.'+v))
                    ind_vars.append(self.ef_instance.MASTER_BLEND_VAR_RootNode[v])
        
                # calculate the reduced hessian
                solve_result, inv_red_hes = \
                    inverse_reduced_hessian.inv_reduced_hessian_barrier(
                        self.ef_instance,
                        independent_variables= ind_vars,
                        solver_options=self.solver_options,
                        tee=self.tee)
            
            # Extract solution from pysp
            stsolver.scenario_tree.pullScenarioSolutionsFromInstances()
            stsolver.scenario_tree.snapshotSolutionFromScenarios() # update nodes
                                
            if self.diagnostic_mode:
                print('    Solver termination condition = ',
                       str(solve_result.solver.termination_condition))

            # assume all first stage are thetas...
            thetavals = {}
            for name, solval in stsolver.root_Var_solution():
                 thetavals[name] = solval

            objval = stsolver.root_E_obj()
            
            if calc_cov:
                # Calculate the covariance matrix
                
                # Extract number of data points considered
                n = len(self.callback_data)
                
                # Extract number of fitted parameters
                l = len(thetavals)
                
                # Assumption: Objective value is sum of squared errors
                sse = objval
                
                '''Calculate covariance assuming experimental observation errors are
                independent and follow a Gaussian 
                distribution with constant variance.
                
                The formula used in parmest was verified against equations (7-5-15) and
                (7-5-16) in "Nonlinear Parameter Estimation", Y. Bard, 1974.
                
                This formula is also applicable if the objective is scaled by a constant;
                the constant cancels out. (PySP scaled by 1/n because it computes an
                expected value.)
                '''
                cov = 2 * sse / (n - l) * inv_red_hes
            
            if len(return_values) > 0:
                var_values = []
                for exp_i in self.ef_instance.component_objects(Block, descend_into=False):
                    vals = {}
                    for var in return_values:
                        exp_i_var = exp_i.find_component(str(var))
                        temp = [pyo.value(_) for _ in exp_i_var.itervalues()]
                        if len(temp) == 1:
                            vals[var] = temp[0]
                        else:
                            vals[var] = temp                    
                    var_values.append(vals)                    
                var_values = pd.DataFrame(var_values)
                if calc_cov:
                    return objval, thetavals, var_values, cov
                else:
                    return objval, thetavals, var_values

            if calc_cov:
                return objval, thetavals, cov
            else:
                return objval, thetavals
        
        # Solve with sipopt and k_aug
        elif solver == "k_aug":
            # Just hope for the best with respect to degrees of freedom.

            model = stsolver.make_ef()
            stream_solver = True
            ipopt = SolverFactory('ipopt')
            sipopt = SolverFactory('ipopt_sens')
            kaug = SolverFactory('k_aug')

            #: ipopt suffixes  REQUIRED FOR K_AUG!
            model.dual = pyo.Suffix(direction=pyo.Suffix.IMPORT_EXPORT)
            model.ipopt_zL_out = pyo.Suffix(direction=pyo.Suffix.IMPORT)
            model.ipopt_zU_out = pyo.Suffix(direction=pyo.Suffix.IMPORT)
            model.ipopt_zL_in = pyo.Suffix(direction=pyo.Suffix.EXPORT)
            model.ipopt_zU_in = pyo.Suffix(direction=pyo.Suffix.EXPORT)

            # declare the suffix to be imported by the solver
            model.red_hessian = pyo.Suffix(direction=pyo.Suffix.EXPORT)
            #: K_AUG SUFFIXES
            model.dof_v = pyo.Suffix(direction=pyo.Suffix.EXPORT) 
            model.rh_name = pyo.Suffix(direction=pyo.Suffix.IMPORT)

            for vstrindex in range(len(self.theta_names)):
                vstr = self.theta_names[vstrindex]
                varobject = _ef_ROOT_node_Object_from_string(model, vstr)
                varobject.set_suffix_value(model.red_hessian, vstrindex+1)
                varobject.set_suffix_value(model.dof_v, 1)
            
            #: rh_name will tell us which position the corresponding variable has on the reduced hessian text file.
            #: be sure to declare the suffix value (order)
            # dof_v is "degree of freedom variable"
            kaug.options["compute_inv"] = ""  #: if the reduced hessian is desired.
            #: please check the inv_.in file if the compute_inv option was used

            #: write some options for ipopt sens
            with open('ipopt.opt', 'w') as f:
                f.write('compute_red_hessian yes\n')  #: computes the reduced hessian (sens_ipopt)
                f.write('output_file my_ouput.txt\n')
                f.write('rh_eigendecomp yes\n')
                f.close()
            #: Solve
            sipopt.solve(model, tee=stream_solver)
            with open('ipopt.opt', 'w') as f:
                f.close()

            ipopt.solve(model, tee=stream_solver)

            model.ipopt_zL_in.update(model.ipopt_zL_out)
            model.ipopt_zU_in.update(model.ipopt_zU_out)

            #: k_aug
            print('k_aug \n\n\n')
            #m.write('problem.nl', format=ProblemFormat.nl)
            kaug.solve(model, tee=stream_solver)
            HessDict = {}
            thetavals = {}
            print('k_aug red_hess')
            with open('result_red_hess.txt', 'r') as f:
                lines = f.readlines()
            # asseble the return values
            objval = model.MASTER_OBJECTIVE_EXPRESSION.expr()
            for i in range(len(lines)):
                HessDict[self.theta_names[i]] = {}
                linein = lines[i]
                print(linein)
                parts = linein.split()
                for j in range(len(parts)):
                    HessDict[self.theta_names[i]][self.theta_names[j]] = \
                        float(parts[j])
                # Get theta value (there is probably a better way...)
                vstr = self.theta_names[i]
                varobject = _ef_ROOT_node_Object_from_string(model, vstr)
                thetavals[self.theta_names[i]] = pyo.value(varobject)
            return objval, thetavals, HessDict

        else:
            raise RuntimeError("Unknown solver in Q_Opt="+solver)
        

    def _Q_at_theta(self, thetavals):
        """
        Return the objective function value with fixed theta values.
        
        Parameters
        ----------
        thetavals: dict
            A dictionary of theta values.

        Returns
        -------
        objectiveval: float
            The objective function value.
        thetavals: dict
            A dictionary of all values for theta that were input.
        solvertermination: Pyomo TerminationCondition
            Tries to return the "worst" solver status across the scenarios.
            pyo.TerminationCondition.optimal is the best and 
            pyo.TerminationCondition.infeasible is the worst.
        """

        optimizer = pyo.SolverFactory('ipopt')
        dummy_tree = lambda: None # empty object (we don't need a tree)
        dummy_tree.CallbackModule = None
        dummy_tree.CallbackFunction = self._instance_creation_callback
        dummy_tree.ThetaVals = thetavals
        dummy_tree.cb_data = self.callback_data
        
        if self.diagnostic_mode:
            print('    Compute objective at theta = ',str(thetavals))

        # start block of code to deal with models with no constraints
        # (ipopt will crash or complain on such problems without special care)
        instance = _pysp_instance_creation_callback(dummy_tree, "FOO1", None)    
        try: # deal with special problems so Ipopt will not crash
            first = next(instance.component_objects(pyo.Constraint, active=True))
        except:
            sillylittle = True 
        else:
            sillylittle = False
        # end block of code to deal with models with no constraints

        WorstStatus = pyo.TerminationCondition.optimal
        totobj = 0
        for snum in self._numbers_list:
            sname = "scenario_NODE"+str(snum)
            instance = _pysp_instance_creation_callback(dummy_tree,
                                                        sname, None)
            if not sillylittle:
                if self.diagnostic_mode:
                    print('      Experiment = ',snum)
                    print('     First solve with with special diagnostics wrapper')
                    status_obj, solved, iters, time, regu \
                        = ipopt_solver_wrapper.ipopt_solve_with_stats(instance, optimizer, max_iter=500, max_cpu_time=120)
                    print("   status_obj, solved, iters, time, regularization_stat = ",
                           str(status_obj), str(solved), str(iters), str(time), str(regu))

                results = optimizer.solve(instance)
                if self.diagnostic_mode:
                    print('standard solve solver termination condition=',
                            str(results.solver.termination_condition))

                if results.solver.termination_condition \
                   != pyo.TerminationCondition.optimal :
                    # DLW: Aug2018: not distinguishing "middlish" conditions
                    if WorstStatus != pyo.TerminationCondition.infeasible:
                        WorstStatus = results.solver.termination_condition
                    
            objobject = getattr(instance, self._second_stage_cost_exp)
            objval = pyo.value(objobject)
            totobj += objval
        retval = totobj / len(self._numbers_list) # -1??

        return retval, thetavals, WorstStatus

    def _get_sample_list(self, samplesize, num_samples, replacement=True):
        
        samplelist = list()
        
        if num_samples is None:
            # This could get very large
            for i, l in enumerate(combinations(self._numbers_list, samplesize)):
                samplelist.append((i, np.sort(l)))
        else:
            for i in range(num_samples):
                attempts = 0
                unique_samples = 0 # check for duplicates in each sample
                duplicate = False # check for duplicates between samples
                while (unique_samples <= len(self.theta_names)) and (not duplicate):
                    sample = np.random.choice(self._numbers_list,
                                                samplesize,
                                                replace=replacement)
                    sample = np.sort(sample).tolist()
                    unique_samples = len(np.unique(sample))
                    if sample in samplelist:
                        duplicate = True
                    
                    attempts += 1
                    if attempts > num_samples: # arbitrary timeout limit
                        raise RuntimeError("""Internal error: timeout constructing 
                                           a sample, the dim of theta may be too 
                                           close to the samplesize""")
    
                samplelist.append((i, sample))
            
        return samplelist
    
    def theta_est(self, solver="ef_ipopt", return_values=[], bootlist=None, calc_cov=False): 
        """
        Parameter estimation using all scenarios in the data

        Parameters
        ----------
        solver: string, optional
            "ef_ipopt" or "k_aug". Default is "ef_ipopt".
        return_values: list, optional
            List of Variable names used to return values from the model
        bootlist: list, optional
            List of bootstrap sample numbers, used internally when calling theta_est_bootstrap
        calc_cov: boolean, optional
            If True, calculate and return the covariance matrix (only for "ef_ipopt" solver)
            
        Returns
        -------
        objectiveval: float
            The objective function value
        thetavals: dict
            A dictionary of all values for theta
        variable values: pd.DataFrame
            Variable values for each variable name in return_values (only for ef_ipopt)
        Hessian: dict
            A dictionary of dictionaries for the Hessian.
            The Hessian is not returned if the solver is ef_ipopt.
        cov: numpy.array
            Covariance matrix of the fitted parameters (only for ef_ipopt)
        """
        assert isinstance(solver, str)
        assert isinstance(return_values, list)
        assert isinstance(bootlist, (type(None), list))
        
        return self._Q_opt(solver=solver, return_values=return_values,
                           bootlist=bootlist, calc_cov=calc_cov)
    
    
    def theta_est_bootstrap(self, bootstrap_samples, samplesize=None, 
                            replacement=True, seed=None, return_samples=False):
        """
        Parameter estimation using bootstrap resampling of the data

        Parameters
        ----------
        bootstrap_samples: int
            Number of bootstrap samples to draw from the data
        samplesize: int or None, optional
            Size of each bootstrap sample. If samplesize=None, samplesize will be 
			set to the number of samples in the data
        replacement: bool, optional
            Sample with or without replacement
        seed: int or None, optional
            Random seed
        return_samples: bool, optional
            Return a list of sample numbers used in each bootstrap estimation
        
        Returns
        -------
        bootstrap_theta: DataFrame 
            Theta values for each sample and (if return_samples = True) 
            the sample numbers used in each estimation
        """
        assert isinstance(bootstrap_samples, int)
        assert isinstance(samplesize, (type(None), int))
        assert isinstance(replacement, bool)
        assert isinstance(seed, (type(None), int))
        assert isinstance(return_samples, bool)
        
        if samplesize is None:
            samplesize = len(self._numbers_list)  
        
        if seed is not None:
            np.random.seed(seed)
        
        global_list = self._get_sample_list(samplesize, bootstrap_samples, 
                                            replacement)

        task_mgr = mpiu.ParallelTaskManager(bootstrap_samples)
        local_list = task_mgr.global_to_local_data(global_list)

        # Reset numbers_list
        self._numbers_list =  list(range(samplesize))
        
        bootstrap_theta = list()
        for idx, sample in local_list:
            objval, thetavals = self.theta_est(bootlist=list(sample))
            thetavals['samples'] = sample
            bootstrap_theta.append(thetavals)
            
        # Reset numbers_list (back to original)
        self._numbers_list =  list(range(len(self.callback_data)))
        
        global_bootstrap_theta = task_mgr.allgather_global_data(bootstrap_theta)
        bootstrap_theta = pd.DataFrame(global_bootstrap_theta)       

        if not return_samples:
            del bootstrap_theta['samples']
            
        return bootstrap_theta
    
    
    def theta_est_leaveNout(self, lNo, lNo_samples=None, seed=None, 
                            return_samples=False):
        """
        Parameter estimation where N data points are left out of each sample

        Parameters
        ----------
        lNo: int
            Number of data points to leave out for parameter estimation
        lNo_samples: int
            Number of leave-N-out samples. If lNo_samples=None, the maximum 
            number of combinations will be used
        seed: int or None, optional
            Random seed
        return_samples: bool, optional
            Return a list of sample numbers that were left out
        
        Returns
        -------
        lNo_theta: DataFrame 
            Theta values for each sample and (if return_samples = True) 
            the sample numbers left out of each estimation
        """
        assert isinstance(lNo, int)
        assert isinstance(lNo_samples, (type(None), int))
        assert isinstance(seed, (type(None), int))
        assert isinstance(return_samples, bool)
        
        samplesize = len(self._numbers_list)-lNo

        if seed is not None:
            np.random.seed(seed)
        
        global_list = self._get_sample_list(samplesize, lNo_samples, replacement=False)
            
        task_mgr = mpiu.ParallelTaskManager(len(global_list))
        local_list = task_mgr.global_to_local_data(global_list)
        
        # Reset numbers_list
        self._numbers_list =  list(range(samplesize))
        
        lNo_theta = list()
        for idx, sample in local_list:
            objval, thetavals = self.theta_est(bootlist=list(sample))
            lNo_s = list(set(range(len(self.callback_data))) - set(sample))
            thetavals['lNo'] = np.sort(lNo_s)
            lNo_theta.append(thetavals)
        
        # Reset numbers_list (back to original)
        self._numbers_list =  list(range(len(self.callback_data)))
        
        global_bootstrap_theta = task_mgr.allgather_global_data(lNo_theta)
        lNo_theta = pd.DataFrame(global_bootstrap_theta)   
        
        if not return_samples:
            del lNo_theta['lNo']
                    
        return lNo_theta
    
    
    def leaveNout_bootstrap_test(self, lNo, lNo_samples, bootstrap_samples, 
                                     distribution, alphas, seed=None):
        """
        Leave-N-out bootstrap test to compare theta values where N data points are 
        left out to a bootstrap analysis using the remaining data, 
        results indicate if theta is within a confidence region
        determined by the bootstrap analysis

        Parameters
        ----------
        lNo: int
            Number of data points to leave out for parameter estimation
        lNo_samples: int
            Leave-N-out sample size. If lNo_samples=None, the maximum number 
            of combinations will be used
        bootstrap_samples: int:
            Bootstrap sample size
        distribution: string
            Statistical distribution used to define a confidence region,  
            options = 'MVN' for multivariate_normal, 'KDE' for gaussian_kde, 
            and 'Rect' for rectangular.
        alphas: list
            List of alpha values used to determine if theta values are inside 
            or outside the region.
        seed: int or None, optional
            Random seed
            
        Returns
        ----------
        List of tuples with one entry per lNo_sample:
            
        * The first item in each tuple is the list of N samples that are left 
          out.
        * The second item in each tuple is a DataFrame of theta estimated using 
          the N samples.
        * The third item in each tuple is a DataFrame containing results from 
          the bootstrap analysis using the remaining samples.
        
        For each DataFrame a column is added for each value of alpha which 
        indicates if the theta estimate is in (True) or out (False) of the 
        alpha region for a given distribution (based on the bootstrap results)
        """
        assert isinstance(lNo, int)
        assert isinstance(lNo_samples, (type(None), int))
        assert isinstance(bootstrap_samples, int)
        assert distribution in ['Rect', 'MVN', 'KDE']
        assert isinstance(alphas, list)
        assert isinstance(seed, (type(None), int))
        
        if seed is not None:
            np.random.seed(seed)
            
        data = self.callback_data.copy()
        
        global_list = self._get_sample_list(lNo, lNo_samples, replacement=False)
            
        results = []
        for idx, sample in global_list:
            
            # Reset callback_data and numbers_list
            self.callback_data = data.loc[sample,:] 
            self._numbers_list = self.callback_data.index
            obj, theta = self.theta_est()
            
            # Reset callback_data and numbers_list
            self.callback_data = data.drop(index=sample)
            self._numbers_list = self.callback_data.index
            bootstrap_theta = self.theta_est_bootstrap(bootstrap_samples)
            
            training, test = self.confidence_region_test(bootstrap_theta, 
                                    distribution=distribution, alphas=alphas, 
                                    test_theta_values=theta)
                
            results.append((sample, test, training))
        
        # Reset callback_data and numbers_list (back to original)
        self.callback_data = data
        self._numbers_list = self.callback_data.index
        
        return results
    
    
    def objective_at_theta(self, theta_values):
        """
        Objective value for each theta

        Parameters
        ----------
        theta_values: DataFrame, columns=theta_names
            Values of theta used to compute the objective
            
        Returns
        -------
        obj_at_theta: DataFrame
            Objective value for each theta (infeasible solutions are 
            omitted).
        """
        assert isinstance(theta_values, pd.DataFrame)
        
        # for parallel code we need to use lists and dicts in the loop
        theta_names = theta_values.columns
        all_thetas = theta_values.to_dict('records')
        task_mgr = mpiu.ParallelTaskManager(len(all_thetas))
        local_thetas = task_mgr.global_to_local_data(all_thetas)
        
        # walk over the mesh, return objective function
        all_obj = list()
        for Theta in local_thetas:
            obj, thetvals, worststatus = self._Q_at_theta(Theta)
            if worststatus != pyo.TerminationCondition.infeasible:
                 all_obj.append(list(Theta.values()) + [obj])
            # DLW, Aug2018: should we also store the worst solver status?
            
        global_all_obj = task_mgr.allgather_global_data(all_obj)
        dfcols = list(theta_names) + ['obj']
        obj_at_theta = pd.DataFrame(data=global_all_obj, columns=dfcols)
            
        return obj_at_theta
    
    
    def likelihood_ratio_test(self, obj_at_theta, obj_value, alphas, 
                              return_thresholds=False):
        """
        Likelihood ratio test to identify theta values within a confidence 
        region using the :math:`\chi^2` distribution
        
        Parameters
        ----------
        obj_at_theta: DataFrame, columns = theta_names + 'obj'
            Objective values for each theta value (returned by 
            objective_at_theta)
        obj_value: int or float
            Objective value from parameter estimation using all data
        alphas: list
            List of alpha values to use in the chi2 test
        return_thresholds: bool, optional
            Return the threshold value for each alpha
            
        Returns
        -------
        LR: DataFrame 
            Objective values for each theta value along with True or False for 
            each alpha
        thresholds: dictionary
            If return_threshold = True, the thresholds are also returned.
        """
        assert isinstance(obj_at_theta, pd.DataFrame)
        assert isinstance(obj_value, (int, float))
        assert isinstance(alphas, list)
        assert isinstance(return_thresholds, bool)
            
        LR = obj_at_theta.copy()
        S = len(self.callback_data)
        thresholds = {}
        for a in alphas:
            chi2_val = scipy.stats.chi2.ppf(a, 2)
            thresholds[a] = obj_value * ((chi2_val / (S - 2)) + 1)
            LR[a] = LR['obj'] < thresholds[a]
        
        if return_thresholds:
            return LR, thresholds
        else:
            return LR

    def confidence_region_test(self, theta_values, distribution, alphas, 
                               test_theta_values=None):
        """
        Confidence region test to determine if theta values are within a 
        rectangular, multivariate normal, or Gaussian kernel density distribution 
        for a range of alpha values
        
        Parameters
        ----------
        theta_values: DataFrame, columns = theta_names
            Theta values used to generate a confidence region 
            (generally returned by theta_est_bootstrap)
        distribution: string
            Statistical distribution used to define a confidence region,  
            options = 'MVN' for multivariate_normal, 'KDE' for gaussian_kde, 
            and 'Rect' for rectangular.
        alphas: list
            List of alpha values used to determine if theta values are inside 
            or outside the region.
        test_theta_values: dictionary or DataFrame, keys/columns = theta_names, optional
            Additional theta values that are compared to the confidence region
            to determine if they are inside or outside.
        
        Returns
        -------
        training_results: DataFrame 
            Theta value used to generate the confidence region along with True 
            (inside) or False (outside) for each alpha
        test_results: DataFrame 
            If test_theta_values is not None, returns test theta value along 
            with True (inside) or False (outside) for each alpha
        """
        assert isinstance(theta_values, pd.DataFrame)
        assert distribution in ['Rect', 'MVN', 'KDE']
        assert isinstance(alphas, list)
        assert isinstance(test_theta_values, (type(None), dict, pd.DataFrame))
        
        if isinstance(test_theta_values, dict):
            test_theta_values = pd.Series(test_theta_values).to_frame().transpose()
            
        training_results = theta_values.copy()
        
        if test_theta_values is not None:
            test_result = test_theta_values.copy()
        
        for a in alphas:
            
            if distribution == 'Rect':
                lb, ub = fit_rect_dist(theta_values, a)
                training_results[a] = ((theta_values > lb).all(axis=1) & \
                                  (theta_values < ub).all(axis=1))
                
                if test_theta_values is not None:
                    # use upper and lower bound from the training set
                    test_result[a] = ((test_theta_values > lb).all(axis=1) & \
                                  (test_theta_values < ub).all(axis=1))
                    
            elif distribution == 'MVN':
                dist = fit_mvn_dist(theta_values)
                Z = dist.pdf(theta_values)
                score = scipy.stats.scoreatpercentile(Z, (1-a)*100) 
                training_results[a] = (Z >= score)
                
                if test_theta_values is not None:
                    # use score from the training set
                    Z = dist.pdf(test_theta_values)
                    test_result[a] = (Z >= score) 
                
            elif distribution == 'KDE':
                dist = fit_kde_dist(theta_values)
                Z = dist.pdf(theta_values.transpose())
                score = scipy.stats.scoreatpercentile(Z, (1-a)*100) 
                training_results[a] = (Z >= score)
                
                if test_theta_values is not None:
                    # use score from the training set
                    Z = dist.pdf(test_theta_values.transpose())
                    test_result[a] = (Z >= score) 
                    
        if test_theta_values is not None:
            return training_results, test_result
        else:
            return training_results