import time
from time import gmtime, strftime

import boto3
import gym
import numpy as np
import requests
from gym.spaces import Box, Discrete

# Module-level CloudWatch client, created once at import time and used by
# GameServerEnv.populate_cloudwatch_metric; the region is fixed to us-west-2.
cloudwatch_cli = boto3.client("cloudwatch", region_name="us-west-2")


class GameServerEnv(gym.Env):
    """Gym environment in which an agent decides how many game servers to allocate.

    Each step the agent emits an action in [0, 1] that is scaled by
    ``action_factor`` and clipped to ``[min_servers, max_servers]``.  The
    environment obtains the current demand (from an HTTP endpoint, or from a
    built-in sine generator when ``gs_inventory_url`` is ``"local"``), scores
    the oldest allocation in the history window against that demand, and
    publishes demand, allocation and reward to CloudWatch.

    The observation is the concatenation of the last ``history_len`` demand
    values followed by the last ``history_len`` allocation values.

    Required ``env_config`` keys: ``cloudwatch_namespace``,
    ``gs_inventory_url``, ``learning_freq``, ``min_servers``, ``max_servers``,
    ``action_factor`` and ``over_prov_factor``.
    """

    # Seconds to wait for the matchmaking endpoint before falling back to a
    # random demand; without a timeout a stalled endpoint blocks step() forever.
    REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self, env_config=None):
        """Read the configuration and build the observation/action spaces.

        Args:
            env_config: Mapping with the keys listed in the class docstring.

        Raises:
            KeyError: If a required configuration key is missing.
        """
        print("in __init__")
        # None instead of a shared mutable default; a missing config still
        # fails with KeyError on the first required key below.
        env_config = {} if env_config is None else env_config
        print("env_config {}".format(env_config))
        self.namespace = env_config["cloudwatch_namespace"]
        self.gs_inventory_url = env_config["gs_inventory_url"]
        self.learning_freq = env_config["learning_freq"]
        self.min_servers = int(env_config["min_servers"])
        self.max_servers = int(env_config["max_servers"])
        self.action_factor = int(env_config["action_factor"])
        self.over_prov_factor = int(env_config["over_prov_factor"])
        self.num_steps = 0
        self.max_num_steps = 301
        self.history_len = 5
        self.total_num_of_obs = 1

        # Per-episode state; reset() re-initialises it, but defining it here
        # means step() does not fail with AttributeError if called first.
        self.current_min = 0
        self.demand_observation = np.array([self.min_servers] * self.history_len)
        self.alloc_observation = np.array([self.min_servers] * self.history_len)

        # Two history arrays (demand and allocation) are concatenated into one
        # observation, hence history_len * 2 entries.
        # NOTE(review): the declared dtype is uint32, but after the first step
        # the observations are float arrays (np.append with a float) - confirm
        # whether the space should be a float Box.
        self.observation_space = Box(
            low=np.array([self.min_servers] * self.history_len * 2),
            high=np.array([self.max_servers] * self.history_len * 2),
            dtype=np.uint32,
        )

        # The agent outputs a fraction in [0, 1]; step() scales it by
        # action_factor to obtain the number of servers to allocate.
        self.action_space = Box(low=np.array([0]), high=np.array([1]), dtype=np.float32)

    def reset(self):
        """Start a new episode and return the initial observation.

        Both history windows are filled with ``min_servers`` and the step
        counter and sine-generator position are set back to zero.
        """
        print("in reset")
        self.num_steps = 0
        self.current_min = 0
        self.demand_observation = np.array([self.min_servers] * self.history_len)
        self.alloc_observation = np.array([self.min_servers] * self.history_len)

        print("self.demand_observation " + str(self.demand_observation))
        print("self.alloc_observation " + str(self.alloc_observation))
        return np.concatenate((self.demand_observation, self.alloc_observation))

    def step(self, action):
        """Apply one action and advance the environment by one time step.

        Args:
            action: A scalar or single-element array in the action space.

        Returns:
            A ``(next_state, reward, done, info)`` tuple; ``next_state`` is the
            demand history followed by the allocation history, ``done`` is True
            once ``max_num_steps`` steps have been taken, ``info`` is empty.
        """
        print("in step - action recieved from model" + str(action))
        self.num_steps += 1
        self.total_num_of_obs += 1
        print("total_num_of_obs={}".format(self.total_num_of_obs))

        # .item() extracts the single value from a scalar or 1-element array
        # without relying on implicit array-to-float conversion.
        raw_action = float(np.asarray(action).item())
        self.curr_action = np.clip(
            raw_action * self.action_factor, self.min_servers, self.max_servers
        )
        print("self.curr_action={}".format(self.curr_action))

        # clip the demand to the allowed range
        self.curr_demand = np.clip(self._fetch_demand(), self.min_servers, self.max_servers)
        print("self.curr_demand={}".format(self.curr_demand))

        # time-horizon - the oldest allocation in the window is the one scored
        # against the current demand, i.e. an action takes history_len steps
        # before it is evaluated.
        self.curr_alloc = self.alloc_observation[0]
        print("self.curr_alloc={}".format(self.curr_alloc))

        # Slide both history windows: drop the oldest point, append the newest.
        self.demand_observation = np.append(self.demand_observation[1:], self.curr_demand)
        print("self.demand_observation={}".format(self.demand_observation))

        self.alloc_observation = np.append(self.alloc_observation[1:], self.curr_action)
        print("self.alloc_observation={}".format(self.alloc_observation))

        reward = self._compute_reward()

        # Instrument the supply and demand in cloudwatch
        print("populating cloudwatch - self.curr_demand={}".format(self.curr_demand))
        self.populate_cloudwatch_metric(self.namespace, self.curr_demand, "curr_demand")
        print("populating cloudwatch - self.curr_alloc={}".format(self.curr_action))
        self.populate_cloudwatch_metric(self.namespace, self.curr_action, "curr_alloc")
        print("populating cloudwatch - reward={}".format(reward))
        self.populate_cloudwatch_metric(self.namespace, reward, "reward")

        done = self.num_steps >= self.max_num_steps
        if done:
            print("self.num_steps " + str(self.num_steps))
            print("self.max_num_steps " + str(self.max_num_steps))

        print("time.sleep() for {} before next iteration".format(self.learning_freq))
        time.sleep(int(self.learning_freq))

        extra_info = {}
        # the next state includes the demand and allocation history.
        next_state = np.concatenate((self.demand_observation, self.alloc_observation))
        print("next_state={}".format(next_state))
        return next_state, reward, done, extra_info

    def _fetch_demand(self):
        """Return the current (unclipped) demand as a float.

        Uses the local sine generator when ``gs_inventory_url`` is ``"local"``;
        otherwise queries the URL and, if the request or its payload is
        unusable, falls back to a random value within the server limits.
        """
        if self.gs_inventory_url == "local":
            print("local matchmaking service for current demand, curr_demand")
            data = self.get_curr_sine1h()
            return float(data["Prediction"]["num_of_gameservers"])

        # get the demand from the matchmaking service
        print("quering matchmaking service for current demand, curr_demand")
        try:
            req = requests.get(url=self.gs_inventory_url, timeout=self.REQUEST_TIMEOUT_SECONDS)
            data = req.json()
            return float(data["Prediction"]["num_of_gameservers"])
        except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as e:
            # ValueError: body is not JSON / value is not numeric;
            # KeyError/TypeError: payload lacks the expected structure.
            print(e)
            print(
                "if matchmaking did not respond just randomized curr_demand between limit, reward will correct"
            )
            # randint's upper bound is exclusive: +1 makes max_servers
            # reachable and keeps the call valid when min == max.
            return float(np.random.randint(self.min_servers, self.max_servers + 1))

    def _compute_reward(self):
        """Score ``curr_alloc`` against ``curr_demand`` and return the reward.

        Over-provisioning costs 1 per surplus server, under-provisioning costs
        5 per missing server, and an exact match starts from 1; the
        ``over_prov_factor`` term is then applied on top.
        """
        print(
            "calculate the reward, calculate the ratio between allocation and demand, we use the first allocation in the series of history of five, first_alloc/curr_demand"
        )
        print(
            "history of previous predictions made by the model ={}".format(self.alloc_observation)
        )

        alloc = self.curr_alloc
        demand = self.curr_demand

        # The ratio is only logged. Dividing by a zero demand would give
        # inf/nan, and a nan ratio previously matched no branch and left
        # `reward` unbound, so the branches below compare the values directly.
        if demand:
            ratio = alloc / demand
        else:
            ratio = 1.0 if alloc == demand else float("inf")
        print("ratio={}".format(ratio))

        if alloc > demand:
            reward = -1 * (alloc - demand)
            print("reward over provision - ratio>1 - {}".format(reward))
        elif alloc < demand:
            # under provision is more severe, so it is penalised 5x
            reward = -5 * (demand - alloc)
            print("reward under provision - ratio<1 - {}".format(reward))
        else:
            reward = 1
            print("ratio=1")

        # NOTE(review): with a positive factor this term raises the reward when
        # over-provisioned and lowers it when under-provisioned - confirm the
        # sign matches the intent behind the name "over_prov_factor".
        reward -= (demand - alloc) * self.over_prov_factor
        print("ratio={}".format(ratio))
        print("reward={}".format(reward))
        return reward

    def render(self, mode="human"):
        """No-op renderer; only logs that it was called."""
        print("in render")

    def populate_cloudwatch_metric(self, namespace, metric_value, metric_name):
        """Publish a single unit-less data point to CloudWatch.

        Args:
            namespace: CloudWatch namespace to publish into.
            metric_value: Numeric value of the data point.
            metric_name: Name of the metric.
        """
        print(
            "in populate_cloudwatch_metric metric_value="
            + str(metric_value)
            + " metric_name="
            + metric_name
        )
        response = cloudwatch_cli.put_metric_data(
            Namespace=namespace,
            MetricData=[
                {
                    "MetricName": metric_name,
                    "Unit": "None",
                    # Plain float so NumPy scalar types are always accepted.
                    "Value": float(metric_value),
                },
            ],
        )
        print("response from cloud watch" + str(response))

    def get_curr_sine1h(self):
        """Return the next point of a synthetic demand curve.

        Advances an internal position that wraps around every 60 calls and
        returns ``0.9 * max_servers * sin(x)``, where ``x`` is taken from 61
        evenly spaced points between 0.2 and 3.1, in the same
        ``{"Prediction": {"num_of_gameservers": ...}}`` shape that step()
        reads from the matchmaking endpoint.
        """
        max_servers = self.max_servers * 0.9
        print("in get_curr_sine1h")
        cycle_arr = np.linspace(0.2, 3.1, 61)
        self.current_min = (self.current_min + 1) % 60
        current_min = self.current_min
        print("current_min={}".format(current_min))
        current_point = cycle_arr[int(current_min)]
        sine = max_servers * np.sin(current_point)
        print("sine({})={}".format(current_point, sine))
        return {"Prediction": {"num_of_gameservers": sine}}