# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
# limitations under the License.
from enum import Enum
from typing import Dict, List

import pytest

from pcluster.aws.aws_resources import InstanceTypeInfo
from pcluster.config.cluster_config import AllocationStrategy, CapacityType
from pcluster.validators.instances_validators import (
    InstancesAcceleratorsValidator,
    InstancesAllocationStrategyValidator,
    InstancesCPUValidator,
    InstancesEFAValidator,
    InstancesMemorySchedulingWarningValidator,
    InstancesNetworkingValidator,
)
from tests.pcluster.validators.utils import assert_failure_messages


@pytest.mark.parametrize(
    "compute_resource_name, instance_types_info, disable_simultaneous_multithreading, expected_message",
    [
        # Instance Types should have the same number of CPUs
        (
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo(
                    {"InstanceType": "t2.micro", "VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}}
                ),
                "t3.micro": InstanceTypeInfo(
                    {"InstanceType": "t3.micro", "VCpuInfo": {"DefaultVCpus": 5, "DefaultCores": 2}}
                ),
            },
            False,
            "Instance types listed under Compute Resource TestComputeResource must have the same number of vCPUs "
            "({'t2.micro': 4, 't3.micro': 5}).",
        ),
        (
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo(
                    {"InstanceType": "t2.micro", "VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}}
                ),
                "t3.micro": InstanceTypeInfo(
                    {"InstanceType": "t3.micro", "VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}}
                ),
            },
            False,
            "",
        ),
        # InstanceTypes should have the same number of cores if simultaneous multithreading is disabled
        (
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo(
                    {"InstanceType": "t2.micro", "VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 1}}
                ),
                "t3.micro": InstanceTypeInfo(
                    {"InstanceType": "t3.micro", "VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}}
                ),
            },
            True,
            "Instance types listed under Compute Resource TestComputeResource must have the same number of CPU "
            "cores when Simultaneous Multithreading is disabled ({'t2.micro': 1, 't3.micro': 2}).",
        ),
        (
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo({"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 1}}),
                "t3.micro": InstanceTypeInfo({"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}}),
            },
            False,
            "",
        ),
    ],
)
def test_instances_cpu_validator(
    compute_resource_name,
    instance_types_info,
    disable_simultaneous_multithreading,
    expected_message,
):
    actual_failures = InstancesCPUValidator().execute(
        compute_resource_name,
        instance_types_info,
        disable_simultaneous_multithreading,
    )
    assert_failure_messages(actual_failures, expected_message)


@pytest.mark.parametrize(
    "compute_resource_name, instance_types_info, expected_message",
    [
        # Instance Types should have the same number of GPUs
        (
            "TestComputeResource",
            {
                "g4dn.xlarge": InstanceTypeInfo(
                    {
                        "InstanceType": "g4dn.xlarge",
                        "GpuInfo": {
                            "Gpus": [
                                {"Name": "T4", "Manufacturer": "NVIDIA", "Count": 1, "MemoryInfo": {"SizeInMiB": 16384}}
                            ],
                            "TotalGpuMemoryInMiB": 16384,
                        },
                    }
                ),
                "g5.xlarge": InstanceTypeInfo(
                    {
                        "InstanceType": "g5.xlarge",
                        "GpuInfo": {
                            "Gpus": [
                                {
                                    "Name": "A10G",
                                    "Manufacturer": "NVIDIA",
                                    "Count": 2,
                                    "MemoryInfo": {"SizeInMiB": 24576},
                                }
                            ],
                            "TotalGpuMemoryInMiB": 24576,
                        },
                    }
                ),
            },
            "Instance types listed under Compute Resource TestComputeResource must have the same number of GPUs ({"
            "'g4dn.xlarge': 1, 'g5.xlarge': 2}).",
        ),
        (
            "TestComputeResource",
            {
                "g4dn.xlarge": InstanceTypeInfo(
                    {
                        "GpuInfo": {
                            "Gpus": [
                                {"Name": "T4", "Manufacturer": "NVIDIA", "Count": 2, "MemoryInfo": {"SizeInMiB": 16384}}
                            ],
                            "TotalGpuMemoryInMiB": 16384,
                        },
                    }
                ),
                "g5.xlarge": InstanceTypeInfo(
                    {
                        "GpuInfo": {
                            "Gpus": [
                                {
                                    "Name": "A10G",
                                    "Manufacturer": "NVIDIA",
                                    "Count": 2,
                                    "MemoryInfo": {"SizeInMiB": 24576},
                                }
                            ],
                            "TotalGpuMemoryInMiB": 24576,
                        },
                    }
                ),
            },
            "",
        ),
        # Instance Types should have the same number of Accelerators
        (
            "TestComputeResource",
            {
                "inf1.6xlarge": InstanceTypeInfo(
                    {
                        "InstanceType": "inf1.6xlarge",
                        "InferenceAcceleratorInfo": {
                            "Accelerators": [{"Count": 4, "Name": "Inferentia", "Manufacturer": "AWS"}]
                        },
                    }
                ),
                "inf1.2xlarge": InstanceTypeInfo(
                    {
                        "InstanceType": "inf1.2xlarge",
                        "InferenceAcceleratorInfo": {
                            "Accelerators": [{"Count": 1, "Name": "Inferentia", "Manufacturer": "AWS"}]
                        },
                    }
                ),
            },
            "Instance types listed under Compute Resource TestComputeResource must have the same number of Inference "
            "Accelerators ({'inf1.6xlarge': 4, 'inf1.2xlarge': 1}).",
        ),
        (
            "TestComputeResource",
            {
                "inf1.6xlarge": InstanceTypeInfo(
                    {
                        "InferenceAcceleratorInfo": {
                            "Accelerators": [{"Count": 4, "Name": "Inferentia", "Manufacturer": "AWS"}]
                        },
                    }
                ),
                "inf1.2xlarge": InstanceTypeInfo(
                    {
                        "InferenceAcceleratorInfo": {
                            "Accelerators": [{"Count": 4, "Name": "Inferentia", "Manufacturer": "AWS"}]
                        },
                    }
                ),
            },
            "",
        ),
        # Instance Types should have the same GPU manufacturer
        (
            "TestComputeResource",
            {
                "g4dn.xlarge": InstanceTypeInfo(
                    {
                        "InstanceType": "g4dn.xlarge",
                        "GpuInfo": {
                            "Gpus": [
                                {"Name": "T4", "Manufacturer": "NVIDIA", "Count": 2, "MemoryInfo": {"SizeInMiB": 16384}}
                            ],
                            "TotalGpuMemoryInMiB": 16384,
                        },
                    }
                ),
                "g5.xlarge": InstanceTypeInfo(
                    {
                        "InstanceType": "g5.xlarge",
                        "GpuInfo": {
                            "Gpus": [
                                {
                                    "Name": "A10G",
                                    "Manufacturer": "OtherGPUManufacturers",
                                    "Count": 2,
                                    "MemoryInfo": {"SizeInMiB": 24576},
                                }
                            ],
                            "TotalGpuMemoryInMiB": 24576,
                        },
                    }
                ),
            },
            "Instance types listed under Compute Resource TestComputeResource must have the same GPU manufacturer ({"
            "'g4dn.xlarge': 'NVIDIA', 'g5.xlarge': 'OtherGPUManufacturers'}).",
        ),
        (
            "TestComputeResource",
            {
                "g4dn.xlarge": InstanceTypeInfo(
                    {
                        "GpuInfo": {
                            "Gpus": [
                                {"Name": "T4", "Manufacturer": "NVIDIA", "Count": 2, "MemoryInfo": {"SizeInMiB": 16384}}
                            ],
                            "TotalGpuMemoryInMiB": 16384,
                        },
                    }
                ),
                "g5.xlarge": InstanceTypeInfo(
                    {
                        "GpuInfo": {
                            "Gpus": [
                                {
                                    "Name": "A10G",
                                    "Manufacturer": "NVIDIA",
                                    "Count": 2,
                                    "MemoryInfo": {"SizeInMiB": 24576},
                                }
                            ],
                            "TotalGpuMemoryInMiB": 24576,
                        },
                    }
                ),
            },
            "",
        ),
        # Instance Types should have the same Accelerator Manufacturer (Inferentia)
        (
            "TestComputeResource",
            {
                "inf1.6xlarge": InstanceTypeInfo(
                    {
                        "InstanceType": "inf1.6xlarge",
                        "InferenceAcceleratorInfo": {
                            "Accelerators": [{"Count": 4, "Name": "Inferentia", "Manufacturer": "AWS"}]
                        },
                    }
                ),
                "inf1.2xlarge": InstanceTypeInfo(
                    {
                        "InstanceType": "inf1.2xlarge",
                        "InferenceAcceleratorInfo": {
                            "Accelerators": [{"Count": 4, "Name": "Inferentia", "Manufacturer": "NotAWS"}]
                        },
                    }
                ),
            },
            "Instance types listed under Compute Resource TestComputeResource must have the same inference "
            "accelerator manufacturer ({'inf1.6xlarge': 'AWS', 'inf1.2xlarge': 'NotAWS'}).",
        ),
        (
            "TestComputeResource",
            {
                "inf1.6xlarge": InstanceTypeInfo(
                    {
                        "InferenceAcceleratorInfo": {
                            "Accelerators": [{"Count": 4, "Name": "Inferentia", "Manufacturer": "AWS"}]
                        },
                    }
                ),
                "inf1.2xlarge": InstanceTypeInfo(
                    {
                        "InferenceAcceleratorInfo": {
                            "Accelerators": [{"Count": 4, "Name": "Inferentia", "Manufacturer": "AWS"}]
                        },
                    }
                ),
            },
            "",
        ),
    ],
)
def test_instances_accelerators_validator(compute_resource_name, instance_types_info, expected_message):
    actual_failures = InstancesAcceleratorsValidator().execute(
        compute_resource_name,
        instance_types_info,
    )
    assert_failure_messages(actual_failures, expected_message)


@pytest.mark.parametrize(
    "compute_resource_name, instance_types_info, efa_enabled, multiaz_queue, expected_message",
    [
        # Instance Types should have the same EFA support status if EFA is enabled
        (
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": False}}),
                "t3.micro": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": False}}),
                "c5n.18xlarge": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": True}}),
            },
            True,
            False,
            "Instance type(s) (t2.micro,t3.micro) do not support EFA and cannot be launched when EFA is enabled in "
            "Compute Resource: TestComputeResource.",
        ),
        (
            "TestComputeResource",
            {
                "c5n.9xlarge": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": True}}),
                "c5n.18xlarge": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": True}}),
            },
            True,
            False,
            "",
        ),
        # If EFA is NOT enabled and one or more instance types supports EFA, a WARNING message should be printed
        (
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": False}}),
                "t3.micro": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": False}}),
                "c5n.18xlarge": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": True}}),
            },
            False,
            False,
            "The EC2 instance type(s) selected (c5n.18xlarge) for the Compute Resource TestComputeResource support "
            "enhanced networking capabilities using Elastic Fabric Adapter (EFA). EFA enables you to run applications "
            "requiring high levels of inter-node communications at scale on AWS at no additional charge. You can "
            "update the cluster's configuration to enable EFA ("
            "https://docs.aws.amazon.com/parallelcluster/latest/ug/efa-v3.html).",
        ),
        # If EFA is NOT enabled and one or more instance types supports EFA, but MultiAZ is defined in the queue
        # no WARNING message should be printed
        (
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": False}}),
                "t3.micro": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": False}}),
                "c5n.18xlarge": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": True}}),
            },
            False,
            True,
            "",
        ),
        # If EFA is enabled and NONE of the instance types supports EFA, an ERROR message should be printed
        (
            "TestComputeResource",
            {
                "t3.micro": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": False}}),
                "t2.micro": InstanceTypeInfo({"NetworkInfo": {"EfaSupported": False}}),
            },
            True,
            False,
            "Instance type(s) (t2.micro,t3.micro) do not support EFA and cannot be launched when EFA is enabled in "
            "Compute Resource: TestComputeResource.",
        ),
    ],
)
def test_instances_efa_validator(
    compute_resource_name,
    instance_types_info,
    efa_enabled,
    multiaz_queue,
    expected_message,
):
    actual_failures = InstancesEFAValidator().execute(
        compute_resource_name, instance_types_info, efa_enabled, multiaz_queue
    )
    assert_failure_messages(actual_failures, expected_message)


@pytest.mark.parametrize(
    "queue_name, compute_resource_name, instance_types_info, placement_group_enabled, expected_message",
    [
        # Instance Types with varying Maximum NICs will have the smallest one used when setting the launch template
        (
            "TestQueue10",
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo({"NetworkInfo": {"MaximumNetworkCards": 4}}),
                "t3.micro": InstanceTypeInfo({"NetworkInfo": {"MaximumNetworkCards": 2}}),
            },
            False,
            "Compute Resource TestComputeResource has instance types with varying numbers of network cards (Min: 2, "
            "Max: 4). Compute Resource will be created with 2 network cards.",
        ),
        (
            "TestQueue10",
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo({"NetworkInfo": {"MaximumNetworkCards": 4}}),
                "t3.micro": InstanceTypeInfo({"NetworkInfo": {"MaximumNetworkCards": 4}}),
            },
            False,
            "",
        ),
        # Using a placement group while having compute resources with multiple instance types increases the chances of
        # getting an Insufficient Capacity Error
        (
            "TestQueue11",
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo({"NetworkInfo": {"MaximumNetworkCards": 4}}),
                "t3.micro": InstanceTypeInfo({"NetworkInfo": {"MaximumNetworkCards": 4}}),
            },
            True,
            "Enabling placement groups for queue: TestQueue11 may result in Insufficient Capacity Errors due to the "
            "use of multiple instance types for Compute Resource: TestComputeResource ("
            "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html#placement-groups-cluster).",
        ),
        (
            "TestQueue11",
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo({"NetworkInfo": {"MaximumNetworkCards": 4}}),
            },
            True,
            "",
        ),
    ],
)
def test_instances_networking_validator(
    queue_name: str,
    compute_resource_name: str,
    instance_types_info: Dict[str, InstanceTypeInfo],
    placement_group_enabled: bool,
    expected_message: str,
):
    actual_failures = InstancesNetworkingValidator().execute(
        queue_name, compute_resource_name, instance_types_info, placement_group_enabled
    )
    assert_failure_messages(actual_failures, expected_message)


@pytest.mark.parametrize(
    "compute_resource_name, capacity_type, allocation_strategy, expected_message",
    [
        # OnDemand Capacity type only supports "lowest-price" allocation strategy
        # Spot Capacity type supports both "lowest-price" and "capacity-optimized" allocation strategy
        (
            "TestComputeResource",
            CapacityType.ONDEMAND,
            AllocationStrategy.CAPACITY_OPTIMIZED,
            "Compute Resource TestComputeResource is using an OnDemand CapacityType but the Allocation Strategy "
            "specified is capacity-optimized. OnDemand CapacityType can only use 'lowest-price' allocation strategy.",
        ),
        ("TestComputeResource", CapacityType.ONDEMAND, AllocationStrategy.LOWEST_PRICE, ""),
    ],
)
def test_instances_allocation_strategy_validator(
    compute_resource_name: str, capacity_type: Enum, allocation_strategy: Enum, expected_message: str
):
    actual_failures = InstancesAllocationStrategyValidator().execute(
        compute_resource_name, capacity_type, allocation_strategy
    )
    assert_failure_messages(actual_failures, expected_message)


# Memory-based scheduling is allowed for Compute Resource that use multiple instance type under 'Instances'
# but a warning is triggered to inform customers of possible wasted resources.
@pytest.mark.parametrize(
    "compute_resource_name, instance_types_info, memory_scheduling_enabled, expected_message",
    [
        pytest.param(
            "TestComputeResource",
            {
                "t1.micro": InstanceTypeInfo(
                    {"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}, "MemoryInfo": {"SizeInMiB": 2048}}
                ),
                "t2.micro": InstanceTypeInfo(
                    {"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}, "MemoryInfo": {"SizeInMiB": 4096}}
                ),
                "t3.micro": InstanceTypeInfo(
                    {"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}, "MemoryInfo": {"SizeInMiB": 8192}}
                ),
            },
            True,
            'Enabling Memory-based scheduling when a Compute Resource ("TestComputeResource") has more than one '
            "instance type specified may lead to unused resources since only the minimum available memory across "
            "all instance-types can be specified in the Slurm node definition.",
            id="Memory Diff exceeds both Absolute (4G) and Percentage (0.20) threshold, so a Warning is triggered",
        ),
        pytest.param(
            "TestComputeResource",
            {
                "t1.micro": InstanceTypeInfo(
                    {"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}, "MemoryInfo": {"SizeInMiB": 20024}}
                ),
                "t2.micro": InstanceTypeInfo(
                    {"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}, "MemoryInfo": {"SizeInMiB": 22048}}
                ),
                "t3.micro": InstanceTypeInfo(
                    {"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}, "MemoryInfo": {"SizeInMiB": 24096}}
                ),
            },
            True,
            "",
            id="Memory Diff exceeds only Absolute (4G) but not Percentage (0.2), so NO Warning is triggered",
        ),
        pytest.param(
            "TestComputeResource",
            {
                "t1.micro": InstanceTypeInfo(
                    {"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}, "MemoryInfo": {"SizeInMiB": 256}}
                ),
                "t2.micro": InstanceTypeInfo(
                    {"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}, "MemoryInfo": {"SizeInMiB": 1024}}
                ),
                "t3.micro": InstanceTypeInfo(
                    {"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}, "MemoryInfo": {"SizeInMiB": 2048}}
                ),
            },
            True,
            "",
            id="Memory Diff exceeds only Percentage (0.20) but not Absolute (4G), so NO Warning is triggered",
        ),
        pytest.param(
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo({"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}}),
            },
            True,
            "",
            id="Only one instance type is specified so NO Warning is triggered",
        ),
        pytest.param(
            "TestComputeResource",
            {
                "t2.micro": InstanceTypeInfo({"VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}}),
            },
            False,
            "",
            id="Memory Based Scheduling is disabled so no Warning is triggered",
        ),
    ],
)
def test_instances_memory_scheduling_validator(
    compute_resource_name: str,
    instance_types_info: List[InstanceTypeInfo],
    memory_scheduling_enabled: bool,
    expected_message: str,
):
    actual_failures = InstancesMemorySchedulingWarningValidator().execute(
        compute_resource_name, instance_types_info, memory_scheduling_enabled
    )
    assert_failure_messages(actual_failures, expected_message)