// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// Package diff provides functionalities to compare two YAML documents.
package diff

import (
	"fmt"
	"io"
	"sort"

	"gopkg.in/yaml.v3"
)

// Tree represents a difference tree between two YAML documents.
type Tree struct {
	root diffNode
}

func (t Tree) Write(w io.Writer) error {
	tw := &treeWriter{t, w}
	return tw.write()
}

// diffNode is the interface to represents the difference between two *yaml.Node.
type diffNode interface {
	key() string
	newYAML() *yaml.Node
	oldYAML() *yaml.Node
	children() []diffNode
}

// keyNode is a concrete implementation of a diffNode.
type keyNode struct {
	keyValue   string
	childNodes []diffNode // A list of non-empty pointers to the children nodes.

	oldV *yaml.Node // Only populated for a leaf node (i.e. that has no child node).
	newV *yaml.Node // Only populated for a leaf node (i.e. that has no child node).
}

func (n *keyNode) key() string {
	return n.keyValue
}

func (n *keyNode) newYAML() *yaml.Node {
	return n.newV
}

func (n *keyNode) oldYAML() *yaml.Node {
	return n.oldV
}

func (n *keyNode) children() []diffNode {
	return n.childNodes
}

type unchangedNode struct {
	count int
}

func (n *unchangedNode) children() []diffNode {
	return nil
}

func (n *unchangedNode) key() string {
	return ""
}

func (n *unchangedNode) newYAML() *yaml.Node {
	return nil
}

func (n *unchangedNode) oldYAML() *yaml.Node {
	return nil
}

func (n *unchangedNode) unchangedCount() int {
	return n.count
}

type seqItemNode struct {
	keyNode
}

// From is the YAML document that another YAML document is compared against.
type From []byte

// ParseWithCFNOverriders constructs a diff tree that represent the differences of a YAML document against the From document with 
// overriders designed for CFN documents, including:
// 1. An ignorer that ignores diffs under "Metadata.Manifest".
// 2. An overrider that is able to compare intrinsic functions with full/short form correctly.
func (from From) ParseWithCFNOverriders(to []byte) (Tree, error) {
	return from.Parse(to,
		&ignorer{
			curr: &ignoreSegment{
				key: "Metadata",
				next: &ignoreSegment{
					key: "Manifest",
				},
			},
		},
		&getAttConverter{},
		&intrinsicFuncMapTagConverter{})
}

// Parse constructs a diff tree that represent the differences of a YAML document against the From document.
func (from From) Parse(to []byte, overriders ...overrider) (Tree, error) {
	var toNode, fromNode yaml.Node
	if err := yaml.Unmarshal(to, &toNode); err != nil {
		return Tree{}, fmt.Errorf("unmarshal current template: %w", err)
	}
	if err := yaml.Unmarshal(from, &fromNode); err != nil {
		return Tree{}, fmt.Errorf("unmarshal old template: %w", err)
	}
	var root diffNode
	var err error
	switch {
	// NOTE: If Kind is 0, it means the document is empty and nothing is unmarshalled.
	case fromNode.Kind == 0 && toNode.Kind == 0:
		return Tree{}, nil
	case fromNode.Kind == 0:
		root, err = parse(nil, &toNode, "", overriders...)
	case toNode.Kind == 0:
		root, err = parse(&fromNode, nil, "", overriders...)
	default:
		root, err = parse(&fromNode, &toNode, "", overriders...)
	}
	if err != nil {
		return Tree{}, err
	}
	if root == nil {
		return Tree{}, nil
	}
	return Tree{
		root: root,
	}, nil
}

func parse(from, to *yaml.Node, key string, overriders ...overrider) (diffNode, error) {
	for _, overrider := range overriders {
		if overrider.match(from, to, key, overrider) {
			return overrider.parse(from, to, key, overrider)
		}
	}
	// Handle base cases.
	if to == nil || from == nil || to.Kind != from.Kind {
		return &keyNode{
			keyValue: key,
			newV:     to,
			oldV:     from,
		}, nil
	}
	if isYAMLLeaf(to) && isYAMLLeaf(from) {
		if to.Value == from.Value {
			return nil, nil
		}
		return &keyNode{
			keyValue: key,
			newV:     to,
			oldV:     from,
		}, nil
	}

	var children []diffNode
	var err error
	switch {
	case to.Kind == yaml.SequenceNode && from.Kind == yaml.SequenceNode:
		children, err = parseSequence(from, to, overriders...)
	case to.Kind == yaml.DocumentNode && from.Kind == yaml.DocumentNode:
		fallthrough
	case to.Kind == yaml.MappingNode && from.Kind == yaml.MappingNode:
		children, err = parseMap(from, to, overriders...)
	default:
		return nil, fmt.Errorf("unknown combination of node kinds: %v, %v", to.Kind, from.Kind)
	}
	if err != nil {
		return nil, fmt.Errorf("parse YAML content with key %s: %w", key, err)
	}
	if len(children) == 0 {
		return nil, nil
	}
	return &keyNode{
		keyValue:   key,
		childNodes: children,
	}, nil
}

func isYAMLLeaf(node *yaml.Node) bool {
	return len(node.Content) == 0
}

func parseSequence(fromNode, toNode *yaml.Node, overriders ...overrider) ([]diffNode, error) {
	fromSeq, toSeq := make([]yaml.Node, len(fromNode.Content)), make([]yaml.Node, len(toNode.Content)) // NOTE: should be the same as calling `Decode`.
	for idx, v := range fromNode.Content {
		fromSeq[idx] = *v
	}
	for idx, v := range toNode.Content {
		toSeq[idx] = *v
	}
	type cachedEntry struct {
		node diffNode
		err  error
	}
	cachedDiff := make(map[string]cachedEntry)
	lcsIndices := longestCommonSubsequence(fromSeq, toSeq, func(idxFrom, idxTo int) bool {
		// Note: This function passed as `eq` should be a pure function. Therefore, its output is the same
		// given the same `idxFrom` and `idxTo`. Hence, it is not necessary to parse the nodes again.
		// In `lcs.go`, `eq` can be called twice on the same indices: once when computing LCS length, and
		// once when back-tracing to construct the LCS.
		if diff, ok := cachedDiff[cacheKey(idxFrom, idxTo)]; ok {
			return diff.err == nil && diff.node == nil
		}
		diff, err := parse(&(fromSeq[idxFrom]), &(toSeq[idxTo]), "", overriders...)
		if diff != nil { // NOTE: cache the diff only if a modification could have happened at this position.
			cachedDiff[cacheKey(idxFrom, idxTo)] = cachedEntry{
				node: diff,
				err:  err,
			}
		}
		return err == nil && diff == nil
	})
	// No difference if the two sequences have the same size and the LCS is the entire sequence.
	if len(fromSeq) == len(toSeq) && len(lcsIndices) == len(fromSeq) {
		return nil, nil
	}
	var children []diffNode
	var matchCount int
	inspector := newLCSStateMachine(fromSeq, toSeq, lcsIndices)
	for action := inspector.action(); action != actionDone; action = inspector.action() {
		switch action {
		case actionMatch:
			matchCount++
			if action := inspector.peek(); action != actionMatch {
				children = append(children, &unchangedNode{count: matchCount})
				matchCount = 0
			}
		case actionMod:
			diff := cachedDiff[cacheKey(inspector.fromIndex(), inspector.toIndex())]
			if diff.err != nil {
				return nil, diff.err
			}
			children = append(children, &seqItemNode{
				keyNode{
					keyValue:   diff.node.key(),
					childNodes: diff.node.children(),
					oldV:       diff.node.oldYAML(),
					newV:       diff.node.newYAML(),
				},
			})
		case actionDel:
			item := inspector.fromItem()
			children = append(children, &seqItemNode{
				keyNode{
					oldV: &item,
				},
			})
		case actionInsert:
			item := inspector.toItem()
			children = append(children, &seqItemNode{
				keyNode{
					newV: &item,
				},
			})
		}
		inspector.next()
	}
	return children, nil
}

func parseMap(from, to *yaml.Node, overriders ...overrider) ([]diffNode, error) {
	currMap, oldMap := make(map[string]yaml.Node), make(map[string]yaml.Node)
	if err := to.Decode(currMap); err != nil {
		return nil, err
	}
	if err := from.Decode(oldMap); err != nil {
		return nil, err
	}
	keys := unionOfKeys(currMap, oldMap)
	sort.SliceStable(keys, func(i, j int) bool { return keys[i] < keys[j] }) // NOTE: to avoid flaky unit tests.
	var children []diffNode
	for _, k := range keys {
		var currV, oldV *yaml.Node
		if v, ok := oldMap[k]; ok {
			oldV = &v
		}
		if v, ok := currMap[k]; ok {
			currV = &v
		}
		kDiff, err := parse(oldV, currV, k, overriders...)
		if err != nil {
			return nil, err
		}
		if kDiff != nil {
			children = append(children, kDiff)
		}
	}
	return children, nil
}

func unionOfKeys[T any](a, b map[string]T) []string {
	exists, keys := struct{}{}, make(map[string]struct{})
	for k := range a {
		keys[k] = exists
	}
	for k := range b {
		keys[k] = exists
	}
	keySlice, idx := make([]string, len(keys)), 0
	for k := range keys {
		keySlice[idx] = k
		idx++
	}
	return keySlice
}

func cacheKey(inFrom, inTo int) string {
	return fmt.Sprintf("%d,%d", inFrom, inTo)
}