using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using CTA.Rules.Config;
using HtmlAgilityPack;
namespace CTA.WebForms.Helpers
{
public class Utilities
{
/// <summary>
/// Number of space characters per nesting level; multiplied by a node's
/// ancestor count in <see cref="NormalizeHtmlContent(HtmlNode)"/> to build indentation.
/// </summary>
public const int SpacesPerLevel = 4;

// The patterns below are get-only auto-properties with initializers so that each
// Regex is constructed exactly once. The previous expression-bodied form
// (=> new Regex(...)) allocated and re-parsed a new Regex on every access.

/// <summary>
/// Matches any single character that is neither a word character (<c>\w</c>) nor a period.
/// </summary>
public static Regex InvalidNamespaceIdentifierCharactersRegex { get; } = new Regex(@"[^\w.]");

/// <summary>
/// Matches a run of two or more consecutive periods.
/// </summary>
public static Regex DoublePeriodRegex { get; } = new Regex(@"[.]{2,}");

/// <summary>
/// Matches when the input starts with an ASCII letter or an underscore.
/// </summary>
public static Regex ValidNamespaceIdentifierStart { get; } = new Regex(@"^[a-zA-Z_]");

/// <summary>
/// Matches a single hyphen or a single space character.
/// </summary>
public static Regex UnderscoreReplaceableCharacters { get; } = new Regex(@"[- ]");
/// <summary>
/// Joins the given strings into one string, placing
/// <see cref="Environment.NewLine"/> between consecutive entries.
/// </summary>
/// <param name="strings">The strings to join; may be <c>null</c>.</param>
/// <returns>
/// The joined text, or <see cref="string.Empty"/> when
/// <paramref name="strings"/> is <c>null</c>.
/// </returns>
public static string SeparateStringsWithNewLine(params string[] strings)
{
    if (strings == null)
    {
        return string.Empty;
    }

    return string.Join(Environment.NewLine, strings);
}
/// <summary>
/// Rewrites <paramref name="namespaceIdentifier"/> in four steps: hyphens and spaces
/// become underscores, characters other than word characters and periods are removed,
/// runs of periods are collapsed to one, and an underscore is prepended when the result
/// does not start with an ASCII letter or underscore.
/// </summary>
/// <param name="namespaceIdentifier">The raw identifier text to normalize.</param>
/// <returns>The normalized identifier.</returns>
public static string NormalizeNamespaceIdentifier(string namespaceIdentifier)
{
    // NOTE: When creating a project with spaces or hyphens in it, each space/hyphen
    // turns into an underscore, they don't get compressed into one
    var withUnderscores = UnderscoreReplaceableCharacters.Replace(namespaceIdentifier, "_");

    // Removing characters can leave periods adjacent to each other, so the
    // period collapse has to run after the removal.
    var allowedCharactersOnly = InvalidNamespaceIdentifierCharactersRegex.Replace(withUnderscores, string.Empty);
    var normalized = DoublePeriodRegex.Replace(allowedCharactersOnly, ".");

    return ValidNamespaceIdentifierStart.IsMatch(normalized)
        ? normalized
        : "_" + normalized;
}
/// <summary>
/// Normalizes spacing of content of <paramref name="node"/>. It is recommended
/// to use this on the document node as opposed to other node types.
/// </summary>
/// <param name="node">The node whose content should be normalized.</param>
public static void NormalizeHtmlContent(HtmlNode node)
{
    var pendingNodes = new Queue<(HtmlNode node, int ancestors)>();
    var deferredInsertions = new List<(HtmlNode node, string text, bool insertAfter)>();

    // Start from the depth the node already has so indentation is relative to its position.
    var startingDepth = node.Ancestors()?.Count() ?? 0;
    pendingNodes.Enqueue((node, startingDepth));

    // Breadth-first walk: HandleNonTextChildren enqueues element children one level deeper.
    while (pendingNodes.Count > 0)
    {
        var (current, depth) = pendingNodes.Dequeue();

        // Children are indented at the current depth; the text before the
        // closing tag sits one level shallower (never below zero).
        var childIndent = Environment.NewLine + new string(' ', SpacesPerLevel * depth);
        var closingIndent = Environment.NewLine + new string(' ', SpacesPerLevel * Math.Max(0, depth - 1));

        RemoveSpaceTextChildren(current);
        HandleNonTextChildren(current, depth, childIndent, pendingNodes, deferredInsertions);
        HandleTextChildren(current, childIndent);
        HandleLastChild(current, closingIndent, deferredInsertions);
    }

    // New text nodes are only added once the whole traversal has finished.
    PerformTextInsertions(deferredInsertions);
}
/// <summary>
/// Removes any text children of the given node that contain only whitespace
/// characters. Multiple text children in a row can cause spacing issues if
/// some contain only space characters. For use only by
/// <see cref="NormalizeHtmlContent(HtmlNode)"/>.
/// </summary>
/// <param name="node">The node currently being processed.</param>
private static void RemoveSpaceTextChildren(HtmlNode node)
{
    // ToList() snapshots the matches so the removals below do not modify the
    // child collection while it is still being enumerated.
    // Enumerable.Empty needs an explicit type argument; it cannot be inferred here.
    var spaceTextChildren = node.ChildNodes?
        .Where(child => child.NodeType == HtmlNodeType.Text && string.IsNullOrWhiteSpace(child.InnerHtml))
        .ToList()
        ?? Enumerable.Empty<HtmlNode>();

    foreach (var child in spaceTextChildren)
    {
        node.RemoveChild(child);
    }
}
/// <summary>
/// Ensures that any child comment or element nodes are preceded by a newline and the
/// appropriate number of spaces. For use only by
/// <see cref="NormalizeHtmlContent(HtmlNode)"/>.
/// </summary>
/// <param name="node">The node currently being processed.</param>
/// <param name="ancestors">The number of ancestors of the node being processed.</param>
/// <param name="requiredPreviousText">The spacing text to use before children of this type.</param>
/// <param name="normalizationQueue">The queue of nodes that still need normalization.</param>
/// <param name="textInsertionList">The list of nodes needing preceding or following space text and their
/// recommended space texts.</param>
private static void HandleNonTextChildren(
    HtmlNode node,
    int ancestors,
    string requiredPreviousText,
    Queue<(HtmlNode node, int ancestors)> normalizationQueue,
    List<(HtmlNode node, string text, bool insertAfter)> textInsertionList)
{
    // Enumerable.Empty needs an explicit type argument; it cannot be inferred here.
    var nonTextChildren = node.ChildNodes?
        .Where(child => child.NodeType == HtmlNodeType.Element || child.NodeType == HtmlNodeType.Comment)
        ?? Enumerable.Empty<HtmlNode>();

    foreach (var child in nonTextChildren)
    {
        var previousSibling = child.PreviousSibling;
        if (previousSibling == null || previousSibling.NodeType != HtmlNodeType.Text)
        {
            // No text node to extend: record that a spacing text node is needed before
            // this child. The insertion is applied later by PerformTextInsertions.
            textInsertionList.Add((child, requiredPreviousText, false));
        }
        else
        {
            // Reuse the existing text node, replacing its trailing whitespace with the required spacing.
            previousSibling.InnerHtml = previousSibling.InnerHtml.TrimEnd() + requiredPreviousText;
        }

        // Only element nodes are queued for their own normalization pass; comments are not.
        if (child.NodeType == HtmlNodeType.Element)
        {
            normalizationQueue.Enqueue((child, ancestors + 1));
        }
    }
}
/// <summary>
/// Ensures that any child text nodes are preceded by a newline and
/// the appropriate number of spaces. For use only by
/// <see cref="NormalizeHtmlContent(HtmlNode)"/>.
/// </summary>
/// <param name="node">The node currently being processed.</param>
/// <param name="requiredPreviousText">The spacing text to use before children of this type.</param>
private static void HandleTextChildren(
    HtmlNode node,
    string requiredPreviousText)
{
    // Enumerable.Empty needs an explicit type argument; it cannot be inferred here.
    var textChildren = node.ChildNodes?
        .Where(child => child.NodeType == HtmlNodeType.Text)
        ?? Enumerable.Empty<HtmlNode>();

    foreach (var child in textChildren)
    {
        // Replace whatever leading whitespace the text had with the required spacing.
        child.InnerHtml = requiredPreviousText + child.InnerHtml.TrimStart();
    }
}
/// <summary>
/// Ensure that there is a newline and the appropriate number of
/// spaces preceding the closing tag, but only if the current
/// element has children. For use only by
/// <see cref="NormalizeHtmlContent(HtmlNode)"/>.
/// </summary>
/// <param name="node">The node currently being processed.</param>
/// <param name="requiredFinalText">The spacing text to use after the final child node.</param>
/// <param name="textInsertionList">The list of nodes needing preceding or following space text and their
/// recommended space texts.</param>
private static void HandleLastChild(
    HtmlNode node,
    string requiredFinalText,
    List<(HtmlNode node, string text, bool insertAfter)> textInsertionList)
{
    var finalChild = node.LastChild;

    // A node without children gets no spacing before its closing tag.
    if (finalChild == null)
    {
        return;
    }

    if (finalChild.NodeType == HtmlNodeType.Text)
    {
        // Swap the trailing whitespace of the existing text for the required spacing.
        finalChild.InnerHtml = finalChild.InnerHtml.TrimEnd() + requiredFinalText;
        return;
    }

    // Otherwise record that a spacing text node must be inserted after the last child.
    textInsertionList.Add((finalChild, requiredFinalText, true));
}
/// <summary>
/// Performs insertion of any new text nodes needed to normalize
/// the html structure. For use only by
/// <see cref="NormalizeHtmlContent(HtmlNode)"/>.
/// </summary>
/// <param name="textInsertionList">The list of nodes needing preceding or following space text and their
/// recommended space texts.</param>
private static void PerformTextInsertions(List<(HtmlNode node, string text, bool insertAfter)> textInsertionList)
{
    foreach (var (anchorNode, spacingText, placeAfter) in textInsertionList)
    {
        var spacingNode = HtmlNode.CreateNode(spacingText);
        var parent = anchorNode.ParentNode;

        // Without a parent there is nowhere to insert; log the problem and move on.
        if (parent == null)
        {
            LogHelper.LogError($"{Rules.Config.Constants.WebFormsErrorTag}Could not find parent of node " +
                $"{anchorNode.Name} when trying to normalize html");
            continue;
        }

        if (!placeAfter)
        {
            parent.InsertBefore(spacingNode, anchorNode);
        }
        else
        {
            parent.InsertAfter(spacingNode, anchorNode);
        }
    }
}
}
}