Недавно на работе мне пришлось реализовать хэш-«контрольную сумму» для фрагментов XML-документов (мы используем XElement). Простые тесты производительности показали примерно трёхкратное ускорение на моём компьютере при использовании таблицы поиска для построения шестнадцатеричной строки хэша по сравнению с вариантом без неё.
Вот моя реализация:
using System.Xml.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Linq;
/// <summary>
/// Computes SHA-256 hash strings (64 lowercase hexadecimal characters) for LINQ to XML elements.
/// </summary>
public static class XMLHashUtils
{
    /// <summary>
    /// Two-digit lowercase hexadecimal text for every possible byte value (0-255).
    /// Built once at type initialization so that hex conversion is a plain array lookup
    /// instead of formatting each byte on every call.
    /// SOURCE: https://stackoverflow.com/a/18574846
    /// </summary>
    static readonly string[] hexLookupTable = Enumerable.Range(0, 256).Select(integer => integer.ToString("x2")).ToArray();

    /// <summary>
    /// Computes a SHA-256 hash string from an XElement and its children.
    /// </summary>
    /// <param name="xml">Element to hash; it is serialized without added formatting before hashing.</param>
    /// <returns>The SHA-256 digest of the UTF-8 encoded serialization, as lowercase hexadecimal.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="xml"/> is null.</exception>
    public static string Hash(XElement xml)
    {
        if (xml == null)
        {
            throw new System.ArgumentNullException(nameof(xml));
        }

        // DisableFormatting: no indentation or line breaks are inserted, so the hash
        // depends on the element's content rather than on pretty-printing.
        string xmlString = xml.ToString(SaveOptions.DisableFormatting);
        return Hash(xmlString);
    }

    /// <summary>
    /// Computes a SHA-256 hash string from the UTF-8 bytes of a string.
    /// </summary>
    /// <param name="stringValue">Text to hash.</param>
    /// <returns>The digest as lowercase hexadecimal.</returns>
    static string Hash(string stringValue)
    {
        // A hasher is created per call: HashAlgorithm instances carry internal state and
        // their instance members are not thread-safe, so sharing one static instance
        // can corrupt digests (or throw) when callers hash concurrently.
        using (SHA256 hasher = SHA256.Create())
        {
            byte[] hashBytes = hasher.ComputeHash(Encoding.UTF8.GetBytes(stringValue));
            return BytesToHexString(hashBytes);
        }
    }

    /// <summary>
    /// Converts a byte array to a lowercase hexadecimal string using the lookup table.
    /// </summary>
    /// <param name="bytes">Bytes to convert.</param>
    /// <returns>Two hexadecimal characters per input byte, in order.</returns>
    static string BytesToHexString(byte[] bytes)
    {
        // Every byte yields exactly two characters, so the builder never has to grow.
        StringBuilder sb = new StringBuilder(bytes.Length * 2);
        foreach (byte value in bytes)
        {
            sb.Append(hexLookupTable[value]);
        }
        return sb.ToString();
    }
}
А вот несколько юнит-тестов для неё (с использованием фреймворка NUnit):
using NUnit.Framework;
using System.Linq;
using System.Xml.Linq;
/// <summary>
/// NUnit tests checking that XMLHashUtils.Hash is repeatable and reacts to changes
/// in an element's children, name and attributes.
/// </summary>
public class XMLHashUtilsTest
{
    /// <summary>
    /// Builds <root><child attribute="value" /></root>,
    /// with the <child /> element present childCount times.
    /// </summary>
    XElement CreateXML(int childCount)
    {
        var children = Enumerable
            .Range(0, childCount)
            .Select(_ => new XElement("child", new XAttribute("attribute", "value")));

        return new XElement("root", children);
    }

    /// <summary>
    /// Hashing the same element twice must give the same string.
    /// </summary>
    [Test]
    public void HashIsDeterministic([Values(0,1,10)] int childCount)
    {
        XElement document = CreateXML(childCount);

        string firstHash = XMLHashUtils.Hash(document);
        string secondHash = XMLHashUtils.Hash(document);

        Assert.AreEqual(firstHash, secondHash);
    }

    /// <summary>
    /// Adding one more child must change the hash.
    /// </summary>
    [Test]
    public void HashChanges_WhenChildrenAreDifferent([Values(0,1,10)] int childCount)
    {
        XElement smaller = CreateXML(childCount);
        XElement larger = CreateXML(childCount + 1);

        string smallerHash = XMLHashUtils.Hash(smaller);
        string largerHash = XMLHashUtils.Hash(larger);

        Assert.AreNotEqual(smallerHash, largerHash);
    }

    /// <summary>
    /// Appending a suffix to the root element's name must change the hash.
    /// </summary>
    [Test]
    public void HashChanges_WhenRootNameIsDifferent([Values("A","B","C")]string nameSuffix)
    {
        XElement baseline = CreateXML(1);
        XElement renamed = CreateXML(1);
        renamed.Name = renamed.Name.ToString() + nameSuffix;

        string baselineHash = XMLHashUtils.Hash(baseline);
        string renamedHash = XMLHashUtils.Hash(renamed);

        Assert.AreNotEqual(baselineHash, renamedHash);
    }

    /// <summary>
    /// Adding an attribute to the root element must change the hash.
    /// </summary>
    [Test]
    public void HashChanges_WhenRootAttributesAreDifferent([Values("A","B","C")]string attributeName)
    {
        XElement baseline = CreateXML(1);
        XElement decorated = CreateXML(1);
        decorated.Add(new XAttribute(attributeName, "value"));

        string baselineHash = XMLHashUtils.Hash(baseline);
        string decoratedHash = XMLHashUtils.Hash(decorated);

        Assert.AreNotEqual(baselineHash, decoratedHash);
    }
}