Text Share Online

using System.Collections.Generic;
using System.Linq;

namespace ExcelRowComparison;

/// <summary>
/// Column indices for a row array. Index 0 = date of transaction, 1 = contractual payment amount,
/// 2 = funds received, 3 = amount incurred, 4 = description, 5 = contractual due date.
/// </summary>
public static class RowColumns
{
public const int DateOfTransaction = 0;
public const int ContractualPaymentAmount = 1;
public const int FundsReceived = 2;
public const int AmountIncurred = 3;
public const int Description = 4;
public const int ContractualDueDate = 5;
public const int CellCount = 6;
}

/// <summary>
/// Result of comparing row counts between the first and second file.
/// </summary>
public record RowCountResult(int Count1, int Count2, bool Match);

/// <summary>
/// Result of comparing a single cell between source and target row.
/// </summary>
public record CellComparison(int Index, object? SourceValue, object? TargetValue, bool AreEqual);

/// <summary>
/// Comparison of a common row: source row, target row, and per-cell comparison results.
/// </summary>
public record CommonRowComparison(object[] SourceRow, object[] TargetRow, IReadOnlyList<CellComparison> CellComparisons);

/// <summary>
/// Full result of comparing two lists of rows.
/// </summary>
public record ComparisonResult(
RowCountResult RowCountResult,
IReadOnlyList<object[]> OnlyInFirst,
IReadOnlyList<object[]> OnlyInSecond,
IReadOnlyList<CommonRowComparison> CommonRowComparisons);

/// <summary>
/// Row identity for matching: transaction date + description.
/// </summary>
internal readonly record struct RowKey(DateTime? TransactionDate, string Description)
{
public static RowKey FromRow(object[] row)
{
if (row == null || row.Length <= Math.Max(RowColumns.DateOfTransaction, RowColumns.Description))
return new RowKey(null, string.Empty);

var date = NormalizeDate(row[RowColumns.DateOfTransaction]);
var description = row[RowColumns.Description]?.ToString() ?? string.Empty;
return new RowKey(date, description);
}

private static DateTime? NormalizeDate(object? value)
{
if (value == null) return null;
if (value is DateTime dt) return dt.Date;
if (value is DateOnly d) return d.ToDateTime(TimeOnly.MinValue);
if (value is string s && DateTime.TryParse(s, out var parsed)) return parsed.Date;
return null;
}
}

/// <summary>
/// Compares two lists of row arrays (e.g. from parsed Excel files) by date+description,
/// and produces row-count check, only-in-first, only-in-second, and cell-by-cell comparison for common rows.
/// </summary>
public static class ExcelRowComparer
{
/// <summary>
/// Compares the two lists: checks row counts, sorts by date then contractual payment then funds received then contractual due date,
/// finds rows only in first, only in second, common rows by date+description, and cell-by-cell comparison for common rows.
/// Null rows are skipped. Duplicate keys are matched first-to-first after sort.
/// </summary>
public static ComparisonResult Compare(IReadOnlyList<object[]> list1, IReadOnlyList<object[]> list2)
{
var safe1 = (list1 ?? Array.Empty<object[]>()).Where(r => r != null).ToList();
var safe2 = (list2 ?? Array.Empty<object[]>()).Where(r => r != null).ToList();

var rowCountResult = new RowCountResult(safe1.Count, safe2.Count, safe1.Count == safe2.Count);

var sorted1 = safe1.OrderBy(r => r, RowComparer.Instance).ToList();
var sorted2 = safe2.OrderBy(r => r, RowComparer.Instance).ToList();

var byKey1 = sorted1.GroupBy(r => RowKey.FromRow(r)).ToDictionary(g => g.Key, g => g.ToList());
var byKey2 = sorted2.GroupBy(r => RowKey.FromRow(r)).ToDictionary(g => g.Key, g => g.ToList());

var onlyInFirst = new List<object[]>();
var onlyInSecond = new List<object[]>();
var commonComparisons = new List<CommonRowComparison>();

foreach (var (key, rows1) in byKey1)
{
if (!byKey2.TryGetValue(key, out var rows2))
{
onlyInFirst.AddRange(rows1);
continue;
}

var count = Math.Min(rows1.Count, rows2.Count);
for (var i = 0; i < count; i++)
commonComparisons.Add(CompareRows(rows1[i], rows2[i]));

for (var i = count; i < rows1.Count; i++)
onlyInFirst.Add(rows1[i]);
}

foreach (var (key, rows2) in byKey2)
{
if (byKey1.ContainsKey(key)) continue;
onlyInSecond.AddRange(rows2);
}

return new ComparisonResult(
rowCountResult,
onlyInFirst,
onlyInSecond,
commonComparisons);
}

private static CommonRowComparison CompareRows(object[] source, object[] target)
{
var cells = new List<CellComparison>(RowColumns.CellCount);
for (var i = 0; i < RowColumns.CellCount; i++)
{
var sourceVal = source != null && i < source.Length ? source[i] : null;
var targetVal = target != null && i < target.Length ? target[i] : null;
cells.Add(new CellComparison(i, sourceVal, targetVal, CellsEqual(sourceVal, targetVal)));
}
return new CommonRowComparison(source, target, cells);
}

private static bool CellsEqual(object? a, object? b)
{
if (a == null && b == null) return true;
if (a == null || b == null) return false;

if (a is DateTime dta && b is DateTime dtb) return dta.Date == dtb.Date;
if (a is DateOnly doa && b is DateOnly dob) return doa == dob;
if (a is DateTime dta2 && b is DateOnly dob2) return dta2.Date == dob2.ToDateTime(TimeOnly.MinValue);
if (a is DateOnly doa2 && b is DateTime dtb2) return doa2.ToDateTime(TimeOnly.MinValue) == dtb2.Date;

if (IsNumeric(a) && IsNumeric(b))
return NormalizeDecimal(a) == NormalizeDecimal(b);

return string.Equals(a.ToString(), b.ToString(), StringComparison.Ordinal);
}

private static bool IsNumeric(object? value)
{
return value is int or long or short or byte or decimal or double or float;
}

private static decimal? NormalizeDecimal(object? value)
{
if (value == null) return null;
return value switch
{
decimal d => d,
double d => (decimal)d,
float f => (decimal)f,
int i => i,
long l => l,
short s => s,
byte b => b,
string s when decimal.TryParse(s, out var d) => d,
_ => null
};
}

private sealed class RowComparer : IComparer<object[]>
{
public static readonly RowComparer Instance = new();

public int Compare(object[]? a, object[]? b)
{
if (a == null && b == null) return 0;
if (a == null) return -1;
if (b == null) return 1;

var c = CompareCell(a, b, RowColumns.DateOfTransaction);
if (c != 0) return c;
c = CompareCell(a, b, RowColumns.ContractualPaymentAmount);
if (c != 0) return c;
c = CompareCell(a, b, RowColumns.FundsReceived);
if (c != 0) return c;
c = CompareCell(a, b, RowColumns.ContractualDueDate);
return c;
}

private static int CompareCell(object[] a, object[] b, int index)
{
var va = GetCell(a, index);
var vb = GetCell(b, index);
if (va == null && vb == null) return 0;
if (va == null) return -1;
if (vb == null) return 1;

if (va is DateTime dta && vb is DateTime dtb)
return dta.Date.CompareTo(dtb.Date);
if (va is DateOnly doa && vb is DateOnly dob)
return doa.CompareTo(dob);
if (va is DateTime dta2 && vb is DateOnly dob2)
return dta2.Date.CompareTo(dob2.ToDateTime(TimeOnly.MinValue));
if (va is DateOnly doa2 && vb is DateTime dtb2)
return doa2.ToDateTime(TimeOnly.MinValue).CompareTo(dtb2.Date);

if (IsNumeric(va) && IsNumeric(vb))
{
var na = NormalizeDecimal(va);
var nb = NormalizeDecimal(vb);
if (na.HasValue && nb.HasValue) return na.Value.CompareTo(nb.Value);
}

return string.Compare(va.ToString(), vb.ToString(), StringComparison.Ordinal);
}

private static object? GetCell(object[] row, int index)
{
return row != null && index < row.Length ? row[index] : null;
}
}
}

Share This: