NumericTextComparator.java

// Generated by delombok at Mon Nov 18 07:27:48 UTC 2024
package de.larssh.utils.text;

import java.util.Comparator;
import de.larssh.utils.annotations.PackagePrivate;
import edu.umd.cs.findbugs.annotations.Nullable;

/**
 * A {@link Comparator} that orders alpha numeric strings in <i>natural</i>
 * order, either case sensitive or case insensitive. It is appreciated to use
 * this kind of ordering for user output.
 *
 * <p>
 * Because numeric values are not deserialized into numeric data types, their
 * length is not limited. Fractions are not supported and will be handled as two
 * separate numeric values.
 *
 * <p>
 * Numeric values can have a leading plus or minus sign when following a
 * whitespace character or at a strings start.
 *
 * <p>
 * The following lists some example values to demonstrate the ordering.
 * <ul>
 * <li>Banana -12 Circus
 * <li>Banana -5 Circus
 * <li>Banana +5 Circus
 * <li>Banana 5 Circus
 * <li>Banana +5 Dolphin
 * <li>Banana 8 Circus
 * <li>Banana 12 Circus
 * <li>Banana-5 Circus
 * <li>Banana-12 Circus
 * <li>Banana--5 Circus
 * <li>Elephant 5 Circus
 * </ul>
 *
 * <p>
 * This comparator permits null values.
 *
 * <p>
 * Note that this Comparator does <em>not</em> take locale into account, and
 * will result in an unsatisfactory ordering for certain locales.
 */
@PackagePrivate
final class NumericTextComparator implements Comparator<String> {
	/**
	 * Singleton instance of {@link NumericTextComparator} to compare strings case
	 * insensitive.
	 */
	@PackagePrivate
	static final Comparator<String> COMPARATOR_CASE_INSENSITIVE = new NumericTextComparator(true);
	/**
	 * Singleton instance of {@link NumericTextComparator} to compare strings case
	 * sensitive.
	 */
	@PackagePrivate
	static final Comparator<String> COMPARATOR_CASE_SENSITIVE = new NumericTextComparator(false);
	/**
	 * Specifies case sensitivity for comparison
	 */
	private final boolean caseInsensitive;

	/**
	 * Compares its two arguments for numeric <i>and</i> text order. Returns a
	 * negative integer, zero, or a positive integer as the first argument is less
	 * than, equal to, or greater than the second.
	 *
	 * <p>
	 * This comparator permits null values.
	 *
	 * @param first  the first char sequence to be compared
	 * @param second the second char sequence to be compared
	 * @return a negative integer, zero, or a positive integer as the first argument
	 *         is less than, equal to, or greater than the second
	 */
	@Override
	public int compare(@Nullable final String first, @Nullable final String second) {
		if (first == null) {
			return second == null ? 0 : -1;
		}
		return second == null ? 1 : new NumericTextComparatorContext(first, second).compare();
	}


	/**
	 * Internal context per each comparison, holding the strings to compare and
	 * their current indexes
	 */
	private final class NumericTextComparatorContext {
		/**
		 * First string to be compared
		 */
		private final String first;
		/**
		 * State of the comparison: Current index of the first string
		 */
		private int firstIndex;
		/**
		 * Length of the first string (cached)
		 */
		private final int firstLength;
		/**
		 * Second string to be compared
		 */
		private final String second;
		/**
		 * State of the comparison: Current index of the second string
		 */
		private int secondIndex;
		/**
		 * Length of the second string (cached)
		 */
		private final int secondLength;

		/* package */ /**
		 * Constructor for the internal context per each comparison, holding the strings
		 * to compare and their current indexes
		 *
		 * @param first  the first char sequence to be compared
		 * @param second the second char sequence to be compared
		 */
		NumericTextComparatorContext(final String first, final String second) {
			this.first = first;
			firstLength = first.length();
			this.second = second;
			secondLength = second.length();
		}

		/**
		 * Compares both strings
		 *
		 * @return comparison result
		 */
		public int compare() {
			while (!isRest()) {
				if (isNumeric()) {
					final int compared = compareSignedNumeric();
					if (compared != 0) {
						return compared;
					}
				} else {
					final int compared = compareText();
					if (compared != 0) {
						return compared;
					}
				}
			}
			final int compared = compareRest();
			if (compared != 0) {
				return compared;
			}
			return isCaseInsensitive() ? first.compareToIgnoreCase(second) : first.compareTo(second);
		}

		/**
		 * Returns {@code true} if at least one of the strings end has been reached.
		 *
		 * @return {@code true} if a strings end has been reached
		 */
		private boolean isRest() {
			return firstIndex >= firstLength || secondIndex >= secondLength;
		}

		/**
		 * Returns {@code true} if at least of the strings continues with an optionally
		 * signed numeric value.
		 *
		 * <p>
		 * This method is used to initiate the numeric comparison. It loads one
		 * character of both strings <i>without</i> validation.
		 *
		 * @return {@code true} if any sequence continues with an optionally signed
		 *         numeric value
		 */
		private boolean isNumeric() {
			return Characters.isAsciiDigit(first.charAt(firstIndex)) || Characters.isAsciiDigit(second.charAt(secondIndex)) || isSignedNumeric(first, firstLength, firstIndex) || isSignedNumeric(second, secondLength, secondIndex);
		}

		/**
		 * Returns {@code true} if {@code value} has a signed numeric value at
		 * {@code index}.
		 *
		 * <p>
		 * Signed values need to be at index {@code 0} or require a whitespace character
		 * at {@code index - 1}.
		 *
		 * @param value  the string to check
		 * @param length the length of {@code value} (cached)
		 * @param index  the index to check
		 * @return {@code true} if {@code value} has a signed numeric value at
		 *         {@code index}
		 */
		private boolean isSignedNumeric(final String value, final int length, final int index) {
			final char sign = value.charAt(index);
			if (sign != '-' && sign != '+' || index > 0 && !Characters.isAsciiWhitespace(value.charAt(index - 1)) || index + 1 >= length) {
				return false;
			}
			return Characters.isAsciiDigit(value.charAt(index + 1));
		}

		/**
		 * Compares an optionally signed numeric value.
		 *
		 * <p>
		 * This method updates the internal contexts state. It loads one character of
		 * both strings <i>without</i> validation.
		 *
		 * @return comparison result
		 */
		private int compareSignedNumeric() {
			// Calculate length of the numeric chunks
			final int firstSignedNumericLength = getNumericLength(first, firstLength, firstIndex);
			final int secondSignedNumericLength = getNumericLength(second, secondLength, secondIndex);
			// Return if one is not numeric
			if (firstSignedNumericLength == 0 || secondSignedNumericLength == 0) {
				return firstSignedNumericLength == 0 ? 1 : -1;
			}
			// Handle negative sign
			final boolean firstIsNegative = first.charAt(firstIndex) == '-';
			final boolean secondIsNegative = second.charAt(secondIndex) == '-';
			if (firstIsNegative != secondIsNegative) {
				return firstIsNegative ? -1 : 1;
			}
			// Calculate numeric length without signs and leading zeros
			final int firstNumericLength = getDigitsLength(first, firstIndex, firstSignedNumericLength);
			final int secondNumericLength = getDigitsLength(second, secondIndex, secondSignedNumericLength);
			// Update indexes
			firstIndex += firstSignedNumericLength;
			secondIndex += secondSignedNumericLength;
			// Compare
			return (firstIsNegative ? -1 : 1) * compareNumeric(firstNumericLength, secondNumericLength);
		}

		/**
		 * Returns the length of an optionally signed numeric in {@code value} at
		 * {@code index} or zero.
		 *
		 * @param value  the string to check
		 * @param length the length of {@code value} (cached)
		 * @param index  the index to check
		 * @return length of an optionally signed numeric in {@code value} at
		 *         {@code index} or zero
		 */
		private int getNumericLength(final String value, final int length, final int index) {
			int numericIndex = index;
			if (isSignedNumeric(value, length, numericIndex)) {
				numericIndex += 2;
			}
			while (numericIndex < length && Characters.isAsciiDigit(value.charAt(numericIndex))) {
				numericIndex += 1;
			}
			return numericIndex - index;
		}

		/**
		 * Returns the length of the absolute value of an optionally signed numeric
		 * without leading zeros in {@code value} at {@code index} for signed length
		 * {@code signedNumericLength}.
		 *
		 * <p>
		 * In case the numeric value is zero a length of one is returned.
		 *
		 * <p>
		 * This method does not validate if characters in {@code value} from
		 * {@code index} to length {@code signedNumericLength} are valid numeric values!
		 *
		 * @param value               the string to check
		 * @param index               the index to check
		 * @param signedNumericLength the length of the optionally signed numeric to
		 *                            check (cached)
		 * @return length of the absolute value of an optionally signed numeric without
		 *         leading zeros
		 */
		private int getDigitsLength(final String value, final int index, final int signedNumericLength) {
			for (int numericIndex = 0; numericIndex < signedNumericLength; numericIndex += 1) {
				final char character = value.charAt(index + numericIndex);
				if (character != '0' && character != '-' && character != '+') {
					return signedNumericLength - numericIndex;
				}
			}
			return 1;
		}

		/**
		 * Compares numeric values at the current index and of the given lengths.
		 *
		 * <p>
		 * This method requires its arguments to match the rules of
		 * {@link #getNumericLength(String, int, int)}.
		 *
		 * @param firstNumericLength  length of the first numeric
		 * @param secondNumericLength length of the second numeric
		 * @return comparison result
		 */
		private int compareNumeric(final int firstNumericLength, final int secondNumericLength) {
			// Return on different lengths
			if (firstNumericLength != secondNumericLength) {
				return firstNumericLength - secondNumericLength;
			}
			// Compare each numeric character from highest to lowest
			for (int numericIndex = firstNumericLength; numericIndex > 0; numericIndex -= 1) {
				final char firstNumericCharacter = first.charAt(firstIndex - numericIndex);
				final char secondNumericCharacter = second.charAt(secondIndex - numericIndex);
				if (firstNumericCharacter != secondNumericCharacter) {
					return firstNumericCharacter - secondNumericCharacter;
				}
			}
			return 0;
		}

		/**
		 * Compares a single textual character.
		 *
		 * <p>
		 * This method updates the internal contexts state. It loads one character of
		 * both strings <i>without</i> validation.
		 *
		 * @return comparison result
		 */
		private int compareText() {
			final char firstCharacter = first.charAt(firstIndex);
			final char secondCharacter = second.charAt(secondIndex);
			firstIndex += 1;
			secondIndex += 1;
			return isCaseInsensitive() ? Characters.compareIgnoreCase(firstCharacter, secondCharacter) : firstCharacter - secondCharacter;
		}

		/**
		 * Compares the strings if at least one has reached its end.
		 *
		 * @return comparison result
		 */
		private int compareRest() {
			return firstLength - firstIndex - (secondLength - secondIndex);
		}
	}

	/**
	 * Specifies case sensitivity for comparison
	 *
	 * @return {@code true} if comparison should take place case insensitive
	 */
	@java.lang.SuppressWarnings("all")
	@edu.umd.cs.findbugs.annotations.SuppressFBWarnings(justification = "generated code")
	@lombok.Generated
	public boolean isCaseInsensitive() {
		return this.caseInsensitive;
	}

	/**
	 * Creates a new {@code NumericTextComparator} instance.
	 *
	 * @param caseInsensitive Specifies case sensitivity for comparison
	 */
	@java.lang.SuppressWarnings("all")
	@edu.umd.cs.findbugs.annotations.SuppressFBWarnings(justification = "generated code")
	@lombok.Generated
	private NumericTextComparator(final boolean caseInsensitive) {
		this.caseInsensitive = caseInsensitive;
	}
}