Fix inconsistent tiebreak scores when --nth is used

Make sure tiebreak scores are always calculated from the original line. This change may not be preferable if you filter aligned tabular input on a subset of columns using --nth. However, if we calculated the length tiebreak only on the matched components instead of the entire line, the result could be very confusing when multiple --nth components are specified, so let's keep it simple and consistent. Close #926
2025-08-01 12:42:01 -07:00 · 2017-06-02 13:25:35 +09:00
parent 5d6eb5bfd6
commit 2e3dc75425
8 changed files with 49 additions and 95 deletions
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@@ -20,7 +20,6 @@ type Range struct {
 type Token struct {
 	text         *util.Chars
 	prefixLength int32
-	trimLength   int32
 }

 // Delimiter for tokenizing the input
@@ -81,7 +80,7 @@ func withPrefixLengths(tokens []util.Chars, begin int) []Token {
 	prefixLength := begin
 	for idx, token := range tokens {
 		// NOTE: &tokens[idx] instead of &tokens
-		ret[idx] = Token{&tokens[idx], int32(prefixLength), int32(token.TrimLength())}
+		ret[idx] = Token{&tokens[idx], int32(prefixLength)}
 		prefixLength += token.Length()
 	}
 	return ret
@@ -242,7 +241,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
 		} else {
 			prefixLength = 0
 		}
-		transTokens[idx] = Token{&merged, prefixLength, int32(merged.TrimLength())}
+		transTokens[idx] = Token{&merged, prefixLength}
 	}
 	return transTokens
 }