Fix inconsistent tiebreak scores when --nth is used

Make sure to consistently calculate tiebreak scores based on the original line. This change may not be preferable if you filter aligned tabular input on a subset of columns using --nth. However, if we calculate length tiebreak only on the matched components instead of the entire line, the result can be very confusing when multiple --nth components are specified, so let's keep it simple and consistent. Close #926
2025-08-06 15:12:05 -07:00 · 2017-06-02 13:25:35 +09:00
parent 5d6eb5bfd6
commit 2e3dc75425
8 changed files with 49 additions and 95 deletions
--- a/src/pattern.go
+++ b/src/pattern.go
@@ -299,20 +299,20 @@ func (p *Pattern) matchChunk(chunk *Chunk, space []*Result, slab *util.Slab) []*
 // MatchItem returns true if the Item is a match
 func (p *Pattern) MatchItem(item *Item, withPos bool, slab *util.Slab) (*Result, []Offset, *[]int) {
 	if p.extended {
-		if offsets, bonus, trimLen, pos := p.extendedMatch(item, withPos, slab); len(offsets) == len(p.termSets) {
-			return buildResult(item, offsets, bonus, trimLen), offsets, pos
+		if offsets, bonus, pos := p.extendedMatch(item, withPos, slab); len(offsets) == len(p.termSets) {
+			return buildResult(item, offsets, bonus), offsets, pos
 		}
 		return nil, nil, nil
 	}
-	offset, bonus, trimLen, pos := p.basicMatch(item, withPos, slab)
+	offset, bonus, pos := p.basicMatch(item, withPos, slab)
 	if sidx := offset[0]; sidx >= 0 {
 		offsets := []Offset{offset}
-		return buildResult(item, offsets, bonus, trimLen), offsets, pos
+		return buildResult(item, offsets, bonus), offsets, pos
 	}
 	return nil, nil, nil
 }

-func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset, int, int, *[]int) {
+func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset, int, *[]int) {
 	input := p.prepareInput(item)
 	if p.fuzzy {
 		return p.iter(p.fuzzyAlgo, input, p.caseSensitive, p.normalize, p.forward, p.text, withPos, slab)
@@ -320,11 +320,10 @@ func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset,
 	return p.iter(algo.ExactMatchNaive, input, p.caseSensitive, p.normalize, p.forward, p.text, withPos, slab)
 }

-func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Offset, int, int, *[]int) {
+func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Offset, int, *[]int) {
 	input := p.prepareInput(item)
 	offsets := []Offset{}
 	var totalScore int
-	var totalTrimLen int
 	var allPos *[]int
 	if withPos {
 		allPos = &[]int{}
@@ -332,16 +331,15 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of
 	for _, termSet := range p.termSets {
 		var offset Offset
 		var currentScore int
-		var trimLen int
 		matched := false
 		for _, term := range termSet {
 			pfun := p.procFun[term.typ]
-			off, score, tLen, pos := p.iter(pfun, input, term.caseSensitive, p.normalize, p.forward, term.text, withPos, slab)
+			off, score, pos := p.iter(pfun, input, term.caseSensitive, p.normalize, p.forward, term.text, withPos, slab)
 			if sidx := off[0]; sidx >= 0 {
 				if term.inv {
 					continue
 				}
-				offset, currentScore, trimLen = off, score, tLen
+				offset, currentScore = off, score
 				matched = true
 				if withPos {
 					if pos != nil {
@@ -354,7 +352,7 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of
 				}
 				break
 			} else if term.inv {
-				offset, currentScore, trimLen = Offset{0, 0}, 0, 0
+				offset, currentScore = Offset{0, 0}, 0
 				matched = true
 				continue
 			}
@@ -362,10 +360,9 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of
 		if matched {
 			offsets = append(offsets, offset)
 			totalScore += currentScore
-			totalTrimLen += trimLen
 		}
 	}
-	return offsets, totalScore, totalTrimLen, allPos
+	return offsets, totalScore, allPos
 }

 func (p *Pattern) prepareInput(item *Item) []Token {
@@ -375,7 +372,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {

 	var ret []Token
 	if len(p.nth) == 0 {
-		ret = []Token{Token{text: &item.text, prefixLength: 0, trimLength: int32(item.text.TrimLength())}}
+		ret = []Token{Token{text: &item.text, prefixLength: 0}}
 	} else {
 		tokens := Tokenize(item.text, p.delimiter)
 		ret = Transform(tokens, p.nth)
@@ -384,7 +381,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
 	return ret
 }

-func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, normalize bool, forward bool, pattern []rune, withPos bool, slab *util.Slab) (Offset, int, int, *[]int) {
+func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, normalize bool, forward bool, pattern []rune, withPos bool, slab *util.Slab) (Offset, int, *[]int) {
 	for _, part := range tokens {
 		if res, pos := pfun(caseSensitive, normalize, forward, *part.text, pattern, withPos, slab); res.Start >= 0 {
 			sidx := int32(res.Start) + part.prefixLength
@@ -394,8 +391,8 @@ func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, norma
 					(*pos)[idx] += int(part.prefixLength)
 				}
 			}
-			return Offset{sidx, eidx}, res.Score, int(part.trimLength), pos
+			return Offset{sidx, eidx}, res.Score, pos
 		}
 	}
-	return Offset{-1, -1}, 0, -1, nil
+	return Offset{-1, -1}, 0, nil
 }