Fix inconsistent tiebreak scores when --nth is used

Make sure to consistently calculate tiebreak scores based on the
original line.

This change may not be preferable if you filter aligned tabular input on
a subset of columns using --nth. However, if we calculate length
tiebreak only on the matched components instead of the entire line, the
result can be very confusing when multiple --nth components are
specified, so let's keep it simple and consistent.

Close #926
This commit is contained in:
Junegunn Choi
2017-06-02 13:25:35 +09:00
parent 5d6eb5bfd6
commit 2e3dc75425
8 changed files with 49 additions and 95 deletions

View File

@@ -299,20 +299,20 @@ func (p *Pattern) matchChunk(chunk *Chunk, space []*Result, slab *util.Slab) []*
// MatchItem returns true if the Item is a match
func (p *Pattern) MatchItem(item *Item, withPos bool, slab *util.Slab) (*Result, []Offset, *[]int) {
if p.extended {
if offsets, bonus, trimLen, pos := p.extendedMatch(item, withPos, slab); len(offsets) == len(p.termSets) {
return buildResult(item, offsets, bonus, trimLen), offsets, pos
if offsets, bonus, pos := p.extendedMatch(item, withPos, slab); len(offsets) == len(p.termSets) {
return buildResult(item, offsets, bonus), offsets, pos
}
return nil, nil, nil
}
offset, bonus, trimLen, pos := p.basicMatch(item, withPos, slab)
offset, bonus, pos := p.basicMatch(item, withPos, slab)
if sidx := offset[0]; sidx >= 0 {
offsets := []Offset{offset}
return buildResult(item, offsets, bonus, trimLen), offsets, pos
return buildResult(item, offsets, bonus), offsets, pos
}
return nil, nil, nil
}
func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset, int, int, *[]int) {
func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset, int, *[]int) {
input := p.prepareInput(item)
if p.fuzzy {
return p.iter(p.fuzzyAlgo, input, p.caseSensitive, p.normalize, p.forward, p.text, withPos, slab)
@@ -320,11 +320,10 @@ func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset,
return p.iter(algo.ExactMatchNaive, input, p.caseSensitive, p.normalize, p.forward, p.text, withPos, slab)
}
func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Offset, int, int, *[]int) {
func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Offset, int, *[]int) {
input := p.prepareInput(item)
offsets := []Offset{}
var totalScore int
var totalTrimLen int
var allPos *[]int
if withPos {
allPos = &[]int{}
@@ -332,16 +331,15 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of
for _, termSet := range p.termSets {
var offset Offset
var currentScore int
var trimLen int
matched := false
for _, term := range termSet {
pfun := p.procFun[term.typ]
off, score, tLen, pos := p.iter(pfun, input, term.caseSensitive, p.normalize, p.forward, term.text, withPos, slab)
off, score, pos := p.iter(pfun, input, term.caseSensitive, p.normalize, p.forward, term.text, withPos, slab)
if sidx := off[0]; sidx >= 0 {
if term.inv {
continue
}
offset, currentScore, trimLen = off, score, tLen
offset, currentScore = off, score
matched = true
if withPos {
if pos != nil {
@@ -354,7 +352,7 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of
}
break
} else if term.inv {
offset, currentScore, trimLen = Offset{0, 0}, 0, 0
offset, currentScore = Offset{0, 0}, 0
matched = true
continue
}
@@ -362,10 +360,9 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of
if matched {
offsets = append(offsets, offset)
totalScore += currentScore
totalTrimLen += trimLen
}
}
return offsets, totalScore, totalTrimLen, allPos
return offsets, totalScore, allPos
}
func (p *Pattern) prepareInput(item *Item) []Token {
@@ -375,7 +372,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
var ret []Token
if len(p.nth) == 0 {
ret = []Token{Token{text: &item.text, prefixLength: 0, trimLength: int32(item.text.TrimLength())}}
ret = []Token{Token{text: &item.text, prefixLength: 0}}
} else {
tokens := Tokenize(item.text, p.delimiter)
ret = Transform(tokens, p.nth)
@@ -384,7 +381,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
return ret
}
func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, normalize bool, forward bool, pattern []rune, withPos bool, slab *util.Slab) (Offset, int, int, *[]int) {
func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, normalize bool, forward bool, pattern []rune, withPos bool, slab *util.Slab) (Offset, int, *[]int) {
for _, part := range tokens {
if res, pos := pfun(caseSensitive, normalize, forward, *part.text, pattern, withPos, slab); res.Start >= 0 {
sidx := int32(res.Start) + part.prefixLength
@@ -394,8 +391,8 @@ func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, norma
(*pos)[idx] += int(part.prefixLength)
}
}
return Offset{sidx, eidx}, res.Score, int(part.trimLength), pos
return Offset{sidx, eidx}, res.Score, pos
}
}
return Offset{-1, -1}, 0, -1, nil
return Offset{-1, -1}, 0, nil
}