benchsuite: remove sift, pt and ucg

None of these tools got particularly popular (except for pt briefly),
and they do not appear to be active projects nowadays. While ucg was
fast, sift and pt were excruciatingly slow in a number of cases that
required special care in the benchmarks.

This also fixes the ordering of benchmark output to reflect the ordering
in the source of the benchsuite script.
This commit is contained in:
Andrew Gallant 2020-10-14 15:01:15 -04:00
parent b0066274cb
commit 5ebb3ad039
No known key found for this signature in database
GPG Key ID: B2E3A4923F8B0D44

View File

@ -71,15 +71,8 @@ def bench_linux_literal_default(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', pat]), mkcmd('rg', ['rg', pat]),
mkcmd('ag', ['ag', pat]), mkcmd('ag', ['ag', pat]),
# ucg reports the exact same matches as ag and rg even though it
# doesn't read gitignore files. Instead, it has a file whitelist
# that happens to match up exactly with the gitignores for this search.
mkcmd('ucg', ['ucg', pat]),
# I guess setting LC_ALL=en_US.UTF-8 probably isn't necessarily the # I guess setting LC_ALL=en_US.UTF-8 probably isn't necessarily the
# default, but I'd guess it to be on most desktop systems. # default, but I'd guess it to be on most desktop systems.
mkcmd('pt', ['pt', pat]),
# sift reports an extra line here for a binary file matched.
mkcmd('sift', ['sift', pat]),
mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'en_US.UTF-8'}), mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'en_US.UTF-8'}),
]) ])
@ -102,16 +95,12 @@ def bench_linux_literal(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg (ignore)', ['rg', '-n', pat]), mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg (ignore) (mmap)', ['rg', '-n', '--mmap', pat]), mkcmd('rg (mmap)', ['rg', '-n', '--mmap', pat]),
mkcmd('ag (ignore) (mmap)', ['ag', '-s', pat]), mkcmd('ag (mmap)', ['ag', '-s', pat]),
mkcmd('pt (ignore)', ['pt', pat]), mkcmd('git grep', [
mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
mkcmd('git grep (ignore)', [
'git', 'grep', '-I', '-n', pat, 'git', 'grep', '-I', '-n', pat,
], env={'LC_ALL': 'C'}), ], env={'LC_ALL': 'C'}),
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
]) ])
@ -131,31 +120,22 @@ def bench_linux_literal_casei(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]), mkcmd('rg', ['rg', '-n', '-i', pat]),
mkcmd('rg (ignore) (mmap)', ['rg', '-n', '-i', '--mmap', pat]), mkcmd('rg (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
mkcmd('ag (ignore) (mmap)', ['ag', '-i', pat]), mkcmd('ag (mmap)', ['ag', '-i', pat]),
mkcmd('pt (ignore)', ['pt', '-i', pat]),
mkcmd('sift (ignore)', SIFT + ['-n', '-i', '--git', pat]),
# It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here, # It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here,
# since that is certainly what ripgrep is doing, but this is for an # since that is certainly what ripgrep is doing, but this is for an
# ASCII literal, so we should give `git grep` all the opportunity to # ASCII literal, so we should give `git grep` all the opportunity to
# do its best. # do its best.
mkcmd('git grep (ignore)', [ mkcmd('git grep', [
'git', 'grep', '-I', '-n', '-i', pat, 'git', 'grep', '-I', '-n', '-i', pat,
], env={'LC_ALL': 'C'}), ], env={'LC_ALL': 'C'}),
mkcmd('rg (whitelist)', [
'rg', '-n', '-i', '--no-ignore', '-tall', pat,
]),
mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
]) ])
def bench_linux_re_literal_suffix(suite_dir): def bench_linux_re_literal_suffix(suite_dir):
''' '''
Benchmark the speed of a literal inside a regex. Benchmark the speed of a literal inside a regex.
This, for example, inhibits a prefix byte optimization used
inside of Go's regex engine (relevant for sift and pt).
''' '''
require(suite_dir, 'linux') require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR) cwd = path.join(suite_dir, LINUX_DIR)
@ -166,26 +146,19 @@ def bench_linux_re_literal_suffix(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg (ignore)', ['rg', '-n', pat]), mkcmd('rg', ['rg', '-n', pat]),
mkcmd('ag (ignore)', ['ag', '-s', pat]), mkcmd('ag', ['ag', '-s', pat]),
mkcmd('pt (ignore)', ['pt', '-e', pat]),
mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
mkcmd( mkcmd(
'git grep (ignore)', 'git grep',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
]) ])
def bench_linux_word(suite_dir): def bench_linux_word(suite_dir):
''' '''
Benchmark use of the -w ("match word") flag in each tool. Benchmark use of the -w ("match word") flag in each tool.
sift has a lot of trouble with this because it forces it into Go's
regex engine by surrounding the pattern with \b assertions.
''' '''
require(suite_dir, 'linux') require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR) cwd = path.join(suite_dir, LINUX_DIR)
@ -196,28 +169,19 @@ def bench_linux_word(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg (ignore)', ['rg', '-n', '-w', pat]), mkcmd('rg', ['rg', '-n', '-w', pat]),
mkcmd('ag (ignore)', ['ag', '-s', '-w', pat]), mkcmd('ag', ['ag', '-s', '-w', pat]),
mkcmd('pt (ignore)', ['pt', '-w', pat]),
mkcmd('sift (ignore)', SIFT + ['-n', '-w', '--git', pat]),
mkcmd( mkcmd(
'git grep (ignore)', 'git grep',
['git', 'grep', '-E', '-I', '-n', '-w', pat], ['git', 'grep', '-E', '-I', '-n', '-w', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('rg (whitelist)', [
'rg', '-n', '-w', '--no-ignore', '-tall', pat,
]),
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', '-w', pat]),
]) ])
def bench_linux_unicode_greek(suite_dir): def bench_linux_unicode_greek(suite_dir):
''' '''
Benchmark matching of a Unicode category. Benchmark matching of a Unicode category.
Only three tools (ripgrep, sift and pt) support this. We omit
pt because it is too slow.
''' '''
require(suite_dir, 'linux') require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR) cwd = path.join(suite_dir, LINUX_DIR)
@ -229,8 +193,6 @@ def bench_linux_unicode_greek(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]), mkcmd('rg', ['rg', '-n', pat]),
mkcmd('pt', ['pt', '-e', pat]),
mkcmd('sift', SIFT + ['-n', '--git', pat]),
]) ])
@ -250,8 +212,6 @@ def bench_linux_unicode_greek_casei(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]), mkcmd('rg', ['rg', '-n', '-i', pat]),
mkcmd('pt', ['pt', '-i', '-e', pat]),
mkcmd('sift', SIFT + ['-n', '-i', '--git', pat]),
]) ])
@ -272,26 +232,19 @@ def bench_linux_unicode_word(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg (ignore)', ['rg', '-n', pat]), mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]), mkcmd('rg (ASCII)', ['rg', '-n', '(?-u)' + pat]),
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]), mkcmd('ag (ASCII)', ['ag', '-s', pat]),
mkcmd('pt (ignore) (ASCII)', ['pt', '-e', pat]),
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', '--git', pat]),
mkcmd( mkcmd(
'git grep (ignore)', 'git grep',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'en_US.UTF-8'}, env={'LC_ALL': 'en_US.UTF-8'},
), ),
mkcmd( mkcmd(
'git grep (ignore) (ASCII)', 'git grep (ASCII)',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
mkcmd('rg (whitelist) (ASCII)', [
'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
]),
mkcmd('ucg (ASCII)', ['ucg', '--nosmart-case', pat]),
]) ])
@ -313,26 +266,19 @@ def bench_linux_no_literal(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg (ignore)', ['rg', '-n', pat]), mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]), mkcmd('rg (ASCII)', ['rg', '-n', '(?-u)' + pat]),
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]), mkcmd('ag (ASCII)', ['ag', '-s', pat]),
mkcmd('pt (ignore) (ASCII)', ['pt', '-e', pat]),
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', '--git', pat]),
mkcmd( mkcmd(
'git grep (ignore)', 'git grep',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'en_US.UTF-8'}, env={'LC_ALL': 'en_US.UTF-8'},
), ),
mkcmd( mkcmd(
'git grep (ignore) (ASCII)', 'git grep (ASCII)',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
mkcmd('rg (whitelist) (ASCII)', [
'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
]),
mkcmd('ucg (whitelist) (ASCII)', ['ucg', '--nosmart-case', pat]),
]) ])
@ -354,15 +300,13 @@ def bench_linux_alternates(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg (ignore)', ['rg', '-n', pat]), mkcmd('rg', ['rg', '-n', pat]),
mkcmd('ag (ignore)', ['ag', '-s', pat]), mkcmd('ag', ['ag', '-s', pat]),
mkcmd( mkcmd(
'git grep (ignore)', 'git grep',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', pat]),
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
]) ])
@ -377,15 +321,13 @@ def bench_linux_alternates_casei(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]), mkcmd('rg', ['rg', '-n', '-i', pat]),
mkcmd('ag (ignore)', ['ag', '-i', pat]), mkcmd('ag', ['ag', '-i', pat]),
mkcmd( mkcmd(
'git grep (ignore)', 'git grep',
['git', 'grep', '-E', '-I', '-n', '-i', pat], ['git', 'grep', '-E', '-I', '-n', '-i', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', '-i', pat]),
mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
]) ])
@ -400,15 +342,10 @@ def bench_subtitles_en_literal(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', pat, en]), Command('rg', ['rg', pat, en]),
Command('rg (no mmap)', ['rg', '--no-mmap', pat, en]), Command('rg (no mmap)', ['rg', '--no-mmap', pat, en]),
Command('pt', ['pt', '-N', pat, en]), Command('grep', ['grep', pat, en], env=GREP_ASCII),
Command('sift', ['sift', pat, en]),
Command('grep', ['grep', '-a', pat, en], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', pat, en]), Command('rg (lines)', ['rg', '-n', pat, en]),
Command('ag (lines)', ['ag', '-s', pat, en]), Command('ag (lines)', ['ag', '-s', pat, en]),
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]), Command('grep (lines)', ['grep', '-n', pat, en], env=GREP_ASCII),
Command('pt (lines)', ['pt', pat, en]),
Command('sift (lines)', ['sift', '-n', pat, en]),
Command('grep (lines)', ['grep', '-an', pat, en], env=GREP_ASCII),
]) ])
@ -428,7 +365,6 @@ def bench_subtitles_en_literal_casei(suite_dir):
], env=GREP_ASCII), ], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, en]), Command('rg (lines)', ['rg', '-n', '-i', pat, en]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, en]), Command('ag (lines) (ASCII)', ['ag', '-i', pat, en]),
Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, en]),
]) ])
@ -445,7 +381,6 @@ def bench_subtitles_en_literal_word(suite_dir):
'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', en, 'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', en,
]), ]),
Command('ag (ASCII)', ['ag', '-sw', pat, en]), Command('ag (ASCII)', ['ag', '-sw', pat, en]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [ Command('grep (ASCII)', [
'grep', '-anw', pat, en, 'grep', '-anw', pat, en,
], env=GREP_ASCII), ], env=GREP_ASCII),
@ -471,7 +406,6 @@ def bench_subtitles_en_alternate(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg (lines)', ['rg', '-n', pat, en]), Command('rg (lines)', ['rg', '-n', pat, en]),
Command('ag (lines)', ['ag', '-s', pat, en]), Command('ag (lines)', ['ag', '-s', pat, en]),
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (lines)', [ Command('grep (lines)', [
'grep', '-E', '-an', pat, en, 'grep', '-E', '-an', pat, en,
], env=GREP_ASCII), ], env=GREP_ASCII),
@ -498,7 +432,6 @@ def bench_subtitles_en_alternate_casei(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('ag (ASCII)', ['ag', '-s', '-i', pat, en]), Command('ag (ASCII)', ['ag', '-s', '-i', pat, en]),
Command('ucg (ASCII)', ['ucg', '-i', pat, en]),
Command('grep (ASCII)', [ Command('grep (ASCII)', [
'grep', '-E', '-ani', pat, en, 'grep', '-E', '-ani', pat, en,
], env=GREP_ASCII), ], env=GREP_ASCII),
@ -520,7 +453,6 @@ def bench_subtitles_en_surrounding_words(suite_dir):
Command('grep', ['grep', '-E', '-an', pat, en], env=GREP_UNICODE), Command('grep', ['grep', '-E', '-an', pat, en], env=GREP_UNICODE),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]), Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]), Command('ag (ASCII)', ['ag', '-s', pat, en]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [ Command('grep (ASCII)', [
'grep', '-E', '-an', pat, en, 'grep', '-E', '-an', pat, en,
], env=GREP_ASCII), ], env=GREP_ASCII),
@ -544,7 +476,6 @@ def bench_subtitles_en_no_literal(suite_dir):
Command('rg', ['rg', '-n', pat, en]), Command('rg', ['rg', '-n', pat, en]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]), Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]), Command('ag (ASCII)', ['ag', '-s', pat, en]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [ Command('grep (ASCII)', [
'grep', '-E', '-an', pat, en, 'grep', '-E', '-an', pat, en,
], env=GREP_ASCII), ], env=GREP_ASCII),
@ -562,14 +493,9 @@ def bench_subtitles_ru_literal(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', pat, ru]), Command('rg', ['rg', pat, ru]),
Command('rg (no mmap)', ['rg', '--no-mmap', pat, ru]), Command('rg (no mmap)', ['rg', '--no-mmap', pat, ru]),
Command('pt', ['pt', '-N', pat, ru]),
Command('sift', ['sift', pat, ru]),
Command('grep', ['grep', '-a', pat, ru], env=GREP_ASCII), Command('grep', ['grep', '-a', pat, ru], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', pat, ru]), Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]), Command('ag (lines)', ['ag', '-s', pat, ru]),
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
Command('pt (lines)', ['pt', pat, ru]),
Command('sift (lines)', ['sift', '-n', pat, ru]),
Command('grep (lines)', ['grep', '-an', pat, ru], env=GREP_ASCII), Command('grep (lines)', ['grep', '-an', pat, ru], env=GREP_ASCII),
]) ])
@ -590,7 +516,6 @@ def bench_subtitles_ru_literal_casei(suite_dir):
], env=GREP_ASCII), ], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, ru]), Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]), Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, ru]),
]) ])
@ -607,7 +532,6 @@ def bench_subtitles_ru_literal_word(suite_dir):
'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru, 'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru,
]), ]),
Command('ag (ASCII)', ['ag', '-sw', pat, ru]), Command('ag (ASCII)', ['ag', '-sw', pat, ru]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (ASCII)', [ Command('grep (ASCII)', [
'grep', '-anw', pat, ru, 'grep', '-anw', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
@ -633,7 +557,6 @@ def bench_subtitles_ru_alternate(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg (lines)', ['rg', '-n', pat, ru]), Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]), Command('ag (lines)', ['ag', '-s', pat, ru]),
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (lines)', [ Command('grep (lines)', [
'grep', '-E', '-an', pat, ru, 'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
@ -660,7 +583,6 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('ag (ASCII)', ['ag', '-s', '-i', pat, ru]), Command('ag (ASCII)', ['ag', '-s', '-i', pat, ru]),
Command('ucg (ASCII)', ['ucg', '-i', pat, ru]),
Command('grep (ASCII)', [ Command('grep (ASCII)', [
'grep', '-E', '-ani', pat, ru, 'grep', '-E', '-ani', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
@ -681,7 +603,6 @@ def bench_subtitles_ru_surrounding_words(suite_dir):
Command('rg', ['rg', '-n', pat, ru]), Command('rg', ['rg', '-n', pat, ru]),
Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_UNICODE), Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_UNICODE),
Command('ag (ASCII)', ['ag', '-s', pat, ru]), Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (ASCII)', [ Command('grep (ASCII)', [
'grep', '-E', '-an', pat, ru, 'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
@ -705,7 +626,6 @@ def bench_subtitles_ru_no_literal(suite_dir):
Command('rg', ['rg', '-n', pat, ru]), Command('rg', ['rg', '-n', pat, ru]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]), Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
Command('ag (ASCII)', ['ag', '-s', pat, ru]), Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (ASCII)', [ Command('grep (ASCII)', [
'grep', '-E', '-an', pat, ru, 'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
@ -758,7 +678,7 @@ class Benchmark(object):
def __init__(self, name=None, pattern=None, commands=None, def __init__(self, name=None, pattern=None, commands=None,
warmup_count=1, count=3, line_count=True, warmup_count=1, count=3, line_count=True,
allow_missing_commands=False, allow_missing_commands=False,
disabled_cmds=None): disabled_cmds=None, order=0):
''' '''
Create a single benchmark. Create a single benchmark.
@ -794,6 +714,8 @@ class Benchmark(object):
will simply skip it. will simply skip it.
:param list(str) disabled_cmds: :param list(str) disabled_cmds:
A list of commands to skip. A list of commands to skip.
:param int order:
An integer indicating the sequence number of this benchmark.
''' '''
self.name = name self.name = name
self.pattern = pattern self.pattern = pattern
@ -803,6 +725,7 @@ class Benchmark(object):
self.line_count = line_count self.line_count = line_count
self.allow_missing_commands = allow_missing_commands self.allow_missing_commands = allow_missing_commands
self.disabled_cmds = set(disabled_cmds or []) self.disabled_cmds = set(disabled_cmds or [])
self.order = order
def raise_if_missing(self): def raise_if_missing(self):
''' '''
@ -1165,19 +1088,22 @@ def collect_benchmarks(suite_dir, filter_pat=None,
requires corpora that are missing, then a log message is requires corpora that are missing, then a log message is
emitted to stderr and it is not yielded. emitted to stderr and it is not yielded.
''' '''
for fun in sorted(globals()): benchmarks = []
if not fun.startswith('bench_'): for global_name in globals():
if not global_name.startswith('bench_'):
continue continue
name = re.sub('^bench_', '', fun) name = re.sub('^bench_', '', global_name)
if filter_pat is not None and not re.search(filter_pat, name): if filter_pat is not None and not re.search(filter_pat, name):
continue continue
try: try:
benchmark = globals()[fun](suite_dir) fun = globals()[global_name]
benchmark = fun(suite_dir)
benchmark.name = name benchmark.name = name
benchmark.warmup_count = warmup_iter benchmark.warmup_count = warmup_iter
benchmark.count = bench_iter benchmark.count = bench_iter
benchmark.allow_missing_commands = allow_missing_commands benchmark.allow_missing_commands = allow_missing_commands
benchmark.disabled_cmds = disabled_cmds benchmark.disabled_cmds = disabled_cmds
benchmark.order = fun.__code__.co_firstlineno
benchmark.raise_if_missing() benchmark.raise_if_missing()
except MissingDependencies as e: except MissingDependencies as e:
eprint( eprint(
@ -1192,7 +1118,8 @@ def collect_benchmarks(suite_dir, filter_pat=None,
'(run with --allow-missing to run incomplete benchmarks)' '(run with --allow-missing to run incomplete benchmarks)'
eprint(fmt % (', '.join(e.missing_names), name)) eprint(fmt % (', '.join(e.missing_names), name))
continue continue
yield benchmark benchmarks.append(benchmark)
return sorted(benchmarks, key=lambda b: b.order)
def main(): def main():