benchsuite: add ugrep commands to benchmarks

This commit is contained in:
Andrew Gallant 2020-10-14 17:00:35 -04:00
parent c55e7af675
commit de0c24f31c
No known key found for this signature in database
GPG Key ID: B2E3A4923F8B0D44

View File

@@ -57,8 +57,10 @@ def bench_linux_literal_default(suite_dir):
Benchmark the speed of a literal using *default* settings. Benchmark the speed of a literal using *default* settings.
This is a purposefully unfair benchmark for use in performance This is a purposefully unfair benchmark for use in performance
analysis, but it is pedagogically useful to demonstrate how analysis, but it is pedagogically useful to demonstrate how default
default behaviors differ. behaviors differ. For example, ugrep and grep don't do any smart
filtering by default, so they will invariably search more files
than ripgrep, ag or git grep.
''' '''
require(suite_dir, 'linux') require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR) cwd = path.join(suite_dir, LINUX_DIR)
@@ -73,7 +75,9 @@ def bench_linux_literal_default(suite_dir):
mkcmd('ag', ['ag', pat]), mkcmd('ag', ['ag', pat]),
# I guess setting LC_ALL=en_US.UTF-8 probably isn't necessarily the # I guess setting LC_ALL=en_US.UTF-8 probably isn't necessarily the
# default, but I'd guess it to be on most desktop systems. # default, but I'd guess it to be on most desktop systems.
mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'en_US.UTF-8'}), mkcmd('git grep', ['git', 'grep', pat], env=GREP_UNICODE),
mkcmd('ugrep', ['ugrep', '-r', pat, './']),
mkcmd('grep', ['grep', '-r', pat, './'], env=GREP_UNICODE),
]) ])
@@ -101,6 +105,10 @@ def bench_linux_literal(suite_dir):
mkcmd('git grep', [ mkcmd('git grep', [
'git', 'grep', '-I', '-n', pat, 'git', 'grep', '-I', '-n', pat,
], env={'LC_ALL': 'C'}), ], env={'LC_ALL': 'C'}),
mkcmd('ugrep', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', pat, './',
])
]) ])
@@ -130,6 +138,10 @@ def bench_linux_literal_casei(suite_dir):
mkcmd('git grep', [ mkcmd('git grep', [
'git', 'grep', '-I', '-n', '-i', pat, 'git', 'grep', '-I', '-n', '-i', pat,
], env={'LC_ALL': 'C'}), ], env={'LC_ALL': 'C'}),
mkcmd('ugrep', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', '-i', pat, './',
])
]) ])
@@ -153,6 +165,10 @@ def bench_linux_re_literal_suffix(suite_dir):
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('ugrep', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', pat, './',
])
]) ])
@@ -176,6 +192,10 @@ def bench_linux_word(suite_dir):
['git', 'grep', '-E', '-I', '-n', '-w', pat], ['git', 'grep', '-E', '-I', '-n', '-w', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('ugrep', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', '-w', pat, './',
])
]) ])
@@ -193,6 +213,10 @@ def bench_linux_unicode_greek(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]), mkcmd('rg', ['rg', '-n', pat]),
mkcmd('ugrep', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', pat, './',
])
]) ])
@@ -212,6 +236,10 @@ def bench_linux_unicode_greek_casei(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]), mkcmd('rg', ['rg', '-n', '-i', pat]),
mkcmd('ugrep', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', '-i', pat, './',
])
]) ])
@@ -245,6 +273,14 @@ def bench_linux_unicode_word(suite_dir):
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('ugrep', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', pat, './',
]),
mkcmd('ugrep (ASCII)', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', '-U', pat, './',
]),
]) ])
@@ -279,6 +315,14 @@ def bench_linux_no_literal(suite_dir):
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('ugrep', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', pat, './',
]),
mkcmd('ugrep (ASCII)', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', '-U', pat, './',
]),
]) ])
@@ -307,6 +351,10 @@ def bench_linux_alternates(suite_dir):
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('ugrep', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', pat, './',
])
]) ])
@@ -328,6 +376,10 @@ def bench_linux_alternates_casei(suite_dir):
['git', 'grep', '-E', '-I', '-n', '-i', pat], ['git', 'grep', '-E', '-I', '-n', '-i', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('ugrep', [
'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
'-n', '-i', pat, './',
])
]) ])
@@ -346,6 +398,7 @@ def bench_subtitles_en_literal(suite_dir):
Command('rg (lines)', ['rg', '-n', pat, en]), Command('rg (lines)', ['rg', '-n', pat, en]),
Command('ag (lines)', ['ag', '-s', pat, en]), Command('ag (lines)', ['ag', '-s', pat, en]),
Command('grep (lines)', ['grep', '-n', pat, en], env=GREP_ASCII), Command('grep (lines)', ['grep', '-n', pat, en], env=GREP_ASCII),
Command('ugrep (lines)', ['ugrep', '-n', pat, en])
]) ])
@@ -363,6 +416,7 @@ def bench_subtitles_en_literal_casei(suite_dir):
Command('grep (ASCII)', ['grep', '-E', '-i', pat, en], env=GREP_ASCII), Command('grep (ASCII)', ['grep', '-E', '-i', pat, en], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, en]), Command('rg (lines)', ['rg', '-n', '-i', pat, en]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, en]), Command('ag (lines) (ASCII)', ['ag', '-i', pat, en]),
Command('ugrep (lines)', ['ugrep', '-n', '-i', pat, en])
]) ])
@@ -380,6 +434,7 @@ def bench_subtitles_en_literal_word(suite_dir):
]), ]),
Command('ag (ASCII)', ['ag', '-sw', pat, en]), Command('ag (ASCII)', ['ag', '-sw', pat, en]),
Command('grep (ASCII)', ['grep', '-nw', pat, en], env=GREP_ASCII), Command('grep (ASCII)', ['grep', '-nw', pat, en], env=GREP_ASCII),
Command('ugrep (ASCII)', ['ugrep', '-nw', pat, en]),
Command('rg', ['rg', '-nw', pat, en]), Command('rg', ['rg', '-nw', pat, en]),
Command('grep', ['grep', '-nw', pat, en], env=GREP_UNICODE), Command('grep', ['grep', '-nw', pat, en], env=GREP_UNICODE),
]) ])
@@ -403,6 +458,7 @@ def bench_subtitles_en_alternate(suite_dir):
Command('rg (lines)', ['rg', '-n', pat, en]), Command('rg (lines)', ['rg', '-n', pat, en]),
Command('ag (lines)', ['ag', '-s', pat, en]), Command('ag (lines)', ['ag', '-s', pat, en]),
Command('grep (lines)', ['grep', '-E', '-n', pat, en], env=GREP_ASCII), Command('grep (lines)', ['grep', '-E', '-n', pat, en], env=GREP_ASCII),
Command('ugrep (lines)', ['ugrep', '-n', pat, en]),
Command('rg', ['rg', pat, en]), Command('rg', ['rg', pat, en]),
Command('grep', ['grep', '-E', pat, en], env=GREP_ASCII), Command('grep', ['grep', '-E', pat, en], env=GREP_ASCII),
]) ])
@@ -427,6 +483,7 @@ def bench_subtitles_en_alternate_casei(suite_dir):
Command('grep (ASCII)', [ Command('grep (ASCII)', [
'grep', '-E', '-ni', pat, en, 'grep', '-E', '-ni', pat, en,
], env=GREP_ASCII), ], env=GREP_ASCII),
Command('ugrep (ASCII)', ['ugrep', '-n', '-i', pat, en]),
Command('rg', ['rg', '-n', '-i', pat, en]), Command('rg', ['rg', '-n', '-i', pat, en]),
Command('grep', ['grep', '-E', '-ni', pat, en], env=GREP_UNICODE), Command('grep', ['grep', '-E', '-ni', pat, en], env=GREP_UNICODE),
]) ])
@@ -443,9 +500,11 @@ def bench_subtitles_en_surrounding_words(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, en]), Command('rg', ['rg', '-n', pat, en]),
Command('grep', ['grep', '-E', '-n', pat, en], env=GREP_UNICODE), Command('grep', ['grep', '-E', '-n', pat, en], env=GREP_UNICODE),
Command('ugrep', ['ugrep', '-n', pat, en]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]), Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]), Command('ag (ASCII)', ['ag', '-s', pat, en]),
Command('grep (ASCII)', ['grep', '-E', '-n', pat, en], env=GREP_ASCII), Command('grep (ASCII)', ['grep', '-E', '-n', pat, en], env=GREP_ASCII),
Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, en])
]) ])
@@ -464,9 +523,11 @@ def bench_subtitles_en_no_literal(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, en]), Command('rg', ['rg', '-n', pat, en]),
Command('ugrep', ['ugrep', '-n', pat, en]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]), Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]), Command('ag (ASCII)', ['ag', '-s', pat, en]),
Command('grep (ASCII)', ['grep', '-E', '-n', pat, en], env=GREP_ASCII), Command('grep (ASCII)', ['grep', '-E', '-n', pat, en], env=GREP_ASCII),
Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, en])
]) ])
@@ -485,6 +546,7 @@ def bench_subtitles_ru_literal(suite_dir):
Command('rg (lines)', ['rg', '-n', pat, ru]), Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]), Command('ag (lines)', ['ag', '-s', pat, ru]),
Command('grep (lines)', ['grep', '-n', pat, ru], env=GREP_ASCII), Command('grep (lines)', ['grep', '-n', pat, ru], env=GREP_ASCII),
Command('ugrep (lines)', ['ugrep', '-n', pat, ru])
]) ])
@@ -502,6 +564,7 @@ def bench_subtitles_ru_literal_casei(suite_dir):
Command('grep (ASCII)', ['grep', '-E', '-i', pat, ru], env=GREP_ASCII), Command('grep (ASCII)', ['grep', '-E', '-i', pat, ru], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, ru]), Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]), Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
Command('ugrep (lines) (ASCII)', ['ugrep', '-n', '-i', pat, ru])
]) ])
@@ -515,12 +578,17 @@ def bench_subtitles_ru_literal_word(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg (ASCII)', [ Command('rg (ASCII)', [
'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru, # You might think we'd use \b here for word boundaries, but both
# GNU grep and ripgrep implement -w with the formulation below.
# Since we can't use Unicode in a pattern and disable Unicode word
# boundaries, we just hand-jam this ourselves.
'rg', '-n', r'(?-u:^|\W)' + pat + r'(?-u:$|\W)', ru,
]), ]),
Command('ag (ASCII)', ['ag', '-sw', pat, ru]), Command('ag (ASCII)', ['ag', '-sw', pat, ru]),
Command('grep (ASCII)', [ Command('grep (ASCII)', [
'grep', '-nw', pat, ru, 'grep', '-nw', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
Command('ugrep (ASCII)', ['ugrep', '-nw', pat, ru]),
Command('rg', ['rg', '-nw', pat, ru]), Command('rg', ['rg', '-nw', pat, ru]),
Command('grep', ['grep', '-nw', pat, ru], env=GREP_UNICODE), Command('grep', ['grep', '-nw', pat, ru], env=GREP_UNICODE),
]) ])
@@ -544,6 +612,7 @@ def bench_subtitles_ru_alternate(suite_dir):
Command('rg (lines)', ['rg', '-n', pat, ru]), Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]), Command('ag (lines)', ['ag', '-s', pat, ru]),
Command('grep (lines)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII), Command('grep (lines)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
Command('ugrep (lines)', ['ugrep', '-n', pat, ru]),
Command('rg', ['rg', pat, ru]), Command('rg', ['rg', pat, ru]),
Command('grep', ['grep', '-E', pat, ru], env=GREP_ASCII), Command('grep', ['grep', '-E', pat, ru], env=GREP_ASCII),
]) ])
@@ -568,6 +637,7 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
Command('grep (ASCII)', [ Command('grep (ASCII)', [
'grep', '-E', '-ni', pat, ru, 'grep', '-E', '-ni', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
Command('ugrep (ASCII)', ['ugrep', '-n', '-i', pat, ru]),
Command('rg', ['rg', '-n', '-i', pat, ru]), Command('rg', ['rg', '-n', '-i', pat, ru]),
Command('grep', ['grep', '-E', '-ni', pat, ru], env=GREP_UNICODE), Command('grep', ['grep', '-E', '-ni', pat, ru], env=GREP_UNICODE),
]) ])
@@ -584,8 +654,10 @@ def bench_subtitles_ru_surrounding_words(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]), Command('rg', ['rg', '-n', pat, ru]),
Command('grep', ['grep', '-E', '-n', pat, ru], env=GREP_UNICODE), Command('grep', ['grep', '-E', '-n', pat, ru], env=GREP_UNICODE),
Command('ugrep', ['ugrep', '-n', pat, ru]),
Command('ag (ASCII)', ['ag', '-s', pat, ru]), Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII), Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, ru]),
]) ])
@@ -604,9 +676,11 @@ def bench_subtitles_ru_no_literal(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]), Command('rg', ['rg', '-n', pat, ru]),
Command('ugrep', ['ugrep', '-n', pat, ru]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]), Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
Command('ag (ASCII)', ['ag', '-s', pat, ru]), Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII), Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, ru])
]) ])