Speed up filename filtering.

Previously, a `getcwd` syscall was made for every filename that was filtered.

Instead, the result is now cached once per run.

- When all files are identified by filename only: ~45% improvement
- When no files are identified by filename only: ~55% improvement

This makes little difference to overall execution; the bigger win is
eliminating the `memoize_by_cwd` hack.  Just removing the memoization would
have *increased* the runtime by 300-500%.
This commit is contained in:
Anthony Sottile 2019-01-23 20:42:27 -08:00
parent 38308dc02d
commit b1389603e0
6 changed files with 61 additions and 103 deletions

View file

@ -1,17 +1,14 @@
from __future__ import unicode_literals
import os.path
import random
import pytest
from pre_commit.util import CalledProcessError
from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output
from pre_commit.util import memoize_by_cwd
from pre_commit.util import parse_version
from pre_commit.util import tmpdir
from testing.util import cwd
def test_CalledProcessError_str():
@ -42,37 +39,6 @@ def test_CalledProcessError_str_nooutput():
)
@pytest.fixture
def memoized_by_cwd():
    """Return a ``memoize_by_cwd``-wrapped helper for the tests below.

    The wrapped helper appends 64 random bits (as a decimal string) to its
    argument, so two un-cached invocations are overwhelmingly likely to
    produce different strings.
    """
    def func(arg):
        random_bits = random.getrandbits(64)
        return arg + str(random_bits)

    # Equivalent to decorating ``func`` with ``@memoize_by_cwd``.
    return memoize_by_cwd(func)
def test_memoized_by_cwd_returns_same_twice_in_a_row(memoized_by_cwd):
    """Two back-to-back calls with one argument return the same object."""
    first_call = memoized_by_cwd('baz')
    second_call = memoized_by_cwd('baz')
    # Identity (not just equality): the second call must reuse the first
    # call's result object rather than computing a fresh random string.
    assert first_call is second_call
def test_memoized_by_cwd_returns_different_for_different_args(memoized_by_cwd):
    """Distinct arguments produce distinct, correctly-prefixed results."""
    baz_result = memoized_by_cwd('baz')
    bar_result = memoized_by_cwd('bar')
    # Each result must start with the argument it was computed from.
    assert baz_result.startswith('baz')
    assert bar_result.startswith('bar')
    assert baz_result != bar_result
def test_memoized_by_cwd_changes_with_different_cwd(memoized_by_cwd):
    """The same argument yields a different result inside ``cwd('.git')``."""
    outside_result = memoized_by_cwd('baz')
    # NOTE(review): ``cwd`` comes from ``testing.util``; presumably it
    # switches the working directory for the duration of the block -- confirm.
    with cwd('.git'):
        inside_result = memoized_by_cwd('baz')

    assert outside_result != inside_result
def test_clean_on_failure_noop(in_tmpdir):
with clean_path_on_failure('foo'):
pass