Speed up filename filtering.

Previously, there was a `getcwd` syscall for every filename that was filtered.

Instead, the result is now cached per-run.

- When all files are identified by filename only: ~45% improvement
- When no files are identified by filename only: ~55% improvement

This makes little difference to overall execution; the bigger win is
eliminating the `memoize_by_cwd` hack.  Just removing the memoization would
have *increased* the runtime by 300-500%.
This commit is contained in:
Anthony Sottile 2019-01-23 20:42:27 -08:00
parent 38308dc02d
commit b1389603e0
6 changed files with 61 additions and 103 deletions

View file

@ -2,7 +2,6 @@ from __future__ import unicode_literals
import contextlib
import errno
import functools
import os.path
import shutil
import stat
@ -31,23 +30,6 @@ def mkdirp(path):
raise
def memoize_by_cwd(func):
    """Memoize a function call based on os.getcwd().

    The returned wrapper caches results keyed on the current working
    directory plus the call arguments, so the same arguments evaluated from
    a different directory are computed again.

    All arguments must be hashable, since they become part of a dict key.
    Keyword arguments are accepted and take part in the key; a call made
    positionally and the same call made with keywords are cached separately.

    The cache is exposed as the ``_cache`` dict attribute of the wrapper.
    """
    # Separates the positional and keyword parts of a key so that
    # ``f(('a', 1))`` and ``f(a=1)`` can never share a cache entry.
    kwargs_mark = object()

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Positional-only calls keep the plain ``(cwd,) + args`` key shape.
        key = (os.getcwd(),) + args
        if kwargs:
            # Sort so the key does not depend on keyword order; names are
            # unique, so the values themselves are never compared.
            key += (kwargs_mark,) + tuple(sorted(kwargs.items()))
        try:
            return wrapper._cache[key]
        except KeyError:
            ret = wrapper._cache[key] = func(*args, **kwargs)
            return ret

    wrapper._cache = {}
    return wrapper
@contextlib.contextmanager
def clean_path_on_failure(path):
"""Cleans up the directory on an exceptional failure."""