diff --git a/pre_commit/store.py b/pre_commit/store.py index 71e339ae..f004c9be 100644 --- a/pre_commit/store.py +++ b/pre_commit/store.py @@ -1,9 +1,11 @@ from __future__ import unicode_literals +import contextlib import io import logging import os import os.path +import sqlite3 import tempfile from cached_property import cached_property @@ -12,7 +14,6 @@ from pre_commit.prefixed_command_runner import PrefixedCommandRunner from pre_commit.util import clean_path_on_failure from pre_commit.util import cmd_output from pre_commit.util import cwd -from pre_commit.util import hex_md5 logger = logging.getLogger('pre_commit') @@ -58,11 +59,35 @@ class Store(object): 'Learn more: https://github.com/pre-commit/pre-commit\n' ) + def _write_sqlite_db(self): + # To avoid a race where someone ^Cs between db creation and execution + # of the CREATE TABLE statement + fd, tmpfile = tempfile.mkstemp() + # We'll be managing this file ourselves + os.close(fd) + # sqlite doesn't close its fd with its contextmanager >.< + # contextlib.closing fixes this. + # See: http://stackoverflow.com/a/28032829/812183 + with contextlib.closing(sqlite3.connect(tmpfile)) as db: + db.executescript( + 'CREATE TABLE repos (' + ' repo CHAR(255) NOT NULL,' + ' ref CHAR(255) NOT NULL,' + ' path CHAR(255) NOT NULL,' + ' PRIMARY KEY (repo, ref)' + ');' + ) + + # Atomic file move + os.rename(tmpfile, self.db_path) + def _create(self): - if os.path.exists(self.directory): + if os.path.exists(self.db_path): return - os.makedirs(self.directory) - self._write_readme() + if not os.path.exists(self.directory): + os.makedirs(self.directory) + self._write_readme() + self._write_sqlite_db() def require_created(self): """Require the pre-commit file store to be created.""" @@ -77,9 +102,13 @@ class Store(object): self.require_created() # Check if we already exist - sha_path = os.path.join(self.directory, sha + '_' + hex_md5(url)) - if os.path.exists(sha_path): - return os.readlink(sha_path) + with sqlite3.connect(self.db_path) as db: + result = db.execute( + 'SELECT path FROM repos WHERE repo = ? AND ref = ?', + [url, sha], + ).fetchone() + if result: + return result[0] logger.info('Initializing environment for {0}.'.format(url)) @@ -89,8 +118,12 @@ class Store(object): with cwd(dir): cmd_output('git', 'checkout', sha) - # Make a symlink from sha->repo - os.symlink(dir, sha_path) + # Update our db with the created repo + with sqlite3.connect(self.db_path) as db: + db.execute( + 'INSERT INTO repos (repo, ref, path) VALUES (?, ?, ?)', + [url, sha, dir], + ) return dir def get_repo_path_getter(self, repo, sha): @@ -99,3 +132,7 @@ class Store(object): @cached_property def cmd_runner(self): return PrefixedCommandRunner(self.directory) + + @cached_property + def db_path(self): + return os.path.join(self.directory, 'db.db') diff --git a/pre_commit/util.py b/pre_commit/util.py index 1891c067..9f94accc 100644 --- a/pre_commit/util.py +++ b/pre_commit/util.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import contextlib import functools -import hashlib import os import os.path import shutil @@ -60,14 +59,6 @@ def shell_escape(arg): return "'" + arg.replace("'", "'\"'\"'".strip()) + "'" -def hex_md5(s): - """Hexdigest an md5 of the string. - - :param text s: - """ - return hashlib.md5(s.encode('utf-8')).hexdigest() - - @contextlib.contextmanager def tarfile_open(*args, **kwargs): """Compatibility layer because python2.6""" diff --git a/tests/store_test.py b/tests/store_test.py index deac0e1b..04107731 100644 --- a/tests/store_test.py +++ b/tests/store_test.py @@ -5,6 +5,7 @@ import io import os import os.path import shutil +import sqlite3 import mock import pytest @@ -14,7 +15,6 @@ from pre_commit.store import _get_default_directory from pre_commit.store import Store from pre_commit.util import cmd_output from pre_commit.util import cwd -from pre_commit.util import hex_md5 from testing.fixtures import git_dir from testing.util import get_head_sha @@ -74,6 +74,7 @@ def test_does_not_recreate_if_directory_already_exists(store): # Note: we're intentionally leaving out the README file. This is so we can # know that `Store` didn't call create os.mkdir(store.directory) + io.open(store.db_path, 'a+').close() # Call require_created, this should not call create store.require_created() assert not os.path.exists(os.path.join(store.directory, 'README')) @@ -101,11 +102,13 @@ def test_clone(store, tmpdir_factory, log_info_mock): # Should be checked out to the sha we specified assert get_head_sha(ret) == sha - # Assert that we made a symlink from the sha to the repo - sha_path = os.path.join(store.directory, sha + '_' + hex_md5(path)) - assert os.path.exists(sha_path) - assert os.path.islink(sha_path) - assert os.readlink(sha_path) == ret + # Assert there's an entry in the sqlite db for this + with sqlite3.connect(store.db_path) as db: + path, = db.execute( + 'SELECT path from repos WHERE repo = ? and ref = ?', + [path, sha], + ).fetchone() + assert path == ret def test_clone_cleans_up_on_checkout_failure(store): @@ -129,15 +132,22 @@ def test_has_cmd_runner_at_directory(store): def test_clone_when_repo_already_exists(store): - # Create a symlink and directory in the store simulating an already - # created repository. + # Create an entry in the sqlite db that makes it look like the repo has + # been cloned. store.require_created() - repo_dir_path = os.path.join(store.directory, 'repo_dir') - os.mkdir(repo_dir_path) - os.symlink( - repo_dir_path, - os.path.join(store.directory, 'fake_sha' + '_' + hex_md5('url')), - ) - ret = store.clone('url', 'fake_sha') - assert ret == repo_dir_path + with sqlite3.connect(store.db_path) as db: + db.execute( + 'INSERT INTO repos (repo, ref, path) ' + 'VALUES ("fake_repo", "fake_ref", "fake_path")' + ) + + assert store.clone('fake_repo', 'fake_ref') == 'fake_path' + + +def test_require_created_when_directory_exists_but_not_db(store): + # In versions <= 0.3.5, there was no sqlite db causing a need for + # backward compatibility + os.makedirs(store.directory) + store.require_created() + assert os.path.exists(store.db_path)