Use sqlite3 instead of symlinks for managing repositories

This commit is contained in:
Anthony Sottile 2015-01-18 23:30:19 -08:00
parent 3babceb2ca
commit 56e5c4eb2d
3 changed files with 72 additions and 34 deletions

View file

@ -1,9 +1,11 @@
from __future__ import unicode_literals
import contextlib
import io
import logging
import os
import os.path
import sqlite3
import tempfile
from cached_property import cached_property
@ -12,7 +14,6 @@ from pre_commit.prefixed_command_runner import PrefixedCommandRunner
from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output
from pre_commit.util import cwd
from pre_commit.util import hex_md5
logger = logging.getLogger('pre_commit')
@ -58,11 +59,35 @@ class Store(object):
'Learn more: https://github.com/pre-commit/pre-commit\n'
)
def _write_sqlite_db(self):
# To avoid a race where someone ^Cs between db creation and execution
# of the CREATE TABLE statement
fd, tmpfile = tempfile.mkstemp()
# We'll be managing this file ourselves
os.close(fd)
# sqlite doesn't close its fd with its contextmanager >.<
# contextlib.closing fixes this.
# See: http://stackoverflow.com/a/28032829/812183
with contextlib.closing(sqlite3.connect(tmpfile)) as db:
db.executescript(
'CREATE TABLE repos ('
' repo CHAR(255) NOT NULL,'
' ref CHAR(255) NOT NULL,'
' path CHAR(255) NOT NULL,'
' PRIMARY KEY (repo, ref)'
');'
)
# Atomic file move
os.rename(tmpfile, self.db_path)
def _create(self):
if os.path.exists(self.directory):
if os.path.exists(self.db_path):
return
os.makedirs(self.directory)
self._write_readme()
if not os.path.exists(self.directory):
os.makedirs(self.directory)
self._write_readme()
self._write_sqlite_db()
def require_created(self):
"""Require the pre-commit file store to be created."""
@ -77,9 +102,13 @@ class Store(object):
self.require_created()
# Check if we already exist
sha_path = os.path.join(self.directory, sha + '_' + hex_md5(url))
if os.path.exists(sha_path):
return os.readlink(sha_path)
with sqlite3.connect(self.db_path) as db:
result = db.execute(
'SELECT path FROM repos WHERE repo = ? AND ref = ?',
[url, sha],
).fetchone()
if result:
return result[0]
logger.info('Initializing environment for {0}.'.format(url))
@ -89,8 +118,12 @@ class Store(object):
with cwd(dir):
cmd_output('git', 'checkout', sha)
# Make a symlink from sha->repo
os.symlink(dir, sha_path)
# Update our db with the created repo
with sqlite3.connect(self.db_path) as db:
db.execute(
'INSERT INTO repos (repo, ref, path) VALUES (?, ?, ?)',
[url, sha, dir],
)
return dir
def get_repo_path_getter(self, repo, sha):
@ -99,3 +132,7 @@ class Store(object):
@cached_property
def cmd_runner(self):
return PrefixedCommandRunner(self.directory)
@cached_property
def db_path(self):
return os.path.join(self.directory, 'db.db')

View file

@ -2,7 +2,6 @@ from __future__ import unicode_literals
import contextlib
import functools
import hashlib
import os
import os.path
import shutil
@ -60,14 +59,6 @@ def shell_escape(arg):
return "'" + arg.replace("'", "'\"'\"'".strip()) + "'"
def hex_md5(s):
"""Hexdigest an md5 of the string.
:param text s:
"""
return hashlib.md5(s.encode('utf-8')).hexdigest()
@contextlib.contextmanager
def tarfile_open(*args, **kwargs):
"""Compatibility layer because python2.6"""

View file

@ -5,6 +5,7 @@ import io
import os
import os.path
import shutil
import sqlite3
import mock
import pytest
@ -14,7 +15,6 @@ from pre_commit.store import _get_default_directory
from pre_commit.store import Store
from pre_commit.util import cmd_output
from pre_commit.util import cwd
from pre_commit.util import hex_md5
from testing.fixtures import git_dir
from testing.util import get_head_sha
@ -74,6 +74,7 @@ def test_does_not_recreate_if_directory_already_exists(store):
# Note: we're intentionally leaving out the README file. This is so we can
# know that `Store` didn't call create
os.mkdir(store.directory)
io.open(store.db_path, 'a+').close()
# Call require_created, this should not call create
store.require_created()
assert not os.path.exists(os.path.join(store.directory, 'README'))
@ -101,11 +102,13 @@ def test_clone(store, tmpdir_factory, log_info_mock):
# Should be checked out to the sha we specified
assert get_head_sha(ret) == sha
# Assert that we made a symlink from the sha to the repo
sha_path = os.path.join(store.directory, sha + '_' + hex_md5(path))
assert os.path.exists(sha_path)
assert os.path.islink(sha_path)
assert os.readlink(sha_path) == ret
# Assert there's an entry in the sqlite db for this
with sqlite3.connect(store.db_path) as db:
path, = db.execute(
'SELECT path from repos WHERE repo = ? and ref = ?',
[path, sha],
).fetchone()
assert path == ret
def test_clone_cleans_up_on_checkout_failure(store):
@ -129,15 +132,22 @@ def test_has_cmd_runner_at_directory(store):
def test_clone_when_repo_already_exists(store):
# Create a symlink and directory in the store simulating an already
# created repository.
# Create an entry in the sqlite db that makes it look like the repo has
# been cloned.
store.require_created()
repo_dir_path = os.path.join(store.directory, 'repo_dir')
os.mkdir(repo_dir_path)
os.symlink(
repo_dir_path,
os.path.join(store.directory, 'fake_sha' + '_' + hex_md5('url')),
)
ret = store.clone('url', 'fake_sha')
assert ret == repo_dir_path
with sqlite3.connect(store.db_path) as db:
db.execute(
'INSERT INTO repos (repo, ref, path) '
'VALUES ("fake_repo", "fake_ref", "fake_path")'
)
assert store.clone('fake_repo', 'fake_ref') == 'fake_path'
def test_require_created_when_directory_exists_but_not_db(store):
# In versions <= 0.3.5, there was no sqlite db causing a need for
# backward compatibility
os.makedirs(store.directory)
store.require_created()
assert os.path.exists(store.db_path)