# pre-commit/tests/file_classifier/classifier_test.py
# 2016-01-20 15:16:13 -08:00
#
# 143 lines
# 4.4 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# encoding: utf-8
from __future__ import unicode_literals
from contextlib import contextmanager
import mock
import pytest
from pre_commit.file_classifier.classifier import _file_is_binary
from pre_commit.file_classifier.classifier import _guess_types_from_extension
from pre_commit.file_classifier.classifier import _guess_types_from_shebang
from pre_commit.file_classifier.classifier import _read_interpreter_from_shebang # noqa
from pre_commit.file_classifier.classifier import classify
from pre_commit.git import GIT_MODE_EXECUTABLE
from pre_commit.git import GIT_MODE_FILE
from pre_commit.git import GIT_MODE_SUBMODULE
from pre_commit.git import GIT_MODE_SYMLINK
@contextmanager
def mock_open(read_data):
    """Patch ``io.open`` so that reading the opened file returns ``read_data``.

    Only the context-manager form is configured: inside
    ``with io.open(...) as f``, ``f.read()`` returns ``read_data``.

    Yields the mock standing in for ``io.open`` so that callers can make
    assertions about how it was called.
    """
    # The mock library's own mock_open helper doesn't support reading
    # binary data :\
    # https://bugs.python.org/issue23004
    with mock.patch('io.open') as patched_open:
        file_handle = patched_open.return_value.__enter__()
        file_handle.read.return_value = read_data
        yield patched_open
@pytest.mark.parametrize('path,data,mode,expected', [
    # regular, non-executable file with a recognised extension
    (
        'test.py',
        b'def main():\n pass\n',
        GIT_MODE_FILE,
        ['file', 'text', 'python', 'nonexecutable'],
    ),
    # regular file with no extension at all
    (
        'Makefile',
        b'test:\n\ttac /etc/passwd\n',
        GIT_MODE_FILE,
        ['file', 'text', 'make', 'nonexecutable'],
    ),
    # executable without an extension, but with a shebang line
    (
        'delete-everything',
        b'#!/bin/bash\nrm -rf /\n',
        GIT_MODE_EXECUTABLE,
        ['file', 'text', 'shell', 'executable'],
    ),
    # executable whose contents are binary (first few bytes of /bin/bash)
    (
        'bin/bash',
        b'\x7f\x45\x4c\x46\x02\x01\x01',
        GIT_MODE_EXECUTABLE,
        ['file', 'binary', 'executable'],
    ),
    # submodule: no file contents are supplied to the mocked read
    (
        'modules/apache2',
        None,
        GIT_MODE_SUBMODULE,
        ['submodule'],
    ),
    # symlink: no file contents are supplied to the mocked read
    (
        'some/secret',
        None,
        GIT_MODE_SYMLINK,
        ['symlink'],
    ),
])
def test_classify(path, data, mode, expected):
    """classify() should produce exactly the expected tags for each case.

    ``data`` is what the mocked ``io.open(...).read()`` returns; the
    comparison is order-insensitive.
    """
    with mock_open(data):
        actual_tags = set(classify(path, mode))
    assert actual_tags == set(expected)
def test_classify_invalid():
    """classify() should raise ValueError for a mode it doesn't know about."""
    unknown_mode = 9999
    with pytest.raises(ValueError):
        classify('some_path', unknown_mode)
@pytest.mark.parametrize('path,expected', [
    # paths with a recognised extension
    ('/hello/foo.py', ['python']),
    ('a/b/c/d/e.rb', ['ruby']),
    ('derp.sh', ['shell']),
    # more than one dot in the file name
    ('derp.tmpl.sh', ['shell']),
    # empty path, no dot before "sh", and a name made of binary-looking bytes
    ('', []),
    ('derpsh', []),
    ('\x7f\x45\x4c\x46\x02\x01\x01\x00\x00', []),
])
def test_guess_types_from_extension(path, expected):
    """_guess_types_from_extension() should return the expected tags.

    The comparison is order-insensitive.
    """
    guessed = set(_guess_types_from_extension(path))
    assert guessed == set(expected)
@pytest.mark.parametrize('data,expected', [
    # interpreter reached through /usr/bin/env
    (b'#!/usr/bin/env python3\nasdf', ['python']),
    (b'#!/usr/bin/env /usr/bin/python2.7\nasdf', ['python']),
    # interpreter given directly, followed by flags
    (b'#!/bin/bash -euxm', ['shell']),
    (b'#!/bin/sh -euxm', ['shell']),
    # empty file and binary contents: no tags expected
    (b'', []),
    (b'\x7f\x45\x4c\x46\x02\x01\x01\x00\x00', []),
])
def test_guess_types_from_shebang(data, expected):
    """_guess_types_from_shebang() should return the expected tags.

    ``data`` is what the mocked ``io.open(...).read()`` returns; the
    comparison is order-insensitive.
    """
    with mock_open(data):
        guessed = set(_guess_types_from_shebang('/etc/passwd'))
        assert guessed == set(expected)
@pytest.mark.parametrize('data,expected', [
    # well-formed shebang lines
    (b'#!/usr/bin/env python3\nasdf', 'python3'),
    (b'#!/bin/bash -euxm', 'bash'),
    (b'#!/bin/bash -e -u -x -m', 'bash'),
    (b'#! /usr/bin/python ', 'python'),
    # no shebang at all
    (b'what is this', None),
    (b'', None),
    # shebang marker and interpreter split across lines / not on line one
    (b'#!\n/usr/bin/python', None),
    (b'\n#!/usr/bin/python', None),
    # non-ASCII character in the shebang line
    ('#!/usr/bin/énv python3\nasdf'.encode('utf8'), None),
    # shebang marker with no interpreter after it
    (b'#! ', None),
    # binary contents, without and with a leading "#!"
    (b'\x7f\x45\x4c\x46\x02\x01\x01\x00\x00', None),
    (b'#!\x7f\x45\x4c\x46\x02\x01\x01\x00\x00', None),
])
def test_read_interpreter_from_shebang(data, expected):
    """_read_interpreter_from_shebang() should return the expected name.

    ``data`` is what the mocked ``io.open(...).read()`` returns. The test
    also checks that the file was opened exactly once, in binary mode.
    """
    target_path = '/etc/passwd'
    with mock_open(data) as opened:
        interpreter = _read_interpreter_from_shebang(target_path)
        assert interpreter == expected
        opened.assert_called_once_with(target_path, 'rb')
@pytest.mark.parametrize('data,expected', [
    # plain ASCII and empty contents are not binary
    (b'hello world', False),
    (b'', False),
    # UTF-8 encoded non-ASCII text is not binary
    ('éóñəå  ⊂(◉‿◉)つ(ノ≥∇≤)'.encode('utf8'), False),
    # The backslash is escaped explicitly: a bare "\_" is an invalid escape
    # sequence that newer Python versions warn about. The resulting string
    # is unchanged.
    ('¯\\_(ツ)_/¯'.encode('utf8'), False),
    ('♪┏(・o・)┛♪┗ ( ・o・) ┓♪┏ ( ) ┛♪┗ (・o・ ) ┓♪┏(・o・)┛♪'.encode('utf8'), False),
    # latin1 encoded text is not binary either
    ('éóñå'.encode('latin1'), False),
    # a NUL byte makes otherwise-plain text binary
    (b'hello world\x00', True),
    (b'\x7f\x45\x4c\x46\x02\x01\x01', True),  # first few bytes of /bin/bash
    (b'\x43\x92\xd9\x0f\xaf\x32\x2c', True),  # some /dev/urandom output
])
def test_file_is_binary(data, expected):
    """_file_is_binary() should return exactly True or False as expected.

    ``data`` is what the mocked ``io.open(...).read()`` returns. The result
    is compared with ``is``, so a merely truthy or falsy value fails. The
    test also checks that the file was opened exactly once, in binary mode.
    """
    with mock_open(data) as m:
        assert _file_is_binary('/etc/passwd') is expected
        m.assert_called_once_with('/etc/passwd', 'rb')