diff options
-rw-r--r-- | src/pkgdev/mangle.py | 52 | ||||
-rw-r--r-- | src/pkgdev/scripts/pkgdev_commit.py | 52 | ||||
-rw-r--r-- | tests/test_mangle.py | 25 |
3 files changed, 80 insertions, 49 deletions
diff --git a/src/pkgdev/mangle.py b/src/pkgdev/mangle.py index fd4cac5..c957e58 100644 --- a/src/pkgdev/mangle.py +++ b/src/pkgdev/mangle.py @@ -12,7 +12,7 @@ from snakeoil.cli.exceptions import UserException from snakeoil.mappings import OrderedSet copyright_regex = re.compile( - r'^# Copyright (?P<begin>\d{4}-)?(?P<end>\d{4}) (?P<holder>.+)$') + r'^# Copyright (?P<date>(?P<begin>\d{4}-)?(?P<end>\d{4})) (?P<holder>.+)$') def mangle(name): @@ -37,11 +37,11 @@ class Mangler: # mapping of mangling types to functions _mangle_funcs = {} - def __init__(self, paths, skip_regex=None): + def __init__(self, changes, skip_regex=None): self.jobs = os.cpu_count() if skip_regex is not None: - paths = (x for x in paths if not skip_regex.match(x)) - self.paths = OrderedSet(paths) + changes = (c for c in changes if not skip_regex.match(c.full_path)) + self.changes = OrderedSet(changes) # setup for parallelizing the mangling procedure across files self._mp_ctx = multiprocessing.get_context('fork') @@ -58,9 +58,9 @@ class Mangler: lambda f, g: lambda x: f(g(self, x)), self._mangle_funcs.values(), lambda x: x) @mangle('EOF') - def _eof(self, data): + def _eof(self, change): """Drop EOF whitespace and forcibly add EOF newline.""" - return data.rstrip() + '\n' + return change.update(change.data.rstrip() + '\n') def _kill_pipe(self, *args, error=None): """Handle terminating the mangling process group.""" @@ -90,26 +90,20 @@ class Mangler: return path - def _mangle_file(self, path): - """Run composed mangling function across a given file path.""" - try: - with open(path, 'r+', encoding='utf-8') as f: - if orig_data := f.read(): - data = self.composed_func(orig_data) - if data != orig_data: - f.seek(0) - f.truncate() - f.write(data) - return path - except (FileNotFoundError, UnicodeDecodeError): - pass + def _mangle(self, change): + """Run composed mangling function across a given change.""" + if orig_data := change.read(): + change = self.composed_func(change) + if change.data != orig_data: + change.sync() + return change def _run_manglers(self, paths_q): """Consumer that runs mangling functions, queuing mangled paths for output.""" try: - for path in iter(paths_q.get, None): - if mangled_path := self._mangle_file(path): - self._mangled_paths_q.put(mangled_path) + for change in iter(paths_q.get, None): + if mangled_change := self._mangle(change): + self._mangled_paths_q.put(mangled_change.path) except Exception: # pragma: no cover # traceback can't be pickled so serialize it tb = traceback.format_exc() @@ -124,8 +118,8 @@ class Mangler: pool.close() # queue paths for processing - for path in self.paths: - paths_q.put(path) + for change in self.changes: + paths_q.put(change) # notify consumers that no more work exists for i in range(self.jobs): paths_q.put(None) @@ -141,10 +135,12 @@ class GentooMangler(Mangler): _mangle_funcs = Mangler._mangle_funcs.copy() @mangle('copyright') - def _copyright(self, data): + def _copyright(self, change): """Fix copyright headers and dates.""" - lines = data.splitlines() + lines = change.data.splitlines() if mo := copyright_regex.match(lines[0]): - lines[0] = re.sub(mo.group('end'), self._current_year, lines[0]) + # replace entire date range for new files + group = 'date' if change.status == 'A' else 'end' + lines[0] = re.sub(mo.group(group), self._current_year, lines[0]) lines[0] = re.sub('Gentoo Foundation', 'Gentoo Authors', lines[0]) - return '\n'.join(lines) + '\n' + return change.update('\n'.join(lines) + '\n') diff --git a/src/pkgdev/scripts/pkgdev_commit.py b/src/pkgdev/scripts/pkgdev_commit.py index 4a30a51..920555b 100644 --- a/src/pkgdev/scripts/pkgdev_commit.py +++ b/src/pkgdev/scripts/pkgdev_commit.py @@ -490,21 +490,28 @@ class GitChanges(UserDict): if status == 'R' and (om := self._ebuild_re.match(old_path)): old = atom_cls(f"={om.group('category')}/{om.group('package')}") changes[PkgChange].add(PkgChange( - status, path, atom=atom, ebuild=True, old=old)) + self._repo.location, status, path, atom=atom, ebuild=True, old=old)) except MalformedAtom: continue else: # non-ebuild package level changes atom = atom_cls(os.sep.join(path_components[:2])) - changes[PkgChange].add(PkgChange(status, path, atom=atom, ebuild=False)) + changes[PkgChange].add( + PkgChange(self._repo.location, status, path, atom=atom, ebuild=False)) elif mo := self._eclass_re.match(path): - changes[EclassChange].add(EclassChange(status, path, name=mo.group('name'))) + changes[EclassChange].add( + EclassChange(self._repo.location, status, path, name=mo.group('name'))) else: - changes[path_components[0]].add(Change(status, path)) + changes[path_components[0]].add(Change(self._repo.location, status, path)) return changes @jit_attr + def all(self): + """Ordered set of all change objects.""" + return OrderedFrozenSet(chain.from_iterable(self.data.values())) + + @jit_attr def pkg_changes(self): """Ordered set of all package change objects.""" return OrderedFrozenSet(self.data.get(PkgChange, ())) @@ -515,11 +522,6 @@ class GitChanges(UserDict): return OrderedFrozenSet(x for x in self.pkg_changes if x.ebuild) @jit_attr - def paths(self): - """Ordered set of all staged paths.""" - return OrderedFrozenSet(x.path for x in chain.from_iterable(self.data.values())) - - @jit_attr def prefix(self): """Determine commit message prefix using GLEP 66 as a guide. @@ -573,10 +575,38 @@ class GitChanges(UserDict): @dataclass(frozen=True) class Change: """Generic file change.""" + repo: str status: str path: str @property + def full_path(self): + return pjoin(self.repo, self.path) + + def read(self): + """Read data from the change's file.""" + try: + with open(self.full_path, 'r', encoding='utf-8') as f: + data = f.read() + except (FileNotFoundError, UnicodeDecodeError): + data = None + object.__setattr__(self, "data", data) + return data + + def update(self, data): + """Update the change's cached file data.""" + object.__setattr__(self, "data", data) + return self + + def sync(self): + """Write the change's cached file data back to its file.""" + try: + with open(self.full_path, 'w', encoding='utf-8') as f: + f.write(self.data) + except AttributeError: + pass + + @property def prefix(self): if os.sep in self.path: # use change path's parent directory @@ -736,8 +766,8 @@ def _commit(options, out, err): # don't mangle FILESDIR content skip_regex = re.compile(rf'^{repo.location}/[^/]+/[^/]+/files/.+$') mangler = GentooMangler if options.gentoo_repo else Mangler - paths = (pjoin(repo.location, x) for x in changes.paths) - options.git_add_files.extend(mangler(paths, skip_regex=skip_regex)) + files = mangler(changes.all, skip_regex=skip_regex) + options.git_add_files.extend(files) # stage modified files if options.git_add_files: diff --git a/tests/test_mangle.py b/tests/test_mangle.py index 1df646c..651f8b4 100644 --- a/tests/test_mangle.py +++ b/tests/test_mangle.py @@ -2,23 +2,29 @@ import os import multiprocessing import re import signal +from functools import partial from unittest.mock import patch from pkgdev.mangle import Mangler +from pkgdev.scripts.pkgdev_commit import Change import pytest from snakeoil.cli.exceptions import UserException +def fake_change(s): + return Change('/repo', 'A', str(s)) + + class TestMangler: def test_nonexistent_file(self, tmp_path): path = tmp_path / 'nonexistent' - assert list(Mangler([str(path)])) == [] + assert list(Mangler([fake_change(path)])) == [] def test_empty_file(self, tmp_path): path = tmp_path / 'empty' path.touch() - assert list(Mangler([str(path)])) == [] + assert list(Mangler([fake_change(path)])) == [] def test_skipped_file(self, tmp_path): paths = [(tmp_path / x) for x in ('file', 'file.patch')] @@ -27,24 +33,24 @@ class TestMangler: p.write_text('# comment') # skip patch files skip_regex = re.compile(r'.+\.patch$') - mangled_paths = set(Mangler(map(str, paths), skip_regex=skip_regex)) + mangled_paths = set(Mangler(map(fake_change, paths), skip_regex=skip_regex)) assert mangled_paths == {str(tmp_path / 'file')} for p in paths: p.write_text('# comment') # don't skip any files - mangled_paths = set(Mangler(map(str, paths))) + mangled_paths = set(Mangler(map(fake_change, paths))) assert mangled_paths == set(map(str, paths)) def test_nonmangled_file(self, tmp_path): path = tmp_path / 'file' path.write_text('# comment\n') - assert list(Mangler([str(path)])) == [] + assert list(Mangler([fake_change(path)])) == [] def test_mangled_file(self, tmp_path): path = tmp_path / 'file' path.write_text('# comment') - assert list(Mangler([str(path)])) == [str(path)] + assert list(Mangler([fake_change(path)])) == [str(path)] assert path.read_text() == '# comment\n' def test_iterator_exceptions(self, tmp_path): @@ -55,9 +61,9 @@ class TestMangler: def _mangle_func(self, data): raise Exception('func failed') - with patch('pkgdev.mangle.Mangler._mangle_file', _mangle_func): + with patch('pkgdev.mangle.Mangler._mangle', _mangle_func): with pytest.raises(UserException, match='Exception: func failed'): - list(Mangler([str(path)])) + list(Mangler([fake_change(path)])) def test_sigint_handling(self, tmp_path): """Verify SIGINT is properly handled by the parallelized pipeline.""" @@ -68,7 +74,6 @@ class TestMangler: """Mangler run in a separate process that gets interrupted.""" import sys import time - from functools import partial from unittest.mock import patch from pkgdev.mangle import Mangler @@ -81,7 +86,7 @@ class TestMangler: with patch('pkgdev.mangle.Mangler.__iter__') as fake_iter: fake_iter.side_effect = partial(sleep) try: - iter(Mangler([str(path)])) + iter(Mangler([fake_change(path)])) except KeyboardInterrupt: queue.put(None) sys.exit(0) |