diff options
author | Tim Harder <radhermit@gmail.com> | 2021-07-28 23:39:28 -0600 |
---|---|---|
committer | Tim Harder <radhermit@gmail.com> | 2021-07-28 23:42:31 -0600 |
commit | 57b8fafce24ae1959705eca04248153d654c9ba8 (patch) | |
tree | 9cfb77e98229a72d1edd9959d3bae86fd70be783 | |
parent | pkgdev commit: use filtered pkg list for manifest file targets (diff) | |
download | pkgdev-57b8fafce24ae1959705eca04248153d654c9ba8.tar.gz pkgdev-57b8fafce24ae1959705eca04248153d654c9ba8.tar.bz2 pkgdev-57b8fafce24ae1959705eca04248153d654c9ba8.zip |
pkgdev commit: run mangling across change objects instead of raw paths

This allows mangling functions to inspect a change's attributes (such as
its status) and alter their behavior accordingly. For example, the entire
date range in the copyright header is now replaced for new files instead
of keeping the original start date from the old files they were based on.
-rw-r--r-- | src/pkgdev/mangle.py | 52 |
-rw-r--r-- | src/pkgdev/scripts/pkgdev_commit.py | 52 |
-rw-r--r-- | tests/test_mangle.py | 25 |
3 files changed, 80 insertions, 49 deletions
diff --git a/src/pkgdev/mangle.py b/src/pkgdev/mangle.py index fd4cac5..c957e58 100644 --- a/src/pkgdev/mangle.py +++ b/src/pkgdev/mangle.py @@ -12,7 +12,7 @@ from snakeoil.cli.exceptions import UserException from snakeoil.mappings import OrderedSet copyright_regex = re.compile( - r'^# Copyright (?P<begin>\d{4}-)?(?P<end>\d{4}) (?P<holder>.+)$') + r'^# Copyright (?P<date>(?P<begin>\d{4}-)?(?P<end>\d{4})) (?P<holder>.+)$') def mangle(name): @@ -37,11 +37,11 @@ class Mangler: # mapping of mangling types to functions _mangle_funcs = {} - def __init__(self, paths, skip_regex=None): + def __init__(self, changes, skip_regex=None): self.jobs = os.cpu_count() if skip_regex is not None: - paths = (x for x in paths if not skip_regex.match(x)) - self.paths = OrderedSet(paths) + changes = (c for c in changes if not skip_regex.match(c.full_path)) + self.changes = OrderedSet(changes) # setup for parallelizing the mangling procedure across files self._mp_ctx = multiprocessing.get_context('fork') @@ -58,9 +58,9 @@ class Mangler: lambda f, g: lambda x: f(g(self, x)), self._mangle_funcs.values(), lambda x: x) @mangle('EOF') - def _eof(self, data): + def _eof(self, change): """Drop EOF whitespace and forcibly add EOF newline.""" - return data.rstrip() + '\n' + return change.update(change.data.rstrip() + '\n') def _kill_pipe(self, *args, error=None): """Handle terminating the mangling process group.""" @@ -90,26 +90,20 @@ class Mangler: return path - def _mangle_file(self, path): - """Run composed mangling function across a given file path.""" - try: - with open(path, 'r+', encoding='utf-8') as f: - if orig_data := f.read(): - data = self.composed_func(orig_data) - if data != orig_data: - f.seek(0) - f.truncate() - f.write(data) - return path - except (FileNotFoundError, UnicodeDecodeError): - pass + def _mangle(self, change): + """Run composed mangling function across a given change.""" + if orig_data := change.read(): + change = self.composed_func(change) + if change.data != 
orig_data: + change.sync() + return change def _run_manglers(self, paths_q): """Consumer that runs mangling functions, queuing mangled paths for output.""" try: - for path in iter(paths_q.get, None): - if mangled_path := self._mangle_file(path): - self._mangled_paths_q.put(mangled_path) + for change in iter(paths_q.get, None): + if mangled_change := self._mangle(change): + self._mangled_paths_q.put(mangled_change.path) except Exception: # pragma: no cover # traceback can't be pickled so serialize it tb = traceback.format_exc() @@ -124,8 +118,8 @@ class Mangler: pool.close() # queue paths for processing - for path in self.paths: - paths_q.put(path) + for change in self.changes: + paths_q.put(change) # notify consumers that no more work exists for i in range(self.jobs): paths_q.put(None) @@ -141,10 +135,12 @@ class GentooMangler(Mangler): _mangle_funcs = Mangler._mangle_funcs.copy() @mangle('copyright') - def _copyright(self, data): + def _copyright(self, change): """Fix copyright headers and dates.""" - lines = data.splitlines() + lines = change.data.splitlines() if mo := copyright_regex.match(lines[0]): - lines[0] = re.sub(mo.group('end'), self._current_year, lines[0]) + # replace entire date range for new files + group = 'date' if change.status == 'A' else 'end' + lines[0] = re.sub(mo.group(group), self._current_year, lines[0]) lines[0] = re.sub('Gentoo Foundation', 'Gentoo Authors', lines[0]) - return '\n'.join(lines) + '\n' + return change.update('\n'.join(lines) + '\n') diff --git a/src/pkgdev/scripts/pkgdev_commit.py b/src/pkgdev/scripts/pkgdev_commit.py index 4a30a51..920555b 100644 --- a/src/pkgdev/scripts/pkgdev_commit.py +++ b/src/pkgdev/scripts/pkgdev_commit.py @@ -490,21 +490,28 @@ class GitChanges(UserDict): if status == 'R' and (om := self._ebuild_re.match(old_path)): old = atom_cls(f"={om.group('category')}/{om.group('package')}") changes[PkgChange].add(PkgChange( - status, path, atom=atom, ebuild=True, old=old)) + self._repo.location, status, path, 
atom=atom, ebuild=True, old=old)) except MalformedAtom: continue else: # non-ebuild package level changes atom = atom_cls(os.sep.join(path_components[:2])) - changes[PkgChange].add(PkgChange(status, path, atom=atom, ebuild=False)) + changes[PkgChange].add( + PkgChange(self._repo.location, status, path, atom=atom, ebuild=False)) elif mo := self._eclass_re.match(path): - changes[EclassChange].add(EclassChange(status, path, name=mo.group('name'))) + changes[EclassChange].add( + EclassChange(self._repo.location, status, path, name=mo.group('name'))) else: - changes[path_components[0]].add(Change(status, path)) + changes[path_components[0]].add(Change(self._repo.location, status, path)) return changes @jit_attr + def all(self): + """Ordered set of all change objects.""" + return OrderedFrozenSet(chain.from_iterable(self.data.values())) + + @jit_attr def pkg_changes(self): """Ordered set of all package change objects.""" return OrderedFrozenSet(self.data.get(PkgChange, ())) @@ -515,11 +522,6 @@ class GitChanges(UserDict): return OrderedFrozenSet(x for x in self.pkg_changes if x.ebuild) @jit_attr - def paths(self): - """Ordered set of all staged paths.""" - return OrderedFrozenSet(x.path for x in chain.from_iterable(self.data.values())) - - @jit_attr def prefix(self): """Determine commit message prefix using GLEP 66 as a guide. 
@@ -573,10 +575,38 @@ class GitChanges(UserDict): @dataclass(frozen=True) class Change: """Generic file change.""" + repo: str status: str path: str @property + def full_path(self): + return pjoin(self.repo, self.path) + + def read(self): + """Read data from the change's file.""" + try: + with open(self.full_path, 'r', encoding='utf-8') as f: + data = f.read() + except (FileNotFoundError, UnicodeDecodeError): + data = None + object.__setattr__(self, "data", data) + return data + + def update(self, data): + """Update the change's cached file data.""" + object.__setattr__(self, "data", data) + return self + + def sync(self): + """Write the change's cached file data back to its file.""" + try: + with open(self.full_path, 'w', encoding='utf-8') as f: + f.write(self.data) + except AttributeError: + pass + + @property def prefix(self): if os.sep in self.path: # use change path's parent directory @@ -736,8 +766,8 @@ def _commit(options, out, err): # don't mangle FILESDIR content skip_regex = re.compile(rf'^{repo.location}/[^/]+/[^/]+/files/.+$') mangler = GentooMangler if options.gentoo_repo else Mangler - paths = (pjoin(repo.location, x) for x in changes.paths) - options.git_add_files.extend(mangler(paths, skip_regex=skip_regex)) + files = mangler(changes.all, skip_regex=skip_regex) + options.git_add_files.extend(files) # stage modified files if options.git_add_files: diff --git a/tests/test_mangle.py b/tests/test_mangle.py index 1df646c..651f8b4 100644 --- a/tests/test_mangle.py +++ b/tests/test_mangle.py @@ -2,23 +2,29 @@ import os import multiprocessing import re import signal +from functools import partial from unittest.mock import patch from pkgdev.mangle import Mangler +from pkgdev.scripts.pkgdev_commit import Change import pytest from snakeoil.cli.exceptions import UserException +def fake_change(s): + return Change('/repo', 'A', str(s)) + + class TestMangler: def test_nonexistent_file(self, tmp_path): path = tmp_path / 'nonexistent' - assert 
list(Mangler([str(path)])) == [] + assert list(Mangler([fake_change(path)])) == [] def test_empty_file(self, tmp_path): path = tmp_path / 'empty' path.touch() - assert list(Mangler([str(path)])) == [] + assert list(Mangler([fake_change(path)])) == [] def test_skipped_file(self, tmp_path): paths = [(tmp_path / x) for x in ('file', 'file.patch')] @@ -27,24 +33,24 @@ class TestMangler: p.write_text('# comment') # skip patch files skip_regex = re.compile(r'.+\.patch$') - mangled_paths = set(Mangler(map(str, paths), skip_regex=skip_regex)) + mangled_paths = set(Mangler(map(fake_change, paths), skip_regex=skip_regex)) assert mangled_paths == {str(tmp_path / 'file')} for p in paths: p.write_text('# comment') # don't skip any files - mangled_paths = set(Mangler(map(str, paths))) + mangled_paths = set(Mangler(map(fake_change, paths))) assert mangled_paths == set(map(str, paths)) def test_nonmangled_file(self, tmp_path): path = tmp_path / 'file' path.write_text('# comment\n') - assert list(Mangler([str(path)])) == [] + assert list(Mangler([fake_change(path)])) == [] def test_mangled_file(self, tmp_path): path = tmp_path / 'file' path.write_text('# comment') - assert list(Mangler([str(path)])) == [str(path)] + assert list(Mangler([fake_change(path)])) == [str(path)] assert path.read_text() == '# comment\n' def test_iterator_exceptions(self, tmp_path): @@ -55,9 +61,9 @@ class TestMangler: def _mangle_func(self, data): raise Exception('func failed') - with patch('pkgdev.mangle.Mangler._mangle_file', _mangle_func): + with patch('pkgdev.mangle.Mangler._mangle', _mangle_func): with pytest.raises(UserException, match='Exception: func failed'): - list(Mangler([str(path)])) + list(Mangler([fake_change(path)])) def test_sigint_handling(self, tmp_path): """Verify SIGINT is properly handled by the parallelized pipeline.""" @@ -68,7 +74,6 @@ class TestMangler: """Mangler run in a separate process that gets interrupted.""" import sys import time - from functools import partial from 
unittest.mock import patch from pkgdev.mangle import Mangler @@ -81,7 +86,7 @@ class TestMangler: with patch('pkgdev.mangle.Mangler.__iter__') as fake_iter: fake_iter.side_effect = partial(sleep) try: - iter(Mangler([str(path)])) + iter(Mangler([fake_change(path)])) except KeyboardInterrupt: queue.put(None) sys.exit(0) |