about summary refs log tree commit diff
diff options
context:
space:
mode:
author Tim Harder <radhermit@gmail.com> 2021-07-28 23:39:28 -0600
committer Tim Harder <radhermit@gmail.com> 2021-07-28 23:42:31 -0600
commit 57b8fafce24ae1959705eca04248153d654c9ba8 (patch)
tree 9cfb77e98229a72d1edd9959d3bae86fd70be783
parent pkgdev commit: use filtered pkg list for manifest file targets (diff)
download pkgdev-57b8fafce24ae1959705eca04248153d654c9ba8.tar.gz
pkgdev-57b8fafce24ae1959705eca04248153d654c9ba8.tar.bz2
pkgdev-57b8fafce24ae1959705eca04248153d654c9ba8.zip
pkgdev commit: run mangling across change objects instead of raw paths
This allows mangling functions to inspect change attributes and adjust their behavior accordingly. For example, the entire date range in the copyright header is now replaced for new files, instead of keeping the original start date from the old files they were based on.
-rw-r--r-- src/pkgdev/mangle.py 52
-rw-r--r-- src/pkgdev/scripts/pkgdev_commit.py 52
-rw-r--r-- tests/test_mangle.py 25
3 files changed, 80 insertions, 49 deletions
diff --git a/src/pkgdev/mangle.py b/src/pkgdev/mangle.py
index fd4cac5..c957e58 100644
--- a/src/pkgdev/mangle.py
+++ b/src/pkgdev/mangle.py
@@ -12,7 +12,7 @@ from snakeoil.cli.exceptions import UserException
from snakeoil.mappings import OrderedSet
copyright_regex = re.compile(
- r'^# Copyright (?P<begin>\d{4}-)?(?P<end>\d{4}) (?P<holder>.+)$')
+ r'^# Copyright (?P<date>(?P<begin>\d{4}-)?(?P<end>\d{4})) (?P<holder>.+)$')
def mangle(name):
@@ -37,11 +37,11 @@ class Mangler:
# mapping of mangling types to functions
_mangle_funcs = {}
- def __init__(self, paths, skip_regex=None):
+ def __init__(self, changes, skip_regex=None):
self.jobs = os.cpu_count()
if skip_regex is not None:
- paths = (x for x in paths if not skip_regex.match(x))
- self.paths = OrderedSet(paths)
+ changes = (c for c in changes if not skip_regex.match(c.full_path))
+ self.changes = OrderedSet(changes)
# setup for parallelizing the mangling procedure across files
self._mp_ctx = multiprocessing.get_context('fork')
@@ -58,9 +58,9 @@ class Mangler:
lambda f, g: lambda x: f(g(self, x)), self._mangle_funcs.values(), lambda x: x)
@mangle('EOF')
- def _eof(self, data):
+ def _eof(self, change):
"""Drop EOF whitespace and forcibly add EOF newline."""
- return data.rstrip() + '\n'
+ return change.update(change.data.rstrip() + '\n')
def _kill_pipe(self, *args, error=None):
"""Handle terminating the mangling process group."""
@@ -90,26 +90,20 @@ class Mangler:
return path
- def _mangle_file(self, path):
- """Run composed mangling function across a given file path."""
- try:
- with open(path, 'r+', encoding='utf-8') as f:
- if orig_data := f.read():
- data = self.composed_func(orig_data)
- if data != orig_data:
- f.seek(0)
- f.truncate()
- f.write(data)
- return path
- except (FileNotFoundError, UnicodeDecodeError):
- pass
+ def _mangle(self, change):
+ """Run composed mangling function across a given change."""
+ if orig_data := change.read():
+ change = self.composed_func(change)
+ if change.data != orig_data:
+ change.sync()
+ return change
def _run_manglers(self, paths_q):
"""Consumer that runs mangling functions, queuing mangled paths for output."""
try:
- for path in iter(paths_q.get, None):
- if mangled_path := self._mangle_file(path):
- self._mangled_paths_q.put(mangled_path)
+ for change in iter(paths_q.get, None):
+ if mangled_change := self._mangle(change):
+ self._mangled_paths_q.put(mangled_change.path)
except Exception: # pragma: no cover
# traceback can't be pickled so serialize it
tb = traceback.format_exc()
@@ -124,8 +118,8 @@ class Mangler:
pool.close()
# queue paths for processing
- for path in self.paths:
- paths_q.put(path)
+ for change in self.changes:
+ paths_q.put(change)
# notify consumers that no more work exists
for i in range(self.jobs):
paths_q.put(None)
@@ -141,10 +135,12 @@ class GentooMangler(Mangler):
_mangle_funcs = Mangler._mangle_funcs.copy()
@mangle('copyright')
- def _copyright(self, data):
+ def _copyright(self, change):
"""Fix copyright headers and dates."""
- lines = data.splitlines()
+ lines = change.data.splitlines()
if mo := copyright_regex.match(lines[0]):
- lines[0] = re.sub(mo.group('end'), self._current_year, lines[0])
+ # replace entire date range for new files
+ group = 'date' if change.status == 'A' else 'end'
+ lines[0] = re.sub(mo.group(group), self._current_year, lines[0])
lines[0] = re.sub('Gentoo Foundation', 'Gentoo Authors', lines[0])
- return '\n'.join(lines) + '\n'
+ return change.update('\n'.join(lines) + '\n')
diff --git a/src/pkgdev/scripts/pkgdev_commit.py b/src/pkgdev/scripts/pkgdev_commit.py
index 4a30a51..920555b 100644
--- a/src/pkgdev/scripts/pkgdev_commit.py
+++ b/src/pkgdev/scripts/pkgdev_commit.py
@@ -490,21 +490,28 @@ class GitChanges(UserDict):
if status == 'R' and (om := self._ebuild_re.match(old_path)):
old = atom_cls(f"={om.group('category')}/{om.group('package')}")
changes[PkgChange].add(PkgChange(
- status, path, atom=atom, ebuild=True, old=old))
+ self._repo.location, status, path, atom=atom, ebuild=True, old=old))
except MalformedAtom:
continue
else:
# non-ebuild package level changes
atom = atom_cls(os.sep.join(path_components[:2]))
- changes[PkgChange].add(PkgChange(status, path, atom=atom, ebuild=False))
+ changes[PkgChange].add(
+ PkgChange(self._repo.location, status, path, atom=atom, ebuild=False))
elif mo := self._eclass_re.match(path):
- changes[EclassChange].add(EclassChange(status, path, name=mo.group('name')))
+ changes[EclassChange].add(
+ EclassChange(self._repo.location, status, path, name=mo.group('name')))
else:
- changes[path_components[0]].add(Change(status, path))
+ changes[path_components[0]].add(Change(self._repo.location, status, path))
return changes
@jit_attr
+ def all(self):
+ """Ordered set of all change objects."""
+ return OrderedFrozenSet(chain.from_iterable(self.data.values()))
+
+ @jit_attr
def pkg_changes(self):
"""Ordered set of all package change objects."""
return OrderedFrozenSet(self.data.get(PkgChange, ()))
@@ -515,11 +522,6 @@ class GitChanges(UserDict):
return OrderedFrozenSet(x for x in self.pkg_changes if x.ebuild)
@jit_attr
- def paths(self):
- """Ordered set of all staged paths."""
- return OrderedFrozenSet(x.path for x in chain.from_iterable(self.data.values()))
-
- @jit_attr
def prefix(self):
"""Determine commit message prefix using GLEP 66 as a guide.
@@ -573,10 +575,38 @@ class GitChanges(UserDict):
@dataclass(frozen=True)
class Change:
"""Generic file change."""
+ repo: str
status: str
path: str
@property
+ def full_path(self):
+ return pjoin(self.repo, self.path)
+
+ def read(self):
+ """Read data from the change's file."""
+ try:
+ with open(self.full_path, 'r', encoding='utf-8') as f:
+ data = f.read()
+ except (FileNotFoundError, UnicodeDecodeError):
+ data = None
+ object.__setattr__(self, "data", data)
+ return data
+
+ def update(self, data):
+ """Update the change's cached file data."""
+ object.__setattr__(self, "data", data)
+ return self
+
+ def sync(self):
+ """Write the change's cached file data back to its file."""
+ try:
+ with open(self.full_path, 'w', encoding='utf-8') as f:
+ f.write(self.data)
+ except AttributeError:
+ pass
+
+ @property
def prefix(self):
if os.sep in self.path:
# use change path's parent directory
@@ -736,8 +766,8 @@ def _commit(options, out, err):
# don't mangle FILESDIR content
skip_regex = re.compile(rf'^{repo.location}/[^/]+/[^/]+/files/.+$')
mangler = GentooMangler if options.gentoo_repo else Mangler
- paths = (pjoin(repo.location, x) for x in changes.paths)
- options.git_add_files.extend(mangler(paths, skip_regex=skip_regex))
+ files = mangler(changes.all, skip_regex=skip_regex)
+ options.git_add_files.extend(files)
# stage modified files
if options.git_add_files:
diff --git a/tests/test_mangle.py b/tests/test_mangle.py
index 1df646c..651f8b4 100644
--- a/tests/test_mangle.py
+++ b/tests/test_mangle.py
@@ -2,23 +2,29 @@ import os
import multiprocessing
import re
import signal
+from functools import partial
from unittest.mock import patch
from pkgdev.mangle import Mangler
+from pkgdev.scripts.pkgdev_commit import Change
import pytest
from snakeoil.cli.exceptions import UserException
+def fake_change(s):
+ return Change('/repo', 'A', str(s))
+
+
class TestMangler:
def test_nonexistent_file(self, tmp_path):
path = tmp_path / 'nonexistent'
- assert list(Mangler([str(path)])) == []
+ assert list(Mangler([fake_change(path)])) == []
def test_empty_file(self, tmp_path):
path = tmp_path / 'empty'
path.touch()
- assert list(Mangler([str(path)])) == []
+ assert list(Mangler([fake_change(path)])) == []
def test_skipped_file(self, tmp_path):
paths = [(tmp_path / x) for x in ('file', 'file.patch')]
@@ -27,24 +33,24 @@ class TestMangler:
p.write_text('# comment')
# skip patch files
skip_regex = re.compile(r'.+\.patch$')
- mangled_paths = set(Mangler(map(str, paths), skip_regex=skip_regex))
+ mangled_paths = set(Mangler(map(fake_change, paths), skip_regex=skip_regex))
assert mangled_paths == {str(tmp_path / 'file')}
for p in paths:
p.write_text('# comment')
# don't skip any files
- mangled_paths = set(Mangler(map(str, paths)))
+ mangled_paths = set(Mangler(map(fake_change, paths)))
assert mangled_paths == set(map(str, paths))
def test_nonmangled_file(self, tmp_path):
path = tmp_path / 'file'
path.write_text('# comment\n')
- assert list(Mangler([str(path)])) == []
+ assert list(Mangler([fake_change(path)])) == []
def test_mangled_file(self, tmp_path):
path = tmp_path / 'file'
path.write_text('# comment')
- assert list(Mangler([str(path)])) == [str(path)]
+ assert list(Mangler([fake_change(path)])) == [str(path)]
assert path.read_text() == '# comment\n'
def test_iterator_exceptions(self, tmp_path):
@@ -55,9 +61,9 @@ class TestMangler:
def _mangle_func(self, data):
raise Exception('func failed')
- with patch('pkgdev.mangle.Mangler._mangle_file', _mangle_func):
+ with patch('pkgdev.mangle.Mangler._mangle', _mangle_func):
with pytest.raises(UserException, match='Exception: func failed'):
- list(Mangler([str(path)]))
+ list(Mangler([fake_change(path)]))
def test_sigint_handling(self, tmp_path):
"""Verify SIGINT is properly handled by the parallelized pipeline."""
@@ -68,7 +74,6 @@ class TestMangler:
"""Mangler run in a separate process that gets interrupted."""
import sys
import time
- from functools import partial
from unittest.mock import patch
from pkgdev.mangle import Mangler
@@ -81,7 +86,7 @@ class TestMangler:
with patch('pkgdev.mangle.Mangler.__iter__') as fake_iter:
fake_iter.side_effect = partial(sleep)
try:
- iter(Mangler([str(path)]))
+ iter(Mangler([fake_change(path)]))
except KeyboardInterrupt:
queue.put(None)
sys.exit(0)