aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/pkgdev/mangle.py 52
-rw-r--r-- src/pkgdev/scripts/pkgdev_commit.py 52
-rw-r--r-- tests/test_mangle.py 25
3 files changed, 80 insertions, 49 deletions
diff --git a/src/pkgdev/mangle.py b/src/pkgdev/mangle.py
index fd4cac5..c957e58 100644
--- a/src/pkgdev/mangle.py
+++ b/src/pkgdev/mangle.py
@@ -12,7 +12,7 @@ from snakeoil.cli.exceptions import UserException
from snakeoil.mappings import OrderedSet
copyright_regex = re.compile(
- r'^# Copyright (?P<begin>\d{4}-)?(?P<end>\d{4}) (?P<holder>.+)$')
+ r'^# Copyright (?P<date>(?P<begin>\d{4}-)?(?P<end>\d{4})) (?P<holder>.+)$')
def mangle(name):
@@ -37,11 +37,11 @@ class Mangler:
# mapping of mangling types to functions
_mangle_funcs = {}
- def __init__(self, paths, skip_regex=None):
+ def __init__(self, changes, skip_regex=None):
self.jobs = os.cpu_count()
if skip_regex is not None:
- paths = (x for x in paths if not skip_regex.match(x))
- self.paths = OrderedSet(paths)
+ changes = (c for c in changes if not skip_regex.match(c.full_path))
+ self.changes = OrderedSet(changes)
# setup for parallelizing the mangling procedure across files
self._mp_ctx = multiprocessing.get_context('fork')
@@ -58,9 +58,9 @@ class Mangler:
lambda f, g: lambda x: f(g(self, x)), self._mangle_funcs.values(), lambda x: x)
@mangle('EOF')
- def _eof(self, data):
+ def _eof(self, change):
"""Drop EOF whitespace and forcibly add EOF newline."""
- return data.rstrip() + '\n'
+ return change.update(change.data.rstrip() + '\n')
def _kill_pipe(self, *args, error=None):
"""Handle terminating the mangling process group."""
@@ -90,26 +90,20 @@ class Mangler:
return path
- def _mangle_file(self, path):
- """Run composed mangling function across a given file path."""
- try:
- with open(path, 'r+', encoding='utf-8') as f:
- if orig_data := f.read():
- data = self.composed_func(orig_data)
- if data != orig_data:
- f.seek(0)
- f.truncate()
- f.write(data)
- return path
- except (FileNotFoundError, UnicodeDecodeError):
- pass
+ def _mangle(self, change):
+ """Run composed mangling function across a given change."""
+ if orig_data := change.read():
+ change = self.composed_func(change)
+ if change.data != orig_data:
+ change.sync()
+ return change
def _run_manglers(self, paths_q):
"""Consumer that runs mangling functions, queuing mangled paths for output."""
try:
- for path in iter(paths_q.get, None):
- if mangled_path := self._mangle_file(path):
- self._mangled_paths_q.put(mangled_path)
+ for change in iter(paths_q.get, None):
+ if mangled_change := self._mangle(change):
+ self._mangled_paths_q.put(mangled_change.path)
except Exception: # pragma: no cover
# traceback can't be pickled so serialize it
tb = traceback.format_exc()
@@ -124,8 +118,8 @@ class Mangler:
pool.close()
# queue paths for processing
- for path in self.paths:
- paths_q.put(path)
+ for change in self.changes:
+ paths_q.put(change)
# notify consumers that no more work exists
for i in range(self.jobs):
paths_q.put(None)
@@ -141,10 +135,12 @@ class GentooMangler(Mangler):
_mangle_funcs = Mangler._mangle_funcs.copy()
@mangle('copyright')
- def _copyright(self, data):
+ def _copyright(self, change):
"""Fix copyright headers and dates."""
- lines = data.splitlines()
+ lines = change.data.splitlines()
if mo := copyright_regex.match(lines[0]):
- lines[0] = re.sub(mo.group('end'), self._current_year, lines[0])
+ # replace entire date range for new files
+ group = 'date' if change.status == 'A' else 'end'
+ lines[0] = re.sub(mo.group(group), self._current_year, lines[0])
lines[0] = re.sub('Gentoo Foundation', 'Gentoo Authors', lines[0])
- return '\n'.join(lines) + '\n'
+ return change.update('\n'.join(lines) + '\n')
diff --git a/src/pkgdev/scripts/pkgdev_commit.py b/src/pkgdev/scripts/pkgdev_commit.py
index 4a30a51..920555b 100644
--- a/src/pkgdev/scripts/pkgdev_commit.py
+++ b/src/pkgdev/scripts/pkgdev_commit.py
@@ -490,21 +490,28 @@ class GitChanges(UserDict):
if status == 'R' and (om := self._ebuild_re.match(old_path)):
old = atom_cls(f"={om.group('category')}/{om.group('package')}")
changes[PkgChange].add(PkgChange(
- status, path, atom=atom, ebuild=True, old=old))
+ self._repo.location, status, path, atom=atom, ebuild=True, old=old))
except MalformedAtom:
continue
else:
# non-ebuild package level changes
atom = atom_cls(os.sep.join(path_components[:2]))
- changes[PkgChange].add(PkgChange(status, path, atom=atom, ebuild=False))
+ changes[PkgChange].add(
+ PkgChange(self._repo.location, status, path, atom=atom, ebuild=False))
elif mo := self._eclass_re.match(path):
- changes[EclassChange].add(EclassChange(status, path, name=mo.group('name')))
+ changes[EclassChange].add(
+ EclassChange(self._repo.location, status, path, name=mo.group('name')))
else:
- changes[path_components[0]].add(Change(status, path))
+ changes[path_components[0]].add(Change(self._repo.location, status, path))
return changes
@jit_attr
def all(self):
    """Ordered set of every change object across all change groups."""
    grouped_changes = self.data.values()
    return OrderedFrozenSet(chain.from_iterable(grouped_changes))
+
+ @jit_attr
def pkg_changes(self):
"""Ordered set of all package change objects."""
return OrderedFrozenSet(self.data.get(PkgChange, ()))
@@ -515,11 +522,6 @@ class GitChanges(UserDict):
return OrderedFrozenSet(x for x in self.pkg_changes if x.ebuild)
@jit_attr
- def paths(self):
- """Ordered set of all staged paths."""
- return OrderedFrozenSet(x.path for x in chain.from_iterable(self.data.values()))
-
- @jit_attr
def prefix(self):
"""Determine commit message prefix using GLEP 66 as a guide.
@@ -573,10 +575,38 @@ class GitChanges(UserDict):
@dataclass(frozen=True)
class Change:
"""Generic file change."""
+ repo: str
status: str
path: str
@property
def full_path(self):
    """Path of the changed file, joined onto the change's repo location."""
    repo, path = self.repo, self.path
    return pjoin(repo, path)
+
def read(self):
    """Read data from the change's file and cache it on the change.

    Returns the decoded file content, or None when the path does not
    exist, is a directory, or does not hold valid UTF-8 text. The result
    (including None) is also stored as the ``data`` attribute.
    """
    try:
        with open(self.full_path, 'r', encoding='utf-8') as f:
            data = f.read()
    except (FileNotFoundError, IsADirectoryError, UnicodeDecodeError):
        # nothing readable as text at this path, e.g. a removed file, a
        # directory entry, or binary content
        data = None
    # the dataclass is frozen, so bypass its __setattr__ to cache the data
    object.__setattr__(self, "data", data)
    return data
+
def update(self, data):
    """Store ``data`` as the change's cached file content.

    Returns the change itself so calls can be chained.
    """
    # instances are frozen dataclasses, hence the object.__setattr__ detour
    object.__setattr__(self, "data", data)
    return self
+
def sync(self):
    """Write the change's cached file data back to its file.

    Does nothing when no data is cached on the change: either the ``data``
    attribute was never set, or it holds None.
    """
    # Look up the cached data *before* opening the file: mode 'w' truncates
    # on open, so fetching it inside the ``with`` block would wipe the file
    # on disk whenever there is nothing to write.
    data = getattr(self, "data", None)
    if data is None:
        return
    with open(self.full_path, 'w', encoding='utf-8') as f:
        f.write(data)
+
+ @property
def prefix(self):
if os.sep in self.path:
# use change path's parent directory
@@ -736,8 +766,8 @@ def _commit(options, out, err):
# don't mangle FILESDIR content
skip_regex = re.compile(rf'^{repo.location}/[^/]+/[^/]+/files/.+$')
mangler = GentooMangler if options.gentoo_repo else Mangler
- paths = (pjoin(repo.location, x) for x in changes.paths)
- options.git_add_files.extend(mangler(paths, skip_regex=skip_regex))
+ files = mangler(changes.all, skip_regex=skip_regex)
+ options.git_add_files.extend(files)
# stage modified files
if options.git_add_files:
diff --git a/tests/test_mangle.py b/tests/test_mangle.py
index 1df646c..651f8b4 100644
--- a/tests/test_mangle.py
+++ b/tests/test_mangle.py
@@ -2,23 +2,29 @@ import os
import multiprocessing
import re
import signal
+from functools import partial
from unittest.mock import patch
from pkgdev.mangle import Mangler
+from pkgdev.scripts.pkgdev_commit import Change
import pytest
from snakeoil.cli.exceptions import UserException
def fake_change(s):
    """Build a Change with status 'A' for path ``s`` under the '/repo' repo."""
    path = str(s)
    return Change('/repo', 'A', path)
+
+
class TestMangler:
def test_nonexistent_file(self, tmp_path):
path = tmp_path / 'nonexistent'
- assert list(Mangler([str(path)])) == []
+ assert list(Mangler([fake_change(path)])) == []
def test_empty_file(self, tmp_path):
path = tmp_path / 'empty'
path.touch()
- assert list(Mangler([str(path)])) == []
+ assert list(Mangler([fake_change(path)])) == []
def test_skipped_file(self, tmp_path):
paths = [(tmp_path / x) for x in ('file', 'file.patch')]
@@ -27,24 +33,24 @@ class TestMangler:
p.write_text('# comment')
# skip patch files
skip_regex = re.compile(r'.+\.patch$')
- mangled_paths = set(Mangler(map(str, paths), skip_regex=skip_regex))
+ mangled_paths = set(Mangler(map(fake_change, paths), skip_regex=skip_regex))
assert mangled_paths == {str(tmp_path / 'file')}
for p in paths:
p.write_text('# comment')
# don't skip any files
- mangled_paths = set(Mangler(map(str, paths)))
+ mangled_paths = set(Mangler(map(fake_change, paths)))
assert mangled_paths == set(map(str, paths))
def test_nonmangled_file(self, tmp_path):
path = tmp_path / 'file'
path.write_text('# comment\n')
- assert list(Mangler([str(path)])) == []
+ assert list(Mangler([fake_change(path)])) == []
def test_mangled_file(self, tmp_path):
path = tmp_path / 'file'
path.write_text('# comment')
- assert list(Mangler([str(path)])) == [str(path)]
+ assert list(Mangler([fake_change(path)])) == [str(path)]
assert path.read_text() == '# comment\n'
def test_iterator_exceptions(self, tmp_path):
@@ -55,9 +61,9 @@ class TestMangler:
def _mangle_func(self, data):
raise Exception('func failed')
- with patch('pkgdev.mangle.Mangler._mangle_file', _mangle_func):
+ with patch('pkgdev.mangle.Mangler._mangle', _mangle_func):
with pytest.raises(UserException, match='Exception: func failed'):
- list(Mangler([str(path)]))
+ list(Mangler([fake_change(path)]))
def test_sigint_handling(self, tmp_path):
"""Verify SIGINT is properly handled by the parallelized pipeline."""
@@ -68,7 +74,6 @@ class TestMangler:
"""Mangler run in a separate process that gets interrupted."""
import sys
import time
- from functools import partial
from unittest.mock import patch
from pkgdev.mangle import Mangler
@@ -81,7 +86,7 @@ class TestMangler:
with patch('pkgdev.mangle.Mangler.__iter__') as fake_iter:
fake_iter.side_effect = partial(sleep)
try:
- iter(Mangler([str(path)]))
+ iter(Mangler([fake_change(path)]))
except KeyboardInterrupt:
queue.put(None)
sys.exit(0)