From 1cbb68495a45a1a97425cbe40c3f3608eb1d0a9e Mon Sep 17 00:00:00 2001 From: Nicholas Vinson Date: Mon, 17 Mar 2025 11:30:19 -0400 Subject: [PATCH 01/10] add support for digest API Signed-off-by: Nicholas Vinson --- libarchive/entry.py | 81 +++++++++++++++++++++++++++++++++++++++++++++ libarchive/ffi.py | 38 ++++++++++++++++++++- 2 files changed, 118 insertions(+), 1 deletion(-) diff --git a/libarchive/entry.py b/libarchive/entry.py index 70701ef..cdbf203 100644 --- a/libarchive/entry.py +++ b/libarchive/entry.py @@ -86,6 +86,12 @@ def modify(self, header_codec=None, **attributes): rdev (int | Tuple[int, int]): device number, if the file is a device rdevmajor (int): major part of the device number rdevminor (int): minor part of the device number + md5Digest (bytes): MD5 digest + rmd160Digest (bytes): RMD160 digest + sha1Digest (bytes): SHA1 digest + sha256Digest (bytes): SHA256 digest + sha384Digest (bytes): SHA384 digest + sha512Digest (bytes): SHA512 digest """ if header_codec: self.header_codec = header_codec @@ -433,6 +439,81 @@ def rdevminor(self): def rdevminor(self, value): ffi.entry_set_rdevminor(self._entry_p, value) + @property + def md5Digest(self): + return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_MD5) + + @md5Digest.setter + def md5Digest(self, value): + self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_MD5, value) + + @property + def rmd160Digest(self): + return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_RMD160) + + @rmd160Digest.setter + def rmd160Digest(self, value): + self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_RMD160, value) + + @property + def sha1Digest(self): + return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA1) + + @sha1Digest.setter + def sha1Digest(self, value): + self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA1, value) + + @property + def sha256Digest(self): + return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA256) + + @sha256Digest.setter + def sha256Digest(self, value): + self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA256, value) + + @property + def sha384Digest(self): + return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA384) + + @sha384Digest.setter + def sha384Digest(self, value): + self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA384, value) + + @property + def sha512Digest(self): + return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA512) + + @sha512Digest.setter + def sha512Digest(self, value): + self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA512, value) + + def _digest(self, digestType): + try: + ptr = ffi.entry_digest(self._entry_p, digestType) + if ptr: + return bytes(ptr[:ffi._DIGEST_LENGTHS[digestType - 1]]) + except AttributeError: + raise NotImplementedError(f"the libarchive being used (version " + f"{ffi.version_number()} path " + f"{ffi.libarchive_path}) doesn't " + f"support read-only digest APIs") + return None + + def _set_digest(self, digestType, value): + try: + digestLen = ffi._DIGEST_LENGTHS[digestType - 1] + if len(value) != digestLen: + raise ValueError(f"Invalid input digest Expected {digestLen} " + f"bytes. Got {len(value)}.") + buffer = (digestLen * ffi.c_ubyte)(*value) + ffi.entry_set_digest(self._entry_p, digestType, buffer) + except AttributeError: + raise NotImplementedError(f"the libarchive being used (version " + f"{ffi.version_number()} path " + f"{ffi.libarchive_path}) doesn't support " + f"writable digest APIs") + return None + class ConsumedArchiveEntry(ArchiveEntry): diff --git a/libarchive/ffi.py b/libarchive/ffi.py index 172fe87..65868e9 100644 --- a/libarchive/ffi.py +++ b/libarchive/ffi.py @@ -1,6 +1,6 @@ from ctypes import ( c_char_p, c_int, c_uint, c_long, c_longlong, c_size_t, c_int64, - c_void_p, c_wchar_p, CFUNCTYPE, POINTER, + c_ubyte, c_void_p, c_wchar_p, CFUNCTYPE, POINTER, ) try: @@ -365,3 +365,39 @@ def get_write_filter_function(filter_name): f"the libarchive being used (version {version_number()}, " f"path {libarchive_path}) doesn't support encryption" ) + +# archive digest API +try: + ffi('entry_digest', [c_archive_entry_p, c_int], POINTER(c_ubyte)) + + ARCHIVE_ENTRY_DIGEST_MD5 = 1 + ARCHIVE_ENTRY_DIGEST_RMD160 = 2 + ARCHIVE_ENTRY_DIGEST_SHA1 = 3 + ARCHIVE_ENTRY_DIGEST_SHA256 = 4 + ARCHIVE_ENTRY_DIGEST_SHA384 = 5 + ARCHIVE_ENTRY_DIGEST_SHA512 = 6 + + _DIGEST_LENGTHS = [ + 16, # MD5 + 20, # RMD160 + 20, # SHA1 + 32, # SHA256 + 48, # SHA384 + 64, # SHA512 + ] + +except AttributeError: + logger.info( + f"the libarchive being used (version {version_number()}, " + f"path {libarchive_path}) doesn't support read-only message digest API" + ) + +try: + ffi('entry_set_digest', + [ctypes.c_void_p, ctypes.c_int, ctypes.POINTER(ctypes.c_ubyte)], + ctypes.c_int) +except AttributeError: + logger.info( + f"the libarchive being used (version {version_number()}, " + f"path {libarchive_path}) doesn't support mutable message digest API" + ) From 99f9eff2581bca31c8ba606bff6fb78beef5c932 Mon Sep 17 00:00:00 2001 From: Changaco Date: Sun, 30 Mar 2025 10:48:33 +0200 Subject: [PATCH 02/10] don't introduce camelCase where there was none --- libarchive/entry.py | 66 ++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/libarchive/entry.py b/libarchive/entry.py index cdbf203..2fa0416 100644 --- a/libarchive/entry.py +++ b/libarchive/entry.py @@ -86,12 +86,12 @@ def modify(self, header_codec=None, **attributes): rdev (int | Tuple[int, int]): device number, if the file is a device rdevmajor (int): major part of the device number rdevminor (int): minor part of the device number - md5Digest (bytes): MD5 digest - rmd160Digest (bytes): RMD160 digest - sha1Digest (bytes): SHA1 digest - sha256Digest (bytes): SHA256 digest - sha384Digest (bytes): SHA384 digest - sha512Digest (bytes): SHA512 digest + md5digest (bytes): MD5 digest + rmd160digest (bytes): RMD160 digest + sha1digest (bytes): SHA1 digest + sha256digest (bytes): SHA256 digest + sha384digest (bytes): SHA384 digest + sha512digest (bytes): SHA512 digest """ if header_codec: self.header_codec = header_codec @@ -440,58 +440,58 @@ def rdevminor(self, value): ffi.entry_set_rdevminor(self._entry_p, value) @property - def md5Digest(self): + def md5digest(self): return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_MD5) - @md5Digest.setter - def md5Digest(self, value): + @md5digest.setter + def md5digest(self, value): self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_MD5, value) @property - def rmd160Digest(self): + def rmd160digest(self): return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_RMD160) - @rmd160Digest.setter - def rmd160Digest(self, value): + @rmd160digest.setter + def rmd160digest(self, value): self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_RMD160, value) @property - def sha1Digest(self): + def sha1digest(self): return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA1) - @sha1Digest.setter - def sha1Digest(self, value): + @sha1digest.setter + def sha1digest(self, value): self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA1, value) @property - def sha256Digest(self): + def sha256digest(self): return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA256) - @sha256Digest.setter - def sha256Digest(self, value): + @sha256digest.setter + def sha256digest(self, value): self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA256, value) @property - def sha384Digest(self): + def sha384digest(self): return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA384) - @sha384Digest.setter - def sha384Digest(self, value): + @sha384digest.setter + def sha384digest(self, value): self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA384, value) @property - def sha512Digest(self): + def sha512digest(self): return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA512) - @sha512Digest.setter - def sha512Digest(self, value): + @sha512digest.setter + def sha512digest(self, value): self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA512, value) - def _digest(self, digestType): + def _digest(self, digest_type): try: - ptr = ffi.entry_digest(self._entry_p, digestType) + ptr = ffi.entry_digest(self._entry_p, digest_type) if ptr: - return bytes(ptr[:ffi._DIGEST_LENGTHS[digestType - 1]]) + return bytes(ptr[:ffi._DIGEST_LENGTHS[digest_type - 1]]) except AttributeError: raise NotImplementedError(f"the libarchive being used (version " f"{ffi.version_number()} path " @@ -499,14 +499,14 @@ def _digest(self, digestType): f"support read-only digest APIs") return None - def _set_digest(self, digestType, value): + def _set_digest(self, digest_type, value): try: - digestLen = ffi._DIGEST_LENGTHS[digestType - 1] - if len(value) != digestLen: - raise ValueError(f"Invalid input digest Expected {digestLen} " + digest_length = ffi._DIGEST_LENGTHS[digest_type - 1] + if len(value) != digest_length: + raise ValueError(f"Invalid input digest Expected {digest_length} " f"bytes. Got {len(value)}.") - buffer = (digestLen * ffi.c_ubyte)(*value) - ffi.entry_set_digest(self._entry_p, digestType, buffer) + buffer = (digest_length * ffi.c_ubyte)(*value) + ffi.entry_set_digest(self._entry_p, digest_type, buffer) except AttributeError: raise NotImplementedError(f"the libarchive being used (version " f"{ffi.version_number()} path " From 8a0ec1dc40e1e248e1606fca2981493080d44de8 Mon Sep 17 00:00:00 2001 From: Changaco Date: Sun, 30 Mar 2025 14:25:15 +0200 Subject: [PATCH 03/10] more rewording and refactoring --- libarchive/entry.py | 131 ++++++++++++++++++++++++-------------------- libarchive/ffi.py | 49 +++++++++-------- 2 files changed, 99 insertions(+), 81 deletions(-) diff --git a/libarchive/entry.py b/libarchive/entry.py index 2fa0416..c8aa2cf 100644 --- a/libarchive/entry.py +++ b/libarchive/entry.py @@ -4,6 +4,7 @@ import math from . import ffi +from .exception import ArchiveError class FileType(IntEnum): @@ -86,12 +87,12 @@ def modify(self, header_codec=None, **attributes): rdev (int | Tuple[int, int]): device number, if the file is a device rdevmajor (int): major part of the device number rdevminor (int): minor part of the device number - md5digest (bytes): MD5 digest - rmd160digest (bytes): RMD160 digest - sha1digest (bytes): SHA1 digest - sha256digest (bytes): SHA256 digest - sha384digest (bytes): SHA384 digest - sha512digest (bytes): SHA512 digest + md5 (bytes): MD5 digest + rmd160 (bytes): RMD160 digest + sha1 (bytes): SHA1 digest + sha256 (bytes): SHA256 digest + sha384 (bytes): SHA384 digest + sha512 (bytes): SHA512 digest """ if header_codec: self.header_codec = header_codec @@ -440,79 +441,93 @@ def rdevminor(self, value): ffi.entry_set_rdevminor(self._entry_p, value) @property - def md5digest(self): - return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_MD5) + def md5(self): + return self.get_stored_digest('md5') - @md5digest.setter - def md5digest(self, value): - self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_MD5, value) + @md5.setter + def md5(self, value): + self.set_stored_digest('md5', value) @property - def rmd160digest(self): - return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_RMD160) + def rmd160(self): + return self.get_stored_digest('rmd160') - @rmd160digest.setter - def rmd160digest(self, value): - self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_RMD160, value) + @rmd160.setter + def rmd160(self, value): + self.set_stored_digest('rmd160', value) @property - def sha1digest(self): - return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA1) + def sha1(self): + return self.get_stored_digest('sha1') - @sha1digest.setter - def sha1digest(self, value): - self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA1, value) + @sha1.setter + def sha1(self, value): + self.set_stored_digest('sha1', value) @property - def sha256digest(self): - return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA256) + def sha256(self): + return self.get_stored_digest('sha256') - @sha256digest.setter - def sha256digest(self, value): - self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA256, value) + @sha256.setter + def sha256(self, value): + self.set_stored_digest('sha256', value) @property - def sha384digest(self): - return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA384) + def sha384(self): + return self.get_stored_digest('sha384') - @sha384digest.setter - def sha384digest(self, value): - self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA384, value) + @sha384.setter + def sha384(self, value): + self.set_stored_digest('sha384', value) @property - def sha512digest(self): - return self._digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA512) + def sha512(self): + return self.get_stored_digest('sha512') - @sha512digest.setter - def sha512digest(self, value): - self._set_digest(ffi.ARCHIVE_ENTRY_DIGEST_SHA512, value) + @sha512.setter + def sha512(self, value): + self.set_stored_digest('sha512', value) - def _digest(self, digest_type): + def get_stored_digest(self, algorithm_name): + algorithm = ffi.DIGEST_ALGORITHMS[algorithm_name] try: - ptr = ffi.entry_digest(self._entry_p, digest_type) - if ptr: - return bytes(ptr[:ffi._DIGEST_LENGTHS[digest_type - 1]]) + ptr = ffi.entry_digest(self._entry_p, algorithm.libarchive_id) except AttributeError: - raise NotImplementedError(f"the libarchive being used (version " - f"{ffi.version_number()} path " - f"{ffi.libarchive_path}) doesn't " - f"support read-only digest APIs") - return None - - def _set_digest(self, digest_type, value): + raise NotImplementedError( + f"the libarchive being used (version {ffi.version_number()}, path " + f"{ffi.libarchive_path}) doesn't support reading entry digests" + ) from None + except ArchiveError: + raise NotImplementedError( + f"the libarchive being used (version {ffi.version_number()}, path " + f"{ffi.libarchive_path}) doesn't support {algorithm_name} digests" + ) from None + return bytes(ptr[:algorithm.bytes_length]) + + def set_stored_digest(self, algorithm_name, value): + algorithm = ffi.DIGEST_ALGORITHMS[algorithm_name] + expected_length = algorithm.bytes_length + if len(value) != expected_length: + raise ValueError( + f"invalid input digest: expected {expected_length} bytes, " + f"got {len(value)}" + ) try: - digest_length = ffi._DIGEST_LENGTHS[digest_type - 1] - if len(value) != digest_length: - raise ValueError(f"Invalid input digest Expected {digest_length} " - f"bytes. Got {len(value)}.") - buffer = (digest_length * ffi.c_ubyte)(*value) - ffi.entry_set_digest(self._entry_p, digest_type, buffer) + retcode = ffi.entry_set_digest( + self._entry_p, + algorithm.libarchive_id, + (expected_length * ffi.c_ubyte)(*value) + ) except AttributeError: - raise NotImplementedError(f"the libarchive being used (version " - f"{ffi.version_number()} path " - f"{ffi.libarchive_path}) doesn't support " - f"writable digest APIs") - return None + raise NotImplementedError( + f"the libarchive being used (version {ffi.version_number()}, path " + f"{ffi.libarchive_path}) doesn't support writing entry digests" + ) from None + if retcode < 0: + raise NotImplementedError( + f"the libarchive being used (version {ffi.version_number()}, path " + f"{ffi.libarchive_path}) doesn't support {algorithm_name} digests" + ) from None class ConsumedArchiveEntry(ArchiveEntry): diff --git a/libarchive/ffi.py b/libarchive/ffi.py index 65868e9..d960b59 100644 --- a/libarchive/ffi.py +++ b/libarchive/ffi.py @@ -366,38 +366,41 @@ def get_write_filter_function(filter_name): f"path {libarchive_path}) doesn't support encryption" ) -# archive digest API -try: - ffi('entry_digest', [c_archive_entry_p, c_int], POINTER(c_ubyte)) - - ARCHIVE_ENTRY_DIGEST_MD5 = 1 - ARCHIVE_ENTRY_DIGEST_RMD160 = 2 - ARCHIVE_ENTRY_DIGEST_SHA1 = 3 - ARCHIVE_ENTRY_DIGEST_SHA256 = 4 - ARCHIVE_ENTRY_DIGEST_SHA384 = 5 - ARCHIVE_ENTRY_DIGEST_SHA512 = 6 - - _DIGEST_LENGTHS = [ - 16, # MD5 - 20, # RMD160 - 20, # SHA1 - 32, # SHA256 - 48, # SHA384 - 64, # SHA512 - ] +# archive entry digests (a.k.a. hashes) + +class DigestAlgorithm: + __slots__ = ('name', 'libarchive_id', 'bytes_length') + + def __init__(self, name, libarchive_id, bytes_length): + self.name = name + self.libarchive_id = libarchive_id + self.bytes_length = bytes_length + + +DIGEST_ALGORITHMS = { + 'md5': DigestAlgorithm('md5', libarchive_id=1, bytes_length=16), + 'rmd160': DigestAlgorithm('rmd160', libarchive_id=2, bytes_length=20), + 'sha1': DigestAlgorithm('sha1', libarchive_id=3, bytes_length=20), + 'sha256': DigestAlgorithm('sha256', libarchive_id=4, bytes_length=32), + 'sha384': DigestAlgorithm('sha384', libarchive_id=5, bytes_length=48), + 'sha512': DigestAlgorithm('sha512', libarchive_id=6, bytes_length=64), +} + +try: + ffi('entry_digest', [c_archive_entry_p, c_int], POINTER(c_ubyte), check_null) except AttributeError: logger.info( f"the libarchive being used (version {version_number()}, " - f"path {libarchive_path}) doesn't support read-only message digest API" + f"path {libarchive_path}) doesn't support reading entry digests" ) try: ffi('entry_set_digest', - [ctypes.c_void_p, ctypes.c_int, ctypes.POINTER(ctypes.c_ubyte)], - ctypes.c_int) + [c_archive_entry_p, c_int, POINTER(c_ubyte)], + c_int, check_int) except AttributeError: logger.info( f"the libarchive being used (version {version_number()}, " - f"path {libarchive_path}) doesn't support mutable message digest API" + f"path {libarchive_path}) doesn't support modifying entry digests" ) From 6e5bee066a9e714c2b891768ae78c30d8838ddfc Mon Sep 17 00:00:00 2001 From: Changaco Date: Sun, 30 Mar 2025 21:02:59 +0200 Subject: [PATCH 04/10] add test for archive entry digests --- tests/test_entry.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/test_entry.py b/tests/test_entry.py index 34543a5..0bb1704 100644 --- a/tests/test_entry.py +++ b/tests/test_entry.py @@ -9,7 +9,9 @@ import pytest -from libarchive import ArchiveError, memory_reader, memory_writer +from libarchive import ( + ArchiveError, ffi, file_reader, file_writer, memory_reader, memory_writer, +) from libarchive.entry import ArchiveEntry, ConsumedArchiveEntry, PassedArchiveEntry from . import data_dir, get_entries, get_tarinfos @@ -155,3 +157,26 @@ def test_non_ASCII_encoding_of_file_metadata(): with memory_reader(buf, header_codec='cp037') as archive: entry = next(iter(archive)) assert entry.pathname == file_name + + +@pytest.mark.xfail( + condition=ffi.version_number() < 3008000, + reason="libarchive < 3.8", +) +def test_writing_and_reading_entry_digests(tmpdir): + fake_hashes = dict( + md5=b'0000000000000000', + rmd160=b'00000000000000000000', + sha1=b'00000000000000000000', + sha256=b'00000000000000000000000000000000', + sha384=b'000000000000000000000000000000000000000000000000', + sha512=b'0000000000000000000000000000000000000000000000000000000000000000', + ) + archive_path = str(tmpdir / 'mtree') + with file_writer(archive_path, 'mtree') as archive: + # Add an empty file, with fake hashes. + archive.add_file_from_memory('empty.txt', 0, b'', **fake_hashes) + with file_reader(archive_path) as archive: + entry = next(iter(archive)) + for key, value in fake_hashes.items(): + assert getattr(entry, key) == value From 0c3098b8aceb080ffdc569db567b95c1c817c3d3 Mon Sep 17 00:00:00 2001 From: Changaco Date: Wed, 9 Apr 2025 14:32:50 +0200 Subject: [PATCH 05/10] read and write `unsigned char` more efficiently --- libarchive/entry.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libarchive/entry.py b/libarchive/entry.py index c8aa2cf..e6c4050 100644 --- a/libarchive/entry.py +++ b/libarchive/entry.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from ctypes import create_string_buffer +from ctypes import create_string_buffer, string_at from enum import IntEnum import math @@ -502,7 +502,7 @@ def get_stored_digest(self, algorithm_name): f"the libarchive being used (version {ffi.version_number()}, path " f"{ffi.libarchive_path}) doesn't support {algorithm_name} digests" ) from None - return bytes(ptr[:algorithm.bytes_length]) + return string_at(ptr, algorithm.bytes_length) def set_stored_digest(self, algorithm_name, value): algorithm = ffi.DIGEST_ALGORITHMS[algorithm_name] @@ -516,7 +516,7 @@ def set_stored_digest(self, algorithm_name, value): retcode = ffi.entry_set_digest( self._entry_p, algorithm.libarchive_id, - (expected_length * ffi.c_ubyte)(*value) + (expected_length * ffi.c_ubyte).from_buffer_copy(value) ) except AttributeError: raise NotImplementedError( From 10a11e37adb008230237497cb2a87ac034904952 Mon Sep 17 00:00:00 2001 From: Changaco Date: Wed, 9 Apr 2025 21:55:13 +0200 Subject: [PATCH 06/10] switch to a single `stored_digests` property --- libarchive/entry.py | 58 ++++++--------------------------------------- tests/test_entry.py | 5 ++-- 2 files changed, 9 insertions(+), 54 deletions(-) diff --git a/libarchive/entry.py b/libarchive/entry.py index e6c4050..519fae7 100644 --- a/libarchive/entry.py +++ b/libarchive/entry.py @@ -87,12 +87,7 @@ def modify(self, header_codec=None, **attributes): rdev (int | Tuple[int, int]): device number, if the file is a device rdevmajor (int): major part of the device number rdevminor (int): minor part of the device number - md5 (bytes): MD5 digest - rmd160 (bytes): RMD160 digest - sha1 (bytes): SHA1 digest - sha256 (bytes): SHA256 digest - sha384 (bytes): SHA384 digest - sha512 (bytes): SHA512 digest + stored_digests (dict[str, bytes]): hashes of the file's contents """ if header_codec: self.header_codec = header_codec @@ -441,52 +436,13 @@ def rdevminor(self, value): ffi.entry_set_rdevminor(self._entry_p, value) @property - def md5(self): - return self.get_stored_digest('md5') + def stored_digests(self): + return {name: self.get_stored_digest(name) for name in ffi.DIGEST_ALGORITHMS} - @md5.setter - def md5(self, value): - self.set_stored_digest('md5', value) - - @property - def rmd160(self): - return self.get_stored_digest('rmd160') - - @rmd160.setter - def rmd160(self, value): - self.set_stored_digest('rmd160', value) - - @property - def sha1(self): - return self.get_stored_digest('sha1') - - @sha1.setter - def sha1(self, value): - self.set_stored_digest('sha1', value) - - @property - def sha256(self): - return self.get_stored_digest('sha256') - - @sha256.setter - def sha256(self, value): - self.set_stored_digest('sha256', value) - - @property - def sha384(self): - return self.get_stored_digest('sha384') - - @sha384.setter - def sha384(self, value): - self.set_stored_digest('sha384', value) - - @property - def sha512(self): - return self.get_stored_digest('sha512') - - @sha512.setter - def sha512(self, value): - self.set_stored_digest('sha512', value) + @stored_digests.setter + def stored_digests(self, values): + for name, value in values.items(): + self.set_stored_digest(name, value) def get_stored_digest(self, algorithm_name): algorithm = ffi.DIGEST_ALGORITHMS[algorithm_name] diff --git a/tests/test_entry.py b/tests/test_entry.py index 0bb1704..95686c2 100644 --- a/tests/test_entry.py +++ b/tests/test_entry.py @@ -175,8 +175,7 @@ def test_writing_and_reading_entry_digests(tmpdir): archive_path = str(tmpdir / 'mtree') with file_writer(archive_path, 'mtree') as archive: # Add an empty file, with fake hashes. - archive.add_file_from_memory('empty.txt', 0, b'', **fake_hashes) + archive.add_file_from_memory('empty.txt', 0, b'', stored_digests=fake_hashes) with file_reader(archive_path) as archive: entry = next(iter(archive)) - for key, value in fake_hashes.items(): - assert getattr(entry, key) == value + assert entry.stored_digests == fake_hashes From c119d968c51212deeb25333246a10a7380d63b16 Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 11 Apr 2025 09:30:23 +0200 Subject: [PATCH 07/10] test reading and writing entry digests separately --- tests/test_entry.py | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/tests/test_entry.py b/tests/test_entry.py index 95686c2..5f1814c 100644 --- a/tests/test_entry.py +++ b/tests/test_entry.py @@ -9,9 +9,7 @@ import pytest -from libarchive import ( - ArchiveError, ffi, file_reader, file_writer, memory_reader, memory_writer, -) +from libarchive import ArchiveError, ffi, file_writer, memory_reader, memory_writer from libarchive.entry import ArchiveEntry, ConsumedArchiveEntry, PassedArchiveEntry from . import data_dir, get_entries, get_tarinfos @@ -159,23 +157,37 @@ def test_non_ASCII_encoding_of_file_metadata(): assert entry.pathname == file_name +fake_hashes = dict( + md5=b'!' * 16, + rmd160=b'!' * 20, + sha1=b'!' * 20, + sha256=b'!' * 32, + sha384=b'!' * 48, + sha512=b'!' * 64, +) +mtree = ( + '#mtree\n' + './empty.txt nlink=0 time=0.0 mode=664 gid=0 uid=0 type=file size=0 ' + f'md5={'21'*16} rmd160={'21'*20} sha1={'21'*20} sha256={'21'*32} ' + f'sha384={'21'*48} sha512={'21'*64}\n' +) + + +def test_reading_entry_digests(tmpdir): + with memory_reader(mtree.encode('ascii')) as archive: + entry = next(iter(archive)) + assert entry.stored_digests == fake_hashes + + @pytest.mark.xfail( condition=ffi.version_number() < 3008000, reason="libarchive < 3.8", ) -def test_writing_and_reading_entry_digests(tmpdir): - fake_hashes = dict( - md5=b'0000000000000000', - rmd160=b'00000000000000000000', - sha1=b'00000000000000000000', - sha256=b'00000000000000000000000000000000', - sha384=b'000000000000000000000000000000000000000000000000', - sha512=b'0000000000000000000000000000000000000000000000000000000000000000', - ) +def test_writing_entry_digests(tmpdir): archive_path = str(tmpdir / 'mtree') with file_writer(archive_path, 'mtree') as archive: # Add an empty file, with fake hashes. archive.add_file_from_memory('empty.txt', 0, b'', stored_digests=fake_hashes) - with file_reader(archive_path) as archive: - entry = next(iter(archive)) - assert entry.stored_digests == fake_hashes + with open(archive_path) as f: + libarchive_mtree = f.read() + assert libarchive_mtree == mtree From 164f605d45a6bd2faa5865da69f07c2d5f19d3ad Mon Sep 17 00:00:00 2001 From: Changaco Date: Sun, 13 Apr 2025 11:09:43 +0200 Subject: [PATCH 08/10] drop remnants of Python 2 in tests --- tests/test_entry.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/test_entry.py b/tests/test_entry.py index 5f1814c..3c7871b 100644 --- a/tests/test_entry.py +++ b/tests/test_entry.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- - -from codecs import open import json import locale from os import environ, stat @@ -15,8 +12,6 @@ from . import data_dir, get_entries, get_tarinfos -text_type = unicode if str is bytes else str # noqa: F821 - locale.setlocale(locale.LC_ALL, '') # needed for sane time stamp comparison @@ -106,7 +101,7 @@ def check_entries(test_file, regen=False, ignore=''): # Normalize all unicode (can vary depending on the system) for d in (e1, e2): for key in d: - if isinstance(d[key], text_type): + if isinstance(d[key], str): d[key] = unicodedata.normalize('NFC', d[key]) assert e1 == e2 From 1e40b89eb7d62ab5c38b1de359e04132ada18ceb Mon Sep 17 00:00:00 2001 From: Changaco Date: Wed, 21 May 2025 14:18:30 +0200 Subject: [PATCH 09/10] fix the new write test for archive entry digests --- tests/test_entry.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test_entry.py b/tests/test_entry.py index 3c7871b..7c8afa7 100644 --- a/tests/test_entry.py +++ b/tests/test_entry.py @@ -162,9 +162,9 @@ def test_non_ASCII_encoding_of_file_metadata(): ) mtree = ( '#mtree\n' - './empty.txt nlink=0 time=0.0 mode=664 gid=0 uid=0 type=file size=0 ' - f'md5={'21'*16} rmd160={'21'*20} sha1={'21'*20} sha256={'21'*32} ' - f'sha384={'21'*48} sha512={'21'*64}\n' + './empty.txt nlink=0 time=0.0 mode=664 gid=0 uid=0 type=file size=42 ' + f'md5digest={'21'*16} rmd160digest={'21'*20} sha1digest={'21'*20} ' + f'sha256digest={'21'*32} sha384digest={'21'*48} sha512digest={'21'*64}\n' ) @@ -180,9 +180,10 @@ def test_reading_entry_digests(tmpdir): ) def test_writing_entry_digests(tmpdir): archive_path = str(tmpdir / 'mtree') - with file_writer(archive_path, 'mtree') as archive: + options = ','.join(fake_hashes.keys()) + with file_writer(archive_path, 'mtree', options=options) as archive: # Add an empty file, with fake hashes. - archive.add_file_from_memory('empty.txt', 0, b'', stored_digests=fake_hashes) + archive.add_file_from_memory('empty.txt', 42, (), stored_digests=fake_hashes) with open(archive_path) as f: libarchive_mtree = f.read() assert libarchive_mtree == mtree From b2feb75a33553cd08ae3440abcd46938a51ef104 Mon Sep 17 00:00:00 2001 From: Changaco Date: Wed, 21 May 2025 22:35:53 +0200 Subject: [PATCH 10/10] add a docstring to `ArchiveEntry.stored_digests` --- libarchive/entry.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libarchive/entry.py b/libarchive/entry.py index 519fae7..863dbc7 100644 --- a/libarchive/entry.py +++ b/libarchive/entry.py @@ -437,6 +437,14 @@ def rdevminor(self, value): @property def stored_digests(self): + """The file's hashes stored in the archive. + + libarchive only supports reading and writing digests from and to 'mtree' + files. Setting the digests requires at least version 3.8.0 of libarchive + (released in May 2025). It also requires including the names of the + digest algorithms in the string of options passed to the archive writer + (e.g. `file_writer(archive_path, 'mtree', options='md5,rmd160,sha256')`). + """ return {name: self.get_stored_digest(name) for name in ffi.DIGEST_ALGORITHMS} @stored_digests.setter