Skip to content

Commit 98d42b2

Browse files
serhiy-storchaka and miss-islington
authored and committed
gh-148914: Fix memoization of in-band PickleBuffer in the Python implementation (GH-149052)
Previously, identical PickleBuffers did not preserve identity. Also, an empty writable PickleBuffer memoized an empty bytearray object in place of b'', which is a singleton in CPython, so subsequent references to b'' were unpickled as an empty bytearray object. (cherry picked from commit b897356) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 9670560 commit 98d42b2

3 files changed

Lines changed: 54 additions & 9 deletions

File tree

Lib/pickle.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -904,17 +904,11 @@ def save_picklebuffer(self, obj):
904904
# Write data in-band
905905
# XXX The C implementation avoids a copy here
906906
buf = m.tobytes()
907-
in_memo = id(buf) in self.memo
908907
if m.readonly:
909-
if in_memo:
910-
self._save_bytes_no_memo(buf)
911-
else:
912-
self.save_bytes(buf)
908+
self._save_bytes_no_memo(buf)
913909
else:
914-
if in_memo:
915-
self._save_bytearray_no_memo(buf)
916-
else:
917-
self.save_bytearray(buf)
910+
self._save_bytearray_no_memo(buf)
911+
self.memoize(obj)
918912
else:
919913
# Write data out-of-band
920914
self.write(NEXT_BUFFER)

Lib/test/pickletester.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2855,6 +2855,51 @@ def test_bytearray_memoization(self):
28552855
self.assertIsNot(b2a, b2b)
28562856
self.assert_is_copy(b2a, b2b)
28572857

2858+
def test_picklebuffer_memoization(self):
2859+
if self.py_version < (3, 8):
2860+
self.skipTest('not supported in Python < 3.8')
2861+
array_types = [bytes, bytearray]
2862+
for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2863+
for array_type in array_types:
2864+
for s in b'', b'xyz', b'xyz'*100:
2865+
with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
2866+
b = pickle.PickleBuffer(array_type(s))
2867+
p = self.dumps((b, b), proto)
2868+
b1, b2 = self.loads(p)
2869+
self.assertIs(b1, b2)
2870+
2871+
with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
2872+
b = array_type(s)
2873+
b1a = pickle.PickleBuffer(b)
2874+
b2a = pickle.PickleBuffer(b)
2875+
p = self.dumps((b1a, b2a), proto)
2876+
b1b, b2b = self.loads(p)
2877+
if array_type is not bytes:
2878+
self.assertIsNot(b1b, b2b)
2879+
self.assert_is_copy(b1b, b)
2880+
self.assert_is_copy(b2b, b)
2881+
2882+
def test_empty_picklebuffer_memoization(self):
2883+
# gh-148914: Empty writable PickleBuffer memoized an empty bytearray
2884+
# with the id of b'' (a singleton in CPython).
2885+
if self.py_version < (3, 8):
2886+
self.skipTest('not supported in Python < 3.8')
2887+
for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2888+
for readonly in False, True:
2889+
with self.subTest(proto=proto, readonly=readonly):
2890+
b = b''
2891+
ba = bytearray()
2892+
buf = pickle.PickleBuffer(b if readonly else ba)
2893+
p = self.dumps((buf, b, ba), proto)
2894+
buf, b, ba = self.loads(p)
2895+
array_type = bytes if readonly else bytearray
2896+
self.assertIsInstance(buf, array_type)
2897+
self.assertIsInstance(b, bytes)
2898+
self.assertIsInstance(ba, bytearray)
2899+
self.assertEqual(buf, b'')
2900+
self.assertEqual(b, b'')
2901+
self.assertEqual(ba, b'')
2902+
28582903
def test_ints(self):
28592904
for proto in protocols:
28602905
n = sys.maxsize
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Fix memoization of in-band :class:`~pickle.PickleBuffer` in the Python
2+
implementation of :mod:`pickle`. Previously, identical
3+
:class:`!PickleBuffer`\ s did not preserve identity, and an empty writable
4+
:class:`!PickleBuffer` memoized an empty bytearray object in place of
5+
``b''``, so subsequent references to ``b''`` were unpickled as an empty
6+
bytearray object.

0 commit comments

Comments
 (0)