Skip to content

Commit 0b0e700

Browse files
committed
Fix SIGSEGV in marshal.loads via self-referencing containers (GHSA-m7gv-g5p9-9qqq)
TYPE_TUPLE, TYPE_LIST, TYPE_DICT, and TYPE_SET used R_REF() to register containers in p->refs immediately after allocation, before populating their slots. A crafted payload containing a TYPE_REF back-reference to the partial container could reach a hashing or iteration site with NULL slots, causing tuplehash/PyObject_Hash(NULL) -> SIGSEGV. Fix: use the existing two-phase r_ref_reserve()/r_ref_insert() pattern (already used by TYPE_FROZENSET, TYPE_CODE, and TYPE_SLICE) for all four container types. r_ref_reserve() places Py_None as a placeholder in p->refs; the TYPE_REF handler (line 1675) already detects Py_None and raises ValueError("bad marshal data (invalid reference)"). After the container is fully populated, r_ref_insert() replaces the placeholder with the real object. Includes regression tests for tuple, list, set, and dict self-reference payloads.
1 parent 54607ee commit 0b0e700

2 files changed

Lines changed: 60 additions & 11 deletions

File tree

Lib/test/test_marshal.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,5 +731,51 @@ def test_read_object_from_file(self):
731731
os_helper.unlink(os_helper.TESTFN)
732732

733733

734+
class SelfRefContainerTest(unittest.TestCase):
735+
"""Regression tests for containers with FLAG_REF back-references.
736+
737+
Before the fix, R_REF registered containers in p->refs before their
738+
slots were populated. A TYPE_REF back-reference to the partial
739+
container could then reach a hashing or iteration site with NULL
740+
slots, crashing the interpreter (SIGSEGV).
741+
742+
The fix uses the two-phase r_ref_reserve/r_ref_insert pattern so the
743+
Py_None placeholder is detected by the TYPE_REF handler, raising
744+
ValueError("bad marshal data (invalid reference)") instead.
745+
"""
746+
747+
def test_self_ref_tuple(self):
748+
# TYPE_TUPLE|FLAG_REF n=2; NONE; TYPE_SET n=1; TYPE_REF(0)
749+
payload = (b'\xa8\x02\x00\x00\x00N'
750+
b'<\x01\x00\x00\x00r\x00\x00\x00\x00')
751+
with self.assertRaises(ValueError):
752+
marshal.loads(payload)
753+
754+
def test_self_ref_list(self):
755+
# TYPE_LIST|FLAG_REF n=2; NONE; TYPE_SET n=1; TYPE_REF(0)
756+
payload = (b'\xa9\x02\x00\x00\x00N'
757+
b'<\x01\x00\x00\x00r\x00\x00\x00\x00')
758+
with self.assertRaises(ValueError):
759+
marshal.loads(payload)
760+
761+
def test_self_ref_set(self):
762+
# TYPE_SET|FLAG_REF n=1; TYPE_TUPLE|FLAG_REF n=1; TYPE_REF(0)
763+
# The set element is a tuple that back-refs the set itself.
764+
payload = (b'\xbc\x01\x00\x00\x00'
765+
b'\xa8\x01\x00\x00\x00'
766+
b'r\x00\x00\x00\x00')
767+
with self.assertRaises(ValueError):
768+
marshal.loads(payload)
769+
770+
def test_self_ref_dict(self):
771+
# TYPE_DICT|FLAG_REF; key=NONE, val=TYPE_SET n=1 TYPE_REF(0); sentinel
772+
payload = (b'\xfb'
773+
b'N'
774+
b'<\x01\x00\x00\x00r\x00\x00\x00\x00'
775+
b'\x00')
776+
with self.assertRaises(ValueError):
777+
marshal.loads(payload)
778+
779+
734780
if __name__ == "__main__":
735781
unittest.main()

Python/marshal.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1385,7 +1385,9 @@ r_object(RFILE *p)
13851385
}
13861386
_read_tuple:
13871387
v = PyTuple_New(n);
1388-
R_REF(v);
1388+
idx = r_ref_reserve(flag, p);
1389+
if (idx < 0)
1390+
Py_CLEAR(v);
13891391
if (v == NULL)
13901392
break;
13911393

@@ -1400,6 +1402,7 @@ r_object(RFILE *p)
14001402
}
14011403
PyTuple_SET_ITEM(v, i, v2);
14021404
}
1405+
v = r_ref_insert(v, idx, flag, p);
14031406
retval = v;
14041407
break;
14051408

@@ -1413,7 +1416,9 @@ r_object(RFILE *p)
14131416
break;
14141417
}
14151418
v = PyList_New(n);
1416-
R_REF(v);
1419+
idx = r_ref_reserve(flag, p);
1420+
if (idx < 0)
1421+
Py_CLEAR(v);
14171422
if (v == NULL)
14181423
break;
14191424
for (i = 0; i < n; i++) {
@@ -1427,13 +1432,16 @@ r_object(RFILE *p)
14271432
}
14281433
PyList_SET_ITEM(v, i, v2);
14291434
}
1435+
v = r_ref_insert(v, idx, flag, p);
14301436
retval = v;
14311437
break;
14321438

14331439
case TYPE_DICT:
14341440
case TYPE_FROZENDICT:
14351441
v = PyDict_New();
1436-
R_REF(v);
1442+
idx = r_ref_reserve(flag, p);
1443+
if (idx < 0)
1444+
Py_CLEAR(v);
14371445
if (v == NULL)
14381446
break;
14391447
for (;;) {
@@ -1466,6 +1474,7 @@ r_object(RFILE *p)
14661474
Py_CLEAR(v);
14671475
}
14681476
}
1477+
v = r_ref_insert(v, idx, flag, p);
14691478
retval = v;
14701479
break;
14711480

@@ -1490,12 +1499,7 @@ r_object(RFILE *p)
14901499
}
14911500
else {
14921501
v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1493-
if (type == TYPE_SET) {
1494-
R_REF(v);
1495-
} else {
1496-
/* must use delayed registration of frozensets because they must
1497-
* be init with a refcount of 1
1498-
*/
1502+
{
14991503
idx = r_ref_reserve(flag, p);
15001504
if (idx < 0)
15011505
Py_CLEAR(v); /* signal error */
@@ -1520,8 +1524,7 @@ r_object(RFILE *p)
15201524
}
15211525
Py_DECREF(v2);
15221526
}
1523-
if (type != TYPE_SET)
1524-
v = r_ref_insert(v, idx, flag, p);
1527+
v = r_ref_insert(v, idx, flag, p);
15251528
retval = v;
15261529
}
15271530
break;

0 commit comments

Comments
 (0)