diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index ce6324d0a8e0b9..80c11b753be7e6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1094,7 +1094,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, - [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, @@ -1347,7 +1347,7 @@ _PyOpcode_macro_expansion[256] = { [BINARY_OP_ADD_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_UNICODE] = { .nuops = 5, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 } } }, - [BINARY_OP_EXTEND] = { .nuops = 2, .uops = { { _GUARD_BINARY_OP_EXTEND, 4, 1 }, { _BINARY_OP_EXTEND, 4, 1 } } }, + [BINARY_OP_EXTEND] = { .nuops = 4, .uops = { { _GUARD_BINARY_OP_EXTEND, 4, 1 }, { _BINARY_OP_EXTEND, 4, 1 }, { _POP_TOP, OPARG_SIMPLE, 5 }, { _POP_TOP, OPARG_SIMPLE, 5 } } }, [BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_INPLACE_ADD_UNICODE, OPARG_SIMPLE, 5 } } }, [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_MULTIPLY_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 5cda407ba89d93..ccdcc27f903cb9 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -389,7 +389,7 @@ extern "C" { #define _BINARY_OP_ADD_UNICODE_r03 586 #define _BINARY_OP_ADD_UNICODE_r13 587 #define _BINARY_OP_ADD_UNICODE_r23 588 -#define _BINARY_OP_EXTEND_r21 589 +#define _BINARY_OP_EXTEND_r23 589 #define _BINARY_OP_INPLACE_ADD_UNICODE_r21 590 #define _BINARY_OP_MULTIPLY_FLOAT_r03 591 #define _BINARY_OP_MULTIPLY_FLOAT_r13 592 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index c2e0f4a2c391ed..d51ed9c697ab8c 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -115,7 +115,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_OP_EXTEND] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_OP_EXTEND] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, @@ -1113,7 +1113,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { .entries = { { -1, -1, -1 }, { -1, -1, -1 }, - { 1, 2, _BINARY_OP_EXTEND_r21 }, + { 3, 2, _BINARY_OP_EXTEND_r23 }, { -1, -1, -1 }, }, }, @@ -3670,7 +3670,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_BINARY_OP_ADD_UNICODE_r23] = _BINARY_OP_ADD_UNICODE, [_BINARY_OP_INPLACE_ADD_UNICODE_r21] = _BINARY_OP_INPLACE_ADD_UNICODE, [_GUARD_BINARY_OP_EXTEND_r22] = _GUARD_BINARY_OP_EXTEND, - [_BINARY_OP_EXTEND_r21] = _BINARY_OP_EXTEND, + [_BINARY_OP_EXTEND_r23] = _BINARY_OP_EXTEND, [_BINARY_SLICE_r31] = _BINARY_SLICE, [_STORE_SLICE_r30] = _STORE_SLICE, [_BINARY_OP_SUBSCR_LIST_INT_r23] = _BINARY_OP_SUBSCR_LIST_INT, @@ -4239,7 +4239,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_BINARY_OP_ADD_UNICODE_r13] = "_BINARY_OP_ADD_UNICODE_r13", [_BINARY_OP_ADD_UNICODE_r23] = "_BINARY_OP_ADD_UNICODE_r23", [_BINARY_OP_EXTEND] = "_BINARY_OP_EXTEND", - [_BINARY_OP_EXTEND_r21] = "_BINARY_OP_EXTEND_r21", + [_BINARY_OP_EXTEND_r23] = "_BINARY_OP_EXTEND_r23", [_BINARY_OP_INPLACE_ADD_UNICODE] = "_BINARY_OP_INPLACE_ADD_UNICODE", [_BINARY_OP_INPLACE_ADD_UNICODE_r21] = "_BINARY_OP_INPLACE_ADD_UNICODE_r21", [_BINARY_OP_MULTIPLY_FLOAT] = "_BINARY_OP_MULTIPLY_FLOAT", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 7c33320e9f1785..5a0a379e31a962 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2897,6 +2897,23 @@ def testfunc(n): self.assertIn("_POP_TOP_NOP", uops) self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_binary_op_extend_float_long_add_refcount_elimination(self): + def testfunc(n): + a = 1.5 + b = 2 + res = 0.0 + for _ in range(n): + res = a + b + return res + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 3.5) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_EXTEND", uops) + self.assertIn("_POP_TOP_NOP", uops) + self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_remove_guard_for_slice_list(self): def f(n): for i in range(n): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-26-12.gh-issue-144005.Z3O33m.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-26-12.gh-issue-144005.Z3O33m.rst new file mode 100644 index 00000000000000..b3582197f45dda --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-26-12.gh-issue-144005.Z3O33m.rst @@ -0,0 +1 @@ +Eliminate redundant refcounting from ``BINARY_OP_EXTEND``. diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index c02d236fc3e8ac..a7d589dbe7b274 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -317,6 +317,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; /* Skip 1 cache entry */ // _GUARD_BINARY_OP_EXTEND { @@ -348,25 +351,32 @@ STAT_INC(BINARY_OP, hit); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = d->action(left_o, right_o); - _PyStackRef tmp = right; - right = PyStackRef_NULL; - stack_pointer[-1] = right; - PyStackRef_CLOSE(tmp); - tmp = left; - left = PyStackRef_NULL; - stack_pointer[-2] = left; - PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } - stack_pointer[0] = res; - stack_pointer += 1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index cef368e9b0721a..36df991e84dc2c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -829,7 +829,7 @@ dummy_func( DEOPT_IF(!res); } - op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { + op(_BINARY_OP_EXTEND, (descr/4, left, right -- res, l, r)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); @@ -838,13 +838,18 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = d->action(left_o, right_o); - DECREF_INPUTS(); - ERROR_IF(res_o == NULL); + if (res_o == NULL) { + ERROR_NO_POP(); + } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + DEAD(left); + DEAD(right); } macro(BINARY_OP_EXTEND) = - unused/1 + _GUARD_BINARY_OP_EXTEND + rewind/-4 + _BINARY_OP_EXTEND; + unused/1 + _GUARD_BINARY_OP_EXTEND + rewind/-4 + _BINARY_OP_EXTEND + POP_TOP + POP_TOP; macro(BINARY_OP_INPLACE_ADD_UNICODE) = _GUARD_TOS_UNICODE + _GUARD_NOS_UNICODE + unused/5 + _BINARY_OP_INPLACE_ADD_UNICODE; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4d4084971c75ad..9098bd219ed506 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5143,12 +5143,14 @@ break; } - case _BINARY_OP_EXTEND_r21: { + case _BINARY_OP_EXTEND_r23: { CHECK_CURRENT_CACHED_VALUES(2); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef right; _PyStackRef left; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; right = _stack_item_1; @@ -5165,26 +5167,20 @@ ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = d->action(left_o, right_o); - _PyStackRef tmp = right; - right = PyStackRef_NULL; - stack_pointer[-1] = right; - PyStackRef_CLOSE(tmp); - tmp = left; - left = PyStackRef_NULL; - stack_pointer[-2] = left; - PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + _tos_cache2 = r; + _tos_cache1 = l; _tos_cache0 = res; - _tos_cache1 = PyStackRef_ZERO_BITS; - _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); + SET_CURRENT_CACHED_VALUES(3); + stack_pointer += -2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 194fbe4f268cb4..9df6b2f70f96df 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -317,6 +317,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; /* Skip 1 cache entry */ // _GUARD_BINARY_OP_EXTEND { @@ -348,25 +351,32 @@ STAT_INC(BINARY_OP, hit); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = d->action(left_o, right_o); - _PyStackRef tmp = right; - right = PyStackRef_NULL; - stack_pointer[-1] = right; - PyStackRef_CLOSE(tmp); - tmp = left; - left = PyStackRef_NULL; - stack_pointer[-2] = left; - PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } - stack_pointer[0] = res; - stack_pointer += 1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 1584e731d1b2d4..de7a2313f9220a 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -311,6 +311,12 @@ dummy_func(void) { r = right; } + op(_BINARY_OP_EXTEND, (left, right -- res, l, r)) { + res = sym_new_not_null(ctx); + l = left; + r = right; + } + op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- res)) { if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 341805d51e24cd..5fca69462669b5 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -882,11 +882,22 @@ } case _BINARY_OP_EXTEND: { + JitOptRef right; + JitOptRef left; JitOptRef res; + JitOptRef l; + JitOptRef r; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyObject *descr = (PyObject *)this_instr->operand0; res = sym_new_not_null(ctx); - CHECK_STACK_BOUNDS(-1); + l = left; + r = right; + CHECK_STACK_BOUNDS(1); stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; }