# HG changeset patch
# User scoder <stefan_ml@behnel.de>
# Date 1593336668 -7200
#      Sun Jun 28 11:31:08 2020 +0200
# Node ID 2e52db00a2935aaf13c1c9d56faf98502f28e0a1
# Parent  06fee89cc305068fddd7a501d5a40173158f7986
Validate and fix temp releasing (GH-3708)

* Fix a temp leak in the type init code.
* Fix temp leaks in fused types initialisation code.
* Correctly release the buffer index temps allocated for index calculations.
* Make tests fails hard if a temp variable is not released at the end of a generated function.
* Fix temp leak in switch statement code.
* Make end-to-end tests fail on refnanny output.
* Fix result temp leak in PyTypeTestNode.
* Fix result temp leak in external type/function import code and enable the refnanny check for them.
* Fix temp leak when try-return-finally is used in generators.
* Make it explicit when an allocated temp is not meant to be reused.
* Fix temp leak when assigning to the real/imag attributes of complex numbers.
* Fix temp leak when assigning to a memoryview slice.
* Clean up "num_threads" result temp in parallel section, not only in prange loop.
* Fix temp leak in Pythran buffer setitem code.
* Simplify NumPyMethodCallNode since it does not need the Python function anymore. Previously, it generated code that needlessly looked up the Python function without actually using it.
* Fix temp leak when deleting C++ objects.
* Add a test that actually reusing temps when deleting C++ objects works correctly.

diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py
--- a/Cython/Compiler/Code.py
+++ b/Cython/Compiler/Code.py
@@ -721,6 +721,7 @@
         self.temps_allocated = []  # of (name, type, manage_ref, static)
         self.temps_free = {}  # (type, manage_ref) -> list of free vars with same type/managed status
         self.temps_used_type = {}  # name -> (type, manage_ref)
+        self.zombie_temps = set()  # temps that must not be reused after release
         self.temp_counter = 0
         self.closure_temps = None
 
@@ -740,14 +741,14 @@
     def validate_exit(self):
         # validate that all allocated temps have been freed
         if self.temps_allocated:
-            leftovers = set(self.all_managed_temps()).difference(self.all_free_managed_temps())
+            leftovers = self.temps_in_use()
             if leftovers:
-                msg = "Temps left over at end of '%s': %s" % (
-                    self.scope.name,
-                    ', '.join(map(str, sorted(leftovers, key=operator.itemgetter(0)))),
+                msg = "TEMPGUARD: Temps left over at end of '%s': %s" % (self.scope.name, ', '.join([
+                    '%s [%s]' % (name, ctype)
+                    for name, ctype, is_pytemp in sorted(leftovers)]),
                 )
-                print(msg)
-                #raise RuntimeError(msg)
+                #print(msg)
+                raise RuntimeError(msg)
 
     # labels
 
@@ -818,7 +819,7 @@
 
     # temp handling
 
-    def allocate_temp(self, type, manage_ref, static=False):
+    def allocate_temp(self, type, manage_ref, static=False, reusable=True):
         """
         Allocates a temporary (which may create a new one or get a previously
         allocated and released one of the same type). Type is simply registered
@@ -837,6 +838,8 @@
         This is only used when allocating backing store for a module-level
         C array literals.
 
+        if reusable=False, the temp will not be reused after release.
+
         A C string referring to the variable is returned.
         """
         if type.is_cv_qualified and not type.is_reference:
@@ -852,7 +855,7 @@
             manage_ref = False
 
         freelist = self.temps_free.get((type, manage_ref))
-        if freelist is not None and freelist[0]:
+        if reusable and freelist is not None and freelist[0]:
             result = freelist[0].pop()
             freelist[1].remove(result)
         else:
@@ -861,9 +864,11 @@
                 result = "%s%d" % (Naming.codewriter_temp_prefix, self.temp_counter)
                 if result not in self.names_taken: break
             self.temps_allocated.append((result, type, manage_ref, static))
+            if not reusable:
+                self.zombie_temps.add(result)
         self.temps_used_type[result] = (type, manage_ref)
         if DebugFlags.debug_temp_code_comments:
-            self.owner.putln("/* %s allocated (%s) */" % (result, type))
+            self.owner.putln("/* %s allocated (%s)%s */" % (result, type, "" if reusable else " - zombie"))
 
         if self.collect_temps_stack:
             self.collect_temps_stack[-1].add((result, type))
@@ -882,10 +887,12 @@
             self.temps_free[(type, manage_ref)] = freelist
         if name in freelist[1]:
             raise RuntimeError("Temp %s freed twice!" % name)
-        freelist[0].append(name)
+        if name not in self.zombie_temps:
+            freelist[0].append(name)
         freelist[1].add(name)
         if DebugFlags.debug_temp_code_comments:
-            self.owner.putln("/* %s released */" % name)
+            self.owner.putln("/* %s released %s*/" % (
+                name, " - zombie" if name in self.zombie_temps else ""))
 
     def temps_in_use(self):
         """Return a list of (cname,type,manage_ref) tuples of temp names and their type
diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py
--- a/Cython/Compiler/ExprNodes.py
+++ b/Cython/Compiler/ExprNodes.py
@@ -4396,6 +4396,7 @@
                 pythran_indexing_code(self.indices),
                 op,
                 rhs.pythran_result()))
+            code.funcstate.release_temp(obj)
             return
 
         # Used from generate_assignment_code and InPlaceAssignmentNode
@@ -4436,11 +4437,11 @@
             code.putln("%s = (PyObject *) *%s;" % (self.result(), self.buffer_ptr_code))
             code.putln("__Pyx_INCREF((PyObject*)%s);" % self.result())
 
-    def free_temps(self, code):
+    def free_subexpr_temps(self, code):
         for temp in self.index_temps:
             code.funcstate.release_temp(temp)
         self.index_temps = ()
-        super(BufferIndexNode, self).free_temps(code)
+        super(BufferIndexNode, self).free_subexpr_temps(code)
 
 
 class MemoryViewIndexNode(BufferIndexNode):
@@ -4717,6 +4718,7 @@
         self.dst.generate_evaluation_code(code)
         self._generate_assignment_code(rhs, code)
         self.dst.generate_disposal_code(code)
+        self.dst.free_temps(code)
         rhs.generate_disposal_code(code)
         rhs.free_temps(code)
 
@@ -5641,7 +5643,7 @@
             env.add_include_file(pythran_get_func_include_file(function))
             return NumPyMethodCallNode.from_node(
                 self,
-                function=function,
+                function_cname=pythran_functor(function),
                 arg_tuple=self.arg_tuple,
                 type=PythranExpr(pythran_func_type(function, self.arg_tuple.args)),
             )
@@ -6047,13 +6049,13 @@
                 code.funcstate.release_temp(self.opt_arg_struct)
 
 
-class NumPyMethodCallNode(SimpleCallNode):
+class NumPyMethodCallNode(ExprNode):
     # Pythran call to a NumPy function or method.
     #
-    # function    ExprNode      the function/method to call
-    # arg_tuple   TupleNode     the arguments as an args tuple
-
-    subexprs = ['function', 'arg_tuple']
+    # function_cname  string      the function/method to call
+    # arg_tuple       TupleNode   the arguments as an args tuple
+
+    subexprs = ['arg_tuple']
     is_temp = True
     may_return_none = True
 
@@ -6061,7 +6063,6 @@
         code.mark_pos(self.pos)
         self.allocate_temp_result(code)
 
-        self.function.generate_evaluation_code(code)
         assert self.arg_tuple.mult_factor is None
         args = self.arg_tuple.args
         for arg in args:
@@ -6072,7 +6073,7 @@
         code.putln("new (&%s) decltype(%s){%s{}(%s)};" % (
             self.result(),
             self.result(),
-            pythran_functor(self.function),
+            self.function_cname,
             ", ".join(a.pythran_result() for a in args)))
 
 
@@ -7287,6 +7288,8 @@
                 self.member.upper(),
                 self.obj.result_as(self.obj.type),
                 rhs.result_as(self.ctype())))
+            rhs.generate_disposal_code(code)
+            rhs.free_temps(code)
         else:
             select_code = self.result()
             if self.type.is_pyobject and self.use_managed_ref:
@@ -8140,21 +8143,19 @@
         return t
 
     def allocate_temp_result(self, code):
-        if self.type.is_array and self.in_module_scope:
-            self.temp_code = code.funcstate.allocate_temp(
-                self.type, manage_ref=False, static=True)
+        if self.type.is_array:
+            if self.in_module_scope:
+                self.temp_code = code.funcstate.allocate_temp(
+                    self.type, manage_ref=False, static=True, reusable=False)
+            else:
+                # To be valid C++, we must allocate the memory on the stack
+                # manually and be sure not to reuse it for something else.
+                # Yes, this means that we leak a temp array variable.
+                self.temp_code = code.funcstate.allocate_temp(
+                    self.type, manage_ref=False, reusable=False)
         else:
             SequenceNode.allocate_temp_result(self, code)
 
-    def release_temp_result(self, env):
-        if self.type.is_array:
-            # To be valid C++, we must allocate the memory on the stack
-            # manually and be sure not to reuse it for something else.
-            # Yes, this means that we leak a temp array variable.
-            pass
-        else:
-            SequenceNode.release_temp_result(self, env)
-
     def calculate_constant_result(self):
         if self.mult_factor:
             raise ValueError()  # may exceed the compile time memory
@@ -13104,9 +13105,18 @@
     def generate_post_assignment_code(self, code):
         self.arg.generate_post_assignment_code(code)
 
+    def allocate_temp_result(self, code):
+        pass
+
+    def release_temp_result(self, code):
+        pass
+
     def free_temps(self, code):
         self.arg.free_temps(code)
 
+    def free_subexpr_temps(self, code):
+        self.arg.free_subexpr_temps(code)
+
 
 class NoneCheckNode(CoercionNode):
     # This node is used to check that a Python object is not None and
diff --git a/Cython/Compiler/FusedNode.py b/Cython/Compiler/FusedNode.py
--- a/Cython/Compiler/FusedNode.py
+++ b/Cython/Compiler/FusedNode.py
@@ -924,17 +924,23 @@
                                     (self.resulting_fused_function.result(),
                                      self.__signatures__.result()))
             self.__signatures__.generate_giveref(code)
+            self.__signatures__.generate_post_assignment_code(code)
+            self.__signatures__.free_temps(code)
 
             self.fused_func_assignment.generate_execution_code(code)
 
             # Dispose of results
             self.resulting_fused_function.generate_disposal_code(code)
+            self.resulting_fused_function.free_temps(code)
             self.defaults_tuple.generate_disposal_code(code)
+            self.defaults_tuple.free_temps(code)
             self.code_object.generate_disposal_code(code)
+            self.code_object.free_temps(code)
 
         for default in self.defaults:
             if default is not None:
                 default.generate_disposal_code(code)
+                default.free_temps(code)
 
     def annotate(self, code):
         for stat in self.stats:
diff --git a/Cython/Compiler/ModuleNode.py b/Cython/Compiler/ModuleNode.py
--- a/Cython/Compiler/ModuleNode.py
+++ b/Cython/Compiler/ModuleNode.py
@@ -3413,6 +3413,7 @@
                     module.qualified_name,
                     temp,
                     code.error_goto(self.pos)))
+            code.put_gotref(temp, py_object_type)
             for entry in entries:
                 if env is module:
                     cname = entry.cname
@@ -3423,7 +3424,8 @@
                     'if (__Pyx_ImportVoidPtr(%s, "%s", (void **)&%s, "%s") < 0) %s' % (
                         temp, entry.name, cname, signature,
                         code.error_goto(self.pos)))
-            code.putln("Py_DECREF(%s); %s = 0;" % (temp, temp))
+            code.put_decref_clear(temp, py_object_type)
+            code.funcstate.release_temp(temp)
 
     def generate_c_function_import_code_for_module(self, module, env, code):
         # Generate import code for all exported C functions in a cimported module.
@@ -3441,6 +3443,7 @@
                     module.qualified_name,
                     temp,
                     code.error_goto(self.pos)))
+            code.put_gotref(temp, py_object_type)
             for entry in entries:
                 code.putln(
                     'if (__Pyx_ImportFunction(%s, %s, (void (**)(void))&%s, "%s") < 0) %s' % (
@@ -3449,7 +3452,8 @@
                         entry.cname,
                         entry.type.signature_string(),
                         code.error_goto(self.pos)))
-            code.putln("Py_DECREF(%s); %s = 0;" % (temp, temp))
+            code.put_decref_clear(temp, py_object_type)
+            code.funcstate.release_temp(temp)
 
     def generate_type_init_code(self, env, code):
         # Generate type import code for extern extension types
diff --git a/Cython/Compiler/Nodes.py b/Cython/Compiler/Nodes.py
--- a/Cython/Compiler/Nodes.py
+++ b/Cython/Compiler/Nodes.py
@@ -5182,9 +5182,9 @@
             return
         if entry.visibility != 'extern':
             code.putln("#if CYTHON_COMPILING_IN_LIMITED_API")
-            tuple_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
             base_type = scope.parent_type.base_type
             if base_type:
+                tuple_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
                 code.putln(
                     "%s = PyTuple_Pack(1, (PyObject *)%s); %s" % (
                     tuple_temp,
@@ -6238,6 +6238,7 @@
                 arg.generate_evaluation_code(code)
                 code.putln("delete %s;" % arg.result())
                 arg.generate_disposal_code(code)
+                arg.free_temps(code)
             # else error reported earlier
 
     def annotate(self, code):
@@ -6758,6 +6759,8 @@
             # generate the switch statement, so shouldn't be bothered).
             code.putln("default: break;")
         code.putln("}")
+        self.test.generate_disposal_code(code)
+        self.test.free_temps(code)
 
     def generate_function_definitions(self, env, code):
         self.test.generate_function_definitions(env, code)
@@ -8044,9 +8047,10 @@
                     if self.func_return_type.is_pyobject:
                         code.putln("%s = 0;" % ret_temp)
                     code.funcstate.release_temp(ret_temp)
-                    ret_temp = None
                 if self.in_generator:
                     self.put_error_uncatcher(code, exc_vars)
+                    for cname in exc_vars:
+                        code.funcstate.release_temp(cname)
 
             if not self.finally_clause.is_terminator:
                 code.put_goto(old_label)
@@ -9177,6 +9181,11 @@
         self.begin_of_parallel_control_block_point = None
         self.begin_of_parallel_control_block_point_after_decls = None
 
+        if self.num_threads is not None:
+            # FIXME: is it the right place? should not normally produce code.
+            self.num_threads.generate_disposal_code(code)
+            self.num_threads.free_temps(code)
+
         # Firstly, always prefer errors over returning, continue or break
         if self.error_label_used:
             c.putln("const char *%s = NULL; int %s = 0, %s = 0;" % self.parallel_pos_info)
@@ -9557,7 +9566,7 @@
 
         # And finally, release our privates and write back any closure
         # variables
-        for temp in start_stop_step + (self.chunksize, self.num_threads):
+        for temp in start_stop_step + (self.chunksize,):
             if temp is not None:
                 temp.generate_disposal_code(code)
                 temp.free_temps(code)
diff --git a/runtests.py b/runtests.py
--- a/runtests.py
+++ b/runtests.py
@@ -1777,6 +1777,8 @@
                 cmd.append(command)
                 out.append(_out)
                 err.append(_err)
+            if res == 0 and b'REFNANNY: ' in _out:
+                res = -1
             if res != 0:
                 for c, o, e in zip(cmd, out, err):
                     sys.stderr.write("%s\n%s\n%s\n\n" % (
diff --git a/tests/run/cpp_classes.pyx b/tests/run/cpp_classes.pyx
--- a/tests/run/cpp_classes.pyx
+++ b/tests/run/cpp_classes.pyx
@@ -212,6 +212,7 @@
 cdef int f(int x):
     return x
 
+
 def test_nested_del():
     """
     >>> test_nested_del()
@@ -220,3 +221,15 @@
     v.push_back(new vector[int]())
     del v[0]
     del create_to_delete()[f(f(0))]
+
+
+def test_nested_del_repeat():
+    """
+    >>> test_nested_del_repeat()
+    """
+    cdef vector[vector_int_ptr] v
+    v.push_back(new vector[int]())
+    del v[0]
+    del create_to_delete()[f(f(0))]
+    del create_to_delete()[f(f(0))]
+    del create_to_delete()[f(f(0))]