diff --git a/.travis.yml b/.travis.yml
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_LnRyYXZpcy55bWw=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_LnRyYXZpcy55bWw= 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -75,6 +75,14 @@
       dist: xenial    # Required for Python 3.7
       sudo: required  # travis-ci/travis-ci#9069
       env: BACKEND=cpp
+    - python: 3.9-dev
+      dist: xenial    # Required for Python 3.7+
+      sudo: required  # travis-ci/travis-ci#9069
+      env: BACKEND=c
+    - python: 3.9-dev
+      dist: xenial    # Required for Python 3.7+
+      sudo: required  # travis-ci/travis-ci#9069
+      env: BACKEND=cpp
     - os: osx
       osx_image: xcode6.4
       env: PY=2
@@ -98,6 +106,7 @@
     - env: STACKLESS=true BACKEND=c PY=3
       python: 3.6
   allow_failures:
+    - python: 3.9-dev
     - python: pypy
     - python: pypy3
 
@@ -108,7 +117,7 @@
 
 before_install:
   - |
-    if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
+    if [ "$TRAVIS_OS_NAME" == "linux" ]; then
       # adding apt repos in travis is really fragile => retry a couple of times.
       for i in {1..10}; do travis_retry sudo apt-add-repository --yes 'ppa:ubuntu-toolchain-r/test' && break; sleep 2; done
       for i in {1..10}; do travis_retry sudo apt-get update && travis_retry sudo apt-get install --yes gcc-8  $(if [ -z "${BACKEND##*cpp*}" ]; then echo -n "g++-8"; fi ) && break; sleep 2; done
@@ -119,8 +128,10 @@
     fi
 
   - |
-    if [[ "$TRAVIS_OS_NAME" == "osx" ]] || [[ "$STACKLESS" == "true" ]]; then # Install Miniconda
-      if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then CONDA_PLATFORM=MacOSX; else CONDA_PLATFORM=Linux; fi
-      travis_retry curl -s -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda$PY-latest-${CONDA_PLATFORM}-x86_64.sh
-      bash miniconda.sh -b -p $HOME/miniconda && rm miniconda.sh
+    if [ "$TRAVIS_OS_NAME" == "osx" -o "$STACKLESS" == "true" ]; then
+      echo "Installing Miniconda"
+      if [ "$TRAVIS_OS_NAME" == "osx" ]; then CONDA_PLATFORM=MacOSX; else CONDA_PLATFORM=Linux; fi
+      travis_retry wget -O miniconda.sh https://repo.continuum.io/miniconda/Miniconda$PY-latest-${CONDA_PLATFORM}-x86_64.sh || exit 1
+      bash miniconda.sh -b -p $HOME/miniconda && rm miniconda.sh || exit 1
+      conda --version || exit 1
       #conda install --quiet --yes nomkl --file=test-requirements.txt --file=test-requirements-cpython.txt
@@ -126,5 +137,5 @@
       #conda install --quiet --yes nomkl --file=test-requirements.txt --file=test-requirements-cpython.txt
-      if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then
+      if [ "$TRAVIS_OS_NAME" == "osx" ]; then
         which clang && clang --version && export CC=clang || true
         which clang++ && clang++ --version && export CXX=clang++ || true
       fi
@@ -133,7 +144,7 @@
   - if [ -n "$CC" ]; then which $CC; $CC --version; fi
   - if [ -n "$CXX" ]; then which $CXX; $CXX --version; fi
 
-  - if [[ "$STACKLESS" == "true" ]]; then
+  - if [ "$STACKLESS" == "true" ]; then
       conda config --add channels stackless;
       travis_retry conda install --quiet --yes stackless;
     fi
diff --git a/CHANGES.rst b/CHANGES.rst
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q0hBTkdFUy5yc3Q=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q0hBTkdFUy5yc3Q= 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -2,6 +2,155 @@
 Cython Changelog
 ================
 
+0.29.20 (2020-06-10)
+====================
+
+Bugs fixed
+----------
+
+* Nested try-except statements with multiple ``return`` statements could crash
+  due to incorrect deletion of the ``except as`` target variable.
+  (Github issue #3666)
+
+* The ``@classmethod`` decorator no longer rejects unknown input from other decorators.
+  Patch by David Woods.  (Github issue #3660)
+
+* Fused types could leak into unrelated usages.
+  Patch by David Woods.  (Github issue #3642)
+
+* Now uses ``Py_SET_SIZE()`` and ``Py_SET_REFCNT()`` in Py3.9+ to avoid low-level
+  write access to these object fields.
+  Patch by Victor Stinner.  (Github issue #3639)
+
+* The built-in ``abs()`` function could lead to undefined behaviour when used on
+  the negative-most value of a signed C integer type.
+  Patch by Serge Guelton.  (Github issue #1911)
+
+* Usages of ``sizeof()`` and ``typeid()`` on uninitialised variables no longer
+  produce a warning.
+  Patch by Celelibi.  (Github issue #3575)
+
+* The C++ ``typeid()`` function was allowed in C mode.
+  Patch by Celelibi.  (Github issue #3637)
+
+* The error position reported for errors found in f-strings was misleading.
+  (Github issue #3674)
+
+* The new ``c_api_binop_methods`` directive was added for forward compatibility, but can
+  only be set to True (the current default value).  It can be disabled in Cython 3.0.
+
+
+0.29.19 (2020-05-20)
+====================
+
+Bugs fixed
+----------
+
+* A typo in Windows specific code in 0.29.18 was fixed that broke "libc.math".
+  (Github issue #3622)
+
+* A platform specific test failure in 0.29.18 was fixed.
+  Patch by smutch.  (Github issue #3620)
+
+
+0.29.18 (2020-05-18)
+====================
+
+Bugs fixed
+----------
+
+* Exception position reporting could run into race conditions on threaded code.
+  It now uses function-local variables again.
+
+* Error handling early in the module init code could lead to a crash.
+
+* Error handling in ``cython.array`` creation was improved to avoid calling
+  C-API functions with an error held.
+
+* A memory corruption was fixed when garbage collection was triggered during calls
+  to ``PyType_Ready()`` of extension type subclasses.
+  (Github issue #3603)
+
+* Memory view slicing generated unused error handling code which could negatively
+  impact the C compiler optimisations for parallel OpenMP code etc.  Also, it is
+  now helped by static branch hints.
+  (Github issue #2987)
+
+* Cython's built-in OpenMP functions were not translated inside of call arguments.
+  Original patch by Celelibi and David Woods.  (Github issue #3594)
+
+* Complex buffer item types of structs of arrays could fail to validate.
+  Patch by Leo and smutch.  (Github issue #1407)
+
+* Decorators were not allowed on nested ``async def`` functions.
+  (Github issue #1462)
+
+* C-tuples could use invalid C struct casting.
+  Patch by MegaIng.  (Github issue #3038)
+
+* Optimised ``%d`` string formatting into f-strings failed on float values.
+  (Github issue #3092)
+
+* Optimised aligned string formatting (``%05s``, ``%-5s``) failed.
+  (Github issue #3476)
+
+* When importing the old Cython ``build_ext`` integration with distutils, the
+  additional command line arguments leaked into the regular command.
+  Patch by Kamekameha.  (Github issue #2209)
+
+* When using the ``CYTHON_NO_PYINIT_EXPORT`` option in C++, the module init function
+  was not declared as ``extern "C"``.
+  (Github issue #3414)
+
+* Three missing timedelta access macros were added in ``cpython.datetime``.
+
+* The signature of the NumPy C-API function ``PyArray_SearchSorted()`` was fixed.
+  Patch by Brock Mendel.  (Github issue #3606)
+
+
+0.29.17 (2020-04-26)
+====================
+
+Features added
+--------------
+
+* ``std::move()`` is now available from ``libcpp.utility``.
+  Patch by Omer Ozarslan.  (Github issue #2169)
+
+* The ``@cython.binding`` decorator is available in Python code.
+  (Github issue #3505)
+
+Bugs fixed
+----------
+
+* Creating an empty unicode slice with large bounds could crash.
+  Patch by Sam Sneddon.  (Github issue #3531)
+
+* Decoding an empty bytes/char* slice with large bounds could crash.
+  Patch by Sam Sneddon.  (Github issue #3534)
+
+* Re-importing a Cython extension no longer raises the error
+  "``__reduce_cython__ not found``".
+  (Github issue #3545)
+
+* Unused C-tuples could generate incorrect code in 0.29.16.
+  Patch by Kirk Meyer.  (Github issue #3543)
+
+* Creating a fused function attached it to the garbage collector before it
+  was fully initialised, thus risking crashes in rare failure cases.
+  Original patch by achernomorov.  (Github issue #3215)
+
+* Temporary buffer indexing variables were not released and could show up in
+  C compiler warnings, e.g. in generators.
+  Patch by David Woods.  (Github issues #3430, #3522)
+
+* The compilation cache in ``cython.inline("…")`` failed to take the language
+  level into account.
+  Patch by will-ca.  (Github issue #3419)
+
+* The deprecated ``PyUnicode_GET_SIZE()`` function is no longer used in Py3.
+
+
 0.29.16 (2020-03-24)
 ====================
 
diff --git a/Cython/Build/Inline.py b/Cython/Build/Inline.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0J1aWxkL0lubGluZS5weQ==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0J1aWxkL0lubGluZS5weQ== 100644
--- a/Cython/Build/Inline.py
+++ b/Cython/Build/Inline.py
@@ -144,6 +144,10 @@
             else:
                 print("Couldn't find %r" % symbol)
 
+def _inline_key(orig_code, arg_sigs, language_level):
+    ident = _unicode((orig_code, arg_sigs, sys.version_info, sys.executable, language_level, Cython.__version__))
+    return hashlib.sha1(ident.encode('utf-8')).hexdigest()  # hex SHA-1 over the text form of the identifying tuple
+
 def cython_inline(code, get_type=unsafe_type,
                   lib_dir=os.path.join(get_cython_cache_dir(), 'inline'),
                   cython_include_dirs=None, cython_compiler_directives=None,
@@ -153,9 +157,15 @@
         get_type = lambda x: 'object'
     ctx = _create_context(tuple(cython_include_dirs)) if cython_include_dirs else _cython_inline_default_context
 
+    cython_compiler_directives = dict(cython_compiler_directives or {})
+    if language_level is None and 'language_level' not in cython_compiler_directives:
+        language_level = '3str'
+    if language_level is not None:
+        cython_compiler_directives['language_level'] = language_level
+
     # Fast path if this has been called in this session.
     _unbound_symbols = _cython_inline_cache.get(code)
     if _unbound_symbols is not None:
         _populate_unbound(kwds, _unbound_symbols, locals, globals)
         args = sorted(kwds.items())
         arg_sigs = tuple([(get_type(value, ctx), arg) for arg, value in args])
@@ -156,10 +166,11 @@
     # Fast path if this has been called in this session.
     _unbound_symbols = _cython_inline_cache.get(code)
     if _unbound_symbols is not None:
         _populate_unbound(kwds, _unbound_symbols, locals, globals)
         args = sorted(kwds.items())
         arg_sigs = tuple([(get_type(value, ctx), arg) for arg, value in args])
-        invoke = _cython_inline_cache.get((code, arg_sigs))
+        key_hash = _inline_key(code, arg_sigs, language_level)
+        invoke = _cython_inline_cache.get((code, arg_sigs, key_hash))
         if invoke is not None:
             arg_list = [arg[1] for arg in args]
             return invoke(*arg_list)
@@ -180,10 +191,6 @@
             # Parsing from strings not fully supported (e.g. cimports).
             print("Could not parse code as a string (to extract unbound symbols).")
 
-    cython_compiler_directives = dict(cython_compiler_directives or {})
-    if language_level is not None:
-        cython_compiler_directives['language_level'] = language_level
-
     cimports = []
     for name, arg in list(kwds.items()):
         if arg is cython_module:
@@ -191,8 +198,8 @@
             del kwds[name]
     arg_names = sorted(kwds)
     arg_sigs = tuple([(get_type(kwds[arg], ctx), arg) for arg in arg_names])
-    key = orig_code, arg_sigs, sys.version_info, sys.executable, language_level, Cython.__version__
-    module_name = "_cython_inline_" + hashlib.md5(_unicode(key).encode('utf-8')).hexdigest()
+    key_hash = _inline_key(orig_code, arg_sigs, language_level)
+    module_name = "_cython_inline_" + key_hash
 
     if module_name in sys.modules:
         module = sys.modules[module_name]
@@ -259,7 +266,7 @@
 
         module = load_dynamic(module_name, module_path)
 
-    _cython_inline_cache[orig_code, arg_sigs] = module.__invoke
+    _cython_inline_cache[orig_code, arg_sigs, key_hash] = module.__invoke
     arg_list = [kwds[arg] for arg in arg_names]
     return module.__invoke(*arg_list)
 
diff --git a/Cython/Build/Tests/TestInline.py b/Cython/Build/Tests/TestInline.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0J1aWxkL1Rlc3RzL1Rlc3RJbmxpbmUucHk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0J1aWxkL1Rlc3RzL1Rlc3RJbmxpbmUucHk= 100644
--- a/Cython/Build/Tests/TestInline.py
+++ b/Cython/Build/Tests/TestInline.py
@@ -74,6 +74,18 @@
             6
         )
 
+    def test_lang_version(self):
+        # GH-3419. Caching for inline code didn't always respect compiler directives.
+        inline_divcode = "def f(int a, int b): return a/b"  # the same source is compiled under both language levels
+        self.assertEqual(
+            inline(inline_divcode, language_level=2)['f'](5,2),
+            2  # language_level=2: expects the truncated integer quotient
+        )
+        self.assertEqual(
+            inline(inline_divcode, language_level=3)['f'](5,2),
+            2.5  # language_level=3: expects true division
+        )
+
     if has_numpy:
 
         def test_numpy(self):
diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL0NvZGUucHk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL0NvZGUucHk= 100644
--- a/Cython/Compiler/Code.py
+++ b/Cython/Compiler/Code.py
@@ -280,7 +280,7 @@
         _, ext = os.path.splitext(path)
         if ext in ('.pyx', '.py', '.pxd', '.pxi'):
             comment = '#'
-            strip_comments = partial(re.compile(r'^\s*#.*').sub, '')
+            strip_comments = partial(re.compile(r'^\s*#(?!\s*cython\s*:).*').sub, '')
             rstrip = StringEncoding._unicode.rstrip
         else:
             comment = '/'
@@ -543,7 +543,7 @@
 
         impl = re.sub(r'PY(IDENT|UNICODE)\("([^"]+)"\)', externalise, impl)
         assert 'PYIDENT(' not in impl and 'PYUNICODE(' not in impl
-        return bool(replacements), impl
+        return True, impl
 
     def inject_unbound_methods(self, impl, output):
         """Replace 'UNBOUND_METHOD(type, "name")' by a constant Python identifier cname.
@@ -551,7 +551,6 @@
         if 'CALL_UNBOUND_METHOD(' not in impl:
             return False, impl
 
-        utility_code = set()
         def externalise(matchobj):
             type_cname, method_name, obj_cname, args = matchobj.groups()
             args = [arg.strip() for arg in args[1:].split(',')] if args else []
@@ -567,9 +566,7 @@
             r'\)', externalise, impl)
         assert 'CALL_UNBOUND_METHOD(' not in impl
 
-        for helper in sorted(utility_code):
-            output.use_utility_code(UtilityCode.load_cached(helper, "ObjectHandling.c"))
-        return bool(utility_code), impl
+        return True, impl
 
     def wrap_c_strings(self, impl):
         """Replace CSTRING('''xyz''') by a C compatible string
@@ -2344,12 +2341,5 @@
         self.funcstate.should_declare_error_indicator = True
         if used:
             self.funcstate.uses_error_indicator = True
-        if self.code_config.c_line_in_traceback:
-            cinfo = " %s = %s;" % (Naming.clineno_cname, Naming.line_c_macro)
-        else:
-            cinfo = ""
-
-        return "%s = %s[%s]; %s = %s;%s" % (
-            Naming.filename_cname,
-            Naming.filetable_cname,
+        return "__PYX_MARK_ERR_POS(%s, %s)" % (
             self.lookup_filename(pos[0]),
@@ -2355,10 +2345,8 @@
             self.lookup_filename(pos[0]),
-            Naming.lineno_cname,
-            pos[1],
-            cinfo)
-
-    def error_goto(self, pos):
+            pos[1])
+
+    def error_goto(self, pos, used=True):
         lbl = self.funcstate.error_label
         self.funcstate.use_label(lbl)
         if pos is None:
             return 'goto %s;' % lbl
@@ -2361,7 +2349,10 @@
         lbl = self.funcstate.error_label
         self.funcstate.use_label(lbl)
         if pos is None:
             return 'goto %s;' % lbl
+        self.funcstate.should_declare_error_indicator = True
+        if used:
+            self.funcstate.uses_error_indicator = True
         return "__PYX_ERR(%s, %s, %s)" % (
             self.lookup_filename(pos[0]),
             pos[1],
diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL0V4cHJOb2Rlcy5weQ==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL0V4cHJOb2Rlcy5weQ== 100644
--- a/Cython/Compiler/ExprNodes.py
+++ b/Cython/Compiler/ExprNodes.py
@@ -3222,7 +3222,7 @@
     # {}-delimited portions of an f-string
     #
     # value           ExprNode                The expression itself
-    # conversion_char str or None             Type conversion (!s, !r, !a, or none)
+    # conversion_char str or None             Type conversion (!s, !r, !a, or none; internally also 'd' for integer conversion)
     # format_spec     JoinedStrNode or None   Format string passed to __format__
     # c_format_spec   str or None             If not None, formatting can be done at the C level
 
@@ -3236,6 +3236,7 @@
         's': 'PyObject_Unicode',
         'r': 'PyObject_Repr',
         'a': 'PyObject_ASCII',  # NOTE: mapped to PyObject_Repr() in Py2
+        'd': '__Pyx_PyNumber_IntOrLong',  # NOTE: internal mapping for '%d' formatting
     }.get
 
     def may_be_none(self):
@@ -4200,6 +4201,9 @@
     # Whether we're assigning to a buffer (in that case it needs to be writable)
     writable_needed = False
 
+    # Any indexing temp variables that we need to clean up.
+    index_temps = ()
+
     def analyse_target_types(self, env):
         self.analyse_types(env, getting=False)
 
@@ -4284,7 +4288,7 @@
                     warning(self.pos, "Use boundscheck(False) for faster access", level=1)
 
         # Assign indices to temps of at least (s)size_t to allow further index calculations.
-        index_temps = [self.get_index_in_temp(code,ivar) for ivar in self.indices]
+        self.index_temps = index_temps = [self.get_index_in_temp(code,ivar) for ivar in self.indices]
 
         # Generate buffer access code using these temps
         from . import Buffer
@@ -4370,6 +4374,12 @@
             code.putln("%s = (PyObject *) *%s;" % (self.result(), self.buffer_ptr_code))
             code.putln("__Pyx_INCREF((PyObject*)%s);" % self.result())
 
+    def free_temps(self, code):  # release the temps held in self.index_temps, then defer to the base class
+        for temp in self.index_temps:
+            code.funcstate.release_temp(temp)
+        self.index_temps = ()  # reset, so a repeated call finds nothing left to release
+        super(BufferIndexNode, self).free_temps(code)
+
 
 class MemoryViewIndexNode(BufferIndexNode):
 
@@ -5461,7 +5471,7 @@
         func_type = self.function_type()
         if func_type.is_pyobject:
             self.gil_error()
-        elif not getattr(func_type, 'nogil', False):
+        elif not func_type.is_error and not getattr(func_type, 'nogil', False):
             self.gil_error()
 
     gil_message = "Calling gil-requiring function"
@@ -5829,6 +5839,17 @@
         if function.is_name or function.is_attribute:
             code.globalstate.use_entry_utility_code(function.entry)
 
+        abs_function_cnames = ('abs', 'labs', '__Pyx_abs_longlong')
+        is_signed_int = self.type.is_int and self.type.signed
+        if self.overflowcheck and is_signed_int and function.result() in abs_function_cnames:
+            code.globalstate.use_utility_code(UtilityCode.load_cached("Common", "Overflow.c"))
+            code.putln('if (unlikely(%s == __PYX_MIN(%s))) {\
+                PyErr_SetString(PyExc_OverflowError,\
+                                "Trying to take the absolute value of the most negative integer is not defined."); %s; }' % (
+                            self.args[0].result(),
+                            self.args[0].type.empty_declaration_code(),
+                            code.error_goto(self.pos)))
+
         if not function.type.is_pyobject or len(self.arg_tuple.args) > 1 or (
                 self.arg_tuple.args and self.arg_tuple.is_literal):
             super(SimpleCallNode, self).generate_evaluation_code(code)
@@ -5931,13 +5952,7 @@
                                             self.result() if self.type.is_pyobject else None,
                                             func_type.exception_value, self.nogil)
                 else:
-                    if (self.overflowcheck
-                        and self.type.is_int
-                        and self.type.signed
-                        and self.function.result() in ('abs', 'labs', '__Pyx_abs_longlong')):
-                        goto_error = 'if (unlikely(%s < 0)) { PyErr_SetString(PyExc_OverflowError, "value too large"); %s; }' % (
-                            self.result(), code.error_goto(self.pos))
-                    elif exc_checks:
+                    if exc_checks:
                         goto_error = code.error_goto_if(" && ".join(exc_checks), self.pos)
                     else:
                         goto_error = ""
@@ -9387,7 +9402,7 @@
         if self.specialized_cpdefs or self.is_specialization:
             code.globalstate.use_utility_code(
                 UtilityCode.load_cached("FusedFunction", "CythonFunction.c"))
-            constructor = "__pyx_FusedFunction_NewEx"
+            constructor = "__pyx_FusedFunction_New"
         else:
             code.globalstate.use_utility_code(
                 UtilityCode.load_cached("CythonFunction", "CythonFunction.c"))
@@ -9391,7 +9406,7 @@
         else:
             code.globalstate.use_utility_code(
                 UtilityCode.load_cached("CythonFunction", "CythonFunction.c"))
-            constructor = "__Pyx_CyFunction_NewEx"
+            constructor = "__Pyx_CyFunction_New"
 
         if self.code_object:
             code_object_result = self.code_object.py_result()
@@ -10688,6 +10703,11 @@
             code.putln(code.error_goto(self.operand.pos))
             code.putln("}")
 
-        code.putln("%s = __pyx_format_from_typeinfo(&%s);" %
-                                                (format_temp, type_info))
+        code.putln("%s = __pyx_format_from_typeinfo(&%s); %s" % (
+            format_temp,
+            type_info,
+            code.error_goto_if_null(format_temp, self.pos),
+        ))
+        code.put_gotref(format_temp)
+
         buildvalue_fmt = " __PYX_BUILD_PY_SSIZE_T " * len(shapes)
@@ -10693,12 +10713,10 @@
         buildvalue_fmt = " __PYX_BUILD_PY_SSIZE_T " * len(shapes)
-        code.putln('%s = Py_BuildValue((char*) "(" %s ")", %s);' % (
-            shapes_temp, buildvalue_fmt, ", ".join(shapes)))
-
-        err = "!%s || !%s || !PyBytes_AsString(%s)" % (format_temp,
-                                                       shapes_temp,
-                                                       format_temp)
-        code.putln(code.error_goto_if(err, self.pos))
-        code.put_gotref(format_temp)
+        code.putln('%s = Py_BuildValue((char*) "(" %s ")", %s); %s' % (
+            shapes_temp,
+            buildvalue_fmt,
+            ", ".join(shapes),
+            code.error_goto_if_null(shapes_temp, self.pos),
+        ))
         code.put_gotref(shapes_temp)
 
         tup = (self.result(), shapes_temp, itemsize, format_temp,
@@ -10853,7 +10871,10 @@
         typeinfo_entry = typeinfo_module.lookup('type_info')
         return PyrexTypes.CFakeReferenceType(PyrexTypes.c_const_type(typeinfo_entry.type))
 
-    def analyse_types(self, env):
+    cpp_message = 'typeid operator'
+
+    def analyse_types(self, env):
+        self.cpp_check(env)
         type_info = self.get_type_info_type(env)
         if not type_info:
             self.error("The 'libcpp.typeinfo' module must be cimported to use the typeid() operator")
diff --git a/Cython/Compiler/FlowControl.py b/Cython/Compiler/FlowControl.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL0Zsb3dDb250cm9sLnB5..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL0Zsb3dDb250cm9sLnB5 100644
--- a/Cython/Compiler/FlowControl.py
+++ b/Cython/Compiler/FlowControl.py
@@ -884,6 +884,12 @@
         self.mark_position(node)
         return node
 
+    def visit_SizeofVarNode(self, node):
+        return node  # returned as-is; the node's children are not visited here
+
+    def visit_TypeidNode(self, node):
+        return node  # returned as-is; the node's children are not visited here
+
     def visit_IfStatNode(self, node):
         next_block = self.flow.newblock()
         parent = self.flow.block
@@ -1192,8 +1198,6 @@
         if self.flow.loops:
             self.flow.loops[-1].exceptions.append(descr)
         self.flow.block = body_block
-        ## XXX: Is it still required
-        body_block.add_child(entry_point)
         self.flow.nextblock()
         self._visit(node.body)
         self.flow.exceptions.pop()
@@ -1227,11 +1231,18 @@
         self.mark_position(node)
         self.visitchildren(node)
 
-        for exception in self.flow.exceptions[::-1]:
-            if exception.finally_enter:
-                self.flow.block.add_child(exception.finally_enter)
-                if exception.finally_exit:
-                    exception.finally_exit.add_child(self.flow.exit_point)
+        outer_exception_handlers = iter(self.flow.exceptions[::-1])
+        for handler in outer_exception_handlers:
+            if handler.finally_enter:
+                self.flow.block.add_child(handler.finally_enter)
+                if handler.finally_exit:
+                    # 'return' goes to function exit, or to the next outer 'finally' clause
+                    exit_point = self.flow.exit_point
+                    for next_handler in outer_exception_handlers:
+                        if next_handler.finally_enter:
+                            exit_point = next_handler.finally_enter
+                            break
+                    handler.finally_exit.add_child(exit_point)
                 break
         else:
             if self.flow.block:
diff --git a/Cython/Compiler/MemoryView.py b/Cython/Compiler/MemoryView.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL01lbW9yeVZpZXcucHk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL01lbW9yeVZpZXcucHk= 100644
--- a/Cython/Compiler/MemoryView.py
+++ b/Cython/Compiler/MemoryView.py
@@ -291,7 +291,6 @@
 
             dim += 1
             access, packing = self.type.axes[dim]
-            error_goto = code.error_goto(index.pos)
 
             if isinstance(index, ExprNodes.SliceNode):
                 # slice, unspecified dimension, or part of ellipsis
@@ -308,6 +307,7 @@
                     util_name = "SimpleSlice"
                 else:
                     util_name = "ToughSlice"
+                    d['error_goto'] = code.error_goto(index.pos)
 
                 new_ndim += 1
             else:
@@ -325,5 +325,5 @@
                 d = dict(
                     locals(),
                     wraparound=int(directives['wraparound']),
-                    boundscheck=int(directives['boundscheck'])
+                    boundscheck=int(directives['boundscheck']),
                 )
@@ -329,4 +329,6 @@
                 )
+                if d['boundscheck']:
+                    d['error_goto'] = code.error_goto(index.pos)
                 util_name = "SliceIndex"
 
             _, impl = TempitaUtilityCode.load_as_string(util_name, "MemoryView_C.c", context=d)
diff --git a/Cython/Compiler/ModuleNode.py b/Cython/Compiler/ModuleNode.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL01vZHVsZU5vZGUucHk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL01vZHVsZU5vZGUucHk= 100644
--- a/Cython/Compiler/ModuleNode.py
+++ b/Cython/Compiler/ModuleNode.py
@@ -29,7 +29,7 @@
 from .Errors import error, warning
 from .PyrexTypes import py_object_type
 from ..Utils import open_new_file, replace_suffix, decode_filename, build_hex_version
-from .Code import UtilityCode, IncludeCode
+from .Code import UtilityCode, IncludeCode, TempitaUtilityCode
 from .StringEncoding import EncodedString
 from .Pythran import has_np_pythran
 
@@ -580,7 +580,7 @@
             definition = module is env
             type_entries = []
             for entry in module.type_entries:
-                if entry.type.is_ctuple:
+                if entry.type.is_ctuple and entry.used:
                     if entry.name not in ctuple_names:
                         ctuple_names.add(entry.name)
                         type_entries.append(entry)
@@ -658,4 +658,5 @@
         self._put_setup_code(code, "PythonCompatibility")
         self._put_setup_code(code, "MathInitCode")
 
+        # Using "(void)cname" to prevent "unused" warnings.
         if options.c_line_in_traceback:
@@ -661,4 +662,4 @@
         if options.c_line_in_traceback:
-            cinfo = "%s = %s; " % (Naming.clineno_cname, Naming.line_c_macro)
+            cinfo = "%s = %s; (void)%s; " % (Naming.clineno_cname, Naming.line_c_macro, Naming.clineno_cname)
         else:
             cinfo = ""
@@ -663,11 +664,13 @@
         else:
             cinfo = ""
-        code.put("""
-#define __PYX_ERR(f_index, lineno, Ln_error) \\
-{ \\
-  %s = %s[f_index]; %s = lineno; %sgoto Ln_error; \\
-}
-""" % (Naming.filename_cname, Naming.filetable_cname, Naming.lineno_cname, cinfo))
+        code.putln("#define __PYX_MARK_ERR_POS(f_index, lineno) \\")
+        code.putln("    { %s = %s[f_index]; (void)%s; %s = lineno; (void)%s; %s}" % (
+            Naming.filename_cname, Naming.filetable_cname, Naming.filename_cname,
+            Naming.lineno_cname, Naming.lineno_cname,
+            cinfo
+        ))
+        code.putln("#define __PYX_ERR(f_index, lineno, Ln_error) \\")
+        code.putln("    { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }")
 
         code.putln("")
         self.generate_extern_c_macro_definition(code)
@@ -1252,6 +1255,9 @@
                         self.generate_dict_getter_function(scope, code)
                     if scope.defines_any_special(TypeSlots.richcmp_special_methods):
                         self.generate_richcmp_function(scope, code)
+                    for slot in TypeSlots.PyNumberMethods:
+                        if slot.is_binop and scope.defines_any_special(slot.user_methods):
+                            self.generate_binop_function(scope, slot, code)
                     self.generate_property_accessors(scope, code)
                     self.generate_method_table(scope, code)
                     self.generate_getset_table(scope, code)
@@ -1551,5 +1557,7 @@
         code.putln("{")
         code.putln("PyObject *etype, *eval, *etb;")
         code.putln("PyErr_Fetch(&etype, &eval, &etb);")
-        code.putln("++Py_REFCNT(o);")
+        # increase the refcount while we are calling into user code
+        # to prevent recursive deallocation
+        code.putln("__Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1);")
         code.putln("%s(o);" % entry.func_cname)
@@ -1555,5 +1563,5 @@
         code.putln("%s(o);" % entry.func_cname)
-        code.putln("--Py_REFCNT(o);")
+        code.putln("__Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1);")
         code.putln("PyErr_Restore(etype, eval, etb);")
         code.putln("}")
 
@@ -1889,6 +1897,64 @@
         code.putln("}")  # switch
         code.putln("}")
 
+    def generate_binop_function(self, scope, slot, code):
+        """Generate the C function (or alias macro) implementing binop 'slot' of 'scope'."""
+        func_name = scope.mangle_internal(slot.slot_name)
+        if scope.directives['c_api_binop_methods']:  # alias the slot to the left-hand method
+            code.putln('#define %s %s' % (func_name, slot.left_slot.slot_code(scope)))
+            return
+        error(self.pos,
+              "The 'c_api_binop_methods' directive is only supported for forward compatibility"
+              " and must be True.")
+
+        if slot.left_slot.signature == TypeSlots.binaryfunc:  # validate before emitting any C code
+            slot_type = 'binaryfunc'
+            extra_arg = extra_arg_decl = ''
+        elif slot.left_slot.signature == TypeSlots.ternaryfunc:
+            slot_type = 'ternaryfunc'
+            extra_arg = ', extra_arg'
+            extra_arg_decl = ', PyObject* extra_arg'
+        else:  # report at the module position ('entry' is not defined in this scope) and stop
+            error(self.pos, "Unexpected type slot signature: %s" % slot)
+            return
+
+        code.putln()
+        preprocessor_guard = slot.preprocessor_guard_code()
+        if preprocessor_guard:
+            code.putln(preprocessor_guard)
+
+        def has_slot_method(method_name):  # True if 'scope' has a special method entry with a C name
+            entry = scope.lookup(method_name)
+            return bool(entry and entry.is_special and entry.func_cname)
+        def call_slot_method(method_name, reverse):  # C call expression for one operand order
+            entry = scope.lookup(method_name)
+            if entry and entry.is_special and entry.func_cname:
+                return "%s(%s%s)" % (
+                    entry.func_cname,
+                    "right, left" if reverse else "left, right",
+                    extra_arg)
+            else:  # no such method in this scope: go through the base type's slot instead
+                return '%s_maybe_call_slot(%s, left, right %s)' % (
+                    func_name,
+                    'Py_TYPE(right)->tp_base' if reverse else 'Py_TYPE(left)->tp_base',
+                    extra_arg)
+
+        code.putln(TempitaUtilityCode.load_as_string(  # render the "BinopSlot" template
+            "BinopSlot", "ExtensionTypes.c",
+            context={
+                "func_name": func_name,
+                "slot_name": slot.slot_name,
+                "overloads_left": int(has_slot_method(slot.left_slot.method_name)),
+                "call_left": call_slot_method(slot.left_slot.method_name, reverse=False),
+                "call_right": call_slot_method(slot.right_slot.method_name, reverse=True),
+                "type_cname": scope.parent_type.typeptr_cname,
+                "slot_type": slot_type,
+                "extra_arg": extra_arg,
+                "extra_arg_decl": extra_arg_decl,
+            })[1])
+        if preprocessor_guard:
+            code.putln("#endif")
+
     def generate_getattro_function(self, scope, code):
         # First try to get the attribute using __getattribute__, if defined, or
         # PyObject_GenericGetAttr.
@@ -2293,7 +2359,7 @@
         code.exit_cfunc_scope()  # done with labels
 
     def generate_module_init_func(self, imported_modules, env, code):
-        subfunction = self.mod_init_subfunction(self.scope, code)
+        subfunction = self.mod_init_subfunction(self.pos, self.scope, code)
 
         code.enter_cfunc_scope(self.scope)
         code.putln("")
@@ -2419,6 +2485,6 @@
 
         if Options.cache_builtins:
             code.putln("/*--- Builtin init code ---*/")
-            code.put_error_if_neg(None, "__Pyx_InitCachedBuiltins()")
+            code.put_error_if_neg(self.pos, "__Pyx_InitCachedBuiltins()")
 
         code.putln("/*--- Constants init code ---*/")
@@ -2423,6 +2489,6 @@
 
         code.putln("/*--- Constants init code ---*/")
-        code.put_error_if_neg(None, "__Pyx_InitCachedConstants()")
+        code.put_error_if_neg(self.pos, "__Pyx_InitCachedConstants()")
 
         code.putln("/*--- Global type/function init code ---*/")
 
@@ -2513,7 +2579,7 @@
 
         code.exit_cfunc_scope()
 
-    def mod_init_subfunction(self, scope, orig_code):
+    def mod_init_subfunction(self, pos, scope, orig_code):
         """
         Return a context manager that allows deviating the module init code generation
         into a separate function and instead inserts a call to it.
@@ -2569,9 +2635,8 @@
                 code.putln("")
 
                 if needs_error_handling:
-                    self.call_code.use_label(orig_code.error_label)
-                    self.call_code.putln("if (unlikely(%s() != 0)) goto %s;" % (
-                        self.cfunc_name, orig_code.error_label))
+                    self.call_code.putln(
+                        self.call_code.error_goto_if_neg("%s()" % self.cfunc_name, pos))
                 else:
                     self.call_code.putln("(void)%s();" % self.cfunc_name)
                 self.call_code = None
diff --git a/Cython/Compiler/Nodes.py b/Cython/Compiler/Nodes.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL05vZGVzLnB5..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL05vZGVzLnB5 100644
--- a/Cython/Compiler/Nodes.py
+++ b/Cython/Compiler/Nodes.py
@@ -1023,8 +1023,6 @@
                 if scope is None:
                     # Maybe it's a cimport.
                     scope = env.find_imported_module(self.module_path, self.pos)
-                    if scope:
-                        scope.fused_to_specific = env.fused_to_specific
             else:
                 scope = env
 
diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL09wdGltaXplLnB5..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL09wdGltaXplLnB5 100644
--- a/Cython/Compiler/Optimize.py
+++ b/Cython/Compiler/Optimize.py
@@ -4292,10 +4292,10 @@
         return self.visit_BinopNode(node)
 
     _parse_string_format_regex = (
-        u'(%(?:'            # %...
-        u'(?:[0-9]+|[ ])?'  # width (optional) or space prefix fill character (optional)
-        u'(?:[.][0-9]+)?'   # precision (optional)
-        u')?.)'             # format type (or something different for unsupported formats)
+        u'(%(?:'              # %...
+        u'(?:[-0-9]+|[ ])?'   # width (optional) or space prefix fill character (optional)
+        u'(?:[.][0-9]+)?'     # precision (optional)
+        u')?.)'               # format type (or something different for unsupported formats)
     )
 
     def _build_fstring(self, pos, ustring, format_args):
@@ -4327,8 +4327,9 @@
                 break
             if format_type in u'asrfdoxX':
                 format_spec = s[1:]
+                conversion_char = None
                 if format_type in u'doxX' and u'.' in format_spec:
                     # Precision is not allowed for integers in format(), but ok in %-formatting.
                     can_be_optimised = False
                 elif format_type in u'ars':
                     format_spec = format_spec[:-1]
@@ -4330,7 +4331,17 @@
                 if format_type in u'doxX' and u'.' in format_spec:
                     # Precision is not allowed for integers in format(), but ok in %-formatting.
                     can_be_optimised = False
                 elif format_type in u'ars':
                     format_spec = format_spec[:-1]
+                    conversion_char = format_type
+                    if format_spec.startswith('0'):
+                        format_spec = '>' + format_spec[1:]  # right-alignment '%05s' spells '{:>5}'
+                elif format_type == u'd':
+                    # '%d' formatting supports float, but '{obj:d}' does not => convert to int first.
+                    conversion_char = 'd'
+
+                if format_spec.startswith('-'):
+                    format_spec = '<' + format_spec[1:]  # left-alignment '%-5s' spells '{:<5}'
+
                 substrings.append(ExprNodes.FormattedValueNode(
                     arg.pos, value=arg,
@@ -4335,6 +4346,6 @@
                 substrings.append(ExprNodes.FormattedValueNode(
                     arg.pos, value=arg,
-                    conversion_char=format_type if format_type in u'ars' else None,
+                    conversion_char=conversion_char,
                     format_spec=ExprNodes.UnicodeNode(
                         pos, value=EncodedString(format_spec), constant_result=format_spec)
                         if format_spec else None,
diff --git a/Cython/Compiler/Options.py b/Cython/Compiler/Options.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL09wdGlvbnMucHk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL09wdGlvbnMucHk= 100644
--- a/Cython/Compiler/Options.py
+++ b/Cython/Compiler/Options.py
@@ -178,6 +178,7 @@
     'auto_pickle': None,
     'cdivision': False,  # was True before 0.12
     'cdivision_warnings': False,
+    'c_api_binop_methods': True,  # Change for 3.0
     'overflowcheck': False,
     'overflowcheck.fold': True,
     'always_allow_keywords': False,
diff --git a/Cython/Compiler/ParseTreeTransforms.py b/Cython/Compiler/ParseTreeTransforms.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL1BhcnNlVHJlZVRyYW5zZm9ybXMucHk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL1BhcnNlVHJlZVRyYW5zZm9ybXMucHk= 100644
--- a/Cython/Compiler/ParseTreeTransforms.py
+++ b/Cython/Compiler/ParseTreeTransforms.py
@@ -1161,6 +1161,7 @@
     def visit_CallNode(self, node):
         self.visit(node.function)
         if not self.parallel_directive:
+            self.visitchildren(node, exclude=('function',))
             return node
 
         # We are a parallel directive, replace this node with the
diff --git a/Cython/Compiler/Parsing.pxd b/Cython/Compiler/Parsing.pxd
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL1BhcnNpbmcucHhk..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL1BhcnNpbmcucHhk 100644
--- a/Cython/Compiler/Parsing.pxd
+++ b/Cython/Compiler/Parsing.pxd
@@ -69,7 +69,8 @@
 @cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint)
 cdef p_string_literal(PyrexScanner s, kind_override=*)
 cdef _append_escape_sequence(kind, builder, unicode escape_sequence, PyrexScanner s)
-@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4)
+cdef tuple _f_string_error_pos(pos, string, Py_ssize_t i)
+@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, next_start=Py_ssize_t)
 cdef list p_f_string(PyrexScanner s, unicode_value, pos, bint is_raw)
 @cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, quote_char=Py_UCS4, NO_CHAR=Py_UCS4)
 cdef tuple p_f_string_expr(PyrexScanner s, unicode_value, pos, Py_ssize_t starting_index, bint is_raw)
diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL1BhcnNpbmcucHk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL1BhcnNpbmcucHk= 100644
--- a/Cython/Compiler/Parsing.py
+++ b/Cython/Compiler/Parsing.py
@@ -882,6 +882,7 @@
     pos = s.position()
     is_python3_source = s.context.language_level >= 3
     has_non_ascii_literal_characters = False
+    string_start_pos = (pos[0], pos[1], pos[2] + len(s.systring))
     kind_string = s.systring.rstrip('"\'').lower()
     if len(kind_string) > 1:
         if len(set(kind_string)) != len(kind_string):
@@ -965,7 +966,7 @@
                 s.error("bytes can only contain ASCII literal characters.", pos=pos)
             bytes_value = None
     if kind == 'f':
-        unicode_value = p_f_string(s, unicode_value, pos, is_raw='r' in kind_string)
+        unicode_value = p_f_string(s, unicode_value, string_start_pos, is_raw='r' in kind_string)
     s.next()
     return (kind, bytes_value, unicode_value)
 
@@ -1037,6 +1038,10 @@
     for is_raw in (True, False)]
 
 
+def _f_string_error_pos(pos, string, i):
+    return (pos[0], pos[1], pos[2] + i + 1)  # (src, line, column + i + 1); FIXME: handle newlines in string
+
+
 def p_f_string(s, unicode_value, pos, is_raw):
     # Parses a PEP 498 f-string literal into a list of nodes. Nodes are either UnicodeNodes
     # or FormattedValueNodes.
@@ -1044,8 +1049,7 @@
     next_start = 0
     size = len(unicode_value)
     builder = StringEncoding.UnicodeLiteralBuilder()
-    error_pos = list(pos)  # [src, line, column]
     _parse_seq = _parse_escape_sequences_raw if is_raw else _parse_escape_sequences
 
     while next_start < size:
         end = next_start
@@ -1048,7 +1052,6 @@
     _parse_seq = _parse_escape_sequences_raw if is_raw else _parse_escape_sequences
 
     while next_start < size:
         end = next_start
-        error_pos[2] = pos[2] + end  # FIXME: handle newlines in string
         match = _parse_seq(unicode_value, next_start)
         if match is None:
@@ -1053,6 +1056,6 @@
         match = _parse_seq(unicode_value, next_start)
         if match is None:
-            error(tuple(error_pos), "Invalid escape sequence")
+            error(_f_string_error_pos(pos, unicode_value, next_start), "Invalid escape sequence")
 
         next_start = match.end()
         part = match.group()
@@ -1076,7 +1079,8 @@
             if part == '}}':
                 builder.append('}')
             else:
-                s.error("f-string: single '}' is not allowed", pos=tuple(error_pos))
+                error(_f_string_error_pos(pos, unicode_value, end),
+                      "f-string: single '}' is not allowed")
         else:
             builder.append(part)
 
@@ -1097,6 +1101,7 @@
     nested_depth = 0
     quote_char = NO_CHAR
     in_triple_quotes = False
+    backslash_reported = False
 
     while True:
         if i >= size:
@@ -1100,8 +1105,8 @@
 
     while True:
         if i >= size:
-            s.error("missing '}' in format string expression")
+            break  # error will be reported below
         c = unicode_value[i]
 
         if quote_char != NO_CHAR:
             if c == '\\':
@@ -1104,9 +1109,12 @@
         c = unicode_value[i]
 
         if quote_char != NO_CHAR:
             if c == '\\':
-                error_pos = (pos[0], pos[1] + i, pos[2])  # FIXME: handle newlines in string
-                error(error_pos, "backslashes not allowed in f-strings")
+                # avoid redundant error reports along '\' sequences
+                if not backslash_reported:
+                    error(_f_string_error_pos(pos, unicode_value, i),
+                          "backslashes not allowed in f-strings")
+                backslash_reported = True
             elif c == quote_char:
                 if in_triple_quotes:
                     if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
@@ -1125,7 +1133,8 @@
         elif nested_depth != 0 and c in '}])':
             nested_depth -= 1
         elif c == '#':
-            s.error("format string cannot include #")
+            error(_f_string_error_pos(pos, unicode_value, i),
+                  "format string cannot include #")
         elif nested_depth == 0 and c in '!:}':
             # allow != as a special case
             if c == '!' and i + 1 < size and unicode_value[i + 1] == '=':
@@ -1141,8 +1150,9 @@
     expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2)  # TODO: find exact code position (concat, multi-line, ...)
 
     if not expr_str.strip():
-        error(expr_pos, "empty expression not allowed in f-string")
+        error(_f_string_error_pos(pos, unicode_value, starting_index),
+              "empty expression not allowed in f-string")
 
     if terminal_char == '!':
         i += 1
         if i + 2 > size:
@@ -1145,8 +1155,8 @@
 
     if terminal_char == '!':
         i += 1
         if i + 2 > size:
-            error(expr_pos, "invalid conversion char at end of string")
+            pass  # error will be reported below
         else:
             conversion_char = unicode_value[i]
             i += 1
@@ -1159,7 +1169,7 @@
         start_format_spec = i + 1
         while True:
             if i >= size:
-                s.error("missing '}' in format specifier", pos=expr_pos)
+                break  # error will be reported below
             c = unicode_value[i]
             if not in_triple_quotes and not in_string:
                 if c == '{':
@@ -1181,7 +1191,9 @@
         format_spec_str = unicode_value[start_format_spec:i]
 
     if terminal_char != '}':
-        s.error("missing '}' in format string expression', found '%s'" % terminal_char)
+        error(_f_string_error_pos(pos, unicode_value, i),
+              "missing '}' in format string expression" + (
+                  ", found '%s'" % terminal_char if terminal_char else ""))
 
     # parse the expression as if it was surrounded by parentheses
     buf = StringIO('(%s)' % expr_str)
@@ -1190,7 +1202,7 @@
 
     # validate the conversion char
     if conversion_char is not None and not ExprNodes.FormattedValueNode.find_conversion_func(conversion_char):
-        error(pos, "invalid conversion character '%s'" % conversion_char)
+        error(expr_pos, "invalid conversion character '%s'" % conversion_char)
 
     # the format spec is itself treated like an f-string
     if format_spec_str:
@@ -2237,7 +2249,7 @@
             s.error('decorator not allowed here')
         s.level = ctx.level
         decorators = p_decorators(s)
-        if not ctx.allow_struct_enum_decorator and s.sy not in ('def', 'cdef', 'cpdef', 'class'):
+        if not ctx.allow_struct_enum_decorator and s.sy not in ('def', 'cdef', 'cpdef', 'class', 'async'):
             if s.sy == 'IDENT' and s.systring == 'async':
                 pass  # handled below
             else:
diff --git a/Cython/Compiler/PyrexTypes.py b/Cython/Compiler/PyrexTypes.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL1B5cmV4VHlwZXMucHk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL1B5cmV4VHlwZXMucHk= 100644
--- a/Cython/Compiler/PyrexTypes.py
+++ b/Cython/Compiler/PyrexTypes.py
@@ -4034,6 +4034,9 @@
         env.use_utility_code(self._convert_from_py_code)
         return True
 
+    def cast_code(self, expr_code):
+        return expr_code  # the expression code is returned unchanged, i.e. no cast is added
+
 
 def c_tuple_type(components):
     components = tuple(components)
diff --git a/Cython/Compiler/Scanning.py b/Cython/Compiler/Scanning.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL1NjYW5uaW5nLnB5..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL1NjYW5uaW5nLnB5 100644
--- a/Cython/Compiler/Scanning.py
+++ b/Cython/Compiler/Scanning.py
@@ -41,8 +41,8 @@
     "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
     "continue", "return", "raise", "import", "exec", "try",
     "except", "finally", "while", "if", "elif", "else", "for",
-    "in", "assert", "and", "or", "not", "is", "in", "lambda",
-    "from", "yield", "with", "nonlocal",
+    "in", "assert", "and", "or", "not", "is", "lambda",
+    "from", "yield", "with",
 ]
 
 pyx_reserved_words = py_reserved_words + [
diff --git a/Cython/Compiler/TypeSlots.py b/Cython/Compiler/TypeSlots.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0NvbXBpbGVyL1R5cGVTbG90cy5weQ==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0NvbXBpbGVyL1R5cGVTbG90cy5weQ== 100644
--- a/Cython/Compiler/TypeSlots.py
+++ b/Cython/Compiler/TypeSlots.py
@@ -180,10 +180,10 @@
     #  ifdef                         Full #ifdef string that slot is wrapped in. Using this causes py3, py2 and flags to be ignored.)
 
     def __init__(self, slot_name, dynamic=False, inherited=False,
-                 py3=True, py2=True, ifdef=None):
+                 py3=True, py2=True, ifdef=None, is_binop=False):
         self.slot_name = slot_name
         self.is_initialised_dynamically = dynamic
         self.is_inherited = inherited
         self.ifdef = ifdef
         self.py3 = py3
         self.py2 = py2
@@ -184,9 +184,10 @@
         self.slot_name = slot_name
         self.is_initialised_dynamically = dynamic
         self.is_inherited = inherited
         self.ifdef = ifdef
         self.py3 = py3
         self.py2 = py2
+        self.is_binop = is_binop
 
     def preprocessor_guard_code(self):
         ifdef = self.ifdef
@@ -405,6 +406,17 @@
             return self.default_value
 
 
+class BinopSlot(SyntheticSlot):  # slot backed by an operator method and its '__r...' counterpart
+    def __init__(self, signature, slot_name, left_method, **kargs):
+        assert left_method.startswith('__')  # the name derivation below relies on the '__' prefix
+        right_method = '__r' + left_method[2:]  # e.g. '__add__' -> '__radd__'
+        SyntheticSlot.__init__(
+                self, slot_name, [left_method, right_method], "0", is_binop=True, **kargs)
+        # MethodSlot causes special method registration.
+        self.left_slot = MethodSlot(signature, "", left_method)  # created with an empty slot name
+        self.right_slot = MethodSlot(signature, "", right_method)
+
+
 class RichcmpSlot(MethodSlot):
     def slot_code(self, scope):
         entry = scope.lookup_here(self.method_name)
@@ -728,15 +740,15 @@
 PyNumberMethods_Py3_GUARD = "PY_MAJOR_VERSION < 3 || (CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x03050000)"
 
 PyNumberMethods = (
-    MethodSlot(binaryfunc, "nb_add", "__add__"),
-    MethodSlot(binaryfunc, "nb_subtract", "__sub__"),
-    MethodSlot(binaryfunc, "nb_multiply", "__mul__"),
-    MethodSlot(binaryfunc, "nb_divide", "__div__", ifdef = PyNumberMethods_Py3_GUARD),
-    MethodSlot(binaryfunc, "nb_remainder", "__mod__"),
-    MethodSlot(binaryfunc, "nb_divmod", "__divmod__"),
-    MethodSlot(ternaryfunc, "nb_power", "__pow__"),
+    BinopSlot(binaryfunc, "nb_add", "__add__"),
+    BinopSlot(binaryfunc, "nb_subtract", "__sub__"),
+    BinopSlot(binaryfunc, "nb_multiply", "__mul__"),
+    BinopSlot(binaryfunc, "nb_divide", "__div__", ifdef = PyNumberMethods_Py3_GUARD),
+    BinopSlot(binaryfunc, "nb_remainder", "__mod__"),
+    BinopSlot(binaryfunc, "nb_divmod", "__divmod__"),
+    BinopSlot(ternaryfunc, "nb_power", "__pow__"),
     MethodSlot(unaryfunc, "nb_negative", "__neg__"),
     MethodSlot(unaryfunc, "nb_positive", "__pos__"),
     MethodSlot(unaryfunc, "nb_absolute", "__abs__"),
     MethodSlot(inquiry, "nb_nonzero", "__nonzero__", py3 = ("nb_bool", "__bool__")),
     MethodSlot(unaryfunc, "nb_invert", "__invert__"),
@@ -738,13 +750,13 @@
     MethodSlot(unaryfunc, "nb_negative", "__neg__"),
     MethodSlot(unaryfunc, "nb_positive", "__pos__"),
     MethodSlot(unaryfunc, "nb_absolute", "__abs__"),
     MethodSlot(inquiry, "nb_nonzero", "__nonzero__", py3 = ("nb_bool", "__bool__")),
     MethodSlot(unaryfunc, "nb_invert", "__invert__"),
-    MethodSlot(binaryfunc, "nb_lshift", "__lshift__"),
-    MethodSlot(binaryfunc, "nb_rshift", "__rshift__"),
-    MethodSlot(binaryfunc, "nb_and", "__and__"),
-    MethodSlot(binaryfunc, "nb_xor", "__xor__"),
-    MethodSlot(binaryfunc, "nb_or", "__or__"),
+    BinopSlot(binaryfunc, "nb_lshift", "__lshift__"),
+    BinopSlot(binaryfunc, "nb_rshift", "__rshift__"),
+    BinopSlot(binaryfunc, "nb_and", "__and__"),
+    BinopSlot(binaryfunc, "nb_xor", "__xor__"),
+    BinopSlot(binaryfunc, "nb_or", "__or__"),
     EmptySlot("nb_coerce", ifdef = PyNumberMethods_Py3_GUARD),
     MethodSlot(unaryfunc, "nb_int", "__int__", fallback="__long__"),
     MethodSlot(unaryfunc, "nb_long", "__long__", fallback="__int__", py3 = "<RESERVED>"),
@@ -767,8 +779,8 @@
 
     # Added in release 2.2
     # The following require the Py_TPFLAGS_HAVE_CLASS flag
-    MethodSlot(binaryfunc, "nb_floor_divide", "__floordiv__"),
-    MethodSlot(binaryfunc, "nb_true_divide", "__truediv__"),
+    BinopSlot(binaryfunc, "nb_floor_divide", "__floordiv__"),
+    BinopSlot(binaryfunc, "nb_true_divide", "__truediv__"),
     MethodSlot(ibinaryfunc, "nb_inplace_floor_divide", "__ifloordiv__"),
     MethodSlot(ibinaryfunc, "nb_inplace_true_divide", "__itruediv__"),
 
@@ -776,7 +788,7 @@
     MethodSlot(unaryfunc, "nb_index", "__index__"),
 
     # Added in release 3.5
-    MethodSlot(binaryfunc, "nb_matrix_multiply", "__matmul__", ifdef="PY_VERSION_HEX >= 0x03050000"),
+    BinopSlot(binaryfunc, "nb_matrix_multiply", "__matmul__", ifdef="PY_VERSION_HEX >= 0x03050000"),
     MethodSlot(ibinaryfunc, "nb_inplace_matrix_multiply", "__imatmul__", ifdef="PY_VERSION_HEX >= 0x03050000"),
 )
 
diff --git a/Cython/Distutils/old_build_ext.py b/Cython/Distutils/old_build_ext.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0Rpc3R1dGlscy9vbGRfYnVpbGRfZXh0LnB5..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0Rpc3R1dGlscy9vbGRfYnVpbGRfZXh0LnB5 100644
--- a/Cython/Distutils/old_build_ext.py
+++ b/Cython/Distutils/old_build_ext.py
@@ -84,9 +84,9 @@
     description = "build C/C++ and Cython extensions (compile/link to build directory)"
 
     sep_by = _build_ext.build_ext.sep_by
-    user_options = _build_ext.build_ext.user_options
-    boolean_options = _build_ext.build_ext.boolean_options
-    help_options = _build_ext.build_ext.help_options
+    user_options = _build_ext.build_ext.user_options[:]
+    boolean_options = _build_ext.build_ext.boolean_options[:]
+    help_options = _build_ext.build_ext.help_options[:]
 
     # Add the pyrex specific data.
     user_options.extend([
diff --git a/Cython/Includes/cpython/datetime.pxd b/Cython/Includes/cpython/datetime.pxd
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0luY2x1ZGVzL2NweXRob24vZGF0ZXRpbWUucHhk..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0luY2x1ZGVzL2NweXRob24vZGF0ZXRpbWUucHhk 100644
--- a/Cython/Includes/cpython/datetime.pxd
+++ b/Cython/Includes/cpython/datetime.pxd
@@ -90,9 +90,9 @@
     int PyDateTime_TIME_GET_MICROSECOND(object o)
 
     # Getters for timedelta (C macros).
-    #int PyDateTime_DELTA_GET_DAYS(object o)
-    #int PyDateTime_DELTA_GET_SECONDS(object o)
-    #int PyDateTime_DELTA_GET_MICROSECONDS(object o)
+    int PyDateTime_DELTA_GET_DAYS(object o)
+    int PyDateTime_DELTA_GET_SECONDS(object o)
+    int PyDateTime_DELTA_GET_MICROSECONDS(object o)
 
     # PyDateTime CAPI object.
     PyDateTime_CAPI *PyDateTimeAPI
diff --git a/Cython/Includes/libcpp/utility.pxd b/Cython/Includes/libcpp/utility.pxd
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0luY2x1ZGVzL2xpYmNwcC91dGlsaXR5LnB4ZA==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0luY2x1ZGVzL2xpYmNwcC91dGlsaXR5LnB4ZA== 100644
--- a/Cython/Includes/libcpp/utility.pxd
+++ b/Cython/Includes/libcpp/utility.pxd
@@ -13,3 +13,17 @@
         bint operator>(pair&, pair&)
         bint operator<=(pair&, pair&)
         bint operator>=(pair&, pair&)
+
+cdef extern from * namespace "cython_std" nogil:  # MSVC keeps __cplusplus at 199711L by default, hence _MSC_VER
+    """
+    #if __cplusplus > 199711L || (defined(_MSC_VER) && _MSC_VER >= 1900)
+    #include <type_traits>
+
+    namespace cython_std {
+    template <typename T> typename std::remove_reference<T>::type&& move(T& t) noexcept { return std::move(t); }
+    template <typename T> typename std::remove_reference<T>::type&& move(T&& t) noexcept { return std::move(t); }
+    }
+
+    #endif
+    """
+    cdef T move[T](T)  # Cython-side declaration of cython_std::move from the verbatim block above
diff --git a/Cython/Includes/numpy/__init__.pxd b/Cython/Includes/numpy/__init__.pxd
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL0luY2x1ZGVzL251bXB5L19faW5pdF9fLnB4ZA==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL0luY2x1ZGVzL251bXB5L19faW5pdF9fLnB4ZA== 100644
--- a/Cython/Includes/numpy/__init__.pxd
+++ b/Cython/Includes/numpy/__init__.pxd
@@ -341,7 +341,6 @@
                 PyObject_Free(info.strides)
                 # info.shape was stored after info.strides in the same block
 
-
     ctypedef unsigned char      npy_bool
 
     ctypedef signed char      npy_byte
@@ -686,7 +685,7 @@
     object PyArray_Choose (ndarray, object, ndarray, NPY_CLIPMODE)
     int PyArray_Sort (ndarray, int, NPY_SORTKIND)
     object PyArray_ArgSort (ndarray, int, NPY_SORTKIND)
-    object PyArray_SearchSorted (ndarray, object, NPY_SEARCHSIDE)
+    object PyArray_SearchSorted (ndarray, object, NPY_SEARCHSIDE, PyObject*)
     object PyArray_ArgMax (ndarray, int, ndarray)
     object PyArray_ArgMin (ndarray, int, ndarray)
     object PyArray_Reshape (ndarray, object)
diff --git a/Cython/Shadow.py b/Cython/Shadow.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1NoYWRvdy5weQ==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1NoYWRvdy5weQ== 100644
--- a/Cython/Shadow.py
+++ b/Cython/Shadow.py
@@ -1,7 +1,7 @@
 # cython.* namespace for pure mode.
 from __future__ import absolute_import
 
-__version__ = "0.29.16"
+__version__ = "0.29.20"
 
 try:
     from __builtin__ import basestring
@@ -123,6 +123,8 @@
 
 final = internal = type_version_tag = no_gc_clear = no_gc = _empty_decorator
 
+binding = lambda _: _empty_decorator
+
 
 _cython_inline = None
 def inline(f, *args, **kwds):
diff --git a/Cython/Utility/Buffer.c b/Cython/Utility/Buffer.c
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1V0aWxpdHkvQnVmZmVyLmM=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1V0aWxpdHkvQnVmZmVyLmM= 100644
--- a/Cython/Utility/Buffer.c
+++ b/Cython/Utility/Buffer.c
@@ -602,9 +602,8 @@
 __pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp)
 {
     const char *ts = *tsp;
-    int i = 0, number;
-    int ndim = ctx->head->field->type->ndim;
-;
+    int i = 0, number, ndim;
+
     ++ts;
     if (ctx->new_count != 1) {
         PyErr_SetString(PyExc_ValueError,
@@ -615,6 +614,9 @@
     /* Process the previous element */
     if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
 
+    // store ndim now, as field advanced by __Pyx_BufFmt_ProcessTypeChunk call
+    ndim = ctx->head->field->type->ndim;
+
     /* Parse all numbers in the format string */
     while (*ts && *ts != ')') {
         // ignore space characters (not using isspace() due to C/C++ problem on MacOS-X)
@@ -757,8 +759,8 @@
       case 'l': case 'L': case 'q': case 'Q':
       case 'f': case 'd': case 'g':
       case 'O': case 'p':
-        if (ctx->enc_type == *ts && got_Z == ctx->is_complex &&
-            ctx->enc_packmode == ctx->new_packmode) {
+        if ((ctx->enc_type == *ts) && (got_Z == ctx->is_complex) &&
+            (ctx->enc_packmode == ctx->new_packmode) && (!ctx->is_valid_array)) {
           /* Continue pooling same type */
           ctx->enc_count += ctx->new_count;
           ctx->new_count = 1;
diff --git a/Cython/Utility/Builtins.c b/Cython/Utility/Builtins.c
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1V0aWxpdHkvQnVpbHRpbnMuYw==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1V0aWxpdHkvQnVpbHRpbnMuYw== 100644
--- a/Cython/Utility/Builtins.c
+++ b/Cython/Utility/Builtins.c
@@ -279,7 +279,8 @@
     {
         PyObject *copy = _PyLong_Copy((PyLongObject*)n);
         if (likely(copy)) {
-            Py_SIZE(copy) = -(Py_SIZE(copy));
+            // negate the size to swap the sign
+            __Pyx_SET_SIZE(copy, -Py_SIZE(copy));
         }
         return copy;
     }
diff --git a/Cython/Utility/CythonFunction.c b/Cython/Utility/CythonFunction.c
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1V0aWxpdHkvQ3l0aG9uRnVuY3Rpb24uYw==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1V0aWxpdHkvQ3l0aG9uRnVuY3Rpb24uYw== 100644
--- a/Cython/Utility/CythonFunction.c
+++ b/Cython/Utility/CythonFunction.c
@@ -1,2 +1,1 @@
-
 
@@ -2,5 +1,6 @@
 
-//////////////////// CythonFunction.proto ////////////////////
+//////////////////// CythonFunctionShared.proto ////////////////////
+
 #define __Pyx_CyFunction_USED 1
 
 #define __Pyx_CYFUNCTION_STATICMETHOD  0x01
@@ -50,10 +50,7 @@
 
 #define __Pyx_CyFunction_Check(obj)  (__Pyx_TypeCheck(obj, __pyx_CyFunctionType))
 
-#define __Pyx_CyFunction_NewEx(ml, flags, qualname, self, module, globals, code) \
-    __Pyx_CyFunction_New(__pyx_CyFunctionType, ml, flags, qualname, self, module, globals, code)
-
-static PyObject *__Pyx_CyFunction_New(PyTypeObject *, PyMethodDef *ml,
+static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml,
                                       int flags, PyObject* qualname,
                                       PyObject *self,
                                       PyObject *module, PyObject *globals,
@@ -72,7 +69,8 @@
 
 static int __pyx_CyFunction_init(void);
 
-//////////////////// CythonFunction ////////////////////
+
+//////////////////// CythonFunctionShared ////////////////////
 //@substitute: naming
 //@requires: CommonStructures.c::FetchCommonType
 ////@requires: ObjectHandling.c::PyObjectGetAttrStr
@@ -446,11 +444,9 @@
 #define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func.m_weakreflist)
 #endif
 
-
-static PyObject *__Pyx_CyFunction_New(PyTypeObject *type, PyMethodDef *ml, int flags, PyObject* qualname,
-                                      PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
-    __pyx_CyFunctionObject *op = PyObject_GC_New(__pyx_CyFunctionObject, type);
-    if (op == NULL)
+static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname,
+                                       PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
+    if (unlikely(op == NULL))
         return NULL;
     op->flags = flags;
     __Pyx_CyFunction_weakreflist(op) = NULL;
@@ -478,7 +474,6 @@
     op->defaults_kwdict = NULL;
     op->defaults_getter = NULL;
     op->func_annotations = NULL;
-    PyObject_GC_Track(op);
     return (PyObject *) op;
 }
 
@@ -779,7 +774,32 @@
     Py_INCREF(dict);
 }
 
+
+//////////////////// CythonFunction.proto ////////////////////
+
+static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml,
+                                      int flags, PyObject* qualname,
+                                      PyObject *closure,
+                                      PyObject *module, PyObject *globals,
+                                      PyObject* code);
+
+//////////////////// CythonFunction ////////////////////
+//@requires: CythonFunctionShared
+
+static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname,
+                                      PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
+    PyObject *op = __Pyx_CyFunction_Init(  /* yields NULL if the allocation below failed */
+        PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType),
+        ml, flags, qualname, closure, module, globals, code
+    );
+    if (likely(op)) {
+        PyObject_GC_Track(op);  /* Init() leaves GC tracking to its caller */
+    }
+    return op;  /* new object of __pyx_CyFunctionType, or NULL on failure */
+}
+
+
 //////////////////// CyFunctionClassCell.proto ////////////////////
 static int __Pyx_CyFunction_InitClassCell(PyObject *cyfunctions, PyObject *classobj);/*proto*/
 
 //////////////////// CyFunctionClassCell ////////////////////
@@ -782,8 +802,8 @@
 //////////////////// CyFunctionClassCell.proto ////////////////////
 static int __Pyx_CyFunction_InitClassCell(PyObject *cyfunctions, PyObject *classobj);/*proto*/
 
 //////////////////// CyFunctionClassCell ////////////////////
-//@requires: CythonFunction
+//@requires: CythonFunctionShared
 
 static int __Pyx_CyFunction_InitClassCell(PyObject *cyfunctions, PyObject *classobj) {
     Py_ssize_t i, count = PyList_GET_SIZE(cyfunctions);
@@ -806,4 +826,5 @@
     return 0;
 }
 
+
 //////////////////// FusedFunction.proto ////////////////////
@@ -809,4 +830,5 @@
 //////////////////// FusedFunction.proto ////////////////////
+
 typedef struct {
     __pyx_CyFunctionObject func;
     PyObject *__signatures__;
@@ -814,11 +836,8 @@
     PyObject *self;
 } __pyx_FusedFunctionObject;
 
-#define __pyx_FusedFunction_NewEx(ml, flags, qualname, self, module, globals, code)         \
-        __pyx_FusedFunction_New(__pyx_FusedFunctionType, ml, flags, qualname, self, module, globals, code)
-static PyObject *__pyx_FusedFunction_New(PyTypeObject *type,
-                                         PyMethodDef *ml, int flags,
-                                         PyObject *qualname, PyObject *self,
+static PyObject *__pyx_FusedFunction_New(PyMethodDef *ml, int flags,
+                                         PyObject *qualname, PyObject *closure,
                                          PyObject *module, PyObject *globals,
                                          PyObject *code);
 
@@ -829,6 +848,6 @@
 #define __Pyx_FusedFunction_USED
 
 //////////////////// FusedFunction ////////////////////
-//@requires: CythonFunction
+//@requires: CythonFunctionShared
 
 static PyObject *
@@ -833,7 +852,7 @@
 
 static PyObject *
-__pyx_FusedFunction_New(PyTypeObject *type, PyMethodDef *ml, int flags,
-                        PyObject *qualname, PyObject *self,
+__pyx_FusedFunction_New(PyMethodDef *ml, int flags,
+                        PyObject *qualname, PyObject *closure,
                         PyObject *module, PyObject *globals,
                         PyObject *code)
 {
@@ -837,16 +856,19 @@
                         PyObject *module, PyObject *globals,
                         PyObject *code)
 {
-    __pyx_FusedFunctionObject *fusedfunc =
-        (__pyx_FusedFunctionObject *) __Pyx_CyFunction_New(type, ml, flags, qualname,
-                                                           self, module, globals, code);
-    if (!fusedfunc)
-        return NULL;
-
-    fusedfunc->__signatures__ = NULL;
-    fusedfunc->type = NULL;
-    fusedfunc->self = NULL;
-    return (PyObject *) fusedfunc;
+    PyObject *op = __Pyx_CyFunction_Init(
+        // __pyx_CyFunctionObject is correct below since that's the cast that we want.
+        PyObject_GC_New(__pyx_CyFunctionObject, __pyx_FusedFunctionType),
+        ml, flags, qualname, closure, module, globals, code
+    );
+    if (likely(op)) {
+        __pyx_FusedFunctionObject *fusedfunc = (__pyx_FusedFunctionObject *) op;
+        fusedfunc->__signatures__ = NULL;
+        fusedfunc->type = NULL;
+        fusedfunc->self = NULL;
+        PyObject_GC_Track(op);
+    }
+    return op;
 }
 
 static void
@@ -896,7 +918,7 @@
     if (obj == Py_None)
         obj = NULL;
 
-    meth = (__pyx_FusedFunctionObject *) __pyx_FusedFunction_NewEx(
+    meth = (__pyx_FusedFunctionObject *) __pyx_FusedFunction_New(
                     ((PyCFunctionObject *) func)->m_ml,
                     ((__pyx_CyFunctionObject *) func)->flags,
                     ((__pyx_CyFunctionObject *) func)->func_qualname,
@@ -972,7 +994,6 @@
     if (PyTuple_Check(idx)) {
         PyObject *list = PyList_New(0);
         Py_ssize_t n = PyTuple_GET_SIZE(idx);
-        PyObject *string = NULL;
         PyObject *sep = NULL;
         int i;
 
@@ -976,7 +997,7 @@
         PyObject *sep = NULL;
         int i;
 
-        if (!list)
+        if (unlikely(!list))
             return NULL;
 
         for (i = 0; i < n; i++) {
@@ -980,6 +1001,8 @@
             return NULL;
 
         for (i = 0; i < n; i++) {
+            int ret;
+            PyObject *string;
 #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
             PyObject *item = PyTuple_GET_ITEM(idx, i);
 #else
@@ -983,9 +1006,9 @@
 #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
             PyObject *item = PyTuple_GET_ITEM(idx, i);
 #else
-            PyObject *item = PySequence_ITEM(idx, i);
+            PyObject *item = PySequence_ITEM(idx, i);  if (unlikely(!item)) goto __pyx_err;
 #endif
             string = _obj_to_str(item);
 #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS)
             Py_DECREF(item);
 #endif
@@ -987,9 +1010,8 @@
 #endif
             string = _obj_to_str(item);
 #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS)
             Py_DECREF(item);
 #endif
-            if (!string || PyList_Append(list, string) < 0)
-                goto __pyx_err;
-
+            if (unlikely(!string)) goto __pyx_err;
+            ret = PyList_Append(list, string);
             Py_DECREF(string);
@@ -995,4 +1017,5 @@
             Py_DECREF(string);
+            if (unlikely(ret < 0)) goto __pyx_err;
         }
 
         sep = PyUnicode_FromString("|");
@@ -996,7 +1019,7 @@
         }
 
         sep = PyUnicode_FromString("|");
-        if (sep)
+        if (likely(sep))
             signature = PyUnicode_Join(sep, list);
 __pyx_err:
 ;
@@ -1269,7 +1292,8 @@
     // special C-API function only in Pyston and PyPy >= 5.9
     if (PyMethodDescr_Check(method))
 #else
-    // It appears that PyMethodDescr_Type is not exposed anywhere in the CPython C-API
+    #if PY_MAJOR_VERSION == 2
+    // PyMethodDescr_Type is not exposed in the CPython C-API in Py2.
     static PyTypeObject *methoddescr_type = NULL;
     if (methoddescr_type == NULL) {
        PyObject *meth = PyObject_GetAttrString((PyObject*)&PyList_Type, "append");
@@ -1277,6 +1301,9 @@
        methoddescr_type = Py_TYPE(meth);
        Py_DECREF(meth);
     }
+    #else
+    PyTypeObject *methoddescr_type = &PyMethodDescr_Type;
+    #endif
     if (__Pyx_TypeCheck(method, methoddescr_type))
 #endif
     {
@@ -1294,10 +1321,6 @@
         // python classes
         return PyClassMethod_New(PyMethod_GET_FUNCTION(method));
     }
-    else if (PyCFunction_Check(method)) {
-        return PyClassMethod_New(method);
-    }
-#ifdef __Pyx_CyFunction_USED
-    else if (__Pyx_CyFunction_Check(method)) {
+    else {
         return PyClassMethod_New(method);
     }
@@ -1302,8 +1325,3 @@
         return PyClassMethod_New(method);
     }
-#endif
-    PyErr_SetString(PyExc_TypeError,
-                   "Class-level classmethod() can only be called on "
-                   "a method_descriptor or instance method.");
-    return NULL;
 }
diff --git a/Cython/Utility/ExtensionTypes.c b/Cython/Utility/ExtensionTypes.c
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1V0aWxpdHkvRXh0ZW5zaW9uVHlwZXMuYw==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1V0aWxpdHkvRXh0ZW5zaW9uVHlwZXMuYw== 100644
--- a/Cython/Utility/ExtensionTypes.c
+++ b/Cython/Utility/ExtensionTypes.c
@@ -55,16 +55,42 @@
     }
 
 #if PY_VERSION_HEX >= 0x03050000
-    // As of https://bugs.python.org/issue22079
-    // PyType_Ready enforces that all bases of a non-heap type are
-    // non-heap. We know that this is the case for the solid base but
-    // other bases are heap allocated and are kept alive through the
-    // tp_bases reference.
-    // Other than this check, the Py_TPFLAGS_HEAPTYPE flag is unused
-    // in PyType_Ready().
-    t->tp_flags |= Py_TPFLAGS_HEAPTYPE;
+    {
+        // Make sure GC does not pick up our non-heap type as heap type with this hack!
+        // For details, see https://github.com/cython/cython/issues/3603
+        PyObject *ret, *py_status;
+        int gc_was_enabled;
+        PyObject *gc = PyImport_Import(PYUNICODE("gc"));
+        if (unlikely(!gc)) return -1;
+        py_status = PyObject_CallMethodObjArgs(gc, PYUNICODE("isenabled"), NULL);
+        if (unlikely(!py_status)) {
+            Py_DECREF(gc);
+            return -1;
+        }
+        gc_was_enabled = __Pyx_PyObject_IsTrue(py_status);
+        Py_DECREF(py_status);
+        if (gc_was_enabled > 0) {
+            ret = PyObject_CallMethodObjArgs(gc, PYUNICODE("disable"), NULL);
+            if (unlikely(!ret)) {
+                Py_DECREF(gc);
+                return -1;
+            }
+            Py_DECREF(ret);
+        } else if (unlikely(gc_was_enabled == -1)) {
+            Py_DECREF(gc);
+            return -1;
+        }
+
+        // As of https://bugs.python.org/issue22079
+        // PyType_Ready enforces that all bases of a non-heap type are
+        // non-heap. We know that this is the case for the solid base but
+        // other bases are heap allocated and are kept alive through the
+        // tp_bases reference.
+        // Other than this check, the Py_TPFLAGS_HEAPTYPE flag is unused
+        // in PyType_Ready().
+        t->tp_flags |= Py_TPFLAGS_HEAPTYPE;
 #endif
 
     r = PyType_Ready(t);
 
 #if PY_VERSION_HEX >= 0x03050000
@@ -66,9 +92,28 @@
 #endif
 
     r = PyType_Ready(t);
 
 #if PY_VERSION_HEX >= 0x03050000
-    t->tp_flags &= ~Py_TPFLAGS_HEAPTYPE;
+        t->tp_flags &= ~Py_TPFLAGS_HEAPTYPE;
+
+        if (gc_was_enabled) {
+            PyObject *t, *v, *tb;
+            PyErr_Fetch(&t, &v, &tb);
+            ret = PyObject_CallMethodObjArgs(gc, PYUNICODE("enable"), NULL);
+            if (likely(ret || r == -1)) {
+                Py_XDECREF(ret);
+                // do not overwrite exceptions raised by PyType_Ready() above
+                PyErr_Restore(t, v, tb);
+            } else {
+                // PyType_Ready() succeeded, but gc.enable() failed.
+                Py_XDECREF(t);
+                Py_XDECREF(v);
+                Py_XDECREF(tb);
+                r = -1;
+            }
+        }
+        Py_DECREF(gc);
+    }
 #endif
 
     return r;
@@ -132,6 +177,7 @@
 static int __Pyx_setup_reduce(PyObject* type_obj);
 
 /////////////// SetupReduce ///////////////
+//@requires: ObjectHandling.c::PyObjectGetAttrStrNoError
 //@requires: ObjectHandling.c::PyObjectGetAttrStr
 //@substitute: naming
 
@@ -188,10 +234,16 @@
         reduce = __Pyx_PyObject_GetAttrStr(type_obj, PYIDENT("__reduce__")); if (unlikely(!reduce)) goto __PYX_BAD;
 
         if (reduce == object_reduce || __Pyx_setup_reduce_is_named(reduce, PYIDENT("__reduce_cython__"))) {
-            reduce_cython = __Pyx_PyObject_GetAttrStr(type_obj, PYIDENT("__reduce_cython__")); if (unlikely(!reduce_cython)) goto __PYX_BAD;
-            ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, PYIDENT("__reduce__"), reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
-            ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, PYIDENT("__reduce_cython__")); if (unlikely(ret < 0)) goto __PYX_BAD;
+            reduce_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, PYIDENT("__reduce_cython__"));
+            if (likely(reduce_cython)) {
+                ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, PYIDENT("__reduce__"), reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
+                ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, PYIDENT("__reduce_cython__")); if (unlikely(ret < 0)) goto __PYX_BAD;
+            } else if (reduce == object_reduce || PyErr_Occurred()) {
+                // Ignore if we're done, i.e. if 'reduce' already has the right name and the original is gone.
+                // Otherwise: error.
+                goto __PYX_BAD;
+            }
 
             setstate = __Pyx_PyObject_GetAttrStr(type_obj, PYIDENT("__setstate__"));
             if (!setstate) PyErr_Clear();
             if (!setstate || __Pyx_setup_reduce_is_named(setstate, PYIDENT("__setstate_cython__"))) {
@@ -194,10 +246,16 @@
 
             setstate = __Pyx_PyObject_GetAttrStr(type_obj, PYIDENT("__setstate__"));
             if (!setstate) PyErr_Clear();
             if (!setstate || __Pyx_setup_reduce_is_named(setstate, PYIDENT("__setstate_cython__"))) {
-                setstate_cython = __Pyx_PyObject_GetAttrStr(type_obj, PYIDENT("__setstate_cython__")); if (unlikely(!setstate_cython)) goto __PYX_BAD;
-                ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, PYIDENT("__setstate__"), setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
-                ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, PYIDENT("__setstate_cython__")); if (unlikely(ret < 0)) goto __PYX_BAD;
+                setstate_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, PYIDENT("__setstate_cython__"));
+                if (likely(setstate_cython)) {
+                    ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, PYIDENT("__setstate__"), setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD;
+                    ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, PYIDENT("__setstate_cython__")); if (unlikely(ret < 0)) goto __PYX_BAD;
+                } else if (!setstate || PyErr_Occurred()) {
+                    // Ignore if we're done, i.e. if 'setstate' already has the right name and the original is gone.
+                    // Otherwise: error.
+                    goto __PYX_BAD;
+                }
             }
             PyType_Modified((PyTypeObject*)type_obj);
         }
@@ -220,3 +278,56 @@
     Py_XDECREF(setstate_cython);
     return ret;
 }
+
+/////////////// BinopSlot ///////////////
+
+static CYTHON_INLINE PyObject *{{func_name}}_maybe_call_slot(PyTypeObject* type, PyObject *left, PyObject *right {{extra_arg_decl}}) {
+    {{slot_type}} slot;  /* the number slot of 'type', or NULL if it has none */
+#if CYTHON_USE_TYPE_SLOTS
+    slot = type->tp_as_number ? type->tp_as_number->{{slot_name}} : NULL;  /* read it directly from the type struct */
+#else
+    slot = ({{slot_type}}) PyType_GetSlot(type, Py_{{slot_name}});  /* look it up through the C-API instead */
+#endif
+    return slot ? slot(left, right {{extra_arg}}) : __Pyx_NewRef(Py_NotImplemented);  /* no slot: new reference to NotImplemented */
+}
+
+static PyObject *{{func_name}}(PyObject *left, PyObject *right {{extra_arg_decl}}) {
+    PyObject *res;
+    int maybe_self_is_left, maybe_self_is_right = 0;  /* flags: which operand might be an instance of our type */
+    maybe_self_is_left = Py_TYPE(left) == Py_TYPE(right)
+#if CYTHON_USE_TYPE_SLOTS
+            || (Py_TYPE(left)->tp_as_number && Py_TYPE(left)->tp_as_number->{{slot_name}} == &{{func_name}})
+#endif
+            || __Pyx_TypeCheck(left, {{type_cname}});
+    // Optimize for the common case where the left operation is defined (and successful).
+    if (!{{overloads_left}}) {
+        maybe_self_is_right = Py_TYPE(left) == Py_TYPE(right)
+#if CYTHON_USE_TYPE_SLOTS
+                || (Py_TYPE(right)->tp_as_number && Py_TYPE(right)->tp_as_number->{{slot_name}} == &{{func_name}})
+#endif
+                || __Pyx_TypeCheck(right, {{type_cname}});
+    }
+    if (maybe_self_is_left) {
+        if (maybe_self_is_right && !{{overloads_left}}) {
+            res = {{call_right}};  /* in this case the right-hand call is tried before the left-hand one */
+            if (res != Py_NotImplemented) return res;
+            Py_DECREF(res);  /* release the NotImplemented result before trying the other call */
+            // Don't bother calling it again.
+            maybe_self_is_right = 0;
+        }
+        res = {{call_left}};
+        if (res != Py_NotImplemented) return res;
+        Py_DECREF(res);  /* release the NotImplemented result and fall through */
+    }
+    if ({{overloads_left}}) {  /* the right-hand flag was not computed above in this case */
+        maybe_self_is_right = Py_TYPE(left) == Py_TYPE(right)
+#if CYTHON_USE_TYPE_SLOTS
+                || (Py_TYPE(right)->tp_as_number && Py_TYPE(right)->tp_as_number->{{slot_name}} == &{{func_name}})
+#endif
+                || PyType_IsSubtype(Py_TYPE(right), {{type_cname}});
+    }
+    if (maybe_self_is_right) {
+        return {{call_right}};
+    }
+    return __Pyx_NewRef(Py_NotImplemented);  /* no applicable call produced a result */
+}
diff --git a/Cython/Utility/FunctionArguments.c b/Cython/Utility/FunctionArguments.c
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1V0aWxpdHkvRnVuY3Rpb25Bcmd1bWVudHMuYw==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1V0aWxpdHkvRnVuY3Rpb25Bcmd1bWVudHMuYw== 100644
--- a/Cython/Utility/FunctionArguments.c
+++ b/Cython/Utility/FunctionArguments.c
@@ -211,7 +211,7 @@
 
         name = first_kw_arg;
         #if PY_MAJOR_VERSION < 3
-        if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) {
+        if (likely(PyString_Check(key))) {
             while (*name) {
                 if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
                         && _PyString_Eq(**name, key)) {
@@ -239,5 +239,5 @@
             while (*name) {
                 int cmp = (**name == key) ? 0 :
                 #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
-                    (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :
+                    (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
                 #endif
@@ -243,5 +243,5 @@
                 #endif
-                    // need to convert argument name from bytes to unicode for comparison
+                    // In Py2, we may need to convert the argument name from str to unicode for comparison.
                     PyUnicode_Compare(**name, key);
                 if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
                 if (cmp == 0) {
@@ -257,7 +257,7 @@
                 while (argname != first_kw_arg) {
                     int cmp = (**argname == key) ? 0 :
                     #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
-                        (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :
+                        (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
                     #endif
                         // need to convert argument name from bytes to unicode for comparison
                         PyUnicode_Compare(**argname, key);
diff --git a/Cython/Utility/ImportExport.c b/Cython/Utility/ImportExport.c
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1V0aWxpdHkvSW1wb3J0RXhwb3J0LmM=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1V0aWxpdHkvSW1wb3J0RXhwb3J0LmM= 100644
--- a/Cython/Utility/ImportExport.c
+++ b/Cython/Utility/ImportExport.c
@@ -46,7 +46,8 @@
     {
         #if PY_MAJOR_VERSION >= 3
         if (level == -1) {
-            if (strchr(__Pyx_MODULE_NAME, '.')) {
+            // Avoid C compiler warning if strchr() evaluates to false at compile time.
+            if ((1) && (strchr(__Pyx_MODULE_NAME, '.'))) {
                 /* try package relative import first */
                 module = PyImport_ImportModuleLevelObject(
                     name, global_dict, empty_dict, list, 1);
diff --git a/Cython/Utility/MemoryView_C.c b/Cython/Utility/MemoryView_C.c
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1V0aWxpdHkvTWVtb3J5Vmlld19DLmM=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1V0aWxpdHkvTWVtb3J5Vmlld19DLmM= 100644
--- a/Cython/Utility/MemoryView_C.c
+++ b/Cython/Utility/MemoryView_C.c
@@ -181,9 +181,9 @@
     if (buf->strides) {
         if (spec & __Pyx_MEMVIEW_CONTIG) {
             if (spec & (__Pyx_MEMVIEW_PTR|__Pyx_MEMVIEW_FULL)) {
-                if (buf->strides[dim] != sizeof(void *)) {
+                if (unlikely(buf->strides[dim] != sizeof(void *))) {
                     PyErr_Format(PyExc_ValueError,
                                  "Buffer is not indirectly contiguous "
                                  "in dimension %d.", dim);
                     goto fail;
                 }
@@ -185,9 +185,9 @@
                     PyErr_Format(PyExc_ValueError,
                                  "Buffer is not indirectly contiguous "
                                  "in dimension %d.", dim);
                     goto fail;
                 }
-            } else if (buf->strides[dim] != buf->itemsize) {
+            } else if (unlikely(buf->strides[dim] != buf->itemsize)) {
                 PyErr_SetString(PyExc_ValueError,
                                 "Buffer and memoryview are not contiguous "
                                 "in the same dimension.");
@@ -199,7 +199,7 @@
             Py_ssize_t stride = buf->strides[dim];
             if (stride < 0)
                 stride = -stride;
-            if (stride < buf->itemsize) {
+            if (unlikely(stride < buf->itemsize)) {
                 PyErr_SetString(PyExc_ValueError,
                                 "Buffer and memoryview are not contiguous "
                                 "in the same dimension.");
@@ -207,8 +207,8 @@
             }
         }
     } else {
-        if (spec & __Pyx_MEMVIEW_CONTIG && dim != ndim - 1) {
+        if (unlikely(spec & __Pyx_MEMVIEW_CONTIG && dim != ndim - 1)) {
             PyErr_Format(PyExc_ValueError,
                          "C-contiguous buffer is not contiguous in "
                          "dimension %d", dim);
             goto fail;
@@ -211,9 +211,9 @@
             PyErr_Format(PyExc_ValueError,
                          "C-contiguous buffer is not contiguous in "
                          "dimension %d", dim);
             goto fail;
-        } else if (spec & (__Pyx_MEMVIEW_PTR)) {
+        } else if (unlikely(spec & (__Pyx_MEMVIEW_PTR))) {
             PyErr_Format(PyExc_ValueError,
                          "C-contiguous buffer is not indirect in "
                          "dimension %d", dim);
             goto fail;
@@ -216,8 +216,8 @@
             PyErr_Format(PyExc_ValueError,
                          "C-contiguous buffer is not indirect in "
                          "dimension %d", dim);
             goto fail;
-        } else if (buf->suboffsets) {
+        } else if (unlikely(buf->suboffsets)) {
             PyErr_SetString(PyExc_ValueError,
                             "Buffer exposes suboffsets but no strides");
             goto fail;
@@ -235,7 +235,7 @@
     // Todo: without PyBUF_INDIRECT we may not have suboffset information, i.e., the
     //       ptr may not be set to NULL but may be uninitialized?
     if (spec & __Pyx_MEMVIEW_DIRECT) {
-        if (buf->suboffsets && buf->suboffsets[dim] >= 0) {
+        if (unlikely(buf->suboffsets && buf->suboffsets[dim] >= 0)) {
             PyErr_Format(PyExc_ValueError,
                          "Buffer not compatible with direct access "
                          "in dimension %d.", dim);
@@ -244,7 +244,7 @@
     }
 
     if (spec & __Pyx_MEMVIEW_PTR) {
-        if (!buf->suboffsets || (buf->suboffsets[dim] < 0)) {
+        if (unlikely(!buf->suboffsets || (buf->suboffsets[dim] < 0))) {
             PyErr_Format(PyExc_ValueError,
                          "Buffer is not indirectly accessible "
                          "in dimension %d.", dim);
@@ -265,9 +265,7 @@
     if (c_or_f_flag & __Pyx_IS_F_CONTIG) {
         Py_ssize_t stride = 1;
         for (i = 0; i < ndim; i++) {
-            if (stride * buf->itemsize != buf->strides[i] &&
-                    buf->shape[i] > 1)
-            {
+            if (unlikely(stride * buf->itemsize != buf->strides[i]  &&  buf->shape[i] > 1)) {
                 PyErr_SetString(PyExc_ValueError,
                     "Buffer not fortran contiguous.");
                 goto fail;
@@ -277,8 +275,7 @@
     } else if (c_or_f_flag & __Pyx_IS_C_CONTIG) {
         Py_ssize_t stride = 1;
         for (i = ndim - 1; i >- 1; i--) {
-            if (stride * buf->itemsize != buf->strides[i] &&
-                    buf->shape[i] > 1) {
+            if (unlikely(stride * buf->itemsize != buf->strides[i]  &&  buf->shape[i] > 1)) {
                 PyErr_SetString(PyExc_ValueError,
                     "Buffer not C contiguous.");
                 goto fail;
@@ -325,7 +322,7 @@
     }
 
     buf = &memview->view;
-    if (buf->ndim != ndim) {
+    if (unlikely(buf->ndim != ndim)) {
         PyErr_Format(PyExc_ValueError,
                 "Buffer has wrong number of dimensions (expected %d, got %d)",
                 ndim, buf->ndim);
@@ -334,6 +331,6 @@
 
     if (new_memview) {
         __Pyx_BufFmt_Init(&ctx, stack, dtype);
-        if (!__Pyx_BufFmt_CheckString(&ctx, buf->format)) goto fail;
+        if (unlikely(!__Pyx_BufFmt_CheckString(&ctx, buf->format))) goto fail;
     }
 
@@ -338,6 +335,6 @@
     }
 
-    if ((unsigned) buf->itemsize != dtype->size) {
+    if (unlikely((unsigned) buf->itemsize != dtype->size)) {
         PyErr_Format(PyExc_ValueError,
                      "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "u byte%s) "
                      "does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "u byte%s)",
@@ -352,5 +349,5 @@
     /* Check axes */
     for (i = 0; i < ndim; i++) {
         spec = axes_specs[i];
-        if (!__pyx_check_strides(buf, i, ndim, spec))
+        if (unlikely(!__pyx_check_strides(buf, i, ndim, spec)))
             goto fail;
@@ -356,6 +353,6 @@
             goto fail;
-        if (!__pyx_check_suboffsets(buf, i, ndim, spec))
+        if (unlikely(!__pyx_check_suboffsets(buf, i, ndim, spec)))
             goto fail;
     }
 
     /* Check contiguity */
@@ -358,8 +355,8 @@
             goto fail;
     }
 
     /* Check contiguity */
-    if (buf->strides && !__pyx_verify_contig(buf, ndim, c_or_f_flag))
+    if (unlikely(buf->strides && !__pyx_verify_contig(buf, ndim, c_or_f_flag)))
         goto fail;
 
     /* Initialize */
@@ -394,7 +391,7 @@
     Py_buffer *buf = &memview->view;
     __Pyx_RefNannySetupContext("init_memviewslice", 0);
 
-    if (memviewslice->memview || memviewslice->data) {
+    if (unlikely(memviewslice->memview || memviewslice->data)) {
         PyErr_SetString(PyExc_ValueError,
             "memviewslice is already initialized!");
         goto fail;
@@ -488,6 +485,6 @@
 {
     int first_time;
     struct {{memview_struct_name}} *memview = memslice->memview;
-    if (!memview || (PyObject *) memview == Py_None)
+    if (unlikely(!memview || (PyObject *) memview == Py_None))
         return; /* allow uninitialized memoryview assignment */
 
@@ -492,8 +489,8 @@
         return; /* allow uninitialized memoryview assignment */
 
-    if (__pyx_get_slice_count(memview) < 0)
+    if (unlikely(__pyx_get_slice_count(memview) < 0))
         __pyx_fatalerror("Acquisition count is %d (line %d)",
                          __pyx_get_slice_count(memview), lineno);
 
     first_time = __pyx_add_acquisition_count(memview) == 0;
 
@@ -495,9 +492,9 @@
         __pyx_fatalerror("Acquisition count is %d (line %d)",
                          __pyx_get_slice_count(memview), lineno);
 
     first_time = __pyx_add_acquisition_count(memview) == 0;
 
-    if (first_time) {
+    if (unlikely(first_time)) {
         if (have_gil) {
             Py_INCREF((PyObject *) memview);
         } else {
@@ -513,10 +510,9 @@
     int last_time;
     struct {{memview_struct_name}} *memview = memslice->memview;
 
-    if (!memview ) {
-        return;
-    } else if ((PyObject *) memview == Py_None) {
+    if (unlikely(!memview || (PyObject *) memview == Py_None)) {
+        // we do not ref-count None
         memslice->memview = NULL;
         return;
     }
 
@@ -519,10 +515,10 @@
         memslice->memview = NULL;
         return;
     }
 
-    if (__pyx_get_slice_count(memview) <= 0)
+    if (unlikely(__pyx_get_slice_count(memview) <= 0))
         __pyx_fatalerror("Acquisition count is %d (line %d)",
                          __pyx_get_slice_count(memview), lineno);
 
     last_time = __pyx_sub_acquisition_count(memview) == 1;
     memslice->data = NULL;
@@ -524,9 +520,10 @@
         __pyx_fatalerror("Acquisition count is %d (line %d)",
                          __pyx_get_slice_count(memview), lineno);
 
     last_time = __pyx_sub_acquisition_count(memview) == 1;
     memslice->data = NULL;
-    if (last_time) {
+
+    if (unlikely(last_time)) {
         if (have_gil) {
             Py_CLEAR(memslice->memview);
         } else {
@@ -570,7 +567,7 @@
     __Pyx_RefNannySetupContext("__pyx_memoryview_copy_new_contig", 0);
 
     for (i = 0; i < ndim; i++) {
-        if (from_mvs->suboffsets[i] >= 0) {
+        if (unlikely(from_mvs->suboffsets[i] >= 0)) {
             PyErr_Format(PyExc_ValueError, "Cannot copy memoryview slice with "
                                            "indirect dimensions (axis %d)", i);
             goto fail;
@@ -860,7 +857,7 @@
     {{endif}}
 
     {{if boundscheck}}
-        if (!__Pyx_is_valid_index(__pyx_tmp_idx, __pyx_tmp_shape)) {
+        if (unlikely(!__Pyx_is_valid_index(__pyx_tmp_idx, __pyx_tmp_shape))) {
             {{if not have_gil}}
                 #ifdef WITH_THREAD
                 PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();
@@ -878,9 +875,6 @@
 
             {{error_goto}}
         }
-    {{else}}
-        // make sure label is not un-used
-        if ((0)) {{error_goto}}
     {{endif}}
 
     {{if all_dimensions_direct}}
diff --git a/Cython/Utility/ModuleSetupCode.c b/Cython/Utility/ModuleSetupCode.c
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1V0aWxpdHkvTW9kdWxlU2V0dXBDb2RlLmM=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1V0aWxpdHkvTW9kdWxlU2V0dXBDb2RlLmM= 100644
--- a/Cython/Utility/ModuleSetupCode.c
+++ b/Cython/Utility/ModuleSetupCode.c
@@ -611,6 +611,15 @@
   #define PySet_CheckExact(obj)        (Py_TYPE(obj) == &PySet_Type)
 #endif
 
+
+#if PY_VERSION_HEX >= 0x030900A4  /* CPython 3.9.0a4 and later */
+  #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt)
+  #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size)
+#else  /* older versions: assign through the Py_REFCNT() / Py_SIZE() macros */
+  #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt)
+  #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size)
+#endif
+
 #if CYTHON_ASSUME_SAFE_MACROS
   #define __Pyx_PySequence_SIZE(seq)  Py_SIZE(seq)
 #else
@@ -697,5 +706,6 @@
 
 /////////////// PyModInitFuncType.proto ///////////////
 
-#if PY_MAJOR_VERSION < 3
+#ifndef CYTHON_NO_PYINIT_EXPORT
+#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
 
@@ -701,4 +711,7 @@
 
-#ifdef CYTHON_NO_PYINIT_EXPORT
-// define this to void manually because PyMODINIT_FUNC adds __declspec(dllexport) to it's definition.
+#elif PY_MAJOR_VERSION < 3
+// Py2: define this to void manually because PyMODINIT_FUNC adds __declspec(dllexport) to its definition.
+#ifdef __cplusplus
+#define __Pyx_PyMODINIT_FUNC extern "C" void
+#else
 #define __Pyx_PyMODINIT_FUNC void
@@ -704,6 +717,4 @@
 #define __Pyx_PyMODINIT_FUNC void
-#else
-#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
 #endif
 
 #else
@@ -707,7 +718,8 @@
 #endif
 
 #else
-
-#ifdef CYTHON_NO_PYINIT_EXPORT
-// define this to PyObject * manually because PyMODINIT_FUNC adds __declspec(dllexport) to it's definition.
+// Py3+: define this to PyObject * manually because PyMODINIT_FUNC adds __declspec(dllexport) to its definition.
+#ifdef __cplusplus
+#define __Pyx_PyMODINIT_FUNC extern "C" PyObject *
+#else
 #define __Pyx_PyMODINIT_FUNC PyObject *
@@ -713,4 +725,2 @@
 #define __Pyx_PyMODINIT_FUNC PyObject *
-#else
-#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
 #endif
@@ -716,5 +726,4 @@
 #endif
-
 #endif
 
 
diff --git a/Cython/Utility/ObjectHandling.c b/Cython/Utility/ObjectHandling.c
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1V0aWxpdHkvT2JqZWN0SGFuZGxpbmcuYw==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1V0aWxpdHkvT2JqZWN0SGFuZGxpbmcuYw== 100644
--- a/Cython/Utility/ObjectHandling.c
+++ b/Cython/Utility/ObjectHandling.c
@@ -1361,6 +1361,41 @@
 #endif
 
 
+/////////////// PyObjectGetAttrStrNoError.proto ///////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name);/*proto*/
+
+/////////////// PyObjectGetAttrStrNoError ///////////////
+//@requires: PyObjectGetAttrStr
+//@requires: Exceptions.c::PyThreadStateGet
+//@requires: Exceptions.c::PyErrFetchRestore
+//@requires: Exceptions.c::PyErrExceptionMatches
+
+static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) {
+    __Pyx_PyThreadState_declare
+    __Pyx_PyThreadState_assign
+    if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError)))
+        __Pyx_PyErr_Clear();
+}
+
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) {
+    PyObject *result;
+#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1
+    // _PyObject_GenericGetAttrWithDict() in CPython 3.7+ can avoid raising the AttributeError.
+    // See https://bugs.python.org/issue32544
+    PyTypeObject* tp = Py_TYPE(obj);
+    if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) {
+        return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1);
+    }
+#endif
+    result = __Pyx_PyObject_GetAttrStr(obj, attr_name);
+    if (unlikely(!result)) {
+        __Pyx_PyObject_GetAttrStr_ClearAttributeError();
+    }
+    return result;
+}
+
+
 /////////////// PyObjectGetAttrStr.proto ///////////////
 
 #if CYTHON_USE_TYPE_SLOTS
diff --git a/Cython/Utility/Optimize.c b/Cython/Utility/Optimize.c
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1V0aWxpdHkvT3B0aW1pemUuYw==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1V0aWxpdHkvT3B0aW1pemUuYw== 100644
--- a/Cython/Utility/Optimize.c
+++ b/Cython/Utility/Optimize.c
@@ -35,7 +35,7 @@
     if (likely(L->allocated > len) & likely(len > (L->allocated >> 1))) {
         Py_INCREF(x);
         PyList_SET_ITEM(list, len, x);
-        Py_SIZE(list) = len+1;
+        __Pyx_SET_SIZE(list, len + 1);
         return 0;
     }
     return PyList_Append(list, x);
@@ -53,7 +53,7 @@
     if (likely(L->allocated > len)) {
         Py_INCREF(x);
         PyList_SET_ITEM(list, len, x);
-        Py_SIZE(list) = len+1;
+        __Pyx_SET_SIZE(list, len + 1);
         return 0;
     }
     return PyList_Append(list, x);
@@ -104,7 +104,7 @@
 static CYTHON_INLINE PyObject* __Pyx_PyList_Pop(PyObject* L) {
     /* Check that both the size is positive and no reallocation shrinking needs to be done. */
     if (likely(PyList_GET_SIZE(L) > (((PyListObject*)L)->allocated >> 1))) {
-        Py_SIZE(L) -= 1;
+        __Pyx_SET_SIZE(L, Py_SIZE(L) - 1);
         return PyList_GET_ITEM(L, PyList_GET_SIZE(L));
     }
     return CALL_UNBOUND_METHOD(PyList_Type, "pop", L);
@@ -167,7 +167,7 @@
         }
         if (likely(__Pyx_is_valid_index(cix, size))) {
             PyObject* v = PyList_GET_ITEM(L, cix);
-            Py_SIZE(L) -= 1;
+            __Pyx_SET_SIZE(L, Py_SIZE(L) - 1);
             size -= 1;
             memmove(&PyList_GET_ITEM(L, cix), &PyList_GET_ITEM(L, cix+1), (size_t)(size-cix)*sizeof(PyObject*));
             return v;
diff --git a/Cython/Utility/StringTools.c b/Cython/Utility/StringTools.c
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_Q3l0aG9uL1V0aWxpdHkvU3RyaW5nVG9vbHMuYw==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_Q3l0aG9uL1V0aWxpdHkvU3RyaW5nVG9vbHMuYw== 100644
--- a/Cython/Utility/StringTools.c
+++ b/Cython/Utility/StringTools.c
@@ -466,4 +466,6 @@
         if (stop < 0)
             stop += length;
     }
+    if (unlikely(stop <= start))
+        return PyUnicode_FromUnicode(NULL, 0);
     length = stop - start;
@@ -469,6 +471,4 @@
     length = stop - start;
-    if (unlikely(length <= 0))
-        return PyUnicode_FromUnicode(NULL, 0);
     cstring += start;
     if (decode_func) {
         return decode_func(cstring, length, errors);
@@ -502,4 +502,6 @@
     }
     if (stop > length)
         stop = length;
+    if (unlikely(stop <= start))
+        return PyUnicode_FromUnicode(NULL, 0);
     length = stop - start;
@@ -505,6 +507,4 @@
     length = stop - start;
-    if (unlikely(length <= 0))
-        return PyUnicode_FromUnicode(NULL, 0);
     cstring += start;
     if (decode_func) {
         return decode_func(cstring, length, errors);
@@ -558,8 +558,7 @@
         stop += length;
     else if (stop > length)
         stop = length;
-    length = stop - start;
-    if (length <= 0)
+    if (stop <= start)
         return PyUnicode_FromUnicode(NULL, 0);
 #if CYTHON_PEP393_ENABLED
     return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
diff --git a/appveyor.yml b/appveyor.yml
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_YXBwdmV5b3IueW1s..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_YXBwdmV5b3IueW1s 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -17,6 +17,14 @@
       PYTHON_VERSION: "2.7"
       PYTHON_ARCH: "64"
 
+    - PYTHON: "C:\\Python38"
+      PYTHON_VERSION: "3.8"
+      PYTHON_ARCH: "32"
+
+    - PYTHON: "C:\\Python38-x64"
+      PYTHON_VERSION: "3.8"
+      PYTHON_ARCH: "64"
+
     - PYTHON: "C:\\Python37"
       PYTHON_VERSION: "3.7"
       PYTHON_ARCH: "32"
diff --git a/docs/src/userguide/external_C_code.rst b/docs/src/userguide/external_C_code.rst
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_ZG9jcy9zcmMvdXNlcmd1aWRlL2V4dGVybmFsX0NfY29kZS5yc3Q=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_ZG9jcy9zcmMvdXNlcmd1aWRlL2V4dGVybmFsX0NfY29kZS5yc3Q= 100644
--- a/docs/src/userguide/external_C_code.rst
+++ b/docs/src/userguide/external_C_code.rst
@@ -17,7 +17,7 @@
 Cython module can be used as a bridge to allow Python code to call C code, it
 can also be used to allow C code to call Python code.
 
-.. _embedding Python: http://www.freenet.org.nz/python/embeddingpyrex/
+.. _embedding Python: https://web.archive.org/web/20120225082358/http://www.freenet.org.nz:80/python/embeddingpyrex/
 
 External declarations
 =======================
diff --git a/runtests.py b/runtests.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_cnVudGVzdHMucHk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_cnVudGVzdHMucHk= 100755
--- a/runtests.py
+++ b/runtests.py
@@ -21,6 +21,7 @@
 import zlib
 import glob
 from contextlib import contextmanager
+from collections import defaultdict
 
 try:
     import platform
@@ -47,26 +48,6 @@
     threading = None
 
 try:
-    from collections import defaultdict
-except ImportError:
-    class defaultdict(object):
-        def __init__(self, default_factory=lambda : None):
-            self._dict = {}
-            self.default_factory = default_factory
-        def __getitem__(self, key):
-            if key not in self._dict:
-                self._dict[key] = self.default_factory()
-            return self._dict[key]
-        def __setitem__(self, key, value):
-            self._dict[key] = value
-        def __contains__(self, key):
-            return key in self._dict
-        def __repr__(self):
-            return repr(self._dict)
-        def __nonzero__(self):
-            return bool(self._dict)
-
-try:
     from unittest import SkipTest
 except ImportError:
     class SkipTest(Exception):  # don't raise, only provided to allow except-ing it!
@@ -424,6 +405,7 @@
     # to be unsafe...
     (2,999): (operator.lt, lambda x: x in ['run.special_methods_T561_py3',
                                            'run.test_raisefrom',
+                                           'run.reimport_failure',  # reimports don't do anything in Py2
                                            ]),
     (3,): (operator.ge, lambda x: x in ['run.non_future_division',
                                         'compile.extsetslice',
@@ -635,6 +617,7 @@
         self.cleanup_failures = options.cleanup_failures
         self.with_pyregr = with_pyregr
         self.cython_only = options.cython_only
+        self.doctest_selector = re.compile(options.only_pattern).search if options.only_pattern else None
         self.languages = languages
         self.test_bugs = test_bugs
         self.fork = options.fork
@@ -748,6 +731,6 @@
         else:
             languages = self.languages
 
-        if skip_c(tags) and 'c' in languages:
+        if 'c' in languages and skip_c(tags):
             languages = list(languages)
             languages.remove('c')
@@ -752,5 +735,5 @@
             languages = list(languages)
             languages.remove('c')
-        elif 'no-cpp' in tags['tag'] and 'cpp' in self.languages:
+        if 'cpp' in languages and 'no-cpp' in tags['tag']:
             languages = list(languages)
             languages.remove('cpp')
@@ -755,5 +738,7 @@
             languages = list(languages)
             languages.remove('cpp')
+        if not languages:
+            return []
 
         pythran_dir = self.pythran_dir
         if 'pythran' in tags['tag'] and not pythran_dir and 'cpp' in languages:
@@ -790,6 +775,7 @@
                           cleanup_sharedlibs=self.cleanup_sharedlibs,
                           cleanup_failures=self.cleanup_failures,
                           cython_only=self.cython_only,
+                          doctest_selector=self.doctest_selector,
                           fork=self.fork,
                           language_level=self.language_level,
                           warning_errors=warning_errors,
@@ -830,7 +816,7 @@
 class CythonCompileTestCase(unittest.TestCase):
     def __init__(self, test_directory, workdir, module, tags, language='c', preparse='id',
                  expect_errors=False, expect_warnings=False, annotate=False, cleanup_workdir=True,
-                 cleanup_sharedlibs=True, cleanup_failures=True, cython_only=False,
+                 cleanup_sharedlibs=True, cleanup_failures=True, cython_only=False, doctest_selector=None,
                  fork=True, language_level=2, warning_errors=False,
                  test_determinism=False,
                  common_utility_dir=None, pythran_dir=None, stats=None):
@@ -848,6 +834,7 @@
         self.cleanup_sharedlibs = cleanup_sharedlibs
         self.cleanup_failures = cleanup_failures
         self.cython_only = cython_only
+        self.doctest_selector = doctest_selector
         self.fork = fork
         self.language_level = language_level
         self.warning_errors = warning_errors
@@ -1095,6 +1082,11 @@
                 from Cython.Build.Dependencies import update_pythran_extension
                 update_pythran_extension(extension)
 
+            # Compile with -DCYTHON_CLINE_IN_TRACEBACK=1 unless we have
+            # the "traceback" tag
+            if 'traceback' not in self.tags['tag']:
+                extension.define_macros.append(("CYTHON_CLINE_IN_TRACEBACK", 1))
+
             for matcher, fixer in list(EXT_EXTRAS.items()):
                 if isinstance(matcher, str):
                     # lazy init
@@ -1289,6 +1281,8 @@
             else:
                 module = module_or_name
             tests = doctest.DocTestSuite(module)
+            if self.doctest_selector is not None:
+                tests._tests[:] = [test for test in tests._tests if self.doctest_selector(test.id())]
             with self.stats.time(self.name, self.language, 'run'):
                 tests.run(result)
         run_forked_test(result, run_test, self.shortDescription(), self.fork)
@@ -2071,6 +2065,8 @@
     parser.add_option("-T", "--ticket", dest="tickets",
                       action="append",
                       help="a bug ticket number to run the respective test in 'tests/*'")
+    parser.add_option("-k", dest="only_pattern",
+                      help="a regex pattern for selecting doctests and test functions in the test modules")
     parser.add_option("-3", dest="language_level",
                       action="store_const", const=3, default=2,
                       help="set language level to Python 3 (useful for running the CPython regression tests)'")
@@ -2428,4 +2424,5 @@
                                 options.pythran_dir, add_embedded_test=True, stats=stats)
         test_suite.addTest(filetests.build_suite())
     if options.examples and languages:
+        examples_workdir = os.path.join(WORKDIR, 'examples')
         for subdirectory in glob.glob(os.path.join(options.examples_dir, "*/")):
@@ -2431,5 +2428,5 @@
         for subdirectory in glob.glob(os.path.join(options.examples_dir, "*/")):
-            filetests = TestBuilder(subdirectory, WORKDIR, selectors, exclude_selectors,
+            filetests = TestBuilder(subdirectory, examples_workdir, selectors, exclude_selectors,
                                     options, options.pyregr, languages, test_bugs,
                                     options.language_level, common_utility_dir,
                                     options.pythran_dir,
diff --git a/setup.py b/setup.py
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_c2V0dXAucHk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_c2V0dXAucHk= 100755
--- a/setup.py
+++ b/setup.py
@@ -248,8 +248,9 @@
     Python code.
 
     Note that for one-time builds, e.g. for CI/testing, on platforms that are not
-    covered by one of the wheel packages provided on PyPI, it is substantially faster
-    than a full source build to install an uncompiled (slower) version of Cython with::
+    covered by one of the wheel packages provided on PyPI *and* the pure Python wheel
+    that we provide is not used, it is substantially faster than a full source build
+    to install an uncompiled (slower) version of Cython with::
 
         pip install Cython --install-option="--no-cython-compile"
 
diff --git a/tests/buffers/buffmt.pyx b/tests/buffers/buffmt.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvYnVmZmVycy9idWZmbXQucHl4..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvYnVmZmVycy9idWZmbXQucHl4 100644
--- a/tests/buffers/buffmt.pyx
+++ b/tests/buffers/buffmt.pyx
@@ -1,4 +1,5 @@
 from __future__ import unicode_literals
+import struct
 
 # Tests buffer format string parsing.
 
@@ -406,6 +407,62 @@
         fmt, sizeof(PackedStructWithCharArrays))
 
 
+ctypedef struct PackedStructWithArrays:
+    double a[16]
+    double b[16]
+    double c
+
+ctypedef struct UnpackedStructWithArrays:
+    int a
+    float b[8]
+    float c
+    unsigned long long d
+    int e[5]
+    int f
+    int g
+    double h[4]
+    int i
+
+ctypedef struct PackedStructWithNDArrays:
+    double a
+    double b[2][2]
+    float c
+    float d
+
+
+@testcase
+def packed_struct_with_arrays(fmt):
+    """
+    >>> packed_struct_with_arrays("T{(16)d:a:(16)d:b:d:c:}")
+    """
+
+    cdef object[PackedStructWithArrays] buf = MockBuffer(
+        fmt, sizeof(PackedStructWithArrays))
+
+
+@testcase
+def unpacked_struct_with_arrays(fmt):
+    """
+    >>> if struct.calcsize('P') == 8:  # 64 bit
+    ...     unpacked_struct_with_arrays("T{i:a:(8)f:b:f:c:Q:d:(5)i:e:i:f:i:g:xxxx(4)d:h:i:i:}")
+    ... elif struct.calcsize('P') == 4:  # 32 bit
+    ...     unpacked_struct_with_arrays("T{i:a:(8)f:b:f:c:Q:d:(5)i:e:i:f:i:g:(4)d:h:i:i:}")
+    """
+
+    cdef object[UnpackedStructWithArrays] buf = MockBuffer(
+        fmt, sizeof(UnpackedStructWithArrays))
+
+
+@testcase
+def packed_struct_with_ndarrays(fmt):
+    """
+    >>> packed_struct_with_ndarrays("T{d:a:(2,2)d:b:f:c:f:d:}")
+    """
+
+    cdef object[PackedStructWithNDArrays] buf = MockBuffer(
+        fmt, sizeof(PackedStructWithNDArrays))
+
+
 # TODO: empty struct
 # TODO: Incomplete structs
 # TODO: mixed structs
diff --git a/tests/buffers/mockbuffers.pxi b/tests/buffers/mockbuffers.pxi
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvYnVmZmVycy9tb2NrYnVmZmVycy5weGk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvYnVmZmVycy9tb2NrYnVmZmVycy5weGk= 100644
--- a/tests/buffers/mockbuffers.pxi
+++ b/tests/buffers/mockbuffers.pxi
@@ -43,9 +43,7 @@
         if strides is None:
             strides = []
             cumprod = 1
-            rshape = list(shape)
-            rshape.reverse()
-            for s in rshape:
+            for s in shape[::-1]:
                 strides.append(cumprod)
                 cumprod *= s
             strides.reverse()
diff --git a/tests/compile/ctuple_unused_T3543.pyx b/tests/compile/ctuple_unused_T3543.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvY29tcGlsZS9jdHVwbGVfdW51c2VkX1QzNTQzLnB5eA==
--- /dev/null
+++ b/tests/compile/ctuple_unused_T3543.pyx
@@ -0,0 +1,9 @@
+# ticket: 3543
+# mode: compile
+
+# Views define unused ctuples, including (long,)
+from cython cimport view
+
+# Implicitly generate a ctuple (long,)
+obj = None
+obj or (1,)
diff --git a/tests/errors/e_cpp_only_features.pyx b/tests/errors/e_cpp_only_features.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvZXJyb3JzL2VfY3BwX29ubHlfZmVhdHVyZXMucHl4
--- /dev/null
+++ b/tests/errors/e_cpp_only_features.pyx
@@ -0,0 +1,26 @@
+# mode: error
+# tag: no-cpp, werror
+
+from cython.operator import typeid
+
+def use_typeid():
+    cdef int i = 0
+    print typeid(i) == typeid(i)
+
+cdef cppclass A:
+    pass
+
+def use_new():
+    cdef A* x = new A()
+
+def use_del():
+    cdef A a = A()
+    cdef A *p = &a
+    del p
+
+_ERRORS = """
+8:10: typeid operator only allowed in c++
+8:23: typeid operator only allowed in c++
+14:20: Operation only allowed in c++
+19:4: Operation only allowed in c++
+"""
diff --git a/tests/errors/e_fstring.pyx b/tests/errors/e_fstring.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvZXJyb3JzL2VfZnN0cmluZy5weXg=
--- /dev/null
+++ b/tests/errors/e_fstring.pyx
@@ -0,0 +1,23 @@
+# mode: error
+# tag: fstring
+
+def incorrect_fstrings(x):
+    return [
+        f"{x}{'\\'}'{x+1}",
+        f"""{}""",
+        f"{}",
+        f"{x!}",
+        f"{",
+        f"{{}}}",
+    ]
+
+
+_ERRORS = """
+6:16: backslashes not allowed in f-strings
+7:14: empty expression not allowed in f-string
+8:12: empty expression not allowed in f-string
+9:14: missing '}' in format string expression, found '!'
+10:12: empty expression not allowed in f-string
+10:12: missing '}' in format string expression
+11:15: f-string: single '}' is not allowed
+"""
diff --git a/tests/errors/w_uninitialized.pyx b/tests/errors/w_uninitialized.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvZXJyb3JzL3dfdW5pbml0aWFsaXplZC5weXg=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvZXJyb3JzL3dfdW5pbml0aWFsaXplZC5weXg= 100644
--- a/tests/errors/w_uninitialized.pyx
+++ b/tests/errors/w_uninitialized.pyx
@@ -112,6 +112,10 @@
     args = []
     kwargs = {}
 
+def uninitialized_in_sizeof():
+    cdef int i
+    print sizeof(i)
+
 _ERRORS = """
 6:10: local variable 'a' referenced before assignment
 12:11: local variable 'a' might be referenced before assignment
diff --git a/tests/errors/w_uninitialized_cpp.pyx b/tests/errors/w_uninitialized_cpp.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvZXJyb3JzL3dfdW5pbml0aWFsaXplZF9jcHAucHl4
--- /dev/null
+++ b/tests/errors/w_uninitialized_cpp.pyx
@@ -0,0 +1,12 @@
+# cython: warn.maybe_uninitialized=True
+# mode: error
+# tag: cpp, werror
+
+from cython.operator import typeid
+
+def uninitialized_in_typeid():
+    cdef int i
+    print typeid(i) == typeid(i)
+
+_ERRORS = """
+"""
diff --git a/tests/memoryview/numpy_memoryview.pyx b/tests/memoryview/numpy_memoryview.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvbWVtb3J5dmlldy9udW1weV9tZW1vcnl2aWV3LnB5eA==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvbWVtb3J5dmlldy9udW1weV9tZW1vcnl2aWV3LnB5eA== 100644
--- a/tests/memoryview/numpy_memoryview.pyx
+++ b/tests/memoryview/numpy_memoryview.pyx
@@ -718,3 +718,42 @@
         x[i]
         x[i, ...]
         x[i, :]
+
+
+ctypedef struct SameTypeAfterArraysStructSimple:
+    double a[16]
+    double b[16]
+    double c
+
+@testcase
+def same_type_after_arrays_simple():
+    """
+    >>> same_type_after_arrays_simple()
+    """
+
+    cdef SameTypeAfterArraysStructSimple element
+    arr = np.ones(2, np.asarray(<SameTypeAfterArraysStructSimple[:1]>&element).dtype)
+    cdef SameTypeAfterArraysStructSimple[:] memview = arr
+
+
+ctypedef struct SameTypeAfterArraysStructComposite:
+    int a
+    float b[8]
+    float c
+    unsigned long d
+    int e[5]
+    int f
+    int g
+    double h[4]
+    int i
+
+@testcase
+def same_type_after_arrays_composite():
+    """
+    >>> same_type_after_arrays_composite() if sys.version_info[:2] >= (3, 5) else None
+    >>> same_type_after_arrays_composite() if sys.version_info[:2] == (2, 7) else None
+    """
+
+    cdef SameTypeAfterArraysStructComposite element
+    arr = np.ones(2, np.asarray(<SameTypeAfterArraysStructComposite[:1]>&element).dtype)
+    cdef SameTypeAfterArraysStructComposite[:] memview = arr
diff --git a/tests/pypy_bugs.txt b/tests/pypy_bugs.txt
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcHlweV9idWdzLnR4dA==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcHlweV9idWdzLnR4dA== 100644
--- a/tests/pypy_bugs.txt
+++ b/tests/pypy_bugs.txt
@@ -11,6 +11,8 @@
 yield_from_pep380
 memoryview_inplace_division
 
+run.reimport_failure
+
 # gc issue?
 memoryview_in_subclasses
 external_ref_reassignment
diff --git a/tests/run/always_allow_keywords_T295.pyx b/tests/run/always_allow_keywords_T295.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL2Fsd2F5c19hbGxvd19rZXl3b3Jkc19UMjk1LnB5eA==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL2Fsd2F5c19hbGxvd19rZXl3b3Jkc19UMjk1LnB5eA== 100644
--- a/tests/run/always_allow_keywords_T295.pyx
+++ b/tests/run/always_allow_keywords_T295.pyx
@@ -2,8 +2,19 @@
 
 cimport cython
 
+def assert_typeerror_no_keywords(func, *args, **kwds):
+    # Python 3.9 produces a slightly different error message
+    # to previous versions, so doctest isn't matching the
+    # traceback
+    try:
+        func(*args, **kwds)
+    except TypeError as e:
+        assert e.args[0].endswith(" takes no keyword arguments"), e.args[0]
+    else:
+        assert False, "call did not raise TypeError"
+
 
 def func1(arg):
     """
     >>> func1(None)
     >>> func1(*[None])
@@ -5,12 +16,9 @@
 
 def func1(arg):
     """
     >>> func1(None)
     >>> func1(*[None])
-    >>> func1(arg=None)
-    Traceback (most recent call last):
-    ...
-    TypeError: func1() takes no keyword arguments
+    >>> assert_typeerror_no_keywords(func1, arg=None)
     """
     pass
 
@@ -19,10 +27,7 @@
     """
     >>> func2(None)
     >>> func2(*[None])
-    >>> func2(arg=None)
-    Traceback (most recent call last):
-    ...
-    TypeError: func2() takes no keyword arguments
+    >>> assert_typeerror_no_keywords(func2, arg=None)
     """
     pass
 
@@ -39,9 +44,6 @@
     """
     >>> A().meth1(None)
     >>> A().meth1(*[None])
-    >>> A().meth1(arg=None)
-    Traceback (most recent call last):
-    ...
-    TypeError: meth1() takes no keyword arguments
+    >>> assert_typeerror_no_keywords(A().meth1, arg=None)
     >>> A().meth2(None)
     >>> A().meth2(*[None])
@@ -46,9 +48,6 @@
     >>> A().meth2(None)
     >>> A().meth2(*[None])
-    >>> A().meth2(arg=None)
-    Traceback (most recent call last):
-    ...
-    TypeError: meth2() takes no keyword arguments
+    >>> assert_typeerror_no_keywords(A().meth2, arg=None)
     >>> A().meth3(None)
     >>> A().meth3(*[None])
     >>> A().meth3(arg=None)
diff --git a/tests/run/async_def.pyx b/tests/run/async_def.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL2FzeW5jX2RlZi5weXg=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL2FzeW5jX2RlZi5weXg= 100644
--- a/tests/run/async_def.pyx
+++ b/tests/run/async_def.pyx
@@ -1,4 +1,4 @@
-# cython: language_level=3, binding=True
+# cython: language_level=3str, binding=True
 # mode: run
 # tag: pep492, await, gh3337
 
@@ -33,3 +33,30 @@
     ([], 0)
     """
     return min(x - y, 0)
+
+
+async def outer_with_nested(called):
+    """
+    >>> called = []
+    >>> _, inner = run_async(outer_with_nested(called))
+    >>> called  # after outer_with_nested()
+    ['outer', 'make inner', 'deco', 'return inner']
+    >>> _ = run_async(inner())
+    >>> called  # after inner()
+    ['outer', 'make inner', 'deco', 'return inner', 'inner']
+    """
+    called.append('outer')
+
+    def deco(f):
+        called.append('deco')
+        return f
+
+    called.append('make inner')
+
+    @deco
+    async def inner():
+        called.append('inner')
+        return 1
+
+    called.append('return inner')
+    return inner
diff --git a/tests/run/builtin_abs.pyx b/tests/run/builtin_abs.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL2J1aWx0aW5fYWJzLnB5eA==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL2J1aWx0aW5fYWJzLnB5eA== 100644
--- a/tests/run/builtin_abs.pyx
+++ b/tests/run/builtin_abs.pyx
@@ -1,5 +1,6 @@
 # mode: run
 # ticket: 698
+# distutils: extra_compile_args=-fwrapv
 
 cdef extern from *:
     int INT_MAX
diff --git a/tests/run/bytesmethods.pyx b/tests/run/bytesmethods.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL2J5dGVzbWV0aG9kcy5weXg=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL2J5dGVzbWV0aG9kcy5weXg= 100644
--- a/tests/run/bytesmethods.pyx
+++ b/tests/run/bytesmethods.pyx
@@ -1,5 +1,13 @@
 cimport cython
 
+cdef extern from *:
+    cdef Py_ssize_t PY_SSIZE_T_MIN
+    cdef Py_ssize_t PY_SSIZE_T_MAX
+
+SSIZE_T_MAX = PY_SSIZE_T_MAX
+SSIZE_T_MIN = PY_SSIZE_T_MIN
+
+
 b_a = b'a'
 b_b = b'b'
 
@@ -114,6 +122,14 @@
     <BLANKLINE>
     >>> print(bytes_decode(s, -300, -500))
     <BLANKLINE>
+    >>> print(bytes_decode(s, SSIZE_T_MIN, SSIZE_T_MIN))
+    <BLANKLINE>
+    >>> print(bytes_decode(s, SSIZE_T_MIN, SSIZE_T_MAX))
+    abaab
+    >>> print(bytes_decode(s, SSIZE_T_MAX, SSIZE_T_MIN))
+    <BLANKLINE>
+    >>> print(bytes_decode(s, SSIZE_T_MAX, SSIZE_T_MAX))
+    <BLANKLINE>
 
     >>> s[:'test']                       # doctest: +ELLIPSIS
     Traceback (most recent call last):
diff --git a/tests/run/charptr_decode.pyx b/tests/run/charptr_decode.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL2NoYXJwdHJfZGVjb2RlLnB5eA==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL2NoYXJwdHJfZGVjb2RlLnB5eA== 100644
--- a/tests/run/charptr_decode.pyx
+++ b/tests/run/charptr_decode.pyx
@@ -1,6 +1,11 @@
 
 cimport cython
 
+cdef extern from *:
+    cdef Py_ssize_t PY_SSIZE_T_MIN
+    cdef Py_ssize_t PY_SSIZE_T_MAX
+
+
 ############################################################
 # tests for char* slicing
 
@@ -118,6 +123,19 @@
             (cstring+1)[:].decode('UTF-8'),
             (cstring+1)[return1():return5()].decode('UTF-8'))
 
+@cython.test_assert_path_exists("//PythonCapiCallNode")
+@cython.test_fail_if_path_exists("//AttributeNode")
+def slice_charptr_decode_large_bounds():
+    """
+    >>> print(str(slice_charptr_decode_large_bounds()).replace("u'", "'"))
+    ('abcABCqtp', '', '', '')
+    """
+    return (cstring[PY_SSIZE_T_MIN:9].decode('UTF-8'),
+            cstring[PY_SSIZE_T_MAX:PY_SSIZE_T_MIN].decode('UTF-8'),
+            cstring[PY_SSIZE_T_MIN:PY_SSIZE_T_MIN].decode('UTF-8'),
+            cstring[PY_SSIZE_T_MAX:PY_SSIZE_T_MAX].decode('UTF-8'))
+
+
 cdef return1(): return 1
 cdef return3(): return 3
 cdef return4(): return 4
diff --git a/tests/run/classmethod.pyx b/tests/run/classmethod.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL2NsYXNzbWV0aG9kLnB5eA==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL2NsYXNzbWV0aG9kLnB5eA== 100644
--- a/tests/run/classmethod.pyx
+++ b/tests/run/classmethod.pyx
@@ -9,6 +9,10 @@
 class1
 >>> class1().bview()
 class1
+>>> class1().cview()
+class1
+>>> class1().cview("XX")
+class1XX
 
 >>> class2.view()
 class2
@@ -35,6 +39,12 @@
 def f_plus(cls, a):
     return cls.a + a
 
+def second_decorator(f):
+    # note - a class, not a function (didn't pass Cython's test in __Pyx_Method_ClassMethod)
+    class C:
+        def __call__(self, *args):
+            return f(*args)
+    return C()
 
 class class1:
     a = 5
@@ -48,6 +58,11 @@
     def bview(cls):
         print cls.__name__
 
+    @classmethod
+    @second_decorator
+    def cview(cls, s=""):
+        print cls.__name__+s
+
 
 class class2(object):
     a = 6
diff --git a/tests/run/cpp_move.pyx b/tests/run/cpp_move.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL2NwcF9tb3ZlLnB5eA==
--- /dev/null
+++ b/tests/run/cpp_move.pyx
@@ -0,0 +1,34 @@
+# mode: run
+# tag: cpp, werror, cpp11
+
+from libcpp cimport nullptr
+from libcpp.memory cimport shared_ptr, make_shared
+from libcpp.utility cimport move
+from cython.operator cimport dereference
+
+cdef extern from *:
+    """
+    #include <string>
+
+    template<typename T> const char* move_helper(T&) { return "lvalue-ref"; }
+    template<typename T> const char* move_helper(T&&) { return "rvalue-ref"; }
+    """
+    const char* move_helper[T](T)
+
+def test_move_assignment():
+    """
+    >>> test_move_assignment()
+    """
+    cdef shared_ptr[int] p1, p2
+    p1 = make_shared[int](1337)
+    p2 = move(p1)
+    assert p1 == nullptr
+    assert dereference(p2) == 1337
+
+def test_move_func_call():
+    """
+    >>> test_move_func_call()
+    """
+    cdef shared_ptr[int] p
+    assert move_helper(p) == b'lvalue-ref'
+    assert move_helper(move(p)) == b'rvalue-ref'
diff --git a/tests/run/cython3.pyx b/tests/run/cython3.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL2N5dGhvbjMucHl4..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL2N5dGhvbjMucHl4 100644
--- a/tests/run/cython3.pyx
+++ b/tests/run/cython3.pyx
@@ -258,6 +258,32 @@
         yield 6
 
 
+def nested_except_gh3666(a=False, b=False):
+    """
+    >>> print(nested_except_gh3666())
+    A
+    >>> print(nested_except_gh3666(a=True))
+    B-V
+    >>> print(nested_except_gh3666(a=True, b=True))
+    B-V-T
+    """
+    try:
+        if a:
+            raise ValueError
+        return "A"
+    except TypeError as exc:
+        return "A-T"
+    except ValueError as exc:
+        try:
+            if b:
+                raise TypeError
+            return "B-V"
+        except ValueError as exc:
+            return "B-V-V"
+        except TypeError as exc:
+            return "B-V-T"
+
+
 ### Py3 feature tests
 
 def print_function(*args):
diff --git a/tests/run/fstring.pyx b/tests/run/fstring.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL2ZzdHJpbmcucHl4..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL2ZzdHJpbmcucHl4 100644
--- a/tests/run/fstring.pyx
+++ b/tests/run/fstring.pyx
@@ -474,5 +474,5 @@
     "//FormattedValueNode",
     "//JoinedStrNode",
 )
-def generated_fstring(int i, unicode u not None, o):
+def generated_fstring(int i, float f, unicode u not None, o):
     """
@@ -478,3 +478,3 @@
     """
-    >>> i, u, o = 11, u'xyz', [1]
+    >>> i, f, u, o = 11, 1.3125, u'xyz', [1]
     >>> print(((
@@ -480,5 +480,6 @@
     >>> print(((
-    ...     u"(i) %s-%.3s-%r-%.3r-%d-%3d-%o-%04o-%x-%4x-%X-%03X-%.1f-%04.2f %% "
-    ...     u"(u) %s-%.2s-%r-%.7r %% "
-    ...     u"(o) %s-%.2s-%r-%.2r"
+    ...     u"(i) %s-%.3s-%r-%.3r-%d-%3d-%-3d-%o-%04o-%x-%4x-%X-%03X-%.1f-%04.2f %% "
+    ...     u"(u) %s-%.2s-%r-%.7r-%05s-%-5s %% "
+    ...     u"(o) %s-%.2s-%r-%.2r %% "
+    ...     u"(f) %.2f-%d"
     ... ) % (
@@ -484,4 +485,4 @@
     ... ) % (
-    ...     i, i, i, i, i, i, i, i, i, i, i, i, i, i,
-    ...     u, u, u, u,
+    ...     i, i, i, i, i, i, i, i, i, i, i, i, i, i, i,
+    ...     u, u, u, u, u, u,
     ...     o, o, o, o,
@@ -487,2 +488,3 @@
     ...     o, o, o, o,
+    ...     f, f,
     ... )).replace("-u'xyz'", "-'xyz'"))
@@ -488,3 +490,3 @@
     ... )).replace("-u'xyz'", "-'xyz'"))
-    (i) 11-11-11-11-11- 11-13-0013-b-   b-B-00B-11.0-11.00 % (u) xyz-xy-'xyz'-'xyz' % (o) [1]-[1-[1]-[1
+    (i) 11-11-11-11-11- 11-11 -13-0013-b-   b-B-00B-11.0-11.00 % (u) xyz-xy-'xyz'-'xyz'-  xyz-xyz   % (o) [1]-[1-[1]-[1 % (f) 1.31-1
 
@@ -490,5 +492,5 @@
 
-    >>> print(generated_fstring(i, u, o).replace("-u'xyz'", "-'xyz'"))
-    (i) 11-11-11-11-11- 11-13-0013-b-   b-B-00B-11.0-11.00 % (u) xyz-xy-'xyz'-'xyz' % (o) [1]-[1-[1]-[1
+    >>> print(generated_fstring(i, f, u, o).replace("-u'xyz'", "-'xyz'"))
+    (i) 11-11-11-11-11- 11-11 -13-0013-b-   b-B-00B-11.0-11.00 % (u) xyz-xy-'xyz'-'xyz'-  xyz-xyz   % (o) [1]-[1-[1]-[1 % (f) 1.31-1
     """
     return (
@@ -493,6 +495,7 @@
     """
     return (
-        u"(i) %s-%.3s-%r-%.3r-%d-%3d-%o-%04o-%x-%4x-%X-%03X-%.1f-%04.2f %% "
-        u"(u) %s-%.2s-%r-%.7r %% "
-        u"(o) %s-%.2s-%r-%.2r"
+        u"(i) %s-%.3s-%r-%.3r-%d-%3d-%-3d-%o-%04o-%x-%4x-%X-%03X-%.1f-%04.2f %% "
+        u"(u) %s-%.2s-%r-%.7r-%05s-%-5s %% "
+        u"(o) %s-%.2s-%r-%.2r %% "
+        u"(f) %.2f-%d"
     ) % (
@@ -498,4 +501,4 @@
     ) % (
-        i, i, i, i, i, i, i, i, i, i, i, i, i, i,
-        u, u, u, u,
+        i, i, i, i, i, i, i, i, i, i, i, i, i, i, i,
+        u, u, u, u, u, u,
         o, o, o, o,
@@ -501,4 +504,5 @@
         o, o, o, o,
+        f, f,
     )
 
 
diff --git a/tests/run/fused_types.pyx b/tests/run/fused_types.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL2Z1c2VkX3R5cGVzLnB5eA==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL2Z1c2VkX3R5cGVzLnB5eA== 100644
--- a/tests/run/fused_types.pyx
+++ b/tests/run/fused_types.pyx
@@ -400,3 +400,27 @@
         return x
     else:
         return 2 * x
+
+
+### see GH3642 - presence of cdef inside "unrelated" caused a type to be incorrectly inferred
+cdef unrelated(cython.floating x):  # not called in the code shown here; per the GH3642 note its mere presence matters
+    cdef cython.floating t = 1  # presumably the fused-typed cdef local that triggered GH3642 - confirm against the issue
+    return t
+
+cdef handle_float(float* x): return 'float'  # only accepts float*; the result names the pointer type received
+
+cdef handle_double(double* x): return 'double'  # only accepts double*; the result names the pointer type received
+
+def convert_to_ptr(cython.floating x):  # passes &x to the helper matching the active fused specialization
+    """
+    >>> convert_to_ptr(1.0)
+    'double'
+    >>> convert_to_ptr['double'](1.0)
+    'double'
+    >>> convert_to_ptr['float'](1.0)
+    'float'
+    """
+    if cython.floating is float:
+        return handle_float(&x)  # only valid if &x is typed as float* in this specialization
+    elif cython.floating is double:
+        return handle_double(&x)  # only valid if &x is typed as double* in this specialization
diff --git a/tests/run/numpy_common.pxi b/tests/run/numpy_common.pxi
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL251bXB5X2NvbW1vbi5weGk=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL251bXB5X2NvbW1vbi5weGk= 100644
--- a/tests/run/numpy_common.pxi
+++ b/tests/run/numpy_common.pxi
@@ -1,4 +1,6 @@
 # hack to avoid C compiler warnings about unused functions in the NumPy header files
 
+from numpy cimport import_array  # , import_umath
+
 cdef extern from *:
    bint FALSE "0"
@@ -3,7 +5,5 @@
 cdef extern from *:
    bint FALSE "0"
-   void import_array()
-#   void import_umath()
 
 if FALSE:
     import_array()
diff --git a/tests/run/numpy_test.pyx b/tests/run/numpy_test.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL251bXB5X3Rlc3QucHl4..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL251bXB5X3Rlc3QucHl4 100644
--- a/tests/run/numpy_test.pyx
+++ b/tests/run/numpy_test.pyx
@@ -7,6 +7,9 @@
 import re
 import sys
 
+# initialise NumPy C-API
+np.import_array()
+
 
 def little_endian():
     cdef int endian_detector = 1
@@ -182,7 +185,7 @@
             ('a', np.dtype('i,i')),\
             ('b', np.dtype('i,i'))\
         ]))))                              # doctest: +NORMALIZE_WHITESPACE
-    array([((0, 0), (0, 0)), ((1, 2), (1, 4)), ((1, 2), (1, 4))], 
+    array([((0, 0), (0, 0)), ((1, 2), (1, 4)), ((1, 2), (1, 4))],
           dtype=[('a', [('f0', '!i4'), ('f1', '!i4')]), ('b', [('f0', '!i4'), ('f1', '!i4')])])
 
     >>> print(test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\
@@ -235,7 +238,7 @@
     8,16
 
     >>> test_point_record()         # doctest: +NORMALIZE_WHITESPACE
-    array([(0., 0.), (1., -1.), (2., -2.)], 
+    array([(0., 0.), (1., -1.), (2., -2.)],
           dtype=[('x', '!f8'), ('y', '!f8')])
 
 """
@@ -947,4 +950,16 @@
     return a == 0, obj == 0, a == 1, obj == 1
 
 
-include "numpy_common.pxi"
+@testcase
+def test_c_api_searchsorted(np.ndarray arr, other):  # returns the C-API result and the Python-level result for comparison
+    """
+    >>> arr = np.random.randn(10)
+    >>> other = np.random.randn(5)
+    >>> result, expected = test_c_api_searchsorted(arr, other)
+    >>> (result == expected).all()
+    True
+    """
+    result = np.PyArray_SearchSorted(arr, other, np.NPY_SEARCHRIGHT, NULL)  # NULL is presumably "no sorter" - confirm in NumPy C-API docs
+
+    expected = arr.searchsorted(other, side="right")  # same query through the ndarray method
+    return result, expected
diff --git a/tests/run/parallel.pyx b/tests/run/parallel.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL3BhcmFsbGVsLnB5eA==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL3BhcmFsbGVsLnB5eA== 100644
--- a/tests/run/parallel.pyx
+++ b/tests/run/parallel.pyx
@@ -8,6 +8,9 @@
 
 openmp.omp_set_nested(1)
 
+cdef int forward(int x) nogil:  # identity function declared nogil; gives the test a call to pass threadid() through
+    return x
+
 def test_parallel():
     """
     >>> test_parallel()
@@ -20,6 +23,9 @@
 
     with nogil, cython.parallel.parallel():
         buf[threadid()] = threadid()
+        # Recognise threadid() also when it's used in a function argument.
+        # See https://github.com/cython/cython/issues/3594
+        buf[forward(cython.parallel.threadid())] = forward(threadid())
 
     for i in range(maxthreads):
         assert buf[i] == i
diff --git a/tests/run/reimport_failure.srctree b/tests/run/reimport_failure.srctree
new file mode 100644
index 0000000000000000000000000000000000000000..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL3JlaW1wb3J0X2ZhaWx1cmUuc3JjdHJlZQ==
--- /dev/null
+++ b/tests/run/reimport_failure.srctree
@@ -0,0 +1,38 @@
+# mode: run
+# tag: pep489
+
+"""
+PYTHON setup.py build_ext -i
+PYTHON tester.py
+"""
+
+######## setup.py ########
+
+from Cython.Build.Dependencies import cythonize
+from distutils.core import setup
+
+setup(
+  ext_modules = cythonize("*.pyx"),  # compiles every .pyx in this source tree; the only one shown here is failure.pyx
+)
+
+
+######## failure.pyx ########
+
+if globals():  # true at runtime but not a compile-time constant, so dead code removal cannot drop what follows
+    raise ImportError  # makes the module body fail each time it is executed
+
+cdef class C:  # extension type declared after the raise; NOTE(review): presumably needed to reproduce the re-import bug - confirm
+    cdef int a
+
+
+######## tester.py ########
+
+try:
+    try:
+        import failure  # 1: first import; failure.pyx raises ImportError from its module body
+    except ImportError:
+        import failure  # 2: second import of the same module; the test expects it to raise ImportError too
+except ImportError:
+    pass  # expected outcome: the ImportError from import 2 arrives here
+else:
+    raise RuntimeError("ImportError was not raised on second import!")  # reached when no ImportError escaped the inner try
diff --git a/tests/run/tracebacks.pyx b/tests/run/tracebacks.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL3RyYWNlYmFja3MucHl4..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL3RyYWNlYmFja3MucHl4 100644
--- a/tests/run/tracebacks.pyx
+++ b/tests/run/tracebacks.pyx
@@ -1,4 +1,4 @@
-import traceback
+# tag: traceback
 
 def foo1():
   foo2()
@@ -21,6 +21,7 @@
   try:
     foo1()
   except:
+    import traceback
     tb_string = traceback.format_exc()
     expected = (
       'tracebacks.pyx',
diff --git a/tests/run/unicode_slicing.pyx b/tests/run/unicode_slicing.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL3VuaWNvZGVfc2xpY2luZy5weXg=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL3VuaWNvZGVfc2xpY2luZy5weXg= 100644
--- a/tests/run/unicode_slicing.pyx
+++ b/tests/run/unicode_slicing.pyx
@@ -151,5 +151,20 @@
     >>> slice_none_none(None, 2, 4)
     Traceback (most recent call last):    
     TypeError: 'NoneType' object is not subscriptable
+
+    >>> slice_start_end(u'abcdef', SSIZE_T_MAX, SSIZE_T_MIN)
+    <BLANKLINE>
+    >>> slice_start(u'abcdef', SSIZE_T_MAX, SSIZE_T_MIN)
+    <BLANKLINE>
+    >>> slice_end(u'abcdef', SSIZE_T_MAX, SSIZE_T_MIN)
+    abcdef
+    >>> slice_all(u'abcdef', SSIZE_T_MAX, SSIZE_T_MIN)
+    abcdef
+    >>> slice_start_none(u'abcdef', SSIZE_T_MAX, SSIZE_T_MIN)
+    <BLANKLINE>
+    >>> slice_none_end(u'abcdef', SSIZE_T_MAX, SSIZE_T_MIN)
+    abcdef
+    >>> slice_none_none(u'abcdef', SSIZE_T_MAX, SSIZE_T_MIN)
+    abcdef
 """
 
@@ -154,7 +169,14 @@
 """
 
+cdef extern from *:  # C-level names, declared here as Py_ssize_t values
+    cdef Py_ssize_t PY_SSIZE_T_MIN
+    cdef Py_ssize_t PY_SSIZE_T_MAX
+
+SSIZE_T_MAX = PY_SSIZE_T_MAX  # exposed as a Python-level name so the module doctests can pass it as an index
+SSIZE_T_MIN = PY_SSIZE_T_MIN  # exposed as a Python-level name so the module doctests can pass it as an index
+
 import sys
 
 if sys.version_info[0] >= 3:
     __doc__ = __doc__.replace(u"(u'", u"('").replace(u" u'", u" '")
 
@@ -156,8 +178,8 @@
 import sys
 
 if sys.version_info[0] >= 3:
     __doc__ = __doc__.replace(u"(u'", u"('").replace(u" u'", u" '")
 
-def slice_start_end(unicode s, int i, int j):
+def slice_start_end(unicode s, Py_ssize_t i, Py_ssize_t j):
     print(s[i:j])
 
@@ -162,5 +184,5 @@
     print(s[i:j])
 
-def slice_start(unicode s, int i, int j):
+def slice_start(unicode s, Py_ssize_t i, Py_ssize_t j):
     print(s[i:])
 
@@ -165,5 +187,5 @@
     print(s[i:])
 
-def slice_end(unicode s, int i, int j):
+def slice_end(unicode s, Py_ssize_t i, Py_ssize_t j):
     print(s[:i])
 
@@ -168,5 +190,5 @@
     print(s[:i])
 
-def slice_all(unicode s, int i, int j):
+def slice_all(unicode s, Py_ssize_t i, Py_ssize_t j):
     print(s[:])
 
@@ -171,5 +193,5 @@
     print(s[:])
 
-def slice_start_none(unicode s, int i, int j):
+def slice_start_none(unicode s, Py_ssize_t i, Py_ssize_t j):
     print(s[i:None])
 
@@ -174,5 +196,5 @@
     print(s[i:None])
 
-def slice_none_end(unicode s, int i, int j):
+def slice_none_end(unicode s, Py_ssize_t i, Py_ssize_t j):
     print(s[None:i])
 
@@ -177,4 +199,4 @@
     print(s[None:i])
 
-def slice_none_none(unicode s, int i, int j):
+def slice_none_none(unicode s, Py_ssize_t i, Py_ssize_t j):
     print(s[None:None])
diff --git a/tests/run/verbatiminclude.pyx b/tests/run/verbatiminclude.pyx
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvcnVuL3ZlcmJhdGltaW5jbHVkZS5weXg=..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvcnVuL3ZlcmJhdGltaW5jbHVkZS5weXg= 100644
--- a/tests/run/verbatiminclude.pyx
+++ b/tests/run/verbatiminclude.pyx
@@ -25,5 +25,5 @@
 
 cdef extern from "Python.h":
     """
-    #define Py_SET_SIZE(obj, size)  Py_SIZE((obj)) = (size)
+    #define my_SET_SIZE(obj, size)  __Pyx_SET_SIZE(obj, size)
     """
@@ -29,5 +29,5 @@
     """
-    void Py_SET_SIZE(object, Py_ssize_t)
+    void my_SET_SIZE(object, Py_ssize_t)
 
 
 def test_square(x):
@@ -59,4 +59,4 @@
 def test_set_size(x, size):
     # This function manipulates Python objects in a bad way, so we
     # do not call it. The real test is that it compiles.
-    Py_SET_SIZE(x, size)
+    my_SET_SIZE(x, size)
diff --git a/tests/windows_bugs.txt b/tests/windows_bugs.txt
index 036fed8fdf0b7167741b03ac22ae1dd2fe789b41_dGVzdHMvd2luZG93c19idWdzLnR4dA==..69a0b6f830a19a8221774dfed9d21fe88d3f88e1_dGVzdHMvd2luZG93c19idWdzLnR4dA== 100644
--- a/tests/windows_bugs.txt
+++ b/tests/windows_bugs.txt
@@ -6,7 +6,6 @@
 package_compilation
 
 carray_coercion
-ctuple
 int_float_builtins_as_casts_T400
 int_float_builtins_as_casts_T400_long_double
 list_pop