From 384034624bbaa108bc6187b32548314ff59ac203 Mon Sep 17 00:00:00 2001
From: Harald van Dijk <harald.vandijk@codeplay.com>
Date: Fri, 17 Nov 2023 11:24:01 +0000
Subject: [PATCH] mix: reimplement based on std::lerp reference implementation.

The referenced paper explains the problems with various other
implementations of lerp/mix, including ours.
---
 .../abacus_detail_common.in                   | 31 +++++++++++++++-
 .../abacus/source/abacus_common/mix.cpp       |  6 +--
 .../cl/test/UnitCL/source/ktst_precision.cpp  | 37 +++----------------
 3 files changed, 38 insertions(+), 36 deletions(-)
diff --git a/modules/compiler/builtins/abacus/generate/abacus_detail_common/abacus_detail_common.in b/modules/compiler/builtins/abacus/generate/abacus_detail_common/abacus_detail_common.in
index e7ad27863..29647b932 100644
--- a/modules/compiler/builtins/abacus/generate/abacus_detail_common/abacus_detail_common.in
+++ b/modules/compiler/builtins/abacus/generate/abacus_detail_common/abacus_detail_common.in
@@ -115,8 +115,35 @@ template<typename T> T degrees(const T& t) {
   return t * T(57.295779513082320876798154814105170332405472466564321);
 }
 
-template<typename T, typename U> T mix(const T& x, const T& y, const U& a) {
-  return x + ((y - x) * T(a));
+template <typename T>
+T mix(const T &a, const T &b, const T &t) {
+  // Unoptimized vector version of the implementation of std::lerp given in
+  // https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p0811r3.html
+  // adjusted to support non-finite arguments.
+  //
+  // Unlike std::lerp, OpenCL mix returns an undefined result if
+  // !(t >= 0 && t <= 1), so we could simplify slightly, but we aim to behave
+  // sensibly for this.
+
+  using SignedType = typename TypeTraits<T>::SignedType;
+
+  // The direct translation of the expression mix/lerp is defined as. This
+  // version is used in most cases, but has issues around t=1 and does not
+  // account for the possibility b-a overflows.
+  const T x = a + t * (b - a);
+
+  // Correct for the issues around t=1 by returning b in edge cases.
+  const SignedType useb =
+      __abacus_select(b <= x, x <= b, (t > T(1)) == (b > a)) |
+      ((t == T(1)) & (x == x));
+
+  // Alternative definition that avoids overflow of b-a. This version is used
+  // when a and b are finite numbers with different signs.
+  const T y = t * b + (T(1) - t) * a;
+  const SignedType usey = isfinite(a) & isfinite(b) &
+                          (((a <= 0) & (b >= 0)) | ((a >= 0) & (b <= 0)));
+
+  return __abacus_select(__abacus_select(x, b, useb), y, usey);
 }
 
 template<typename T> T radians(const T& t) {
diff --git a/modules/compiler/builtins/abacus/source/abacus_common/mix.cpp b/modules/compiler/builtins/abacus/source/abacus_common/mix.cpp
index e5609378c..6e0bf50a8 100644
--- a/modules/compiler/builtins/abacus/source/abacus_common/mix.cpp
+++ b/modules/compiler/builtins/abacus/source/abacus_common/mix.cpp
@@ -21,9 +21,9 @@
   TYPE __abacus_mix(TYPE x, TYPE y, TYPE a) {    \
     return abacus::detail::common::mix(x, y, a); \
   }
-#define DEF2(TYPE, TYPE2)                        \
-  TYPE __abacus_mix(TYPE x, TYPE y, TYPE2 a) {   \
-    return abacus::detail::common::mix(x, y, a); \
+#define DEF2(TYPE, TYPE2)                              \
+  TYPE __abacus_mix(TYPE x, TYPE y, TYPE2 a) {         \
+    return abacus::detail::common::mix<TYPE>(x, y, a); \
   }
 
 #ifdef __CA_BUILTINS_HALF_SUPPORT
diff --git a/source/cl/test/UnitCL/source/ktst_precision.cpp b/source/cl/test/UnitCL/source/ktst_precision.cpp
index 5d518d2c6..925ea7a65 100644
--- a/source/cl/test/UnitCL/source/ktst_precision.cpp
+++ b/source/cl/test/UnitCL/source/ktst_precision.cpp
@@ -1633,24 +1633,12 @@ TEST_P(HalfMathBuiltins, Precision_53_Half_mix) {
     GTEST_SKIP();
   }
 
-  // TODO: CA-2882: Vector widths 2 and 4 don't work
-#ifdef __arm__
-  if ((getParam() == 2) || (getParam() == 4)) {
-    GTEST_SKIP();
-  }
-#endif
-
   auto mix_ref = [](cl_float x, cl_float y, cl_float a) -> cl_float {
+    // Our reference for mix is inaccurate due to intermediate rounding, even if
+    // we use float rather than half, but because we test with MAX_ULP_ERROR,
+    // this does not matter for the overall test.
     cl_float sub = y - x;
 
-    // Check for overflow and underflow of intermediate
-    const cl_half sub_as_half = ConvertFloatToHalf(sub);
-    if (IsInf(sub_as_half)) {
-      sub = std::copysign(INFINITY, sub);
-    } else if (0 == (sub_as_half & ~TypeInfo<cl_half>::sign_bit)) {
-      sub = std::copysign(0.0f, sub);
-    }
-
     return x + sub * a;
   };
 
@@ -1666,25 +1654,12 @@ TEST_P(HalfMathBuiltins, Precision_53_Half_mix_scalar) {
     GTEST_SKIP();
   }
 
-  // TODO: CA-2731: Vector widths 2 and 8 don't work
-  // TODO: CA-2882: Vector width 4 doesn't work
-#ifdef __arm__
-  if ((getParam() == 2) || (getParam() == 4) || (getParam() == 8)) {
-    GTEST_SKIP();
-  }
-#endif
-
   auto mix_ref = [](cl_float x, cl_float y, cl_float a) -> cl_float {
+    // Our reference for mix is inaccurate due to intermediate rounding, even if
+    // we use float rather than half, but because we test with MAX_ULP_ERROR,
+    // this does not matter for the overall test.
     cl_float sub = y - x;
 
-    // Check for overflow and underflow of intermediate
-    const cl_half sub_as_half = ConvertFloatToHalf(sub);
-    if (IsInf(sub_as_half)) {
-      sub = std::copysign(INFINITY, sub);
-    } else if (0 == (sub_as_half & ~TypeInfo<cl_half>::sign_bit)) {
-      sub = std::copysign(0.0f, sub);
-    }
-
     return x + sub * a;
   };