From 181c2b5ec1d293271d28ebce82f5db43d49754b0 Mon Sep 17 00:00:00 2001
From: Valentin Roland <github@vroland.de>
Date: Sun, 16 Jun 2024 15:01:38 +0200
Subject: [PATCH] test LCD 1k, bpp LUT functions

---
 examples/test/main/main.c          |   4 +-
 src/output_common/lut.c            | 127 ++++++++++++++++-------------
 src/output_common/lut.h            |  12 ++-
 src/output_common/render_context.h |   3 +-
 src/output_common/render_method.c  |   2 +-
 src/output_common/render_method.h  |   1 -
 src/output_lcd/render_lcd.c        |   2 +-
 src/render.c                       |   2 +-
 test/test_lut.c                    | 106 +++++++++++++++++-------
 9 files changed, 159 insertions(+), 100 deletions(-)

diff --git a/examples/test/main/main.c b/examples/test/main/main.c
index 4ff48869..9fa5e22d 100644
--- a/examples/test/main/main.c
+++ b/examples/test/main/main.c
@@ -9,7 +9,7 @@ static void print_banner(const char* text) {
 void app_main(void) {
     print_banner("Running all the registered tests");
     UNITY_BEGIN();
-    // unity_run_tests_by_tag("unit", false);
-    unity_run_all_tests();
+    unity_run_tests_by_tag("lut", false);
+    // unity_run_all_tests();
     UNITY_END();
 }
diff --git a/src/output_common/lut.c b/src/output_common/lut.c
index 9cb9d47d..d4b439ab 100644
--- a/src/output_common/lut.c
+++ b/src/output_common/lut.c
@@ -105,36 +105,6 @@ __attribute__((optimize("O3"))) void IRAM_ATTR calc_epd_input_1bpp(
     }
 }
 
-__attribute__((optimize("O3"))) void IRAM_ATTR calc_epd_input_4bpp_lut_64k(
-    const uint32_t* line_data, uint8_t* epd_input, const uint8_t* conversion_lut, uint32_t epd_width
-) {
-    uint32_t* wide_epd_input = (uint32_t*)epd_input;
-    const uint16_t* line_data_16 = (const uint16_t*)line_data;
-
-    // this is reversed for little-endian, but this is later compensated
-    // through the output peripheral.
-    for (uint32_t j = 0; j < epd_width / 16; j++) {
-        uint16_t v1 = *(line_data_16++);
-        uint16_t v2 = *(line_data_16++);
-        uint16_t v3 = *(line_data_16++);
-        uint16_t v4 = *(line_data_16++);
-
-#ifdef RENDER_METHOD_LCD
-        uint32_t pixel = conversion_lut[v1] | conversion_lut[v2] << 8 | conversion_lut[v3] << 16
-                         | conversion_lut[v4] << 24;
-#elif RENDER_METHOD_I2S
-        uint32_t pixel = conversion_lut[v4];
-        pixel = pixel << 8;
-        pixel |= conversion_lut[v3];
-        pixel = pixel << 8;
-        pixel |= conversion_lut[v2];
-        pixel = pixel << 8;
-        pixel |= conversion_lut[v1];
-#endif
-        wide_epd_input[j] = pixel;
-    }
-}
-
 /**
  * Look up 4 pixels of a differential image in a LUT constructed for use with vector extensions.
  */
@@ -226,11 +196,43 @@ __attribute__((optimize("O3"))) void IRAM_ATTR calc_epd_input_1ppB_64k(
 #endif
 }
 
+__attribute__((optimize("O3"))) void IRAM_ATTR calc_epd_input_4bpp_lut_64k(
+    const uint32_t* line_data, uint8_t* epd_input, const uint8_t* conversion_lut, uint32_t epd_width
+) {
+    const uint16_t* line_data_16 = (const uint16_t*)line_data;
+
+#ifdef RENDER_METHOD_LCD
+    for (uint32_t j = 0; j < epd_width / 4; j++) {
+        epd_input[j] = conversion_lut[*(line_data_16++)];
+    }
+#elif RENDER_METHOD_I2S
+    // TODO!
+    uint32_t* wide_epd_input = (uint32_t*)epd_input;
+
+    // this is reversed for little-endian, but this is later compensated
+    // through the output peripheral.
+    for (uint32_t j = 0; j < epd_width / 16; j++) {
+        uint16_t v1 = *(line_data_16++);
+        uint16_t v2 = *(line_data_16++);
+        uint16_t v3 = *(line_data_16++);
+        uint16_t v4 = *(line_data_16++);
+        uint32_t pixel = conversion_lut[v4];
+        pixel = pixel << 8;
+        pixel |= conversion_lut[v3];
+        pixel = pixel << 8;
+        pixel |= conversion_lut[v2];
+        pixel = pixel << 8;
+        pixel |= conversion_lut[v1];
+        wide_epd_input[j] = pixel;
+    }
+#endif
+}
+
 /**
  * Look up 4 pixels in a 1K LUT with fixed "from" value.
  */
-__attribute__((optimize("O3"))) uint8_t lookup_pixels_4bpp_1k(
-    uint16_t in, const uint8_t* conversion_lut, uint8_t from, uint32_t epd_width
+__attribute__((optimize("O3"))) static uint8_t lookup_pixels_4bpp_1k(
+    uint16_t in, const uint8_t* conversion_lut, uint8_t from
 ) {
     uint8_t v;
     uint8_t out;
@@ -258,22 +260,36 @@ __attribute__((optimize("O3"))) void IRAM_ATTR calc_epd_input_4bpp_1k_lut(
     uint8_t from,
     uint32_t epd_width
 ) {
-    uint16_t* ptr = (uint16_t*)ld;
-    // this is reversed for little-endian, but this is later compensated
-    // through the output peripheral.
-    for (uint32_t j = 0; j < epd_width / 4; j += 4) {
+    const uint16_t* line_data_16 = (const uint16_t*)ld;
+
 #ifdef RENDER_METHOD_LCD
-        epd_input[j + 0] = lookup_pixels_4bpp_1k(*(ptr++), conversion_lut, from, epd_width);
-        epd_input[j + 1] = lookup_pixels_4bpp_1k(*(ptr++), conversion_lut, from, epd_width);
-        epd_input[j + 2] = lookup_pixels_4bpp_1k(*(ptr++), conversion_lut, from, epd_width);
-        epd_input[j + 3] = lookup_pixels_4bpp_1k(*(ptr++), conversion_lut, from, epd_width);
+    for (uint32_t j = 0; j < epd_width / 4; j++) {
+        epd_input[j] = lookup_pixels_4bpp_1k(*(line_data_16++), conversion_lut, from);
+    }
 #elif RENDER_METHOD_I2S
-        epd_input[j + 2] = lookup_pixels_4bpp_1k(*(ptr++), conversion_lut, from, epd_width);
-        epd_input[j + 3] = lookup_pixels_4bpp_1k(*(ptr++), conversion_lut, from, epd_width);
-        epd_input[j + 0] = lookup_pixels_4bpp_1k(*(ptr++), conversion_lut, from, epd_width);
-        epd_input[j + 1] = lookup_pixels_4bpp_1k(*(ptr++), conversion_lut, from, epd_width);
-#endif
+    uint32_t* wide_epd_input = (uint32_t*)epd_input;
+    // NOTE(review): the removed I2S code wrote outputs in byte order 2,3,0,1 - confirm.
+    // this is reversed for little-endian, but this is later compensated
+    // through the output peripheral.
+    for (uint32_t j = 0; j < epd_width / 16; j++) {
+        uint16_t v1 = *(line_data_16++);
+        uint16_t v2 = *(line_data_16++);
+        uint16_t v3 = *(line_data_16++);
+        uint16_t v4 = *(line_data_16++);
+        uint8_t o1 = lookup_pixels_4bpp_1k(v1, conversion_lut, from);
+        uint8_t o2 = lookup_pixels_4bpp_1k(v2, conversion_lut, from);
+        uint8_t o3 = lookup_pixels_4bpp_1k(v3, conversion_lut, from);
+        uint8_t o4 = lookup_pixels_4bpp_1k(v4, conversion_lut, from);
+        uint32_t pixel = o4;
+        pixel = pixel << 8;
+        pixel |= o3;
+        pixel = pixel << 8;
+        pixel |= o2;
+        pixel = pixel << 8;
+        pixel |= o1;
+        wide_epd_input[j] = pixel;
     }
+#endif
 }
 
 __attribute__((optimize("O3"))) void IRAM_ATTR calc_epd_input_4bpp_1k_lut_white(
@@ -376,8 +392,9 @@ build_1ppB_lut_S3_VE_1k(uint8_t* lut, const EpdWaveformPhases* phases, int frame
  * known, e.g. all white or all black.
  * This LUT is use to look up 4 pixels at once, as with the epdiy LUT.
  */
-__attribute__((optimize("O3"))) static void
-build_2ppB_lut_64k_static_from(uint8_t* lut, const EpdWaveformPhases* phases, uint8_t from, int frame) {
+__attribute__((optimize("O3"))) static void build_2ppB_lut_64k_static_from(
+    uint8_t* lut, const EpdWaveformPhases* phases, uint8_t from, int frame
+) {
     const uint8_t* p_lut = phases->luts + (16 * 4 * frame);
 
     /// index into the packed "from" row
@@ -428,16 +445,14 @@ static void build_8ppB_lut_256b_from_15(uint8_t* lut, const EpdWaveformPhases* p
     memcpy(lut, lut_1bpp_black, sizeof(lut_1bpp_black));
 }
 
-
-
 LutFunctionPair find_lut_functions(enum EpdDrawMode mode, uint32_t lut_size) {
     LutFunctionPair pair;
     pair.build_func = NULL;
     pair.lookup_func = NULL;
-    
 
-    if (mode & MODE_PACKING_1PPB_DIFFERENCE) { 
-        if (EPD_CURRENT_RENDER_METHOD == RENDER_METHOD_LCD && !(mode & MODE_FORCE_NO_PIE) && lut_size >= 1024) {
+    if (mode & MODE_PACKING_1PPB_DIFFERENCE) {
+        if (EPD_CURRENT_RENDER_METHOD == RENDER_METHOD_LCD && !(mode & MODE_FORCE_NO_PIE)
+            && lut_size >= 1024) {
             pair.build_func = &build_1ppB_lut_S3_VE_1k;
             pair.lookup_func = &calc_epd_input_1ppB_1k_S3_VE;
             return pair;
@@ -445,14 +460,14 @@ LutFunctionPair find_lut_functions(enum EpdDrawMode mode, uint32_t lut_size) {
             pair.build_func = &build_1ppB_lut_64k;
             pair.lookup_func = &calc_epd_input_1bpp;
             return pair;
-        } 
+        }
     } else if (mode & MODE_PACKING_2PPB) {
         if (lut_size >= 1 << 16) {
             if (mode & PREVIOUSLY_WHITE) {
                 pair.build_func = &build_2ppB_lut_64k_from_15;
                 pair.lookup_func = &calc_epd_input_4bpp_lut_64k;
                 return pair;
-            } else if (mode & PREVIOUSLY_BLACK) { 
+            } else if (mode & PREVIOUSLY_BLACK) {
                 pair.build_func = &build_2ppB_lut_64k_from_0;
                 pair.lookup_func = &calc_epd_input_4bpp_lut_64k;
                 return pair;
@@ -472,7 +487,7 @@ LutFunctionPair find_lut_functions(enum EpdDrawMode mode, uint32_t lut_size) {
         if (lut_size < sizeof(lut_1bpp_black)) {
             return pair;
         }
-        
+
         if (mode & PREVIOUSLY_WHITE) {
             pair.build_func = &build_8ppB_lut_256b_from_15;
             pair.lookup_func = &calc_epd_input_1bpp;
@@ -484,5 +499,3 @@ LutFunctionPair find_lut_functions(enum EpdDrawMode mode, uint32_t lut_size) {
 
     return pair;
 }
-
-
diff --git a/src/output_common/lut.h b/src/output_common/lut.h
index 55e56f4d..08a9dab8 100644
--- a/src/output_common/lut.h
+++ b/src/output_common/lut.h
@@ -8,36 +8,34 @@
 // Make a block of 4 pixels darker on the EPD.
 #define DARK_BYTE 0B01010101
 
-
 /**
- * Type signature of a framebuffer to display output lookup function. 
+ * Type signature of a framebuffer to display output lookup function.
  */
-typedef void (*lut_func_t)(const uint32_t* line_buffer, uint8_t* epd_input, const uint8_t* lut, uint32_t epd_width);
+typedef void (*lut_func_t)(
+    const uint32_t* line_buffer, uint8_t* epd_input, const uint8_t* lut, uint32_t epd_width
+);
 
 /**
  * Type signature of a LUT preparation function.
  */
 typedef void (*lut_build_func_t)(uint8_t* lut, const EpdWaveformPhases* phases, int frame);
 
-
 typedef struct {
     lut_build_func_t build_func;
     lut_func_t lookup_func;
 } LutFunctionPair;
 
 /**
- * Select the appropriate LUT building and lookup function 
+ * Select the appropriate LUT building and lookup function
  * for the selected draw mode and allocated LUT size.
  */
 LutFunctionPair find_lut_functions(enum EpdDrawMode mode, uint32_t lut_size);
 
-
 /*
  * Reorder the output buffer to account for I2S FIFO order.
  */
 void reorder_line_buffer(uint32_t* line_data, int buf_len);
 
-
 // legacy functions
 void bit_shift_buffer_right(uint8_t* buf, uint32_t len, int shift);
 void nibble_shift_buffer_right(uint8_t* buf, uint32_t len);
diff --git a/src/output_common/render_context.h b/src/output_common/render_context.h
index 262c93c3..5a6f9612 100644
--- a/src/output_common/render_context.h
+++ b/src/output_common/render_context.h
@@ -57,7 +57,7 @@ typedef struct {
     size_t conversion_lut_size;
     // Lookup table space.
     uint8_t* conversion_lut;
-    
+
     /// LUT lookup function. Must not be NULL.
     lut_func_t lut_lookup_func;
     /// LUT building function. Must not be NULL
@@ -72,7 +72,6 @@ typedef struct {
     int skipping;
 } RenderContext_t;
 
-
 /**
  * Based on the render context, assign the bytes per line,
  * framebuffer start pointer, min and max vertical positions and the pixels per byte.
diff --git a/src/output_common/render_method.c b/src/output_common/render_method.c
index 916ac419..5404b7ee 100644
--- a/src/output_common/render_method.c
+++ b/src/output_common/render_method.c
@@ -1,5 +1,5 @@
-#include "sdkconfig.h"
 #include "render_method.h"
+#include "sdkconfig.h"
 
 #ifdef CONFIG_IDF_TARGET_ESP32
 const enum EpdRenderMethod EPD_CURRENT_RENDER_METHOD = RENDER_METHOD_I2S;
diff --git a/src/output_common/render_method.h b/src/output_common/render_method.h
index 9db72208..217b4100 100644
--- a/src/output_common/render_method.h
+++ b/src/output_common/render_method.h
@@ -22,7 +22,6 @@ extern const enum EpdRenderMethod EPD_CURRENT_RENDER_METHOD;
 #error "unknown chip, cannot choose render method!"
 #endif
 
-
 #ifdef __clang__
 #define IRAM_ATTR
 // define this if we're using clangd to make it accept the GCC builtin
diff --git a/src/output_lcd/render_lcd.c b/src/output_lcd/render_lcd.c
index 348b9030..fe93a87f 100644
--- a/src/output_lcd/render_lcd.c
+++ b/src/output_lcd/render_lcd.c
@@ -121,7 +121,7 @@ lcd_calculate_frame(RenderContext_t* ctx, int thread_id) {
 
     LineQueue_t* lq = &ctx->line_queues[thread_id];
     int l = 0;
-    
+
     // if there is an error, start the frame but don't feed data.
     if (ctx->error) {
         memset(ctx->line_threads, 0, ctx->lines_total);
diff --git a/src/render.c b/src/render.c
index 53c0453b..51f1fa70 100644
--- a/src/render.c
+++ b/src/render.c
@@ -169,7 +169,7 @@ enum EpdDrawError IRAM_ATTR epd_draw_base(
         );
     }
 #endif
-    
+
     LutFunctionPair lut_functions = find_lut_functions(mode, render_context.conversion_lut_size);
     if (lut_functions.build_func == NULL || lut_functions.lookup_func == NULL) {
         ESP_LOGE("epdiy", "no output lookup method found for your mode and LUT size!");
diff --git a/test/test_lut.c b/test/test_lut.c
index 0d9f3306..c201a4ff 100644
--- a/test/test_lut.c
+++ b/test/test_lut.c
@@ -16,14 +16,20 @@
 static const uint8_t input_data_pattern[16] = { 0xFF, 0xFF, 0xF0, 0xFF, 0xFF, 0x00, 0x01, 0x10,
                                                 0xA5, 0xA5, 0x5A, 0x5A, 0xFF, 0xFF, 0x00, 0x08 };
 static const uint8_t result_data_pattern_lcd_1ppB[4] = { 0x20, 0x90, 0x5A, 0x40 };
-static const uint8_t result_data_pattern_lcd_2ppB_white[8] = { 0x00, 0x01, 0x50, 0x55, 0x55, 0x55, 0x00, 0x55 };
+static const uint8_t result_data_pattern_lcd_2ppB_white[8]
+    = { 0x00, 0x01, 0x50, 0x55, 0x55, 0x55, 0x00, 0x55 };
+static const uint8_t result_data_pattern_lcd_2ppB_black[8]
+    = { 0xAA, 0xA8, 0x0A, 0x82, 0xAA, 0xAA, 0xAA, 0x20 };
 
 typedef void (*lut_func_t)(const uint32_t*, uint8_t*, const uint8_t*, uint32_t);
 static uint8_t waveform_phases[16][4];
 
-void calc_epd_input_1ppB_1k_S3_VE(const uint32_t* ld, uint8_t* epd_input, const uint8_t* conversion_lut, uint32_t epd_width);
-void calc_epd_input_1ppB_64k(const uint32_t* ld, uint8_t* epd_input, const uint8_t* conversion_lut, uint32_t epd_width);
-
+void calc_epd_input_1ppB_1k_S3_VE(
+    const uint32_t* ld, uint8_t* epd_input, const uint8_t* conversion_lut, uint32_t epd_width
+);
+void calc_epd_input_1ppB_64k(
+    const uint32_t* ld, uint8_t* epd_input, const uint8_t* conversion_lut, uint32_t epd_width
+);
 
 static EpdWaveformPhases test_waveform = {
     .phase_times = NULL,
@@ -41,7 +47,6 @@ typedef struct {
     int example_len_px;
 } LutTestBuffers;
 
-
 static void fill_test_waveform() {
     for (int to = 0; to < 16; to++) {
         memset(waveform_phases[to], 0, 4);
@@ -66,23 +71,29 @@ static void lut_test_buffers_fill(LutTestBuffers* bufs, const uint8_t* result_pa
     // initialize test and check patterns
     for (int i = 0; i < bufs->example_len_px; i++) {
         bufs->line_data[i] = input_data_pattern[i % sizeof(input_data_pattern)];
-        bufs->expected_line[i / bufs->in_out_ratio] = result_pattern[(i / bufs->in_out_ratio) % result_pattern_len];
+        bufs->expected_line[i / bufs->in_out_ratio]
+            = result_pattern[(i / bufs->in_out_ratio) % result_pattern_len];
     }
-    
+
     memset(bufs->lut, 0, 1 << 16);
     memset(bufs->result_line, 0, bufs->example_len_px / bufs->in_out_ratio);
 
     fill_test_waveform();
+    heap_caps_check_integrity_all(true);
 }
 
 /*
  * Allocates and populates buffers for LUT tests.
  */
-static void lut_test_buffers_init(LutTestBuffers* bufs, int example_len_px, const uint8_t* result_pattern, int in_out_ratio) {
-    bufs->line_data = heap_caps_aligned_alloc(16, example_len_px, MALLOC_CAP_DEFAULT);
-    bufs->result_line = heap_caps_aligned_alloc(16, example_len_px / in_out_ratio, MALLOC_CAP_DEFAULT);
-    bufs->expected_line = heap_caps_aligned_alloc(16, example_len_px / in_out_ratio, MALLOC_CAP_DEFAULT);
-    bufs->lut = heap_caps_aligned_alloc(16, 1 << 16, MALLOC_CAP_DEFAULT);
+static void lut_test_buffers_init(
+    LutTestBuffers* bufs, int example_len_px, const uint8_t* result_pattern, int in_out_ratio
+) {
+    bufs->line_data = heap_caps_aligned_alloc(16, example_len_px, MALLOC_CAP_INTERNAL);
+    bufs->result_line
+        = heap_caps_aligned_alloc(16, example_len_px / in_out_ratio, MALLOC_CAP_INTERNAL);
+    bufs->expected_line
+        = heap_caps_aligned_alloc(16, example_len_px / in_out_ratio, MALLOC_CAP_INTERNAL);
+    bufs->lut = heap_caps_malloc(1 << 16, MALLOC_CAP_INTERNAL);
     bufs->example_len_px = example_len_px;
     bufs->in_out_ratio = in_out_ratio;
 
@@ -115,36 +126,39 @@ static void IRAM_ATTR test_with_alignments(LutTestBuffers* bufs, lut_func_t lut_
             memcpy(bufs->expected_line, expectation_backup, out_len);
 
             // before and after the designated range the buffer shoulld be clear
-            memset(bufs->expected_line, 0, start_offset / bufs->in_out_ratio);
-            memset(bufs->expected_line + (start_offset + unaligned_len) / bufs->in_out_ratio, 0, end_offset / bufs->in_out_ratio);
-
-            // if we have less than four bytes of input for one output byte, we have to adjust the "line length",
-            // so that all of the input is used.
-            int line_length = unaligned_len * 4 / bufs->in_out_ratio;
-
-            printf("benchmarking and checking with alignment (in px): (%d, %d)... ", start_offset, unaligned_len);
+            memset(bufs->expected_line, 0, start_offset / 4);
+            memset(bufs->expected_line + (start_offset + unaligned_len) / 4, 0, end_offset / 4);
+
+            printf(
+                "benchmarking and checking with alignment (in px): (%d, %d)... ",
+                start_offset,
+                unaligned_len
+            );
             uint64_t start = esp_timer_get_time();
             for (int i = 0; i < 100; i++) {
                 lut_func(
-                    (uint32_t*)(bufs->line_data + start_offset),
-                    bufs->result_line + start_offset / bufs->in_out_ratio,
+                    (uint32_t*)(bufs->line_data + start_offset * bufs->in_out_ratio / 4),
+                    bufs->result_line + start_offset / 4,
                     bufs->lut,
-                    line_length
+                    unaligned_len
                 );
             }
             uint64_t end = esp_timer_get_time();
+            heap_caps_check_integrity_all(true);  // not part of the timed window
             printf("took %.2fus per iter.\n", (end - start) / 100.0);
 
-            // Compare computed outputs to the expectation. We limit the comparison to len / 4, since the LUT functions
-            // only compute a full display line, not more, even though our test buffer may be larger.
+            // Compare computed outputs to the expectation. We limit the comparison to len / 4,
+            // since the LUT functions only compute a full display line, not more, even though our
+            // test buffer may be larger.
             TEST_ASSERT_EQUAL_UINT8_ARRAY(bufs->expected_line, bufs->result_line, len / 4);
         }
     }
+    heap_caps_check_integrity_all(true);
 
     heap_caps_free(expectation_backup);
 }
 
-TEST_CASE("1ppB lookup LCD, 64k LUT", "[epdiy,unit]") {
+TEST_CASE("1ppB lookup LCD, 64k LUT", "[epdiy,unit,lut]") {
     LutTestBuffers bufs;
     lut_test_buffers_init(&bufs, DEFAULT_EXAMPLE_LEN, result_data_pattern_lcd_1ppB, 4);
 
@@ -156,7 +170,7 @@ TEST_CASE("1ppB lookup LCD, 64k LUT", "[epdiy,unit]") {
     diff_test_buffers_free(&bufs);
 }
 
-TEST_CASE("1ppB lookup LCD, 1k LUT, PIE", "[epdiy,unit]") {
+TEST_CASE("1ppB lookup LCD, 1k LUT, PIE", "[epdiy,unit,lut]") {
     LutTestBuffers bufs;
     lut_test_buffers_init(&bufs, DEFAULT_EXAMPLE_LEN, result_data_pattern_lcd_1ppB, 4);
 
@@ -167,7 +181,7 @@ TEST_CASE("1ppB lookup LCD, 1k LUT, PIE", "[epdiy,unit]") {
     diff_test_buffers_free(&bufs);
 }
 
-TEST_CASE("2ppB lookup LCD, 64k LUT, previously white", "[epdiy,unit]") {
+TEST_CASE("2ppB lookup LCD, 64k LUT, previously white", "[epdiy,unit,lut]") {
     LutTestBuffers bufs;
     lut_test_buffers_init(&bufs, DEFAULT_EXAMPLE_LEN, result_data_pattern_lcd_2ppB_white, 2);
 
@@ -178,3 +192,39 @@ TEST_CASE("2ppB lookup LCD, 64k LUT, previously white", "[epdiy,unit]") {
 
     diff_test_buffers_free(&bufs);
 }
+
+TEST_CASE("2ppB lookup LCD, 64k LUT, previously black", "[epdiy,unit,lut]") {
+    LutTestBuffers bufs;
+    lut_test_buffers_init(&bufs, DEFAULT_EXAMPLE_LEN, result_data_pattern_lcd_2ppB_black, 2);
+
+    enum EpdDrawMode mode = MODE_GL16 | MODE_PACKING_2PPB | PREVIOUSLY_BLACK;
+    LutFunctionPair func_pair = find_lut_functions(mode, 1 << 16);
+    func_pair.build_func(bufs.lut, &test_waveform, 0);
+    test_with_alignments(&bufs, func_pair.lookup_func);
+
+    diff_test_buffers_free(&bufs);
+}
+
+TEST_CASE("2ppB lookup LCD, 1k LUT, previously white", "[epdiy,unit,lut]") {
+    LutTestBuffers bufs;
+    lut_test_buffers_init(&bufs, DEFAULT_EXAMPLE_LEN, result_data_pattern_lcd_2ppB_white, 2);
+
+    enum EpdDrawMode mode = MODE_GL16 | MODE_PACKING_2PPB | PREVIOUSLY_WHITE;
+    LutFunctionPair func_pair = find_lut_functions(mode, 1 << 10);
+    func_pair.build_func(bufs.lut, &test_waveform, 0);
+    test_with_alignments(&bufs, func_pair.lookup_func);
+
+    diff_test_buffers_free(&bufs);
+}
+
+TEST_CASE("2ppB lookup LCD, 1k LUT, previously black", "[epdiy,unit,lut]") {
+    LutTestBuffers bufs;
+    lut_test_buffers_init(&bufs, DEFAULT_EXAMPLE_LEN, result_data_pattern_lcd_2ppB_black, 2);
+
+    enum EpdDrawMode mode = MODE_GL16 | MODE_PACKING_2PPB | PREVIOUSLY_BLACK;
+    LutFunctionPair func_pair = find_lut_functions(mode, 1 << 10);
+    func_pair.build_func(bufs.lut, &test_waveform, 0);
+    test_with_alignments(&bufs, func_pair.lookup_func);
+
+    diff_test_buffers_free(&bufs);
+}