From 70e3f1f204b9c05876953db620fc49d1fbb2f87c Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Tue, 30 Apr 2024 14:24:59 +0200 Subject: [PATCH 01/11] Fix parameter to propagate the right byte enable. --- rtl/redmule_pkg.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/redmule_pkg.sv b/rtl/redmule_pkg.sv index d1b8ac75..a35e10a0 100644 --- a/rtl/redmule_pkg.sv +++ b/rtl/redmule_pkg.sv @@ -30,7 +30,7 @@ package redmule_pkg; parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = 4'b1000; parameter fpnew_pkg::operation_e CAST_OP = fpnew_pkg::F2F; parameter int unsigned MIN_FMT = fpnew_pkg::min_fp_width(FpFmtConfig); - parameter int unsigned DW_CUT = DATA_W - ARRAY_HEIGHT*(PIPE_REGS + 1)*MIN_FMT; + parameter int unsigned DW_CUT = DATAW - ARRAY_HEIGHT*(PIPE_REGS + 1)*MIN_FMT; // Register File mapping /********************** From 13a7ea3969fb44a0f61ae06d071d8d3765f56c9c Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Tue, 30 Apr 2024 16:02:42 +0200 Subject: [PATCH 02/11] Fix utils function to check results on 8-bit data. --- sw/redmule.c | 8 ++- sw/utils/redmule_utils.h | 111 ++++++++++++++++++++------------------- 2 files changed, 65 insertions(+), 54 deletions(-) diff --git a/sw/redmule.c b/sw/redmule.c index 3e26e528..cd6e195e 100644 --- a/sw/redmule.c +++ b/sw/redmule.c @@ -26,6 +26,7 @@ int main() { uint8_t *w = w_inp; uint8_t *y = y_inp; uint8_t *z = z_oup; // golden_out //1c010000 + uint32_t *gold; uint8_t float_fmt = (SRC_FMT == FP8) ? (uint8_t)Float8 : (SRC_FMT == FP8ALT) ? (uint8_t)Float8Alt @@ -33,6 +34,11 @@ int main() { : (SRC_FMT == FP16ALT) ? (uint8_t)Float16Alt : (uint8_t)Float16; + int golden_size = (float_fmt == (Float8 | Float8Alt)) ? m_size*k_size/4 : m_size*k_size/2; + + for (int i = 0; i < golden_size; i++) + *(gold + i) = golden[i]; + volatile int errors = 0; int gold_sum = 0, check_sum = 0; int i, j; @@ -65,7 +71,7 @@ int main() { if (float_fmt == Float16 || float_fmt == Float16Alt) errors = redmule16_compare_int(y, golden, m_size * k_size / 2); else if (float_fmt == Float8 || float_fmt == Float8Alt) - errors = redmule8_compare_int(y, golden, m_size * k_size / 4); + errors = redmule8_compare_int(y, gold, m_size, k_size); *(int *)0x80000000 = errors; diff --git a/sw/utils/redmule_utils.h b/sw/utils/redmule_utils.h index fc072f93..21103d72 100644 --- a/sw/utils/redmule_utils.h +++ b/sw/utils/redmule_utils.h @@ -80,7 +80,7 @@ int redmule16_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { return errors; } -int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { +int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int m, int k) { uint32_t actual_word = 0; uint8_t actual_Byte0, actual_Byte1, actual_Byte2, actual_Byte3; uint32_t golden_word = 0; @@ -88,94 +88,99 @@ int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { uint32_t actual = 0; uint32_t golden = 0; + #define BytePerWord 4 + #define FpFormat 8 + uint32_t jump = k*FpFormat/BytePerWord; int errors = 0; int error; - for (int i = 0; i < len; i++) { - error = 0; - actual_word = *(actual_z + i); - golden_word = *(golden_z + i); + for (int j = 0; j < m; j++) { + for (int i = 0; i < k/4; i++) { + error = 0; + actual_word = *(actual_z + i + j*jump); + golden_word = *(golden_z + i + j); - // int error = ((actual_word ^ golden_word) & ~IGNORE_BITS_COMPARE) ? 1 : 0; - uint8_t diff = 0; + // int error = ((actual_word ^ golden_word) & ~IGNORE_BITS_COMPARE) ? 1 : 0; + uint8_t diff = 0; - // Cheching Byte0 - actual_Byte0 = (uint8_t)(actual_word & 0x000000FF); - golden_Byte0 = (uint8_t)(golden_word & 0x000000FF); + // Cheching Byte0 + actual_Byte0 = (uint8_t)(actual_word & 0x000000FF); + golden_Byte0 = (uint8_t)(golden_word & 0x000000FF); - diff = (actual_Byte0 > golden_Byte0) ? (actual_Byte0 - golden_Byte0) - : (actual_Byte0 < golden_Byte0) ? (golden_Byte0 - actual_Byte0) - : 0; + diff = (actual_Byte0 > golden_Byte0) ? (actual_Byte0 - golden_Byte0) + : (actual_Byte0 < golden_Byte0) ? (golden_Byte0 - actual_Byte0) + : 0; - if (diff > ERR) { - error = 1; + if (diff > ERR) { + error = 1; #ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte0: Error!\n"); #endif - } + } - // Cheching Byte1 - actual_Byte1 = (uint8_t)((actual_word >> 8) & 0x000000FF); - golden_Byte1 = (uint8_t)((golden_word >> 8) & 0x000000FF); + // Cheching Byte1 + actual_Byte1 = (uint8_t)((actual_word >> 8) & 0x000000FF); + golden_Byte1 = (uint8_t)((golden_word >> 8) & 0x000000FF); - diff = (actual_Byte1 > golden_Byte1) ? (actual_Byte1 - golden_Byte1) - : (actual_Byte1 < golden_Byte1) ? (golden_Byte1 - actual_Byte1) - : 0; + diff = (actual_Byte1 > golden_Byte1) ? (actual_Byte1 - golden_Byte1) + : (actual_Byte1 < golden_Byte1) ? (golden_Byte1 - actual_Byte1) + : 0; - if (diff > ERR) { - error = 1; + if (diff > ERR) { + error = 1; #ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte1: Error!\n"); #endif - } - - // Cheching Byte2 - actual_Byte2 = (uint8_t)((actual_word >> 16) & 0x000000FF); - golden_Byte2 = (uint8_t)((golden_word >> 16) & 0x000000FF); - - diff = (actual_Byte2 > golden_Byte2) ? (actual_Byte2 - golden_Byte2) - : (actual_Byte2 < golden_Byte2) ? (golden_Byte2 - actual_Byte2) - : 0; - - if (diff > ERR) { - error = 1; + } + + // Cheching Byte2 + actual_Byte2 = (uint8_t)((actual_word >> 16) & 0x000000FF); + golden_Byte2 = (uint8_t)((golden_word >> 16) & 0x000000FF); + + diff = (actual_Byte2 > golden_Byte2) ? (actual_Byte2 - golden_Byte2) + : (actual_Byte2 < golden_Byte2) ? (golden_Byte2 - actual_Byte2) + : 0; + + if (diff > ERR) { + error = 1; #ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte2: Error!\n"); #endif - } + } - // Cheching Byte3 - actual_Byte3 = (uint8_t)((actual_word >> 24) & 0x000000FF); - golden_Byte3 = (uint8_t)((golden_word >> 24) & 0x000000FF); + // Cheching Byte3 + actual_Byte3 = (uint8_t)((actual_word >> 24) & 0x000000FF); + golden_Byte3 = (uint8_t)((golden_word >> 24) & 0x000000FF); - diff = (actual_Byte3 > golden_Byte3) ? (actual_Byte3 - golden_Byte3) - : (actual_Byte3 < golden_Byte3) ? (golden_Byte3 - actual_Byte3) - : 0; + diff = (actual_Byte3 > golden_Byte3) ? (actual_Byte3 - golden_Byte3) + : (actual_Byte3 < golden_Byte3) ? (golden_Byte3 - actual_Byte3) + : 0; - if (diff > ERR) { - error = 1; + if (diff > ERR) { + error = 1; #ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte3: Error!\n"); #endif - } - - errors += error; + } + + errors += error; #ifdef DEBUG tfp_printf("Golden: 0x%08x; Actual: 0x%08x,\n", golden_word, actual_word); #endif #ifdef VERBOSE - if (error) { - if (errors == 1) tfp_printf(" golden <- actual @ address @ index\n"); - tfp_printf(" 0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word, (actual_z + i), - i * 4); - } + if (error) { + if (errors == 1) tfp_printf(" golden <- actual @ address @ index\n"); + tfp_printf(" 0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word, (actual_z + i), + i * 4); + } #endif + } } return errors; } From 561dbc29880c2a317b1e22a6001f8bd01ef058de Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Tue, 30 Apr 2024 16:03:24 +0200 Subject: [PATCH 03/11] Update dependencies. --- Bender.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Bender.lock b/Bender.lock index 8c910c64..333d36e2 100644 --- a/Bender.lock +++ b/Bender.lock @@ -7,8 +7,8 @@ packages: dependencies: - common_cells common_cells: - revision: 2bd027cb87eaa9bf7d17196ec5f69864b35b630f - version: 1.32.0 + revision: 0d67563b6b592549542544f1abc0f43e5d4ee8b4 + version: 1.35.0 source: Git: https://github.com/pulp-platform/common_cells.git dependencies: @@ -67,8 +67,8 @@ packages: dependencies: - tech_cells_generic hwpe-stream: - revision: 4c2ef8c33a6e2a8c88127e2153013d4f2dc3f448 - version: 1.7.0 + revision: 65c99a4a2f37a79acee800ab0151f67dfb1edef1 + version: 1.8.0 source: Git: https://github.com/pulp-platform/hwpe-stream.git dependencies: From 4a40564880005c081ea6581cac9c9e7fd5b5d2ea Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Tue, 30 Apr 2024 19:42:40 +0200 Subject: [PATCH 04/11] Make CI save venv after creation. --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fdf6fb6d..9e8a37c2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -18,7 +18,7 @@ init: stage: init extends: .base script: - - cd golden-model; source setup-py.sh + - cd golden-model; source setup-py.sh; cd .. artifacts: when: always expire_in: 1 week From 9873be087fd20e61b30622dc470d5f617427051a Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Tue, 30 Apr 2024 20:05:39 +0200 Subject: [PATCH 05/11] Add FP8 regressions to CI. --- .gitlab-ci.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9e8a37c2..ace2c153 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -91,6 +91,24 @@ hwpe-test: - { OP: gemm, M: 23, N: 31, K: 31, FMT: FP16 } - { OP: gemm, M: 24, N: 17, K: 32, FMT: FP16 } - { OP: gemm, M: 24, N: 20, K: 32, FMT: FP16 } + - { OP: gemm, M: 96, N: 96, K: 96, FMT: FP8 } + - { OP: gemm, M: 128, N: 128, K: 128, FMT: FP8 } + - { OP: gemm, M: 12, N: 16, K: 16, FMT: FP8 } + - { OP: gemm, M: 24, N: 16, K: 16, FMT: FP8 } + - { OP: gemm, M: 48, N: 32, K: 32, FMT: FP8 } + - { OP: gemm, M: 30, N: 32, K: 17, FMT: FP8 } + - { OP: gemm, M: 24, N: 32, K: 1, FMT: FP8 } + - { OP: gemm, M: 31, N: 32, K: 16, FMT: FP8 } + - { OP: gemm, M: 17, N: 32, K: 16, FMT: FP8 } + - { OP: gemm, M: 31, N: 32, K: 31, FMT: FP8 } + - { OP: gemm, M: 17, N: 32, K: 3, FMT: FP8 } + - { OP: gemm, M: 5, N: 32, K: 17, FMT: FP8 } + - { OP: gemm, M: 5, N: 32, K: 3, FMT: FP8 } + - { OP: gemm, M: 36, N: 31, K: 32, FMT: FP8 } + - { OP: gemm, M: 12, N: 31, K: 16, FMT: FP8 } + - { OP: gemm, M: 23, N: 31, K: 31, FMT: FP8 } + - { OP: gemm, M: 24, N: 17, K: 32, FMT: FP8 } + - { OP: gemm, M: 24, N: 20, K: 32, FMT: FP8 } complex-test: extends: .redmule-vsim-tpl From 5d3af964e2e2be3cea866e638e99e8edc7b6fc01 Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Tue, 30 Apr 2024 20:19:32 +0200 Subject: [PATCH 06/11] Mod CI script to specify stage dependencies. --- .gitlab-ci.yml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ace2c153..2a48e697 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -22,14 +22,12 @@ init: artifacts: when: always expire_in: 1 week - paths: - - ./golden-model/venv + paths: [./golden-model/venv] .redmule-build-tpl: extends: .base stage: build - dependencies: - - init + needs: [init] script: - SETUP_CONFIG=${SETUP_CONFIG} - source scripts/${SETUP_CONFIG}.sh @@ -38,10 +36,7 @@ init: artifacts: when: always expire_in: 1 week - paths: - - ./.bender - - ./scripts/compile.tcl - - ./vsim/* + paths: [./.bender, ./scripts/compile.tcl, ./vsim/*] redmule-build-hwpe: extends: .redmule-build-tpl @@ -56,6 +51,7 @@ redmule-build-complex: .redmule-vsim-tpl: extends: .base stage: test + needs: [build] script: - SETUP_CONFIG=${SETUP_CONFIG} - source scripts/${SETUP_CONFIG}.sh From d6827ef5e4decd048743db2212e8e1806244220b Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Tue, 30 Apr 2024 20:24:31 +0200 Subject: [PATCH 07/11] Attempt to fix CI dependencies. --- .gitlab-ci.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2a48e697..bbcfcf38 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -63,8 +63,7 @@ redmule-build-complex: hwpe-test: extends: .redmule-vsim-tpl - dependencies: - - redmule-build-hwpe + needs: [redmule-build-hwpe] variables: SETUP_CONFIG: "setup-hwpe" parallel: @@ -108,8 +107,7 @@ hwpe-test: complex-test: extends: .redmule-vsim-tpl - dependencies: - - redmule-build-complex + dependencies: [redmule-build-complex] variables: SETUP_CONFIG: "setup-complex" parallel: From 781f3abc64bbc9259bed38f0604d46ac174a39e5 Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Tue, 30 Apr 2024 20:26:01 +0200 Subject: [PATCH 08/11] Attempt to fix CI dependencies. --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bbcfcf38..8cd51a8c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -107,7 +107,7 @@ hwpe-test: complex-test: extends: .redmule-vsim-tpl - dependencies: [redmule-build-complex] + needs: [redmule-build-complex] variables: SETUP_CONFIG: "setup-complex" parallel: From e4c9fa28e266c6ffca36ae87b829aceff5e118e6 Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Tue, 30 Apr 2024 20:35:11 +0200 Subject: [PATCH 09/11] Add init to test dependencies. --- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8cd51a8c..ae34ef53 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -63,7 +63,7 @@ redmule-build-complex: hwpe-test: extends: .redmule-vsim-tpl - needs: [redmule-build-hwpe] + needs: [init, redmule-build-hwpe] variables: SETUP_CONFIG: "setup-hwpe" parallel: @@ -107,7 +107,7 @@ hwpe-test: complex-test: extends: .redmule-vsim-tpl - needs: [redmule-build-complex] + needs: [init, redmule-build-complex] variables: SETUP_CONFIG: "setup-complex" parallel: From 8ffa2372012a76bae37448089e738b65fa76fb8c Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Sun, 5 May 2024 15:06:15 +0200 Subject: [PATCH 10/11] Remove FP8 tests that cannot generate with Python. --- .gitlab-ci.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ae34ef53..fd0e04da 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -95,13 +95,9 @@ hwpe-test: - { OP: gemm, M: 24, N: 32, K: 1, FMT: FP8 } - { OP: gemm, M: 31, N: 32, K: 16, FMT: FP8 } - { OP: gemm, M: 17, N: 32, K: 16, FMT: FP8 } - - { OP: gemm, M: 31, N: 32, K: 31, FMT: FP8 } - - { OP: gemm, M: 17, N: 32, K: 3, FMT: FP8 } - { OP: gemm, M: 5, N: 32, K: 17, FMT: FP8 } - - { OP: gemm, M: 5, N: 32, K: 3, FMT: FP8 } - { OP: gemm, M: 36, N: 31, K: 32, FMT: FP8 } - { OP: gemm, M: 12, N: 31, K: 16, FMT: FP8 } - - { OP: gemm, M: 23, N: 31, K: 31, FMT: FP8 } - { OP: gemm, M: 24, N: 17, K: 32, FMT: FP8 } - { OP: gemm, M: 24, N: 20, K: 32, FMT: FP8 } From f2a4e0984778e3055df0eb6cb0a2ac330de018e5 Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Fri, 14 Jun 2024 12:24:07 +0200 Subject: [PATCH 11/11] Fix software. --- sw/redmule.c | 5 +---- sw/utils/redmule_utils.h | 8 ++++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/sw/redmule.c b/sw/redmule.c index cd6e195e..6d5d896d 100644 --- a/sw/redmule.c +++ b/sw/redmule.c @@ -26,7 +26,7 @@ int main() { uint8_t *w = w_inp; uint8_t *y = y_inp; uint8_t *z = z_oup; // golden_out //1c010000 - uint32_t *gold; + uint32_t *gold = golden; uint8_t float_fmt = (SRC_FMT == FP8) ? (uint8_t)Float8 : (SRC_FMT == FP8ALT) ? (uint8_t)Float8Alt @@ -36,9 +36,6 @@ int main() { int golden_size = (float_fmt == (Float8 | Float8Alt)) ? m_size*k_size/4 : m_size*k_size/2; - for (int i = 0; i < golden_size; i++) - *(gold + i) = golden[i]; - volatile int errors = 0; int gold_sum = 0, check_sum = 0; int i, j; diff --git a/sw/utils/redmule_utils.h b/sw/utils/redmule_utils.h index 21103d72..eefff790 100644 --- a/sw/utils/redmule_utils.h +++ b/sw/utils/redmule_utils.h @@ -66,13 +66,13 @@ int redmule16_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { errors += error; #ifdef DEBUG - tfp_printf("Golden: 0x%08x; Actual: 0x%08x,\n", golden_word, actual_word); + tfp_printf("Golden: 0x%08lx @ 0x%08ln; Actual: 0x%08lx @ 0x%08ln,\n", golden_word, (golden_z + i), actual_word, (actual_z + i)); #endif #ifdef VERBOSE if (error) { if (errors == 1) tfp_printf(" golden <- actual @ address @ index\n"); - tfp_printf("0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word, (actual_z + i), + tfp_printf("0x%08lx <- 0x%08lx @ 0x%08ln @ 0x%08x\n", golden_word, actual_word, (actual_z + i), i * 4); } #endif @@ -170,13 +170,13 @@ int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int m, int k) { errors += error; #ifdef DEBUG - tfp_printf("Golden: 0x%08x; Actual: 0x%08x,\n", golden_word, actual_word); + tfp_printf("Golden: 0x%08x @ 0x%08x; Actual: 0x%08x @ 0x%08x,\n", golden_word, (golden_z + i), actual_word, (actual_z + i)); #endif #ifdef VERBOSE if (error) { if (errors == 1) tfp_printf(" golden <- actual @ address @ index\n"); - tfp_printf(" 0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word, (actual_z + i), + tfp_printf(" 0x%x <- 0x%x @ 0x%x @ 0x%x\n", golden_word, actual_word, (actual_z + i), i * 4); } #endif