diff --git a/core/math/MathUtil.cpp b/core/math/MathUtil.cpp index 6e6681665ce1..9fb49000b3b2 100644 --- a/core/math/MathUtil.cpp +++ b/core/math/MathUtil.cpp @@ -1,6 +1,7 @@ /** Copyright 2013 BlackBerry Inc. Copyright (c) 2017-2018 Xiamen Yaji Software Co., Ltd. +Copyright (c) 2019-present Axmol Engine contributors (see AUTHORS.md). Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,6 +21,7 @@ This file was modified to fit the cocos2d-x project */ #include "math/MathUtil.h" +#include "math/Mat4.h" #include "base/Macros.h" #if (AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID) @@ -35,28 +37,29 @@ This file was modified to fit the cocos2d-x project #if (AX_TARGET_PLATFORM == AX_PLATFORM_IOS) # if defined(__arm64__) -# define USE_NEON64 -# define INCLUDE_NEON64 +# define USE_NEON64 1 +# define INCLUDE_NEON64 1 # elif defined(__ARM_NEON__) -# define USE_NEON32 -# define INCLUDE_NEON32 -# else +# define USE_NEON32 1 +# define INCLUDE_NEON32 1 +# endif +#elif (AX_TARGET_PLATFORM == AX_PLATFORM_OSX) +# if defined(__arm64__) || defined(__aarch64__) +# define USE_NEON64 1 +# define INCLUDE_NEON64 1 # endif #elif (AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID) # if defined(__arm64__) || defined(__aarch64__) -# define USE_NEON64 -# define INCLUDE_NEON64 +# define USE_NEON64 1 +# define INCLUDE_NEON64 1 # elif defined(__ARM_NEON__) -# define INCLUDE_NEON32 -# else +# define INCLUDE_NEON32 1 # endif -#else - #endif #if defined(AX_USE_SSE) -# define USE_SSE -# define INCLUDE_SSE +# define USE_SSE 1 +# define INCLUDE_SSE 1 #endif #ifdef INCLUDE_NEON32 @@ -298,4 +301,34 @@ void MathUtil::crossVec3(const float* v1, const float* v2, float* dst) #endif } +void MathUtil::transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform) +{ + // Check some assumptions made by optimizations + static_assert(sizeof(V3F_C4B_T2F) == 24); + static_assert(offsetof(V3F_C4B_T2F, vertices) == 0); + static_assert(offsetof(V3F_C4B_T2F, colors) == 12); + static_assert(offsetof(V3F_C4B_T2F, texCoords) == 16); + +#ifdef USE_NEON32 + MathUtilNeon::transformVertices(dst, src, count, transform); +#elif defined(USE_NEON64) + MathUtilNeon64::transformVertices(dst, src, count, transform); +#elif defined(INCLUDE_NEON32) + if (isNeon32Enabled()) + MathUtilNeon::transformVertices(dst, src, count, transform); + else + MathUtilC::transformVertices(dst, src, count, transform); +#else + MathUtilC::transformVertices(dst, src, count, transform); +#endif +} + +void MathUtil::transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset) { +#if defined(USE_NEON64) + MathUtilNeon64::transformIndices(dst, src, count, offset); +#else + MathUtilC::transformIndices(dst, src, count, offset); +#endif +} + NS_AX_MATH_END diff --git a/core/math/MathUtil.h b/core/math/MathUtil.h index 1fbe4060755f..7cb78b7845f0 100644 --- a/core/math/MathUtil.h +++ b/core/math/MathUtil.h @@ -2,6 +2,7 @@ Copyright 2013 BlackBerry Inc. Copyright (c) 2014-2017 Chukong Technologies Copyright (c) 2017-2018 Xiamen Yaji Software Co., Ltd. + Copyright (c) 2019-present Axmol Engine contributors (see AUTHORS.md). Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -29,6 +30,11 @@ #include "math/MathBase.h" + +NS_AX_BEGIN + struct V3F_C4B_T2F; +NS_AX_END + /** * @addtogroup base * @{ @@ -36,6 +42,8 @@ NS_AX_MATH_BEGIN +class Mat4; + /** * Defines a math utility class. * @@ -45,6 +53,7 @@ class AX_DLL MathUtil { friend class Mat4; friend class Vec3; + friend class Renderer; public: /** @@ -130,6 +139,9 @@ class AX_DLL MathUtil static void transformVec4(const float* m, const float* v, float* dst); static void crossVec3(const float* v1, const float* v2, float* dst); + + static void transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform); + static void transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset); }; NS_AX_MATH_END diff --git a/core/math/MathUtil.inl b/core/math/MathUtil.inl index 397c336d8e44..4d7028bdbd59 100644 --- a/core/math/MathUtil.inl +++ b/core/math/MathUtil.inl @@ -1,5 +1,6 @@ /** Copyright 2013 BlackBerry Inc. + Copyright (c) 2019-present Axmol Engine contributors (see AUTHORS.md). Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,24 +25,20 @@ class MathUtilC { public: inline static void addMatrix(const float* m, float scalar, float* dst); - inline static void addMatrix(const float* m1, const float* m2, float* dst); - inline static void subtractMatrix(const float* m1, const float* m2, float* dst); - inline static void multiplyMatrix(const float* m, float scalar, float* dst); - inline static void multiplyMatrix(const float* m1, const float* m2, float* dst); - + inline static void negateMatrix(const float* m, float* dst); - inline static void transposeMatrix(const float* m, float* dst); - + inline static void transformVec4(const float* m, float x, float y, float z, float w, float* dst); - inline static void transformVec4(const float* m, const float* v, float* dst); - inline static void crossVec3(const float* v1, const float* v2, float* dst); + + inline static void transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform); + inline static void transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset); }; inline void MathUtilC::addMatrix(const float* m, float scalar, float* dst) @@ -128,27 +125,27 @@ inline void MathUtilC::multiplyMatrix(const float* m1, const float* m2, float* d { // Support the case where m1 or m2 is the same array as dst. float product[16]; - + product[0] = m1[0] * m2[0] + m1[4] * m2[1] + m1[8] * m2[2] + m1[12] * m2[3]; product[1] = m1[1] * m2[0] + m1[5] * m2[1] + m1[9] * m2[2] + m1[13] * m2[3]; product[2] = m1[2] * m2[0] + m1[6] * m2[1] + m1[10] * m2[2] + m1[14] * m2[3]; product[3] = m1[3] * m2[0] + m1[7] * m2[1] + m1[11] * m2[2] + m1[15] * m2[3]; - + product[4] = m1[0] * m2[4] + m1[4] * m2[5] + m1[8] * m2[6] + m1[12] * m2[7]; product[5] = m1[1] * m2[4] + m1[5] * m2[5] + m1[9] * m2[6] + m1[13] * m2[7]; product[6] = m1[2] * m2[4] + m1[6] * m2[5] + m1[10] * m2[6] + m1[14] * m2[7]; product[7] = m1[3] * m2[4] + m1[7] * m2[5] + m1[11] * m2[6] + m1[15] * m2[7]; - + product[8] = m1[0] * m2[8] + m1[4] * m2[9] + m1[8] * m2[10] + m1[12] * m2[11]; product[9] = m1[1] * m2[8] + m1[5] * m2[9] + m1[9] * m2[10] + m1[13] * m2[11]; product[10] = m1[2] * m2[8] + m1[6] * m2[9] + m1[10] * m2[10] + m1[14] * m2[11]; product[11] = m1[3] * m2[8] + m1[7] * m2[9] + m1[11] * m2[10] + m1[15] * m2[11]; - + product[12] = m1[0] * m2[12] + m1[4] * m2[13] + m1[8] * m2[14] + m1[12] * m2[15]; product[13] = m1[1] * m2[12] + m1[5] * m2[13] + m1[9] * m2[14] + m1[13] * m2[15]; product[14] = m1[2] * m2[12] + m1[6] * m2[13] + m1[10] * m2[14] + m1[14] * m2[15]; product[15] = m1[3] * m2[12] + m1[7] * m2[13] + m1[11] * m2[14] + m1[15] * m2[15]; - + memcpy(dst, product, MATRIX_SIZE); } @@ -197,7 +194,7 @@ inline void MathUtilC::transformVec4(const float* m, const float* v, float* dst) float y = v[0] * m[1] + v[1] * m[5] + v[2] * m[9] + v[3] * m[13]; float z = v[0] * m[2] + v[1] * m[6] + v[2] * m[10] + v[3] * m[14]; float w = v[0] * m[3] + v[1] * m[7] + v[2] * m[11] + v[3] * m[15]; - + dst[0] = x; dst[1] = y; dst[2] = z; @@ -209,10 +206,39 @@ inline void MathUtilC::crossVec3(const float* v1, const float* v2, float* dst) float x = (v1[1] * v2[2]) - (v1[2] * v2[1]); float y = (v1[2] * v2[0]) - (v1[0] * v2[2]); float z = (v1[0] * v2[1]) - (v1[1] * v2[0]); - + dst[0] = x; dst[1] = y; dst[2] = z; } +inline void MathUtilC::transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform) +{ + auto end = dst + count; + auto t = transform; // Make copy for better aliasing inference + auto m = t.m; + + while (dst < end) + { + auto pos = src->vertices; + dst->vertices.x = pos.x * m[0] + pos.y * m[4] + pos.z * m[8] + m[12]; + dst->vertices.y = pos.x * m[1] + pos.y * m[5] + pos.z * m[9] + m[13]; + dst->vertices.z = pos.x * m[2] + pos.y * m[6] + pos.z * m[10] + m[14]; + memcpy(&dst->colors, &src->colors, sizeof(dst->colors) + sizeof(dst->texCoords)); + ++dst; + ++src; + } +} + +inline void MathUtilC::transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset) +{ + auto end = dst + count; + while (dst < end) + { + *dst = *src + offset; + ++dst; + ++src; + } +} + NS_AX_MATH_END diff --git a/core/math/MathUtilNeon.inl b/core/math/MathUtilNeon.inl index 7479649cb0a0..e80382490351 100644 --- a/core/math/MathUtilNeon.inl +++ b/core/math/MathUtilNeon.inl @@ -1,5 +1,6 @@ /** Copyright 2013 BlackBerry Inc. + Copyright (c) 2019-present Axmol Engine contributors (see AUTHORS.md). Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,30 +18,28 @@ This file was modified to fit the cocos2d-x project */ + +#include + NS_AX_MATH_BEGIN class MathUtilNeon { public: inline static void addMatrix(const float* m, float scalar, float* dst); - inline static void addMatrix(const float* m1, const float* m2, float* dst); - inline static void subtractMatrix(const float* m1, const float* m2, float* dst); - inline static void multiplyMatrix(const float* m, float scalar, float* dst); - inline static void multiplyMatrix(const float* m1, const float* m2, float* dst); - + inline static void negateMatrix(const float* m, float* dst); - inline static void transposeMatrix(const float* m, float* dst); - + inline static void transformVec4(const float* m, float x, float y, float z, float w, float* dst); - inline static void transformVec4(const float* m, const float* v, float* dst); - inline static void crossVec3(const float* v1, const float* v2, float* dst); + + inline static void transformVertices(ax::V3F_C4B_T2F* dst, const ax::V3F_C4B_T2F* src, size_t count, const ax::Mat4& transform); }; inline void MathUtilNeon::addMatrix(const float* m, float scalar, float* dst) @@ -52,12 +51,12 @@ inline void MathUtilNeon::addMatrix(const float* m, float scalar, float* dst) "vmov.f32 s17, s16 \n\t" // s "vmov.f32 s18, s16 \n\t" // s "vmov.f32 s19, s16 \n\t" // s - + "vadd.f32 q8, q0, q4 \n\t" // DST->M[m0-m3] = M[m0-m3] + s "vadd.f32 q9, q1, q4 \n\t" // DST->M[m4-m7] = M[m4-m7] + s "vadd.f32 q10, q2, q4 \n\t" // DST->M[m8-m11] = M[m8-m11] + s "vadd.f32 q11, q3, q4 \n\t" // DST->M[m12-m15] = M[m12-m15] + s - + "vst1.32 {q8, q9}, [%0]! \n\t" // DST->M[m0-m7] "vst1.32 {q10, q11}, [%0] \n\t" // DST->M[m8-m15] : @@ -73,12 +72,12 @@ inline void MathUtilNeon::addMatrix(const float* m1, const float* m2, float* dst "vld1.32 {q2, q3}, [%1] \n\t" // M1[m8-m15] "vld1.32 {q8, q9}, [%2]! \n\t" // M2[m0-m7] "vld1.32 {q10, q11}, [%2] \n\t" // M2[m8-m15] - + "vadd.f32 q12, q0, q8 \n\t" // DST->M[m0-m3] = M1[m0-m3] + M2[m0-m3] "vadd.f32 q13, q1, q9 \n\t" // DST->M[m4-m7] = M1[m4-m7] + M2[m4-m7] "vadd.f32 q14, q2, q10 \n\t" // DST->M[m8-m11] = M1[m8-m11] + M2[m8-m11] "vadd.f32 q15, q3, q11 \n\t" // DST->M[m12-m15] = M1[m12-m15] + M2[m12-m15] - + "vst1.32 {q12, q13}, [%0]! \n\t" // DST->M[m0-m7] "vst1.32 {q14, q15}, [%0] \n\t" // DST->M[m8-m15] : @@ -94,12 +93,12 @@ inline void MathUtilNeon::subtractMatrix(const float* m1, const float* m2, float "vld1.32 {q2, q3}, [%1] \n\t" // M1[m8-m15] "vld1.32 {q8, q9}, [%2]! \n\t" // M2[m0-m7] "vld1.32 {q10, q11}, [%2] \n\t" // M2[m8-m15] - + "vsub.f32 q12, q0, q8 \n\t" // DST->M[m0-m3] = M1[m0-m3] - M2[m0-m3] "vsub.f32 q13, q1, q9 \n\t" // DST->M[m4-m7] = M1[m4-m7] - M2[m4-m7] "vsub.f32 q14, q2, q10 \n\t" // DST->M[m8-m11] = M1[m8-m11] - M2[m8-m11] "vsub.f32 q15, q3, q11 \n\t" // DST->M[m12-m15] = M1[m12-m15] - M2[m12-m15] - + "vst1.32 {q12, q13}, [%0]! \n\t" // DST->M[m0-m7] "vst1.32 {q14, q15}, [%0] \n\t" // DST->M[m8-m15] : @@ -114,12 +113,12 @@ inline void MathUtilNeon::multiplyMatrix(const float* m, float scalar, float* ds "vld1.32 {d0[0]}, [%2] \n\t" // M[m0-m7] "vld1.32 {q4-q5}, [%1]! \n\t" // M[m8-m15] "vld1.32 {q6-q7}, [%1] \n\t" // s - + "vmul.f32 q8, q4, d0[0] \n\t" // DST->M[m0-m3] = M[m0-m3] * s "vmul.f32 q9, q5, d0[0] \n\t" // DST->M[m4-m7] = M[m4-m7] * s "vmul.f32 q10, q6, d0[0] \n\t" // DST->M[m8-m11] = M[m8-m11] * s "vmul.f32 q11, q7, d0[0] \n\t" // DST->M[m12-m15] = M[m12-m15] * s - + "vst1.32 {q8-q9}, [%0]! \n\t" // DST->M[m0-m7] "vst1.32 {q10-q11}, [%0] \n\t" // DST->M[m8-m15] : @@ -135,30 +134,30 @@ inline void MathUtilNeon::multiplyMatrix(const float* m1, const float* m2, float "vld1.32 {d20 - d23}, [%1] \n\t" // M1[m8-m15] "vld1.32 {d0 - d3}, [%2]! \n\t" // M2[m0-m7] "vld1.32 {d4 - d7}, [%2] \n\t" // M2[m8-m15] - + "vmul.f32 q12, q8, d0[0] \n\t" // DST->M[m0-m3] = M1[m0-m3] * M2[m0] "vmul.f32 q13, q8, d2[0] \n\t" // DST->M[m4-m7] = M1[m4-m7] * M2[m4] "vmul.f32 q14, q8, d4[0] \n\t" // DST->M[m8-m11] = M1[m8-m11] * M2[m8] "vmul.f32 q15, q8, d6[0] \n\t" // DST->M[m12-m15] = M1[m12-m15] * M2[m12] - + "vmla.f32 q12, q9, d0[1] \n\t" // DST->M[m0-m3] += M1[m0-m3] * M2[m1] "vmla.f32 q13, q9, d2[1] \n\t" // DST->M[m4-m7] += M1[m4-m7] * M2[m5] "vmla.f32 q14, q9, d4[1] \n\t" // DST->M[m8-m11] += M1[m8-m11] * M2[m9] "vmla.f32 q15, q9, d6[1] \n\t" // DST->M[m12-m15] += M1[m12-m15] * M2[m13] - + "vmla.f32 q12, q10, d1[0] \n\t" // DST->M[m0-m3] += M1[m0-m3] * M2[m2] "vmla.f32 q13, q10, d3[0] \n\t" // DST->M[m4-m7] += M1[m4-m7] * M2[m6] "vmla.f32 q14, q10, d5[0] \n\t" // DST->M[m8-m11] += M1[m8-m11] * M2[m10] "vmla.f32 q15, q10, d7[0] \n\t" // DST->M[m12-m15] += M1[m12-m15] * M2[m14] - + "vmla.f32 q12, q11, d1[1] \n\t" // DST->M[m0-m3] += M1[m0-m3] * M2[m3] "vmla.f32 q13, q11, d3[1] \n\t" // DST->M[m4-m7] += M1[m4-m7] * M2[m7] "vmla.f32 q14, q11, d5[1] \n\t" // DST->M[m8-m11] += M1[m8-m11] * M2[m11] "vmla.f32 q15, q11, d7[1] \n\t" // DST->M[m12-m15] += M1[m12-m15] * M2[m15] - + "vst1.32 {d24 - d27}, [%0]! \n\t" // DST->M[m0-m7] "vst1.32 {d28 - d31}, [%0] \n\t" // DST->M[m8-m15] - + : // output : "r"(dst), "r"(m1), "r"(m2) // input - note *value* of pointer doesn't change. : "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" @@ -170,12 +169,12 @@ inline void MathUtilNeon::negateMatrix(const float* m, float* dst) asm volatile( "vld1.32 {q0-q1}, [%1]! \n\t" // load m0-m7 "vld1.32 {q2-q3}, [%1] \n\t" // load m8-m15 - + "vneg.f32 q4, q0 \n\t" // negate m0-m3 "vneg.f32 q5, q1 \n\t" // negate m4-m7 "vneg.f32 q6, q2 \n\t" // negate m8-m15 "vneg.f32 q7, q3 \n\t" // negate m8-m15 - + "vst1.32 {q4-q5}, [%0]! \n\t" // store m0-m7 "vst1.32 {q6-q7}, [%0] \n\t" // store m8-m15 : @@ -191,7 +190,7 @@ inline void MathUtilNeon::transposeMatrix(const float* m, float* dst) "vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [%1]! \n\t" // DST->M[m1, m5, m9, m12] = M[m4-m7] "vld4.32 {d1[0], d3[0], d5[0], d7[0]}, [%1]! \n\t" // DST->M[m2, m6, m10, m12] = M[m8-m11] "vld4.32 {d1[1], d3[1], d5[1], d7[1]}, [%1] \n\t" // DST->M[m3, m7, m11, m12] = M[m12-m15] - + "vst1.32 {q0-q1}, [%0]! \n\t" // DST->M[m0-m7] "vst1.32 {q2-q3}, [%0] \n\t" // DST->M[m8-m15] : @@ -209,12 +208,12 @@ inline void MathUtilNeon::transformVec4(const float* m, float x, float y, float "vld1.32 {d1[1]}, [%4] \n\t" // V[w] "vld1.32 {d18 - d21}, [%5]! \n\t" // M[m0-m7] "vld1.32 {d22 - d25}, [%5] \n\t" // M[m8-m15] - + "vmul.f32 q13, q9, d0[0] \n\t" // DST->V = M[m0-m3] * V[x] "vmla.f32 q13, q10, d0[1] \n\t" // DST->V += M[m4-m7] * V[y] "vmla.f32 q13, q11, d1[0] \n\t" // DST->V += M[m8-m11] * V[z] "vmla.f32 q13, q12, d1[1] \n\t" // DST->V += M[m12-m15] * V[w] - + "vst1.32 {d26}, [%0]! \n\t" // DST->V[x, y] "vst1.32 {d27[0]}, [%0] \n\t" // DST->V[z] : @@ -230,12 +229,12 @@ inline void MathUtilNeon::transformVec4(const float* m, const float* v, float* d "vld1.32 {d0, d1}, [%1] \n\t" // V[x, y, z, w] "vld1.32 {d18 - d21}, [%2]! \n\t" // M[m0-m7] "vld1.32 {d22 - d25}, [%2] \n\t" // M[m8-m15] - + "vmul.f32 q13, q9, d0[0] \n\t" // DST->V = M[m0-m3] * V[x] "vmla.f32 q13, q10, d0[1] \n\t" // DST->V = M[m4-m7] * V[y] "vmla.f32 q13, q11, d1[0] \n\t" // DST->V = M[m8-m11] * V[z] "vmla.f32 q13, q12, d1[1] \n\t" // DST->V = M[m12-m15] * V[w] - + "vst1.32 {d26, d27}, [%0] \n\t" // DST->V : : "r"(dst), "r"(v), "r"(m) @@ -249,17 +248,17 @@ inline void MathUtilNeon::crossVec3(const float* v1, const float* v2, float* dst "vld1.32 {d1[1]}, [%1] \n\t" // "vld1.32 {d0}, [%2] \n\t" // "vmov.f32 s2, s1 \n\t" // q0 = (v1y, v1z, v1z, v1x) - + "vld1.32 {d2[1]}, [%3] \n\t" // "vld1.32 {d3}, [%4] \n\t" // "vmov.f32 s4, s7 \n\t" // q1 = (v2z, v2x, v2y, v2z) - + "vmul.f32 d4, d0, d2 \n\t" // x = v1y * v2z, y = v1z * v2x "vmls.f32 d4, d1, d3 \n\t" // x -= v1z * v2y, y-= v1x - v2z - + "vmul.f32 d5, d3, d1[1] \n\t" // z = v1x * v2y "vmls.f32 d5, d0, d2[1] \n\t" // z-= v1y * vx - + "vst1.32 {d4}, [%0]! \n\t" // V[x, y] "vst1.32 {d5[0]}, [%0] \n\t" // V[z] : @@ -268,4 +267,105 @@ inline void MathUtilNeon::crossVec3(const float* v1, const float* v2, float* dst ); } +inline void MathUtilNeon::transformVertices(ax::V3F_C4B_T2F* dst, const ax::V3F_C4B_T2F* src, size_t count, const ax::Mat4& transform) +{ + auto end = dst + count; + + // Load matrix + float32x4_t mc0 = vld1q_f32(transform.m); + float32x4_t mc1 = vld1q_f32(transform.m + 4); + float32x4_t mc2 = vld1q_f32(transform.m + 8); + float32x4_t mc3 = vld1q_f32(transform.m + 12); + + // Process 4 vertices at a time + auto end4 = dst + count / 4 * 4; + while (dst < end4) + { + // Load 4 vertices. Note that color will also get loaded into w + float32x2_t xy0 = vld1_f32(&src[0].vertices.x); + float32x2_t zw0 = vld1_f32(&src[0].vertices.z); + float32x2_t uv0 = vld1_f32(&src[0].texCoords.u); + float32x2_t xy1 = vld1_f32(&src[1].vertices.x); + float32x2_t zw1 = vld1_f32(&src[1].vertices.z); + float32x2_t uv1 = vld1_f32(&src[1].texCoords.u); + float32x2_t xy2 = vld1_f32(&src[2].vertices.x); + float32x2_t zw2 = vld1_f32(&src[2].vertices.z); + float32x2_t uv2 = vld1_f32(&src[2].texCoords.u); + float32x2_t xy3 = vld1_f32(&src[3].vertices.x); + float32x2_t zw3 = vld1_f32(&src[3].vertices.z); + float32x2_t uv3 = vld1_f32(&src[3].texCoords.u); + + // Multiply x by column 0 + float32x4_t r0 = vmulq_lane_f32(mc0, xy0, 0); + float32x4_t r1 = vmulq_lane_f32(mc0, xy1, 0); + float32x4_t r2 = vmulq_lane_f32(mc0, xy2, 0); + float32x4_t r3 = vmulq_lane_f32(mc0, xy3, 0); + + // Multiply y by column 1 and add to result + r0 = vmlaq_lane_f32(r0, mc1, xy0, 1); + r1 = vmlaq_lane_f32(r1, mc1, xy1, 1); + r2 = vmlaq_lane_f32(r2, mc1, xy2, 1); + r3 = vmlaq_lane_f32(r3, mc1, xy3, 1); + + // Multiply z by column 2 and add to result + r0 = vmlaq_lane_f32(r0, mc2, zw0, 0); + r1 = vmlaq_lane_f32(r1, mc2, zw1, 0); + r2 = vmlaq_lane_f32(r2, mc2, zw2, 0); + r3 = vmlaq_lane_f32(r3, mc2, zw3, 0); + + // Add column 3 + r0 = vaddq_f32(r0, mc3); + r1 = vaddq_f32(r1, mc3); + r2 = vaddq_f32(r2, mc3); + r3 = vaddq_f32(r3, mc3); + + // Set color + r0 = vsetq_lane_f32(vget_lane_f32(zw0, 1), r0, 3); + r1 = vsetq_lane_f32(vget_lane_f32(zw1, 1), r1, 3); + r2 = vsetq_lane_f32(vget_lane_f32(zw2, 1), r2, 3); + r3 = vsetq_lane_f32(vget_lane_f32(zw3, 1), r3, 3); + + // Store result + vst1q_f32(&dst[0].vertices.x, r0); + vst1_f32(&dst[0].texCoords.u, uv0); + vst1q_f32(&dst[1].vertices.x, r1); + vst1_f32(&dst[1].texCoords.u, uv1); + vst1q_f32(&dst[2].vertices.x, r2); + vst1_f32(&dst[2].texCoords.u, uv2); + vst1q_f32(&dst[3].vertices.x, r3); + vst1_f32(&dst[3].texCoords.u, uv3); + + dst += 4; + src += 4; + } + + // Process remaining vertices + while (dst < end) + { + // Load vertex + float32x2_t xy = vld1_f32(&src->vertices.x); + float32x2_t zw = vld1_f32(&src->vertices.z); + float32x2_t uv = vld1_f32(&src->texCoords.u); + + // Multiply x by column 0 + float32x4_t r = vmulq_lane_f32(mc0, xy, 0); + // Multiply y by column 1 and add to result + r = vmlaq_lane_f32(r, mc1, xy, 1); + // Multiply z by column 2 and add to result + r = vmlaq_lane_f32(r, mc2, zw, 0); + // Add column 3 + r = vaddq_f32(r, mc3); + + // Set color + r = vsetq_lane_f32(vget_lane_f32(zw, 1), r, 3); + + // Store result + vst1q_f32(&dst->vertices.x, r); + vst1_f32(&dst->texCoords.u, uv); + + ++dst; + ++src; + } +} + NS_AX_MATH_END diff --git a/core/math/MathUtilNeon64.inl b/core/math/MathUtilNeon64.inl index 3f683afcbd93..1bfb02759dc1 100644 --- a/core/math/MathUtilNeon64.inl +++ b/core/math/MathUtilNeon64.inl @@ -1,5 +1,6 @@ /** Copyright 2013 BlackBerry Inc. + Copyright (c) 2019-present Axmol Engine contributors (see AUTHORS.md). Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,30 +19,29 @@ This file was modified to fit the cocos2d-x project */ +#include +#include "base/Types.h" + NS_AX_MATH_BEGIN class MathUtilNeon64 { public: inline static void addMatrix(const float* m, float scalar, float* dst); - inline static void addMatrix(const float* m1, const float* m2, float* dst); - inline static void subtractMatrix(const float* m1, const float* m2, float* dst); - inline static void multiplyMatrix(const float* m, float scalar, float* dst); - inline static void multiplyMatrix(const float* m1, const float* m2, float* dst); - + inline static void negateMatrix(const float* m, float* dst); - inline static void transposeMatrix(const float* m, float* dst); - + inline static void transformVec4(const float* m, float x, float y, float z, float w, float* dst); - inline static void transformVec4(const float* m, const float* v, float* dst); - inline static void crossVec3(const float* v1, const float* v2, float* dst); + + inline static void transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform); + inline static void transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset); }; inline void MathUtilNeon64::addMatrix(const float* m, float scalar, float* dst) @@ -54,7 +54,7 @@ inline void MathUtilNeon64::addMatrix(const float* m, float scalar, float* dst) "fadd v9.4s, v1.4s, v4.4s \n\t" // DST->M[m4-m7] = M[m4-m7] + s "fadd v10.4s, v2.4s, v4.4s \n\t" // DST->M[m8-m11] = M[m8-m11] + s "fadd v11.4s, v3.4s, v4.4s \n\t" // DST->M[m12-m15] = M[m12-m15] + s - + "st4 {v8.4s, v9.4s, v10.4s, v11.4s}, [%0] \n\t" // Result in V9 : : "r"(dst), "r"(m), "r"(&scalar) @@ -73,7 +73,7 @@ inline void MathUtilNeon64::addMatrix(const float* m1, const float* m2, float* d "fadd v14.4s, v2.4s, v10.4s \n\t" // DST->M[m8-m11] = M1[m8-m11] + M2[m8-m11] "fadd v15.4s, v3.4s, v11.4s \n\t" // DST->M[m12-m15] = M1[m12-m15] + M2[m12-m15] - "st4 {v12.4s, v13.4s, v14.4s, v15.4s}, [%0] \n\t" // DST->M[m0-m7] DST->M[m8-m15] + "st4 {v12.4s, v13.4s, v14.4s, v15.4s}, [%0] \n\t" // DST->M[m0-m7] DST->M[m8-m15] : : "r"(dst), "r"(m1), "r"(m2) : "v0", "v1", "v2", "v3", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "memory" @@ -84,7 +84,7 @@ inline void MathUtilNeon64::subtractMatrix(const float* m1, const float* m2, flo { asm volatile( "ld4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%1] \n\t" // M1[m0-m7] M1[m8-m15] - "ld4 {v8.4s, v9.4s, v10.4s, v11.4s}, [%2] \n\t" // M2[m0-m7] M2[m8-m15] + "ld4 {v8.4s, v9.4s, v10.4s, v11.4s}, [%2] \n\t" // M2[m0-m7] M2[m8-m15] "fsub v12.4s, v0.4s, v8.4s \n\t" // DST->M[m0-m3] = M1[m0-m3] - M2[m0-m3] "fsub v13.4s, v1.4s, v9.4s \n\t" // DST->M[m4-m7] = M1[m4-m7] - M2[m4-m7] @@ -101,7 +101,7 @@ inline void MathUtilNeon64::subtractMatrix(const float* m1, const float* m2, flo inline void MathUtilNeon64::multiplyMatrix(const float* m, float scalar, float* dst) { asm volatile( - "ld1 {v0.s}[0], [%2] \n\t" //s + "ld1 {v0.s}[0], [%2] \n\t" //s "ld4 {v4.4s, v5.4s, v6.4s, v7.4s}, [%1] \n\t" //M[m0-m7] M[m8-m15] "fmul v8.4s, v4.4s, v0.s[0] \n\t" // DST->M[m0-m3] = M[m0-m3] * s @@ -171,8 +171,8 @@ inline void MathUtilNeon64::negateMatrix(const float* m, float* dst) inline void MathUtilNeon64::transposeMatrix(const float* m, float* dst) { asm volatile( - "ld4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%1] \n\t" // DST->M[m0, m4, m8, m12] = M[m0-m3] - //DST->M[m1, m5, m9, m12] = M[m4-m7] + "ld4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%1] \n\t" // DST->M[m0, m4, m8, m12] = M[m0-m3] + //DST->M[m1, m5, m9, m12] = M[m4-m7] "st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [%0] \n\t" : : "r"(dst), "r"(m) @@ -189,7 +189,7 @@ inline void MathUtilNeon64::transformVec4(const float* m, float x, float y, floa "ld1 {v0.s}[3], [%4] \n\t" // V[w] "ld1 {v9.4s, v10.4s, v11.4s, v12.4s}, [%5] \n\t" // M[m0-m7] M[m8-m15] - + "fmul v13.4s, v9.4s, v0.s[0] \n\t" // DST->V = M[m0-m3] * V[x] "fmla v13.4s, v10.4s, v0.s[1] \n\t" // DST->V += M[m4-m7] * V[y] "fmla v13.4s, v11.4s, v0.s[2] \n\t" // DST->V += M[m8-m11] * V[z] @@ -198,8 +198,8 @@ inline void MathUtilNeon64::transformVec4(const float* m, float x, float y, floa //"st1 {v13.4s}, [%0] \n\t" // DST->V[x, y] // DST->V[z] "st1 {v13.2s}, [%0], 8 \n\t" "st1 {v13.s}[2], [%0] \n\t" - : - : "r"(dst), "r"(&x), "r"(&y), "r"(&z), "r"(&w), "r"(m) + : "+r"(dst) + : "r"(&x), "r"(&y), "r"(&z), "r"(&w), "r"(m) : "v0", "v9", "v10","v11", "v12", "v13", "memory" ); } @@ -256,10 +256,143 @@ inline void MathUtilNeon64::crossVec3(const float* v1, const float* v2, float* d "st1 {v2.2s}, [%0], 8 \n\t" // V[x, y] "st1 {v2.s}[2], [%0] \n\t" // V[z] - : - : "r"(dst), "r"(v1), "r"((v1+1)), "r"(v2), "r"((v2+1)) + : "+r"(dst) + : "r"(v1), "r"((v1+1)), "r"(v2), "r"((v2+1)) : "v0", "v1", "v2", "memory" ); } +inline void MathUtilNeon64::transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform) +{ + auto end = dst + count; + + // Load matrix + float32x4x4_t m = vld1q_f32_x4(transform.m); + + // Process 4 vertices at a time if there's enough data + auto end4 = dst + count / 4 * 4; + while (dst < end4) + { + // Do this for each vertex + // dst->vertices.x = pos.x * m[0] + pos.y * m[4] + pos.z * m[8] + m[12]; + // dst->vertices.y = pos.x * m[1] + pos.y * m[5] + pos.z * m[9] + m[13]; + // dst->vertices.z = pos.x * m[2] + pos.y * m[6] + pos.z * m[10] + m[14]; + + // First, load each vertex, multiply x by column 0 and add to column 3 + // Note: since we're reading 4 floats it will load color bytes into v.w + float32x4_t v0 = vld1q_f32(&src[0].vertices.x); + float32x4_t r0 = vmlaq_laneq_f32(m.val[3], m.val[0], v0, 0); + float32x4_t v1 = vld1q_f32(&src[1].vertices.x); + float32x4_t r1 = vmlaq_laneq_f32(m.val[3], m.val[0], v1, 0); + float32x4_t v2 = vld1q_f32(&src[2].vertices.x); + float32x4_t r2 = vmlaq_laneq_f32(m.val[3], m.val[0], v2, 0); + float32x4_t v3 = vld1q_f32(&src[3].vertices.x); + float32x4_t r3 = vmlaq_laneq_f32(m.val[3], m.val[0], v3, 0); + + // Load texCoords + float32x2_t uv0 = vld1_f32(&src[0].texCoords.u); + float32x2_t uv1 = vld1_f32(&src[1].texCoords.u); + float32x2_t uv2 = vld1_f32(&src[2].texCoords.u); + float32x2_t uv3 = vld1_f32(&src[3].texCoords.u); + + // Multiply y by column 1 and add to result + r0 = vmlaq_laneq_f32(r0, m.val[1], v0, 1); + r1 = vmlaq_laneq_f32(r1, m.val[1], v1, 1); + r2 = vmlaq_laneq_f32(r2, m.val[1], v2, 1); + r3 = vmlaq_laneq_f32(r3, m.val[1], v3, 1); + + // Multiply z by column 2 and add to result + r0 = vmlaq_laneq_f32(r0, m.val[2], v0, 2); + r1 = vmlaq_laneq_f32(r1, m.val[2], v1, 2); + r2 = vmlaq_laneq_f32(r2, m.val[2], v2, 2); + r3 = vmlaq_laneq_f32(r3, m.val[2], v3, 2); + + // Set w to loaded color + r0 = vsetq_lane_f32(vgetq_lane_f32(v0, 3), r0, 3); + r1 = vsetq_lane_f32(vgetq_lane_f32(v1, 3), r1, 3); + r2 = vsetq_lane_f32(vgetq_lane_f32(v2, 3), r2, 3); + r3 = vsetq_lane_f32(vgetq_lane_f32(v3, 3), r3, 3); + + // Store result + vst1q_f32(&dst[0].vertices.x, r0); + vst1_f32(&dst[0].texCoords.u, uv0); + vst1q_f32(&dst[1].vertices.x, r1); + vst1_f32(&dst[1].texCoords.u, uv1); + vst1q_f32(&dst[2].vertices.x, r2); + vst1_f32(&dst[2].texCoords.u, uv2); + vst1q_f32(&dst[3].vertices.x, r3); + vst1_f32(&dst[3].texCoords.u, uv3); + + dst += 4; + src += 4; + } + + // Process remaining vertices one by one + while (dst < end) + { + float32x4_t v = vld1q_f32(&src->vertices.x); + float32x4_t r = vmlaq_laneq_f32(m.val[3], m.val[0], v, 0); + r = vmlaq_laneq_f32(r, m.val[1], v, 1); + r = vmlaq_laneq_f32(r, m.val[2], v, 2); + r = vsetq_lane_f32(vgetq_lane_f32(v, 3), r, 3); + float32x2_t uv = vld1_f32(&src->texCoords.u); + vst1q_f32(&dst->vertices.x, r); + vst1_f32(&dst->texCoords.u, uv); + + ++dst; + ++src; + } +} + +inline void MathUtilNeon64::transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset) +{ + auto end = dst + count; + auto off = vdupq_n_u16(offset); + + if (count < 8) + goto LEFTOVER; + + // Process 32 indices at a time if there's enough data + while (count >= 32) + { + // Load 32 indices + uint16x8x4_t v = vld1q_u16_x4(src); + + // Add offset + v.val[0] = vaddq_u16(v.val[0], off); + v.val[1] = vaddq_u16(v.val[1], off); + v.val[2] = vaddq_u16(v.val[2], off); + v.val[3] = vaddq_u16(v.val[3], off); + + // Store result + vst1q_u16_x4(dst, v); + + dst += 32; + src += 32; + count -= 32; + } + + // Process 8 indices at a time if there's enough data + while (count >= 8) + { + uint16x8_t v = vld1q_u16(src); + v = vaddq_u16(v, off); + vst1q_u16(dst, v); + + dst += 8; + src += 8; + count -= 8; + } + +LEFTOVER: + // Process remaining indices one by one + while (count > 0) + { + *dst = *src + offset; + ++dst; + ++src; + --count; + } +} + NS_AX_MATH_END diff --git a/core/renderer/Renderer.cpp b/core/renderer/Renderer.cpp index d3b98ba15abe..544a5026a31f 100644 --- a/core/renderer/Renderer.cpp +++ b/core/renderer/Renderer.cpp @@ -589,23 +589,17 @@ void Renderer::setViewPort(int x, int y, unsigned int w, unsigned int h) void Renderer::fillVerticesAndIndices(const TrianglesCommand* cmd, unsigned int vertexBufferOffset) { - size_t vertexCount = cmd->getVertexCount(); - memcpy(&_verts[_filledVertex], cmd->getVertices(), sizeof(V3F_C4B_T2F) * vertexCount); - - // fill vertex, and convert them to world coordinates - const Mat4& modelView = cmd->getModelView(); - for (size_t i = 0; i < vertexCount; ++i) - { - modelView.transformPoint(&(_verts[i + _filledVertex].vertices)); - } - - // fill index - const unsigned short* indices = cmd->getIndices(); - size_t indexCount = cmd->getIndexCount(); - for (size_t i = 0; i < indexCount; ++i) - { - _indices[_filledIndex + i] = vertexBufferOffset + _filledVertex + indices[i]; - } + auto destVertices = &_verts[_filledVertex]; + auto srcVertices = cmd->getVertices(); + auto vertexCount = cmd->getVertexCount(); + auto&& modelView = cmd->getModelView(); + MathUtil::transformVertices(destVertices, srcVertices, vertexCount, modelView); + + auto destIndices = &_indices[_filledIndex]; + auto srcIndices = cmd->getIndices(); + auto indexCount = cmd->getIndexCount(); + auto offset = vertexBufferOffset + _filledVertex; + MathUtil::transformIndices(destIndices, srcIndices, indexCount, int(offset)); _filledVertex += vertexCount; _filledIndex += indexCount; diff --git a/tests/unit-tests/CMakeLists.txt b/tests/unit-tests/CMakeLists.txt index f673f2e7bc4f..062589bb0742 100644 --- a/tests/unit-tests/CMakeLists.txt +++ b/tests/unit-tests/CMakeLists.txt @@ -42,6 +42,7 @@ _1klink("${sample-assets_SOURCE_DIR}/unit-tests/Content" "${CMAKE_CURRENT_LIST_D set(GAME_SOURCE Source/AppDelegate.cpp Source/doctest.cpp + Source/TestUtils.cpp Source/core/base/MapTests.cpp Source/core/base/UTF8Tests.cpp @@ -74,13 +75,9 @@ elseif(WINDOWS) endif() if(ANDROID) - list(APPEND GAME_HEADER - Source/JNITest/JNITest.h - ) list(APPEND GAME_SOURCE - Source/JNITest/JNITest.cpp - proj.android/app/jni/main.cpp - ) + proj.android/app/jni/main.cpp + ) elseif(LINUX) list(APPEND GAME_SOURCE proj.linux/main.cpp diff --git a/tests/unit-tests/Source/TestUtils.cpp b/tests/unit-tests/Source/TestUtils.cpp new file mode 100644 index 000000000000..ca4d083d4908 --- /dev/null +++ b/tests/unit-tests/Source/TestUtils.cpp @@ -0,0 +1,45 @@ +#include +#include "base/Types.h" +#include "TestUtils.h" + +NS_AX_BEGIN + + +doctest::String toString(const Vec2& value) { + std::string s; + s.append("("); + s.append(std::to_string(value.u)); + s.append(", "); + s.append(std::to_string(value.v)); + s.append(")"); + return s.c_str(); +} + +doctest::String toString(const Vec3& value) { + std::string s; + s.append("("); + s.append(std::to_string(value.x)); + s.append(", "); + s.append(std::to_string(value.y)); + s.append(", "); + s.append(std::to_string(value.z)); + s.append(")"); + return s.c_str(); +} + +doctest::String toString(const Color4B& value) { + std::string s; + s.append("("); + s.append(std::to_string(value.r)); + s.append(", "); + s.append(std::to_string(value.g)); + s.append(", "); + s.append(std::to_string(value.b)); + s.append(", "); + s.append(std::to_string(value.a)); + s.append(")"); + return s.c_str(); +} + + +NS_AX_END diff --git a/tests/unit-tests/Source/TestUtils.h b/tests/unit-tests/Source/TestUtils.h index 1631b13534e5..a6bf0af45474 100644 --- a/tests/unit-tests/Source/TestUtils.h +++ b/tests/unit-tests/Source/TestUtils.h @@ -39,3 +39,10 @@ class AsyncRunner { return f.get(); } }; + + +namespace ax { + doctest::String toString(const Color4B& value); + doctest::String toString(const Vec2& value); + doctest::String toString(const Vec3& value); +} diff --git a/tests/unit-tests/Source/core/math/MathUtilTests.cpp b/tests/unit-tests/Source/core/math/MathUtilTests.cpp index c59a8c15264e..4c5e8523b41a 100644 --- a/tests/unit-tests/Source/core/math/MathUtilTests.cpp +++ b/tests/unit-tests/Source/core/math/MathUtilTests.cpp @@ -25,25 +25,29 @@ #include #include "base/Config.h" +#include "base/Types.h" +#include "TestUtils.h" #if (AX_TARGET_PLATFORM == AX_PLATFORM_IOS) #if defined(__arm64__) - #define USE_NEON64 - #define INCLUDE_NEON64 + #define USE_NEON64 1 + #define INCLUDE_NEON64 1 #elif defined(__ARM_NEON__) - #define USE_NEON32 - #define INCLUDE_NEON32 + #define USE_NEON32 1 + #define INCLUDE_NEON32 1 + #endif +#elif (AX_TARGET_PLATFORM == AX_PLATFORM_OSX) + #if defined(__arm64__) || defined(__aarch64__) + #define USE_NEON64 1 + #define INCLUDE_NEON64 1 #endif #elif (AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID) #if defined(__arm64__) || defined(__aarch64__) - #define USE_NEON64 - #define INCLUDE_NEON64 + #define USE_NEON64 1 + #define INCLUDE_NEON64 1 #elif defined(__ARM_NEON__) - #define INCLUDE_NEON32 + #define INCLUDE_NEON32 1 #endif -#elif defined(AX_USE_SSE) - #define USE_SSE - #define INCLUDE_SSE #endif #if defined(USE_NEON32) || defined(USE_NEON64) // || defined(USE_SSE) @@ -84,6 +88,96 @@ static void __checkMathUtilResult(std::string_view description, const float* a1, } +TEST_SUITE("math/MathUtil") { + using namespace UnitTest::ax; + + + static void checkVerticesAreEqual(const V3F_C4B_T2F* v1, const V3F_C4B_T2F* v2, size_t count) + { + for (size_t i = 0; i < count; ++i) + { + CHECK_EQ(v1[i].vertices, v2[i].vertices); + CHECK_EQ(v1[i].colors, v2[i].colors); + CHECK_EQ(v1[i].texCoords, v2[i].texCoords); + } + } + + + TEST_CASE("transformVertices") { + auto count = 5; + std::vector src(count); + std::vector expected(count); + std::vector dst(count); + + for (int i = 0; i < count; ++i) { + src[i].vertices.set(float(i), float(i + 1), float(i + 2)); + src[i].colors.set(uint8_t(i + 3), uint8_t(i + 4), uint8_t(i + 5), uint8_t(i + 6)); + src[i].texCoords.set(float(i + 7), float(i + 8)); + + expected[i] = src[i]; + expected[i].vertices.x = src[i].vertices.y * 4; + expected[i].vertices.y = src[i].vertices.x * -5; + expected[i].vertices.z = src[i].vertices.z * 6; + } + + Mat4 transform( + 0, 4, 0, 0, + -5, 0, 0, 0, + 0, 0, 6, 0, + 1, 2, 3, 1 + ); + + SUBCASE("MathUtilC") { + MathUtilC::transformVertices(dst.data(), src.data(), count, transform); + checkVerticesAreEqual(expected.data(), dst.data(), count); + } + + #if INCLUDE_NEON32 + SUBCASE("MathUtilNeon") { + MathUtilNeon::transformVertices(dst.data(), src.data(), count, transform); + checkVerticesAreEqual(expected.data(), dst.data(), count); + } + #endif + + #if INCLUDE_NEON64 + SUBCASE("MathUtilNeon64") { + MathUtilNeon64::transformVertices(dst.data(), src.data(), count, transform); + checkVerticesAreEqual(expected.data(), dst.data(), count); + } + #endif + } + + TEST_CASE("transformIndices") { + auto count = 43; + std::vector src(count); + std::vector expected(count); + + for (int i = 0; i < count; ++i) { + src[i] = i; + expected[i] = i + 5; + } + + uint16_t offset = 5; + + SUBCASE("MathUtilC") { + std::vector dst(count); + MathUtilC::transformIndices(dst.data(), src.data(), count, offset); + for (int i = 0; i < count; ++i) + CHECK_EQ(expected[i], dst[i]); + } + + #if INCLUDE_NEON64 + SUBCASE("MathUtilNeon64") { + std::vector dst(count); + MathUtilNeon64::transformIndices(dst.data(), src.data(), count, offset); + for (int i = 0; i < count; ++i) + CHECK_EQ(expected[i], dst[i]); + } + #endif + } +} + + TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) { TEST_CASE("old_tests") { // I know the next line looks ugly, but it's a way to test MathUtil. :) diff --git a/tests/unit-tests/proj.android/app/build.gradle b/tests/unit-tests/proj.android/app/build.gradle index 97766c5f9b6b..9086c18ad1bd 100644 --- a/tests/unit-tests/proj.android/app/build.gradle +++ b/tests/unit-tests/proj.android/app/build.gradle @@ -70,6 +70,8 @@ android { proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' if (project.hasProperty("KEY_STORE_FILE")) { signingConfig signingConfigs.release + } else { + signingConfig signingConfigs.debug } }