From 1ab1c3de35445f7256c790d5a9725a71e60dd6d2 Mon Sep 17 00:00:00 2001 From: mateusfavarin Date: Sat, 27 Sep 2025 19:40:22 -0700 Subject: [PATCH 1/3] fix critical bug in GTE macros, refactor test functions, add COLL_TestLeaf_Quadblock --- include/ctr/coll.h | 5 +- include/ctr/gte.h | 2 +- include/ctr/nd.h | 7 +- include/ctr/nugget/inline_n.h | 1517 +++++++++++++++++++++++++++++++++ include/ctr/test.h | 15 +- rewrite/src/exe/coll.c | 125 ++- rewrite/src/tests/test.c | 23 +- rewrite/src/tests/test_coll.c | 66 +- rewrite/src/tests/test_math.c | 32 +- rewrite/src/tests/test_rng.c | 20 +- symbols/gcc-syms-rewrite.txt | 2 +- 11 files changed, 1722 insertions(+), 92 deletions(-) create mode 100644 include/ctr/nugget/inline_n.h diff --git a/include/ctr/coll.h b/include/ctr/coll.h index ea1ac3fe1..c2f6b5415 100644 --- a/include/ctr/coll.h +++ b/include/ctr/coll.h @@ -105,9 +105,6 @@ typedef struct CollDCache #define DCACHE_COLL (*(CollDCache*) 0x1f800108) -void COLL_ProjectPointToEdge(SVec3* out, const SVec3* v1, const SVec3* v2, const SVec3* point); -void COLL_LoadQuadblockData_LowLOD(CollDCache* cache, Quadblock* quadblock); -void COLL_LoadQuadblockData_HighLOD(CollDCache* cache, Quadblock* quadblock); -void COLL_TestTriangle(CollDCache* cache, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3); +void COLL_TestLeaf_Quadblock(const Quadblock* quadblock, CollDCache* cache); extern u32 e_ignoreCollisionDoorFlagTerrain; // 0x8008d728 \ No newline at end of file diff --git a/include/ctr/gte.h b/include/ctr/gte.h index 7d138f3fd..fd0e36269 100644 --- a/include/ctr/gte.h +++ b/include/ctr/gte.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include typedef enum GTE_ROW_INDEX { diff --git a/include/ctr/nd.h b/include/ctr/nd.h index 513741644..45b9b5b92 100644 --- a/include/ctr/nd.h +++ b/include/ctr/nd.h @@ -30,9 +30,10 @@ u32 ND_RNG_Random(RNGSeed* seed); /* COLL */ void ND_COLL_ProjectPointToEdge(SVec3* out, const SVec3* v1, const SVec3* v2, const SVec3* point); -void ND_COLL_LoadQuadblockData_LowLOD(CollDCache* cache, Quadblock* quadblock); -void ND_COLL_LoadQuadblockData_HighLOD(CollDCache* cache, Quadblock* quadblock); +void ND_COLL_LoadQuadblockData_LowLOD(CollDCache* cache, const Quadblock* quadblock); +void ND_COLL_LoadQuadblockData_HighLOD(CollDCache* cache, const Quadblock* quadblock); void ND_COLL_CalculateTrianglePlane(const CollDCache* cache, CollVertex* v1, const CollVertex* v2, const CollVertex* v3); void ND_COLL_LoadVerticeData(CollDCache* cache); s32 ND_COLL_BarycentricTest(TestVertex* t, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3); -void ND_COLL_TestTriangle(CollDCache* cache, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3); \ No newline at end of file +void ND_COLL_TestTriangle(CollDCache* cache, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3); +void ND_COLL_TestLeaf_Quadblock(const Quadblock* quadblock, CollDCache* cache); \ No newline at end of file diff --git a/include/ctr/nugget/inline_n.h b/include/ctr/nugget/inline_n.h new file mode 100644 index 000000000..7ff054a37 --- /dev/null +++ b/include/ctr/nugget/inline_n.h @@ -0,0 +1,1517 @@ +/* + * GTE Macro definitions - special version for Nugget (NO DMPSX) + */ + +/* + * Type 1 functions + */ + +#define gte_ldv0(r0) \ + __asm__ volatile( \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 )" \ + : \ + : "r"(r0)) + +#define gte_ldv1(r0) \ + __asm__ volatile( \ + "lwc2 $2, 0( %0 );" \ + "lwc2 $3, 4( %0 )" \ + : \ + : "r"(r0)) + +#define gte_ldv2(r0) \ + __asm__ 
volatile( \ + "lwc2 $4, 0( %0 );" \ + "lwc2 $5, 4( %0 )" \ + : \ + : "r"(r0)) + +#define gte_ldv3(r0, r1, r2) \ + __asm__ volatile( \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 );" \ + "lwc2 $2, 0( %1 );" \ + "lwc2 $3, 4( %1 );" \ + "lwc2 $4, 0( %2 );" \ + "lwc2 $5, 4( %2 )" \ + : \ + : "r"(r0), "r"(r1), "r"(r2)) + +#define gte_ldv3c(r0) \ + __asm__ volatile( \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 );" \ + "lwc2 $2, 8( %0 );" \ + "lwc2 $3, 12( %0 );" \ + "lwc2 $4, 16( %0 );" \ + "lwc2 $5, 20( %0 )" \ + : \ + : "r"(r0)) + +#define gte_ldv3c_vertc(r0) \ + __asm__ volatile( \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 );" \ + "lwc2 $2, 12( %0 );" \ + "lwc2 $3, 16( %0 );" \ + "lwc2 $4, 24( %0 );" \ + "lwc2 $5, 28( %0 )" \ + : \ + : "r"(r0)) + +#define gte_ldv01(r0, r1) \ + __asm__ volatile( \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 );" \ + "lwc2 $2, 0( %1 );" \ + "lwc2 $3, 4( %1 )" \ + : \ + : "r"(r0), "r"(r1)) + +#define gte_ldv01c(r0) \ + __asm__ volatile( \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 );" \ + "lwc2 $2, 8( %0 );" \ + "lwc2 $3, 12( %0 )" \ + : \ + : "r"(r0)) + +#define gte_ldrgb(r0) __asm__ volatile("lwc2 $6, 0( %0 )" : : "r"(r0)) + +#define gte_ldrgb3(r0, r1, r2) \ + __asm__ volatile( \ + "lwc2 $20, 0( %0 );" \ + "lwc2 $21, 0( %1 );" \ + "lwc2 $22, 0( %2 );" \ + "lwc2 $6, 0( %2 )" \ + : \ + : "r"(r0), "r"(r1), "r"(r2)) + +#define gte_ldrgb3c(r0) \ + __asm__ volatile( \ + "lwc2 $20, 0( %0 );" \ + "lwc2 $21, 4( %0 );" \ + "lwc2 $22, 8( %0 );" \ + "lwc2 $6, 8( %0 )" \ + : \ + : "r"(r0)) + +#define gte_ldlv0(r0) \ + __asm__ volatile( \ + "lhu $13, 4( %0 );" \ + "lhu $12, 0( %0 );" \ + "sll $13, $13, 16;" \ + "or $12, $12, $13;" \ + "mtc2 $12, $0;" \ + "lwc2 $1, 8( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "$13") + +#define gte_ldlvl(r0) \ + __asm__ volatile( \ + "lwc2 $9, 0( %0 );" \ + "lwc2 $10, 4( %0 );" \ + "lwc2 $11, 8( %0 )" \ + : \ + : "r"(r0)) + +#define gte_ldsv(r0) \ + __asm__ volatile( \ + "lhu $12, 0( %0 );" \ + "lhu $13, 2( %0 );" \ + "lhu $14, 4( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_ldbv(r0) \ + __asm__ volatile( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10" \ + : \ + : "r"(r0) \ + : "$12", "$13") + +#define gte_ldcv(r0) \ + __asm__ volatile( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "lbu $14, 2( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_ldclmv(r0) \ + __asm__ volatile( \ + "lhu $12, 0( %0 );" \ + "lhu $13, 6( %0 );" \ + "lhu $14, 12( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_lddp(r0) __asm__ volatile("mtc2 %0, $8" : : "r"(r0)) + +#define gte_ldsxy0(r0) __asm__ volatile("mtc2 %0, $12" : : "r"(r0)) + +#define gte_ldsxy1(r0) __asm__ volatile("mtc2 %0, $13" : : "r"(r0)) + +#define gte_ldsxy2(r0) __asm__ volatile("mtc2 %0, $14" : : "r"(r0)) + +#define gte_ldsxy3(r0, r1, r2) \ + __asm__ volatile( \ + "mtc2 %0, $12;" \ + "mtc2 %2, $14;" \ + "mtc2 %1, $13" \ + : \ + : "r"(r0), "r"(r1), "r"(r2)) + +#define gte_ldsxy3c(r0) \ + __asm__ volatile( \ + "lwc2 $12, 0( %0 );" \ + "lwc2 $13, 4( %0 );" \ + "lwc2 $14, 8( %0 )" \ + : \ + : "r"(r0)) + +#define gte_ldsz3(r0, r1, r2) \ + __asm__ volatile( \ + "mtc2 %0, $17;" \ + "mtc2 %1, $18;" \ + "mtc2 %2, $19" \ + : \ + : "r"(r0), "r"(r1), "r"(r2)) + +#define gte_ldsz4(r0, r1, r2, r3) \ + __asm__ volatile( \ + "mtc2 %0, 
$16;" \ + "mtc2 %1, $17;" \ + "mtc2 %2, $18;" \ + "mtc2 %3, $19" \ + : \ + : "r"(r0), "r"(r1), "r"(r2), "r"(r3)) + +#define gte_ldopv1(r0) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $0;" \ + "lw $14, 8( %0 );" \ + "ctc2 $13, $2;" \ + "ctc2 $14, $4" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_ldopv2(r0) \ + __asm__ volatile( \ + "lwc2 $11, 8( %0 );" \ + "lwc2 $9, 0( %0 );" \ + "lwc2 $10, 4( %0 )" \ + : \ + : "r"(r0)) + +#define gte_ldlzc(r0) __asm__ volatile("mtc2 %0, $30" : : "r"(r0)) + +#define gte_SetRGBcd(r0) __asm__ volatile("lwc2 $6, 0( %0 )" : : "r"(r0)) + +#define gte_ldbkdir(r0, r1, r2) \ + __asm__ volatile( \ + "ctc2 %0, $13;" \ + "ctc2 %1, $14;" \ + "ctc2 %2, $15" \ + : \ + : "r"(r0), "r"(r1), "r"(r2)) + +#define gte_SetBackColor(r0, r1, r2) \ + __asm__ volatile( \ + "sll $12, %0, 4;" \ + "sll $13, %1, 4;" \ + "sll $14, %2, 4;" \ + "ctc2 $12, $13;" \ + "ctc2 $13, $14;" \ + "ctc2 $14, $15" \ + : \ + : "r"(r0), "r"(r1), "r"(r2) \ + : "$12", "$13", "$14") + +#define gte_ldfcdir(r0, r1, r2) \ + __asm__ volatile( \ + "ctc2 %0, $21;" \ + "ctc2 %1, $22;" \ + "ctc2 %2, $23" \ + : \ + : "r"(r0), "r"(r1), "r"(r2)) + +#define gte_SetFarColor(r0, r1, r2) \ + __asm__ volatile( \ + "sll $12, %0, 4;" \ + "sll $13, %1, 4;" \ + "sll $14, %2, 4;" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22;" \ + "ctc2 $14, $23" \ + : \ + : "r"(r0), "r"(r1), "r"(r2) \ + : "$12", "$13", "$14") + +#define gte_SetGeomOffset(r0, r1) \ + __asm__ volatile( \ + "sll $12, %0, 16;" \ + "sll $13, %1, 16;" \ + "ctc2 $12, $24;" \ + "ctc2 $13, $25" \ + : \ + : "r"(r0), "r"(r1) \ + : "$12", "$13") + +#define gte_SetGeomScreen(r0) __asm__ volatile("ctc2 %0, $26" : : "r"(r0)) + +#define gte_ldsvrtrow0(r0) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $0;" \ + "ctc2 $13, $1" \ + : \ + : "r"(r0) \ + : "$12", "$13") + +#define gte_SetRotMatrix(r0) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $0;" \ + "ctc2 $13, $1;" \ + "lw $12, 8( %0 );" \ + "lw $13, 12( %0 );" \ + "lw $14, 16( %0 );" \ + "ctc2 $12, $2;" \ + "ctc2 $13, $3;" \ + "ctc2 $14, $4" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_ldsvllrow0(r0) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $8;" \ + "ctc2 $13, $9" \ + : \ + : "r"(r0) \ + : "$12", "$13") + +#define gte_SetLightMatrix(r0) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $8;" \ + "ctc2 $13, $9;" \ + "lw $12, 8( %0 );" \ + "lw $13, 12( %0 );" \ + "lw $14, 16( %0 );" \ + "ctc2 $12, $10;" \ + "ctc2 $13, $11;" \ + "ctc2 $14, $12" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_ldsvlcrow0(r0) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $16;" \ + "ctc2 $13, $17" \ + : \ + : "r"(r0) \ + : "$12", "$13") + +#define gte_SetColorMatrix(r0) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $16;" \ + "ctc2 $13, $17;" \ + "lw $12, 8( %0 );" \ + "lw $13, 12( %0 );" \ + "lw $14, 16( %0 );" \ + "ctc2 $12, $18;" \ + "ctc2 $13, $19;" \ + "ctc2 $14, $20" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_SetTransMatrix(r0) \ + __asm__ volatile( \ + "lw $12, 20( %0 );" \ + "lw $13, 24( %0 );" \ + "ctc2 $12, $5;" \ + "lw $14, 28( %0 );" \ + "ctc2 $13, $6;" \ + "ctc2 $14, $7" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_ldtr(r0, r1, r2) \ + __asm__ volatile( \ + "ctc2 %0, $5;" \ + "ctc2 %1, $6;" \ + "ctc2 %2, 
$7" \ + : \ + : "r"(r0), "r"(r1), "r"(r2)) + +#define gte_SetTransVector(r0) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "lw $14, 8( %0 );" \ + "ctc2 $12, $5;" \ + "ctc2 $13, $6;" \ + "ctc2 $14, $7" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_ld_intpol_uv0(r0) \ + __asm__ volatile( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22" \ + : \ + : "r"(r0) \ + : "$12", "$13") + +#define gte_ld_intpol_uv1(r0) \ + __asm__ volatile( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10" \ + : \ + : "r"(r0) \ + : "$12", "$13") + +#define gte_ld_intpol_bv0(r0) \ + __asm__ volatile( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22" \ + : \ + : "r"(r0) \ + : "$12", "$13") + +#define gte_ld_intpol_bv1(r0) \ + __asm__ volatile( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10" \ + : \ + : "r"(r0) \ + : "$12", "$13") + +#define gte_ld_intpol_sv0(r0) \ + __asm__ volatile( \ + "lh $12, 0( %0 );" \ + "lh $13, 2( %0 );" \ + "lh $14, 4( %0 );" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22;" \ + "ctc2 $14, $23" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_ld_intpol_sv1(r0) \ + __asm__ volatile( \ + "lh $12, 0( %0 );" \ + "lh $13, 2( %0 );" \ + "lh $14, 4( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_ldfc(r0) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "lw $14, 8( %0 );" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22;" \ + "ctc2 $14, $23" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_ldopv2SV(r0) \ + __asm__ volatile( \ + "lh $12, 0( %0 );" \ + "lh $13, 2( %0 );" \ + "lh $14, 4( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +#define gte_ldopv1SV(r0) \ + __asm__ volatile( \ + "lh $12, 0( %0 );" \ + "lh $13, 2( %0 );" \ + "ctc2 $12, $0;" \ + "lh $14, 4( %0 );" \ + "ctc2 $13, $2;" \ + "ctc2 $14, $4" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14") + +/* + * Type 2 functions + */ + +#define gte_rtps() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0180001;") + +#define gte_rtpt() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0280030;") + +#define gte_rt() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0480012;") + +#define gte_rtv0() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0486012;") + +#define gte_rtv1() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x048E012;") + +#define gte_rtv2() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0496012;") + +#define gte_rtir() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x049E012;") + +#define gte_rtir_sf0() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x041E012;") + +#define gte_rtv0tr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0480012;") + +#define gte_rtv1tr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0488012;") + +#define gte_rtv2tr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0490012;") + +#define gte_rtirtr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0498012;") + +#define gte_rtv0bk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0482012;") + +#define gte_rtv1bk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x048A012;") + +#define gte_rtv2bk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0492012;") + +#define 
gte_rtirbk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x049A012;") + +#define gte_ll() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04A6412;") + +#define gte_llv0() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04A6012;") + +#define gte_llv1() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04AE012;") + +#define gte_llv2() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04B6012;") + +#define gte_llir() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04BE012;") + +#define gte_llv0tr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04A0012;") + +#define gte_llv1tr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04A8012;") + +#define gte_llv2tr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04B0012;") + +#define gte_llirtr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04B8012;") + +#define gte_llv0bk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04A2012;") + +#define gte_llv1bk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04AA012;") + +#define gte_llv2bk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04B2012;") + +#define gte_llirbk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04BA012;") + +#define gte_lc() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04DA412;") + +#define gte_lcv0() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04C6012;") + +#define gte_lcv1() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04CE012;") + +#define gte_lcv2() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04D6012;") + +#define gte_lcir() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04DE012;") + +#define gte_lcv0tr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04C0012;") + +#define gte_lcv1tr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04C8012;") + +#define gte_lcv2tr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04D0012;") + +#define gte_lcirtr() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04D8012;") + +#define gte_lcv0bk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04C2012;") + +#define gte_lcv1bk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04CA012;") + +#define gte_lcv2bk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04D2012;") + +#define gte_lcirbk() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x04DA012;") + +#define gte_dpcl() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0680029;") + +#define gte_dpcs() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0780010;") + +#define gte_dpct() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0F8002A;") + +#define gte_intpl() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0980011;") + +#define gte_sqr12() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0A80428;") + +#define gte_sqr0() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0A00428;") + +#define gte_ncs() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0C8041E;") + +#define gte_nct() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0D80420;") + +#define gte_ncds() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0E80413;") + +#define gte_ncdt() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0F80416;") + +#define gte_nccs() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0108041B;") + +#define gte_ncct() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0118043F;") + +#define gte_cdp() \ + __asm__ volatile( \ + "nop;" \ + 
"nop;" \ + "cop2 0x01280414;") + +#define gte_cc() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0138041C;") + +#define gte_nclip() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x01400006;") + +#define gte_avsz3() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0158002D;") + +#define gte_avsz4() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0168002E;") + +#define gte_op12() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0178000C;") + +#define gte_op0() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0170000C;") + +#define gte_gpf12() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0198003D;") + +#define gte_gpf0() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x0190003D;") + +#define gte_gpl12() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x01A8003E;") + +#define gte_gpl0() \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 0x01A0003E0") + +#define gte_mvmva_core(r0) \ + __asm__ volatile( \ + "nop;" \ + "nop;" \ + "cop2 %0" \ + : \ + : "g"(r0)) + +#define gte_mvmva(sf, mx, v, cv, lm) \ + gte_mvmva_core(0x0400012 | ((sf) << 19) | ((mx) << 17) | ((v) << 15) | ((cv) << 13) | ((lm) << 10)) + +/* + * Type 2 functions without nop + */ + +#define gte_rtps_b() __asm__ volatile("cop2 0x0180001;") +#define gte_rtpt_b() __asm__ volatile("cop2 0x0280030;") +#define gte_rt_b() __asm__ volatile("cop2 0x0480012;") +#define gte_rtv0_b() __asm__ volatile("cop2 0x0486012;") +#define gte_rtv1_b() __asm__ volatile("cop2 0x048E012;") +#define gte_rtv2_b() __asm__ volatile("cop2 0x0496012;") +#define gte_rtir_b() __asm__ volatile("cop2 0x049E012;") +#define gte_rtir_sf0_b() __asm__ volatile("cop2 0x041E012;") +#define gte_rtv0tr_b() __asm__ volatile("cop2 0x0480012;") +#define gte_rtv1tr_b() __asm__ volatile("cop2 0x0488012;") +#define gte_rtv2tr_b() __asm__ volatile("cop2 0x0490012;") +#define gte_rtirtr_b() __asm__ volatile("cop2 0x0498012;") +#define gte_rtv0bk_b() __asm__ volatile("cop2 0x0482012;") +#define gte_rtv1bk_b() __asm__ volatile("cop2 0x048A012;") +#define gte_rtv2bk_b() __asm__ volatile("cop2 0x0492012;") +#define gte_rtirbk_b() __asm__ volatile("cop2 0x049A012;") +#define gte_ll_b() __asm__ volatile("cop2 0x04A6412;") +#define gte_llv0_b() __asm__ volatile("cop2 0x04A6012;") +#define gte_llv1_b() __asm__ volatile("cop2 0x04AE012;") +#define gte_llv2_b() __asm__ volatile("cop2 0x04B6012;") +#define gte_llir_b() __asm__ volatile("cop2 0x04BE012;") +#define gte_llv0tr_b() __asm__ volatile("cop2 0x04A0012;") +#define gte_llv1tr_b() __asm__ volatile("cop2 0x04A8012;") +#define gte_llv2tr_b() __asm__ volatile("cop2 0x04B0012;") +#define gte_llirtr_b() __asm__ volatile("cop2 0x04B8012;") +#define gte_llv0bk_b() __asm__ volatile("cop2 0x04A2012;") +#define gte_llv1bk_b() __asm__ volatile("cop2 0x04AA012;") +#define gte_llv2bk_b() __asm__ volatile("cop2 0x04B2012;") +#define gte_llirbk_b() __asm__ volatile("cop2 0x04BA012;") +#define gte_lc_b() __asm__ volatile("cop2 0x04DA412;") +#define gte_lcv0_b() __asm__ volatile("cop2 0x04C6012;") +#define gte_lcv1_b() __asm__ volatile("cop2 0x04CE012;") +#define gte_lcv2_b() __asm__ volatile("cop2 0x04D6012;") +#define gte_lcir_b() __asm__ volatile("cop2 0x04DE012;") +#define gte_lcv0tr_b() __asm__ volatile("cop2 0x04C0012;") +#define gte_lcv1tr_b() __asm__ volatile("cop2 0x04C8012;") +#define gte_lcv2tr_b() __asm__ volatile("cop2 0x04D0012;") +#define gte_lcirtr_b() __asm__ volatile("cop2 0x04D8012;") +#define gte_lcv0bk_b() __asm__ volatile("cop2 0x04C2012;") +#define 
gte_lcv1bk_b() __asm__ volatile("cop2 0x04CA012;") +#define gte_lcv2bk_b() __asm__ volatile("cop2 0x04D2012;") +#define gte_lcirbk_b() __asm__ volatile("cop2 0x04DA012;") +#define gte_dpcl_b() __asm__ volatile("cop2 0x0680029;") +#define gte_dpcs_b() __asm__ volatile("cop2 0x0780010;") +#define gte_dpct_b() __asm__ volatile("cop2 0x0F8002A;") +#define gte_intpl_b() __asm__ volatile("cop2 0x0980011;") +#define gte_sqr12_b() __asm__ volatile("cop2 0x0A80428;") +#define gte_sqr0_b() __asm__ volatile("cop2 0x0A00428;") +#define gte_ncs_b() __asm__ volatile("cop2 0x0C8041E;") +#define gte_nct_b() __asm__ volatile("cop2 0x0D80420;") +#define gte_ncds_b() __asm__ volatile("cop2 0x0E80413;") +#define gte_ncdt_b() __asm__ volatile("cop2 0x0F80416;") +#define gte_nccs_b() __asm__ volatile("cop2 0x0108041B;") +#define gte_ncct_b() __asm__ volatile("cop2 0x0118043F;") +#define gte_cdp_b() __asm__ volatile("cop2 0x01280414;") +#define gte_cc_b() __asm__ volatile("cop2 0x0138041C;") +#define gte_nclip_b() __asm__ volatile("cop2 0x01400006;") +#define gte_avsz3_b() __asm__ volatile("cop2 0x0158002D;") +#define gte_avsz4_b() __asm__ volatile("cop2 0x0168002E;") +#define gte_op12_b() __asm__ volatile("cop2 0x0178000C;") +#define gte_op0_b() __asm__ volatile("cop2 0x0170000C;") +#define gte_gpf12_b() __asm__ volatile("cop2 0x0198003D;") +#define gte_gpf0_b() __asm__ volatile("cop2 0x0190003D;") +#define gte_gpl12_b() __asm__ volatile("cop2 0x01A8003E;") +#define gte_gpl0_b() __asm__ volatile("cop2 0x01A0003E;") +#define gte_mvmva_core_b(r0) __asm__ volatile("cop2 %0" : : "g"(r0)) +#define gte_mvmva_b(sf, mx, v, cv, lm) \ + gte_mvmva_core_b(0x0400012 | ((sf) << 19) | ((mx) << 17) | ((v) << 15) | ((cv) << 13) | ((lm) << 10)) + +/* + * Type 3 functions + */ + +#define gte_stsxy(r0) __asm__ volatile("swc2 $14, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stsxy3(r0, r1, r2) \ + __asm__ volatile( \ + "swc2 $12, 0( %0 );" \ + "swc2 $13, 0( %1 );" \ + "swc2 $14, 0( %2 )" \ + : \ + : "r"(r0), "r"(r1), "r"(r2) \ + : "memory") + +#define gte_stsxy3c(r0) \ + __asm__ volatile( \ + "swc2 $12, 0( %0 );" \ + "swc2 $13, 4( %0 );" \ + "swc2 $14, 8( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stsxy2(r0) __asm__ volatile("swc2 $14, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stsxy1(r0) __asm__ volatile("swc2 $13, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stsxy0(r0) __asm__ volatile("swc2 $12, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stsxy01(r0, r1) \ + __asm__ volatile( \ + "swc2 $12, 0( %0 );" \ + "swc2 $13, 0( %1 )" \ + : \ + : "r"(r0), "r"(r1) \ + : "memory") + +#define gte_stsxy01c(r0) \ + __asm__ volatile( \ + "swc2 $12, 0( %0 );" \ + "swc2 $13, 4( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stsxy3_f3(r0) \ + __asm__ volatile( \ + "swc2 $12, 8( %0 );" \ + "swc2 $13, 12( %0 );" \ + "swc2 $14, 16( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stsxy3_g3(r0) \ + __asm__ volatile( \ + "swc2 $12, 8( %0 );" \ + "swc2 $13, 16( %0 );" \ + "swc2 $14, 24( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stsxy3_ft3(r0) \ + __asm__ volatile( \ + "swc2 $12, 8( %0 );" \ + "swc2 $13, 16( %0 );" \ + "swc2 $14, 24( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stsxy3_gt3(r0) \ + __asm__ volatile( \ + "swc2 $12, 8( %0 );" \ + "swc2 $13, 20( %0 );" \ + "swc2 $14, 32( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stsxy3_f4(r0) \ + __asm__ volatile( \ + "swc2 $12, 8( %0 );" \ + "swc2 $13, 12( %0 );" \ + "swc2 $14, 16( %0 )" \ + : \ + : "r"(r0) \ + : 
"memory") + +#define gte_stsxy3_g4(r0) \ + __asm__ volatile( \ + "swc2 $12, 8( %0 );" \ + "swc2 $13, 16( %0 );" \ + "swc2 $14, 24( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stsxy3_ft4(r0) \ + __asm__ volatile( \ + "swc2 $12, 8( %0 );" \ + "swc2 $13, 16( %0 );" \ + "swc2 $14, 24( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stsxy3_gt4(r0) \ + __asm__ volatile( \ + "swc2 $12, 8( %0 );" \ + "swc2 $13, 20( %0 );" \ + "swc2 $14, 32( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stdp(r0) __asm__ volatile("swc2 $8, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stflg(r0) \ + __asm__ volatile( \ + "cfc2 $12, $31;" \ + "nop;" \ + "sw $12, 0( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "memory") + +#define gte_stflg_4(r0) \ + __asm__ volatile( \ + "cfc2 $12, $31;" \ + "addi $13, $0, 4;" \ + "sll $13, $13, 16;" \ + "and $12, $12, $13;" \ + "sw $12, 0( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "$13", "memory") + +#define gte_stsz(r0) __asm__ volatile("swc2 $19, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stsz3(r0, r1, r2) \ + __asm__ volatile( \ + "swc2 $17, 0( %0 );" \ + "swc2 $18, 0( %1 );" \ + "swc2 $19, 0( %2 )" \ + : \ + : "r"(r0), "r"(r1), "r"(r2) \ + : "memory") + +#define gte_stsz4(r0, r1, r2, r3) \ + __asm__ volatile( \ + "swc2 $16, 0( %0 );" \ + "swc2 $17, 0( %1 );" \ + "swc2 $18, 0( %2 );" \ + "swc2 $19, 0( %3 )" \ + : \ + : "r"(r0), "r"(r1), "r"(r2), "r"(r3) \ + : "memory") + +#define gte_stsz3c(r0) \ + __asm__ volatile( \ + "swc2 $17, 0( %0 );" \ + "swc2 $18, 4( %0 );" \ + "swc2 $19, 8( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stsz4c(r0) \ + __asm__ volatile( \ + "swc2 $16, 0( %0 );" \ + "swc2 $17, 4( %0 );" \ + "swc2 $18, 8( %0 );" \ + "swc2 $19, 12( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stszotz(r0) \ + __asm__ volatile( \ + "mfc2 $12, $19;" \ + "nop;" \ + "sra $12, $12, 2;" \ + "sw $12, 0( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "memory") + +#define gte_stotz(r0) __asm__ volatile("swc2 $7, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stopz(r0) __asm__ volatile("swc2 $24, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stlvl(r0) \ + __asm__ volatile( \ + "swc2 $9, 0( %0 );" \ + "swc2 $10, 4( %0 );" \ + "swc2 $11, 8( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stlvnl(r0) \ + __asm__ volatile( \ + "swc2 $25, 0( %0 );" \ + "swc2 $26, 4( %0 );" \ + "swc2 $27, 8( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_stlvnl0(r0) __asm__ volatile("swc2 $25, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stlvnl1(r0) __asm__ volatile("swc2 $26, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stlvnl2(r0) __asm__ volatile("swc2 $27, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stsv(r0) \ + __asm__ volatile( \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "mfc2 $14, $11;" \ + "sh $12, 0( %0 );" \ + "sh $13, 2( %0 );" \ + "sh $14, 4( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14", "memory") + +#define gte_stclmv(r0) \ + __asm__ volatile( \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "mfc2 $14, $11;" \ + "sh $12, 0( %0 );" \ + "sh $13, 6( %0 );" \ + "sh $14, 12( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14", "memory") + +#define gte_stbv(r0) \ + __asm__ volatile( \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "sb $12, 0( %0 );" \ + "sb $13, 1( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "$13", "memory") + +#define gte_stcv(r0) \ + __asm__ volatile( \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "mfc2 $14, $11;" \ + "sb $12, 0( %0 );" \ + "sb $13, 1( %0 );" \ + "sb $14, 2( %0 )" \ + : \ + : "r"(r0) \ + : "$12", 
"$13", "$14", "memory") + +#define gte_strgb(r0) __asm__ volatile("swc2 $22, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_strgb3(r0, r1, r2) \ + __asm__ volatile( \ + "swc2 $20, 0( %0 );" \ + "swc2 $21, 0( %1 );" \ + "swc2 $22, 0( %2 )" \ + : \ + : "r"(r0), "r"(r1), "r"(r2) \ + : "memory") + +#define gte_strgb3_g3(r0) \ + __asm__ volatile( \ + "swc2 $20, 4( %0 );" \ + "swc2 $21, 12( %0 );" \ + "swc2 $22, 20( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_strgb3_gt3(r0) \ + __asm__ volatile( \ + "swc2 $20, 4( %0 );" \ + "swc2 $21, 16( %0 );" \ + "swc2 $22, 28( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_strgb3_g4(r0) \ + __asm__ volatile( \ + "swc2 $20, 4( %0 );" \ + "swc2 $21, 12( %0 );" \ + "swc2 $22, 20( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_strgb3_gt4(r0) \ + __asm__ volatile( \ + "swc2 $20, 4( %0 );" \ + "swc2 $21, 16( %0 );" \ + "swc2 $22, 28( %0 )" \ + : \ + : "r"(r0) \ + : "memory") + +#define gte_ReadGeomOffset(r0, r1) \ + __asm__ volatile( \ + "cfc2 $12, $24;" \ + "cfc2 $13, $25;" \ + "sra $12, $12, 16;" \ + "sra $13, $13, 16;" \ + "sw $12, 0( %0 );" \ + "sw $13, 0( %1 )" \ + : \ + : "r"(r0), "r"(r1) \ + : "$12", "$13", "memory") + +#define gte_ReadGeomScreen(r0) \ + __asm__ volatile( \ + "cfc2 $12, $26;" \ + "nop;" \ + "sw $12, 0( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "memory") + +#define gte_ReadRotMatrix(r0) \ + __asm__ volatile( \ + "cfc2 $12, $0;" \ + "cfc2 $13, $1;" \ + "sw $12, 0( %0 );" \ + "sw $13, 4( %0 );" \ + "cfc2 $12, $2;" \ + "cfc2 $13, $3;" \ + "cfc2 $14, $4;" \ + "sw $12, 8( %0 );" \ + "sw $13, 12( %0 );" \ + "sw $14, 16( %0 );" \ + "cfc2 $12, $5;" \ + "cfc2 $13, $6;" \ + "cfc2 $14, $7;" \ + "sw $12, 20( %0 );" \ + "sw $13, 24( %0 );" \ + "sw $14, 28( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14", "memory") + +#define gte_sttr(r0) \ + __asm__ volatile( \ + "cfc2 $12, $5;" \ + "cfc2 $13, $6;" \ + "cfc2 $14, $7;" \ + "sw $12, 0( %0 );" \ + "sw $13, 4( %0 );" \ + "sw $14, 8( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14", "memory") + +#define gte_ReadLightMatrix(r0) \ + __asm__ volatile( \ + "cfc2 $12, $8;" \ + "cfc2 $13, $9;" \ + "sw $12, 0( %0 );" \ + "sw $13, 4( %0 );" \ + "cfc2 $12, $10;" \ + "cfc2 $13, $11;" \ + "cfc2 $14, $12;" \ + "sw $12, 8( %0 );" \ + "sw $13, 12( %0 );" \ + "sw $14, 16( %0 );" \ + "cfc2 $12, $13;" \ + "cfc2 $13, $14;" \ + "cfc2 $14, $15;" \ + "sw $12, 20( %0 );" \ + "sw $13, 24( %0 );" \ + "sw $14, 28( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14", "memory") + +#define gte_ReadColorMatrix(r0) \ + __asm__ volatile( \ + "cfc2 $12, $16;" \ + "cfc2 $13, $17;" \ + "sw $12, 0( %0 );" \ + "sw $13, 4( %0 );" \ + "cfc2 $12, $18;" \ + "cfc2 $13, $19;" \ + "cfc2 $14, $20;" \ + "sw $12, 8( %0 );" \ + "sw $13, 12( %0 );" \ + "sw $14, 16( %0 );" \ + "cfc2 $12, $21;" \ + "cfc2 $13, $22;" \ + "cfc2 $14, $23;" \ + "sw $12, 20( %0 );" \ + "sw $13, 24( %0 );" \ + "sw $14, 28( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14", "memory") + +#define gte_stlzc(r0) __asm__ volatile("swc2 $31, 0( %0 )" : : "r"(r0) : "memory") + +#define gte_stfc(r0) \ + __asm__ volatile( \ + "cfc2 $12, $21;" \ + "cfc2 $13, $22;" \ + "cfc2 $14, $23;" \ + "sw $12, 0( %0 );" \ + "sw $13, 4( %0 );" \ + "sw $14, 8( %0 )" \ + : \ + : "r"(r0) \ + : "$12", "$13", "$14", "memory") + +#define gte_mvlvtr() \ + __asm__ volatile( \ + "mfc2 $12, $25;" \ + "mfc2 $13, $26;" \ + "mfc2 $14, $27;" \ + "ctc2 $12, $5;" \ + "ctc2 $13, $6;" \ + "ctc2 $14, $7" \ + : \ + : \ + : "$12", "$13", "$14") + +#define gte_nop() __asm__ 
volatile("nop") + +#define gte_subdvl(r0, r1, r2) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 0( %1 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "sra $12, $12, 16;" \ + "sra $13, $13, 16;" \ + "subu $15, $12, $13;" \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "sw $15, 4( %2 );" \ + "subu $12, $12, $13;" \ + "sw $12, 0( %2 )" \ + : \ + : "r"(r0), "r"(r1), "r"(r2) \ + : "$12", "$13", "$14", "$15", "memory") + +#define gte_subdvd(r0, r1, r2) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 0( %1 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "sra $12, $12, 16;" \ + "sra $13, $13, 16;" \ + "subu $15, $12, $13;" \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "sh $15, 2( %2 );" \ + "subu $12, $12, $13;" \ + "sh $12, 0( %2 )" \ + : \ + : "r"(r0), "r"(r1), "r"(r2) \ + : "$12", "$13", "$14", "$15", "memory") + +#define gte_adddvl(r0, r1, r2) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 0( %1 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "sra $12, $12, 16;" \ + "sra $13, $13, 16;" \ + "addu $15, $12, $13;" \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "sw $15, 4( %2 );" \ + "addu $12, $12, $13;" \ + "sw $12, 0( %2 )" \ + : \ + : "r"(r0), "r"(r1), "r"(r2) \ + : "$12", "$13", "$14", "$15", "memory") + +#define gte_adddvd(r0, r1, r2) \ + __asm__ volatile( \ + "lw $12, 0( %0 );" \ + "lw $13, 0( %1 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "sra $12, $12, 16;" \ + "sra $13, $13, 16;" \ + "addu $15, $12, $13;" \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "sh $15, 2( %2 );" \ + "addu $12, $12, $13;" \ + "sh $12, 0( %2 )" \ + : \ + : "r"(r0), "r"(r1), "r"(r2) \ + : "$12", "$13", "$14", "$15", "memory") + +#define gte_FlipRotMatrixX() \ + __asm__ volatile( \ + "cfc2 $12, $0;" \ + "cfc2 $13, $1;" \ + "sll $14, $12, 16;" \ + "sra $14, $14, 16;" \ + "subu $14, $0, $14;" \ + "sra $15, $12, 16;" \ + "subu $15, $0, $15;" \ + "sll $15, $15, 16;" \ + "sll $14, $14, 16;" \ + "srl $14, $14, 16;" \ + "or $14, $14, $15;" \ + "ctc2 $14, $0;" \ + "sll $14, $13, 16;" \ + "sra $14, $14, 16;" \ + "subu $14, $0, $14;" \ + "sra $15, $13, 16;" \ + "sll $15, $15, 16;" \ + "sll $14, $14, 16;" \ + "srl $14, $14, 16;" \ + "or $14, $14, $15;" \ + "ctc2 $14, $1" \ + : \ + : \ + : "$12", "$13", "$14", "$15") + +#define gte_FlipTRX() \ + __asm__ volatile( \ + "cfc2 $12, $5;" \ + "nop;" \ + "subu $12, $0, $12;" \ + "ctc2 $12, $5" \ + : \ + : \ + : "$12") diff --git a/include/ctr/test.h b/include/ctr/test.h index 2b20e2ecb..6276c1f6e 100644 --- a/include/ctr/test.h +++ b/include/ctr/test.h @@ -8,12 +8,14 @@ #include #include +extern const char* s_nameTestedFunc; + void TEST_WRAPPER(); void LoadTestPatches(); -u32 PatchFunction_Beg(u32* index); +u32 PatchFunction_Beg(u32* index, const char* funcName); void PatchFunction_End(u32 index); -u32 PrintSVectorDiff(const char* name, const SVec3* expected, const SVec3* ret); -u32 PrintMatrixDiff(const char* name, const Matrix* expected, const Matrix* ret, u32 cmpTrans); +u32 PrintSVectorDiff(const SVec3* expected, const SVec3* ret); +u32 PrintMatrixDiff(const Matrix* expected, const Matrix* ret, u32 cmpTrans); force_inline void FlushCache() { @@ -24,8 +26,8 @@ force_inline void FlushCache() #define BACKUP_ADDR 0x80400000 -//#define TEST_MATH_IMPL -//#define TEST_RNG_IMPL +#define TEST_MATH_IMPL +#define TEST_RNG_IMPL #define TEST_COLL_IMPL #ifdef TEST_MATH_IMPL @@ -68,8 +70,11 @@ force_inline void FlushCache() void TEST_COLL_ProjectPointToEdge(const SVec3* v1, const SVec3* v2, const SVec3* point, const SVec3* ret); void 
TEST_COLL_CalculateTrianglePlane(const CollDCache* cache, CollVertex* v1, const CollVertex* v2, const CollVertex* v3, const CollVertex* ret); void TEST_COLL_LoadVerticeData(CollDCache* cache); + void TEST_COLL_LoadQuadblockData_LowLOD(CollDCache* cache, const Quadblock* quadblock, const CollDCache* ret); + void TEST_COLL_LoadQuadblockData_HighLOD(CollDCache* cache, const Quadblock* quadblock, const CollDCache* ret); void TEST_COLL_BarycentricTest(TestVertex* t, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3, const SVec3* pos, s32 ret); void TEST_COLL_TestTriangle(CollDCache* cache, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3, const CollDCache* ret); + void TEST_COLL_TestLeaf_Quadblock(const Quadblock* quadblock, CollDCache* cache, const CollDCache* ret); #else #define TEST_COLL_ProjectPointToEdge(out, v1, v2, point) #define TEST_COLL_CalculateTrianglePlane(cache, v1, v2, v3, ret) diff --git a/rewrite/src/exe/coll.c b/rewrite/src/exe/coll.c index 4ee358194..9ddccffd5 100644 --- a/rewrite/src/exe/coll.c +++ b/rewrite/src/exe/coll.c @@ -4,7 +4,7 @@ #include /* Address: 0x8001ede4 */ -void COLL_ProjectPointToEdge(SVec3* out, const SVec3* v1, const SVec3* v2, const SVec3* point) +static void COLL_ProjectPointToEdge(SVec3* out, const SVec3* v1, const SVec3* v2, const SVec3* point) { const SVec3 edge = { v2->x - v1->x, v2->y - v1->y, v2->z - v1->z }; const Matrix m = @@ -37,12 +37,6 @@ void COLL_ProjectPointToEdge(SVec3* out, const SVec3* v1, const SVec3* v2, const out->y = coords.y; out->z = coords.z; TEST_COLL_ProjectPointToEdge(v1, v2, point, out); - /* This is a hand written assembly function that breaks the ABI, - and some callers expect the argument registers to be untouched */ - __asm__ volatile("move $a0, %0" : : "r"((u32)out)); - __asm__ volatile("move $a1, %0" : : "r"((u32)v1)); - __asm__ volatile("move $a2, %0" : : "r"((u32)v2)); - __asm__ volatile("move $a3, %0" : : "r"((u32)point)); } /* Address: 0x8001f2dc */ @@ -91,7 +85,7 @@ static void COLL_LoadVerticeData(CollDCache* cache) } /* Address: 0x8001f67c */ -void COLL_LoadQuadblockData_LowLOD(CollDCache* cache, Quadblock* quadblock) +static void _COLL_LoadQuadblockData_LowLOD(CollDCache* cache, const Quadblock* quadblock) { COLL_LoadVerticeData(cache); cache->lodShift = 2; @@ -103,14 +97,19 @@ void COLL_LoadQuadblockData_LowLOD(CollDCache* cache, Quadblock* quadblock) } cache->normalScale = quadblock->triNormalVecDividend[8]; COLL_CalculateTrianglePlane(cache, &cache->quadblockCollVertices[0], &cache->quadblockCollVertices[1], &cache->quadblockCollVertices[2]); - /* This is a hand written assembly function that breaks the ABI, - and some callers expect the argument registers to be untouched */ - __asm__ volatile("move $a0, %0" : : "r"((u32)cache)); - __asm__ volatile("move $t9, %0" : : "r"((u32)quadblock)); +} + +static void COLL_LoadQuadblockData_LowLOD(CollDCache* cache, const Quadblock* quadblock) +{ +#ifdef TEST_COLL_IMPL + *(CollDCache*)(BACKUP_ADDR) = *cache; +#endif + _COLL_LoadQuadblockData_LowLOD(cache, quadblock); + TEST_COLL_LoadQuadblockData_LowLOD((CollDCache*)(BACKUP_ADDR), quadblock, cache); } /* Address: 0x8001f6f0 */ -void COLL_LoadQuadblockData_HighLOD(CollDCache* cache, Quadblock* quadblock) +static void _COLL_LoadQuadblockData_HighLOD(CollDCache* cache, const Quadblock* quadblock) { COLL_LoadVerticeData(cache); cache->lodShift = 0; @@ -134,10 +133,15 @@ void COLL_LoadQuadblockData_HighLOD(CollDCache* cache, Quadblock* quadblock) COLL_CalculateTrianglePlane(cache, 
&cache->quadblockCollVertices[6], &cache->quadblockCollVertices[4], &cache->quadblockCollVertices[1]); cache->normalScale = quadblock->triNormalVecDividend[3]; COLL_CalculateTrianglePlane(cache, &cache->quadblockCollVertices[5], &cache->quadblockCollVertices[6], &cache->quadblockCollVertices[2]); - /* This is a hand written assembly function that breaks the ABI, - and some callers expect the argument registers to be untouched */ - __asm__ volatile("move $a0, %0" : : "r"((u32)cache)); - __asm__ volatile("move $t9, %0" : : "r"((u32)quadblock)); +} + +static void COLL_LoadQuadblockData_HighLOD(CollDCache* cache, const Quadblock* quadblock) +{ +#ifdef TEST_COLL_IMPL + *(CollDCache*)(BACKUP_ADDR) = *cache; +#endif + _COLL_LoadQuadblockData_HighLOD(cache, quadblock); + TEST_COLL_LoadQuadblockData_HighLOD((CollDCache*)(BACKUP_ADDR), quadblock, cache); } /* Address: 0x8001f928 */ @@ -376,15 +380,90 @@ static void _COLL_TestTriangle(CollDCache* cache, const CollVertex* v1, const Co cache->numTrianglesCollided++; } -void COLL_TestTriangle(CollDCache* cache, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3) +static void COLL_TestTriangle(CollDCache* cache, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3) { #ifdef TEST_COLL_IMPL - *(CollDCache*)(BACKUP_ADDR) = *cache; + const u32 backupAddr = BACKUP_ADDR + sizeof(CollDCache); + *(CollDCache*)(backupAddr) = *cache; #endif _COLL_TestTriangle(cache, v1, v2, v3); - TEST_COLL_TestTriangle((CollDCache*)(BACKUP_ADDR), v1, v2, v3, cache); + TEST_COLL_TestTriangle((CollDCache*)(backupAddr), v1, v2, v3, cache); +} + +/* Address: 0x80020064 */ +static void _COLL_TestLeaf_Quadblock(const Quadblock* quadblock, CollDCache* cache) +{ + cache->currQuadblock = quadblock; + const u16 quadFlags = quadblock->flags; + if (((cache->collInput.quadblock.quadFlagsCheckColl & quadFlags) == 0) || (cache->collInput.quadblock.quadFlagsIgnoreColl & quadFlags) || + quadblock->bbox.min.x > cache->bbox.max.x || quadblock->bbox.max.x < cache->bbox.min.x || + quadblock->bbox.min.y > cache->bbox.max.y || quadblock->bbox.max.y < cache->bbox.min.y || + quadblock->bbox.min.z > cache->bbox.max.z || quadblock->bbox.max.z < cache->bbox.min.z ) { return; } + + const u16 collFlags = cache->collInput.quadblock.collFlags; + if (collFlags & COLLFLAGS_HIGH_LOD_QUAD) + { + if ((collFlags & COLLFLAGS_CACHED_HIGH_LOD_VERTICES) == 0) { COLL_LoadQuadblockData_HighLOD(cache, quadblock); } + cache->currTriangleIndex = 2; + COLL_TestTriangle(cache, &cache->quadblockCollVertices[0], &cache->quadblockCollVertices[4], &cache->quadblockCollVertices[5]); + cache->currTriangleIndex = 3; + COLL_TestTriangle(cache, &cache->quadblockCollVertices[4], &cache->quadblockCollVertices[6], &cache->quadblockCollVertices[5]); + cache->currTriangleIndex = 4; + COLL_TestTriangle(cache, &cache->quadblockCollVertices[6], &cache->quadblockCollVertices[4], &cache->quadblockCollVertices[1]); + /* This function isn't doing calculations critical enough to justify hand-writing it in assembly; they were really just asking for bugs... 
*/ +#ifdef FIX_CTR_BUGS + cache->currTriangleIndex = 5; +#endif + COLL_TestTriangle(cache, &cache->quadblockCollVertices[5], &cache->quadblockCollVertices[6], &cache->quadblockCollVertices[2]); +#ifndef FIX_CTR_BUGS + cache->currTriangleIndex = 6; +#endif + if (cache->quadblockThirdIndex != cache->quadblockFourthIndex) + { +#ifdef FIX_CTR_BUGS + cache->currTriangleIndex = 6; +#endif + COLL_TestTriangle(cache, &cache->quadblockCollVertices[8], &cache->quadblockCollVertices[6], &cache->quadblockCollVertices[7]); + cache->currTriangleIndex = 7; + COLL_TestTriangle(cache, &cache->quadblockCollVertices[7], &cache->quadblockCollVertices[3], &cache->quadblockCollVertices[8]); + cache->currTriangleIndex = 8; + COLL_TestTriangle(cache, &cache->quadblockCollVertices[1], &cache->quadblockCollVertices[7], &cache->quadblockCollVertices[6]); + cache->currTriangleIndex = 9; + COLL_TestTriangle(cache, &cache->quadblockCollVertices[2], &cache->quadblockCollVertices[6], &cache->quadblockCollVertices[8]); + } + return; + } + COLL_LoadQuadblockData_LowLOD(cache, quadblock); + cache->currTriangleIndex = 0; + COLL_TestTriangle(cache, &cache->quadblockCollVertices[0], &cache->quadblockCollVertices[1], &cache->quadblockCollVertices[2]); +#ifndef FIX_CTR_BUGS + cache->currTriangleIndex = 1; +#endif + if (cache->quadblockThirdIndex != cache->quadblockFourthIndex) + { +#ifdef FIX_CTR_BUGS + cache->currTriangleIndex = 1; +#endif + COLL_TestTriangle(cache, &cache->quadblockCollVertices[1], &cache->quadblockCollVertices[3], &cache->quadblockCollVertices[2]); + } +} + +void COLL_TestLeaf_Quadblock(const Quadblock* quadblock, CollDCache* cache) +{ +#ifdef TEST_COLL_IMPL + *(CollDCache*)(BACKUP_ADDR) = *cache; +#endif + _COLL_TestLeaf_Quadblock(quadblock, cache); +#ifdef TEST_COLL_IMPL + const u32 retAddr = BACKUP_ADDR + sizeof(CollDCache); + *(CollDCache*)(retAddr) = *cache; + *cache = *(CollDCache*)(BACKUP_ADDR); +#endif + TEST_COLL_TestLeaf_Quadblock(quadblock, cache, (CollDCache*)(retAddr)); +#ifdef TEST_COLL_IMPL + *cache = *(CollDCache*)(retAddr); +#endif /* This is a hand written assembly function that breaks the ABI, - and some callers expect the argument registers to be untouched */ + and some callers expect the argument registers to be untouched */ + __asm__ volatile("move $t9, %0" : : "r"((u32)quadblock)); } \ No newline at end of file diff --git a/rewrite/src/tests/test.c b/rewrite/src/tests/test.c index 90b82d9e8..4b2171176 100644 --- a/rewrite/src/tests/test.c +++ b/rewrite/src/tests/test.c @@ -29,12 +29,11 @@ FunctionPatch s_functions[] = TEST_FUNC(RNG_RandInt), TEST_FUNC(RNG_PseudoRand), TEST_FUNC(RNG_Random), - TEST_FUNC(COLL_ProjectPointToEdge), - TEST_FUNC(COLL_LoadQuadblockData_LowLOD), - TEST_FUNC(COLL_LoadQuadblockData_HighLOD), - TEST_FUNC(COLL_TestTriangle), + TEST_FUNC(COLL_TestLeaf_Quadblock), }; +const char* s_nameTestedFunc = nullptr; + void LoadTestPatches() { const u32 funcCount = ARR_LEN(s_functions); @@ -50,12 +49,13 @@ void LoadTestPatches() FlushCache(); } -u32 PatchFunction_Beg(u32* address) +u32 PatchFunction_Beg(u32* address, const char* funcName) { u32 addr = (u32)address; + s_nameTestedFunc = funcName; __asm__ volatile("move $k1, %0" : : "r"(addr)); - u32 index = 0; + u32 index = UINT32_MAX; const u32 funcCount = ARR_LEN(s_functions); for (u32 i = 0; i < funcCount; i++) { @@ -73,12 +73,13 @@ void PatchFunction_End(u32 index) 
{ + if (index == UINT32_MAX) { return; } *(s_functions[index].address) = s_functions[index].firstNewInst; *(s_functions[index].address + 1) = s_functions[index].secondNewInst; FlushCache(); } -u32 PrintSVectorDiff(const char* name, const SVec3* expected, const SVec3* ret) +u32 PrintSVectorDiff(const SVec3* expected, const SVec3* ret) { u32 failed = 0; for (u32 i = 0; i < 3; i++) @@ -86,13 +87,13 @@ u32 PrintSVectorDiff(const char* name, const SVec3* expected, const SVec3* ret) if (expected->v[i] != ret->v[i]) { failed = 1; - ND_printf("[%s] Test Failed:\nv[%d] = %d, got %d\n", name, i, expected->v[i], ret->v[i]); + ND_printf("[%s] Test Failed:\nv[%d] = %d, got %d\n", s_nameTestedFunc, i, expected->v[i], ret->v[i]); } } return failed; } -u32 PrintMatrixDiff(const char* name, const Matrix* expected, const Matrix* ret, u32 cmpTrans) +u32 PrintMatrixDiff(const Matrix* expected, const Matrix* ret, u32 cmpTrans) { u32 failed = 0; for (u32 i = 0; i < 3; i++) @@ -102,13 +103,13 @@ u32 PrintMatrixDiff(const char* name, const Matrix* expected, const Matrix* ret, if (expected->m[i][j] != ret->m[i][j]) { failed = 1; - ND_printf("[%s] Test Failed:\nm[%d][%d] = %d, got %d\n", name, i, j, expected->m[i][j], ret->m[i][j]); + ND_printf("[%s] Test Failed:\nm[%d][%d] = %d, got %d\n", s_nameTestedFunc, i, j, expected->m[i][j], ret->m[i][j]); } } if ((cmpTrans) && (expected->t.v[i] != ret->t.v[i])) { failed = 1; - ND_printf("[%s] Test Failed:\nt[%d] = %d, got %d\n", name, i, expected->t.v[i], ret->t.v[i]); + ND_printf("[%s] Test Failed:\nt[%d] = %d, got %d\n", s_nameTestedFunc, i, expected->t.v[i], ret->t.v[i]); } } return failed; diff --git a/rewrite/src/tests/test_coll.c b/rewrite/src/tests/test_coll.c index 41f0c8b34..5172951b8 100644 --- a/rewrite/src/tests/test_coll.c +++ b/rewrite/src/tests/test_coll.c @@ -2,7 +2,7 @@ #ifdef TEST_COLL_IMPL -static u32 PrintDCacheDiff(const char* name, const CollDCache* expected, const CollDCache* ret) +static u32 PrintDCacheDiff(const CollDCache* expected, const CollDCache* ret) { u32 failed = false; const u8* pExpected = (const u8*) expected; @@ -10,31 +10,31 @@ static u32 PrintDCacheDiff(const char* name, const CollDCache* expected, const C const u32 len = sizeof(CollDCache); for (u32 i = 0; i < len; i++) { - if (pExpected[i] != pRet[i]) { ND_printf("[%s] Test Failed:\nOffset %x: %d, got: %d\n", name, i, (u32) pExpected[i], (u32) pRet[i]); failed = true; } + if (pExpected[i] != pRet[i]) { ND_printf("[%s] Test Failed:\nOffset %x: %d, got: %d\n", s_nameTestedFunc, i, (u32) pExpected[i], (u32) pRet[i]); failed = true; } } return failed; } void TEST_COLL_ProjectPointToEdge(const SVec3* v1, const SVec3* v2, const SVec3* point, const SVec3* ret) { - const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_ProjectPointToEdge)); + const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_ProjectPointToEdge), "COLL_ProjectPointToEdge"); SVec3 expected; typedef void (*Func)(SVec3* out, const SVec3* v1, const SVec3* v2, const SVec3* point); Func func = (Func) TEST_WRAPPER; func(&expected, v1, v2, point); - PrintSVectorDiff("COLL_ProjectPointToEdge", &expected, ret); + PrintSVectorDiff(&expected, ret); PatchFunction_End(index); } void TEST_COLL_CalculateTrianglePlane(const CollDCache* cache, CollVertex* v1, const CollVertex* v2, const CollVertex* v3, const CollVertex* ret) { - const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_CalculateTrianglePlane)); + const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_CalculateTrianglePlane), "COLL_CalculateTrianglePlane"); typedef void (*Func)(const 
CollDCache* cache, CollVertex* v1, const CollVertex* v2, const CollVertex* v3); Func func = (Func) TEST_WRAPPER; func(cache, v1, v2, v3); - PrintSVectorDiff("COLL_CalculateTrianglePlane", &v1->triNormal, &ret->triNormal); - if (v1->planeDist != ret->planeDist) { ND_printf("[COLL_CalculateTrianglePlane] Test Failed:\nDist: %d\nResult: %d\n", v1->planeDist, ret->planeDist); } - if (v1->normalDominantAxis != ret->normalDominantAxis) { ND_printf("[COLL_CalculateTrianglePlane] Test Failed:\nAxis: %d\nResult: %d\n", v1->normalDominantAxis, ret->normalDominantAxis); } + PrintSVectorDiff(&v1->triNormal, &ret->triNormal); + if (v1->planeDist != ret->planeDist) { ND_printf("[%s] Test Failed:\nDist: %d\nResult: %d\n", s_nameTestedFunc, v1->planeDist, ret->planeDist); } + if (v1->normalDominantAxis != ret->normalDominantAxis) { ND_printf("[%s] Test Failed:\nAxis: %d\nResult: %d\n", s_nameTestedFunc, v1->normalDominantAxis, ret->normalDominantAxis); } PatchFunction_End(index); } @@ -47,7 +47,7 @@ void TEST_COLL_LoadVerticeData(CollDCache* cache) } const u16 thirdIndex = cache->quadblockThirdIndex; const u16 fourthIndex = cache->quadblockFourthIndex; - const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_LoadVerticeData)); + const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_LoadVerticeData), "COLL_LoadVerticeData"); const u32 quadblock = (u32) cache->currQuadblock; const u32 levVertices = (u32) cache->meshInfo->vertices; @@ -59,35 +59,65 @@ void TEST_COLL_LoadVerticeData(CollDCache* cache) func(cache); for (u32 i = 0; i < NUM_VERTICES_QUADBLOCK; i++) { - PrintSVectorDiff("COLL_LoadVerticeData", &cache->quadblockCollVertices[i].pos, &vertices[i].pos); + PrintSVectorDiff(&cache->quadblockCollVertices[i].pos, &vertices[i].pos); if (cache->quadblockCollVertices[i].levVertex != vertices[i].levVertex) { - ND_printf("[COLL_LoadVerticeData] Test Failed: levVertex at index %d\n", i); + ND_printf("[%s] Test Failed: levVertex at index %d\n", s_nameTestedFunc, i); } } - if (cache->quadblockThirdIndex != thirdIndex) { ND_printf("[COLL_LoadVerticeData] Test Failed:\nthirdIndex: %d\nResult:%d\n", cache->quadblockThirdIndex, thirdIndex); } - if (cache->quadblockFourthIndex != fourthIndex) { ND_printf("[COLL_LoadVerticeData] Test Failed:\nfourthIndex: %d\nResult:%d\n", cache->quadblockFourthIndex, fourthIndex);} + if (cache->quadblockThirdIndex != thirdIndex) { ND_printf("[%s] Test Failed:\nthirdIndex: %d\nResult:%d\n", s_nameTestedFunc, cache->quadblockThirdIndex, thirdIndex); } + if (cache->quadblockFourthIndex != fourthIndex) { ND_printf("[%s] Test Failed:\nfourthIndex: %d\nResult:%d\n", s_nameTestedFunc, cache->quadblockFourthIndex, fourthIndex);} + PatchFunction_End(index); +} + +void TEST_COLL_LoadQuadblockData_LowLOD(CollDCache* cache, const Quadblock* quadblock, const CollDCache* ret) +{ + const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_LoadQuadblockData_LowLOD), "COLL_LoadQuadblockData_LowLOD"); + typedef void (*Func)(CollDCache* cache, const Quadblock* quadblock); + Func func = (Func) TEST_WRAPPER; + func(cache, quadblock); + PrintDCacheDiff(cache, ret); + PatchFunction_End(index); +} + +void TEST_COLL_LoadQuadblockData_HighLOD(CollDCache* cache, const Quadblock* quadblock, const CollDCache* ret) +{ + const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_LoadQuadblockData_HighLOD), "COLL_LoadQuadblockData_HighLOD"); + typedef void (*Func)(CollDCache* cache, const Quadblock* quadblock); + Func func = (Func) TEST_WRAPPER; + func(cache, quadblock); + PrintDCacheDiff(cache, ret); PatchFunction_End(index); } 
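+ +/* Note on the pattern shared by these TEST_COLL_* helpers (the TEST_WRAPPER trampoline itself is outside this patch, so the hand-off details below are inferred from test.c): the coll.c wrapper snapshots the pre-call CollDCache at BACKUP_ADDR, runs the rewritten implementation, then passes the snapshot and the rewrite's result (ret) here; PatchFunction_Beg records the tested function's name, moves its address into $k1 for TEST_WRAPPER and presumably restores the original entry instructions, so the call through TEST_WRAPPER replays the original game code on the snapshot; PrintDCacheDiff then byte-compares the two caches before PatchFunction_End re-installs the patch words. */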
void TEST_COLL_BarycentricTest(TestVertex* t, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3, const SVec3* pos, s32 ret) { - const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_BarycentricTest)); + const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_BarycentricTest), "COLL_BarycentricTest"); typedef s32 (*Func)(TestVertex* t, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3); Func func = (Func) TEST_WRAPPER; const s32 expected = func(t, v1, v2, v3); - PrintSVectorDiff("COLL_BarycentricTest", &t->pos, pos); - if (expected != ret) { ND_printf("[COLL_BarycentricTest] Test Failed:\nExpected: %d\nResult: %d\n", expected, ret); } + PrintSVectorDiff(&t->pos, pos); + if (expected != ret) { ND_printf("[%s] Test Failed:\nExpected: %d\nResult: %d\n", s_nameTestedFunc, expected, ret); } PatchFunction_End(index); } void TEST_COLL_TestTriangle(CollDCache* cache, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3, const CollDCache* ret) { - const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_TestTriangle)); + const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_TestTriangle), "COLL_TestTriangle"); typedef void (*Func)(CollDCache* cache, const CollVertex* v1, const CollVertex* v2, const CollVertex* v3); Func func = (Func) TEST_WRAPPER; func(cache, v1, v2, v3); - PrintDCacheDiff("COLL_TestTriangle", cache, ret); + PrintDCacheDiff(cache, ret); + PatchFunction_End(index); +} + +void TEST_COLL_TestLeaf_Quadblock(const Quadblock* quadblock, CollDCache* cache, const CollDCache* ret) +{ + const u32 index = PatchFunction_Beg((u32*)(&ND_COLL_TestLeaf_Quadblock), "COLL_TestLeaf_Quadblock"); + typedef void (*Func)(const Quadblock* quadblock, CollDCache* cache); + Func func = (Func) TEST_WRAPPER; + func(quadblock, cache); + PrintDCacheDiff(cache, ret); PatchFunction_End(index); } diff --git a/rewrite/src/tests/test_math.c b/rewrite/src/tests/test_math.c index 1ded33f5a..312c339a7 100644 --- a/rewrite/src/tests/test_math.c +++ b/rewrite/src/tests/test_math.c @@ -4,68 +4,68 @@ void TEST_MATH_Sin(u32 angle, s32 ret) { - const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_Sin)); + const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_Sin), "MATH_Sin"); const s32 expected = ND_MATH_Sin(angle); - if (expected != ret) { ND_printf("[MATH_Sin] Test Failed:\nInput: %d\nExpected: %d\nResult: %d\n", angle, expected, ret); } + if (expected != ret) { ND_printf("[%s] Test Failed:\nInput: %d\nExpected: %d\nResult: %d\n", s_nameTestedFunc, angle, expected, ret); } PatchFunction_End(index); } void TEST_MATH_Cos(u32 angle, s32 ret) { - const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_Cos)); + const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_Cos), "MATH_Cos"); const s32 expected = ND_MATH_Cos(angle); - if (expected != ret) { ND_printf("[MATH_Cos] Test Failed:\nInput: %d\nExpected: %d\nResult: %d\n", angle, expected, ret); } + if (expected != ret) { ND_printf("[%s] Test Failed:\nInput: %d\nExpected: %d\nResult: %d\n", s_nameTestedFunc, angle, expected, ret); } PatchFunction_End(index); } void TEST_MATH_Sqrt(u32 n, u32 shift, u32 ret) { - const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_Sqrt)); + const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_Sqrt), "MATH_Sqrt"); const u32 expected = ND_MATH_Sqrt(n, shift); - if (expected != ret) { ND_printf("[MATH_Sqrt] Test Failed:\nInput: %d %d\nExpected: %d\nResult: %d\n", n, shift, expected, ret); } + if (expected != ret) { ND_printf("[%s] Test Failed:\nInput: %d %d\nExpected: %d\nResult: %d\n", s_nameTestedFunc, n, shift, expected, ret); } 
PatchFunction_End(index); } void TEST_MATH_GetInverseMatrixTransformation(const Matrix* matrix, const Matrix* ret) { Matrix out; - const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_GetInverseMatrixTransformation)); + const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_GetInverseMatrixTransformation), "MATH_GetInverseMatrixTransformation"); ND_MATH_GetInverseMatrixTransformation(&out, matrix); - PrintMatrixDiff("MATH_GetInverseMatrixTransformation", &out, ret, true); + PrintMatrixDiff(&out, ret, true); PatchFunction_End(index); } void TEST_MATH_VectorLength(const SVec3* vector, s32 ret) { - const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_VectorLength)); + const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_VectorLength), "MATH_VectorLength"); const s32 expected = ND_MATH_VectorLength(vector); - if (expected != ret) { ND_printf("[MATH_VectorLength] Test Failed:\nInput: %d %d %d\nExpected: %d\nResult: %d\n", vector->x, vector->y, vector->z, expected, ret); } + if (expected != ret) { ND_printf("[%s] Test Failed:\nInput: %d %d %d\nExpected: %d\nResult: %d\n", s_nameTestedFunc, vector->x, vector->y, vector->z, expected, ret); } PatchFunction_End(index); } void TEST_MATH_VectorNormalize(SVec3* vector, const SVec3* ret) { - const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_VectorNormalize)); + const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_VectorNormalize), "MATH_VectorNormalize"); ND_MATH_VectorNormalize(vector); - PrintSVectorDiff("MATH_VectorNormalize", vector, ret); + PrintSVectorDiff(vector, ret); PatchFunction_End(index); } void TEST_MATH_CombineMatrixTransformation(const Matrix* m, const Matrix* n, const Matrix* ret) { Matrix expected; - const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_CombineMatrixTransformation)); + const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_CombineMatrixTransformation), "MATH_CombineMatrixTransformation"); ND_MATH_CombineMatrixTransformation(&expected, m, n); - PrintMatrixDiff("MATH_CombineMatrixTransformation", &expected, ret, true); + PrintMatrixDiff(&expected, ret, true); PatchFunction_End(index); } void TEST_MATH_MatrixMultiplication(const Matrix* m, const Matrix* n, const Matrix* ret) { Matrix expected; - const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_MatrixMultiplication)); + const u32 index = PatchFunction_Beg((u32*)(&ND_MATH_MatrixMultiplication), "MATH_MatrixMultiplication"); ND_MATH_MatrixMultiplication(&expected, m, n); - PrintMatrixDiff("MATH_MatrixMultiplication", &expected, ret, false); + PrintMatrixDiff(&expected, ret, false); PatchFunction_End(index); } diff --git a/rewrite/src/tests/test_rng.c b/rewrite/src/tests/test_rng.c index 8ed24b458..6f7d5434a 100644 --- a/rewrite/src/tests/test_rng.c +++ b/rewrite/src/tests/test_rng.c @@ -10,11 +10,11 @@ void BACKUP_RNG_Rand() void TEST_RNG_Rand() { - const u32 index = PatchFunction_Beg((u32*)(&ND_RNG_Rand)); + const u32 index = PatchFunction_Beg((u32*)(&ND_RNG_Rand), "RNG_Rand"); const u32 ret = e_seed; e_seed = *(u32*) BACKUP_ADDR; ND_RNG_Rand(); - if (e_seed != ret) { ND_printf("[RNG_Rand] Test Failed:\nExpected: %d\nResult: %d\n", e_seed, ret); } + if (e_seed != ret) { ND_printf("[%s] Test Failed:\nExpected: %d\nResult: %d\n", s_nameTestedFunc, e_seed, ret); } PatchFunction_End(index); } @@ -26,28 +26,28 @@ void BACKUP_RNG_RandInt() void TEST_RNG_RandInt(u32 n, s32 ret) { - const u32 index = PatchFunction_Beg((u32*)(&ND_RNG_RandInt)); + const u32 index = PatchFunction_Beg((u32*)(&ND_RNG_RandInt), "RNG_RandInt"); e_gameTracker->seed = *(RNGSeed*) BACKUP_ADDR; const s32 expected = 
-	if (expected != ret) { ND_printf("[RNG_RandInt] Test Failed:\nExpected: %d\nResult: %d\n", expected, ret); }
+	if (expected != ret) { ND_printf("[%s] Test Failed:\nExpected: %d\nResult: %d\n", s_nameTestedFunc, expected, ret); }
 	PatchFunction_End(index);
 }
 
 void TEST_RNG_PseudoRand(u16 n, u16 ret)
 {
-	const u32 index = PatchFunction_Beg((u32*)(&ND_RNG_PseudoRand));
+	const u32 index = PatchFunction_Beg((u32*)(&ND_RNG_PseudoRand), "RNG_PseudoRand");
 	const u16 expected = ND_RNG_PseudoRand(n);
-	if (expected != ret) { ND_printf("[RNG_PseudoRand] Test Failed:\nExpected: %d\nResult: %d\n", expected, ret); }
+	if (expected != ret) { ND_printf("[%s] Test Failed:\nExpected: %d\nResult: %d\n", s_nameTestedFunc, expected, ret); }
 	PatchFunction_End(index);
 }
 
 void TEST_RNG_Random(RNGSeed* seed, const RNGSeed* ret)
 {
-	const u32 index = PatchFunction_Beg((u32*)(&ND_RNG_Random));
+	const u32 index = PatchFunction_Beg((u32*)(&ND_RNG_Random), "RNG_Random");
 	const u32 expected = ND_RNG_Random(seed);
-	if (seed->a != ret->a) { ND_printf("[RNG_Random] Test Failed:\nseed->a: %d\nret->a: %d\n", seed->a, ret->a); }
-	if (seed->b != ret->b) { ND_printf("[RNG_Random] Test Failed:\nseed->b: %d\nret->b: %d\n", seed->b, ret->b); }
-	if (expected != ret->b) { ND_printf("[RNG_Random] Test Failed:\nExpected: %d\nret: %d\n", expected, ret->b); }
+	if (seed->a != ret->a) { ND_printf("[%s] Test Failed:\nseed->a: %d\nret->a: %d\n", s_nameTestedFunc, seed->a, ret->a); }
+	if (seed->b != ret->b) { ND_printf("[%s] Test Failed:\nseed->b: %d\nret->b: %d\n", s_nameTestedFunc, seed->b, ret->b); }
+	if (expected != ret->b) { ND_printf("[%s] Test Failed:\nExpected: %d\nret: %d\n", s_nameTestedFunc, expected, ret->b); }
 	PatchFunction_End(index);
 }
diff --git a/symbols/gcc-syms-rewrite.txt b/symbols/gcc-syms-rewrite.txt
index 759d14ff9..78f67e73e 100644
--- a/symbols/gcc-syms-rewrite.txt
+++ b/symbols/gcc-syms-rewrite.txt
@@ -71,7 +71,7 @@ ND_COLL_LoadQuadblockData_HighLOD = 0x8001f6f0;
 ND_COLL_LoadVerticeData = 0x8001f7f0;
 ND_COLL_BarycentricTest = 0x8001f928;
 ND_COLL_TestTriangle = 0x8001fc40;
-ND_COLL_MOVED_QUADBLK_TestTriangles = 0x80020064;
+ND_COLL_TestLeaf_Quadblock = 0x80020064;
 ND_COLL_MOVED_BSPLEAF_TestQuadblocks = 0x800202a8;
 ND_COLL_MOVED_PlayerSearch = 0x80020410;
 ND_COLL_MOVED_ScrubImpact = 0x80020c58;
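
The refactor in PATCH 1/3 boils down to one convention: PatchFunction_Beg records the name it is given in the shared s_nameTestedFunc pointer, so every failure message can use a single generic "[%s]" format instead of hard-coding its own tag. The following is a minimal host-compilable sketch of that pattern; the Demo_* names and the trivial Square test are illustrative stand-ins, not code from this repo.

#include <stdio.h>
#include <stdint.h>

typedef uint32_t u32;
typedef int32_t s32;

/* Name of the function currently under test; set once per test in the
 * *_Beg call and read by every failure printf. */
static const char* s_nameTestedFunc = "";

static u32 Demo_PatchFunction_Beg(const char* funcName)
{
	s_nameTestedFunc = funcName;
	return 0; /* the real harness returns an index into its patch table */
}

static void Demo_PatchFunction_End(u32 index)
{
	(void) index; /* the real harness re-applies the patch here */
}

static void Demo_TEST_Square(s32 input, s32 ret)
{
	const u32 index = Demo_PatchFunction_Beg("Square");
	const s32 expected = input * input;
	/* One generic [%s] format string now serves every test. */
	if (expected != ret) { printf("[%s] Test Failed:\nExpected: %d\nResult: %d\n", s_nameTestedFunc, expected, ret); }
	Demo_PatchFunction_End(index);
}

int main(void)
{
	Demo_TEST_Square(3, 9);  /* passes silently */
	Demo_TEST_Square(3, 10); /* prints "[Square] Test Failed: ..." */
	return 0;
}
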
From 6c68cb33dd3769c6eeedc961a14be417f19be413 Mon Sep 17 00:00:00 2001
From: mateusfavarin
Date: Sat, 27 Sep 2025 20:13:38 -0700
Subject: [PATCH 2/3] address code review

---
 include/ctr/macros.h     | 14 ++++++++++++++
 rewrite/src/exe/coll.c   |  4 ++--
 rewrite/src/tests/test.c |  4 ++--
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/include/ctr/macros.h b/include/ctr/macros.h
index 6f0a351e5..86a48c648 100644
--- a/include/ctr/macros.h
+++ b/include/ctr/macros.h
@@ -17,6 +17,20 @@ typedef int16_t s16;
 typedef uint8_t u8;
 typedef int8_t s8;
 
+#define U32_MAX UINT32_MAX
+#define S32_MAX INT32_MAX
+#define U16_MAX UINT16_MAX
+#define S16_MAX INT16_MAX
+#define U8_MAX UINT8_MAX
+#define S8_MAX INT8_MAX
+
+#define U32_MIN 0u /* stdint.h defines no UINT*_MIN; unsigned minimum is 0 */
+#define S32_MIN INT32_MIN
+#define U16_MIN 0u
+#define S16_MIN INT16_MIN
+#define U8_MIN 0u
+#define S8_MIN INT8_MIN
+
 #define AugReview 805
 #define SepReview 903
 #define UsaRetail 926
diff --git a/rewrite/src/exe/coll.c b/rewrite/src/exe/coll.c
index 9ddccffd5..1b374af07 100644
--- a/rewrite/src/exe/coll.c
+++ b/rewrite/src/exe/coll.c
@@ -416,7 +416,7 @@ static void _COLL_TestLeaf_Quadblock(const Quadblock* quadblock, CollDCache* cac
 #endif
 	COLL_TestTriangle(cache, &cache->quadblockCollVertices[5], &cache->quadblockCollVertices[6], &cache->quadblockCollVertices[2]);
 #ifndef FIX_CTR_BUG
-	cache->currTriangleIndex = 6;
+	cache->currTriangleIndex = 6; // not a bug, just an unnecessary assignment
 #endif
 	if (cache->quadblockThirdIndex != cache->quadblockFourthIndex)
 	{
@@ -437,7 +437,7 @@ static void _COLL_TestLeaf_Quadblock(const Quadblock* quadblock, CollDCache* cac
 	cache->currTriangleIndex = 0;
 	COLL_TestTriangle(cache, &cache->quadblockCollVertices[0], &cache->quadblockCollVertices[1], &cache->quadblockCollVertices[2]);
 #ifndef FIX_CTR_BUG
-	cache->currTriangleIndex = 1;
+	cache->currTriangleIndex = 1; // not a bug, just an unnecessary assignment
 #endif
 	if (cache->quadblockThirdIndex != cache->quadblockFourthIndex)
 	{
diff --git a/rewrite/src/tests/test.c b/rewrite/src/tests/test.c
index 4b2171176..071af047f 100644
--- a/rewrite/src/tests/test.c
+++ b/rewrite/src/tests/test.c
@@ -55,7 +55,7 @@ u32 PatchFunction_Beg(u32* address, const char* funcName)
 	s_nameTestedFunc = funcName;
 	__asm__ volatile("move $k1, %0" : : "r"(addr));
 
-	u32 index = UINT32_MAX;
+	u32 index = U32_MAX;
 	const u32 funcCount = ARR_LEN(s_functions);
 	for (u32 i = 0; i < funcCount; i++)
 	{
@@ -73,7 +73,7 @@ u32 PatchFunction_Beg(u32* address, const char* funcName)
 
 void PatchFunction_End(u32 index)
 {
-	if (index == UINT32_MAX) { return; }
+	if (index == U32_MAX) { return; }
 	*(s_functions[index].address) = s_functions[index].firstNewInst;
 	*(s_functions[index].address + 1) = s_functions[index].secondNewInst;
 	FlushCache();
From 8a925a5b6c681d3ad40e56904e79d6206e3325bf Mon Sep 17 00:00:00 2001
From: mateusfavarin
Date: Sat, 27 Sep 2025 20:16:13 -0700
Subject: [PATCH 3/3] add missing defs

---
 include/ctr/test.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/ctr/test.h b/include/ctr/test.h
index 6276c1f6e..2cf6a28c0 100644
--- a/include/ctr/test.h
+++ b/include/ctr/test.h
@@ -79,6 +79,9 @@ force_inline void FlushCache()
 	#define TEST_COLL_ProjectPointToEdge(out, v1, v2, point)
 	#define TEST_COLL_CalculateTrianglePlane(cache, v1, v2, v3, ret)
 	#define TEST_COLL_LoadVerticeData(cache)
+	#define TEST_COLL_LoadQuadblockData_LowLOD(cache, quadblock, ret)
+	#define TEST_COLL_LoadQuadblockData_HighLOD(cache, quadblock, ret)
 	#define TEST_COLL_BarycentricTest(t, v1, v2, v3, pos, ret)
 	#define TEST_COLL_TestTriangle(cache, v1, v2, v3, ret)
+	#define TEST_COLL_TestLeaf_Quadblock(quadblock, cache, ret)
 #endif
\ No newline at end of file
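
The U32_MAX alias introduced in PATCH 2/3 doubles as the harness's "nothing was patched" sentinel: PatchFunction_Beg returns it when the target address is not in the patch table, and PatchFunction_End becomes a no-op for it, so the end call is always safe to make unconditionally. A self-contained sketch of that guard follows; the DemoPatch table and its contents are illustrative stand-ins, not the project's actual s_functions data.

#include <stdint.h>

typedef uint32_t u32;

#define U32_MAX UINT32_MAX
#define ARR_LEN(arr) (sizeof(arr) / sizeof((arr)[0]))

typedef struct DemoPatch
{
	u32* address;   /* entry point of the patched function */
	u32 firstInst;  /* replacement words to (re)install at the entry point */
	u32 secondInst;
} DemoPatch;

static u32 s_demoWords[4]; /* stands in for patchable code memory */
static DemoPatch s_demoPatches[] = {
	{ &s_demoWords[0], 0x11111111, 0x22222222 },
};

/* Returns the table index of the patch for `address`,
 * or the U32_MAX sentinel if the address is not registered. */
static u32 Demo_PatchFunction_Beg(u32* address)
{
	for (u32 i = 0; i < ARR_LEN(s_demoPatches); i++)
	{
		if (s_demoPatches[i].address == address) { return i; }
	}
	return U32_MAX; /* nothing was patched */
}

/* Reinstalls the patch; the sentinel check makes this a no-op
 * whenever _Beg found no matching entry. */
static void Demo_PatchFunction_End(u32 index)
{
	if (index == U32_MAX) { return; }
	*(s_demoPatches[index].address) = s_demoPatches[index].firstInst;
	*(s_demoPatches[index].address + 1) = s_demoPatches[index].secondInst;
}

int main(void)
{
	const u32 hit = Demo_PatchFunction_Beg(&s_demoWords[0]);
	Demo_PatchFunction_End(hit);  /* writes the two replacement words */
	const u32 miss = Demo_PatchFunction_Beg(&s_demoWords[2]);
	Demo_PatchFunction_End(miss); /* no-op via the sentinel */
	return 0;
}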