#pragma once #include "gte_instruction.hpp" // GTE Overview // GTE Data Register Summary (cop2r0-31) // // `mtc2` (Move To Coprocessor 2): Sets a Data Register (0–31). // `mfc2` (Move From Coprocessor 2): Gets a Data Register (0–31). // // | cop2r0-1 | 3xS16 | VXY0,VZ0 | Vector 0 (X,Y,Z) | // | cop2r2-3 | 3xS16 | VXY1,VZ1 | Vector 1 (X,Y,Z) | // | cop2r4-5 | 3xS16 | VXY2,VZ2 | Vector 2 (X,Y,Z) | // | cop2r6 | 4xU8 | RGBC | Color/code value | // | cop2r7 | 1xU16 | OTZ | Average Z value (for Ordering Table) | // | cop2r8 | 1xS16 | IR0 | 16bit Accumulator (Interpolate) | // | cop2r9-11 | 3xS16 | IR1,IR2,IR3 | 16bit Accumulator (Vector) | // | cop2r12-15 | 6xS16 | SXY0,SXY1,SXY2,SXYP | Screen XY-coordinate FIFO (3 stages) | // | cop2r16-19 | 4xU16 | SZ0,SZ1,SZ2,SZ3 | Screen Z-coordinate FIFO (4 stages) | // | cop2r20-22 | 12xU8 | RGB0,RGB1,RGB2 | Color CRGB-code/color FIFO (3 stages) | // | cop2r23 | 4xU8 | (RES1) | Prohibited | // | cop2r24 | 1xS32 | MAC0 | 32bit Maths Accumulators (Value) | // | cop2r25-27 | 3xS32 | MAC1,MAC2,MAC3 | 32bit Maths Accumulators (Vector) | // | cop2r28-29 | 1xU15 | IRGB,ORGB | Convert RGB Color (48bit vs 15bit) | // | cop2r30-31 | 2xS32 | LZCS,LZCR | Count Leading-Zeroes/Ones (sign bits) | // // GTE Control Register Summary (cop2r32-63) // ctc2 (Copy To Control Coprocessor 2): Sets a Control Register (cnt0–31). // cfc2 (Copy From Control Coprocessor 2): Gets a Control Register (cnt0–31). // // | cop2r32-36 9xS16 RT11RT12,..,RT33 | Rotation matrix (3x3) | cnt0-4 | // | cop2r37-39 3x 32 TRX,TRY,TRZ | Translation vector (X,Y,Z) | cnt5-7 | // | cop2r40-44 9xS16 L11L12,..,L33 | Light source matrix (3x3) | cnt8-12 | // | cop2r45-47 3x 32 RBK,GBK,BBK | Background color (R,G,B) | cnt13-15 | // | cop2r48-52 9xS16 LR1LR2,..,LB3 | Light color matrix source (3x3) | cnt16-20 | // | cop2r53-55 3x 32 RFC,GFC,BFC | Far color (R,G,B) | cnt21-23 | // | cop2r56-57 2x 32 OFX,OFY | Screen offset (X,Y) | cnt24-25 | (1bit sign, 15bit integer, 16bit fraction) // | cop2r58 BuggyU16 H | Projection plane distance. | cnt26 | (0bit sign, 16bit integer, 0bit fraction) // | cop2r59 S16 DQA | Depth queing parameter A (coeff) | cnt27 | // | cop2r60 32 DQB | Depth queing parameter B (offset) | cnt28 | // | cop2r61-62 2xS16 ZSF3,ZSF4 | Average Z scale factors | cnt29-30 | // | cop2r63 U20 FLAG | Returns any calculation errors | cnt31 | namespace JabyEngine { namespace GTE { static constexpr auto StackSize = 16; /* matrix: first input Sets the 3x3 constant rotation matrix and the parallel transfer vector from input */ void set_matrix(const MATRIX& matrix); /* returns: current matrix Gets the current 3x3 constant rotation matrix and the parallel transfer vector */ MATRIX get_matrix(); /* RotTrans Perform coordinate transformation using a rotation matrix input: Input vector output: Output vector flag: flag output */ static void rot_trans(const SVECTOR& input, VECTOR& output, int32_t& flag) { ldv0(input); rt(); stlvnl(output); stflg(flag); } /* ScaleMatrix m: Pointer to matrix (input/output) v: Pointer to scale vector (input) result: m Scales m by v. The components of v are fixed point decimals in which 1.0 represents 4096 */ static ROTMATRIX& scale_matrix(ROTMATRIX& m, const VECTOR& v) { static const auto multiply_matrix_row = [](int32_t value, ROTMATRIX& matrix, size_t row) { ldir0(value); // lwc2 r8, v.x ldclmv(matrix, row); // load matrix row to r9 - r11 (mtc2) gpf12(); // gte_gpf12 stclmv(matrix, row); // store matrix row }; multiply_matrix_row(v.x, m, 0); multiply_matrix_row(v.y, m, 1); multiply_matrix_row(v.z, m, 2); return m; } /* SetRotMatrix Sets a 3x3 matrix m as a constant rotation matrix. matrix: The rotation matrix to set */ static void set_rot_matrix(const ROTMATRIX& matrix) { __asm__ volatile("lw $12, 0(%0)" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("lw $13, 4(%0)" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("ctc2 $12, $0" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("ctc2 $13, $1" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("lw $12, 8(%0)" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("lw $13, 12(%0)" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("lw $14, 16(%0)" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("ctc2 $12, $2" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("ctc2 $13, $3" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("ctc2 $14, $4" :: "r"(&matrix) : "$12", "$13", "$14"); } /* GetRotMatrix Writes the current 3x3 constant rotation matrix to matrix (This doesn't require us to use memory clobber) */ static void get_rot_matrix(ROTMATRIX &matrix) { __asm__ volatile("cfc2 $12, $0" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("cfc2 $13, $1" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("sw $12, 0(%0)" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("sw $13, 4(%0)" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("cfc2 $12, $2" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("cfc2 $13, $3" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("cfc2 $14, $4" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("sw $12, 8(%0)" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("sw $13, 12(%0)" :: "r"(&matrix) : "$12", "$13", "$14"); __asm__ volatile("sw $14, 16(%0)" :: "r"(&matrix) : "$12", "$13", "$14"); } /* SetTransMatrix Sets a constant parallel transfer vector specified by m */ static void set_trans_vector(const TRANSFERVECTOR& vector) { __asm__ volatile("lw $12, 0(%0)" :: "r"(&vector) : "$12", "$13", "$14"); __asm__ volatile("lw $13, 4(%0)" :: "r"(&vector) : "$12", "$13", "$14"); __asm__ volatile("ctc2 $12, $5" :: "r"(&vector) : "$12", "$13", "$14"); __asm__ volatile("lw $14, 8(%0)" :: "r"(&vector) : "$12", "$13", "$14"); __asm__ volatile("ctc2 $13, $6" :: "r"(&vector) : "$12", "$13", "$14"); __asm__ volatile("ctc2 $14, $7" :: "r"(&vector) : "$12", "$13", "$14"); } /* GetTransMatrix Writes the current constant parallel transfer vector to matrix (This doesn't require us to use memory clobber) */ static void get_trans_vector(TRANSFERVECTOR& vector) { __asm__ volatile("cfc2 $14, $7" :: "r"(&vector) : "$12", "$13", "$14"); __asm__ volatile("cfc2 $13, $6" :: "r"(&vector) : "$12", "$13", "$14"); __asm__ volatile("sw $14, 8(%0)" :: "r"(&vector) : "$12", "$13", "$14"); __asm__ volatile("cfc2 $12, $5" :: "r"(&vector) : "$12", "$13", "$14"); __asm__ volatile("sw $13, 4(%0)" :: "r"(&vector) : "$12", "$13", "$14"); __asm__ volatile("sw $12, 0(%0)" :: "r"(&vector) : "$12", "$13", "$14"); } /* ApplyMatrix m0: Matrix to apply v0: Vector to apply to v1: Result returns: result Applies the matrix to the vector The function destroys the constant rotation matrix and transfer vector */ static SVECTOR& apply_matrix(const MATRIX& m0, const SVECTOR& v0, SVECTOR& v1) { set_matrix(m0); JabyEngine::GTE::ldv0(v0); JabyEngine::GTE::rt(); JabyEngine::GTE::stsv(v1); return v1; } /* Same as apply_matrix but works on Vertex */ static GPU::Vertex& apply_matrix(const MATRIX& m0, const GPU::Vertex& v0, GPU::Vertex& v1) { set_matrix(m0); JabyEngine::GTE::ldgv0(v0); JabyEngine::GTE::rt(); JabyEngine::GTE::stgv(v1); return v1; } /* MulMatrix0 m0: first input m1: second input result: result of multiplication returns: result Multiplies two matrices m0 and m1. The function destroys the constant rotation matrix */ ROTMATRIX& multiply_matrix(const ROTMATRIX& m0, const ROTMATRIX& m1, ROTMATRIX& result); /* CompMatrix m0: first input m1: second input result: result of computing m0 and m1 return: returns result */ static MATRIX& comp_matrix(const MATRIX& m0, const MATRIX& m1, MATRIX& result) { multiply_matrix(m0.rotation, m1.rotation, result.rotation); set_trans_vector(m0.transfer); GTE::ldlv0(reinterpret_cast(m1.transfer)); GTE::rt(); GTE::stlvnl(reinterpret_cast(result.transfer)); return result; } /* matrix: optional input Pushes the current matrix (rotation and parallel) to an internal stack Optional: replaces current matrix (rotation and parallel) with input */ void push_matrix(); void push_matrix_and_set(const MATRIX& matrix); /* Restores the previous stored matrix (rotation and parallel) */ MATRIX get_and_pop_matrix(); void pop_matrix(); /* SetGeomOffset(ofx,ofy) Load GTE-offset. */ static void set_geom_offset(int32_t off_x, int32_t off_y) { __asm__ volatile("sll $12, %0, 16" :: "r"(off_x), "r"(off_y) : "$12", "$13"); __asm__ volatile("sll $13, %1, 16" :: "r"(off_x), "r"(off_y) : "$12", "$13"); __asm__ volatile("ctc2 $12, $24" :: "r"(off_x), "r"(off_y) : "$12", "$13"); __asm__ volatile("ctc2 $13, $25" :: "r"(off_x), "r"(off_y) : "$12", "$13"); } static void get_geom_offset(int32_t &off_x, int32_t &off_y) { int32_t raw_x, raw_y; __asm__ volatile ( "cfc2 %0, $24\n" "cfc2 %1, $25" : "=r" (raw_x), "=r" (raw_y) ); off_x = raw_x >> 16; off_y = raw_y >> 16; } /* SetGeomScreen(h) Load distance from viewpoint to screen. */ static void set_geom_screen(int32_t h) { __asm__ volatile("ctc2 %0, $26" :: "r"(h)); } /* GetGeomScreen() (???) Get distance from viewpoint to screen. */ static int32_t get_geom_screen() { int32_t h; __asm__ volatile("cfc2 %0, $26" : "=r"(h)); return h; } // Implementations for the MATRIX struct inline MATRIX& MATRIX :: comp(const MATRIX& matrix) { return comp_matrix(matrix, *this, *this); } inline GPU::Vertex& MATRIX :: apply_to(GPU::Vertex& vertex) const { return apply_matrix(*this, vertex, vertex); } inline GPU::Vertex MATRIX :: apply_to(const GPU::Vertex& vertex) const { GPU::Vertex result; apply_matrix(*this, vertex, result); return result; } } }