307 lines
12 KiB
C++
307 lines
12 KiB
C++
#pragma once
|
||
#include "gte_instruction.hpp"
|
||
|
||
// GTE Overview
|
||
// GTE Data Register Summary (cop2r0-31)
|
||
//
|
||
// `mtc2` (Move To Coprocessor 2): Sets a Data Register (0–31).
|
||
// `mfc2` (Move From Coprocessor 2): Gets a Data Register (0–31).
|
||
//
|
||
// | cop2r0-1 | 3xS16 | VXY0,VZ0 | Vector 0 (X,Y,Z) |
|
||
// | cop2r2-3 | 3xS16 | VXY1,VZ1 | Vector 1 (X,Y,Z) |
|
||
// | cop2r4-5 | 3xS16 | VXY2,VZ2 | Vector 2 (X,Y,Z) |
|
||
// | cop2r6 | 4xU8 | RGBC | Color/code value |
|
||
// | cop2r7 | 1xU16 | OTZ | Average Z value (for Ordering Table) |
|
||
// | cop2r8 | 1xS16 | IR0 | 16bit Accumulator (Interpolate) |
|
||
// | cop2r9-11 | 3xS16 | IR1,IR2,IR3 | 16bit Accumulator (Vector) |
|
||
// | cop2r12-15 | 6xS16 | SXY0,SXY1,SXY2,SXYP | Screen XY-coordinate FIFO (3 stages) |
|
||
// | cop2r16-19 | 4xU16 | SZ0,SZ1,SZ2,SZ3 | Screen Z-coordinate FIFO (4 stages) |
|
||
// | cop2r20-22 | 12xU8 | RGB0,RGB1,RGB2 | Color CRGB-code/color FIFO (3 stages) |
|
||
// | cop2r23 | 4xU8 | (RES1) | Prohibited |
|
||
// | cop2r24 | 1xS32 | MAC0 | 32bit Maths Accumulators (Value) |
|
||
// | cop2r25-27 | 3xS32 | MAC1,MAC2,MAC3 | 32bit Maths Accumulators (Vector) |
|
||
// | cop2r28-29 | 1xU15 | IRGB,ORGB | Convert RGB Color (48bit vs 15bit) |
|
||
// | cop2r30-31 | 2xS32 | LZCS,LZCR | Count Leading-Zeroes/Ones (sign bits) |
|
||
//
|
||
// GTE Control Register Summary (cop2r32-63)
|
||
// ctc2 (Copy To Control Coprocessor 2): Sets a Control Register (cnt0–31).
|
||
// cfc2 (Copy From Control Coprocessor 2): Gets a Control Register (cnt0–31).
|
||
//
|
||
// | cop2r32-36 9xS16 RT11RT12,..,RT33 | Rotation matrix (3x3) | cnt0-4 |
|
||
// | cop2r37-39 3x 32 TRX,TRY,TRZ | Translation vector (X,Y,Z) | cnt5-7 |
|
||
// | cop2r40-44 9xS16 L11L12,..,L33 | Light source matrix (3x3) | cnt8-12 |
|
||
// | cop2r45-47 3x 32 RBK,GBK,BBK | Background color (R,G,B) | cnt13-15 |
|
||
// | cop2r48-52 9xS16 LR1LR2,..,LB3 | Light color matrix source (3x3) | cnt16-20 |
|
||
// | cop2r53-55 3x 32 RFC,GFC,BFC | Far color (R,G,B) | cnt21-23 |
|
||
// | cop2r56-57 2x 32 OFX,OFY | Screen offset (X,Y) | cnt24-25 | (1bit sign, 15bit integer, 16bit fraction)
|
||
// | cop2r58 BuggyU16 H | Projection plane distance. | cnt26 | (0bit sign, 16bit integer, 0bit fraction)
|
||
// | cop2r59 S16 DQA | Depth queuing parameter A (coeff) | cnt27 |
|
||
// | cop2r60 32 DQB | Depth queuing parameter B (offset) | cnt28 |
|
||
// | cop2r61-62 2xS16 ZSF3,ZSF4 | Average Z scale factors | cnt29-30 |
|
||
// | cop2r63 U20 FLAG | Returns any calculation errors | cnt31 |
|
||
|
||
|
||
|
||
namespace JabyEngine {
|
||
namespace GTE {
|
||
// Compile-time constant with the value 16.
// NOTE(review): presumably the capacity of the internal matrix stack used by
// push_matrix/pop_matrix - confirm against the implementation.
static constexpr int StackSize{16};
|
||
|
||
/*
|
||
matrix: first input
|
||
|
||
Sets the 3x3 constant rotation matrix and the parallel transfer vector from input
|
||
*/
|
||
void set_matrix(const MATRIX& matrix);
|
||
|
||
/*
|
||
returns: current matrix
|
||
|
||
Gets the current 3x3 constant rotation matrix and the parallel transfer vector
|
||
*/
|
||
MATRIX get_matrix();
|
||
|
||
/*
|
||
RotTrans
|
||
|
||
Perform coordinate transformation using a rotation matrix
|
||
input: Input vector
|
||
output: Output vector
|
||
flag: flag output
|
||
*/
|
||
static void rot_trans(const SVECTOR& input, VECTOR& output, int32_t& flag) {
|
||
ldv0(input);
|
||
rt();
|
||
stlvnl(output);
|
||
stflg(flag);
|
||
}
|
||
|
||
/*
|
||
ScaleMatrix
|
||
|
||
m: Pointer to matrix (input/output)
|
||
v: Pointer to scale vector (input)
|
||
|
||
result: m
|
||
Scales m by v. The components of v are fixed point decimals in which 1.0 represents 4096
|
||
*/
|
||
static ROTMATRIX& scale_matrix(ROTMATRIX& m, const VECTOR& v) {
|
||
static const auto multiply_matrix_row = [](int32_t value, ROTMATRIX& matrix, size_t row) {
|
||
ldir0(value); // lwc2 r8, v.x
|
||
ldclmv(matrix, row); // load matrix row to r9 - r11 (mtc2)
|
||
gpf12(); // gte_gpf12
|
||
stclmv(matrix, row); // store matrix row
|
||
};
|
||
|
||
multiply_matrix_row(v.x, m, 0);
|
||
multiply_matrix_row(v.y, m, 1);
|
||
multiply_matrix_row(v.z, m, 2);
|
||
return m;
|
||
}
|
||
|
||
/*
|
||
SetRotMatrix
|
||
|
||
Sets a 3x3 matrix m as a constant rotation matrix.
|
||
matrix: The rotation matrix to set
|
||
*/
|
||
static void set_rot_matrix(const ROTMATRIX& matrix) {
|
||
__asm__ volatile("lw $12, 0(%0)" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("lw $13, 4(%0)" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("ctc2 $12, $0" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("ctc2 $13, $1" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("lw $12, 8(%0)" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("lw $13, 12(%0)" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("lw $14, 16(%0)" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("ctc2 $12, $2" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("ctc2 $13, $3" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("ctc2 $14, $4" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
}
|
||
|
||
/*
|
||
GetRotMatrix
|
||
|
||
Writes the current 3x3 constant rotation matrix to matrix
|
||
(This doesn't require us to use memory clobber)
|
||
*/
|
||
static void get_rot_matrix(ROTMATRIX &matrix) {
|
||
__asm__ volatile("cfc2 $12, $0" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("cfc2 $13, $1" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("sw $12, 0(%0)" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("sw $13, 4(%0)" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("cfc2 $12, $2" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("cfc2 $13, $3" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("cfc2 $14, $4" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("sw $12, 8(%0)" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("sw $13, 12(%0)" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
__asm__ volatile("sw $14, 16(%0)" :: "r"(&matrix) : "$12", "$13", "$14");
|
||
}
|
||
|
||
/*
|
||
SetTransMatrix
|
||
|
||
Sets a constant parallel transfer vector specified by m
|
||
*/
|
||
static void set_trans_vector(const TRANSFERVECTOR& vector) {
|
||
__asm__ volatile("lw $12, 0(%0)" :: "r"(&vector) : "$12", "$13", "$14");
|
||
__asm__ volatile("lw $13, 4(%0)" :: "r"(&vector) : "$12", "$13", "$14");
|
||
__asm__ volatile("ctc2 $12, $5" :: "r"(&vector) : "$12", "$13", "$14");
|
||
__asm__ volatile("lw $14, 8(%0)" :: "r"(&vector) : "$12", "$13", "$14");
|
||
__asm__ volatile("ctc2 $13, $6" :: "r"(&vector) : "$12", "$13", "$14");
|
||
__asm__ volatile("ctc2 $14, $7" :: "r"(&vector) : "$12", "$13", "$14");
|
||
}
|
||
|
||
/*
|
||
GetTransMatrix
|
||
|
||
Writes the current constant parallel transfer vector to matrix
|
||
(This doesn't require us to use memory clobber)
|
||
*/
|
||
static void get_trans_vector(TRANSFERVECTOR& vector) {
|
||
__asm__ volatile("cfc2 $14, $7" :: "r"(&vector) : "$12", "$13", "$14");
|
||
__asm__ volatile("cfc2 $13, $6" :: "r"(&vector) : "$12", "$13", "$14");
|
||
__asm__ volatile("sw $14, 8(%0)" :: "r"(&vector) : "$12", "$13", "$14");
|
||
__asm__ volatile("cfc2 $12, $5" :: "r"(&vector) : "$12", "$13", "$14");
|
||
__asm__ volatile("sw $13, 4(%0)" :: "r"(&vector) : "$12", "$13", "$14");
|
||
__asm__ volatile("sw $12, 0(%0)" :: "r"(&vector) : "$12", "$13", "$14");
|
||
}
|
||
|
||
/*
|
||
ApplyMatrix
|
||
m0: Matrix to apply
|
||
v0: Vector to apply to
|
||
v1: Result
|
||
returns: result
|
||
|
||
Applies the matrix to the vector
|
||
The function destroys the constant rotation matrix and transfer vector
|
||
*/
|
||
static SVECTOR& apply_matrix(const MATRIX& m0, const SVECTOR& v0, SVECTOR& v1) {
|
||
set_matrix(m0);
|
||
|
||
JabyEngine::GTE::ldv0(v0);
|
||
JabyEngine::GTE::rt();
|
||
JabyEngine::GTE::stsv(v1);
|
||
return v1;
|
||
}
|
||
|
||
/*
|
||
Same as apply_matrix but works on Vertex
|
||
*/
|
||
static GPU::Vertex& apply_matrix(const MATRIX& m0, const GPU::Vertex& v0, GPU::Vertex& v1) {
|
||
set_matrix(m0);
|
||
|
||
JabyEngine::GTE::ldgv0(v0);
|
||
JabyEngine::GTE::rt();
|
||
JabyEngine::GTE::stgv(v1);
|
||
return v1;
|
||
}
|
||
|
||
/*
|
||
MulMatrix0
|
||
|
||
m0: first input
|
||
m1: second input
|
||
result: result of multiplication
|
||
returns: result
|
||
|
||
Multiplies two matrices m0 and m1.
|
||
The function destroys the constant rotation matrix
|
||
*/
|
||
ROTMATRIX& multiply_matrix(const ROTMATRIX& m0, const ROTMATRIX& m1, ROTMATRIX& result);
|
||
|
||
/*
|
||
CompMatrix
|
||
|
||
m0: first input
|
||
m1: second input
|
||
result: result of computing m0 and m1
|
||
return: returns result
|
||
*/
|
||
static MATRIX& comp_matrix(const MATRIX& m0, const MATRIX& m1, MATRIX& result) {
|
||
multiply_matrix(m0.rotation, m1.rotation, result.rotation);
|
||
set_trans_vector(m0.transfer);
|
||
GTE::ldlv0(reinterpret_cast<const VECTOR&>(m1.transfer));
|
||
GTE::rt();
|
||
GTE::stlvnl(reinterpret_cast<VECTOR&>(result.transfer));
|
||
|
||
return result;
|
||
}
|
||
|
||
/*
|
||
matrix: optional input
|
||
|
||
Pushes the current matrix (rotation and parallel) to an internal stack
|
||
Optional: replaces current matrix (rotation and parallel) with input
|
||
*/
|
||
void push_matrix();
|
||
void push_matrix_and_set(const MATRIX& matrix);
|
||
|
||
/*
|
||
Restores the previously stored matrix (rotation and parallel)
|
||
*/
|
||
MATRIX get_and_pop_matrix();
|
||
void pop_matrix();
|
||
|
||
/*
    SetGeomOffset(ofx,ofy)

    Load GTE-offset.

    off_x: Screen offset X (integer)
    off_y: Screen offset Y (integer)

    Both values are shifted left by 16 bits before they are written to the
    control registers 24 (OFX) and 25 (OFY); the register summary at the top
    of this file lists these as 1bit sign, 15bit integer, 16bit fraction.
*/
static void set_geom_offset(int32_t off_x, int32_t off_y) {
    // The shift is done in C++ (on the unsigned representation, so negative
    // offsets are well defined) and the results are passed as operands.
    // This avoids keeping values in hard-coded scratch registers across
    // separate asm statements, which the compiler does not guarantee.
    const uint32_t raw_x = static_cast<uint32_t>(off_x) << 16;
    const uint32_t raw_y = static_cast<uint32_t>(off_y) << 16;

    __asm__ volatile(
        "ctc2 %0, $24\n"
        "ctc2 %1, $25"
        :
        : "r"(raw_x), "r"(raw_y)
    );
}
|
||
|
||
/*
    Reads the GTE-offset.

    off_x: Receives control register 24 shifted right by 16 bits
    off_y: Receives control register 25 shifted right by 16 bits

    Counterpart of set_geom_offset, which stores the values shifted left by
    16 bits.
*/
static void get_geom_offset(int32_t &off_x, int32_t &off_y) {
    int32_t fixed_x;
    int32_t fixed_y;

    __asm__ volatile (
        "cfc2 %[x], $24\n"
        "cfc2 %[y], $25"
        : [x] "=r" (fixed_x), [y] "=r" (fixed_y)
    );

    // Drop the lower 16 bits again
    off_x = fixed_x >> 16;
    off_y = fixed_y >> 16;
}
|
||
|
||
/*
    SetGeomScreen(h)

    Load distance from viewpoint to screen.

    h: Value written unmodified to control register 26
       (H, the projection plane distance in the register summary at the top
       of this file)
*/
static void set_geom_screen(int32_t h) {
    __asm__ volatile(
        "ctc2 %[distance], $26"
        :
        : [distance] "r"(h)
    );
}
|
||
|
||
/*
    GetGeomScreen() (???)

    Get distance from viewpoint to screen.

    returns: The content of control register 26 exactly as read; no masking
             is applied.
    NOTE(review): the register summary at the top of this file marks H as
    "BuggyU16" - confirm how large values are reported when read back.
*/
static int32_t get_geom_screen() {
    int32_t distance;

    __asm__ volatile(
        "cfc2 %[distance], $26"
        : [distance] "=r"(distance)
    );
    return distance;
}
|
||
|
||
// Implementations for the MATRIX struct
|
||
// Composes the given matrix with this one via comp_matrix(matrix, *this, *this):
// this matrix is the second input and also receives the result.
// Returns a reference to this matrix.
inline MATRIX& MATRIX::comp(const MATRIX& matrix) {
    MATRIX& self = *this;

    return comp_matrix(matrix, self, self);
}
|
||
|
||
// Applies this matrix to the vertex in place (the vertex is both input and
// output of apply_matrix) and returns a reference to the modified vertex.
inline GPU::Vertex& MATRIX::apply_to(GPU::Vertex& vertex) const {
    const MATRIX& self = *this;

    return apply_matrix(self, vertex, vertex);
}
|
||
|
||
// Applies this matrix to a copy: the input vertex is left untouched and the
// transformed vertex is returned by value.
inline GPU::Vertex MATRIX::apply_to(const GPU::Vertex& vertex) const {
    GPU::Vertex transformed;

    apply_matrix(*this, vertex, transformed);
    return transformed;
}
|
||
}
|
||
} |