Files
jabyengine/include/PSX/GTE/gte.hpp
2026-04-28 22:12:50 +01:00

307 lines
12 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#pragma once
#include "gte_instruction.hpp"
// GTE Overview
// GTE Data Register Summary (cop2r0-31)
//
// `mtc2` (Move To Coprocessor 2): Sets a Data Register (0-31).
// `mfc2` (Move From Coprocessor 2): Gets a Data Register (0-31).
//
// | cop2r0-1 | 3xS16 | VXY0,VZ0 | Vector 0 (X,Y,Z) |
// | cop2r2-3 | 3xS16 | VXY1,VZ1 | Vector 1 (X,Y,Z) |
// | cop2r4-5 | 3xS16 | VXY2,VZ2 | Vector 2 (X,Y,Z) |
// | cop2r6 | 4xU8 | RGBC | Color/code value |
// | cop2r7 | 1xU16 | OTZ | Average Z value (for Ordering Table) |
// | cop2r8 | 1xS16 | IR0 | 16bit Accumulator (Interpolate) |
// | cop2r9-11 | 3xS16 | IR1,IR2,IR3 | 16bit Accumulator (Vector) |
// | cop2r12-15 | 6xS16 | SXY0,SXY1,SXY2,SXYP | Screen XY-coordinate FIFO (3 stages) |
// | cop2r16-19 | 4xU16 | SZ0,SZ1,SZ2,SZ3 | Screen Z-coordinate FIFO (4 stages) |
// | cop2r20-22 | 12xU8 | RGB0,RGB1,RGB2 | Color CRGB-code/color FIFO (3 stages) |
// | cop2r23 | 4xU8 | (RES1) | Prohibited |
// | cop2r24 | 1xS32 | MAC0 | 32bit Maths Accumulators (Value) |
// | cop2r25-27 | 3xS32 | MAC1,MAC2,MAC3 | 32bit Maths Accumulators (Vector) |
// | cop2r28-29 | 1xU15 | IRGB,ORGB | Convert RGB Color (48bit vs 15bit) |
// | cop2r30-31 | 2xS32 | LZCS,LZCR | Count Leading-Zeroes/Ones (sign bits) |
//
// GTE Control Register Summary (cop2r32-63)
// `ctc2` (Copy To Control Coprocessor 2): Sets a Control Register (cnt0-31).
// `cfc2` (Copy From Control Coprocessor 2): Gets a Control Register (cnt0-31).
//
// | cop2r32-36 9xS16 RT11RT12,..,RT33 | Rotation matrix (3x3) | cnt0-4 |
// | cop2r37-39 3x 32 TRX,TRY,TRZ | Translation vector (X,Y,Z) | cnt5-7 |
// | cop2r40-44 9xS16 L11L12,..,L33 | Light source matrix (3x3) | cnt8-12 |
// | cop2r45-47 3x 32 RBK,GBK,BBK | Background color (R,G,B) | cnt13-15 |
// | cop2r48-52 9xS16 LR1LR2,..,LB3 | Light color matrix source (3x3) | cnt16-20 |
// | cop2r53-55 3x 32 RFC,GFC,BFC | Far color (R,G,B) | cnt21-23 |
// | cop2r56-57 2x 32 OFX,OFY | Screen offset (X,Y) | cnt24-25 | (1bit sign, 15bit integer, 16bit fraction)
// | cop2r58 BuggyU16 H | Projection plane distance. | cnt26 | (0bit sign, 16bit integer, 0bit fraction)
// | cop2r59 S16 DQA | Depth cueing parameter A (coeff) | cnt27 |
// | cop2r60 32 DQB | Depth cueing parameter B (offset) | cnt28 |
// | cop2r61-62 2xS16 ZSF3,ZSF4 | Average Z scale factors | cnt29-30 |
// | cop2r63 U20 FLAG | Returns any calculation errors | cnt31 |
namespace JabyEngine {
namespace GTE {
// NOTE(review): presumably the capacity (in matrices) of the internal stack used by
// push_matrix/pop_matrix - the stack itself is not defined in this header, TODO confirm.
static constexpr auto StackSize = 16;
/*
set_matrix
Sets the 3x3 constant rotation matrix and the parallel transfer vector from matrix.
The previously set rotation matrix and transfer vector are replaced.

matrix: The matrix (rotation and parallel transfer vector) to set
*/
void set_matrix(const MATRIX& matrix);
/*
get_matrix
Gets the current 3x3 constant rotation matrix and the parallel transfer vector.

returns: The current matrix (rotation and parallel transfer vector)
*/
MATRIX get_matrix();
/*
RotTrans
Perform coordinate transformation using a rotation matrix.
Uses whatever rotation matrix / transfer vector is currently set in the GTE;
this function does not set them itself.

input: Input vector
output: Output vector
flag: flag output
*/
static void rot_trans(const SVECTOR& input, VECTOR& output, int32_t& flag) {
// Hand the input vector to the GTE (helper from gte_instruction.hpp; presumably targets Vector 0)
ldv0(input);
// Run the transformation on the loaded vector
rt();
// Fetch the result into `output` (presumably the 32bit result vector - TODO confirm)
stlvnl(output);
// Fetch the flag value that belongs to the calculation above, so it has to be read last
stflg(flag);
}
/*
    ScaleMatrix
    Scales m in place, one row at a time: row 0 by v.x, row 1 by v.y and row 2 by v.z.
    The components of v are fixed point decimals in which 1.0 represents 4096.

    m:       Matrix to scale (input/output)
    v:       Scale vector (input)
    returns: m
*/
static ROTMATRIX& scale_matrix(ROTMATRIX& m, const VECTOR& v) {
    // Multiplies a single row of m by `factor` using the GTE
    const auto scale_row = [&m](int32_t factor, size_t row) {
        ldir0(factor);   // factor goes into cop2r8 (IR0)
        ldclmv(m, row);  // matrix row goes into cop2r9 - cop2r11 (mtc2)
        gpf12();         // gte_gpf12
        stclmv(m, row);  // write the scaled row back into the matrix
    };

    scale_row(v.x, 0);
    scale_row(v.y, 1);
    scale_row(v.z, 2);
    return m;
}
/*
    SetRotMatrix
    Sets a 3x3 matrix as the constant rotation matrix.

    The matrix is read as five 32bit words (byte offsets 0..16) which are
    written to the control registers cnt0-4 with ctc2.

    The whole sequence is a single asm statement on purpose: $12-$14 carry
    values from one instruction to the next and the compiler gives no guarantee
    that registers survive between separate asm statements.
    The "memory" clobber is required because the loads go through a pointer
    operand; without it the compiler does not know that the contents of matrix
    are read and may not have written them to memory yet.

    matrix: The rotation matrix to set
*/
static void set_rot_matrix(const ROTMATRIX& matrix) {
    __asm__ volatile(
        "lw   $12, 0(%0)\n\t"
        "lw   $13, 4(%0)\n\t"
        "ctc2 $12, $0\n\t"
        "ctc2 $13, $1\n\t"
        "lw   $12, 8(%0)\n\t"
        "lw   $13, 12(%0)\n\t"
        "lw   $14, 16(%0)\n\t"
        "ctc2 $12, $2\n\t"
        "ctc2 $13, $3\n\t"
        "ctc2 $14, $4"
        :
        : "r"(&matrix)
        : "$12", "$13", "$14", "memory"
    );
}
/*
    GetRotMatrix
    Writes the current 3x3 constant rotation matrix to matrix.

    The control registers cnt0-4 are read with cfc2 and stored as five 32bit
    words at byte offsets 0..16 of matrix.

    The whole sequence is a single asm statement on purpose: $12-$14 carry
    values from one instruction to the next and the compiler gives no guarantee
    that registers survive between separate asm statements.
    The "memory" clobber is required because the stores go through a pointer
    operand; without it the compiler does not know that matrix was modified
    and may keep using stale values of it.

    matrix: Receives the rotation matrix (output)
*/
static void get_rot_matrix(ROTMATRIX &matrix) {
    __asm__ volatile(
        "cfc2 $12, $0\n\t"
        "cfc2 $13, $1\n\t"
        "sw   $12, 0(%0)\n\t"
        "sw   $13, 4(%0)\n\t"
        "cfc2 $12, $2\n\t"
        "cfc2 $13, $3\n\t"
        "cfc2 $14, $4\n\t"
        "sw   $12, 8(%0)\n\t"
        "sw   $13, 12(%0)\n\t"
        "sw   $14, 16(%0)"
        :
        : "r"(&matrix)
        : "$12", "$13", "$14", "memory"
    );
}
/*
    SetTransMatrix
    Sets the constant parallel transfer vector.

    The vector is read as three 32bit words (byte offsets 0, 4, 8) which are
    written to the control registers cnt5-7 with ctc2.

    The whole sequence is a single asm statement on purpose: $12-$14 carry
    values from one instruction to the next and the compiler gives no guarantee
    that registers survive between separate asm statements.
    The "memory" clobber is required because the loads go through a pointer
    operand; without it the compiler does not know that the contents of vector
    are read and may not have written them to memory yet.

    vector: The parallel transfer vector to set
*/
static void set_trans_vector(const TRANSFERVECTOR& vector) {
    __asm__ volatile(
        "lw   $12, 0(%0)\n\t"
        "lw   $13, 4(%0)\n\t"
        "ctc2 $12, $5\n\t"
        "lw   $14, 8(%0)\n\t"
        "ctc2 $13, $6\n\t"
        "ctc2 $14, $7"
        :
        : "r"(&vector)
        : "$12", "$13", "$14", "memory"
    );
}
/*
    GetTransMatrix
    Writes the current constant parallel transfer vector to vector.

    The control registers cnt5-7 are read with cfc2 and stored as three 32bit
    words at byte offsets 0, 4, 8 of vector.

    The whole sequence is a single asm statement on purpose: $12-$14 carry
    values from one instruction to the next and the compiler gives no guarantee
    that registers survive between separate asm statements.
    The "memory" clobber is required because the stores go through a pointer
    operand; without it the compiler does not know that vector was modified
    and may keep using stale values of it.

    vector: Receives the parallel transfer vector (output)
*/
static void get_trans_vector(TRANSFERVECTOR& vector) {
    __asm__ volatile(
        "cfc2 $14, $7\n\t"
        "cfc2 $13, $6\n\t"
        "sw   $14, 8(%0)\n\t"
        "cfc2 $12, $5\n\t"
        "sw   $13, 4(%0)\n\t"
        "sw   $12, 0(%0)"
        :
        : "r"(&vector)
        : "$12", "$13", "$14", "memory"
    );
}
/*
    ApplyMatrix
    Applies the matrix m0 to the vector v0 and writes the outcome to v1.
    The function destroys the constant rotation matrix and transfer vector,
    because m0 is installed through set_matrix first.

    m0:      Matrix to apply
    v0:      Vector to apply to
    v1:      Result
    returns: v1
*/
static SVECTOR& apply_matrix(const MATRIX& m0, const SVECTOR& v0, SVECTOR& v1) {
    set_matrix(m0);     // m0 becomes the active rotation matrix / transfer vector

    GTE::ldv0(v0);      // input vector -> GTE
    GTE::rt();          // transform
    GTE::stsv(v1);      // GTE -> result vector
    return v1;
}
/*
    Same as apply_matrix for SVECTOR but works on GPU::Vertex.
    The function destroys the constant rotation matrix and transfer vector,
    because m0 is installed through set_matrix first.

    m0:      Matrix to apply
    v0:      Vertex to apply to
    v1:      Result
    returns: v1
*/
static GPU::Vertex& apply_matrix(const MATRIX& m0, const GPU::Vertex& v0, GPU::Vertex& v1) {
    set_matrix(m0);     // m0 becomes the active rotation matrix / transfer vector

    GTE::ldgv0(v0);     // input vertex -> GTE
    GTE::rt();          // transform
    GTE::stgv(v1);      // GTE -> result vertex
    return v1;
}
/*
MulMatrix0
Multiplies two matrices m0 and m1 and writes the product to result.
The function destroys the constant rotation matrix.

m0: first input
m1: second input
result: result of multiplication
returns: result
*/
ROTMATRIX& multiply_matrix(const ROTMATRIX& m0, const ROTMATRIX& m1, ROTMATRIX& result);
/*
    CompMatrix
    Composes m0 and m1 into result:
      - result.rotation is multiply_matrix(m0.rotation, m1.rotation)
      - result.transfer is m1.transfer run through rt() with m0.transfer set
        as the parallel transfer vector
    m1 and result may refer to the same object (MATRIX::comp relies on this).
    The function destroys the constant rotation matrix and transfer vector.

    NOTE(review): the rt() step uses whatever rotation matrix multiply_matrix
    leaves behind in the GTE - presumably m0.rotation, confirm against its
    implementation.

    m0:      first input
    m1:      second input
    result:  result of computing m0 and m1
    returns: result
*/
static MATRIX& comp_matrix(const MATRIX& m0, const MATRIX& m1, MATRIX& result) {
    // The GTE helpers work on VECTOR, so the transfer vectors are viewed as such
    const VECTOR& transfer_in  = reinterpret_cast<const VECTOR&>(m1.transfer);
    VECTOR&       transfer_out = reinterpret_cast<VECTOR&>(result.transfer);

    // Rotation part
    multiply_matrix(m0.rotation, m1.rotation, result.rotation);

    // Transfer part
    set_trans_vector(m0.transfer);
    GTE::ldlv0(transfer_in);
    GTE::rt();
    GTE::stlvnl(transfer_out);
    return result;
}
/*
push_matrix
Pushes the current matrix (rotation and parallel) to an internal stack

push_matrix_and_set
Pushes the current matrix (rotation and parallel) to an internal stack
and then replaces the current matrix (rotation and parallel) with the input

matrix: The matrix that becomes the current matrix after the push

NOTE(review): the behaviour on a full stack is not visible from this header - confirm against the implementation.
*/
void push_matrix();
void push_matrix_and_set(const MATRIX& matrix);
/*
pop_matrix
Restores the previous stored matrix (rotation and parallel)

get_and_pop_matrix
Restores the previous stored matrix (rotation and parallel) and returns a matrix

returns: NOTE(review): presumably the matrix that was current before the pop - confirm against the implementation.
*/
MATRIX get_and_pop_matrix();
void pop_matrix();
/*
    SetGeomOffset(ofx,ofy)
    Load GTE-offset.

    Both values are shifted left by 16 bits and written to the control
    registers cnt24 (OFX) and cnt25 (OFY), which hold the screen offset with a
    16bit fraction.

    The whole sequence is a single asm statement on purpose: $12 and $13 carry
    the shifted values into the ctc2 instructions and the compiler gives no
    guarantee that registers survive between separate asm statements.

    off_x: Screen offset X (integer)
    off_y: Screen offset Y (integer)
*/
static void set_geom_offset(int32_t off_x, int32_t off_y) {
    __asm__ volatile(
        "sll  $12, %0, 16\n\t"
        "sll  $13, %1, 16\n\t"
        "ctc2 $12, $24\n\t"
        "ctc2 $13, $25"
        :
        : "r"(off_x), "r"(off_y)
        : "$12", "$13"
    );
}
/*
    Reads the GTE-offset back.

    The control registers cnt24 (OFX) and cnt25 (OFY) are read and shifted
    right by 16 bits, undoing the shift applied by set_geom_offset.

    off_x: Receives the screen offset X (output)
    off_y: Receives the screen offset Y (output)
*/
static void get_geom_offset(int32_t &off_x, int32_t &off_y) {
    int32_t ofx_fixed, ofy_fixed;

    __asm__ volatile (
        "cfc2 %0, $24\n"
        "cfc2 %1, $25"
        : "=r" (ofx_fixed), "=r" (ofy_fixed)
    );

    // Drop the 16 fraction bits
    off_x = ofx_fixed >> 16;
    off_y = ofy_fixed >> 16;
}
/*
    SetGeomScreen(h)
    Load distance from viewpoint to screen.
    The value is written unmodified to the control register cnt26 (H).

    h: Distance from viewpoint to screen (projection plane distance)
*/
static void set_geom_screen(int32_t h) {
    __asm__ volatile(
        "ctc2 %0, $26"
        :
        : "r"(h)
    );
}
/*
    GetGeomScreen() (???)
    Get distance from viewpoint to screen.
    The control register cnt26 (H) is read and returned as is.

    NOTE(review): the register summary at the top of this file marks H as
    "BuggyU16" and no masking is applied here - confirm whether callers
    should only rely on the lower 16 bits.

    returns: Distance from viewpoint to screen (projection plane distance)
*/
static int32_t get_geom_screen() {
    int32_t distance;

    __asm__ volatile(
        "cfc2 %0, $26"
        : "=r"(distance)
    );
    return distance;
}
// Implementations for the MATRIX struct

/*
    Composes matrix with this matrix using comp_matrix and stores the outcome
    in this matrix (this matrix is both the second input and the result).

    matrix:  first input of comp_matrix
    returns: this matrix
*/
inline MATRIX& MATRIX::comp(const MATRIX& matrix) {
    MATRIX& self = *this;
    return comp_matrix(matrix, self, self);
}
/*
    Applies this matrix to vertex in place using apply_matrix.

    vertex:  Vertex to transform (input/output)
    returns: vertex
*/
inline GPU::Vertex& MATRIX::apply_to(GPU::Vertex& vertex) const {
    const MATRIX& self = *this;
    return apply_matrix(self, vertex, vertex);
}
/*
    Applies this matrix to vertex using apply_matrix and leaves vertex untouched.

    vertex:  Vertex to transform (input)
    returns: The transformed vertex
*/
inline GPU::Vertex MATRIX::apply_to(const GPU::Vertex& vertex) const {
    GPU::Vertex transformed;

    apply_matrix(*this, vertex, transformed);
    return transformed;
}
}
}