jabyengine/include/PSX/GTE/gte.hpp

#pragma once
#include "gte_instruction.hpp"

namespace JabyEngine {
    namespace GTE {
        // NOTE(review): presumably the capacity of the internal matrix stack used by
        // push_matrix()/pop_matrix() - confirm against the implementation file
        static constexpr int StackSize = 16;

        /*
            set_matrix

            matrix: rotation matrix and transfer vector to load

            Sets the 3x3 constant rotation matrix and the parallel transfer vector from matrix
        */
        void set_matrix(const MATRIX& matrix);

        /*
            get_matrix

            returns: current matrix (by value)

            Gets the current 3x3 constant rotation matrix and the parallel transfer vector
        */
        MATRIX get_matrix();

        /*
            RotTrans

            Perform coordinate transformation using a rotation matrix
            input:  Input vector
            output: Output vector
            flag:   flag output
        */
        static void rot_trans(const SVECTOR& input, VECTOR& output, int32_t& flag) {
            ldv0(input);
            rt();
            stlvnl(output);
            stflg(flag);
        }

        /*
            ScaleMatrix

            m: matrix to scale (input/output, modified in place)
            v: scale vector (input)

            returns: m
            Scales m by v. The components of v are fixed point decimals in which 1.0 represents 4096
        */
        static ROTMATRIX& scale_matrix(ROTMATRIX& m, const VECTOR& v) {
            // Scales the part of m selected by index (0, 1 or 2) with factor
            const auto scale_by = [&m](int32_t factor, size_t index) {
                ldir0(factor);      // factor goes into the GTE (original note: lwc2 r8)
                ldclmv(m, index);   // load the selected part of m (original note: r9 - r11 via mtc2)
                gpf12();            // gte_gpf12
                stclmv(m, index);   // write the scaled values back into m
            };

            scale_by(v.x, 0);
            scale_by(v.y, 1);
            scale_by(v.z, 2);
            return m;
        }

        /*
            SetRotMatrix

            matrix: The rotation matrix to set

            Sets a 3x3 matrix as the constant rotation matrix by copying five 32 bit words
            (offsets 0, 4, 8, 12 and 16 of matrix) into the GTE control registers 0 - 4.

            The whole sequence is a single asm statement because the scratch registers
            $12 - $14 carry values from one instruction to the next; the compiler gives no
            guarantee that registers survive between separate asm statements.
            The "memory" clobber tells the compiler that matrix is read through the pointer operand,
            so pending writes to it are completed before the asm runs.
            NOTE(review): the last load reads the bytes 16 - 19; confirm sizeof(ROTMATRIX) covers them
        */
        static void set_rot_matrix(const ROTMATRIX& matrix) {
            __asm__ volatile(
                "lw   $12, 0(%0)\n\t"
                "lw   $13, 4(%0)\n\t"
                "ctc2 $12, $0\n\t"
                "ctc2 $13, $1\n\t"
                "lw   $12, 8(%0)\n\t"
                "lw   $13, 12(%0)\n\t"
                "lw   $14, 16(%0)\n\t"
                "ctc2 $12, $2\n\t"
                "ctc2 $13, $3\n\t"
                "ctc2 $14, $4"
                :
                : "r"(&matrix)
                : "$12", "$13", "$14", "memory"
            );
        }

        /*
            GetRotMatrix

            matrix: receives the current rotation matrix

            Writes the current 3x3 constant rotation matrix to matrix by copying the GTE control
            registers 0 - 4 into five 32 bit words (offsets 0, 4, 8, 12 and 16 of matrix).

            The whole sequence is a single asm statement because the scratch registers
            $12 - $14 carry values from one instruction to the next; the compiler gives no
            guarantee that registers survive between separate asm statements.
            The "memory" clobber is required: matrix is written through the pointer operand,
            which the compiler can not see otherwise.
            NOTE(review): the last store writes the bytes 16 - 19; confirm sizeof(ROTMATRIX) covers them
        */
        static void get_rot_matrix(ROTMATRIX &matrix) {
            __asm__ volatile(
                "cfc2 $12, $0\n\t"
                "cfc2 $13, $1\n\t"
                "sw   $12, 0(%0)\n\t"
                "sw   $13, 4(%0)\n\t"
                "cfc2 $12, $2\n\t"
                "cfc2 $13, $3\n\t"
                "cfc2 $14, $4\n\t"
                "sw   $12, 8(%0)\n\t"
                "sw   $13, 12(%0)\n\t"
                "sw   $14, 16(%0)"
                :
                : "r"(&matrix)
                : "$12", "$13", "$14", "memory"
            );
        }

        /*
            SetTransMatrix

            vector: The transfer vector to set

            Sets the constant parallel transfer vector by copying three 32 bit words
            (offsets 0, 4 and 8 of vector) into the GTE control registers 5 - 7.

            The whole sequence is a single asm statement because the scratch registers
            $12 - $14 carry values from one instruction to the next; the compiler gives no
            guarantee that registers survive between separate asm statements.
            The "memory" clobber tells the compiler that vector is read through the pointer operand,
            so pending writes to it are completed before the asm runs
        */
        static void set_trans_vector(const TRANSFERVECTOR& vector)  {
            __asm__ volatile(
                "lw   $12, 0(%0)\n\t"
                "lw   $13, 4(%0)\n\t"
                "ctc2 $12, $5\n\t"
                "lw   $14, 8(%0)\n\t"
                "ctc2 $13, $6\n\t"
                "ctc2 $14, $7"
                :
                : "r"(&vector)
                : "$12", "$13", "$14", "memory"
            );
        }

        /*
            GetTransMatrix

            vector: receives the current transfer vector

            Writes the current constant parallel transfer vector to vector by copying the GTE control
            registers 5 - 7 into three 32 bit words (offsets 0, 4 and 8 of vector).

            The whole sequence is a single asm statement because the scratch registers
            $12 - $14 carry values from one instruction to the next; the compiler gives no
            guarantee that registers survive between separate asm statements.
            The "memory" clobber is required: vector is written through the pointer operand,
            which the compiler can not see otherwise
        */
        static void get_trans_vector(TRANSFERVECTOR& vector) {
            __asm__ volatile(
                "cfc2 $14, $7\n\t"
                "cfc2 $13, $6\n\t"
                "sw   $14, 8(%0)\n\t"
                "cfc2 $12, $5\n\t"
                "sw   $13, 4(%0)\n\t"
                "sw   $12, 0(%0)"
                :
                : "r"(&vector)
                : "$12", "$13", "$14", "memory"
            );
        }

        /*
            ApplyMatrix

            m0: Matrix to apply
            v0: Vector the matrix is applied to
            v1: Receives the result
            returns: v1

            Applies the matrix to the vector.
            m0 is loaded with set_matrix first, so the function destroys the
            constant rotation matrix and transfer vector
        */
        static SVECTOR& apply_matrix(const MATRIX& m0, const SVECTOR& v0, SVECTOR& v1) {
            set_matrix(m0);

            GTE::ldv0(v0);  // input vector
            GTE::rt();      // transform
            GTE::stsv(v1);  // result
            return v1;
        }

        /*
            ApplyMatrix for GPU::Vertex

            m0: Matrix to apply
            v0: Vertex the matrix is applied to
            v1: Receives the result
            returns: v1

            Same as the SVECTOR overload of apply_matrix but works on Vertex.
            m0 is loaded with set_matrix first, so the function destroys the
            constant rotation matrix and transfer vector
        */
        static GPU::Vertex& apply_matrix(const MATRIX& m0, const GPU::Vertex& v0, GPU::Vertex& v1) {
            set_matrix(m0);

            GTE::ldgv0(v0); // input vertex
            GTE::rt();      // transform
            GTE::stgv(v1);  // result
            return v1;
        }

        /*
            MulMatrix0

            m0: first input
            m1: second input
            result: receives the result of the multiplication
            returns: result

            Multiplies two matrices m0 and m1.
            The function destroys the constant rotation matrix
        */
        ROTMATRIX& multiply_matrix(const ROTMATRIX& m0, const ROTMATRIX& m1, ROTMATRIX& result);

        /*
            CompMatrix

            m0: first input
            m1: second input
            result: receives the composition of m0 and m1
            returns: result

            The rotation part of result is multiply_matrix(m0.rotation, m1.rotation).
            The transfer part of result is m1.transfer run through rt() while m0.transfer is set
            as the constant transfer vector.
            The function overwrites the constant transfer vector and (through multiply_matrix)
            the constant rotation matrix
        */
        static MATRIX& comp_matrix(const MATRIX& m0, const MATRIX& m1, MATRIX& result) {
            // The transfer members are handed to the GTE helpers viewed as VECTOR
            const VECTOR& transfer_in  = reinterpret_cast<const VECTOR&>(m1.transfer);
            VECTOR&       transfer_out = reinterpret_cast<VECTOR&>(result.transfer);

            multiply_matrix(m0.rotation, m1.rotation, result.rotation);
            set_trans_vector(m0.transfer);

            GTE::ldlv0(transfer_in);
            GTE::rt();
            GTE::stlvnl(transfer_out);
            return result;
        }

        /*
            push_matrix
            Pushes the current matrix (rotation and parallel) to an internal stack

            push_matrix_and_set
            matrix: the matrix that becomes the current matrix

            Pushes the current matrix (rotation and parallel) to an internal stack
            and replaces the current matrix (rotation and parallel) with matrix
        */
        void push_matrix();
        void push_matrix_and_set(const MATRIX& matrix);

        /*
            get_and_pop_matrix / pop_matrix

            Restores the previous stored matrix (rotation and parallel)
            NOTE(review): get_and_pop_matrix additionally returns a MATRIX - which one
            (the replaced or the restored matrix) is not visible from this header; confirm
        */
        MATRIX get_and_pop_matrix();
        void   pop_matrix();

        /*
            SetGeomOffset(ofx,ofy)

            off_x: offset in x
            off_y: offset in y

            Load GTE-offset.
            Both values are shifted left by 16 bits and written to the GTE control registers 24 (off_x)
            and 25 (off_y).
            NOTE(review): the shift presumably converts to the fixed point format of these registers - confirm

            The whole sequence is a single asm statement because the scratch registers
            $12 and $13 carry the shifted values to the ctc2 instructions; the compiler gives no
            guarantee that registers survive between separate asm statements
        */
        static void set_geom_offset(int32_t off_x, int32_t off_y) {
            __asm__ volatile(
                "sll  $12, %0, 16\n\t"
                "sll  $13, %1, 16\n\t"
                "ctc2 $12, $24\n\t"
                "ctc2 $13, $25"
                :
                : "r"(off_x), "r"(off_y)
                : "$12", "$13"
            );
        }

        /*
            SetGeomScreen(h)

            h: distance from viewpoint to screen

            Load distance from viewpoint to screen.
            h is written unmodified to the GTE control register 26
        */
        static void set_geom_screen(int32_t h) {
            __asm__ volatile("ctc2 %0, $26" :: "r"(h));
        }

        // Implementations for the MATRIX struct

        // Calls comp_matrix(matrix, *this, *this): *this is the second input and receives the result.
        // Returns *this
        inline MATRIX& MATRIX::comp(const MATRIX& matrix) {
            return comp_matrix(matrix, *this, *this);
        }

        // Applies this matrix to vertex in place (vertex is input and output) and returns vertex
        inline GPU::Vertex& MATRIX::apply_to(GPU::Vertex& vertex) const {
            return apply_matrix(*this, vertex, vertex);
        }

        // Applies this matrix to vertex and returns the result as a new Vertex; vertex is left untouched
        inline GPU::Vertex MATRIX::apply_to(const GPU::Vertex& vertex) const {
            GPU::Vertex transformed;

            apply_matrix(*this, vertex, transformed);
            return transformed;
        }
    }
}