From 801d70e22e317e2e48423d0c44e4f08490e89680 Mon Sep 17 00:00:00 2001
From: spicyjpeg <thatspicyjpeg@gmail.com>
Date: Sat, 8 Mar 2025 01:10:42 +0100
Subject: [PATCH] Disable unimplemented formats, add missing const qualifiers

---
 README.md           |  40 ++++++++--------
 libpsxav/adpcm.c    |  53 ++++++++++++++++----
 libpsxav/libpsxav.h |   8 ++--
 psxavenc/args.c     |   4 +-
 psxavenc/filefmt.c  | 114 +++++++++++++++++++++++++++++++++++++++++++-
 psxavenc/main.c     |   4 ++
 psxavenc/mdec.c     |  15 +++---
 psxavenc/mdec.h     |   4 +-
 8 files changed, 196 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index 12f3218..007bd35 100644
--- a/README.md
+++ b/README.md
@@ -49,19 +49,18 @@ $ psxavenc -t vagi -f 44100 -c 2 -L -i 2048 in.wav out.vag
 
 The output format must be set using the `-t` option.
 
-| Format   | Audio codec          | Audio channels | Video codec   | Sector size |
-| :------- | :------------------- | :------------- | :------------ | :---------- |
-| `xa`     | XA-ADPCM             | 1 or 2         |               | 2336 bytes  |
-| `xacd`   | XA-ADPCM             | 1 or 2         |               | 2352 bytes  |
-| `spu`    | SPU-ADPCM            | 1              |               |             |
-| `vag`    | SPU-ADPCM            | 1              |               |             |
-| `spui`   | SPU-ADPCM            | Any            |               |             |
-| `vagi`   | SPU-ADPCM            | Any            |               |             |
-| `str`    | XA-ADPCM (optional)  | 1 or 2         | BS v2/v3/v3dc | 2336 bytes  |
-| `strcd`  | XA-ADPCM (optional)  | 1 or 2         | BS v2/v3/v3dc | 2352 bytes  |
-| `strspu` | SPU-ADPCM (optional) | Any            | BS v2/v3/v3dc | 2048 bytes  |
-| `strv`   |                      |                | BS v2/v3/v3dc | 2048 bytes  |
-| `sbs`    |                      |                | BS v2/v3/v3dc |             |
+| Format  | Audio codec          | Audio channels | Video codec   | Sector size |
+| :------ | :------------------- | :------------- | :------------ | :---------- |
+| `xa`    | XA-ADPCM             | 1 or 2         |               | 2336 bytes  |
+| `xacd`  | XA-ADPCM             | 1 or 2         |               | 2352 bytes  |
+| `spu`   | SPU-ADPCM            | 1              |               |             |
+| `vag`   | SPU-ADPCM            | 1              |               |             |
+| `spui`  | SPU-ADPCM            | Any            |               |             |
+| `vagi`  | SPU-ADPCM            | Any            |               |             |
+| `str`   | XA-ADPCM (optional)  | 1 or 2         | BS v2/v3/v3dc | 2336 bytes  |
+| `strcd` | XA-ADPCM (optional)  | 1 or 2         | BS v2/v3/v3dc | 2352 bytes  |
+| `strv`  |                      |                | BS v2/v3/v3dc | 2048 bytes  |
+| `sbs`   |                      |                | BS v2/v3/v3dc |             |
 
 Notes:
 
@@ -81,11 +80,12 @@ Notes:
   specified using the `-a` option (2048 bytes by default). Note that `vagi`
   files with more than 2 channels and/or alignment other than 2048 bytes are not
   standardized.
-- The `strspu` format encodes the input file's audio track as a series of custom
-  .str chunks (type ID `0x0001` by default) holding interleaved SPU-ADPCM data
-  in the same format as `spui`, rather than XA-ADPCM. As .str chunks do not
-  require custom XA subheaders, a file with standard 2048-byte sectors that does
-  not need any special handling will be generated.
+- ~~The `strspu` format encodes the input file's audio track as a series of~~
+  ~~custom .str chunks (type ID `0x0001` by default) holding interleaved~~
+  ~~SPU-ADPCM data in the same format as `spui`, rather than XA-ADPCM. As .str~~
+  ~~chunks do not require custom XA subheaders, a file with standard 2048-byte~~
+  ~~sectors that does not need any special handling will be generated.~~
+  *This format has not yet been implemented.*
 - The `strv` format disables audio altogether and is equivalent to `strspu` on
   an input file with no audio track.
 - The `sbs` format (used in some System 573 games) consists of a series of
@@ -95,8 +95,8 @@ Notes:
 
 ## Supported video codecs
 
-All formats with a video track (`str`, `strcd`, `strspu`, `strv` and `sbs`) can
-use any of the codecs listed below. The codec can be set using the `-v` option.
+All formats with a video track (`str`, `strcd`, `strv` and `sbs`) can use any of
+the codecs listed below. The codec can be set using the `-v` option.
 
 | Codec          | Supported by          | Typ. decoder CPU usage |
 | :------------- | :-------------------- | :--------------------- |
diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c
index 2d5dbc5..80e3413 100644
--- a/libpsxav/adpcm.c
+++ b/libpsxav/adpcm.c
@@ -36,7 +36,14 @@ freely, subject to the following restrictions:
 static const int16_t filter_k1[ADPCM_FILTER_COUNT] = {0, 60, 115, 98, 122};
 static const int16_t filter_k2[ADPCM_FILTER_COUNT] = {0, 0, -52, -55, -60};
 
-static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, int filter, int shift_range) {
+static int find_min_shift(
+	const psx_audio_encoder_channel_state_t *state,
+	const int16_t *samples,
+	int sample_limit,
+	int pitch,
+	int filter,
+	int shift_range
+) {
 	// Assumption made:
 	//
 	// There is value in shifting right one step further to allow the nibbles to clip.
@@ -71,7 +78,19 @@ static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_
 	return min_shift;
 }
 
-static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, const psx_audio_encoder_channel_state_t *instate, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter, int sample_shift, int shift_range) {
+static uint8_t attempt_to_encode(
+	psx_audio_encoder_channel_state_t *outstate,
+	const psx_audio_encoder_channel_state_t *instate,
+	const int16_t *samples,
+	int sample_limit,
+	int pitch,
+	uint8_t *data,
+	int data_shift,
+	int data_pitch,
+	int filter,
+	int sample_shift,
+	int shift_range
+) {
 	uint8_t sample_mask = 0xFFFF >> shift_range;
 	uint8_t nondata_mask = ~(sample_mask << data_shift);
 
@@ -120,8 +139,18 @@ static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, co
 	return hdr;
 }
 
-static uint8_t encode(psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter_count, int shift_range) {
-    psx_audio_encoder_channel_state_t proposed;
+static uint8_t encode(
+	psx_audio_encoder_channel_state_t *state,
+	const int16_t *samples,
+	int sample_limit,
+	int pitch,
+	uint8_t *data,
+	int data_shift,
+	int data_pitch,
+	int filter_count,
+	int shift_range
+) {
+	psx_audio_encoder_channel_state_t proposed;
 	int64_t best_mse = ((int64_t)1<<(int64_t)50);
 	int best_filter = 0;
 	int best_sample_shift = 0;
@@ -161,7 +190,13 @@ static uint8_t encode(psx_audio_encoder_channel_state_t *state, int16_t *samples
 		best_filter, best_sample_shift, shift_range);
 }
 
-static void encode_block_xa(int16_t *audio_samples, int audio_samples_limit, uint8_t *data, psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state) {
+static void encode_block_xa(
+	const int16_t *audio_samples,
+	int audio_samples_limit,
+	uint8_t *data,
+	psx_audio_xa_settings_t settings,
+	psx_audio_encoder_state_t *state
+) {
 	if (settings.bits_per_sample == 4) {
 		if (settings.stereo) {
 			data[0]  = encode(&(state->left),  audio_samples,            audio_samples_limit,        2, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
@@ -258,7 +293,7 @@ static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, in
 int psx_audio_xa_encode(
 	psx_audio_xa_settings_t settings,
 	psx_audio_encoder_state_t *state,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int lba,
 	uint8_t *output
@@ -306,7 +341,7 @@ void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *out
 
 int psx_audio_xa_encode_simple(
 	psx_audio_xa_settings_t settings,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int lba,
 	uint8_t *output
@@ -320,7 +355,7 @@ int psx_audio_xa_encode_simple(
 
 int psx_audio_spu_encode(
 	psx_audio_encoder_channel_state_t *state,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int pitch,
 	uint8_t *output
@@ -340,7 +375,7 @@ int psx_audio_spu_encode(
 	return buffer - output;
 }
 
-int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start) {
+int psx_audio_spu_encode_simple(const int16_t *samples, int sample_count, uint8_t *output, int loop_start) {
 	psx_audio_encoder_channel_state_t state;
 	memset(&state, 0, sizeof(psx_audio_encoder_channel_state_t));
 	int length = psx_audio_spu_encode(&state, samples, sample_count, 1, output);
diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h
index 0d9d171..67733dd 100644
--- a/libpsxav/libpsxav.h
+++ b/libpsxav/libpsxav.h
@@ -75,26 +75,26 @@ uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings);
 int psx_audio_xa_encode(
 	psx_audio_xa_settings_t settings,
 	psx_audio_encoder_state_t *state,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int lba,
 	uint8_t *output
 );
 int psx_audio_xa_encode_simple(
 	psx_audio_xa_settings_t settings,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int lba,
 	uint8_t *output
 );
 int psx_audio_spu_encode(
 	psx_audio_encoder_channel_state_t *state,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int pitch,
 	uint8_t *output
 );
-int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start);
+int psx_audio_spu_encode_simple(const int16_t *samples, int sample_count, uint8_t *output, int loop_start);
 void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length);
 
 // cdrom.c
diff --git a/psxavenc/args.c b/psxavenc/args.c
index fb74a1f..93c3ef0 100644
--- a/psxavenc/args.c
+++ b/psxavenc/args.c
@@ -125,7 +125,7 @@ static const char *const general_options_help =
 	"                        vagi:   [A.] .vag SPU-ADPCM interleaved\n"
 	"                        str:    [AV] .str video + XA-ADPCM, 2336-byte sectors\n"
 	"                        strcd:  [AV] .str video + XA-ADPCM, 2352-byte sectors\n"
-	"                        strspu: [AV] .str video + SPU-ADPCM, 2048-byte sectors\n"
+	//"                        strspu: [AV] .str video + SPU-ADPCM, 2048-byte sectors\n"
 	"                        strv:   [.V] .str video, 2048-byte sectors\n"
 	"                        sbs:    [.V] .sbs video\n"
 	"    -R key=value,...  Pass custom options to libswresample (see FFmpeg docs)\n"
@@ -498,7 +498,7 @@ static const char *const general_usage =
 	"    psxavenc -t spu|vag   [spu-options]                             <in> <out.vag>\n"
 	"    psxavenc -t spui|vagi [spui-options]                            <in> <out.vag>\n"
 	"    psxavenc -t str|strcd [xa-options]   [bs-options] [str-options] <in> <out.str>\n"
-	"    psxavenc -t strspu    [spui-options] [bs-options] [str-options] <in> <out.str>\n"
+	//"    psxavenc -t strspu    [spui-options] [bs-options] [str-options] <in> <out.str>\n"
 	"    psxavenc -t strv                     [bs-options] [str-options] <in> <out.str>\n"
 	"    psxavenc -t sbs                      [bs-options] [sbs-options] <in> <out.sbs>\n"
 	"\n";
diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c
index 1150364..cb446df 100644
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -22,6 +22,7 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
+#include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
@@ -432,7 +433,9 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 		uint8_t sector[PSX_CDROM_SECTOR_SIZE];
 		bool is_video_sector;
 
-		if (args->flags & FLAG_STR_TRAILING_AUDIO)
+		if (audio_samples_per_sector == 0)
+			is_video_sector = true;
+		else if (args->flags & FLAG_STR_TRAILING_AUDIO)
 			is_video_sector = (sector_count % interleave) < video_sectors_per_block;
 		else
 			is_video_sector = (sector_count % interleave) > 0;
@@ -497,7 +500,128 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 }
 
+// Encodes the decoder's video frames into a .str file made up of 2048-byte
+// sectors and writes it to `output`.
+//
+// Only video-only input is currently handled. If the decoder has an audio
+// stream an error is printed to stderr and nothing is written, as SPU-ADPCM
+// audio sectors have not been implemented yet.
 void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output) {
-	// TODO: implement
+	// TODO: implement audio support. This has to be a runtime check rather than
+	// an assert(), which is compiled out when NDEBUG is defined and would let
+	// the code below run with an interleave layout that was never set up.
+	if (decoder->state.audio_stream != NULL) {
+		fprintf(stderr, "SPU-ADPCM audio in .str files is not currently supported\n");
+		return;
+	}
+
+	// Video-only layout: 0/1 audio, 1/1 video. Not const, as the audio branch of
+	// the loop below adjusts video_sectors_per_block.
+	int interleave = 1;
+	int audio_samples_per_sector = 0;
+	int video_sectors_per_block = 1;
+
+	// NOTE(review): the bool returned by init_mdec_encoder() is ignored here;
+	// confirm whether a failed initialization needs to be handled.
+	mdec_encoder_t encoder;
+	init_mdec_encoder(&encoder, args->video_codec, args->video_width, args->video_height);
+
+	// e.g. 15fps = (150*7/8/15) = 8.75 blocks per frame
+	encoder.state.frame_block_base_overflow = (75 * args->str_cd_speed) * video_sectors_per_block * args->str_fps_den;
+	encoder.state.frame_block_overflow_den = interleave * args->str_fps_num;
+	double frame_size = (double)encoder.state.frame_block_base_overflow / (double)encoder.state.frame_block_overflow_den;
+
+	if (!(args->flags & FLAG_QUIET))
+		fprintf(stderr, "Frame size: %.2f sectors\n", frame_size);
+
+	// Buffer for one frame: 2016 bytes per sector, rounded up to whole sectors
+	encoder.state.frame_output = malloc(2016 * (int)ceil(frame_size));
+
+	if (encoder.state.frame_output == NULL) {
+		fprintf(stderr, "Failed to allocate frame buffer\n");
+		destroy_mdec_encoder(&encoder);
+		return;
+	}
+
+	encoder.state.frame_index = 0;
+	encoder.state.frame_data_offset = 0;
+	encoder.state.frame_max_size = 0;
+	encoder.state.frame_block_overflow_num = 0;
+	encoder.state.quant_scale_sum = 0;
+
+	// FIXME: this needs an extra frame to prevent A/V desync
+	int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size);
+
+	if (frames_needed < 2)
+		frames_needed = 2;
+
+	int sector_count = 0;
+
+	// Keep emitting sectors until the input has ended and frame_data_offset has
+	// caught up with frame_max_size
+	for (; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; sector_count++) {
+		ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed);
+
+		uint8_t sector[2048];
+		bool is_video_sector;
+
+		if (audio_samples_per_sector == 0)
+			is_video_sector = true;
+		else if (args->flags & FLAG_STR_TRAILING_AUDIO)
+			is_video_sector = (sector_count % interleave) < video_sectors_per_block;
+		else
+			is_video_sector = (sector_count % interleave) > 0;
+
+		if (is_video_sector) {
+			init_sector_buffer_video(args, sector, sector_count);
+
+			int frames_used = encode_sector_str(
+				&encoder,
+				args->format,
+				args->str_video_id,
+				decoder->video_frames,
+				sector
+			);
+
+			retire_av_data(decoder, 0, frames_used);
+		} else {
+			// TODO: implement. This branch cannot be reached while
+			// audio_samples_per_sector is always zero; the sector is cleared
+			// so that no indeterminate data is written if it ever is in a
+			// build with assertions disabled.
+			assert(false);
+			memset(sector, 0, sizeof(sector));
+
+			int samples_length = decoder->audio_sample_count / args->audio_channels;
+
+			if (samples_length > audio_samples_per_sector)
+				samples_length = audio_samples_per_sector;
+
+			// FIXME: this is an extremely hacky way to handle audio tracks
+			// shorter than the video track
+			if (!samples_length)
+				video_sectors_per_block++;
+
+			retire_av_data(decoder, samples_length * args->audio_channels, 0);
+		}
+
+		fwrite(sector, sizeof(sector), 1, output);
+
+		time_t t = get_elapsed_time();
+
+		if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+			fprintf(
+				stderr,
+				"\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
+				encoder.state.frame_index,
+				sector_count,
+				(double)encoder.state.quant_scale_sum / (double)encoder.state.frame_index,
+				(double)(encoder.state.frame_index * args->str_fps_den) / (double)(t * args->str_fps_num)
+			);
+		}
+	}
+
+	free(encoder.state.frame_output);
+	destroy_mdec_encoder(&encoder);
 }
 
 void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output) {
diff --git a/psxavenc/main.c b/psxavenc/main.c
index 0f5e225..9e584c2 100644
--- a/psxavenc/main.c
+++ b/psxavenc/main.c
@@ -146,6 +146,10 @@ int main(int argc, const char **argv) {
 			break;
 
 		case FORMAT_STRSPU:
+			// TODO: implement and remove this check
+			fprintf(stderr, "This format is not currently supported\n");
+			break;
+
 		case FORMAT_STRV:
 			if (!(args.flags & FLAG_QUIET)) {
 				if (decoder.state.audio_stream)
diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c
index 602e4cc..ba3d043 100644
--- a/psxavenc/mdec.c
+++ b/psxavenc/mdec.c
@@ -577,7 +577,7 @@ void destroy_mdec_encoder(mdec_encoder_t *encoder) {
 	}
 }
 
-void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
+void encode_frame_bs(mdec_encoder_t *encoder, const uint8_t *video_frame) {
 	mdec_encoder_state_t *state = &(encoder->state);
 
 	assert(state->dct_context);
@@ -758,15 +758,12 @@ int encode_sector_str(
 	mdec_encoder_t *encoder,
 	format_t format,
 	uint16_t str_video_id,
-	uint8_t *video_frames,
+	const uint8_t *video_frames,
 	uint8_t *output
 ) {
 	mdec_encoder_state_t *state = &(encoder->state);
-	int last_frame_index = state->frame_index;
 	int frame_size = encoder->video_width * encoder->video_height * 2;
-
-	uint8_t header[32];
-	memset(header, 0, sizeof(header));
+	int frames_used = 0;
 
 	while (state->frame_data_offset >= state->frame_max_size) {
 		state->frame_index++;
@@ -779,8 +776,12 @@ int encode_sector_str(
 
 		encode_frame_bs(encoder, video_frames);
 		video_frames += frame_size;
+		frames_used++;
 	}
 
+	uint8_t header[32];
+	memset(header, 0, sizeof(header));
+
 	// STR version
 	header[0x000] = 0x60;
 	header[0x001] = 0x01;
@@ -831,5 +832,5 @@ int encode_sector_str(
 	memcpy(output + offset + 0x020, state->frame_output + state->frame_data_offset, 2016);
 
 	state->frame_data_offset += 2016;
-	return state->frame_index - last_frame_index;
+	return frames_used;
 }
diff --git a/psxavenc/mdec.h b/psxavenc/mdec.h
index 4b8e026..ed94f2e 100644
--- a/psxavenc/mdec.h
+++ b/psxavenc/mdec.h
@@ -64,11 +64,11 @@ typedef struct {
 
 bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int video_width, int video_height);
 void destroy_mdec_encoder(mdec_encoder_t *encoder);
-void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame);
+void encode_frame_bs(mdec_encoder_t *encoder, const uint8_t *video_frame);
 int encode_sector_str(
 	mdec_encoder_t *encoder,
 	format_t format,
 	uint16_t str_video_id,
-	uint8_t *video_frames,
+	const uint8_t *video_frames,
 	uint8_t *output
 );