From 982fad256ef10832f3dfc893b753ea189a05f2c3 Mon Sep 17 00:00:00 2001
From: spicyjpeg <thatspicyjpeg@gmail.com>
Date: Tue, 25 Feb 2025 18:54:53 +0100
Subject: [PATCH 1/8] Add .editorconfig, .gitignore and FFmpeg deprecation note

---
 .editorconfig       |  9 +++++++++
 .gitignore          |  6 ++++++
 meson.build         | 26 +++++++++++++-------------
 psxavenc/decoding.c |  1 +
 4 files changed, 29 insertions(+), 13 deletions(-)
 create mode 100644 .editorconfig
 create mode 100644 .gitignore

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..4dd2432
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,9 @@
+root = true
+
+[*]
+indent_style             = tab
+indent_size              = 4
+charset                  = utf-8
+end_of_line              = lf
+trim_trailing_whitespace = true
+insert_final_newline     = true
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5feaf27
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+desktop.ini
+.DS_Store
+.vscode/
+build/
+.cache/
+*.code-workspace
diff --git a/meson.build b/meson.build
index 99b6249..c70759c 100644
--- a/meson.build
+++ b/meson.build
@@ -9,24 +9,24 @@ configure_file(output: 'config.h', configuration: conf_data)
 libm_dep = meson.get_compiler('c').find_library('m')
 
 ffmpeg = [
-  dependency('libavformat'),
-  dependency('libavcodec'),
-  dependency('libavutil'),
-  dependency('libswresample'),
-  dependency('libswscale')
+	dependency('libavformat'),
+	dependency('libavcodec'),
+	dependency('libavutil'),
+	dependency('libswresample'),
+	dependency('libswscale')
 ]
 
 libpsxav = static_library('psxav', [
-  'libpsxav/adpcm.c',
-  'libpsxav/cdrom.c',
-  'libpsxav/libpsxav.h'
+	'libpsxav/adpcm.c',
+	'libpsxav/cdrom.c',
+	'libpsxav/libpsxav.h'
 ])
 libpsxav_dep = declare_dependency(include_directories: include_directories('libpsxav'), link_with: libpsxav)
 
 executable('psxavenc', [
-  'psxavenc/cdrom.c',
-  'psxavenc/decoding.c',
-  'psxavenc/filefmt.c',
-  'psxavenc/mdec.c',
-  'psxavenc/psxavenc.c'
+	'psxavenc/cdrom.c',
+	'psxavenc/decoding.c',
+	'psxavenc/filefmt.c',
+	'psxavenc/mdec.c',
+	'psxavenc/psxavenc.c'
 ], dependencies: [libm_dep, ffmpeg, libpsxav_dep], install: true)
diff --git a/psxavenc/decoding.c b/psxavenc/decoding.c
index 99895ab..54a9124 100644
--- a/psxavenc/decoding.c
+++ b/psxavenc/decoding.c
@@ -399,6 +399,7 @@ void close_av_data(settings_t *settings)
 
 	av_frame_free(&(av->frame));
 	swr_free(&(av->resampler));
+	// Deprecated, kept for compatibility with older FFmpeg versions.
 	avcodec_close(av->audio_codec_context);
 	avcodec_free_context(&(av->audio_codec_context));
 	avformat_free_context(av->format);

From 7b5953322f789e3dea303f39858a51b40dba2057 Mon Sep 17 00:00:00 2001
From: spicyjpeg <thatspicyjpeg@gmail.com>
Date: Fri, 28 Feb 2025 01:26:41 +0100
Subject: [PATCH 2/8] Add new argument parser

---
 meson.build         |   1 +
 psxavenc/args.c     | 711 ++++++++++++++++++++++++++++++++++++++++++++
 psxavenc/args.h     |  93 ++++++
 psxavenc/psxavenc.c |   4 +-
 4 files changed, 807 insertions(+), 2 deletions(-)
 create mode 100644 psxavenc/args.c
 create mode 100644 psxavenc/args.h

diff --git a/meson.build b/meson.build
index c70759c..abd8a35 100644
--- a/meson.build
+++ b/meson.build
@@ -24,6 +24,7 @@ libpsxav = static_library('psxav', [
 libpsxav_dep = declare_dependency(include_directories: include_directories('libpsxav'), link_with: libpsxav)
 
 executable('psxavenc', [
+	'psxavenc/args.c',
 	'psxavenc/cdrom.c',
 	'psxavenc/decoding.c',
 	'psxavenc/filefmt.c',
diff --git a/psxavenc/args.c b/psxavenc/args.c
new file mode 100644
index 0000000..03d0695
--- /dev/null
+++ b/psxavenc/args.c
@@ -0,0 +1,711 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+Copyright (c) 2023, 2025 spicyjpeg
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "args.h"
+
+#define INVALID_PARAM -1
+
+static int parse_int(
+	int *output, // receives the number; may be written even if the range check then fails
+	const char *name, // human-readable name used in error messages
+	const char *value, // argument following the option, NULL if there is none
+	int min_value, // inclusive lower bound
+	int max_value // inclusive upper bound, or negative for "no upper bound"
+) { // returns 2 (option and value consumed) on success, INVALID_PARAM on error
+	char *end = NULL;
+	if (value == NULL) {
+		fprintf(stderr, "Missing %s value after option\n", name);
+		return INVALID_PARAM;
+	}
+	long parsed = strtol(value, &end, 0); // base 0: decimal, 0x hex or leading-0 octal
+	if (end == value || *end != 0) { // no digits at all, or trailing junk ("12abc")
+		fprintf(stderr, "Invalid %s: %s (must be an integer)\n", name, value);
+		return INVALID_PARAM;
+	}
+	*output = (int)parsed; // a value that does not fit in int fails the first test below
+	if (*output != parsed || *output < min_value || (max_value >= 0 && *output > max_value)) {
+		if (max_value >= 0)
+			fprintf(stderr, "Invalid %s: %ld (must be in %d-%d range)\n", name, parsed, min_value, max_value);
+		else
+			fprintf(stderr, "Invalid %s: %ld (must be %d or greater)\n", name, parsed, min_value);
+		return INVALID_PARAM;
+	}
+	return 2;
+}
+
+static int parse_int_one_of(
+	int *output, // receives value_a or value_b; left untouched on error
+	const char *name, // human-readable name used in error messages
+	const char *value, // argument following the option, NULL if there is none
+	int value_a,
+	int value_b
+) { // returns 2 (option and value consumed) on success, INVALID_PARAM on error
+	char *end = NULL;
+	if (value == NULL) {
+		fprintf(stderr, "Missing %s value after option\n", name);
+		return INVALID_PARAM;
+	}
+	long parsed = strtol(value, &end, 0);
+	// Besides the two allowed numbers, also reject empty text and trailing junk ("4x").
+	if (end == value || *end != 0 || (parsed != value_a && parsed != value_b)) {
+		fprintf(stderr, "Invalid %s: %s (must be %d or %d)\n", name, value, value_a, value_b);
+		return INVALID_PARAM;
+	}
+	*output = (int)parsed; // exact: parsed equals one of the two int choices
+	return 2;
+}
+
+static int parse_enum(
+	int *output, // receives the index of the matching entry in choices
+	const char *name, // human-readable name used in error messages
+	const char *value, // argument following the option, NULL if there is none
+	const char *const *choices, // accepted spellings, compared case-sensitively
+	int count // number of entries in choices
+) { // returns 2 (option and value consumed) on a match, INVALID_PARAM otherwise
+	if (value == NULL) {
+		fprintf(stderr, "Missing %s value after option\n", name);
+		return INVALID_PARAM;
+	}
+	for (int i = 0; i < count; i++) {
+		if (strcmp(value, choices[i]) == 0) {
+			*output = i;
+			return 2;
+		}
+	}
+	// No match: report the offending text followed by every accepted spelling.
+	fprintf(
+		stderr,
+		"Invalid %s: %s\n"
+		"Must be one of the following values:\n",
+		name,
+		value
+	);
+	for (int i = 0; i < count; i++)
+		fprintf(stderr, "    %s\n", choices[i]);
+	return INVALID_PARAM;
+}
+
+static const char *const general_options_help =
+	"General options:\n"
+	"    -h                Show this help message and exit\n"
+	"    -V                Show version information and exit\n"
+	"    -q                Suppress all non-error messages\n"
+	"    -t format         Use (or show help for) specified output format\n"
+	"                        xa:     [A.] XA-ADPCM, 2336-byte sectors\n"
+	"                        xacd:   [A.] XA-ADPCM, 2352-byte sectors\n"
+	"                        spu:    [A.] raw SPU-ADPCM mono data\n"
+	"                        spui:   [A.] raw SPU-ADPCM interleaved data\n"
+	"                        vag:    [A.] .vag SPU-ADPCM mono\n"
+	"                        vagi:   [A.] .vag SPU-ADPCM interleaved\n"
+	"                        str:    [AV] .str video, 2336-byte sectors\n"
+	"                        strcd:  [AV] .str video, 2352-byte sectors\n"
+	"                        strspu: [AV] .str video, 2048-byte sectors\n"
+	"                        strv:   [.V] .str video, 2048-byte sectors\n"
+	"                        sbs:    [.V] .sbs video\n"
+	"    -R key=value,...  Pass custom options to libswresample (see FFmpeg docs)\n"
+	"    -S key=value,...  Pass custom options to libswscale (see FFmpeg docs)\n"
+	"\n";
+
+static const char *const format_names[NUM_FORMATS] = {
+	"xa",
+	"xacd",
+	"spu",
+	"vag",
+	"spui",
+	"vagi",
+	"str",
+	"strcd",
+	"strspu",
+	"strv",
+	"sbs"
+};
+
+static void init_default_args(args_t *args) { // fills in defaults for args->format, which must already be set
+	args->flags = 0;
+	// NOTE(review): this runs from the -t handler, so flags and paths given before -t are lost.
+	args->input_file = NULL;
+	args->output_file = NULL;
+	args->swresample_options = NULL;
+	args->swscale_options = NULL;
+	// Sample rate: 37800 Hz for the XA-based formats (xa, xacd, str, strcd), 44100 Hz otherwise.
+	if (
+		args->format == FORMAT_XA || args->format == FORMAT_XACD ||
+		args->format == FORMAT_STR || args->format == FORMAT_STRCD
+	)
+		args->audio_frequency = 37800;
+	else
+		args->audio_frequency = 44100;
+	if (args->format == FORMAT_SPU || args->format == FORMAT_VAG)
+		args->audio_channels = 1;
+	else
+		args->audio_channels = 2;
+	// The remaining audio, video and .str defaults are the same for every format.
+	args->audio_bit_depth = 4;
+	args->audio_xa_file = 0;
+	args->audio_xa_channel = 0;
+	args->audio_interleave = 2048;
+	args->audio_loop_point = -1;
+
+	args->video_codec = BS_CODEC_V2;
+	args->video_width = 320;
+	args->video_height = 240;
+
+	args->str_fps_num = 15;
+	args->str_fps_den = 1;
+	args->str_cd_speed = 2;
+	// Default alignment: 64 for spu/vag, 8192 for sbs, 2048 for everything else.
+	if (args->format == FORMAT_SPU || args->format == FORMAT_VAG)
+		args->alignment = 64;
+	else if (args->format == FORMAT_SBS)
+		args->alignment = 8192;
+	else
+		args->alignment = 2048;
+}
+
+static int parse_general_option(args_t *args, char option, const char *param) {
+	int parsed;
+
+	switch (option) {
+		case '-':
+			args->flags |= FLAG_IGNORE_OPTIONS;
+			return 1;
+
+		case 'h':
+			args->flags |= FLAG_PRINT_HELP;
+			return 1;
+
+		case 'V':
+			args->flags |= FLAG_PRINT_VERSION;
+			return 1;
+
+		case 'q':
+			args->flags |= FLAG_QUIET | FLAG_HIDE_PROGRESS;
+			return 1;
+
+		case 't':
+			parsed = parse_enum(&(args->format), "format", param, format_names, NUM_FORMATS);
+			if (parsed > 0)
+				init_default_args(args);
+			return parsed;
+
+		case 'R':
+			if (param == NULL) {
+				fprintf(stderr, "Missing libswresample parameter list after option\n");
+				return INVALID_PARAM;
+			}
+
+			args->swresample_options = param;
+			return 2;
+
+		case 'S':
+			if (param == NULL) {
+				fprintf(stderr, "Missing libswscale parameter list after option\n");
+				return INVALID_PARAM;
+			}
+
+			args->swscale_options = param;
+			return 2;
+
+		default:
+			return 0;
+	}
+}
+
+static const char *const xa_options_help =
+	"XA-ADPCM options:\n"
+	"    [-f 18900|37800] [-c 1|2] [-b 4|8] [-F 0-255] [-C 0-31]\n"
+	"\n"
+	"    -f 18900|37800    Use specified sample rate (default 37800)\n"
+	"    -c 1|2            Use specified channel count (default 2)\n"
+	"    -b 4|8            Use specified bit depth (default 4)\n"
+	"    -F 0-255          Set CD-XA file number (for both audio and video, default 0)\n"
+	"    -C 0-31           Set CD-XA channel number (for both audio and video, default 0)\n"
+	"\n";
+
+static int parse_xa_option(args_t *args, char option, const char *param) {
+	switch (option) {
+		case 'f':
+			return parse_int_one_of(&(args->audio_frequency), "sample rate", param, 18900, 37800);
+
+		case 'c':
+			return parse_int_one_of(&(args->audio_channels), "channel count", param, 1, 2);
+
+		case 'b':
+			return parse_int_one_of(&(args->audio_bit_depth), "bit depth", param, 4, 8);
+
+		case 'F':
+			return parse_int(&(args->audio_xa_file), "file number", param, 0, 255);
+
+		case 'C':
+			return parse_int(&(args->audio_xa_channel), "channel number", param, 0, 31);
+
+		default:
+			return 0;
+	}
+}
+
+static const char *const spu_options_help =
+	"SPU-ADPCM options:\n"
+	"    [-f freq] [-a size] [-l ms | -L] [-D]\n"
+	"\n"
+	"    -f freq           Use specified sample rate (default 44100)\n"
+	"    -a size           Pad audio data excluding header to multiple of given size (default 64)\n"
+	"    -l ms             Add loop point at specified offset (in milliseconds)\n"
+	"    -L                Set loop end flag at the end of data but do not add a loop point\n"
+	"    -D                Do not prepend encoded data with a dummy silent block\n"
+	"\n";
+
+static int parse_spu_option(args_t *args, char option, const char *param) {
+	switch (option) {
+		case 'f':
+			return parse_int(&(args->audio_frequency), "sample rate", param, 1, -1);
+
+		case 'a':
+			return parse_int(&(args->alignment), "alignment", param, 1, -1);
+
+		case 'l':
+			args->flags |= FLAG_SPU_LOOP_END;
+			return parse_int(&(args->audio_loop_point), "loop offset", param, 0, -1);
+
+		case 'L':
+			args->flags |= FLAG_SPU_LOOP_END;
+			return 1;
+
+		case 'D':
+			args->flags |= FLAG_SPU_NO_LEADING_DUMMY;
+			return 1;
+
+		default:
+			return 0;
+	}
+}
+
+static const char *const spui_options_help =
+	"Interleaved SPU-ADPCM options:\n"
+	"    [-f freq] [-c channels] [-i size] [-a size] [-L] [-D]\n"
+	"\n"
+	"    -f freq           Use specified sample rate (default 44100)\n"
+	"    -c channels       Use specified channel count (default 2)\n"
+	"    -i size           Use specified channel interleave size (default 2048)\n"
+	"    -a size           Pad .vag header and each audio chunk to multiples of given size\n"
+	"                      (default 2048)\n"
+	"    -L                Set loop end flag at the end of each audio chunk\n"
+	"    -D                Do not prepend first chunk's data with a dummy silent block\n"
+	"\n";
+
+static int parse_spui_option(args_t *args, char option, const char *param) {
+	int parsed;
+
+	switch (option) {
+		case 'f':
+			return parse_int(&(args->audio_frequency), "sample rate", param, 1, -1);
+
+		case 'c':
+			return parse_int(&(args->audio_channels), "channel count", param, 1, -1);
+
+		case 'i':
+			parsed = parse_int(&(args->audio_interleave), "interleave", param, 16, -1);
+
+			// Round up to nearest multiple of 16
+			args->audio_interleave = (args->audio_interleave + 15) & ~15;
+			return parsed;
+
+		case 'a':
+			return parse_int(&(args->alignment), "alignment", param, 1, -1);
+
+		case 'L':
+			args->flags |= FLAG_SPU_LOOP_END;
+			return 1;
+
+		case 'D':
+			args->flags |= FLAG_SPU_NO_LEADING_DUMMY;
+			return 1;
+
+		default:
+			return 0;
+	}
+}
+
+static const char *const bs_options_help =
+	"Video options:\n"
+	"    [-v v2|v3|v3dc] [-s WxH] [-I]\n"
+	"\n"
+	"    -v codec          Use specified video codec\n"
+	"                        v2:   MDEC BS v2 (default)\n"
+	"                        v3:   MDEC BS v3\n"
+	"                        v3dc: MDEC BS v3, expect decoder to wrap DC coefficients\n"
+	"    -s WxH            Rescale input file to fit within specified size\n"
+	"                      (16x16-640x512 in 16-pixel increments, default 320x240)\n"
+	"    -I                Force stretching to given size without preserving aspect ratio\n"
+	"\n";
+
+const char *const bs_codec_names[NUM_BS_CODECS] = {
+	"v2",
+	"v3",
+	"v3dc"
+};
+
+static int parse_bs_option(args_t *args, char option, const char *param) {
+	char *next = NULL;
+
+	switch (option) {
+		case 'v':
+			return parse_enum(&(args->video_codec), "video codec", param, bs_codec_names, NUM_BS_CODECS);
+
+		case 's':
+			if (param == NULL) {
+				fprintf(stderr, "Missing video size after option\n");
+				return INVALID_PARAM;
+			}
+
+			args->video_width = strtol(param, &next, 10);
+
+			if (next && *next == 'x') {
+				args->video_height = strtol(next + 1, NULL, 10);
+			} else {
+				fprintf(stderr, "Invalid video size (must be specified as <width>x<height>)\n");
+				return INVALID_PARAM;
+			}
+
+			if (args->video_width < 16 || args->video_width > 640) {
+				fprintf(stderr, "Invalid video width: %d (must be in 16-640 range)\n", args->video_width);
+				return INVALID_PARAM;
+			}
+			if (args->video_height < 16 || args->video_height > 512) {
+				fprintf(stderr, "Invalid video height: %d (must be in 16-512 range)\n", args->video_height);
+				return INVALID_PARAM;
+			}
+
+			// Round up to nearest multiples of 16
+			args->video_width = (args->video_width + 15) & ~15;
+			args->video_height = (args->video_height + 15) & ~15;
+			return 2;
+
+		case 'I':
+			args->flags |= FLAG_BS_IGNORE_ASPECT;
+			return 1;
+
+		default:
+			return 0;
+	}
+}
+
+static const char *const str_options_help = // printed by print_help() for the .str formats
+	".str container options:\n"
+	"    [-r num[/den]] [-x 1|2] [-A]\n"
+	"\n"
+	"    -r num[/den]      Set video frame rate to specified integer or fraction (default 15)\n"
+	"    -x 1|2            Set CD-ROM speed the file is meant to be played at (default 2)\n"
+	"    -A                Place audio sectors after corresponding video sectors\n"
+	"                      (rather than ahead of them)\n"
+	"\n";
+
+static int parse_str_option(args_t *args, char option, const char *param) {
+	char *next = NULL;
+	int fps;
+
+	switch (option) {
+		case 'r':
+			if (param == NULL) {
+				fprintf(stderr, "Missing frame rate value after option\n");
+				return INVALID_PARAM;
+			}
+
+			args->str_fps_num = strtol(param, &next, 10);
+
+			if (next && *next == '/')
+				args->str_fps_den = strtol(next + 1, NULL, 10);
+			else
+				args->str_fps_den = 1;
+
+			if (args->str_fps_num <= 0 || args->str_fps_den <= 0) {
+				fprintf(stderr, "Invalid frame rate (must be a non-zero integer or fraction)\n");
+				return INVALID_PARAM;
+			}
+
+			fps = args->str_fps_num / args->str_fps_den;
+
+			if (fps < 1 || fps > 60) {
+				fprintf(stderr, "Invalid frame rate: %d/%d (must be in 1-60 range)\n", args->str_fps_num, args->str_fps_den);
+				return INVALID_PARAM;
+			}
+			return 2;
+
+		case 'x':
+			return parse_int_one_of(&(args->str_cd_speed), "CD-ROM speed", param, 1, 2);
+
+		case 'A':
+			args->flags |= FLAG_STR_TRAILING_AUDIO;
+			return 1;
+
+		default:
+			return 0;
+	}
+}
+
+static const char *const sbs_options_help =
+	".sbs container options:\n"
+	"    [-a size]\n"
+	"\n"
+	"    -a size           Set size of each video frame (default 8192)\n"
+	"\n";
+
+static int parse_sbs_option(args_t *args, char option, const char *param) {
+	switch (option) {
+		case 'a':
+			return parse_int(&(args->alignment), "video frame size", param, 256, -1);
+
+		default:
+			return 0;
+	}
+}
+
+static const char *const general_usage =
+	"Usage:\n"
+	"    psxavenc -t xa|xacd   [xa-options]                              <in> <out.xa>\n"
+	"    psxavenc -t spu|vag   [spu-options]                             <in> <out.vag>\n"
+	"    psxavenc -t spui|vagi [spui-options]                            <in> <out.vag>\n"
+	"    psxavenc -t str|strcd [xa-options]   [bs-options] [str-options] <in> <out.str>\n"
+	"    psxavenc -t strspu    [spui-options] [bs-options] [str-options] <in> <out.str>\n"
+	"    psxavenc -t strv                     [bs-options] [str-options] <in> <out.str>\n"
+	"    psxavenc -t sbs                      [bs-options] [sbs-options] <in> <out.sbs>\n"
+	"\n";
+
+static const struct {
+	const char *usage;
+	const char *audio_options_help;
+	const char *video_options_help;
+	const char *container_options_help;
+	int (*parse_audio_option)(args_t *, char, const char *);
+	int (*parse_video_option)(args_t *, char, const char *);
+	int (*parse_container_option)(args_t *, char, const char *);
+} format_info[NUM_FORMATS] = {
+	{
+		.usage = "psxavenc -t xa [xa-options] <in> <out.xa>",
+		.audio_options_help = xa_options_help,
+		.video_options_help = NULL,
+		.container_options_help = NULL,
+		.parse_audio_option = parse_xa_option,
+		.parse_video_option = NULL,
+		.parse_container_option = NULL
+	}, {
+		.usage = "psxavenc -t xacd [xa-options] <in> <out.xa>",
+		.audio_options_help = xa_options_help,
+		.video_options_help = NULL,
+		.container_options_help = NULL,
+		.parse_audio_option = parse_xa_option,
+		.parse_video_option = NULL,
+		.parse_container_option = NULL
+	}, {
+		.usage = "psxavenc -t spu [spu-options] <in> <out>",
+		.audio_options_help = spu_options_help,
+		.video_options_help = NULL,
+		.container_options_help = NULL,
+		.parse_audio_option = parse_spu_option,
+		.parse_video_option = NULL,
+		.parse_container_option = NULL
+	}, {
+		.usage = "psxavenc -t vag [spu-options] <in> <out.vag>",
+		.audio_options_help = spu_options_help,
+		.video_options_help = NULL,
+		.container_options_help = NULL,
+		.parse_audio_option = parse_spu_option,
+		.parse_video_option = NULL,
+		.parse_container_option = NULL
+	}, {
+		.usage = "psxavenc -t spui [spui-options] <in> <out>",
+		.audio_options_help = spui_options_help,
+		.video_options_help = NULL,
+		.container_options_help = NULL,
+		.parse_audio_option = parse_spui_option,
+		.parse_video_option = NULL,
+		.parse_container_option = NULL
+	}, {
+		.usage = "psxavenc -t vagi [spui-options] <in> <out.vag>",
+		.audio_options_help = spui_options_help,
+		.video_options_help = NULL,
+		.container_options_help = NULL,
+		.parse_audio_option = parse_spui_option,
+		.parse_video_option = NULL,
+		.parse_container_option = NULL
+	}, {
+		.usage = "psxavenc -t str [xa-options] [bs-options] [str-options] <in> <out.str>",
+		.audio_options_help = xa_options_help,
+		.video_options_help = bs_options_help,
+		.container_options_help = str_options_help,
+		.parse_audio_option = parse_xa_option,
+		.parse_video_option = parse_bs_option,
+		.parse_container_option = parse_str_option
+	}, {
+		.usage = "psxavenc -t strcd [xa-options] [bs-options] [str-options] <in> <out.str>",
+		.audio_options_help = xa_options_help,
+		.video_options_help = bs_options_help,
+		.container_options_help = str_options_help,
+		.parse_audio_option = parse_xa_option,
+		.parse_video_option = parse_bs_option,
+		.parse_container_option = parse_str_option
+	}, {
+		.usage = "psxavenc -t strspu [spui-options] [bs-options] [str-options] <in> <out.str>",
+		.audio_options_help = spui_options_help,
+		.video_options_help = bs_options_help,
+		.container_options_help = str_options_help,
+		.parse_audio_option = parse_spui_option,
+		.parse_video_option = parse_bs_option,
+		.parse_container_option = parse_str_option
+	}, {
+		.usage = "psxavenc -t strv [bs-options] [str-options] <in> <out.str>",
+		.audio_options_help = NULL,
+		.video_options_help = bs_options_help,
+		.container_options_help = str_options_help,
+		.parse_audio_option = NULL,
+		.parse_video_option = parse_bs_option,
+		.parse_container_option = parse_str_option
+	}, {
+		.usage = "psxavenc -t sbs [bs-options] [sbs-options] <in> <out.sbs>",
+		.audio_options_help = NULL,
+		.video_options_help = bs_options_help,
+		.container_options_help = sbs_options_help,
+		.parse_audio_option = NULL,
+		.parse_video_option = parse_bs_option,
+		.parse_container_option = parse_sbs_option
+	}
+};
+
+static int parse_option(args_t *args, char option, const char *param) {
+	int parsed = parse_general_option(args, option, param); // general options take precedence
+	// Result: 1 = flag consumed, 2 = option plus value consumed, 0 = unknown, INVALID_PARAM = bad value.
+	if (parsed == 0 && args->format != FORMAT_INVALID) {
+		if (format_info[args->format].parse_audio_option != NULL)
+			parsed = format_info[args->format].parse_audio_option(args, option, param);
+	}
+	if (parsed == 0 && args->format != FORMAT_INVALID) {
+		if (format_info[args->format].parse_video_option != NULL)
+			parsed = format_info[args->format].parse_video_option(args, option, param);
+	}
+	if (parsed == 0 && args->format != FORMAT_INVALID) {
+		if (format_info[args->format].parse_container_option != NULL)
+			parsed = format_info[args->format].parse_container_option(args, option, param);
+	}
+	if (parsed == 0) {
+		if (args->format == FORMAT_INVALID)
+			fprintf(
+				stderr,
+				"Unknown general option: -%c\n"
+				"(if this is a format-specific option, it shall be passed after -t)\n",
+				option
+			);
+		else
+			fprintf(stderr, "Unknown option for format %s: -%c\n", format_names[args->format], option);
+	}
+	// Every failure has been reported on stderr by this point; parse_args() stops on any result <= 0.
+	return parsed;
+}
+
+static void print_help(format_t format) {
+	if (format == FORMAT_INVALID) {
+		printf(
+			"%s%s%s%s%s%s%s%s",
+			general_usage,
+			general_options_help,
+			xa_options_help,
+			spu_options_help,
+			spui_options_help,
+			bs_options_help,
+			str_options_help,
+			sbs_options_help
+		);
+		return;
+	}
+
+	printf(
+		"Usage:\n"
+		"    %s\n"
+		"\n"
+		"%s",
+		format_info[format].usage,
+		general_options_help
+	);
+	if (format_info[format].audio_options_help != NULL)
+		printf("%s", format_info[format].audio_options_help);
+	if (format_info[format].video_options_help != NULL)
+		printf("%s", format_info[format].video_options_help);
+	if (format_info[format].container_options_help != NULL)
+		printf("%s", format_info[format].container_options_help);
+}
+
+bool parse_args(args_t *args, const char *const *options, int count) {
+	int arg_index = 0;
+	// *args must be initialized by the caller (flags, format, file paths are read here). Returns false on error or after -h.
+	while (arg_index < count) {
+		const char *option = options[arg_index];
+		// Options are exactly "-x". A bare "-" is a path; testing option[1] first keeps option[2] inside the string.
+		if (option[0] == '-' && option[1] != 0 && option[2] == 0 && !(args->flags & FLAG_IGNORE_OPTIONS)) {
+			const char *param;
+			if ((arg_index + 1) < count)
+				param = options[arg_index + 1];
+			else
+				param = NULL;
+			// parse_option() returns how many arguments it consumed (1 or 2), or <= 0 on error.
+			int parsed = parse_option(args, option[1], param);
+			if (parsed <= 0)
+				return false;
+
+			arg_index += parsed;
+			continue;
+		}
+		// Anything else is positional: the input path first, then the output path.
+		if (args->input_file == NULL) {
+			args->input_file = option;
+		} else if (args->output_file == NULL) {
+			args->output_file = option;
+		} else {
+			fprintf(stderr, "There should be no arguments after the output file path\n");
+			return false;
+		}
+		arg_index++;
+	}
+	// A help request is honored before the missing-argument check, but still yields false.
+	if (args->flags & FLAG_PRINT_HELP) {
+		print_help(args->format);
+		return false;
+	}
+	if (args->format == FORMAT_INVALID || args->input_file == NULL || args->output_file == NULL) {
+		fprintf(
+			stderr,
+			"%s"
+			"For more information about the options supported for a given output format, run:\n"
+			"    psxavenc -t <format> -h\n"
+			"To view the full list of supported options, run:\n"
+			"    psxavenc -h\n",
+			general_usage
+		);
+		return false;
+	}
+
+	return true;
+}
diff --git a/psxavenc/args.h b/psxavenc/args.h
new file mode 100644
index 0000000..9249290
--- /dev/null
+++ b/psxavenc/args.h
@@ -0,0 +1,93 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+Copyright (c) 2023, 2025 spicyjpeg
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#pragma once
+
+#include <stdbool.h>
+
+#define NUM_FORMATS 11
+#define NUM_BS_CODECS 3
+
+enum {
+	FLAG_IGNORE_OPTIONS = 1 << 0,
+	FLAG_QUIET = 1 << 1,
+	FLAG_HIDE_PROGRESS = 1 << 2,
+	FLAG_PRINT_HELP = 1 << 3,
+	FLAG_PRINT_VERSION = 1 << 4,
+	FLAG_SPU_LOOP_END = 1 << 5,
+	FLAG_SPU_NO_LEADING_DUMMY = 1 << 6,
+	FLAG_BS_IGNORE_ASPECT = 1 << 7,
+	FLAG_STR_TRAILING_AUDIO = 1 << 8
+};
+
+typedef enum {
+	FORMAT_INVALID = -1,
+	FORMAT_XA,
+	FORMAT_XACD,
+	FORMAT_SPU,
+	FORMAT_VAG,
+	FORMAT_SPUI,
+	FORMAT_VAGI,
+	FORMAT_STR,
+	FORMAT_STRCD,
+	FORMAT_STRSPU,
+	FORMAT_STRV,
+	FORMAT_SBS
+} format_t;
+
+typedef enum {
+	BS_CODEC_INVALID = -1,
+	BS_CODEC_V2,
+	BS_CODEC_V3,
+	BS_CODEC_V3DC
+} bs_codec_t;
+
+typedef struct {
+	int flags; // bitwise OR of the FLAG_* values above
+
+	format_t format; // output format selected with -t
+	const char *input_file;
+	const char *output_file;
+	const char *swresample_options; // raw "key=value,..." text given with -R, or NULL
+	const char *swscale_options; // raw "key=value,..." text given with -S, or NULL
+
+	int audio_frequency; // Hz; 18900 or 37800 for XA audio, any positive rate for SPU audio
+	int audio_channels;
+	int audio_bit_depth; // 4 or 8
+	int audio_xa_file; // 00-FF
+	int audio_xa_channel; // 00-1F
+	int audio_interleave; // rounded up to a multiple of 16 by the parser
+	int audio_loop_point; // set with -l (milliseconds per the help text); -1 by default
+
+	bs_codec_t video_codec;
+	int video_width; // 16-640, rounded up to a multiple of 16
+	int video_height; // 16-512, rounded up to a multiple of 16
+
+	int str_fps_num; // frame rate is str_fps_num / str_fps_den
+	int str_fps_den;
+	int str_cd_speed; // 1 or 2
+	int alignment; // size given with -a; its meaning depends on the output format
+} args_t;
+
+bool parse_args(args_t *args, const char *const *options, int count);
diff --git a/psxavenc/psxavenc.c b/psxavenc/psxavenc.c
index c64a49b..d980f1d 100644
--- a/psxavenc/psxavenc.c
+++ b/psxavenc/psxavenc.c
@@ -108,7 +108,7 @@ void print_version(void) {
 	printf("psxavenc " VERSION "\n");
 }
 
-int parse_args(settings_t* settings, int argc, char** argv) {
+int parse_args_old(settings_t* settings, int argc, char** argv) {
 	int c, i;
 	char *next;
 	while ((c = getopt(argc, argv, "?hVqt:F:C:f:b:c:LR:i:a:s:IS:r:x:")) != -1) {
@@ -389,7 +389,7 @@ int main(int argc, char **argv) {
 		return 1;
 	}
 
-	arg_offset = parse_args(&settings, argc, argv);
+	arg_offset = parse_args_old(&settings, argc, argv);
 	if (arg_offset < 0) {
 		return 1;
 	} else if (argc < arg_offset + 2) {

From a39f159aaf5c0969f494ee4f0b5f36e25d51285a Mon Sep 17 00:00:00 2001
From: spicyjpeg <thatspicyjpeg@gmail.com>
Date: Fri, 28 Feb 2025 02:15:21 +0100
Subject: [PATCH 3/8] Refactor and get rid of common.h

---
 libpsxav/adpcm.c    |   4 +-
 libpsxav/cdrom.c    | 103 ++++--
 libpsxav/libpsxav.h |  13 +-
 meson.build         |   5 +-
 psxavenc/args.c     |  12 +-
 psxavenc/cdrom.c    |  61 ----
 psxavenc/common.h   | 156 ---------
 psxavenc/decoding.c | 377 +++++++++++---------
 psxavenc/decoding.h |  80 +++++
 psxavenc/filefmt.c  | 518 ++++++++++++++++------------
 psxavenc/filefmt.h  |  35 ++
 psxavenc/main.c     | 174 ++++++++++
 psxavenc/mdec.c     | 822 +++++++++++++++++++++++---------------------
 psxavenc/mdec.h     |  67 ++++
 psxavenc/psxavenc.c | 495 --------------------------
 15 files changed, 1396 insertions(+), 1526 deletions(-)
 delete mode 100644 psxavenc/cdrom.c
 delete mode 100644 psxavenc/common.h
 create mode 100644 psxavenc/decoding.h
 create mode 100644 psxavenc/filefmt.h
 create mode 100644 psxavenc/main.c
 create mode 100644 psxavenc/mdec.h
 delete mode 100644 psxavenc/psxavenc.c

diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c
index 96c0ad0..03d298f 100644
--- a/libpsxav/adpcm.c
+++ b/libpsxav/adpcm.c
@@ -266,7 +266,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat
 	uint8_t init_sector = 1;
 
 	if (settings.stereo) { sample_count <<= 1; }
-	
+
 	for (i = 0, j = 0; i < sample_count || ((j % 18) != 0); i += sample_jump, j++) {
 		psx_cdrom_sector_mode2_t *sector_data = (psx_cdrom_sector_mode2_t*) (output + ((j/18) * xa_sector_size) - xa_offset);
 		uint8_t *block_data = sector_data->data + ((j%18) * 0x80);
@@ -282,7 +282,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat
 		memcpy(block_data + 12, block_data + 8, 4);
 
 		if ((j+1)%18 == 0) {
-			psx_cdrom_calculate_checksums((uint8_t*) sector_data, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2);
+			psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)sector_data, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2);
 			init_sector = 1;
 		}
 	}
diff --git a/libpsxav/cdrom.c b/libpsxav/cdrom.c
index f6b0144..ac9de32 100644
--- a/libpsxav/cdrom.c
+++ b/libpsxav/cdrom.c
@@ -21,49 +21,88 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
+#include <stdint.h>
 #include <string.h>
 #include "libpsxav.h"
 
-static uint32_t psx_cdrom_calculate_edc(uint8_t *sector, uint32_t offset, uint32_t size)
-{
+#define EDC_CRC32_POLYNOMIAL 0xD8018001
+
+static uint32_t edc_crc32(uint8_t *data, int length) {
 	uint32_t edc = 0;
-	for (int i = offset; i < offset+size; i++) {
-		edc ^= 0xFF&(uint32_t)sector[i];
-		for (int ibit = 0; ibit < 8; ibit++) {
-			edc = (edc>>1)^(0xD8018001*(edc&0x1));
-		}
+
+	for (int i = 0; i < length; i++) {
+		edc ^= 0xFF & (uint32_t)data[i];
+
+		for (int j = 0; j < 8; j++)
+			edc = (edc >> 1) ^ (EDC_CRC32_POLYNOMIAL * (edc & 0x1));
 	}
+
 	return edc;
 }
 
-void psx_cdrom_calculate_checksums(uint8_t *sector, psx_cdrom_sector_type_t type)
-{
-	switch (type) {
-		case PSX_CDROM_SECTOR_TYPE_MODE1: {
-			uint32_t edc = psx_cdrom_calculate_edc(sector, 0x0, 0x810);
-			sector[0x810] = (uint8_t)(edc);
-			sector[0x811] = (uint8_t)(edc >> 8);
-			sector[0x812] = (uint8_t)(edc >> 16);
-			sector[0x813] = (uint8_t)(edc >> 24);
+#define TO_BCD(x) ((x) + ((x) / 10) * 6)
 
+void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type) {
+	// Sync sequence
+	memset(sector->mode1.sync + 1, 0xff, 10);
+	sector->mode1.sync[0x0] = 0x00;
+	sector->mode1.sync[0xb] = 0x00;
+
+	// Timecode
+	lba += 150;
+	sector->mode1.header.minute = TO_BCD(lba / 4500);
+	sector->mode1.header.second = TO_BCD((lba / 75) % 60);
+	sector->mode1.header.sector = TO_BCD(lba % 75);
+
+	// Mode
+	if (type == PSX_CDROM_SECTOR_TYPE_MODE1) {
+		sector->mode1.header.mode = 0x01;
+	} else {
+		sector->mode2.header.mode = 0x02;
+
+		memset(sector->mode2.subheader, 0, sizeof(psx_cdrom_sector_xa_subheader_t));
+		sector->mode2.subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA;
+
+		if (type == PSX_CDROM_SECTOR_TYPE_MODE2_FORM2)
+			sector->mode2.subheader[0].submode |= PSX_CDROM_SECTOR_XA_SUBMODE_FORM2;
+
+		memcpy(sector->mode2.subheader + 1, sector->mode2.subheader, sizeof(psx_cdrom_sector_xa_subheader_t));
+	}
+}
+
+void psx_cdrom_calculate_checksums(psx_cdrom_sector_t *sector, psx_cdrom_sector_type_t type) {
+	uint8_t *data = (uint8_t *)sector;
+	uint32_t edc;
+
+	switch (type) {
+		case PSX_CDROM_SECTOR_TYPE_MODE1:
+			edc = edc_crc32(data, 0x810);
+
+			data[0x810] = (uint8_t)(edc);
+			data[0x811] = (uint8_t)(edc >> 8);
+			data[0x812] = (uint8_t)(edc >> 16);
+			data[0x813] = (uint8_t)(edc >> 24);
 			memset(sector + 0x814, 0, 8);
 			// TODO: ECC
-		} break;
-		case PSX_CDROM_SECTOR_TYPE_MODE2_FORM1: {
-			uint32_t edc = psx_cdrom_calculate_edc(sector, 0x10, 0x808);
-			sector[0x818] = (uint8_t)(edc);
-			sector[0x819] = (uint8_t)(edc >> 8);
-			sector[0x81A] = (uint8_t)(edc >> 16);
-			sector[0x81B] = (uint8_t)(edc >> 24);
+			break;
 
+		case PSX_CDROM_SECTOR_TYPE_MODE2_FORM1:
+			edc = edc_crc32(data + 0x10, 0x808);
+
+			data[0x818] = (uint8_t)(edc);
+			data[0x819] = (uint8_t)(edc >> 8);
+			data[0x81A] = (uint8_t)(edc >> 16);
+			data[0x81B] = (uint8_t)(edc >> 24);
 			// TODO: ECC
-		} break;
-		case PSX_CDROM_SECTOR_TYPE_MODE2_FORM2: {
-			uint32_t edc = psx_cdrom_calculate_edc(sector, 0x10, 0x91C);
-			sector[0x92C] = (uint8_t)(edc);
-			sector[0x92D] = (uint8_t)(edc >> 8);
-			sector[0x92E] = (uint8_t)(edc >> 16);
-			sector[0x92F] = (uint8_t)(edc >> 24);
-		} break;
+			break;
+
+		case PSX_CDROM_SECTOR_TYPE_MODE2_FORM2:
+			edc = edc_crc32(data + 0x10, 0x91C);
+
+			data[0x92C] = (uint8_t)(edc);
+			data[0x92D] = (uint8_t)(edc >> 8);
+			data[0x92E] = (uint8_t)(edc >> 16);
+			data[0x92F] = (uint8_t)(edc >> 24);
+			break;
 	}
-}
\ No newline at end of file
+}
diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h
index e20138e..32eabaf 100644
--- a/libpsxav/libpsxav.h
+++ b/libpsxav/libpsxav.h
@@ -21,8 +21,7 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef __LIBPSXAV_H__
-#define __LIBPSXAV_H__
+#pragma once
 
 #include <stdbool.h>
 #include <stdint.h>
@@ -106,6 +105,11 @@ typedef struct {
 	uint8_t data[0x918];
 } psx_cdrom_sector_mode2_t;
 
+typedef union {
+	psx_cdrom_sector_mode1_t mode1;
+	psx_cdrom_sector_mode2_t mode2;
+} psx_cdrom_sector_t;
+
 _Static_assert(sizeof(psx_cdrom_sector_mode1_t) == PSX_CDROM_SECTOR_SIZE, "Invalid Mode1 sector size");
 _Static_assert(sizeof(psx_cdrom_sector_mode2_t) == PSX_CDROM_SECTOR_SIZE, "Invalid Mode2 sector size");
 
@@ -137,6 +141,5 @@ typedef enum {
 	PSX_CDROM_SECTOR_TYPE_MODE2_FORM2
 } psx_cdrom_sector_type_t;
 
-void psx_cdrom_calculate_checksums(uint8_t *sector, psx_cdrom_sector_type_t type);
-
-#endif /* __LIBPSXAV_H__ */
+void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type);
+void psx_cdrom_calculate_checksums(psx_cdrom_sector_t *sector, psx_cdrom_sector_type_t type);
diff --git a/meson.build b/meson.build
index abd8a35..4061200 100644
--- a/meson.build
+++ b/meson.build
@@ -25,9 +25,8 @@ libpsxav_dep = declare_dependency(include_directories: include_directories('libp
 
 executable('psxavenc', [
 	'psxavenc/args.c',
-	'psxavenc/cdrom.c',
 	'psxavenc/decoding.c',
 	'psxavenc/filefmt.c',
-	'psxavenc/mdec.c',
-	'psxavenc/psxavenc.c'
+	'psxavenc/main.c',
+	'psxavenc/mdec.c'
 ], dependencies: [libm_dep, ffmpeg, libpsxav_dep], install: true)
diff --git a/psxavenc/args.c b/psxavenc/args.c
index 03d0695..8c92346 100644
--- a/psxavenc/args.c
+++ b/psxavenc/args.c
@@ -27,6 +27,7 @@ freely, subject to the following restrictions:
 #include <stdlib.h>
 #include <string.h>
 #include "args.h"
+#include "config.h"
 
 #define INVALID_PARAM -1
 
@@ -146,13 +147,6 @@ static const char *const format_names[NUM_FORMATS] = {
 };
 
 static void init_default_args(args_t *args) {
-	args->flags = 0;
-
-	args->input_file = NULL;
-	args->output_file = NULL;
-	args->swresample_options = NULL;
-	args->swscale_options = NULL;
-
 	if (
 		args->format == FORMAT_XA || args->format == FORMAT_XACD ||
 		args->format == FORMAT_STR || args->format == FORMAT_STRCD
@@ -694,6 +688,10 @@ bool parse_args(args_t *args, const char *const *options, int count) {
 		print_help(args->format);
 		return false;
 	}
+	if (args->flags & FLAG_PRINT_VERSION) {
+		printf("psxavenc " VERSION "\n");
+		return false;
+	}
 	if (args->format == FORMAT_INVALID || args->input_file == NULL || args->output_file == NULL) {
 		fprintf(
 			stderr,
diff --git a/psxavenc/cdrom.c b/psxavenc/cdrom.c
deleted file mode 100644
index d391e12..0000000
--- a/psxavenc/cdrom.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
-psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
-
-Copyright (c) 2019, 2020 Adrian "asie" Siekierka
-Copyright (c) 2019 Ben "GreaseMonkey" Russell
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not
-   claim that you wrote the original software. If you use this software
-   in a product, an acknowledgment in the product documentation would be
-   appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be
-   misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "common.h"
-
-void init_sector_buffer_video(psx_cdrom_sector_mode2_t *buffer, settings_t *settings) {
-	if (settings->format == FORMAT_STR2CD) {
-		memset(buffer, 0, PSX_CDROM_SECTOR_SIZE);
-		memset(buffer->sync + 1, 0xFF, 10);
-		buffer->header.mode = 0x02;
-	} else if (settings->format == FORMAT_STR2V) {
-		memset(buffer->data, 0, 2048);
-	} else {
-		memset(buffer->subheader, 0, PSX_CDROM_SECTOR_SIZE - 16);
-	}
-
-	buffer->subheader[0].file = settings->file_number;
-	buffer->subheader[0].channel = settings->channel_number & PSX_CDROM_SECTOR_XA_CHANNEL_MASK;
-	buffer->subheader[0].submode =
-		PSX_CDROM_SECTOR_XA_SUBMODE_DATA
-		| PSX_CDROM_SECTOR_XA_SUBMODE_RT;
-	buffer->subheader[0].coding = 0;
-	memcpy(buffer->subheader + 1, buffer->subheader, sizeof(psx_cdrom_sector_xa_subheader_t));
-}
-
-void calculate_edc_data(uint8_t *buffer)
-{
-	uint32_t edc = 0;
-	for (int i = 0x010; i < 0x818; i++) {
-		edc ^= 0xFF&(uint32_t)buffer[i];
-		for (int ibit = 0; ibit < 8; ibit++) {
-			edc = (edc>>1)^(0xD8018001*(edc&0x1));
-		}
-	}
-	buffer[0x818] = (uint8_t)(edc);
-	buffer[0x819] = (uint8_t)(edc >> 8);
-	buffer[0x81A] = (uint8_t)(edc >> 16);
-	buffer[0x81B] = (uint8_t)(edc >> 24);
-
-	// TODO: ECC
-}
diff --git a/psxavenc/common.h b/psxavenc/common.h
deleted file mode 100644
index 6cf39f9..0000000
--- a/psxavenc/common.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
-psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
-
-Copyright (c) 2019, 2020 Adrian "asie" Siekierka
-Copyright (c) 2019 Ben "GreaseMonkey" Russell
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not
-   claim that you wrote the original software. If you use this software
-   in a product, an acknowledgment in the product documentation would be
-   appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be
-   misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include <assert.h>
-#include <getopt.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <time.h>
-#include <unistd.h>
-
-#include <libavutil/opt.h>
-#include <libavcodec/avcodec.h>
-#include <libavcodec/avdct.h>
-#include <libavformat/avformat.h>
-#include <libswscale/swscale.h>
-#include <libswresample/swresample.h>
-#include <libpsxav.h>
-
-typedef enum {
-	FORMAT_XA,
-	FORMAT_XACD,
-	FORMAT_SPU,
-	FORMAT_SPUI,
-	FORMAT_VAG,
-	FORMAT_VAGI,
-	FORMAT_STR2,
-	FORMAT_STR2CD,
-	FORMAT_STR2V,
-	FORMAT_SBS2,
-	NUM_FORMATS
-} psxavenc_format_t;
-
-typedef struct {
-	int frame_index;
-	int frame_data_offset;
-	int frame_max_size;
-	int frame_block_base_overflow;
-	int frame_block_overflow_num;
-	int frame_block_overflow_den;
-	uint16_t bits_value;
-	int bits_left;
-	uint8_t *frame_output;
-	int bytes_used;
-	int blocks_used;
-	int uncomp_hwords_used;
-	int quant_scale;
-	int quant_scale_sum;
-
-	uint32_t *huffman_encoding_map;
-	int16_t *coeff_clamp_map;
-	int16_t *dct_block_lists[6];
-	AVDCT *dct_context;
-} vid_encoder_state_t;
-
-typedef struct {
-	int video_frame_dst_size;
-	int audio_stream_index;
-	int video_stream_index;
-	AVFormatContext* format;
-	AVStream* audio_stream;
-	AVStream* video_stream;
-	AVCodecContext* audio_codec_context;
-	AVCodecContext* video_codec_context;
-	struct SwrContext* resampler;
-	struct SwsContext* scaler;
-	AVFrame* frame;
-
-	int sample_count_mul;
-
-	double video_next_pts;
-} av_decoder_state_t;
-
-typedef struct {
-	bool quiet;
-	bool show_progress;
-
-	int format; // FORMAT_*
-	int channels;
-	int cd_speed; // 1 or 2
-	int frequency; // 18900 or 37800 Hz
-	int bits_per_sample; // 4 or 8
-	int file_number; // 00-FF
-	int channel_number; // 00-1F
-	int interleave;
-	int alignment;
-	bool loop;
-
-	int video_width;
-	int video_height;
-	int video_fps_num; // FPS numerator
-	int video_fps_den; // FPS denominator
-	bool ignore_aspect_ratio;
-
-	char *swresample_options;
-	char *swscale_options;
-
-	int16_t *audio_samples;
-	int audio_sample_count;
-	uint8_t *video_frames;
-	int video_frame_count;
-
-	av_decoder_state_t decoder_state_av;
-	vid_encoder_state_t state_vid;
-	bool end_of_input;
-
-	time_t start_time;
-	time_t last_progress_update;
-} settings_t;
-
-// cdrom.c
-void init_sector_buffer_video(psx_cdrom_sector_mode2_t *buffer, settings_t *settings);
-void calculate_edc_data(uint8_t *buffer);
-
-// decoding.c
-bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required);
-bool poll_av_data(settings_t *settings);
-bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames);
-void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames);
-void close_av_data(settings_t *settings);
-
-// filefmt.c
-void encode_file_spu(settings_t *settings, FILE *output);
-void encode_file_spu_interleaved(settings_t *settings, FILE *output);
-void encode_file_xa(settings_t *settings, FILE *output);
-void encode_file_str(settings_t *settings, FILE *output);
-void encode_file_sbs(settings_t *settings, FILE *output);
-
-// mdec.c
-bool init_encoder_state(settings_t *settings);
-void destroy_encoder_state(settings_t *settings);
-void encode_frame_bs(uint8_t *video_frame, settings_t *settings);
-void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings);
diff --git a/psxavenc/decoding.c b/psxavenc/decoding.c
index 54a9124..a29e90a 100644
--- a/psxavenc/decoding.c
+++ b/psxavenc/decoding.c
@@ -22,30 +22,57 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#include "common.h"
-
-int decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
-	int ret;
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libavutil/opt.h>
+#include <libavcodec/avcodec.h>
+#include <libavcodec/avdct.h>
+#include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
+#include <libswscale/swscale.h>
+#include "args.h"
+#include "decoding.h"
 
+static int decode_frame(
+	AVCodecContext *codec,
+	AVFrame *frame,
+	int *frame_size,
+	AVPacket *packet
+) {
 	if (packet != NULL) {
-		ret = avcodec_send_packet(codec, packet);
-		if (ret != 0) {
+		if (avcodec_send_packet(codec, packet) != 0)
 			return 0;
-		}
 	}
 
-	ret = avcodec_receive_frame(codec, frame);
+	int ret = avcodec_receive_frame(codec, frame);
+
 	if (ret >= 0) {
 		*frame_size = ret;
 		return 1;
+	} else if (ret == AVERROR(EAGAIN)) {
+		return 1;
 	} else {
-		return ret == AVERROR(EAGAIN) ? 1 : 0;
+		return 0;
 	}
 }
 
-bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
+bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
+	decoder->audio_samples = NULL;
+	decoder->audio_sample_count = 0;
+	decoder->video_frames = NULL;
+	decoder->video_frame_count = 0;
+
+	decoder->video_width = args->video_width;
+	decoder->video_height = args->video_height;
+	decoder->video_fps_num = args->str_fps_num;
+	decoder->video_fps_den = args->str_fps_den;
+	decoder->end_of_input = false;
+
+	decoder_state_t *av = &(decoder->state);
+
 	av->video_next_pts = 0.0;
 	av->frame = NULL;
 	av->video_frame_dst_size = 0;
@@ -59,19 +86,17 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 	av->resampler = NULL;
 	av->scaler = NULL;
 
-	if (settings->quiet) {
+	if (args->flags & FLAG_QUIET)
 		av_log_set_level(AV_LOG_QUIET);
-	}
 
 	av->format = avformat_alloc_context();
-	if (avformat_open_input(&(av->format), filename, NULL, NULL)) {
-		return false;
-	}
-	if (avformat_find_stream_info(av->format, NULL) < 0) {
-		return false;
-	}
 
-	if (use_audio) {
+	if (avformat_open_input(&(av->format), args->input_file, NULL, NULL))
+		return false;
+	if (avformat_find_stream_info(av->format, NULL) < 0)
+		return false;
+
+	if (flags & DECODER_USE_AUDIO) {
 		for (int i = 0; i < av->format->nb_streams; i++) {
 			if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
 				if (av->audio_stream_index >= 0) {
@@ -81,13 +106,14 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 				av->audio_stream_index = i;
 			}
 		}
-		if (audio_required && av->audio_stream_index == -1) {
+
+		if ((flags & DECODER_AUDIO_REQUIRED) && av->audio_stream_index == -1) {
 			fprintf(stderr, "Input file has no audio data\n");
 			return false;
 		}
 	}
 
-	if (use_video) {
+	if (flags & DECODER_USE_VIDEO) {
 		for (int i = 0; i < av->format->nb_streams; i++) {
 			if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
 				if (av->video_stream_index >= 0) {
@@ -97,7 +123,8 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 				av->video_stream_index = i;
 			}
 		}
-		if (video_required && av->video_stream_index == -1) {
+
+		if ((flags & DECODER_VIDEO_REQUIRED) && av->video_stream_index == -1) {
 			fprintf(stderr, "Input file has no video data\n");
 			return false;
 		}
@@ -109,34 +136,39 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 	if (av->audio_stream != NULL) {
 		const AVCodec *codec = avcodec_find_decoder(av->audio_stream->codecpar->codec_id);
 		av->audio_codec_context = avcodec_alloc_context3(codec);
-		if (av->audio_codec_context == NULL) {
+
+		if (av->audio_codec_context == NULL)
 			return false;
-		}
-		if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0) {
+		if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0)
 			return false;
-		}
-		if (avcodec_open2(av->audio_codec_context, codec, NULL) < 0) {
+		if (avcodec_open2(av->audio_codec_context, codec, NULL) < 0)
 			return false;
-		}
 
 		AVChannelLayout layout;
-		layout.nb_channels = settings->channels;
-		if (settings->channels <= 2) {
+		layout.nb_channels = args->audio_channels;
+
+		if (args->audio_channels == 1) {
 			layout.order = AV_CHANNEL_ORDER_NATIVE;
-			layout.u.mask = (settings->channels == 2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
+			layout.u.mask = AV_CH_LAYOUT_MONO;
+		} else if (args->audio_channels == 2) {
+			layout.order = AV_CHANNEL_ORDER_NATIVE;
+			layout.u.mask = AV_CH_LAYOUT_STEREO;
 		} else {
 			layout.order = AV_CHANNEL_ORDER_UNSPEC;
 		}
-		if (!settings->quiet && settings->channels > av->audio_codec_context->ch_layout.nb_channels) {
-			fprintf(stderr, "Warning: input file has less than %d channels\n", settings->channels);
+
+		if (!(args->flags & FLAG_QUIET)) {
+			if (args->audio_channels > av->audio_codec_context->ch_layout.nb_channels)
+				fprintf(stderr, "Warning: input file has less than %d channels\n", args->audio_channels);
 		}
 
-		av->sample_count_mul = settings->channels;
+		av->sample_count_mul = args->audio_channels;
+
 		if (swr_alloc_set_opts2(
 			&av->resampler,
 			&layout,
 			AV_SAMPLE_FMT_S16,
-			settings->frequency,
+			args->audio_frequency,
 			&av->audio_codec_context->ch_layout,
 			av->audio_codec_context->sample_fmt,
 			av->audio_codec_context->sample_rate,
@@ -145,47 +177,43 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 		) < 0) {
 			return false;
 		}
-		if (settings->swresample_options) {
-			if (av_opt_set_from_string(av->resampler, settings->swresample_options, NULL, "=", ":,") < 0) {
+		if (args->swresample_options) {
+			if (av_opt_set_from_string(av->resampler, args->swresample_options, NULL, "=", ":,") < 0)
 				return false;
-			}
 		}
-
-		if (swr_init(av->resampler) < 0) {
+		if (swr_init(av->resampler) < 0)
 			return false;
-		}
 	}
 
 	if (av->video_stream != NULL) {
 		const AVCodec *codec = avcodec_find_decoder(av->video_stream->codecpar->codec_id);
 		av->video_codec_context = avcodec_alloc_context3(codec);
-		if(av->video_codec_context == NULL) {
+
+		if (av->video_codec_context == NULL)
 			return false;
-		}
-		if (avcodec_parameters_to_context(av->video_codec_context, av->video_stream->codecpar) < 0) {
+		if (avcodec_parameters_to_context(av->video_codec_context, av->video_stream->codecpar) < 0)
 			return false;
-		}
-		if (avcodec_open2(av->video_codec_context, codec, NULL) < 0) {
+		if (avcodec_open2(av->video_codec_context, codec, NULL) < 0)
 			return false;
+
+		if (!(args->flags & FLAG_QUIET)) {
+			if (
+				decoder->video_width > av->video_codec_context->width ||
+				decoder->video_height > av->video_codec_context->height
+			)
+				fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n", decoder->video_width, decoder->video_height);
 		}
 
-		if (!settings->quiet && (
-			settings->video_width > av->video_codec_context->width ||
-			settings->video_height > av->video_codec_context->height
-		)) {
-			fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n",
-				settings->video_width, settings->video_height
-			);
-		}
-		if (!settings->ignore_aspect_ratio) {
+		if (!(args->flags & FLAG_BS_IGNORE_ASPECT)) {
 			// Reduce the provided size so that it matches the input file's
 			// aspect ratio.
 			double src_ratio = (double)av->video_codec_context->width / (double)av->video_codec_context->height;
-			double dst_ratio = (double)settings->video_width / (double)settings->video_height;
+			double dst_ratio = (double)decoder->video_width / (double)decoder->video_height;
+
 			if (src_ratio < dst_ratio) {
-				settings->video_width = (int)((double)settings->video_height * src_ratio + 15.0) & ~15;
+				decoder->video_width = (int)((double)decoder->video_height * src_ratio + 15.0) & ~15;
 			} else {
-				settings->video_height = (int)((double)settings->video_width / src_ratio + 15.0) & ~15;
+				decoder->video_height = (int)((double)decoder->video_width / src_ratio + 15.0) & ~15;
 			}
 		}
 
@@ -193,17 +221,16 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 			av->video_codec_context->width,
 			av->video_codec_context->height,
 			av->video_codec_context->pix_fmt,
-			settings->video_width,
-			settings->video_height,
+			decoder->video_width,
+			decoder->video_height,
 			AV_PIX_FMT_NV21,
 			SWS_BICUBIC,
 			NULL,
 			NULL,
 			NULL
 		);
-		if (av->scaler == NULL) {
+		if (av->scaler == NULL)
 			return false;
-		}
 		if (sws_setColorspaceDetails(
 			av->scaler,
 			sws_getCoefficients(av->video_codec_context->colorspace),
@@ -213,189 +240,211 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 			0,
 			1 << 16,
 			1 << 16
-		) < 0) {
+		) < 0)
 			return false;
-		}
-		if (settings->swscale_options) {
-			if (av_opt_set_from_string(av->scaler, settings->swscale_options, NULL, "=", ":,") < 0) {
+		if (args->swscale_options) {
+			if (av_opt_set_from_string(av->scaler, args->swscale_options, NULL, "=", ":,") < 0)
 				return false;
-			}
 		}
 
-		av->video_frame_dst_size = 3*settings->video_width*settings->video_height/2;
+		av->video_frame_dst_size = 3 * decoder->video_width * decoder->video_height / 2;
 	}
 
 	av->frame = av_frame_alloc();
-	if (av->frame == NULL) {
-		return false;
-	}
 
-	settings->audio_samples = NULL;
-	settings->audio_sample_count = 0;
-	settings->video_frames = NULL;
-	settings->video_frame_count = 0;
-	settings->end_of_input = false;
+	if (av->frame == NULL)
+		return false;
 
 	return true;
 }
 
-static void poll_av_packet_audio(settings_t *settings, AVPacket *packet)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
+static void poll_av_packet_audio(decoder_t *decoder, AVPacket *packet) {
+	decoder_state_t *av = &(decoder->state);
 
 	int frame_size, frame_sample_count;
 	uint8_t *buffer[1];
 
 	if (decode_frame(av->audio_codec_context, av->frame, &frame_size, packet)) {
 		size_t buffer_size = sizeof(int16_t) * av->sample_count_mul * swr_get_out_samples(av->resampler, av->frame->nb_samples);
+
 		buffer[0] = malloc(buffer_size);
 		memset(buffer[0], 0, buffer_size);
-		frame_sample_count = swr_convert(av->resampler, buffer, av->frame->nb_samples, (const uint8_t**)av->frame->data, av->frame->nb_samples);
-		settings->audio_samples = realloc(settings->audio_samples, (settings->audio_sample_count + ((frame_sample_count + 4032) * av->sample_count_mul)) * sizeof(int16_t));
-		memmove(&(settings->audio_samples[settings->audio_sample_count]), buffer[0], sizeof(int16_t) * frame_sample_count * av->sample_count_mul);
-		settings->audio_sample_count += frame_sample_count * av->sample_count_mul;
+
+		frame_sample_count = swr_convert(
+			av->resampler,
+			buffer,
+			av->frame->nb_samples,
+			(const uint8_t**)av->frame->data,
+			av->frame->nb_samples
+		);
+
+		decoder->audio_samples = realloc(
+			decoder->audio_samples,
+			(decoder->audio_sample_count + ((frame_sample_count + 4032) * av->sample_count_mul)) * sizeof(int16_t)
+		);
+		memmove(
+			&(decoder->audio_samples[decoder->audio_sample_count]),
+			buffer[0],
+			sizeof(int16_t) * frame_sample_count * av->sample_count_mul
+		);
+		decoder->audio_sample_count += frame_sample_count * av->sample_count_mul;
 		free(buffer[0]);
 	}
 }
 
-static void poll_av_packet_video(settings_t *settings, AVPacket *packet)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
+static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) {
+	decoder_state_t *av = &(decoder->state);
 
 	int frame_size;
-	double pts_step = ((double)1.0*(double)settings->video_fps_den)/(double)settings->video_fps_num;
+	double pts_step = (double)decoder->video_fps_den / (double)decoder->video_fps_num;
 
-	int plane_size = settings->video_width*settings->video_height;
+	int plane_size = decoder->video_width * decoder->video_height;
 	int dst_strides[2] = {
-		settings->video_width, settings->video_width
+		decoder->video_width, decoder->video_width
 	};
 
 	if (decode_frame(av->video_codec_context, av->frame, &frame_size, packet)) {
-		if (!av->frame->width || !av->frame->height || !av->frame->data[0]) {
+		if (!av->frame->width || !av->frame->height || !av->frame->data[0])
 			return;
-		}
 
 		// Some files seem to have timestamps starting from a negative value
 		// (but otherwise valid) for whatever reason.
-		double pts = (((double)av->frame->pts)*(double)av->video_stream->time_base.num)/av->video_stream->time_base.den;
-		//if (pts < 0.0) {
-			//return;
-		//}
-		if (settings->video_frame_count >= 1 && pts < av->video_next_pts) {
+		double pts =
+			((double)av->frame->pts * (double)av->video_stream->time_base.num)
+			/ av->video_stream->time_base.den;
+#if 0
+		if (pts < 0.0)
 			return;
-		}
-		if ((settings->video_frame_count) < 1) {
+#endif
+		if (decoder->video_frame_count >= 1 && pts < av->video_next_pts)
+			return;
+		if (decoder->video_frame_count < 1)
 			av->video_next_pts = pts;
-		} else {
+		else
 			av->video_next_pts += pts_step;
-		}
 
-		//fprintf(stderr, "%d %f %f %f\n", (settings->video_frame_count), pts, av->video_next_pts, pts_step);
+		//fprintf(stderr, "%d %f %f %f\n", decoder->video_frame_count, pts, av->video_next_pts, pts_step);
 
 		// Insert duplicate frames if the frame rate of the input stream is
 		// lower than the target frame rate.
 		int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step);
 		if (dupe_frames < 0) dupe_frames = 0;
-		settings->video_frames = realloc(
-			settings->video_frames,
-			(settings->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size
+		decoder->video_frames = realloc(
+			decoder->video_frames,
+			(decoder->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size
 		);
 
 		for (; dupe_frames; dupe_frames--) {
 			memcpy(
-				(settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count),
-				(settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count-1),
+				(decoder->video_frames) + av->video_frame_dst_size * decoder->video_frame_count,
+				(decoder->video_frames) + av->video_frame_dst_size * (decoder->video_frame_count - 1),
 				av->video_frame_dst_size
 			);
-			settings->video_frame_count += 1;
+			decoder->video_frame_count += 1;
 			av->video_next_pts += pts_step;
 		}
 
-		uint8_t *dst_frame = (settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count);
+		uint8_t *dst_frame = decoder->video_frames + av->video_frame_dst_size * decoder->video_frame_count;
 		uint8_t *dst_pointers[2] = {
 			dst_frame, dst_frame + plane_size
 		};
-		sws_scale(av->scaler, (const uint8_t *const *) av->frame->data, av->frame->linesize, 0, av->frame->height, dst_pointers, dst_strides);
+		sws_scale(
+			av->scaler,
+			(const uint8_t *const *) av->frame->data,
+			av->frame->linesize,
+			0,
+			av->frame->height,
+			dst_pointers,
+			dst_strides
+		);
 
-		settings->video_frame_count += 1;
+		decoder->video_frame_count += 1;
 	}
 }
 
-bool poll_av_data(settings_t *settings)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
+bool poll_av_data(decoder_t *decoder) {
+	decoder_state_t *av = &(decoder->state);
+
+	if (decoder->end_of_input)
+		return false;
+
 	AVPacket packet;
 
-	if (settings->end_of_input) {
-		return false;
-	}
-
 	if (av_read_frame(av->format, &packet) >= 0) {
-		if (packet.stream_index == av->audio_stream_index) {
-			poll_av_packet_audio(settings, &packet);
-		} else if (packet.stream_index == av->video_stream_index) {
-			poll_av_packet_video(settings, &packet);
-		}
+		if (packet.stream_index == av->audio_stream_index)
+			poll_av_packet_audio(decoder, &packet);
+		else if (packet.stream_index == av->video_stream_index)
+			poll_av_packet_video(decoder, &packet);
+
 		av_packet_unref(&packet);
 		return true;
 	} else {
 		// out is always padded out with 4032 "0" samples, this makes calculations elsewhere easier
-		if (av->audio_stream) {
-			memset((settings->audio_samples) + (settings->audio_sample_count), 0, 4032 * av->sample_count_mul * sizeof(int16_t));
-		}
+		if (av->audio_stream && decoder->audio_samples) // buffer stays NULL if no audio packet was ever decoded
+			memset(
+				decoder->audio_samples + decoder->audio_sample_count,
+				0,
+				4032 * av->sample_count_mul * sizeof(int16_t)
+			);
 
-		settings->end_of_input = true;
+		decoder->end_of_input = true;
 		return false;
 	}
 }
 
-bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames)
-{
-	// HACK: in order to update settings->end_of_input as soon as all data has
+bool ensure_av_data(decoder_t *decoder, int needed_audio_samples, int needed_video_frames) {
+	// HACK: in order to update decoder->end_of_input as soon as all data has
 	// been read from the input file, this loop waits for more data than
 	// strictly needed.
-	//while (settings->audio_sample_count < needed_audio_samples || settings->video_frame_count < needed_video_frames) {
+#if 0
+	while (decoder->audio_sample_count < needed_audio_samples || decoder->video_frame_count < needed_video_frames) {
+#else
 	while (
-		(needed_audio_samples && settings->audio_sample_count <= needed_audio_samples) ||
-		(needed_video_frames && settings->video_frame_count <= needed_video_frames)
+		(needed_audio_samples && decoder->audio_sample_count <= needed_audio_samples) ||
+		(needed_video_frames && decoder->video_frame_count <= needed_video_frames)
 	) {
-		//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames);
-		if (!poll_av_data(settings)) {
+#endif
+		//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", decoder->audio_sample_count, needed_audio_samples, decoder->video_frame_count, needed_video_frames);
+		if (!poll_av_data(decoder)) {
 			// Keep returning true even if the end of the input file has been
 			// reached, if the buffer is not yet completely empty.
-			return (settings->audio_sample_count || !needed_audio_samples)
-				&& (settings->video_frame_count || !needed_video_frames);
+			return
+				(decoder->audio_sample_count || !needed_audio_samples) &&
+				(decoder->video_frame_count || !needed_video_frames);
 		}
 	}
-	//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames);
+	//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", decoder->audio_sample_count, needed_audio_samples, decoder->video_frame_count, needed_video_frames);
 
 	return true;
 }
 
-void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
-
-	//fprintf(stderr, "retire %d -> %d, %d -> %d\n", settings->audio_sample_count, retired_audio_samples, settings->video_frame_count, retired_video_frames);
-	assert(retired_audio_samples <= settings->audio_sample_count);
-	assert(retired_video_frames <= settings->video_frame_count);
+void retire_av_data(decoder_t *decoder, int retired_audio_samples, int retired_video_frames) {
+	//fprintf(stderr, "retire %d -> %d, %d -> %d\n", decoder->audio_sample_count, retired_audio_samples, decoder->video_frame_count, retired_video_frames);
+	assert(retired_audio_samples <= decoder->audio_sample_count);
+	assert(retired_video_frames <= decoder->video_frame_count);
 
 	int sample_size = sizeof(int16_t);
-	if (settings->audio_sample_count > retired_audio_samples) {
-		memmove(settings->audio_samples, settings->audio_samples + retired_audio_samples, (settings->audio_sample_count - retired_audio_samples)*sample_size);
-	}
-	settings->audio_sample_count -= retired_audio_samples;
+	int frame_size = decoder->state.video_frame_dst_size;
 
-	int frame_size = av->video_frame_dst_size;
-	if (settings->video_frame_count > retired_video_frames) {
-		memmove(settings->video_frames, settings->video_frames + retired_video_frames*frame_size, (settings->video_frame_count - retired_video_frames)*frame_size);
-	}
-	settings->video_frame_count -= retired_video_frames;
+	if (decoder->audio_sample_count > retired_audio_samples)
+		memmove(
+			decoder->audio_samples,
+			decoder->audio_samples + retired_audio_samples,
+			(decoder->audio_sample_count - retired_audio_samples) * sample_size
+		);
+	if (decoder->video_frame_count > retired_video_frames)
+		memmove(
+			decoder->video_frames,
+			decoder->video_frames + retired_video_frames * frame_size,
+			(decoder->video_frame_count - retired_video_frames) * frame_size
+		);
+
+	decoder->audio_sample_count -= retired_audio_samples;
+	decoder->video_frame_count -= retired_video_frames;
 }
 
-void close_av_data(settings_t *settings)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
+void close_av_data(decoder_t *decoder) {
+	decoder_state_t *av = &(decoder->state);
 
 	av_frame_free(&(av->frame));
 	swr_free(&(av->resampler));
@@ -404,12 +453,12 @@ void close_av_data(settings_t *settings)
 	avcodec_free_context(&(av->audio_codec_context));
 	avformat_free_context(av->format);
 
-	if(settings->audio_samples != NULL) {
-		free(settings->audio_samples);
-		settings->audio_samples = NULL;
+	if(decoder->audio_samples != NULL) {
+		free(decoder->audio_samples);
+		decoder->audio_samples = NULL;
 	}
-	if(settings->video_frames != NULL) {
-		free(settings->video_frames);
-		settings->video_frames = NULL;
+	if(decoder->video_frames != NULL) {
+		free(decoder->video_frames);
+		decoder->video_frames = NULL;
 	}
 }
diff --git a/psxavenc/decoding.h b/psxavenc/decoding.h
new file mode 100644
index 0000000..ccf0b65
--- /dev/null
+++ b/psxavenc/decoding.h
@@ -0,0 +1,80 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+Copyright (c) 2023, 2025 spicyjpeg
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#pragma once
+
+#include <stdbool.h>
+#include <libavutil/opt.h>
+#include <libavcodec/avcodec.h>
+#include <libavcodec/avdct.h>
+#include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
+#include <libswscale/swscale.h>
+#include "args.h"
+
+typedef struct {
+	int video_frame_dst_size;
+	int audio_stream_index;
+	int video_stream_index;
+	AVFormatContext* format;
+	AVStream* audio_stream;
+	AVStream* video_stream;
+	AVCodecContext* audio_codec_context;
+	AVCodecContext* video_codec_context;
+	struct SwrContext* resampler;
+	struct SwsContext* scaler;
+	AVFrame* frame;
+
+	int sample_count_mul;
+
+	double video_next_pts;
+} decoder_state_t;
+
+typedef struct {
+	int16_t *audio_samples; // buffered 16-bit samples; released by close_av_data()
+	int audio_sample_count; // number of int16_t values currently held in audio_samples
+	uint8_t *video_frames; // buffered frames, state.video_frame_dst_size bytes each; released by close_av_data()
+	int video_frame_count; // number of frames currently held in video_frames
+
+	int video_width;
+	int video_height;
+	int video_fps_num;
+	int video_fps_den;
+	bool end_of_input; // NOTE(review): presumably set once the input is exhausted - confirm in decoding.c
+
+	decoder_state_t state; // FFmpeg handles and per-stream bookkeeping (see decoder_state_t)
+} decoder_t;
+
+enum {
+	DECODER_USE_AUDIO = 1 << 0,
+	DECODER_USE_VIDEO = 1 << 1,
+	DECODER_AUDIO_REQUIRED = 1 << 2,
+	DECODER_VIDEO_REQUIRED = 1 << 3
+};
+
+bool open_av_data(decoder_t *decoder, const args_t *args, int flags);
+bool poll_av_data(decoder_t *decoder);
+bool ensure_av_data(decoder_t *decoder, int needed_audio_samples, int needed_video_frames);
+void retire_av_data(decoder_t *decoder, int retired_audio_samples, int retired_video_frames);
+void close_av_data(decoder_t *decoder);
diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c
index 803ac2d..b00a29b 100644
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -3,7 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
 
 Copyright (c) 2019, 2020 Adrian "asie" Siekierka
 Copyright (c) 2019 Ben "GreaseMonkey" Russell
-Copyright (c) 2023 spicyjpeg
+Copyright (c) 2023, 2025 spicyjpeg
 
 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
@@ -22,48 +22,77 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#include "common.h"
-#include "libpsxav.h"
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <libpsxav.h>
+#include "args.h"
+#include "decoding.h"
+#include "mdec.h"
 
-static time_t get_elapsed_time(settings_t *settings) {
-	if (!settings->show_progress) {
-		return 0;
+static time_t start_time = 0;
+static time_t last_progress_update = 0;
+
+static time_t get_elapsed_time(void) {
+	time_t t;
+
+	if (start_time > 0) {
+		t = time(NULL) - start_time;
+	} else {
+		t = 0;
+		start_time = time(NULL);
 	}
-	time_t t = time(NULL) - settings->start_time;
-	if (t <= settings->last_progress_update) {
+
+	if (t <= last_progress_update)
 		return 0;
-	}
-	settings->last_progress_update = t;
+
+	last_progress_update = t;
 	return t;
 }
 
-static psx_audio_xa_settings_t settings_to_libpsxav_xa_audio(settings_t *settings) {
-	psx_audio_xa_settings_t new_settings;
-	new_settings.bits_per_sample = settings->bits_per_sample;
-	new_settings.frequency = settings->frequency;
-	new_settings.stereo = settings->channels == 2;
-	new_settings.file_number = settings->file_number;
-	new_settings.channel_number = settings->channel_number;
+static psx_audio_xa_settings_t args_to_libpsxav_xa_audio(const args_t *args) {
+	psx_audio_xa_settings_t settings;
 
-	switch (settings->format) {
-		case FORMAT_XA:
-		case FORMAT_STR2:
-			new_settings.format = PSX_AUDIO_XA_FORMAT_XA;
-			break;
-		default:
-			new_settings.format = PSX_AUDIO_XA_FORMAT_XACD;
-			break;
-	}
+	settings.bits_per_sample = args->audio_bit_depth;
+	settings.frequency = args->audio_frequency;
+	settings.stereo = (args->audio_channels == 2);
+	settings.file_number = args->audio_xa_file;
+	settings.channel_number = args->audio_xa_channel;
 
-	return new_settings;
+	if (args->format == FORMAT_XACD || args->format == FORMAT_STRCD)
+		settings.format = PSX_AUDIO_XA_FORMAT_XACD;
+	else
+		settings.format = PSX_AUDIO_XA_FORMAT_XA;
+
+	return settings;
 };
 
-void write_vag_header(int size_per_channel, uint8_t *header, settings_t *settings) {
+static void init_sector_buffer_video(const args_t *args, psx_cdrom_sector_mode2_t *sector, int lba) {
+	psx_cdrom_init_sector((psx_cdrom_sector_t *)sector, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1);
+
+	sector->subheader[0].file = args->audio_xa_file;
+	sector->subheader[0].channel = args->audio_xa_channel & PSX_CDROM_SECTOR_XA_CHANNEL_MASK;
+	sector->subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA | PSX_CDROM_SECTOR_XA_SUBMODE_RT;
+	sector->subheader[0].coding = 0;
+
+	memcpy(sector->subheader + 1, sector->subheader, sizeof(psx_cdrom_sector_xa_subheader_t));
+}
+
+#define VAG_HEADER_SIZE 0x30
+
+static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *header) {
+	memset(header, 0, VAG_HEADER_SIZE);
+
 	// Magic
 	header[0x00] = 'V';
 	header[0x01] = 'A';
 	header[0x02] = 'G';
-	header[0x03] = settings->interleave ? 'i' : 'p';
+
+	if (args->format == FORMAT_VAGI)
+		header[0x03] = 'i';
+	else
+		header[0x03] = 'p';
 
 	// Version (big-endian)
 	header[0x04] = 0x00;
@@ -72,150 +101,228 @@ void write_vag_header(int size_per_channel, uint8_t *header, settings_t *setting
 	header[0x07] = 0x20;
 
 	// Interleave (little-endian)
-	header[0x08] = (uint8_t)settings->interleave;
-	header[0x09] = (uint8_t)(settings->interleave>>8);
-	header[0x0a] = (uint8_t)(settings->interleave>>16);
-	header[0x0b] = (uint8_t)(settings->interleave>>24);
+	if (args->format == FORMAT_VAGI) {
+		header[0x08] = (uint8_t)args->audio_interleave;
+		header[0x09] = (uint8_t)(args->audio_interleave >> 8);
+		header[0x0a] = (uint8_t)(args->audio_interleave >> 16);
+		header[0x0b] = (uint8_t)(args->audio_interleave >> 24);
+	}
 
 	// Length of data for each channel (big-endian)
-	header[0x0c] = (uint8_t)(size_per_channel>>24);
-	header[0x0d] = (uint8_t)(size_per_channel>>16);
-	header[0x0e] = (uint8_t)(size_per_channel>>8);
+	header[0x0c] = (uint8_t)(size_per_channel >> 24);
+	header[0x0d] = (uint8_t)(size_per_channel >> 16);
+	header[0x0e] = (uint8_t)(size_per_channel >> 8);
 	header[0x0f] = (uint8_t)size_per_channel;
 
 	// Sample rate (big-endian)
-	header[0x10] = (uint8_t)(settings->frequency>>24);
-	header[0x11] = (uint8_t)(settings->frequency>>16);
-	header[0x12] = (uint8_t)(settings->frequency>>8);
-	header[0x13] = (uint8_t)settings->frequency;
+	header[0x10] = (uint8_t)(args->audio_frequency >> 24);
+	header[0x11] = (uint8_t)(args->audio_frequency >> 16);
+	header[0x12] = (uint8_t)(args->audio_frequency >> 8);
+	header[0x13] = (uint8_t)args->audio_frequency;
 
 	// Number of channels (little-endian)
-	header[0x1e] = (uint8_t)settings->channels;
+	header[0x1e] = (uint8_t)args->audio_channels;
 	header[0x1f] = 0x00;
 
 	// Filename
-	//strncpy(header + 0x20, "psxavenc", 16);
-	memset(header + 0x20, 0, 16);
+	int name_offset = strlen(args->output_file);
+	while (
+		name_offset > 0 &&
+		args->output_file[name_offset - 1] != '/' &&
+		args->output_file[name_offset - 1] != '\\'
+	)
+		name_offset--;
+
+	strncpy((char*)(header + 0x20), &args->output_file[name_offset], 16);
 }
 
-void encode_file_spu(settings_t *settings, FILE *output) {
-	psx_audio_encoder_channel_state_t audio_state;	
+// Encodes the decoder's audio as XA-ADPCM, writing one encoded sector to output
+// per iteration until ensure_av_data() returns false. For FORMAT_XACD, bytes
+// 0x00C-0x00E of each sector receive a BCD timestamp derived from j + 75*2.
+void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output) {
+	psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args);
+	int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
+
+	psx_audio_encoder_state_t audio_state;
+	memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
+
+	for (int j = 0; ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, 0); j++) {
+		int samples_length = decoder->audio_sample_count / args->audio_channels;
+		if (samples_length > audio_samples_per_sector)
+			samples_length = audio_samples_per_sector;
+
+		uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
+		int length = psx_audio_xa_encode(
+			xa_settings,
+			&audio_state,
+			decoder->audio_samples,
+			samples_length,
+			buffer
+		);
+
+		if (decoder->end_of_input)
+			psx_audio_xa_encode_finalize(xa_settings, buffer, length);
+
+		if (args->format == FORMAT_XACD) {
+			int t = j + 75*2;
+
+			// Put the time in (minutes, seconds, sector within the second; BCD)
+			buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4);
+			buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4);
+			buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
+		}
+
+		retire_av_data(decoder, samples_length * args->audio_channels, 0);
+		fwrite(buffer, length, 1, output);
+
+		time_t t = get_elapsed_time();
+		if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+			fprintf(
+				stderr,
+				"\rLBA: %6d | Encoding speed: %5.2fx",
+				j,
+				(double)(j * audio_samples_per_sector) / (double)(args->audio_frequency * t)
+			);
+		}
+	}
+}
+
+void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
+	psx_audio_encoder_channel_state_t audio_state;
+	memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t));
+
 	int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
 	int block_size = psx_audio_spu_get_buffer_size_per_block();
-	uint8_t buffer[16];
 	int block_count;
 
-	memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t));
-
 	// The header must be written after the data as we don't yet know the
 	// number of audio samples.
-	if (settings->format == FORMAT_VAG) {
-		fseek(output, 48, SEEK_SET);
-	}
+	if (args->format == FORMAT_VAG)
+		fseek(output, VAG_HEADER_SIZE, SEEK_SET);
 
-	for (block_count = 0; ensure_av_data(settings, audio_samples_per_block, 0); block_count++) {
-		int samples_length = settings->audio_sample_count;
-		if (samples_length > audio_samples_per_block) samples_length = audio_samples_per_block;
+	for (block_count = 0; ensure_av_data(decoder, audio_samples_per_block, 0); block_count++) {
+		int samples_length = decoder->audio_sample_count;
 
-		int length = psx_audio_spu_encode(&audio_state, settings->audio_samples, samples_length, 1, buffer);
-		if (!block_count) {
-			// This flag is not required as the SPU already resets the loop
-			// address when starting playback of a sample.
-			//buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
-		}
-		if (settings->end_of_input) {
-			buffer[1] |= settings->loop ? PSX_AUDIO_SPU_LOOP_REPEAT : PSX_AUDIO_SPU_LOOP_END;
+		if (samples_length > audio_samples_per_block)
+			samples_length = audio_samples_per_block;
+
+		uint8_t buffer[16];
+		int length = psx_audio_spu_encode(
+			&audio_state,
+			decoder->audio_samples,
+			samples_length,
+			1,
+			buffer
+		);
+
+		// TODO: implement proper loop flag support
+		if (false)
+			buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
+		if (decoder->end_of_input) {
+			if (args->flags & FLAG_SPU_LOOP_END)
+				buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
+			else
+				buffer[1] |= PSX_AUDIO_SPU_LOOP_END;
 		}
 
-		retire_av_data(settings, samples_length, 0);
+		retire_av_data(decoder, samples_length, 0);
 		fwrite(buffer, length, 1, output);
 
-		time_t t = get_elapsed_time(settings);
-		if (t) {
-			fprintf(stderr, "\rBlock: %6d | Encoding speed: %5.2fx",
+		time_t t = get_elapsed_time();
+
+		if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+			fprintf(
+				stderr,
+				"\rBlock: %6d | Encoding speed: %5.2fx",
 				block_count,
-				(double)(block_count*audio_samples_per_block) / (double)(settings->frequency*t)
+				(double)(block_count * audio_samples_per_block) / (double)(args->audio_frequency * t)
 			);
 		}
 	}
 
-	int padding_size = (block_count*block_size) % settings->alignment;
-	if (padding_size) {
-		padding_size = settings->alignment - padding_size;
-		uint8_t *padding = malloc(padding_size);
-		memset(padding, 0, padding_size);
-		fwrite(padding, padding_size, 1, output);
-		free(padding);
-	}
+	int overflow = (block_count * block_size) % args->alignment;
+
+	if (overflow) {
+		for (int i = 0; i < (args->alignment - overflow); i++)
+			fputc(0, output);
+	}
+	if (args->format == FORMAT_VAG) {
+		uint8_t header[VAG_HEADER_SIZE];
+		write_vag_header(args, block_count * block_size, header);
 
-	if (settings->format == FORMAT_VAG) {
-		uint8_t header[48];
-		memset(header, 0, 48);
-		write_vag_header(block_count*block_size, header, settings);
 		fseek(output, 0, SEEK_SET);
-		fwrite(header, 48, 1, output);
+		fwrite(header, VAG_HEADER_SIZE, 1, output);
 	}
 }
 
-void encode_file_spu_interleaved(settings_t *settings, FILE *output) {
-	int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * settings->channels;
+void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
+	int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels;
 
 	// NOTE: since the interleaved .vag format is not standardized, some tools
 	// (such as vgmstream) will not properly play files with interleave < 2048,
 	// alignment != 2048 or channels != 2.
-	int buffer_size = settings->interleave + settings->alignment - 1;
-	buffer_size -= buffer_size % settings->alignment;
-	int header_size = 48 + settings->alignment - 1;
-	header_size -= header_size % settings->alignment;
+	int buffer_size = args->audio_interleave + args->alignment - 1;
+	buffer_size -= buffer_size % args->alignment;
+
+	int header_size = VAG_HEADER_SIZE + args->alignment - 1;
+	header_size -= header_size % args->alignment;
+
+	int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
+	int block_size = psx_audio_spu_get_buffer_size_per_block();
+	int audio_samples_per_chunk = args->audio_interleave / block_size * audio_samples_per_block;
+	int chunk_count;
+
+	if (args->format == FORMAT_VAGI)
+		fseek(output, header_size, SEEK_SET);
 
 	psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size);
 	uint8_t *buffer = malloc(buffer_size);
-	int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
-	int block_size = psx_audio_spu_get_buffer_size_per_block();
-	int audio_samples_per_chunk = settings->interleave / block_size * audio_samples_per_block;
-	int chunk_count;
-
 	memset(audio_state, 0, audio_state_size);
 
-	if (settings->format == FORMAT_VAGI) {
-		fseek(output, header_size, SEEK_SET);
-	}
-
-	for (chunk_count = 0; ensure_av_data(settings, audio_samples_per_chunk*settings->channels, 0); chunk_count++) {
-		int samples_length = settings->audio_sample_count / settings->channels;
+	for (chunk_count = 0; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) {
+		int samples_length = decoder->audio_sample_count / args->audio_channels;
 		if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk;
 
-		for (int ch = 0; ch < settings->channels; ch++) {
+		for (int ch = 0; ch < args->audio_channels; ch++) {
 			memset(buffer, 0, buffer_size);
-			int length = psx_audio_spu_encode(audio_state + ch, settings->audio_samples + ch, samples_length, settings->channels, buffer);
+			int length = psx_audio_spu_encode(
+				audio_state + ch,
+				decoder->audio_samples + ch,
+				samples_length,
+				args->audio_channels,
+				buffer
+			);
+
 			if (length) {
-				//buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
-				if (settings->loop) {
+				// TODO: implement proper loop flag support
+				if (args->flags & FLAG_SPU_LOOP_END)
 					buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
-				}
-				if (settings->end_of_input) {
+				else if (decoder->end_of_input)
 					buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_END;
-				}
 			}
 
 			fwrite(buffer, buffer_size, 1, output);
 
-			time_t t = get_elapsed_time(settings);
-			if (t) {
-				fprintf(stderr, "\rChunk: %6d | Encoding speed: %5.2fx",
+			time_t t = get_elapsed_time();
+
+			if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+				fprintf(
+					stderr,
+					"\rChunk: %6d | Encoding speed: %5.2fx",
 					chunk_count,
-					(double)(chunk_count*audio_samples_per_chunk) / (double)(settings->frequency*t)
+					(double)(chunk_count * audio_samples_per_chunk) / (double)(args->audio_frequency * t)
 				);
 			}
 		}
 
-		retire_av_data(settings, samples_length*settings->channels, 0);
+		retire_av_data(decoder, samples_length * args->audio_channels, 0);
 	}
 
-	if (settings->format == FORMAT_VAGI) {
+	if (args->format == FORMAT_VAGI) {
 		uint8_t *header = malloc(header_size);
 		memset(header, 0, header_size);
-		write_vag_header(chunk_count*settings->interleave, header, settings);
+		write_vag_header(args, chunk_count * args->audio_interleave, header);
+
 		fseek(output, 0, SEEK_SET);
 		fwrite(header, header_size, 1, output);
 		free(header);
@@ -225,52 +332,14 @@ void encode_file_spu_interleaved(settings_t *settings, FILE *output) {
 	free(buffer);
 }
 
-void encode_file_xa(settings_t *settings, FILE *output) {
-	psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings);
-	psx_audio_encoder_state_t audio_state;	
-	int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
-	uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
-
-	memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
-
-	for (int j = 0; ensure_av_data(settings, audio_samples_per_sector*settings->channels, 0); j++) {
-		int samples_length = settings->audio_sample_count / settings->channels;
-		if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector;
-		int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer);
-		if (settings->end_of_input) {
-			psx_audio_xa_encode_finalize(xa_settings, buffer, length);
-		}
-
-		if (settings->format == FORMAT_XACD) {
-			int t = j + 75*2;
-
-			// Put the time in
-			buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4);
-			buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4);
-			buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
-		}
-
-		retire_av_data(settings, samples_length*settings->channels, 0);
-		fwrite(buffer, length, 1, output);
-
-		time_t t = get_elapsed_time(settings);
-		if (t) {
-			fprintf(stderr, "\rLBA: %6d | Encoding speed: %5.2fx",
-				j,
-				(double)(j*audio_samples_per_sector) / (double)(settings->frequency*t)
-			);
-		}
-	}
-}
-
-void encode_file_str(settings_t *settings, FILE *output) {
-	psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings);
-	psx_audio_encoder_state_t audio_state;
+void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
+	psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args);
 	int audio_samples_per_sector;
 	uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
 
 	int offset, sector_size;
-	if (settings->format == FORMAT_STR2V) {
+
+	if (args->format == FORMAT_STRV) {
 		sector_size = 2048;
 		offset = 0x18;
 	} else {
@@ -280,16 +349,21 @@ void encode_file_str(settings_t *settings, FILE *output) {
 
 	int interleave;
 	int video_sectors_per_block;
-	if (settings->decoder_state_av.audio_stream) {
+	if (decoder->state.audio_stream) {
 		// 1/N audio, (N-1)/N video
 		audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
-		interleave = psx_audio_xa_get_sector_interleave(xa_settings) * settings->cd_speed;
+		interleave = psx_audio_xa_get_sector_interleave(xa_settings) * args->str_cd_speed;
 		video_sectors_per_block = interleave - 1;
 
-		if (!settings->quiet) {
-			fprintf(stderr, "Interleave: %d/%d audio, %d/%d video\n",
-				interleave - video_sectors_per_block, interleave, video_sectors_per_block, interleave);
-		}
+		if (!(args->flags & FLAG_QUIET))
+			fprintf(
+				stderr,
+				"Interleave: %d/%d audio, %d/%d video\n",
+				interleave - video_sectors_per_block,
+				interleave,
+				video_sectors_per_block,
+				interleave
+			);
 	} else {
 		// 0/1 audio, 1/1 video
 		audio_samples_per_sector = 0;
@@ -297,54 +371,65 @@ void encode_file_str(settings_t *settings, FILE *output) {
 		video_sectors_per_block = 1;
 	}
 
+	psx_audio_encoder_state_t audio_state;
 	memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
 
-	// e.g. 15fps = (150*7/8/15) = 8.75 blocks per frame
-	settings->state_vid.frame_block_base_overflow = (75*settings->cd_speed) * video_sectors_per_block * settings->video_fps_den;
-	settings->state_vid.frame_block_overflow_den = interleave * settings->video_fps_num;
-	double frame_size = (double)settings->state_vid.frame_block_base_overflow / (double)settings->state_vid.frame_block_overflow_den;
-	if (!settings->quiet) {
-		fprintf(stderr, "Frame size: %.2f sectors\n", frame_size);
-	}
+	mdec_encoder_t encoder;
+	init_mdec_encoder(&encoder, args->video_width, args->video_height);
 
-	init_encoder_state(settings);
-	settings->state_vid.frame_output = malloc(2016 * (int)ceil(frame_size));
-	settings->state_vid.frame_index = 0;
-	settings->state_vid.frame_data_offset = 0;
-	settings->state_vid.frame_max_size = 0;
-	settings->state_vid.frame_block_overflow_num = 0;
-	settings->state_vid.quant_scale_sum = 0;
+	// e.g. 15fps = (150*7/8/15) = 8.75 blocks per frame
+	encoder.state.frame_block_base_overflow = (75 * args->str_cd_speed) * video_sectors_per_block * args->str_fps_den;
+	encoder.state.frame_block_overflow_den = interleave * args->str_fps_num;
+	double frame_size = (double)encoder.state.frame_block_base_overflow / (double)encoder.state.frame_block_overflow_den;
+
+	if (!(args->flags & FLAG_QUIET))
+		fprintf(stderr, "Frame size: %.2f sectors\n", frame_size);
+
+	encoder.state.frame_output = malloc(2016 * (int)ceil(frame_size));
+	encoder.state.frame_index = 0;
+	encoder.state.frame_data_offset = 0;
+	encoder.state.frame_max_size = 0;
+	encoder.state.frame_block_overflow_num = 0;
+	encoder.state.quant_scale_sum = 0;
 
 	// FIXME: this needs an extra frame to prevent A/V desync
 	int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size);
 	if (frames_needed < 2) frames_needed = 2;
 
-	for (int j = 0; !settings->end_of_input || settings->state_vid.frame_data_offset < settings->state_vid.frame_max_size; j++) {
-		ensure_av_data(settings, audio_samples_per_sector*settings->channels, frames_needed);
+	for (int j = 0; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; j++) {
+		ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed);
 
 		if ((j%interleave) < video_sectors_per_block) {
 			// Video sector
-			init_sector_buffer_video((psx_cdrom_sector_mode2_t*) buffer, settings);
-			encode_sector_str(settings->video_frames, buffer, settings);
+			init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*) buffer, j);
+
+			int frames_used = encode_sector_str(&encoder, decoder->video_frames, buffer);
+			retire_av_data(decoder, 0, frames_used);
 		} else {
 			// Audio sector
-			int samples_length = settings->audio_sample_count / settings->channels;
+			int samples_length = decoder->audio_sample_count / args->audio_channels;
 			if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector;
 
 			// FIXME: this is an extremely hacky way to handle audio tracks
 			// shorter than the video track
-			if (!samples_length) {
+			if (!samples_length)
 				video_sectors_per_block++;
-			}
 
-			int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer);
-			if (settings->end_of_input) {
+			int length = psx_audio_xa_encode(
+				xa_settings,
+				&audio_state,
+				decoder->audio_samples,
+				samples_length,
+				buffer
+			);
+
+			if (decoder->end_of_input)
 				psx_audio_xa_encode_finalize(xa_settings, buffer, length);
-			}
-			retire_av_data(settings, samples_length*settings->channels, 0);
+
+			retire_av_data(decoder, samples_length * args->audio_channels, 0);
 		}
 
-		if (settings->format == FORMAT_STR2CD) {
+		if (args->format == FORMAT_STRCD) {
 			int t = j + 75*2;
 
 			// Put the time in
@@ -353,48 +438,57 @@ void encode_file_str(settings_t *settings, FILE *output) {
 			buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
 		}
 
-		if((j%interleave) < video_sectors_per_block) {
-			calculate_edc_data(buffer);
-		}
+		if((j%interleave) < video_sectors_per_block)
+			psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)buffer, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1);
 
 		fwrite(buffer + offset, sector_size, 1, output);
 
-		time_t t = get_elapsed_time(settings);
-		if (t) {
-			fprintf(stderr, "\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
-				settings->state_vid.frame_index,
+		time_t t = get_elapsed_time();
+
+		if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+			fprintf(
+				stderr,
+				"\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
+				encoder.state.frame_index,
 				j,
-				(double)settings->state_vid.quant_scale_sum / (double)settings->state_vid.frame_index,
-				(double)(settings->state_vid.frame_index*settings->video_fps_den) / (double)(t*settings->video_fps_num)
+				(double)encoder.state.quant_scale_sum / (double)encoder.state.frame_index,
+				(double)(encoder.state.frame_index * args->str_fps_den) / (double)(t * args->str_fps_num)
 			);
 		}
 	}
 
-	free(settings->state_vid.frame_output);
-	destroy_encoder_state(settings);
+	free(encoder.state.frame_output);
+	destroy_mdec_encoder(&encoder);
 }
 
-void encode_file_sbs(settings_t *settings, FILE *output) {
-	init_encoder_state(settings);
-	settings->state_vid.frame_output = malloc(settings->alignment);
-	settings->state_vid.frame_data_offset = 0;
-	settings->state_vid.frame_max_size = settings->alignment;
-	settings->state_vid.quant_scale_sum = 0;
+void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output) {
+	mdec_encoder_t encoder;
+	init_mdec_encoder(&encoder, args->video_width, args->video_height);
 
-	for (int j = 0; ensure_av_data(settings, 0, 1); j++) {
-		encode_frame_bs(settings->video_frames, settings);
-		fwrite(settings->state_vid.frame_output, settings->alignment, 1, output);
+	encoder.state.frame_output = malloc(args->alignment);
+	encoder.state.frame_data_offset = 0;
+	encoder.state.frame_max_size = args->alignment;
+	encoder.state.quant_scale_sum = 0;
 
-		time_t t = get_elapsed_time(settings);
-		if (t) {
-			fprintf(stderr, "\rFrame: %4d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
+	for (int j = 0; ensure_av_data(decoder, 0, 1); j++) {
+		encode_frame_bs(&encoder, decoder->video_frames);
+
+		retire_av_data(decoder, 0, 1);
+		fwrite(encoder.state.frame_output, args->alignment, 1, output);
+
+		time_t t = get_elapsed_time();
+
+		if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+			fprintf(
+				stderr,
+				"\rFrame: %4d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
 				j,
-				(double)settings->state_vid.quant_scale_sum / (double)j,
-				(double)(j*settings->video_fps_den) / (double)(t*settings->video_fps_num)
+				(double)encoder.state.quant_scale_sum / (double)j,
+				(double)(j * args->str_fps_den) / (double)(t * args->str_fps_num)
 			);
 		}
 	}
 
-	free(settings->state_vid.frame_output);
-	destroy_encoder_state(settings);
+	free(encoder.state.frame_output);
+	destroy_mdec_encoder(&encoder);
 }
diff --git a/psxavenc/filefmt.h b/psxavenc/filefmt.h
new file mode 100644
index 0000000..5f8eb38
--- /dev/null
+++ b/psxavenc/filefmt.h
@@ -0,0 +1,35 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+Copyright (c) 2023, 2025 spicyjpeg
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#pragma once
+
+#include <stdio.h>
+#include "args.h"
+#include "decoding.h"
+
+void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output);
+void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output);
+void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output);
+void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output);
+void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output);
diff --git a/psxavenc/main.c b/psxavenc/main.c
new file mode 100644
index 0000000..78c0935
--- /dev/null
+++ b/psxavenc/main.c
@@ -0,0 +1,174 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+Copyright (c) 2023 spicyjpeg
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <stdint.h>
+#include <stdio.h>
+#include "args.h"
+#include "decoding.h"
+#include "filefmt.h"
+
+static const char *const bs_codec_names[NUM_BS_CODECS] = { // display names for the "Video format" banner, indexed by the video_codec argument
+	"BS v2",
+	"BS v3",
+	"BS v3 (with DC wrapping)"
+};
+
+static const uint8_t decoder_flags[NUM_FORMATS] = { // flags handed to open_av_data(), indexed by args.format
+	DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // xa
+	DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // xacd
+	DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // spu
+	DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // vag
+	DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // spui
+	DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // vagi
+	DECODER_USE_AUDIO | DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED, // str
+	DECODER_USE_AUDIO | DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED, // strcd
+	DECODER_USE_AUDIO | DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED, // strspu
+	DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED, // strv
+	DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED // sbs
+};
+
+// Prints the XA-ADPCM audio parameters to stderr.
+static void print_xa_audio_format(const args_t *args) {
+	fprintf(
+		stderr,
+		"Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
+		args->audio_frequency,
+		args->audio_bit_depth,
+		(args->audio_channels == 2) ? "stereo" : "mono",
+		args->audio_xa_file,
+		args->audio_xa_channel
+	);
+}
+
+// Prints the BS codec name, resolution and frame rate to stderr.
+static void print_video_format(const args_t *args) {
+	fprintf(
+		stderr,
+		"Video format: %s, %dx%d, %.2f fps\n",
+		bs_codec_names[args->video_codec],
+		args->video_width,
+		args->video_height,
+		(double)args->str_fps_num / (double)args->str_fps_den
+	);
+}
+
+// Entry point: parses the arguments, encodes the input into the output file
+// in the selected format and returns 0 on success or 1 on failure.
+int main(int argc, const char **argv) {
+	args_t args;
+	decoder_t decoder;
+	FILE *output;
+
+	// Preset these fields before handing the struct to parse_args().
+	args.flags = 0;
+	args.format = FORMAT_INVALID;
+	args.input_file = NULL;
+	args.output_file = NULL;
+	args.swresample_options = NULL;
+	args.swscale_options = NULL;
+
+	if (!parse_args(&args, argv + 1, argc - 1))
+		return 1;
+	if (!open_av_data(&decoder, &args, decoder_flags[args.format])) {
+		fprintf(stderr, "Failed to open input file: %s\n", args.input_file);
+		return 1;
+	}
+
+	output = fopen(args.output_file, "wb");
+	if (output == NULL) {
+		fprintf(stderr, "Failed to open output file: %s\n", args.output_file);
+		close_av_data(&decoder); // the decoder was opened above; release it
+		return 1;
+	}
+
+	switch (args.format) {
+		case FORMAT_XA:
+		case FORMAT_XACD:
+			if (!(args.flags & FLAG_QUIET))
+				print_xa_audio_format(&args);
+
+			encode_file_xa(&args, &decoder, output);
+			break;
+
+		case FORMAT_SPU:
+		case FORMAT_VAG:
+			if (!(args.flags & FLAG_QUIET))
+				fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz mono\n", args.audio_frequency);
+
+			encode_file_spu(&args, &decoder, output);
+			break;
+
+		case FORMAT_SPUI:
+		case FORMAT_VAGI:
+			if (!(args.flags & FLAG_QUIET))
+				fprintf(
+					stderr,
+					"Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n",
+					args.audio_frequency,
+					args.audio_channels,
+					args.audio_interleave
+				);
+
+			encode_file_spui(&args, &decoder, output);
+			break;
+
+		case FORMAT_STR:
+		case FORMAT_STRCD:
+		case FORMAT_STRSPU:
+		case FORMAT_STRV:
+			if (!(args.flags & FLAG_QUIET)) {
+				if (decoder.state.audio_stream)
+					print_xa_audio_format(&args);
+
+				print_video_format(&args);
+			}
+
+			encode_file_str(&args, &decoder, output);
+			break;
+
+		case FORMAT_SBS:
+			if (!(args.flags & FLAG_QUIET))
+				print_video_format(&args);
+
+			encode_file_sbs(&args, &decoder, output);
+			break;
+
+		default:
+			;
+	}
+
+	close_av_data(&decoder);
+
+	// A stream error or a failed fclose() (which flushes buffered data) means
+	// the output file is incomplete, so do not report success.
+	int write_failed = ferror(output);
+	if ((fclose(output) != 0) || write_failed) {
+		fprintf(stderr, "Failed to write output file: %s\n", args.output_file);
+		return 1;
+	}
+	if (!(args.flags & FLAG_HIDE_PROGRESS))
+		fprintf(stderr, "\nDone.\n");
+
+	return 0;
+}
diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c
index 6abffea..095bb33 100644
--- a/psxavenc/mdec.c
+++ b/psxavenc/mdec.c
@@ -22,131 +22,139 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#include "common.h"
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libavcodec/avdct.h>
+#include "mdec.h"
 
-#define MAKE_HUFFMAN_PAIR(zeroes, value) (((zeroes)<<10)|((+(value))&0x3FF)),(((zeroes)<<10)|((-(value))&0x3FF))
-const struct {
+#define AC_PAIR(zeroes, value) /* yields the u_hword_pos, u_hword_neg initializers: zeroes in bits 10 and up, +value / -value in the low 10 bits */ \
+	(((zeroes) << 10) | ((+(value)) & 0x3FF)), \
+	(((zeroes) << 10) | ((-(value)) & 0x3FF))
+
+static const struct {
 	int c_bits;
 	uint32_t c_value;
 	uint16_t u_hword_pos;
 	uint16_t u_hword_neg;
-} huffman_lookup[] = {
+} ac_huffman_tree[] = {
 	// Fuck this Huffman tree in particular --GM
-	{2,0x3,MAKE_HUFFMAN_PAIR(0,1)},
-	{3,0x3,MAKE_HUFFMAN_PAIR(1,1)},
-	{4,0x4,MAKE_HUFFMAN_PAIR(0,2)},
-	{4,0x5,MAKE_HUFFMAN_PAIR(2,1)},
-	{5,0x05,MAKE_HUFFMAN_PAIR(0,3)},
-	{5,0x06,MAKE_HUFFMAN_PAIR(4,1)},
-	{5,0x07,MAKE_HUFFMAN_PAIR(3,1)},
-	{6,0x04,MAKE_HUFFMAN_PAIR(7,1)},
-	{6,0x05,MAKE_HUFFMAN_PAIR(6,1)},
-	{6,0x06,MAKE_HUFFMAN_PAIR(1,2)},
-	{6,0x07,MAKE_HUFFMAN_PAIR(5,1)},
-	{7,0x04,MAKE_HUFFMAN_PAIR(2,2)},
-	{7,0x05,MAKE_HUFFMAN_PAIR(9,1)},
-	{7,0x06,MAKE_HUFFMAN_PAIR(0,4)},
-	{7,0x07,MAKE_HUFFMAN_PAIR(8,1)},
-	{8,0x20,MAKE_HUFFMAN_PAIR(13,1)},
-	{8,0x21,MAKE_HUFFMAN_PAIR(0,6)},
-	{8,0x22,MAKE_HUFFMAN_PAIR(12,1)},
-	{8,0x23,MAKE_HUFFMAN_PAIR(11,1)},
-	{8,0x24,MAKE_HUFFMAN_PAIR(3,2)},
-	{8,0x25,MAKE_HUFFMAN_PAIR(1,3)},
-	{8,0x26,MAKE_HUFFMAN_PAIR(0,5)},
-	{8,0x27,MAKE_HUFFMAN_PAIR(10,1)},
-	{10,0x008,MAKE_HUFFMAN_PAIR(16,1)},
-	{10,0x009,MAKE_HUFFMAN_PAIR(5,2)},
-	{10,0x00A,MAKE_HUFFMAN_PAIR(0,7)},
-	{10,0x00B,MAKE_HUFFMAN_PAIR(2,3)},
-	{10,0x00C,MAKE_HUFFMAN_PAIR(1,4)},
-	{10,0x00D,MAKE_HUFFMAN_PAIR(15,1)},
-	{10,0x00E,MAKE_HUFFMAN_PAIR(14,1)},
-	{10,0x00F,MAKE_HUFFMAN_PAIR(4,2)},
-	{12,0x010,MAKE_HUFFMAN_PAIR(0,11)},
-	{12,0x011,MAKE_HUFFMAN_PAIR(8,2)},
-	{12,0x012,MAKE_HUFFMAN_PAIR(4,3)},
-	{12,0x013,MAKE_HUFFMAN_PAIR(0,10)},
-	{12,0x014,MAKE_HUFFMAN_PAIR(2,4)},
-	{12,0x015,MAKE_HUFFMAN_PAIR(7,2)},
-	{12,0x016,MAKE_HUFFMAN_PAIR(21,1)},
-	{12,0x017,MAKE_HUFFMAN_PAIR(20,1)},
-	{12,0x018,MAKE_HUFFMAN_PAIR(0,9)},
-	{12,0x019,MAKE_HUFFMAN_PAIR(19,1)},
-	{12,0x01A,MAKE_HUFFMAN_PAIR(18,1)},
-	{12,0x01B,MAKE_HUFFMAN_PAIR(1,5)},
-	{12,0x01C,MAKE_HUFFMAN_PAIR(3,3)},
-	{12,0x01D,MAKE_HUFFMAN_PAIR(0,8)},
-	{12,0x01E,MAKE_HUFFMAN_PAIR(6,2)},
-	{12,0x01F,MAKE_HUFFMAN_PAIR(17,1)},
-	{13,0x0010,MAKE_HUFFMAN_PAIR(10,2)},
-	{13,0x0011,MAKE_HUFFMAN_PAIR(9,2)},
-	{13,0x0012,MAKE_HUFFMAN_PAIR(5,3)},
-	{13,0x0013,MAKE_HUFFMAN_PAIR(3,4)},
-	{13,0x0014,MAKE_HUFFMAN_PAIR(2,5)},
-	{13,0x0015,MAKE_HUFFMAN_PAIR(1,7)},
-	{13,0x0016,MAKE_HUFFMAN_PAIR(1,6)},
-	{13,0x0017,MAKE_HUFFMAN_PAIR(0,15)},
-	{13,0x0018,MAKE_HUFFMAN_PAIR(0,14)},
-	{13,0x0019,MAKE_HUFFMAN_PAIR(0,13)},
-	{13,0x001A,MAKE_HUFFMAN_PAIR(0,12)},
-	{13,0x001B,MAKE_HUFFMAN_PAIR(26,1)},
-	{13,0x001C,MAKE_HUFFMAN_PAIR(25,1)},
-	{13,0x001D,MAKE_HUFFMAN_PAIR(24,1)},
-	{13,0x001E,MAKE_HUFFMAN_PAIR(23,1)},
-	{13,0x001F,MAKE_HUFFMAN_PAIR(22,1)},
-	{14,0x0010,MAKE_HUFFMAN_PAIR(0,31)},
-	{14,0x0011,MAKE_HUFFMAN_PAIR(0,30)},
-	{14,0x0012,MAKE_HUFFMAN_PAIR(0,29)},
-	{14,0x0013,MAKE_HUFFMAN_PAIR(0,28)},
-	{14,0x0014,MAKE_HUFFMAN_PAIR(0,27)},
-	{14,0x0015,MAKE_HUFFMAN_PAIR(0,26)},
-	{14,0x0016,MAKE_HUFFMAN_PAIR(0,25)},
-	{14,0x0017,MAKE_HUFFMAN_PAIR(0,24)},
-	{14,0x0018,MAKE_HUFFMAN_PAIR(0,23)},
-	{14,0x0019,MAKE_HUFFMAN_PAIR(0,22)},
-	{14,0x001A,MAKE_HUFFMAN_PAIR(0,21)},
-	{14,0x001B,MAKE_HUFFMAN_PAIR(0,20)},
-	{14,0x001C,MAKE_HUFFMAN_PAIR(0,19)},
-	{14,0x001D,MAKE_HUFFMAN_PAIR(0,18)},
-	{14,0x001E,MAKE_HUFFMAN_PAIR(0,17)},
-	{14,0x001F,MAKE_HUFFMAN_PAIR(0,16)},
-	{15,0x0010,MAKE_HUFFMAN_PAIR(0,40)},
-	{15,0x0011,MAKE_HUFFMAN_PAIR(0,39)},
-	{15,0x0012,MAKE_HUFFMAN_PAIR(0,38)},
-	{15,0x0013,MAKE_HUFFMAN_PAIR(0,37)},
-	{15,0x0014,MAKE_HUFFMAN_PAIR(0,36)},
-	{15,0x0015,MAKE_HUFFMAN_PAIR(0,35)},
-	{15,0x0016,MAKE_HUFFMAN_PAIR(0,34)},
-	{15,0x0017,MAKE_HUFFMAN_PAIR(0,33)},
-	{15,0x0018,MAKE_HUFFMAN_PAIR(0,32)},
-	{15,0x0019,MAKE_HUFFMAN_PAIR(1,14)},
-	{15,0x001A,MAKE_HUFFMAN_PAIR(1,13)},
-	{15,0x001B,MAKE_HUFFMAN_PAIR(1,12)},
-	{15,0x001C,MAKE_HUFFMAN_PAIR(1,11)},
-	{15,0x001D,MAKE_HUFFMAN_PAIR(1,10)},
-	{15,0x001E,MAKE_HUFFMAN_PAIR(1,9)},
-	{15,0x001F,MAKE_HUFFMAN_PAIR(1,8)},
-	{16,0x0010,MAKE_HUFFMAN_PAIR(1,18)},
-	{16,0x0011,MAKE_HUFFMAN_PAIR(1,17)},
-	{16,0x0012,MAKE_HUFFMAN_PAIR(1,16)},
-	{16,0x0013,MAKE_HUFFMAN_PAIR(1,15)},
-	{16,0x0014,MAKE_HUFFMAN_PAIR(6,3)},
-	{16,0x0015,MAKE_HUFFMAN_PAIR(16,2)},
-	{16,0x0016,MAKE_HUFFMAN_PAIR(15,2)},
-	{16,0x0017,MAKE_HUFFMAN_PAIR(14,2)},
-	{16,0x0018,MAKE_HUFFMAN_PAIR(13,2)},
-	{16,0x0019,MAKE_HUFFMAN_PAIR(12,2)},
-	{16,0x001A,MAKE_HUFFMAN_PAIR(11,2)},
-	{16,0x001B,MAKE_HUFFMAN_PAIR(31,1)},
-	{16,0x001C,MAKE_HUFFMAN_PAIR(30,1)},
-	{16,0x001D,MAKE_HUFFMAN_PAIR(29,1)},
-	{16,0x001E,MAKE_HUFFMAN_PAIR(28,1)},
-	{16,0x001F,MAKE_HUFFMAN_PAIR(27,1)},
+	{ 2, 0x3,    AC_PAIR( 0,  1)},
+	{ 3, 0x3,    AC_PAIR( 1,  1)},
+	{ 4, 0x4,    AC_PAIR( 0,  2)},
+	{ 4, 0x5,    AC_PAIR( 2,  1)},
+	{ 5, 0x05,   AC_PAIR( 0,  3)},
+	{ 5, 0x06,   AC_PAIR( 4,  1)},
+	{ 5, 0x07,   AC_PAIR( 3,  1)},
+	{ 6, 0x04,   AC_PAIR( 7,  1)},
+	{ 6, 0x05,   AC_PAIR( 6,  1)},
+	{ 6, 0x06,   AC_PAIR( 1,  2)},
+	{ 6, 0x07,   AC_PAIR( 5,  1)},
+	{ 7, 0x04,   AC_PAIR( 2,  2)},
+	{ 7, 0x05,   AC_PAIR( 9,  1)},
+	{ 7, 0x06,   AC_PAIR( 0,  4)},
+	{ 7, 0x07,   AC_PAIR( 8,  1)},
+	{ 8, 0x20,   AC_PAIR(13,  1)},
+	{ 8, 0x21,   AC_PAIR( 0,  6)},
+	{ 8, 0x22,   AC_PAIR(12,  1)},
+	{ 8, 0x23,   AC_PAIR(11,  1)},
+	{ 8, 0x24,   AC_PAIR( 3,  2)},
+	{ 8, 0x25,   AC_PAIR( 1,  3)},
+	{ 8, 0x26,   AC_PAIR( 0,  5)},
+	{ 8, 0x27,   AC_PAIR(10,  1)},
+	{10, 0x008,  AC_PAIR(16,  1)},
+	{10, 0x009,  AC_PAIR( 5,  2)},
+	{10, 0x00A,  AC_PAIR( 0,  7)},
+	{10, 0x00B,  AC_PAIR( 2,  3)},
+	{10, 0x00C,  AC_PAIR( 1,  4)},
+	{10, 0x00D,  AC_PAIR(15,  1)},
+	{10, 0x00E,  AC_PAIR(14,  1)},
+	{10, 0x00F,  AC_PAIR( 4,  2)},
+	{12, 0x010,  AC_PAIR( 0, 11)},
+	{12, 0x011,  AC_PAIR( 8,  2)},
+	{12, 0x012,  AC_PAIR( 4,  3)},
+	{12, 0x013,  AC_PAIR( 0, 10)},
+	{12, 0x014,  AC_PAIR( 2,  4)},
+	{12, 0x015,  AC_PAIR( 7,  2)},
+	{12, 0x016,  AC_PAIR(21,  1)},
+	{12, 0x017,  AC_PAIR(20,  1)},
+	{12, 0x018,  AC_PAIR( 0,  9)},
+	{12, 0x019,  AC_PAIR(19,  1)},
+	{12, 0x01A,  AC_PAIR(18,  1)},
+	{12, 0x01B,  AC_PAIR( 1,  5)},
+	{12, 0x01C,  AC_PAIR( 3,  3)},
+	{12, 0x01D,  AC_PAIR( 0,  8)},
+	{12, 0x01E,  AC_PAIR( 6,  2)},
+	{12, 0x01F,  AC_PAIR(17,  1)},
+	{13, 0x0010, AC_PAIR(10,  2)},
+	{13, 0x0011, AC_PAIR( 9,  2)},
+	{13, 0x0012, AC_PAIR( 5,  3)},
+	{13, 0x0013, AC_PAIR( 3,  4)},
+	{13, 0x0014, AC_PAIR( 2,  5)},
+	{13, 0x0015, AC_PAIR( 1,  7)},
+	{13, 0x0016, AC_PAIR( 1,  6)},
+	{13, 0x0017, AC_PAIR( 0, 15)},
+	{13, 0x0018, AC_PAIR( 0, 14)},
+	{13, 0x0019, AC_PAIR( 0, 13)},
+	{13, 0x001A, AC_PAIR( 0, 12)},
+	{13, 0x001B, AC_PAIR(26,  1)},
+	{13, 0x001C, AC_PAIR(25,  1)},
+	{13, 0x001D, AC_PAIR(24,  1)},
+	{13, 0x001E, AC_PAIR(23,  1)},
+	{13, 0x001F, AC_PAIR(22,  1)},
+	{14, 0x0010, AC_PAIR( 0, 31)},
+	{14, 0x0011, AC_PAIR( 0, 30)},
+	{14, 0x0012, AC_PAIR( 0, 29)},
+	{14, 0x0013, AC_PAIR( 0, 28)},
+	{14, 0x0014, AC_PAIR( 0, 27)},
+	{14, 0x0015, AC_PAIR( 0, 26)},
+	{14, 0x0016, AC_PAIR( 0, 25)},
+	{14, 0x0017, AC_PAIR( 0, 24)},
+	{14, 0x0018, AC_PAIR( 0, 23)},
+	{14, 0x0019, AC_PAIR( 0, 22)},
+	{14, 0x001A, AC_PAIR( 0, 21)},
+	{14, 0x001B, AC_PAIR( 0, 20)},
+	{14, 0x001C, AC_PAIR( 0, 19)},
+	{14, 0x001D, AC_PAIR( 0, 18)},
+	{14, 0x001E, AC_PAIR( 0, 17)},
+	{14, 0x001F, AC_PAIR( 0, 16)},
+	{15, 0x0010, AC_PAIR( 0, 40)},
+	{15, 0x0011, AC_PAIR( 0, 39)},
+	{15, 0x0012, AC_PAIR( 0, 38)},
+	{15, 0x0013, AC_PAIR( 0, 37)},
+	{15, 0x0014, AC_PAIR( 0, 36)},
+	{15, 0x0015, AC_PAIR( 0, 35)},
+	{15, 0x0016, AC_PAIR( 0, 34)},
+	{15, 0x0017, AC_PAIR( 0, 33)},
+	{15, 0x0018, AC_PAIR( 0, 32)},
+	{15, 0x0019, AC_PAIR( 1, 14)},
+	{15, 0x001A, AC_PAIR( 1, 13)},
+	{15, 0x001B, AC_PAIR( 1, 12)},
+	{15, 0x001C, AC_PAIR( 1, 11)},
+	{15, 0x001D, AC_PAIR( 1, 10)},
+	{15, 0x001E, AC_PAIR( 1,  9)},
+	{15, 0x001F, AC_PAIR( 1,  8)},
+	{16, 0x0010, AC_PAIR( 1, 18)},
+	{16, 0x0011, AC_PAIR( 1, 17)},
+	{16, 0x0012, AC_PAIR( 1, 16)},
+	{16, 0x0013, AC_PAIR( 1, 15)},
+	{16, 0x0014, AC_PAIR( 6,  3)},
+	{16, 0x0015, AC_PAIR(16,  2)},
+	{16, 0x0016, AC_PAIR(15,  2)},
+	{16, 0x0017, AC_PAIR(14,  2)},
+	{16, 0x0018, AC_PAIR(13,  2)},
+	{16, 0x0019, AC_PAIR(12,  2)},
+	{16, 0x001A, AC_PAIR(11,  2)},
+	{16, 0x001B, AC_PAIR(31,  1)},
+	{16, 0x001C, AC_PAIR(30,  1)},
+	{16, 0x001D, AC_PAIR(29,  1)},
+	{16, 0x001E, AC_PAIR(28,  1)},
+	{16, 0x001F, AC_PAIR(27,  1)}
 };
-#undef MAKE_HUFFMAN_PAIR
 
-const uint8_t quant_dec[8*8] = {
+static const uint8_t quant_dec[8*8] = {
 	 2, 16, 19, 22, 26, 27, 29, 34,
 	16, 16, 22, 24, 27, 29, 34, 37,
 	19, 22, 26, 27, 29, 34, 34, 38,
@@ -154,96 +162,110 @@ const uint8_t quant_dec[8*8] = {
 	22, 26, 27, 29, 32, 35, 40, 48,
 	26, 27, 29, 32, 35, 40, 48, 58,
 	26, 27, 29, 34, 38, 46, 56, 69,
-	27, 29, 35, 38, 46, 56, 69, 83,
+	27, 29, 35, 38, 46, 56, 69, 83
 };
 
-const uint8_t dct_zigzag_table[8*8] = {
-	0x00,0x01,0x05,0x06,0x0E,0x0F,0x1B,0x1C,
-	0x02,0x04,0x07,0x0D,0x10,0x1A,0x1D,0x2A,
-	0x03,0x08,0x0C,0x11,0x19,0x1E,0x29,0x2B,
-	0x09,0x0B,0x12,0x18,0x1F,0x28,0x2C,0x35,
-	0x0A,0x13,0x17,0x20,0x27,0x2D,0x34,0x36,
-	0x14,0x16,0x21,0x26,0x2E,0x33,0x37,0x3C,
-	0x15,0x22,0x25,0x2F,0x32,0x38,0x3B,0x3D,
-	0x23,0x24,0x30,0x31,0x39,0x3A,0x3E,0x3F,
+static const uint8_t dct_zigzag_table[8*8] = {
+	 0,  1,  5,  6, 14, 15, 27, 28,
+	 2,  4,  7, 13, 16, 26, 29, 42,
+	 3,  8, 12, 17, 25, 30, 41, 43,
+	 9, 11, 18, 24, 31, 40, 44, 53,
+	10, 19, 23, 32, 39, 45, 52, 54,
+	20, 22, 33, 38, 46, 51, 55, 60,
+	21, 34, 37, 47, 50, 56, 59, 61,
+	35, 36, 48, 49, 57, 58, 62, 63
 };
 
-const uint8_t dct_zagzig_table[8*8] = {
-	0x00,0x01,0x08,0x10,0x09,0x02,0x03,0x0A,
-	0x11,0x18,0x20,0x19,0x12,0x0B,0x04,0x05,
-	0x0C,0x13,0x1A,0x21,0x28,0x30,0x29,0x22,
-	0x1B,0x14,0x0D,0x06,0x07,0x0E,0x15,0x1C,
-	0x23,0x2A,0x31,0x38,0x39,0x32,0x2B,0x24,
-	0x1D,0x16,0x0F,0x17,0x1E,0x25,0x2C,0x33,
-	0x3A,0x3B,0x34,0x2D,0x26,0x1F,0x27,0x2E,
-	0x35,0x3C,0x3D,0x36,0x2F,0x37,0x3E,0x3F,
+static const uint8_t dct_zagzig_table[8*8] = {
+	 0,  1,  8, 16,  9,  2,  3, 10,
+	17, 24, 32, 25, 18, 11,  4,  5,
+	12, 19, 26, 33, 40, 48, 41, 34,
+	27, 20, 13,  6,  7, 14, 21, 28,
+	35, 42, 49, 56, 57, 50, 43, 36,
+	29, 22, 15, 23, 30, 37, 44, 51,
+	58, 59, 52, 45, 38, 31, 39, 46,
+	53, 60, 61, 54, 47, 55, 62, 63
 };
 
-const int16_t dct_scale_table[8*8] = {
-	+0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82,
-	+0x7D8A, +0x6A6D, +0x471C, +0x18F8, -0x18F9, -0x471D, -0x6A6E, -0x7D8B,
-	+0x7641, +0x30FB, -0x30FC, -0x7642, -0x7642, -0x30FC, +0x30FB, +0x7641,
-	+0x6A6D, -0x18F9, -0x7D8B, -0x471D, +0x471C, +0x7D8A, +0x18F8, -0x6A6E,
-	+0x5A82, -0x5A83, -0x5A83, +0x5A82, +0x5A82, -0x5A83, -0x5A83, +0x5A82,
-	+0x471C, -0x7D8B, +0x18F8, +0x6A6D, -0x6A6E, -0x18F9, +0x7D8A, -0x471D,
-	+0x30FB, -0x7642, +0x7641, -0x30FC, -0x30FC, +0x7641, -0x7642, +0x30FB,
-	+0x18F8, -0x471D, +0x6A6D, -0x7D8B, +0x7D8A, -0x6A6E, +0x471C, -0x18F9,
-};
+#if 0
+#define SF0 0x5a82 // cos(0/16 * pi) * sqrt(2)
+#define SF1 0x7d8a // cos(1/16 * pi) * 2
+#define SF2 0x7641 // cos(2/16 * pi) * 2
+#define SF3 0x6a6d // cos(3/16 * pi) * 2
+#define SF4 0x5a82 // cos(4/16 * pi) * 2
+#define SF5 0x471c // cos(5/16 * pi) * 2
+#define SF6 0x30fb // cos(6/16 * pi) * 2
+#define SF7 0x18f8 // cos(7/16 * pi) * 2
 
-static void init_dct_data(vid_encoder_state_t *state)
-{
+static const int16_t dct_scale_table[8*8] = {
+	SF0,  SF0,  SF0,  SF0,  SF0,  SF0,  SF0,  SF0,
+	SF1,  SF3,  SF5,  SF7, -SF7, -SF5, -SF3, -SF1,
+	SF2,  SF6, -SF6, -SF2, -SF2, -SF6,  SF6,  SF2,
+	SF3, -SF7, -SF1, -SF5,  SF5,  SF1,  SF7, -SF3,
+	SF4, -SF4, -SF4,  SF4,  SF4, -SF4, -SF4,  SF4,
+	SF5, -SF1,  SF7,  SF3, -SF3, -SF7,  SF1, -SF5,
+	SF6, -SF2,  SF2, -SF6, -SF6,  SF2, -SF2,  SF6,
+	SF7, -SF5,  SF3, -SF1,  SF1, -SF3,  SF5, -SF7
+};
+#endif
+
+static void init_dct_data(mdec_encoder_state_t *state) { // fills all 0x10000 entries of ac_huffman_map and coeff_clamp_map
 	for(int i = 0; i <= 0xFFFF; i++) {
 		// high 8 bits = bit count
 		// low 24 bits = value
-		state->huffman_encoding_map[i] = ((6+16)<<24)|((0x01<<16)|(i));
+		state->ac_huffman_map[i] = ((6+16) << 24) | (0x01 << 16) | i; // default: 22-bit escape code carrying i itself
 
 		int16_t coeff = (int16_t)i;
-		if (coeff < -0x200) { coeff = -0x200; }
-		if (coeff > +0x1FF) { coeff = +0x1FF; }
-		state->coeff_clamp_map[i] = coeff&0x3FF;
+
+		if (coeff < -0x200)
+			coeff = -0x200;
+		else if (coeff > +0x1FF)
+			coeff = +0x1FF;
+
+		state->coeff_clamp_map[i] = coeff & 0x3FF; // clamped to [-0x200, +0x1FF], stored as its low 10 bits
 	}
 
-	for(int i = 0; i < sizeof(huffman_lookup)/sizeof(huffman_lookup[0]); i++) {
-		int bits = huffman_lookup[i].c_bits+1;
-		uint32_t base_value = huffman_lookup[i].c_value;
-		state->huffman_encoding_map[huffman_lookup[i].u_hword_pos] = (bits<<24)|(base_value<<1)|0;
-		state->huffman_encoding_map[huffman_lookup[i].u_hword_neg] = (bits<<24)|(base_value<<1)|1;
-	}
+	int tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]);
 
+	for(int i = 0; i < tree_item_count; i++) { // overwrite the escape default for every value that has a dedicated code
+		int bits = ac_huffman_tree[i].c_bits+1; // +1 for the trailing bit that selects u_hword_pos (0) or u_hword_neg (1)
+		uint32_t base_value = ac_huffman_tree[i].c_value;
+
+		state->ac_huffman_map[ac_huffman_tree[i].u_hword_pos] = (bits << 24) | (base_value << 1) | 0;
+		state->ac_huffman_map[ac_huffman_tree[i].u_hword_neg] = (bits << 24) | (base_value << 1) | 1;
+	}
 }
 
-static bool flush_bits(vid_encoder_state_t *state)
-{
+static bool flush_bits(mdec_encoder_state_t *state) { // stores the pending 16-bit word (low byte first) and resets the bit buffer
 	if(state->bits_left < 16) {
 		state->frame_output[state->bytes_used++] = (uint8_t)state->bits_value;
-		if (state->bytes_used >= state->frame_max_size) {
+		if (state->bytes_used >= state->frame_max_size) // NOTE(review): the byte above is stored before this check; confirm frame_output has room when bytes_used == frame_max_size on entry
 			return false;
-		}
+
 		state->frame_output[state->bytes_used++] = (uint8_t)(state->bits_value>>8);
 	}
+
 	state->bits_left = 16;
 	state->bits_value = 0;
 	return true;
 }
 
-static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
-{
+static bool encode_bits(mdec_encoder_state_t *state, int bits, uint32_t val) {
 	assert(val < (1<<bits));
 
 	// FIXME: for some reason the main logic breaks when bits > 16
 	// and I have no idea why, so I have to split this up --GM
 	if (bits > 16) {
-		if (!encode_bits(state, bits-16, val>>16)) {
+		if (!encode_bits(state, bits-16, val>>16))
 			return false;
-		}
+
 		bits = 16;
 		val &= 0xFFFF;
 	}
 
 	if (state->bits_left == 0) {
-		if (!flush_bits(state)) {
+		if (!flush_bits(state))
 			return false;
-		}
 	}
 
 	while (bits > state->bits_left) {
@@ -260,9 +282,8 @@ static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
 		val &= mask;
 		assert(mask >= 1);
 		assert(val < (1<<bits));
-		if (!flush_bits(state)) {
+		if (!flush_bits(state))
 			return false;
-		}
 	}
 
 	if (bits >= 1) {
@@ -281,31 +302,31 @@ static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
 	return true;
 }
 
-static bool encode_ac_value(vid_encoder_state_t *state, uint16_t value)
-{
+static bool encode_ac_value(mdec_encoder_state_t *state, uint16_t value) { // emits the code for one packed (zeroes << 10 | 10-bit value) halfword
 	assert(0 <= value && value <= 0xFFFF);
 
 #if 0
-	for(int i = 0; i < sizeof(huffman_lookup)/sizeof(huffman_lookup[0]); i++) {
-		if(value == huffman_lookup[i].u_hword_pos) {
-			return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|0);
-		}
-		else if(value == huffman_lookup[i].u_hword_neg) {
-			return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|1);
+	int tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]);
+
+	for (int i = 0; i < tree_item_count; i++) {
+		if (value == ac_huffman_tree[i].u_hword_pos) {
+			return encode_bits(state, ac_huffman_tree[i].c_bits+1, ((uint32_t)ac_huffman_tree[i].c_value << 1) | 0);
+		} else if (value == ac_huffman_tree[i].u_hword_neg) {
+			return encode_bits(state, ac_huffman_tree[i].c_bits+1, ((uint32_t)ac_huffman_tree[i].c_value << 1) | 1);
 		}
 	}
 
 	// Use an escape
-	return encode_bits(state, 6+16, (0x01<<16)|(0xFFFF&(uint32_t)value));
+	return encode_bits(state, 6+16, (0x01 << 16) | (0xFFFF & (uint32_t)value));
 #else
-	uint32_t outword = state->huffman_encoding_map[value];
-	return encode_bits(state, outword>>24, outword&0xFFFFFF);
+	uint32_t outword = state->ac_huffman_map[value]; // table lookup replaces the linear search kept under #if 0 above
+
+	return encode_bits(state, outword >> 24, outword & 0xFFFFFF); // high 8 bits = bit count, low 24 bits = code
 #endif
 }
 
-static void transform_dct_block(vid_encoder_state_t *state, int16_t *block)
-{
 #if 0
+static void transform_dct_block(int16_t *block) {
 	// Apply DCT to block
 	int midblock[8*8];
 
@@ -327,55 +348,9 @@ static void transform_dct_block(vid_encoder_state_t *state, int16_t *block)
 		block[8*i+j] = (int16_t)((v + 0xFFF) >> 13);
 	}
 	}
-#else
-	state->dct_context->fdct(block);
-#endif
 }
 
-// https://stackoverflow.com/a/60011209
-//#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? (((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d)))
-#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d)))
-
-static bool encode_dct_block(vid_encoder_state_t *state, const int16_t *block, const int16_t *quant_table)
-{
-	int dc = DIVIDE_ROUNDED(block[0], quant_table[0]);
-	dc = state->coeff_clamp_map[dc&0xFFFF];
-
-	if (!encode_bits(state, 10, dc)) {
-		return false;
-	}
-
-	for (int i = 1, zeroes = 0; i < 64; i++) {
-		int ri = dct_zagzig_table[i];
-		int ac = DIVIDE_ROUNDED(block[ri], quant_table[ri]);
-		ac = state->coeff_clamp_map[ac&0xFFFF];
-
-		if (ac == 0) {
-			zeroes++;
-		} else {
-			if (!encode_ac_value(state, (zeroes<<10)|ac)) {
-				return false;
-			}
-			zeroes = 0;
-			state->uncomp_hwords_used += 1;
-		}
-	}
-
-	//fprintf(stderr, "dc %08X rles %2d\n", dc, zero_rle_words);
-	//assert(dc >= -0x200); assert(dc <  +0x200);
-
-	// Store end of block
-	if (!encode_bits(state, 2, 0x2)) {
-		return false;
-	}
-	state->uncomp_hwords_used += 2;
-	//state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF;
-	return true;
-}
-
-#if 0
-static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t min_val, int *values_to_shed)
-{
+static int reduce_dct_block(mdec_encoder_state_t *state, int32_t *block, int32_t min_val, int *values_to_shed) {
 	// Reduce so it can all fit
 	int nonzeroes = 0;
 
@@ -397,117 +372,178 @@ static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t
 }
 #endif
 
-bool init_encoder_state(settings_t *settings)
-{
-	if (settings->state_vid.huffman_encoding_map) {
-		return true;
-	}
+// Division rounded to the nearest integer; see https://stackoverflow.com/a/60011209
+#if 0
+#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? (((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d)))
+#else // NOTE(review): round() is declared in <math.h>, which the include block at the top of this file does not list; confirm mdec.h pulls it in
+#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d)))
+#endif
 
-	settings->state_vid.huffman_encoding_map = malloc(0x10000*sizeof(uint32_t));
-	settings->state_vid.coeff_clamp_map = malloc(0x10000*sizeof(int16_t));
-	if (!settings->state_vid.huffman_encoding_map || !settings->state_vid.coeff_clamp_map) {
+static bool encode_dct_block(mdec_encoder_state_t *state, const int16_t *block, const int16_t *quant_table) { // writes one 8x8 block: 10-bit DC, AC codes in dct_zagzig_table order, then the end-of-block code
+	int dc = DIVIDE_ROUNDED(block[0], quant_table[0]);
+	dc = state->coeff_clamp_map[dc&0xFFFF];
+
+	if (!encode_bits(state, 10, dc)) // the DC coefficient is written as a raw 10-bit value
 		return false;
-	}
-	init_dct_data(&(settings->state_vid));
 
-	settings->state_vid.dct_context = avcodec_dct_alloc();
-	if (!settings->state_vid.dct_context) {
-		return false;
-	}
-	avcodec_dct_init(settings->state_vid.dct_context);
+	for (int i = 1, zeroes = 0; i < 64; i++) {
+		int ri = dct_zagzig_table[i];
+		int ac = DIVIDE_ROUNDED(block[ri], quant_table[ri]);
+		ac = state->coeff_clamp_map[ac&0xFFFF];
 
-	int dct_block_count_x = (settings->video_width+15)/16;
-	int dct_block_count_y = (settings->video_height+15)/16;
+		if (ac == 0) {
+			zeroes++;
+		} else {
+			if (!encode_ac_value(state, (zeroes<<10)|ac)) // same packing as AC_PAIR: preceding zero count << 10 | 10-bit value
+				return false;
 
-	int dct_block_size = dct_block_count_x*dct_block_count_y*sizeof(int16_t)*8*8;
-	for (int i = 0; i < 6; i++) {
-		settings->state_vid.dct_block_lists[i] = malloc(dct_block_size);
-		if (!settings->state_vid.dct_block_lists[i]) {
-			return false;
+			zeroes = 0;
+			state->uncomp_hwords_used += 1;
 		}
 	}
 
+	//fprintf(stderr, "dc %08X rles %2d\n", dc, zero_rle_words);
+	//assert(dc >= -0x200); assert(dc <  +0x200);
+
+	// Store end of block
+	if (!encode_bits(state, 2, 0x2))
+		return false;
+
+	state->uncomp_hwords_used += 2;
+	//state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF;
 	return true;
 }
 
-void destroy_encoder_state(settings_t *settings)
-{
-	if (settings->state_vid.huffman_encoding_map) {
-		free(settings->state_vid.huffman_encoding_map);
-		settings->state_vid.huffman_encoding_map = NULL;
+// Allocates the DCT context, lookup maps and block lists; returns true at once if
+// dct_context is already set. On failure it frees them all and returns false.
+bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_height) {
+	mdec_encoder_state_t *state = &(encoder->state);
+
+	if (state->dct_context != NULL)
+		return true;
+
+	int dct_block_count_x = (video_width + 15) / 16;
+	int dct_block_count_y = (video_height + 15) / 16;
+	int dct_block_size = dct_block_count_x * dct_block_count_y * sizeof(int16_t) * 8*8;
+
+	state->dct_context = avcodec_dct_alloc();
+	state->ac_huffman_map = malloc(0x10000 * sizeof(uint32_t));
+	state->dc_huffman_map = NULL;
+	state->coeff_clamp_map = malloc(0x10000 * sizeof(int16_t));
+	state->delta_clamp_map = NULL;
+
+	bool ok = state->dct_context && state->ac_huffman_map && state->coeff_clamp_map;
+	// Assign every slot, even after a failure, so cleanup only sees valid pointers.
+	for (int i = 0; i < 6; i++) {
+		state->dct_block_lists[i] = ok ? malloc(dct_block_size) : NULL;
+		ok = ok && (state->dct_block_lists[i] != NULL);
 	}
-	if (settings->state_vid.coeff_clamp_map) {
-		free(settings->state_vid.coeff_clamp_map);
-		settings->state_vid.coeff_clamp_map = NULL;
+
+	if (!ok) {
+		destroy_mdec_encoder(encoder);
+		return false;
+	}
+
+	avcodec_dct_init(state->dct_context);
+	init_dct_data(state);
+	return true;
+}
+
+void destroy_mdec_encoder(mdec_encoder_t *encoder) { // frees the DCT context, lookup maps and block lists, NULLing each pointer; frame_output is not freed here
+	mdec_encoder_state_t *state = &(encoder->state);
+
+	if (state->dct_context) { // init_mdec_encoder() tests this pointer to decide whether the encoder is already set up
+		av_free(state->dct_context);
+		state->dct_context = NULL;
 	}
-	if (settings->state_vid.dct_context) {
-		av_free(settings->state_vid.dct_context);
-		settings->state_vid.dct_context = NULL;
+	if (state->ac_huffman_map) {
+		free(state->ac_huffman_map);
+		state->ac_huffman_map = NULL;
 	}
-	if (settings->state_vid.dct_block_lists[0]) {
-		for (int i = 0; i < 6; i++) {
-			free(settings->state_vid.dct_block_lists[i]);
-			settings->state_vid.dct_block_lists[i] = NULL;
+	if (state->dc_huffman_map) {
+		free(state->dc_huffman_map);
+		state->dc_huffman_map = NULL;
+	}
+	if (state->coeff_clamp_map) {
+		free(state->coeff_clamp_map);
+		state->coeff_clamp_map = NULL;
+	}
+	if (state->delta_clamp_map) {
+		free(state->delta_clamp_map);
+		state->delta_clamp_map = NULL;
+	}
+	for (int i = 0; i < 6; i++) {
+		if (state->dct_block_lists[i]) {
+			free(state->dct_block_lists[i]);
+			state->dct_block_lists[i] = NULL;
 		}
 	}
 }
 
-void encode_frame_bs(uint8_t *video_frame, settings_t *settings)
-{
-	int pitch = settings->video_width;
-	/*int real_index = (settings->state_vid.frame_index-1);
-	if (real_index > video_frame_count-1) {
-		real_index = video_frame_count-1;
-	}
-	uint8_t *y_plane = video_frames + settings->video_width*settings->video_height*3/2*real_index;*/
+void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
+	mdec_encoder_state_t *state = &(encoder->state);
+
+	assert(state->dct_context);
+
+	int pitch = encoder->video_width;
+#if 0
+	int real_index = state->frame_index - 1;
+	if (real_index > (video_frame_count - 1))
+		real_index = video_frame_count - 1;
+
+	uint8_t *y_plane = video_frames + encoder->video_width * encoder->video_height * 3/2 * real_index;
+#else
 	uint8_t *y_plane = video_frame;
-	uint8_t *c_plane = y_plane + (settings->video_width*settings->video_height);
+	uint8_t *c_plane = y_plane + (encoder->video_width * encoder->video_height);
+#endif
 
-	assert(settings->state_vid.huffman_encoding_map);
-
-	int dct_block_count_x = (settings->video_width+15)/16;
-	int dct_block_count_y = (settings->video_height+15)/16;
+	int dct_block_count_x = (encoder->video_width + 15) / 16;
+	int dct_block_count_y = (encoder->video_height + 15) / 16;
 
 	// TODO: non-16x16-aligned videos
-	assert((settings->video_width % 16) == 0);
-	assert((settings->video_height % 16) == 0);
+	assert((encoder->video_width % 16) == 0);
+	assert((encoder->video_height % 16) == 0);
 
 	// Rearrange the Y/C planes returned by libswscale into macroblocks.
-	for(int fx = 0; fx < dct_block_count_x; fx++) {
-	for(int fy = 0; fy < dct_block_count_y; fy++) {
-		// Order: Cr Cb [Y1|Y2\nY3|Y4]
-		int block_offs = 64 * (fy*dct_block_count_x + fx);
-		int16_t *blocks[6] = {
-			settings->state_vid.dct_block_lists[0] + block_offs,
-			settings->state_vid.dct_block_lists[1] + block_offs,
-			settings->state_vid.dct_block_lists[2] + block_offs,
-			settings->state_vid.dct_block_lists[3] + block_offs,
-			settings->state_vid.dct_block_lists[4] + block_offs,
-			settings->state_vid.dct_block_lists[5] + block_offs,
-		};
+	for (int fx = 0; fx < dct_block_count_x; fx++) {
+		for (int fy = 0; fy < dct_block_count_y; fy++) {
+			// Order: Cr Cb [Y1|Y2]
+			//              [Y3|Y4]
+			int block_offs = 64 * (fy*dct_block_count_x + fx);
+			int16_t *blocks[6] = {
+				state->dct_block_lists[0] + block_offs,
+				state->dct_block_lists[1] + block_offs,
+				state->dct_block_lists[2] + block_offs,
+				state->dct_block_lists[3] + block_offs,
+				state->dct_block_lists[4] + block_offs,
+				state->dct_block_lists[5] + block_offs
+			};
 
-		for(int y = 0; y < 8; y++) {
-		for(int x = 0; x < 8; x++) {
-			int k = y*8 + x;
-			int cx = fx*8 + x;
-			int cy = fy*8 + y;
-			int lx = fx*16 + x;
-			int ly = fy*16 + y;
+			for (int y = 0; y < 8; y++) {
+				for (int x = 0; x < 8; x++) {
+					int k = y*8 + x;
+					int cx = fx*8 + x;
+					int cy = fy*8 + y;
+					int lx = fx*16 + x;
+					int ly = fy*16 + y;
 
-			blocks[0][k] = (int16_t)c_plane[pitch*cy + 2*cx + 0] - 128;
-			blocks[1][k] = (int16_t)c_plane[pitch*cy + 2*cx + 1] - 128;
-			blocks[2][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+0)] - 128;
-			blocks[3][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+8)] - 128;
-			blocks[4][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+0)] - 128;
-			blocks[5][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+8)] - 128;
-		}
-		}
+					blocks[0][k] = (int16_t)c_plane[pitch*cy + 2*cx + 0] - 128;
+					blocks[1][k] = (int16_t)c_plane[pitch*cy + 2*cx + 1] - 128;
+					blocks[2][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+0)] - 128;
+					blocks[3][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+8)] - 128;
+					blocks[4][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+0)] - 128;
+					blocks[5][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+8)] - 128;
+				}
+			}
 
-		for(int i = 0; i < 6; i++) {
-			transform_dct_block(&(settings->state_vid), blocks[i]);
+			for (int i = 0; i < 6; i++)
+#if 0
+				transform_dct_block(blocks[i]);
+#else
+				state->dct_context->fdct(blocks[i]);
+#endif
 		}
 	}
-	}
 
 	// Attempt encoding the frame at the maximum quality. If the result is too
 	// large, increase the quantization scale and try again.
@@ -516,100 +552,107 @@ void encode_frame_bs(uint8_t *video_frame, settings_t *settings)
 	// compressing at scale N but optimizing coefficients away until it fits
 	// (like the old algorithm did)
 	for (
-		settings->state_vid.quant_scale = 1;
-		settings->state_vid.quant_scale < 64;
-		settings->state_vid.quant_scale++
+		state->quant_scale = 1;
+		state->quant_scale < 64;
+		state->quant_scale++
 	) {
 		int16_t quant_table[8*8];
 
 		// The DC coefficient's quantization scale is always 8.
 		quant_table[0] = quant_dec[0] * 8;
-		for (int i = 1; i < 64; i++) {
-			quant_table[i] = quant_dec[i] * settings->state_vid.quant_scale;
-		}
 
-		memset(settings->state_vid.frame_output, 0, settings->state_vid.frame_max_size);
+		for (int i = 1; i < 64; i++)
+			quant_table[i] = quant_dec[i] * state->quant_scale;
 
-		settings->state_vid.bits_value = 0;
-		settings->state_vid.bits_left = 16;
-		settings->state_vid.uncomp_hwords_used = 0;
-		settings->state_vid.bytes_used = 8;
+		memset(state->frame_output, 0, state->frame_max_size);
+
+		state->bits_value = 0;
+		state->bits_left = 16;
+		state->uncomp_hwords_used = 0;
+		state->bytes_used = 8;
 
 		bool ok = true;
-		for(int fx = 0; ok && (fx < dct_block_count_x); fx++) {
-		for(int fy = 0; ok && (fy < dct_block_count_y); fy++) {
-			// Order: Cr Cb [Y1|Y2\nY3|Y4]
-			int block_offs = 64 * (fy*dct_block_count_x + fx);
-			int16_t *blocks[6] = {
-				settings->state_vid.dct_block_lists[0] + block_offs,
-				settings->state_vid.dct_block_lists[1] + block_offs,
-				settings->state_vid.dct_block_lists[2] + block_offs,
-				settings->state_vid.dct_block_lists[3] + block_offs,
-				settings->state_vid.dct_block_lists[4] + block_offs,
-				settings->state_vid.dct_block_lists[5] + block_offs,
-			};
+		for (int fx = 0; ok && (fx < dct_block_count_x); fx++) {
+			for (int fy = 0; ok && (fy < dct_block_count_y); fy++) {
+				// Order: Cr Cb [Y1|Y2]
+				//              [Y3|Y4]
+				int block_offs = 64 * (fy*dct_block_count_x + fx);
+				int16_t *blocks[6] = {
+					state->dct_block_lists[0] + block_offs,
+					state->dct_block_lists[1] + block_offs,
+					state->dct_block_lists[2] + block_offs,
+					state->dct_block_lists[3] + block_offs,
+					state->dct_block_lists[4] + block_offs,
+					state->dct_block_lists[5] + block_offs
+				};
 
-			for(int i = 0; ok && (i < 6); i++) {
-				ok = encode_dct_block(&(settings->state_vid), blocks[i], quant_table);
+				for(int i = 0; ok && (i < 6); i++)
+					ok = encode_dct_block(state, blocks[i], quant_table);
 			}
 		}
-		}
 
-		if (!ok) { continue; }
-		if (!encode_bits(&(settings->state_vid), 10, 0x1FF)) { continue; }
-		if (!encode_bits(&(settings->state_vid), 2, 0x2)) { continue; }
-		if (!flush_bits(&(settings->state_vid))) { continue; }
+		if (!ok)
+			continue;
+		if (!encode_bits(state, 10, 0x1FF))
+			continue;
+		if (!encode_bits(state, 2, 0x2))
+			continue;
+		if (!flush_bits(state))
+			continue;
 
-		settings->state_vid.uncomp_hwords_used += 2;
-		settings->state_vid.quant_scale_sum += settings->state_vid.quant_scale;
+		state->uncomp_hwords_used += 2;
+		state->quant_scale_sum += state->quant_scale;
 		break;
 	}
-	assert(settings->state_vid.quant_scale < 64);
+	assert(state->quant_scale < 64);
 
 	// MDEC DMA is usually configured to transfer data in 32-word chunks.
-	settings->state_vid.uncomp_hwords_used = (settings->state_vid.uncomp_hwords_used+0x3F)&~0x3F;
+	state->uncomp_hwords_used = (state->uncomp_hwords_used+0x3F)&~0x3F;
 
 	// This is not the number of 32-byte blocks required for uncompressed data
 	// as jPSXdec docs say, but rather the number of 32-*bit* words required.
 	// The first 4 bytes of the frame header are in fact the MDEC command to
 	// start decoding, which contains the data length in words in the lower 16
 	// bits.
-	settings->state_vid.blocks_used = (settings->state_vid.uncomp_hwords_used+1)>>1;
+	state->blocks_used = (state->uncomp_hwords_used+1)>>1;
 
 	// We need a multiple of 4
-	settings->state_vid.bytes_used = (settings->state_vid.bytes_used+0x3)&~0x3;
+	state->bytes_used = (state->bytes_used+0x3)&~0x3;
 
 	// MDEC command (size of decompressed MDEC data)
-	settings->state_vid.frame_output[0x000] = (uint8_t)settings->state_vid.blocks_used;
-	settings->state_vid.frame_output[0x001] = (uint8_t)(settings->state_vid.blocks_used>>8);
-	settings->state_vid.frame_output[0x002] = (uint8_t)0x00;
-	settings->state_vid.frame_output[0x003] = (uint8_t)0x38;
+	state->frame_output[0x000] = (uint8_t)state->blocks_used;
+	state->frame_output[0x001] = (uint8_t)(state->blocks_used>>8);
+	state->frame_output[0x002] = (uint8_t)0x00;
+	state->frame_output[0x003] = (uint8_t)0x38;
 
 	// Quantization scale
-	settings->state_vid.frame_output[0x004] = (uint8_t)settings->state_vid.quant_scale;
-	settings->state_vid.frame_output[0x005] = (uint8_t)(settings->state_vid.quant_scale>>8);
+	state->frame_output[0x004] = (uint8_t)state->quant_scale;
+	state->frame_output[0x005] = (uint8_t)(state->quant_scale>>8);
 
 	// BS version
-	settings->state_vid.frame_output[0x006] = 0x02;
-	settings->state_vid.frame_output[0x007] = 0x00;
-
-	retire_av_data(settings, 0, 1);
+	state->frame_output[0x006] = 0x02;
+	state->frame_output[0x007] = 0x00;
 }
 
-void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings)
-{
+int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *output) {
+	mdec_encoder_state_t *state = &(encoder->state);
+	int last_frame_index = state->frame_index;
+	int frame_size = encoder->video_width * encoder->video_height * 2;
+
 	uint8_t header[32];
 	memset(header, 0, sizeof(header));
 
-	while(settings->state_vid.frame_data_offset >= settings->state_vid.frame_max_size) {
-		settings->state_vid.frame_index++;
+	while (state->frame_data_offset >= state->frame_max_size) {
+		state->frame_index++;
 		// TODO: work out an optimal block count for this
 		// TODO: calculate this all based on FPS
-		settings->state_vid.frame_block_overflow_num += settings->state_vid.frame_block_base_overflow;
-		settings->state_vid.frame_max_size = settings->state_vid.frame_block_overflow_num / settings->state_vid.frame_block_overflow_den * 2016;
-		settings->state_vid.frame_block_overflow_num %= settings->state_vid.frame_block_overflow_den;
-		settings->state_vid.frame_data_offset = 0;
-		encode_frame_bs(video_frames, settings);
+		state->frame_block_overflow_num += state->frame_block_base_overflow;
+		state->frame_max_size = state->frame_block_overflow_num / state->frame_block_overflow_den * 2016;
+		state->frame_block_overflow_num %= state->frame_block_overflow_den;
+		state->frame_data_offset = 0;
+
+		encode_frame_bs(encoder, video_frames);
+		video_frames += frame_size;
 	}
 
 	// STR version
@@ -621,47 +664,48 @@ void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *setti
 	header[0x003] = 0x80;
 
 	// Muxed chunk index/count
-	int chunk_index = settings->state_vid.frame_data_offset/2016;
-	int chunk_count = settings->state_vid.frame_max_size/2016;
+	int chunk_index = state->frame_data_offset / 2016;
+	int chunk_count = state->frame_max_size / 2016;
 	header[0x004] = (uint8_t)chunk_index;
-	header[0x005] = (uint8_t)(chunk_index>>8);
+	header[0x005] = (uint8_t)(chunk_index >> 8);
 	header[0x006] = (uint8_t)chunk_count;
-	header[0x007] = (uint8_t)(chunk_count>>8);
+	header[0x007] = (uint8_t)(chunk_count >> 8);
 
 	// Frame index
-	header[0x008] = (uint8_t)settings->state_vid.frame_index;
-	header[0x009] = (uint8_t)(settings->state_vid.frame_index>>8);
-	header[0x00A] = (uint8_t)(settings->state_vid.frame_index>>16);
-	header[0x00B] = (uint8_t)(settings->state_vid.frame_index>>24);
+	header[0x008] = (uint8_t)state->frame_index;
+	header[0x009] = (uint8_t)(state->frame_index >> 8);
+	header[0x00A] = (uint8_t)(state->frame_index >> 16);
+	header[0x00B] = (uint8_t)(state->frame_index >> 24);
 
 	// Video frame size
-	header[0x010] = (uint8_t)settings->video_width;
-	header[0x011] = (uint8_t)(settings->video_width>>8);
-	header[0x012] = (uint8_t)settings->video_height;
-	header[0x013] = (uint8_t)(settings->video_height>>8);
+	header[0x010] = (uint8_t)encoder->video_width;
+	header[0x011] = (uint8_t)(encoder->video_width >> 8);
+	header[0x012] = (uint8_t)encoder->video_height;
+	header[0x013] = (uint8_t)(encoder->video_height >> 8);
 
 	// MDEC command (size of decompressed MDEC data)
-	header[0x014] = (uint8_t)settings->state_vid.blocks_used;
-	header[0x015] = (uint8_t)(settings->state_vid.blocks_used>>8);
+	header[0x014] = (uint8_t)state->blocks_used;
+	header[0x015] = (uint8_t)(state->blocks_used >> 8);
 	header[0x016] = 0x00;
 	header[0x017] = 0x38;
 
 	// Quantization scale
-	header[0x018] = (uint8_t)settings->state_vid.quant_scale;
-	header[0x019] = (uint8_t)(settings->state_vid.quant_scale>>8);
+	header[0x018] = (uint8_t)state->quant_scale;
+	header[0x019] = (uint8_t)(state->quant_scale >> 8);
 
 	// BS version
 	header[0x01A] = 0x02;
 	header[0x01B] = 0x00;
 
 	// Demuxed bytes used as a multiple of 4
-	header[0x00C] = (uint8_t)settings->state_vid.bytes_used;
-	header[0x00D] = (uint8_t)(settings->state_vid.bytes_used>>8);
-	header[0x00E] = (uint8_t)(settings->state_vid.bytes_used>>16);
-	header[0x00F] = (uint8_t)(settings->state_vid.bytes_used>>24);
+	header[0x00C] = (uint8_t)state->bytes_used;
+	header[0x00D] = (uint8_t)(state->bytes_used >> 8);
+	header[0x00E] = (uint8_t)(state->bytes_used >> 16);
+	header[0x00F] = (uint8_t)(state->bytes_used >> 24);
 
 	memcpy(output + 0x018, header, sizeof(header));
-	memcpy(output + 0x018 + 0x020, settings->state_vid.frame_output + settings->state_vid.frame_data_offset, 2016);
+	memcpy(output + 0x018 + 0x020, state->frame_output + state->frame_data_offset, 2016);
 
-	settings->state_vid.frame_data_offset += 2016;
+	state->frame_data_offset += 2016;
+	return state->frame_index - last_frame_index;
 }
diff --git a/psxavenc/mdec.h b/psxavenc/mdec.h
new file mode 100644
index 0000000..6b22e20
--- /dev/null
+++ b/psxavenc/mdec.h
@@ -0,0 +1,67 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+Copyright (c) 2023, 2025 spicyjpeg
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <libavcodec/avdct.h>
+
+typedef struct {
+	int frame_index;
+	int frame_data_offset;
+	int frame_max_size;
+	int frame_block_base_overflow;
+	int frame_block_overflow_num;
+	int frame_block_overflow_den;
+	int block_type;
+	int16_t last_dc_values[3];
+	uint16_t bits_value;
+	int bits_left;
+	uint8_t *frame_output;
+	int bytes_used;
+	int blocks_used;
+	int uncomp_hwords_used;
+	int quant_scale;
+	int quant_scale_sum;
+
+	AVDCT *dct_context;
+	uint32_t *ac_huffman_map;
+	uint32_t *dc_huffman_map;
+	int16_t *coeff_clamp_map;
+	int16_t *delta_clamp_map;
+	int16_t *dct_block_lists[6];
+} mdec_encoder_state_t;
+
+typedef struct {
+	int video_width;
+	int video_height;
+
+	mdec_encoder_state_t state;
+} mdec_encoder_t;
+
+bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_height);
+void destroy_mdec_encoder(mdec_encoder_t *encoder);
+void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame);
+int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *output);
diff --git a/psxavenc/psxavenc.c b/psxavenc/psxavenc.c
deleted file mode 100644
index d980f1d..0000000
--- a/psxavenc/psxavenc.c
+++ /dev/null
@@ -1,495 +0,0 @@
-/*
-psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
-
-Copyright (c) 2019, 2020 Adrian "asie" Siekierka
-Copyright (c) 2019 Ben "GreaseMonkey" Russell
-Copyright (c) 2023 spicyjpeg
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not
-   claim that you wrote the original software. If you use this software
-   in a product, an acknowledgment in the product documentation would be
-   appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be
-   misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "common.h"
-#include "config.h"
-
-const char *format_names[NUM_FORMATS] = {
-	"xa", "xacd",
-	"spu", "spui",
-	"vag", "vagi",
-	"str2", "str2cd", "str2v",
-	"sbs2"
-};
-
-void print_help(void) {
-	fprintf(stderr,
-		"Usage:\n"
-		"    psxavenc -t <xa|xacd>     [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] <in> <out.xa>\n"
-		"    psxavenc -t <str2|str2cd> [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] [-s WxH] [-I] [-r num/den] [-x 1|2] <in> <out.str>\n"
-		"    psxavenc -t str2v         [-s WxH] [-I] [-r num/den] [-x 1|2] <in> <out.str>\n"
-		"    psxavenc -t sbs2          [-s WxH] [-I] [-r num/den] [-a size] <in> <out.str>\n"
-		"    psxavenc -t <spu|vag>     [-f freq] [-L] [-a size] <in> <out.vag>\n"
-		"    psxavenc -t <spui|vagi>   [-f freq] [-c 1-24] [-L] [-i size] [-a size] <in> <out.vag>\n"
-		"\nTool options:\n"
-		"    -h               Show this help message and exit\n"
-		"    -V               Show version information and exit\n"
-		"    -q               Suppress all non-error messages\n"
-		"\n"
-		"Output options:\n"
-		"    -t format        Use specified output type\n"
-		"                       xa     [A.] XA-ADPCM, 2336-byte sectors\n"
-		"                       xacd   [A.] XA-ADPCM, 2352-byte sectors\n"
-		"                       spu    [A.] raw SPU-ADPCM mono data\n"
-		"                       spui   [A.] raw SPU-ADPCM interleaved data\n"
-		"                       vag    [A.] .vag SPU-ADPCM mono\n"
-		"                       vagi   [A.] .vag SPU-ADPCM interleaved\n"
-		"                       str2   [AV] v2 .str video, 2336-byte sectors\n"
-		"                       str2cd [AV] v2 .str video, 2352-byte sectors\n"
-		"                       str2v  [.V] v2 .str video file\n"
-		"                       sbs2   [.V] v2 .sbs video, 2048-byte sectors\n"
-		"    -F num           xa/str2: Set the XA file number\n"
-		"                       0-255, default 0\n"
-		"    -C num           xa/str2: Set the XA channel number\n"
-		"                       0-31, default 0\n"
-		"\n"
-		"Audio options:\n"
-		"    -f freq          Use specified sample rate\n"
-		"                       xa/str2:   18900 or 37800, default 37800\n"
-		"                       spu/vag:   any value, default 44100\n"
-		"                       spui/vagi: any value, default 44100\n"
-		"    -b bitdepth      Use specified bit depth\n"
-		"                       xa/str2:   4 or 8, default 4\n"
-		"                       spu/vag:   must be 4\n"
-		"                       spui/vagi: must be 4\n"
-		"    -c channels      Use specified channel count\n"
-		"                       xa/str2:   1 or 2, default 2\n"
-		"                       spu/vag:   must be 1\n"
-		"                       spui/vagi: any value, default 2\n"
-		"    -R key=value,... Pass custom options to libswresample (see FFmpeg docs)\n"
-		"\n"
-		"SPU-ADPCM options (spu/spui/vag/vagi formats):\n"
-		"    -L               spu/vag:   Add a loop marker at the end of sample data\n"
-		"                     spui/vagi: Add a loop marker at the end of each chunk\n"
-		"    -i size          spui/vagi: Use specified channel interleave\n"
-		"                       Any multiple of 16, default 2048\n"
-		"    -a size          spu/vag:   Pad sample data to multiple of specified size\n"
-		"                       Any value >= 16, default 64\n"
-		"                     spui/vagi: Pad header and each chunk to multiple of specified size\n"
-		"                       Any value >= 16, default 2048\n"
-		"\n"
-		"Video options:\n"
-		"    -s WxH           Rescale input file to fit within specified size\n"
-		"                       16x16-320x256 in 16-pixel increments, default 320x240\n"
-		"    -I               Force stretching to given size without preserving aspect ratio\n"
-		"    -r num[/den]     Set frame rate to specified integer or fraction\n"
-		"                       1-30, default 15\n"
-		"    -x speed         str2: Set the CD-ROM speed the file is meant to played at\n"
-		"                       1 or 2, default 2\n"
-		"    -a size          sbs2: Set the size of each frame\n"
-		"                       Any value >= 256, default 8192\n"
-		"    -S key=value,... Pass custom options to libswscale (see FFmpeg docs)\n"
-		"\n"
-	);
-}
-
-void print_version(void) {
-	printf("psxavenc " VERSION "\n");
-}
-
-int parse_args_old(settings_t* settings, int argc, char** argv) {
-	int c, i;
-	char *next;
-	while ((c = getopt(argc, argv, "?hVqt:F:C:f:b:c:LR:i:a:s:IS:r:x:")) != -1) {
-		switch (c) {
-			case '?':
-			case 'h': {
-				print_help();
-				return -1;
-			} break;
-			case 'V': {
-				print_version();
-				return -1;
-			} break;
-			case 'q': {
-				settings->quiet = true;
-				settings->show_progress = false;
-			} break;
-			case 't': {
-				settings->format = -1;
-				for (i = 0; i < NUM_FORMATS; i++) {
-					if (!strcmp(optarg, format_names[i])) {
-						settings->format = i;
-						break;
-					}
-				}
-				if (settings->format < 0) {
-					fprintf(stderr, "Invalid format: %s\n", optarg);
-					return -1;
-				}
-			} break;
-			case 'F': {
-				settings->file_number = strtol(optarg, NULL, 0);
-				if (settings->file_number < 0 || settings->file_number > 255) {
-					fprintf(stderr, "Invalid file number: %d (must be in 0-255 range)\n", settings->file_number);
-					return -1;
-				}
-			} break;
-			case 'C': {
-				settings->channel_number = strtol(optarg, NULL, 0);
-				if (settings->channel_number < 0 || settings->channel_number > 31) {
-					fprintf(stderr, "Invalid channel number: %d (must be in 0-31 range)\n", settings->channel_number);
-					return -1;
-				}
-			} break;
-			case 'f': {
-				settings->frequency = strtol(optarg, NULL, 0);
-				if (settings->frequency < 1000) {
-					fprintf(stderr, "Invalid frequency: %d (must be at least 1000)\n", settings->frequency);
-					return -1;
-				}
-			} break;
-			case 'b': {
-				settings->bits_per_sample = strtol(optarg, NULL, 0);
-				if (settings->bits_per_sample != 4 && settings->bits_per_sample != 8) {
-					fprintf(stderr, "Invalid bit depth: %d (must be 4 or 8)\n", settings->bits_per_sample);
-					return -1;
-				}
-			} break;
-			case 'c': {
-				settings->channels = strtol(optarg, NULL, 0);
-				if (settings->channels < 1) {
-					fprintf(stderr, "Invalid channel count: %d (must be at least 1)\n", settings->channels);
-					return -1;
-				}
-			} break;
-			case 'L': {
-				settings->loop = true;
-			} break;
-			case 'R': {
-				settings->swresample_options = optarg;
-			} break;
-			case 'i': {
-				settings->interleave = (strtol(optarg, NULL, 0) + 15) & ~15;
-				if (settings->interleave < 16) {
-					fprintf(stderr, "Invalid interleave: %d (must be at least 16)\n", settings->interleave);
-					return -1;
-				}
-			} break;
-			case 'a': {
-				settings->alignment = strtol(optarg, NULL, 0);
-				if (settings->alignment < 16) {
-					fprintf(stderr, "Invalid alignment: %d (must be at least 16)\n", settings->alignment);
-					return -1;
-				}
-			} break;
-			case 's': {
-				settings->video_width = (strtol(optarg, &next, 0) + 15) & ~15;
-				if (*next != 'x') {
-					fprintf(stderr, "Invalid video size (must be specified as <width>x<height>)\n");
-					return -1;
-				}
-				settings->video_height = (strtol(next + 1, NULL, 0) + 15) & ~15;
-
-				if (settings->video_width < 16 || settings->video_width > 320) {
-					fprintf(stderr, "Invalid video width: %d (must be in 16-320 range)\n", settings->video_width);
-					return -1;
-				}
-				if (settings->video_height < 16 || settings->video_height > 256) {
-					fprintf(stderr, "Invalid video height: %d (must be in 16-256 range)\n", settings->video_height);
-					return -1;
-				}
-			} break;
-			case 'I': {
-				settings->ignore_aspect_ratio = true;
-			} break;
-			case 'S': {
-				settings->swscale_options = optarg;
-			} break;
-			case 'r': {
-				settings->video_fps_num = strtol(optarg, &next, 0);
-				if (*next == '/') {
-					settings->video_fps_den = strtol(next + 1, NULL, 0);
-				} else {
-					settings->video_fps_den = 1;
-				}
-
-				if (!settings->video_fps_den) {
-					fprintf(stderr, "Invalid frame rate denominator\n");
-					return -1;
-				}
-				i = settings->video_fps_num / settings->video_fps_den;
-				if (i < 1 || i > 60) {
-					fprintf(stderr, "Invalid frame rate: %d/%d (must be in 1-60 range)\n", settings->video_fps_num, settings->video_fps_den);
-					return -1;
-				}
-			} break;
-			case 'x': {
-				settings->cd_speed = strtol(optarg, NULL, 0);
-				if (settings->cd_speed < 1 || settings->cd_speed > 2) {
-					fprintf(stderr, "Invalid CD-ROM speed: %d (must be 1 or 2)\n", settings->cd_speed);
-					return -1;
-				}
-			} break;
-		}
-	}
-
-	// Some settings' (frequency, channels, interleave and alignment) default
-	// values are initialized here as they depend on the chosen format.
-	switch (settings->format) {
-		case FORMAT_XA:
-		case FORMAT_XACD:
-		case FORMAT_STR2:
-		case FORMAT_STR2CD:
-		case FORMAT_STR2V:
-			if (!settings->frequency) {
-				settings->frequency = PSX_AUDIO_XA_FREQ_DOUBLE;
-			} else if (settings->frequency != PSX_AUDIO_XA_FREQ_SINGLE && settings->frequency != PSX_AUDIO_XA_FREQ_DOUBLE) {
-				fprintf(
-					stderr, "Invalid XA-ADPCM frequency: %d Hz (must be %d or %d Hz)\n", settings->frequency,
-					PSX_AUDIO_XA_FREQ_SINGLE, PSX_AUDIO_XA_FREQ_DOUBLE
-				);
-				return -1;
-			}
-			if (!settings->channels) {
-				settings->channels = 2;
-			} else if (settings->channels > 2) {
-				fprintf(stderr, "Invalid XA-ADPCM channel count: %d (must be 1 or 2)\n", settings->channels);
-				return -1;
-			}
-			if (settings->interleave || settings->alignment) {
-				fprintf(stderr, "Interleave and frame size cannot be specified for this format\n");
-				return -1;
-			}
-			if (settings->loop) {
-				fprintf(stderr, "XA-ADPCM does not support loop markers\n");
-				return -1;
-			}
-			break;
-		case FORMAT_SPU:
-		case FORMAT_VAG:
-			if (!settings->frequency) {
-				settings->frequency = 44100;
-			}
-			if (settings->bits_per_sample != 4) {
-				fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample);
-				return -1;
-			}
-			if (!settings->channels) {
-				settings->channels = 1;
-			} else if (settings->channels > 1) {
-				fprintf(stderr, "Invalid SPU-ADPCM channel count: %d (must be 1)\n", settings->channels);
-				return -1;
-			}
-			if (settings->interleave) {
-				fprintf(stderr, "Interleave cannot be specified for this format\n");
-				return -1;
-			}
-			if (!settings->alignment) {
-				settings->alignment = 64;
-			}
-			break;
-		case FORMAT_SPUI:
-		case FORMAT_VAGI:
-			if (!settings->frequency) {
-				settings->frequency = 44100;
-			}
-			if (settings->bits_per_sample != 4) {
-				fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample);
-				return -1;
-			}
-			if (!settings->channels) {
-				settings->channels = 2;
-			}
-			if (!settings->interleave) {
-				settings->interleave = 2048;
-			}
-			if (!settings->alignment) {
-				settings->alignment = 2048;
-			}
-			break;
-		case FORMAT_SBS2:
-			if (settings->interleave) {
-				fprintf(stderr, "Interleave cannot be specified for this format\n");
-				return -1;
-			}
-			if (!settings->alignment) {
-				settings->alignment = 8192;
-			} else if (settings->alignment < 256) {
-				fprintf(stderr, "Invalid frame size: %d (must be at least 256)\n", settings->alignment);
-				return -1;
-			}
-			break;
-		default:
-			fprintf(stderr, "Output format must be specified\n");
-			return -1;
-	}
-
-	return optind;
-}
-
-int main(int argc, char **argv) {
-	settings_t settings;
-	int arg_offset;
-	FILE* output;
-
-	memset(&settings,0,sizeof(settings_t));
-
-	settings.quiet = false;
-	settings.show_progress = isatty(fileno(stderr));
-
-	settings.format = -1;
-	settings.file_number = 0;
-	settings.channel_number = 0;
-	settings.cd_speed = 2;
-	settings.channels = 0;
-	settings.frequency = 0;
-	settings.bits_per_sample = 4;
-	settings.interleave = 0;
-	settings.alignment = 0;
-	settings.loop = false;
-
-	// NOTE: ffmpeg/ffplay's .str demuxer has the frame rate hardcoded to 15fps
-	// so if you're messing around with this make sure you test generated files
-	// with another player and/or in an emulator.
-	settings.video_width = 320;
-	settings.video_height = 240;
-	settings.video_fps_num = 15;
-	settings.video_fps_den = 1;
-	settings.ignore_aspect_ratio = false;
-
-	settings.swresample_options = NULL;
-	settings.swscale_options = NULL;
-
-	settings.audio_samples = NULL;
-	settings.audio_sample_count = 0;
-	settings.video_frames = NULL;
-	settings.video_frame_count = 0;
-
-	settings.state_vid.huffman_encoding_map = NULL;
-	settings.state_vid.coeff_clamp_map = NULL;
-	settings.state_vid.dct_context = NULL;
-	for(int i = 0; i < 6; i++) {
-		settings.state_vid.dct_block_lists[i] = NULL;
-	}
-
-	if (argc < 2) {
-		print_help();
-		return 1;
-	}
-
-	arg_offset = parse_args_old(&settings, argc, argv);
-	if (arg_offset < 0) {
-		return 1;
-	} else if (argc < arg_offset + 2) {
-		print_help();
-		return 1;
-	}
-
-	bool has_audio =
-		(settings.format != FORMAT_STR2V) &&
-		(settings.format != FORMAT_SBS2);
-	bool has_video =
-		(settings.format == FORMAT_STR2) ||
-		(settings.format == FORMAT_STR2CD) ||
-		(settings.format == FORMAT_STR2V) ||
-		(settings.format == FORMAT_SBS2);
-
-	bool did_open_data = open_av_data(argv[arg_offset + 0], &settings,
-		has_audio, has_video, !has_video, has_video);
-	if (!did_open_data) {
-		fprintf(stderr, "Could not open input file!\n");
-		return 1;
-	}
-
-	output = fopen(argv[arg_offset + 1], "wb");
-	if (output == NULL) {
-		fprintf(stderr, "Could not open output file!\n");
-		return 1;
-	}
-
-	settings.start_time = time(NULL);
-	settings.last_progress_update = 0;
-
-	switch (settings.format) {
-		case FORMAT_XA:
-		case FORMAT_XACD:
-			if (!settings.quiet) {
-				fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
-					settings.frequency, settings.bits_per_sample,
-					(settings.channels == 2) ? "stereo" : "mono",
-					settings.file_number, settings.channel_number
-				);
-			}
-
-			encode_file_xa(&settings, output);
-			break;
-		case FORMAT_SPU:
-		case FORMAT_VAG:
-			if (!settings.quiet) {
-				fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz mono\n",
-					settings.frequency
-				);
-			}
-
-			encode_file_spu(&settings, output);
-			break;
-		case FORMAT_SPUI:
-		case FORMAT_VAGI:
-			if (!settings.quiet) {
-				fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n",
-					settings.frequency, settings.channels, settings.interleave
-				);
-			}
-
-			encode_file_spu_interleaved(&settings, output);
-			break;
-		case FORMAT_STR2:
-		case FORMAT_STR2CD:
-		case FORMAT_STR2V:
-			if (!settings.quiet) {
-				if (settings.decoder_state_av.audio_stream) {
-					fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
-						settings.frequency, settings.bits_per_sample,
-						(settings.channels == 2) ? "stereo" : "mono",
-						settings.file_number, settings.channel_number
-					);
-				}
-				fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n",
-					settings.video_width, settings.video_height,
-					(double)settings.video_fps_num / (double)settings.video_fps_den
-				);
-			}
-
-			encode_file_str(&settings, output);
-			break;
-		case FORMAT_SBS2:
-			if (!settings.quiet) {
-				fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n",
-					settings.video_width, settings.video_height,
-					(double)settings.video_fps_num / (double)settings.video_fps_den
-				);
-			}
-
-			encode_file_sbs(&settings, output);
-			break;
-	}
-
-	if (settings.show_progress) {
-		fprintf(stderr, "\nDone.\n");
-	}
-	fclose(output);
-	close_av_data(&settings);
-	return 0;
-}

From 4a0d0c55fd80dad115a81a6490571505cb7138ce Mon Sep 17 00:00:00 2001
From: spicyjpeg <thatspicyjpeg@gmail.com>
Date: Fri, 28 Feb 2025 11:42:23 +0100
Subject: [PATCH 4/8] Add BS v3 encoding support

---
 psxavenc/filefmt.c |   6 +-
 psxavenc/mdec.c    | 274 ++++++++++++++++++++++++++++++++-------------
 psxavenc/mdec.h    |   6 +-
 3 files changed, 203 insertions(+), 83 deletions(-)

diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c
index b00a29b..7e508f5 100644
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -375,7 +375,7 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
 	memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
 
 	mdec_encoder_t encoder;
-	init_mdec_encoder(&encoder, args->video_width, args->video_height);
+	init_mdec_encoder(&encoder, args->video_codec, args->video_width, args->video_height);
 
 	// e.g. 15fps = (150*7/8/15) = 8.75 blocks per frame
 	encoder.state.frame_block_base_overflow = (75 * args->str_cd_speed) * video_sectors_per_block * args->str_fps_den;
@@ -403,7 +403,7 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
 			// Video sector
 			init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*) buffer, j);
 
-			int frames_used = encode_sector_str(&encoder, decoder->video_frames, buffer);
+			int frames_used = encode_sector_str(&encoder, args->format, decoder->video_frames, buffer);
 			retire_av_data(decoder, 0, frames_used);
 		} else {
 			// Audio sector
@@ -463,7 +463,7 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
 
 void encode_file_sbs(args_t *args, decoder_t *decoder, FILE *output) {
 	mdec_encoder_t encoder;
-	init_mdec_encoder(&encoder, args->video_width, args->video_height);
+	init_mdec_encoder(&encoder, args->video_codec, args->video_width, args->video_height);
 
 	encoder.state.frame_output = malloc(args->alignment);
 	encoder.state.frame_data_offset = 0;
diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c
index 095bb33..0d945c3 100644
--- a/psxavenc/mdec.c
+++ b/psxavenc/mdec.c
@@ -28,8 +28,16 @@ freely, subject to the following restrictions:
 #include <stdlib.h>
 #include <string.h>
 #include <libavcodec/avdct.h>
+#include "args.h"
 #include "mdec.h"
 
+// https://stackoverflow.com/a/60011209
+#if 0
+#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? (((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d)))
+#else
+#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d)))
+#endif
+
 #define AC_PAIR(zeroes, value) \
 	(((zeroes) << 10) | ((+(value)) & 0x3FF)), \
 	(((zeroes) << 10) | ((-(value)) & 0x3FF))
@@ -154,6 +162,44 @@ static const struct {
 	{16, 0x001F, AC_PAIR(27,  1)}
 };
 
+static const struct {
+	int c_bits;
+	uint32_t c_value;
+	int sign_bits;
+	int value_bits;
+} dc_c_huffman_tree[] = {
+	{2, 0x0,  0, 0},
+	{2, 0x1,  1, 0},
+	{2, 0x2,  1, 1},
+	{3, 0x6,  1, 2},
+	{4, 0xE,  1, 3},
+	{5, 0x1E, 1, 4},
+	{6, 0x3E, 1, 5},
+	{7, 0x7E, 1, 6},
+	{8, 0xFE, 1, 7},
+};
+
+static const struct {
+	int c_bits;
+	uint32_t c_value;
+	int sign_bits;
+	int value_bits;
+} dc_y_huffman_tree[] = {
+	{3, 0x4,  0, 0},
+	{2, 0x0,  1, 0},
+	{2, 0x1,  1, 1},
+	{3, 0x5,  1, 2},
+	{3, 0x6,  1, 3},
+	{4, 0xE,  1, 4},
+	{5, 0x1E, 1, 5},
+	{6, 0x3E, 1, 6},
+	{7, 0x7E, 1, 7},
+};
+
+static const uint8_t dc_coeff_indices[6] = {
+	0, 1, 2, 2, 2, 2
+};
+
 static const uint8_t quant_dec[8*8] = {
 	 2, 16, 19, 22, 26, 27, 29, 34,
 	16, 16, 22, 24, 27, 29, 34, 37,
@@ -165,6 +211,7 @@ static const uint8_t quant_dec[8*8] = {
 	27, 29, 35, 38, 46, 56, 69, 83
 };
 
+#if 0
 static const uint8_t dct_zigzag_table[8*8] = {
 	 0,  1,  5,  6, 14, 15, 27, 28,
 	 2,  4,  7, 13, 16, 26, 29, 42,
@@ -175,6 +222,7 @@ static const uint8_t dct_zigzag_table[8*8] = {
 	21, 34, 37, 47, 50, 56, 59, 61,
 	35, 36, 48, 49, 57, 58, 62, 63
 };
+#endif
 
 static const uint8_t dct_zagzig_table[8*8] = {
 	 0,  1,  8, 16,  9,  2,  3, 10,
@@ -209,31 +257,84 @@ static const int16_t dct_scale_table[8*8] = {
 };
 #endif
 
-static void init_dct_data(mdec_encoder_state_t *state) {
+static void init_dct_data(mdec_encoder_state_t *state, bs_codec_t codec) {
 	for(int i = 0; i <= 0xFFFF; i++) {
 		// high 8 bits = bit count
 		// low 24 bits = value
-		state->ac_huffman_map[i] = ((6+16) << 24) | (0x01 << 16) | i;
+		state->ac_huffman_map[i] = ((6+16)<<24)|((0x01<<16)|(i));
 
 		int16_t coeff = (int16_t)i;
-
 		if (coeff < -0x200)
 			coeff = -0x200;
-		else if (coeff > +0x1FF)
-			coeff = +0x1FF;
+		else if (coeff > +0x1FE)
+			coeff = +0x1FE; // 0x1FF = v2 end of frame
 
-		state->coeff_clamp_map[i] = coeff & 0x3FF;
+		state->coeff_clamp_map[i] = coeff;
+
+		int16_t delta = (int16_t)DIVIDE_ROUNDED(i, 4);
+		if (delta < -0xFF)
+			delta = -0xFF;
+		else if (delta > +0xFF)
+			delta = +0xFF;
+
+		// Some versions of Sony's BS v3 decoder compute each DC coefficient as
+		// ((last + delta * 4) & 0x3FF) instead of just (last + delta * 4). The
+		// encoder can leverage this behavior to represent large coefficient
+		// differences as smaller deltas that cause the decoder to overflow and
+		// wrap around (e.g. -1 to encode -512 -> 511 as opposed to +1023). This
+		// saves some space as larger DC values take up more bits.
+		if (codec == BS_CODEC_V3DC) {
+			if (delta > +0x80)
+				delta -= 0x100;
+		}
+
+		state->delta_clamp_map[i] = delta;
 	}
 
-	int tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]);
+	int ac_tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]);
+	int dc_c_tree_item_count = sizeof(dc_c_huffman_tree) / sizeof(dc_c_huffman_tree[0]);
+	int dc_y_tree_item_count = sizeof(dc_y_huffman_tree) / sizeof(dc_y_huffman_tree[0]);
 
-	for(int i = 0; i < tree_item_count; i++) {
+	for (int i = 0; i < ac_tree_item_count; i++) {
 		int bits = ac_huffman_tree[i].c_bits+1;
 		uint32_t base_value = ac_huffman_tree[i].c_value;
 
 		state->ac_huffman_map[ac_huffman_tree[i].u_hword_pos] = (bits << 24) | (base_value << 1) | 0;
 		state->ac_huffman_map[ac_huffman_tree[i].u_hword_neg] = (bits << 24) | (base_value << 1) | 1;
 	}
+	for (int i = 0; i < dc_c_tree_item_count; i++) {
+		int dc_bits = dc_c_huffman_tree[i].sign_bits + dc_c_huffman_tree[i].value_bits;
+		int bits = dc_c_huffman_tree[i].c_bits + dc_bits;
+		uint32_t base_value = dc_c_huffman_tree[i].c_value << dc_bits;
+
+		for (int j = 0; j < (1 << dc_bits); j++) {
+			int delta = j;
+
+			if ((j >> dc_c_huffman_tree[i].value_bits) == 0) {
+				delta -= (1 << dc_bits) - 1;
+				delta &= 0x1FF;
+			}
+
+			state->dc_huffman_map[(0 << 9) | delta] = (bits << 24) | base_value | j;
+			state->dc_huffman_map[(1 << 9) | delta] = (bits << 24) | base_value | j;
+		}
+	}
+	for (int i = 0; i < dc_y_tree_item_count; i++) {
+		int dc_bits = dc_y_huffman_tree[i].sign_bits + dc_y_huffman_tree[i].value_bits;
+		int bits = dc_y_huffman_tree[i].c_bits + dc_bits;
+		uint32_t base_value = dc_y_huffman_tree[i].c_value << dc_bits;
+
+		for (int j = 0; j < (1 << dc_bits); j++) {
+			int delta = j;
+
+			if ((j >> dc_y_huffman_tree[i].value_bits) == 0) {
+				delta -= (1 << dc_bits) - 1;
+				delta &= 0x1FF;
+			}
+
+			state->dc_huffman_map[(2 << 9) | delta] = (bits << 24) | base_value | j;
+		}
+	}
 }
 
 static bool flush_bits(mdec_encoder_state_t *state) {
@@ -302,29 +403,6 @@ static bool encode_bits(mdec_encoder_state_t *state, int bits, uint32_t val) {
 	return true;
 }
 
-static bool encode_ac_value(mdec_encoder_state_t *state, uint16_t value) {
-	assert(0 <= value && value <= 0xFFFF);
-
-#if 0
-	int tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]);
-
-	for (int i = 0; i < tree_item_count; i++) {
-		if (value == ac_huffman_tree[i].u_hword_pos) {
-			return encode_bits(state, ac_huffman_tree[i].c_bits+1, ((uint32_t)ac_huffman_tree[i].c_value << 1) | 0);
-		} else if (value == ac_huffman_tree[i].u_hword_neg) {
-			return encode_bits(state, ac_huffman_tree[i].c_bits+1, ((uint32_t)ac_huffman_tree[i].c_value << 1) | 1);
-		}
-	}
-
-	// Use an escape
-	return encode_bits(state, 6+16, (0x01 << 16) | (0xFFFF & (uint32_t)value));
-#else
-	uint32_t outword = state->ac_huffman_map[value];
-
-	return encode_bits(state, outword >> 24, outword & 0xFFFFFF);
-#endif
-}
-
 #if 0
 static void transform_dct_block(int16_t *block) {
 	// Apply DCT to block
@@ -372,49 +450,67 @@ static int reduce_dct_block(mdec_encoder_state_t *state, int32_t *block, int32_t
 }
 #endif
 
-// https://stackoverflow.com/a/60011209
-#if 0
-#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? (((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d)))
-#else
-#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d)))
-#endif
-
-static bool encode_dct_block(mdec_encoder_state_t *state, const int16_t *block, const int16_t *quant_table) {
+static bool encode_dct_block(
+	mdec_encoder_state_t *state,
+	bs_codec_t codec,
+	const int16_t *block,
+	const int16_t *quant_table
+) {
 	int dc = DIVIDE_ROUNDED(block[0], quant_table[0]);
-	dc = state->coeff_clamp_map[dc&0xFFFF];
 
-	if (!encode_bits(state, 10, dc))
-		return false;
+	dc = state->coeff_clamp_map[dc & 0xFFFF];
+
+	if (codec == BS_CODEC_V2) {
+		if (!encode_bits(state, 10, dc & 0x3FF))
+			return false;
+	} else {
+		int index = dc_coeff_indices[state->block_type];
+		int last = ((state->last_dc_values[index] & 0x3FF) ^ 0x200) - 0x200; // stored masked to 10 bits; sign-extend so it is comparable with the signed dc
+
+		int delta = (dc < last) ? -state->delta_clamp_map[last - dc] : state->delta_clamp_map[dc - last]; // the map is filled from the unsigned table index, so look up the magnitude and restore the sign here
+		state->last_dc_values[index] = (last + delta * 4) & 0x3FF;
+
+		uint32_t outword = state->dc_huffman_map[(index << 9) | (delta & 0x1FF)];
+
+		if (!encode_bits(state, outword >> 24, outword & 0xFFFFFF))
+			return false;
+	}
 
 	for (int i = 1, zeroes = 0; i < 64; i++) {
 		int ri = dct_zagzig_table[i];
 		int ac = DIVIDE_ROUNDED(block[ri], quant_table[ri]);
-		ac = state->coeff_clamp_map[ac&0xFFFF];
+
+		ac = state->coeff_clamp_map[ac & 0xFFFF];
 
 		if (ac == 0) {
 			zeroes++;
 		} else {
-			if (!encode_ac_value(state, (zeroes<<10)|ac))
+			uint32_t outword = state->ac_huffman_map[(zeroes << 10) | ac];
+
+			if (!encode_bits(state, outword >> 24, outword & 0xFFFFFF))
 				return false;
 
 			zeroes = 0;
-			state->uncomp_hwords_used += 1;
+			state->uncomp_hwords_used++;
 		}
 	}
 
-	//fprintf(stderr, "dc %08X rles %2d\n", dc, zero_rle_words);
-	//assert(dc >= -0x200); assert(dc <  +0x200);
-
 	// Store end of block
 	if (!encode_bits(state, 2, 0x2))
 		return false;
 
+	state->block_type++;
+	state->block_type %= 6;
 	state->uncomp_hwords_used += 2;
 	//state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF;
 	return true;
 }
 
-bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_height) {
+bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int video_width, int video_height) {
+	encoder->video_codec = video_codec;
+	encoder->video_width = video_width;
+	encoder->video_height = video_height;
+
 	mdec_encoder_state_t *state = &(encoder->state);
 
 	if (state->dct_context != NULL)
@@ -422,9 +518,9 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_heigh
 
 	state->dct_context = avcodec_dct_alloc();
 	state->ac_huffman_map = malloc(0x10000 * sizeof(uint32_t));
-	state->dc_huffman_map = NULL;
+	state->dc_huffman_map = malloc(0x600 * sizeof(uint32_t));
 	state->coeff_clamp_map = malloc(0x10000 * sizeof(int16_t));
-	state->delta_clamp_map = NULL;
+	state->delta_clamp_map = malloc(0x10000 * sizeof(int16_t));
 
 	if (
 		state->dct_context == NULL ||
@@ -445,7 +541,7 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_heigh
 	}
 
 	avcodec_dct_init(state->dct_context);
-	init_dct_data(state);
+	init_dct_data(state, video_codec);
 	return true;
 }
 
@@ -545,6 +641,19 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
 		}
 	}
 
+	uint32_t end_of_block;
+
+	if (encoder->video_codec == BS_CODEC_V2) {
+		end_of_block = 0x1FF;
+	} else {
+		end_of_block = 0x3FF;
+		assert(state->dc_huffman_map);
+		assert(state->delta_clamp_map);
+	}
+
+	assert(state->ac_huffman_map);
+	assert(state->coeff_clamp_map);
+
 	// Attempt encoding the frame at the maximum quality. If the result is too
 	// large, increase the quantization scale and try again.
 	// TODO: if a frame encoded at scale N is too large but the same frame
@@ -566,6 +675,11 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
 
 		memset(state->frame_output, 0, state->frame_max_size);
 
+		state->block_type = 0;
+		state->last_dc_values[0] = 0;
+		state->last_dc_values[1] = 0;
+		state->last_dc_values[2] = 0;
+
 		state->bits_value = 0;
 		state->bits_left = 16;
 		state->uncomp_hwords_used = 0;
@@ -587,16 +701,18 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
 				};
 
 				for(int i = 0; ok && (i < 6); i++)
-					ok = encode_dct_block(state, blocks[i], quant_table);
+					ok = encode_dct_block(state, encoder->video_codec, blocks[i], quant_table);
 			}
 		}
 
 		if (!ok)
 			continue;
-		if (!encode_bits(state, 10, 0x1FF))
+		if (!encode_bits(state, 10, end_of_block))
 			continue;
+#if 0
 		if (!encode_bits(state, 2, 0x2))
 			continue;
+#endif
 		if (!flush_bits(state))
 			continue;
 
@@ -630,11 +746,15 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
 	state->frame_output[0x005] = (uint8_t)(state->quant_scale>>8);
 
 	// BS version
-	state->frame_output[0x006] = 0x02;
+	if (encoder->video_codec == BS_CODEC_V2)
+		state->frame_output[0x006] = 0x02;
+	else
+		state->frame_output[0x006] = 0x03;
+
 	state->frame_output[0x007] = 0x00;
 }
 
-int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *output) {
+int encode_sector_str(mdec_encoder_t *encoder, format_t format, uint8_t *video_frames, uint8_t *output) {
 	mdec_encoder_state_t *state = &(encoder->state);
 	int last_frame_index = state->frame_index;
 	int frame_size = encoder->video_width * encoder->video_height * 2;
@@ -677,34 +797,32 @@ int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *o
 	header[0x00A] = (uint8_t)(state->frame_index >> 16);
 	header[0x00B] = (uint8_t)(state->frame_index >> 24);
 
-	// Video frame size
-	header[0x010] = (uint8_t)encoder->video_width;
-	header[0x011] = (uint8_t)(encoder->video_width >> 8);
-	header[0x012] = (uint8_t)encoder->video_height;
-	header[0x013] = (uint8_t)(encoder->video_height >> 8);
-
-	// MDEC command (size of decompressed MDEC data)
-	header[0x014] = (uint8_t)state->blocks_used;
-	header[0x015] = (uint8_t)(state->blocks_used >> 8);
-	header[0x016] = 0x00;
-	header[0x017] = 0x38;
-
-	// Quantization scale
-	header[0x018] = (uint8_t)state->quant_scale;
-	header[0x019] = (uint8_t)(state->quant_scale >> 8);
-
-	// BS version
-	header[0x01A] = 0x02;
-	header[0x01B] = 0x00;
-
 	// Demuxed bytes used as a multiple of 4
 	header[0x00C] = (uint8_t)state->bytes_used;
 	header[0x00D] = (uint8_t)(state->bytes_used >> 8);
 	header[0x00E] = (uint8_t)(state->bytes_used >> 16);
 	header[0x00F] = (uint8_t)(state->bytes_used >> 24);
 
-	memcpy(output + 0x018, header, sizeof(header));
-	memcpy(output + 0x018 + 0x020, state->frame_output + state->frame_data_offset, 2016);
+	// Video frame size
+	header[0x010] = (uint8_t)encoder->video_width;
+	header[0x011] = (uint8_t)(encoder->video_width >> 8);
+	header[0x012] = (uint8_t)encoder->video_height;
+	header[0x013] = (uint8_t)(encoder->video_height >> 8);
+
+	// Copy of BS header
+	memcpy(header + 0x014, state->frame_output, 8);
+
+	int offset;
+
+	if (format == FORMAT_STR)
+		offset = 0x008;
+	else if (format == FORMAT_STRCD)
+		offset = 0x018;
+	else
+		offset = 0x000;
+
+	memcpy(output + offset, header, sizeof(header));
+	memcpy(output + offset + 0x020, state->frame_output + state->frame_data_offset, 2016);
 
 	state->frame_data_offset += 2016;
 	return state->frame_index - last_frame_index;
diff --git a/psxavenc/mdec.h b/psxavenc/mdec.h
index 6b22e20..3d1e4dc 100644
--- a/psxavenc/mdec.h
+++ b/psxavenc/mdec.h
@@ -27,6 +27,7 @@ freely, subject to the following restrictions:
 #include <stdbool.h>
 #include <stdint.h>
 #include <libavcodec/avdct.h>
+#include "args.h"
 
 typedef struct {
 	int frame_index;
@@ -55,13 +56,14 @@ typedef struct {
 } mdec_encoder_state_t;
 
 typedef struct {
+	bs_codec_t video_codec;
 	int video_width;
 	int video_height;
 
 	mdec_encoder_state_t state;
 } mdec_encoder_t;
 
-bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_height);
+bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int video_width, int video_height);
 void destroy_mdec_encoder(mdec_encoder_t *encoder);
 void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame);
-int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *output);
+int encode_sector_str(mdec_encoder_t *encoder, format_t format, uint8_t *video_frames, uint8_t *output);

From 7d537edffb3ab427ac592ccd56f513cdda78dc14 Mon Sep 17 00:00:00 2001
From: spicyjpeg <thatspicyjpeg@gmail.com>
Date: Sun, 2 Mar 2025 12:12:51 +0100
Subject: [PATCH 5/8] Clean up, implement new SPU-ADPCM looping options

---
 libpsxav/adpcm.c    |  52 +++++++----------
 libpsxav/libpsxav.h |  60 ++++++++++---------
 psxavenc/args.h     |  18 +++---
 psxavenc/decoding.h |   4 +-
 psxavenc/filefmt.c  | 138 +++++++++++++++++++++++++++++---------------
 psxavenc/filefmt.h  |   1 +
 psxavenc/main.c     |  25 +++++++-
 psxavenc/mdec.c     |  25 ++++----
 8 files changed, 198 insertions(+), 125 deletions(-)

diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c
index 03d298f..ecc7264 100644
--- a/libpsxav/adpcm.c
+++ b/libpsxav/adpcm.c
@@ -29,8 +29,8 @@ freely, subject to the following restrictions:
 #define SHIFT_RANGE_4BPS 12
 #define SHIFT_RANGE_8BPS 8
 
-#define ADPCM_FILTER_COUNT 5
-#define XA_ADPCM_FILTER_COUNT 4
+#define ADPCM_FILTER_COUNT     5
+#define XA_ADPCM_FILTER_COUNT  4
 #define SPU_ADPCM_FILTER_COUNT 5
 
 static const int16_t filter_k1[ADPCM_FILTER_COUNT] = {0, 60, 115, 98, 122};
@@ -54,7 +54,7 @@ static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_
 
 	int32_t s_min = 0;
 	int32_t s_max = 0;
-	for (int i = 0; i < 28; i++) {
+	for (int i = 0; i < PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; i++) {
 		int32_t raw_sample = (i >= sample_limit) ? 0 : samples[i * pitch];
 		int32_t previous_values = (k1*prev1 + k2*prev2 + (1<<5))>>6;
 		int32_t sample = raw_sample - previous_values;
@@ -87,7 +87,7 @@ static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, co
 
 	outstate->mse = 0;
 
-	for (int i = 0; i < 28; i++) {
+	for (int i = 0; i < PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; i++) {
 		int32_t sample = ((i >= sample_limit) ? 0 : samples[i * pitch]) + outstate->qerr;
 		int32_t previous_values = (k1*outstate->prev1 + k2*outstate->prev2 + (1<<5))>>6;
 		int32_t sample_enc = sample - previous_values;
@@ -205,25 +205,17 @@ uint32_t psx_audio_xa_get_buffer_size(psx_audio_xa_settings_t settings, int samp
 }
 
 uint32_t psx_audio_spu_get_buffer_size(int sample_count) {
-	return ((sample_count + 27) / 28) << 4;
+	return ((sample_count + PSX_AUDIO_SPU_SAMPLES_PER_BLOCK - 1) / PSX_AUDIO_SPU_SAMPLES_PER_BLOCK) << 4;
 }
 
 uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t settings) {
 	return settings.format == PSX_AUDIO_XA_FORMAT_XA ? 2336 : 2352;
 }
 
-uint32_t psx_audio_spu_get_buffer_size_per_block(void) {
-	return 16;
-}
-
 uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings) {
 	return (((settings.bits_per_sample == 8) ? 112 : 224) >> (settings.stereo ? 1 : 0)) * 18;
 }
 
-uint32_t psx_audio_spu_get_samples_per_block(void) {
-	return 28;
-}
-
 uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings) {
 	// 1/2 interleave for 37800 Hz 8-bit stereo at 1x speed
 	int interleave = settings.stereo ? 2 : 4;
@@ -307,14 +299,14 @@ int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* sample
 }
 
 int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output) {
-	uint8_t prebuf[28];
+	uint8_t prebuf[PSX_AUDIO_SPU_SAMPLES_PER_BLOCK];
 	uint8_t *buffer = output;
 
-	for (int i = 0; i < sample_count; i += 28, buffer += 16) {
+	for (int i = 0; i < sample_count; i += PSX_AUDIO_SPU_SAMPLES_PER_BLOCK, buffer += PSX_AUDIO_SPU_BLOCK_SIZE) {
 		buffer[0] = encode(state, samples + i * pitch, sample_count - i, pitch, prebuf, 0, 1, SPU_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
 		buffer[1] = 0;
 
-		for (int j = 0; j < 28; j+=2) {
+		for (int j = 0; j < PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; j+=2) {
 			buffer[2 + (j>>1)] = (prebuf[j] & 0x0F) | (prebuf[j+1] << 4);
 		}
 	}
@@ -327,24 +319,24 @@ int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *out
 	memset(&state, 0, sizeof(psx_audio_encoder_channel_state_t));
 	int length = psx_audio_spu_encode(&state, samples, sample_count, 1, output);
 
-	if (length >= 32) {
+	if (length >= PSX_AUDIO_SPU_BLOCK_SIZE) {
+		uint8_t *last_block = output + length - PSX_AUDIO_SPU_BLOCK_SIZE;
+
 		if (loop_start < 0) {
-			//output[1] = PSX_AUDIO_SPU_LOOP_START;
-			output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_END;
+			last_block[1] |= PSX_AUDIO_SPU_LOOP_END;
+
+			// Insert trailing looping block
+			memset(output + length, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
+			output[length + 1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+
+			length += PSX_AUDIO_SPU_BLOCK_SIZE;
 		} else {
-			psx_audio_spu_set_flag_at_sample(output, loop_start, PSX_AUDIO_SPU_LOOP_START);
-			output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_REPEAT;
+			int loop_start_offset = loop_start / PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * PSX_AUDIO_SPU_BLOCK_SIZE;
+
+			last_block[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
+			output[loop_start_offset + 1] |= PSX_AUDIO_SPU_LOOP_START;
 		}
-	} else if (length >= 16) {
-		output[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
-		if (loop_start >= 0)
-			output[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
 	}
 
 	return length;
 }
-
-void psx_audio_spu_set_flag_at_sample(uint8_t* spu_data, int sample_pos, int flag) {
-	int buffer_pos = (sample_pos / 28) << 4;
-	spu_data[buffer_pos + 1] = flag;
-}
diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h
index 32eabaf..1b8aaa1 100644
--- a/libpsxav/libpsxav.h
+++ b/libpsxav/libpsxav.h
@@ -28,8 +28,13 @@ freely, subject to the following restrictions:
 
 // audio.c
 
-#define PSX_AUDIO_XA_FREQ_SINGLE 18900
-#define PSX_AUDIO_XA_FREQ_DOUBLE 37800
+#define PSX_AUDIO_SPU_BLOCK_SIZE        16
+#define PSX_AUDIO_SPU_SAMPLES_PER_BLOCK 28
+
+enum {
+	PSX_AUDIO_XA_FREQ_SINGLE = 18900,
+	PSX_AUDIO_XA_FREQ_DOUBLE = 37800
+};
 
 typedef enum {
 	PSX_AUDIO_XA_FORMAT_XA, // .xa file
@@ -56,23 +61,22 @@ typedef struct {
 	psx_audio_encoder_channel_state_t right;
 } psx_audio_encoder_state_t;
 
-#define PSX_AUDIO_SPU_LOOP_END 1
-#define PSX_AUDIO_SPU_LOOP_REPEAT 3
-#define PSX_AUDIO_SPU_LOOP_START 4
+enum {
+	PSX_AUDIO_SPU_LOOP_END    = 1 << 0,
+	PSX_AUDIO_SPU_LOOP_REPEAT = 3 << 0,
+	PSX_AUDIO_SPU_LOOP_START  = 1 << 2
+};
 
 uint32_t psx_audio_xa_get_buffer_size(psx_audio_xa_settings_t settings, int sample_count);
 uint32_t psx_audio_spu_get_buffer_size(int sample_count);
 uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t settings);
-uint32_t psx_audio_spu_get_buffer_size_per_block(void);
 uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings);
-uint32_t psx_audio_spu_get_samples_per_block(void);
 uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings);
 int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output);
 int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output);
 int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output);
 int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start);
 void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length);
-void psx_audio_spu_set_flag_at_sample(uint8_t* spu_data, int sample_pos, int flag);
 
 // cdrom.c
 
@@ -115,25 +119,29 @@ _Static_assert(sizeof(psx_cdrom_sector_mode2_t) == PSX_CDROM_SECTOR_SIZE, "Inval
 
 #define PSX_CDROM_SECTOR_XA_CHANNEL_MASK 0x1F
 
-#define PSX_CDROM_SECTOR_XA_SUBMODE_EOR     0x01
-#define PSX_CDROM_SECTOR_XA_SUBMODE_VIDEO   0x02
-#define PSX_CDROM_SECTOR_XA_SUBMODE_AUDIO   0x04
-#define PSX_CDROM_SECTOR_XA_SUBMODE_DATA    0x08
-#define PSX_CDROM_SECTOR_XA_SUBMODE_TRIGGER 0x10
-#define PSX_CDROM_SECTOR_XA_SUBMODE_FORM2   0x20
-#define PSX_CDROM_SECTOR_XA_SUBMODE_RT      0x40
-#define PSX_CDROM_SECTOR_XA_SUBMODE_EOF     0x80
+enum {
+	PSX_CDROM_SECTOR_XA_SUBMODE_EOR     = 1 << 0,
+	PSX_CDROM_SECTOR_XA_SUBMODE_VIDEO   = 1 << 1,
+	PSX_CDROM_SECTOR_XA_SUBMODE_AUDIO   = 1 << 2,
+	PSX_CDROM_SECTOR_XA_SUBMODE_DATA    = 1 << 3,
+	PSX_CDROM_SECTOR_XA_SUBMODE_TRIGGER = 1 << 4,
+	PSX_CDROM_SECTOR_XA_SUBMODE_FORM2   = 1 << 5,
+	PSX_CDROM_SECTOR_XA_SUBMODE_RT      = 1 << 6,
+	PSX_CDROM_SECTOR_XA_SUBMODE_EOF     = 1 << 7
+};
 
-#define PSX_CDROM_SECTOR_XA_CODING_MONO         0x00
-#define PSX_CDROM_SECTOR_XA_CODING_STEREO       0x01
-#define PSX_CDROM_SECTOR_XA_CODING_CHANNEL_MASK 0x03
-#define PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE  0x00
-#define PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE  0x04
-#define PSX_CDROM_SECTOR_XA_CODING_FREQ_MASK    0x0C
-#define PSX_CDROM_SECTOR_XA_CODING_BITS_4       0x00
-#define PSX_CDROM_SECTOR_XA_CODING_BITS_8       0x10
-#define PSX_CDROM_SECTOR_XA_CODING_BITS_MASK    0x30
-#define PSX_CDROM_SECTOR_XA_CODING_EMPHASIS     0x40
+enum {
+	PSX_CDROM_SECTOR_XA_CODING_MONO         = 0 << 0,
+	PSX_CDROM_SECTOR_XA_CODING_STEREO       = 1 << 0,
+	PSX_CDROM_SECTOR_XA_CODING_CHANNEL_MASK = 3 << 0,
+	PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE  = 0 << 2,
+	PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE  = 1 << 2,
+	PSX_CDROM_SECTOR_XA_CODING_FREQ_MASK    = 3 << 2,
+	PSX_CDROM_SECTOR_XA_CODING_BITS_4       = 0 << 4,
+	PSX_CDROM_SECTOR_XA_CODING_BITS_8       = 1 << 4,
+	PSX_CDROM_SECTOR_XA_CODING_BITS_MASK    = 3 << 4,
+	PSX_CDROM_SECTOR_XA_CODING_EMPHASIS     = 1 << 6
+};
 
 typedef enum {
 	PSX_CDROM_SECTOR_TYPE_MODE1,
diff --git a/psxavenc/args.h b/psxavenc/args.h
index 9249290..f0fab88 100644
--- a/psxavenc/args.h
+++ b/psxavenc/args.h
@@ -26,19 +26,19 @@ freely, subject to the following restrictions:
 
 #include <stdbool.h>
 
-#define NUM_FORMATS 11
+#define NUM_FORMATS   11
 #define NUM_BS_CODECS 3
 
 enum {
-	FLAG_IGNORE_OPTIONS = 1 << 0,
-	FLAG_QUIET = 1 << 1,
-	FLAG_HIDE_PROGRESS = 1 << 2,
-	FLAG_PRINT_HELP = 1 << 3,
-	FLAG_PRINT_VERSION = 1 << 4,
-	FLAG_SPU_LOOP_END = 1 << 5,
+	FLAG_IGNORE_OPTIONS       = 1 << 0,
+	FLAG_QUIET                = 1 << 1,
+	FLAG_HIDE_PROGRESS        = 1 << 2,
+	FLAG_PRINT_HELP           = 1 << 3,
+	FLAG_PRINT_VERSION        = 1 << 4,
+	FLAG_SPU_LOOP_END         = 1 << 5,
 	FLAG_SPU_NO_LEADING_DUMMY = 1 << 6,
-	FLAG_BS_IGNORE_ASPECT = 1 << 7,
-	FLAG_STR_TRAILING_AUDIO = 1 << 8
+	FLAG_BS_IGNORE_ASPECT     = 1 << 7,
+	FLAG_STR_TRAILING_AUDIO   = 1 << 8
 };
 
 typedef enum {
diff --git a/psxavenc/decoding.h b/psxavenc/decoding.h
index ccf0b65..311cb69 100644
--- a/psxavenc/decoding.h
+++ b/psxavenc/decoding.h
@@ -67,8 +67,8 @@ typedef struct {
 } decoder_t;
 
 enum {
-	DECODER_USE_AUDIO = 1 << 0,
-	DECODER_USE_VIDEO = 1 << 1,
+	DECODER_USE_AUDIO      = 1 << 0,
+	DECODER_USE_VIDEO      = 1 << 1,
 	DECODER_AUDIO_REQUIRED = 1 << 2,
 	DECODER_VIDEO_REQUIRED = 1 << 3
 };
diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c
index 7e508f5..e5d930b 100644
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -136,7 +136,10 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *
 	strncpy((char*)(header + 0x20), &args->output_file[name_offset], 16);
 }
 
-void encode_file_xa(args_t *args, decoder_t *decoder, FILE *output) {
+// The functions below are some peak spaghetti code I would rewrite if that
+// didn't also require scrapping the rest of the codebase. -- spicyjpeg
+
+void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output) {
 	psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args);
 
 	int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
@@ -187,26 +190,37 @@ void encode_file_xa(args_t *args, decoder_t *decoder, FILE *output) {
 	}
 }
 
-void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) {
+void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
 	psx_audio_encoder_channel_state_t audio_state;
 	memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t));
 
-	int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
-	int block_size = psx_audio_spu_get_buffer_size_per_block();
-	int block_count;
-
 	// The header must be written after the data as we don't yet know the
 	// number of audio samples.
 	if (args->format == FORMAT_VAG)
 		fseek(output, VAG_HEADER_SIZE, SEEK_SET);
 
-	for (block_count = 0; ensure_av_data(decoder, audio_samples_per_block, 0); block_count++) {
+	uint8_t buffer[PSX_AUDIO_SPU_BLOCK_SIZE];
+	int block_count = 0;
+
+	if (!(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) {
+		// Insert leading silent block
+		memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
+
+		fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output);
+		block_count++;
+	}
+
+	int loop_start_block = -1; // index (in written blocks, including any leading dummy block) of the block that gets LOOP_START
+
+	if (args->audio_loop_point >= 0)
+		loop_start_block = block_count + (int)(((int64_t)args->audio_loop_point * args->audio_frequency) / (PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * 1000)); // 64-bit product: the int multiplication overflows for long loop points
+
+	for (; ensure_av_data(decoder, PSX_AUDIO_SPU_SAMPLES_PER_BLOCK, 0); block_count++) {
 		int samples_length = decoder->audio_sample_count;
 
-		if (samples_length > audio_samples_per_block)
-			samples_length = audio_samples_per_block;
+		if (samples_length > PSX_AUDIO_SPU_SAMPLES_PER_BLOCK)
+			samples_length = PSX_AUDIO_SPU_SAMPLES_PER_BLOCK;
 
-		uint8_t buffer[16];
 		int length = psx_audio_spu_encode(
 			&audio_state,
 			decoder->audio_samples,
@@ -215,15 +229,10 @@ void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) {
 			buffer
 		);
 
-		// TODO: implement proper loop flag support
-		if (false)
+		if (block_count == loop_start_block)
 			buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
-		if (decoder->end_of_input) {
-			if (args->flags & FLAG_SPU_LOOP_END)
-				buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
-			else
-			 	buffer[1] |= PSX_AUDIO_SPU_LOOP_END;
-		}
+		if ((args->flags & FLAG_SPU_LOOP_END) && decoder->end_of_input)
+			buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
 
 		retire_av_data(decoder, samples_length, 0);
 		fwrite(buffer, length, 1, output);
@@ -235,12 +244,21 @@ void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) {
 				stderr,
 				"\rBlock: %6d | Encoding speed: %5.2fx",
 				block_count,
-				(double)(block_count * audio_samples_per_block) / (double)(args->audio_frequency * t)
+				(double)(block_count * PSX_AUDIO_SPU_SAMPLES_PER_BLOCK) / (double)(args->audio_frequency * t)
 			);
 		}
 	}
 
-	int overflow = (block_count * block_size) % args->alignment;
+	if (!(args->flags & FLAG_SPU_LOOP_END)) {
+		// Insert trailing looping block
+		memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
+		buffer[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+
+		fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output);
+		block_count++;
+	}
+
+	int overflow = (block_count * PSX_AUDIO_SPU_BLOCK_SIZE) % args->alignment;
 
 	if (overflow) {
 		for (int i = 0; i < (args->alignment - overflow); i++)
@@ -248,15 +266,15 @@ void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) {
 	}
 	if (args->format == FORMAT_VAG) {
 		uint8_t header[VAG_HEADER_SIZE];
-		write_vag_header(args, block_count * block_size, header);
+		write_vag_header(args, block_count * PSX_AUDIO_SPU_BLOCK_SIZE, header);
 
 		fseek(output, 0, SEEK_SET);
 		fwrite(header, VAG_HEADER_SIZE, 1, output);
 	}
 }
 
-void encode_file_spui(args_t *args, decoder_t *decoder, FILE *output) {
-	int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels;
+void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
+	int audio_samples_per_chunk = args->audio_interleave / PSX_AUDIO_SPU_BLOCK_SIZE * PSX_AUDIO_SPU_SAMPLES_PER_BLOCK;
 
 	// NOTE: since the interleaved .vag format is not standardized, some tools
 	// (such as vgmstream) will not properly play files with interleave < 2048,
@@ -267,38 +285,52 @@ void encode_file_spui(args_t *args, decoder_t *decoder, FILE *output) {
 	int header_size = VAG_HEADER_SIZE + args->alignment - 1;
 	header_size -= header_size % args->alignment;
 
-	int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
-	int block_size = psx_audio_spu_get_buffer_size_per_block();
-	int audio_samples_per_chunk = args->audio_interleave / block_size * audio_samples_per_block;
-	int chunk_count;
-
 	if (args->format == FORMAT_VAGI)
 		fseek(output, header_size, SEEK_SET);
 
+	int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels;
 	psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size);
-	uint8_t *buffer = malloc(buffer_size);
 	memset(audio_state, 0, audio_state_size);
 
-	for (chunk_count = 0; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) {
+	uint8_t *buffer = malloc(buffer_size);
+	int chunk_count = 0;
+
+	for (; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) {
 		int samples_length = decoder->audio_sample_count / args->audio_channels;
-		if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk;
+		int buffer_offset = 0;
+
+		if (samples_length > audio_samples_per_chunk)
+			samples_length = audio_samples_per_chunk;
+
+		// Insert leading silent block (drop one block's worth of samples so the chunk still fits in the buffer)
+		if (chunk_count == 0 && !(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) {
+			buffer_offset = PSX_AUDIO_SPU_BLOCK_SIZE;
+			samples_length -= PSX_AUDIO_SPU_SAMPLES_PER_BLOCK;
+		}
 
 		for (int ch = 0; ch < args->audio_channels; ch++) {
 			memset(buffer, 0, buffer_size);
+
 			int length = psx_audio_spu_encode(
 				audio_state + ch,
 				decoder->audio_samples + ch,
 				samples_length,
 				args->audio_channels,
-				buffer
+				buffer + buffer_offset
 			);
 
-			if (length) {
-				// TODO: implement proper loop flag support
-				if (args->flags & FLAG_SPU_LOOP_END)
-					buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
-				else if (decoder->end_of_input)
-					buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_END;
+			if (length > 0) {
+				uint8_t *last_block = buffer + buffer_offset + length - PSX_AUDIO_SPU_BLOCK_SIZE;
+
+				if (args->flags & FLAG_SPU_LOOP_END) {
+					last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT;
+				} else if (decoder->end_of_input) {
+					// HACK: the trailing block should in theory be appended to
+					// the existing data, but it's easier to just zerofill and
+					// repurpose the last encoded block
+					memset(last_block, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
+					last_block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+				}
+			}
 
 			fwrite(buffer, buffer_size, 1, output);
@@ -332,10 +364,9 @@ void encode_file_spui(args_t *args, decoder_t *decoder, FILE *output) {
 	free(buffer);
 }
 
-void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
+void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 	psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args);
 	int audio_samples_per_sector;
-	uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
 
 	int offset, sector_size;
 
@@ -349,7 +380,8 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
 
 	int interleave;
 	int video_sectors_per_block;
-	if (decoder->state.audio_stream) {
+
+	if (decoder->state.audio_stream != NULL) {
 		// 1/N audio, (N-1)/N video
 		audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
 		interleave = psx_audio_xa_get_sector_interleave(xa_settings) * args->str_cd_speed;
@@ -399,16 +431,24 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
 	for (int j = 0; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; j++) {
 		ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed);
 
-		if ((j%interleave) < video_sectors_per_block) {
-			// Video sector
+		uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
+		bool is_video_sector;
+
+		if (args->flags & FLAG_STR_TRAILING_AUDIO)
+			is_video_sector = (j % interleave) < video_sectors_per_block;
+		else
+			is_video_sector = (j % interleave) > 0;
+
+		if (is_video_sector) {
 			init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*) buffer, j);
 
 			int frames_used = encode_sector_str(&encoder, args->format, decoder->video_frames, buffer);
 			retire_av_data(decoder, 0, frames_used);
 		} else {
-			// Audio sector
 			int samples_length = decoder->audio_sample_count / args->audio_channels;
-			if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector;
+
+			if (samples_length > audio_samples_per_sector)
+				samples_length = audio_samples_per_sector;
 
 			// FIXME: this is an extremely hacky way to handle audio tracks
 			// shorter than the video track
@@ -438,7 +478,7 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
 			buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
 		}
 
-		if((j%interleave) < video_sectors_per_block)
+		if (is_video_sector)
 			psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)buffer, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1);
 
 		fwrite(buffer + offset, sector_size, 1, output);
@@ -461,7 +501,11 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
 	destroy_mdec_encoder(&encoder);
 }
 
-void encode_file_sbs(args_t *args, decoder_t *decoder, FILE *output) {
+void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output) {
+	// TODO: implement
+}
+
+void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output) {
 	mdec_encoder_t encoder;
 	init_mdec_encoder(&encoder, args->video_codec, args->video_width, args->video_height);
 
diff --git a/psxavenc/filefmt.h b/psxavenc/filefmt.h
index 5f8eb38..9276160 100644
--- a/psxavenc/filefmt.h
+++ b/psxavenc/filefmt.h
@@ -32,4 +32,5 @@ void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output);
 void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output);
 void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output);
 void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output);
+void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output);
 void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output);
diff --git a/psxavenc/main.c b/psxavenc/main.c
index 78c0935..277aa26 100644
--- a/psxavenc/main.c
+++ b/psxavenc/main.c
@@ -120,7 +120,6 @@ int main(int argc, const char **argv) {
 
 		case FORMAT_STR:
 		case FORMAT_STRCD:
-		case FORMAT_STRSPU:
 		case FORMAT_STRV:
 			if (!(args.flags & FLAG_QUIET)) {
 				if (decoder.state.audio_stream)
@@ -147,6 +146,30 @@ int main(int argc, const char **argv) {
 			encode_file_str(&args, &decoder, output);
 			break;
 
+		case FORMAT_STRSPU:
+			if (!(args.flags & FLAG_QUIET)) {
+				if (decoder.state.audio_stream)
+					fprintf(
+						stderr,
+						"Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n",
+						args.audio_frequency,
+						args.audio_channels,
+						args.audio_interleave
+					);
+
+				fprintf(
+					stderr,
+					"Video format: %s, %dx%d, %.2f fps\n",
+					bs_codec_names[args.video_codec],
+					args.video_width,
+					args.video_height,
+					(double)args.str_fps_num / (double)args.str_fps_den
+				);
+			}
+
+			encode_file_strspu(&args, &decoder, output);
+			break;
+
 		case FORMAT_SBS:
 			if (!(args.flags & FLAG_QUIET))
 				fprintf(
diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c
index 0d945c3..2221764 100644
--- a/psxavenc/mdec.c
+++ b/psxavenc/mdec.c
@@ -23,6 +23,7 @@ freely, subject to the following restrictions:
 */
 
 #include <assert.h>
+#include <math.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -236,14 +237,16 @@ static const uint8_t dct_zagzig_table[8*8] = {
 };
 
 #if 0
-#define SF0 0x5a82 // cos(0/16 * pi) * sqrt(2)
-#define SF1 0x7d8a // cos(1/16 * pi) * 2
-#define SF2 0x7641 // cos(2/16 * pi) * 2
-#define SF3 0x6a6d // cos(3/16 * pi) * 2
-#define SF4 0x5a82 // cos(4/16 * pi) * 2
-#define SF5 0x471c // cos(5/16 * pi) * 2
-#define SF6 0x30fb // cos(6/16 * pi) * 2
-#define SF7 0x18f8 // cos(7/16 * pi) * 2
+enum {
+	SF0 = 0x5a82, // cos(0/16 * pi) * sqrt(2)
+	SF1 = 0x7d8a, // cos(1/16 * pi) * 2
+	SF2 = 0x7641, // cos(2/16 * pi) * 2
+	SF3 = 0x6a6d, // cos(3/16 * pi) * 2
+	SF4 = 0x5a82, // cos(4/16 * pi) * 2
+	SF5 = 0x471c, // cos(5/16 * pi) * 2
+	SF6 = 0x30fb, // cos(6/16 * pi) * 2
+	SF7 = 0x18f8  // cos(7/16 * pi) * 2
+};
 
 static const int16_t dct_scale_table[8*8] = {
 	SF0,  SF0,  SF0,  SF0,  SF0,  SF0,  SF0,  SF0,
@@ -525,7 +528,9 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int vide
 	if (
 		state->dct_context == NULL ||
 		state->ac_huffman_map == NULL ||
-		state->coeff_clamp_map == NULL
+		state->dc_huffman_map == NULL ||
+		state->coeff_clamp_map == NULL ||
+		state->delta_clamp_map == NULL
 	)
 		return false;
 
@@ -536,7 +541,7 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int vide
 	for (int i = 0; i < 6; i++) {
 		state->dct_block_lists[i] = malloc(dct_block_size);
 
-		if (!state->dct_block_lists[i])
+		if (state->dct_block_lists[i] == NULL)
 			return false;
 	}
 

From 24d37145c60d6a4ffad152c28ff2bebc4730656a Mon Sep 17 00:00:00 2001
From: spicyjpeg <thatspicyjpeg@gmail.com>
Date: Sun, 2 Mar 2025 20:15:06 +0100
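Subject: [PATCH 6/8] Bugfixes, add -T and -A options

The previous -A flag (place audio sectors after the corresponding video
sectors rather than ahead of them) is renamed to -X. -A now takes the
.str type ID used to tag SPU-ADPCM sectors (default 0x0001), and the new
-T option takes the .str type ID used to tag video sectors (default
0x8001).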
---
 psxavenc/args.c     |  33 ++++---
 psxavenc/args.h     |   2 +
 psxavenc/decoding.c | 171 +++++++++++++++++++------------------
 psxavenc/filefmt.c  | 102 +++++++++++-----------
 psxavenc/main.c     |   2 +-
 psxavenc/mdec.c     | 203 ++++++++++++++++++++++----------------------
 psxavenc/mdec.h     |   9 +-
 7 files changed, 271 insertions(+), 251 deletions(-)

diff --git a/psxavenc/args.c b/psxavenc/args.c
index 8c92346..fb74a1f 100644
--- a/psxavenc/args.c
+++ b/psxavenc/args.c
@@ -123,9 +123,9 @@ static const char *const general_options_help =
 	"                        spui:   [A.] raw SPU-ADPCM interleaved data\n"
 	"                        vag:    [A.] .vag SPU-ADPCM mono\n"
 	"                        vagi:   [A.] .vag SPU-ADPCM interleaved\n"
-	"                        str:    [AV] .str video, 2336-byte sectors\n"
-	"                        strcd:  [AV] .str video, 2352-byte sectors\n"
-	"                        strspu: [AV] .str video, 2048-byte sectors\n"
+	"                        str:    [AV] .str video + XA-ADPCM, 2336-byte sectors\n"
+	"                        strcd:  [AV] .str video + XA-ADPCM, 2352-byte sectors\n"
+	"                        strspu: [AV] .str video + SPU-ADPCM, 2048-byte sectors\n"
 	"                        strv:   [.V] .str video, 2048-byte sectors\n"
 	"                        sbs:    [.V] .sbs video\n"
 	"    -R key=value,...  Pass custom options to libswresample (see FFmpeg docs)\n"
@@ -148,12 +148,15 @@ static const char *const format_names[NUM_FORMATS] = {
 
 static void init_default_args(args_t *args) {
 	if (
-		args->format == FORMAT_XA || args->format == FORMAT_XACD ||
-		args->format == FORMAT_STR || args->format == FORMAT_STRCD
+		args->format == FORMAT_XA ||
+		args->format == FORMAT_XACD ||
+		args->format == FORMAT_STR ||
+		args->format == FORMAT_STRCD
 	)
 		args->audio_frequency = 37800;
 	else
 		args->audio_frequency = 44100;
+
 	if (args->format == FORMAT_SPU || args->format == FORMAT_VAG)
 		args->audio_channels = 1;
 	else
@@ -172,11 +175,13 @@ static void init_default_args(args_t *args) {
 	args->str_fps_num = 15;
 	args->str_fps_den = 1;
 	args->str_cd_speed = 2;
+	args->str_video_id = 0x8001;
+	args->str_audio_id = 0x0001;
 
 	if (args->format == FORMAT_SPU || args->format == FORMAT_VAG)
-		args->alignment = 64;
+		args->alignment = 64; // Default SPU DMA chunk size
 	else if (args->format == FORMAT_SBS)
-		args->alignment = 8192;
+		args->alignment = 8192; // Default for System 573 games
 	else
 		args->alignment = 2048;
 }
@@ -264,7 +269,7 @@ static int parse_xa_option(args_t *args, char option, const char *param) {
 }
 
 static const char *const spu_options_help =
-	"SPU-ADPCM options:\n"
+	"Mono SPU-ADPCM options:\n"
 	"    [-f freq] [-a size] [-l ms | -L] [-D]\n"
 	"\n"
 	"    -f freq           Use specified sample rate (default 44100)\n"
@@ -411,11 +416,13 @@ static int parse_bs_option(args_t *args, char option, const char *param) {
 
 static const char *const str_options_help =
 	".str container options:\n"
-	"    [-r num[/den]] [-x 1|2] [-A]\n"
+	"    [-r num[/den]] [-x 1|2] [-T id] [-A id] [-X]\n"
 	"\n"
 	"    -r num[/den]      Set video frame rate to specified integer or fraction (default 15)\n"
 	"    -x 1|2            Set CD-ROM speed the file is meant to played at (default 2)\n"
-	"    -A                Place audio sectors after corresponding video sectors\n"
+	"    -T id             Tag video sectors with specified .str type ID (default 0x8001)\n"
+	"    -A id             Tag SPU-ADPCM sectors with specified .str type ID (default 0x0001)\n"
+	"    -X                Place audio sectors after corresponding video sectors\n"
 	"                      (rather than ahead of them)\n"
 	"\n";
 
@@ -453,7 +460,13 @@ static int parse_str_option(args_t *args, char option, const char *param) {
 		case 'x':
 			return parse_int_one_of(&(args->str_cd_speed), "CD-ROM speed", param, 1, 2);
 
+		case 'T':
+			return parse_int(&(args->str_video_id), "video track type ID", param, 0x0000, 0xFFFF);
+
 		case 'A':
+			return parse_int(&(args->str_audio_id), "audio track type ID", param, 0x0000, 0xFFFF);
+
+		case 'X':
 			args->flags |= FLAG_STR_TRAILING_AUDIO;
 			return 1;
 
diff --git a/psxavenc/args.h b/psxavenc/args.h
index f0fab88..d313646 100644
--- a/psxavenc/args.h
+++ b/psxavenc/args.h
@@ -87,6 +87,8 @@ typedef struct {
 	int str_fps_num;
 	int str_fps_den;
 	int str_cd_speed; // 1 or 2
+	int str_video_id;
+	int str_audio_id;
 	int alignment;
 } args_t;
 
diff --git a/psxavenc/decoding.c b/psxavenc/decoding.c
index a29e90a..a9cec89 100644
--- a/psxavenc/decoding.c
+++ b/psxavenc/decoding.c
@@ -36,27 +36,22 @@ freely, subject to the following restrictions:
 #include "args.h"
 #include "decoding.h"
 
-static int decode_frame(
-	AVCodecContext *codec,
-	AVFrame *frame,
-	int *frame_size,
-	AVPacket *packet
-) {
+static bool decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
 	if (packet != NULL) {
 		if (avcodec_send_packet(codec, packet) != 0)
-			return 0;
+			return false;
 	}
 
 	int ret = avcodec_receive_frame(codec, frame);
 
 	if (ret >= 0) {
 		*frame_size = ret;
-		return 1;
-	} else if (ret == AVERROR(EAGAIN)) {
-		return 1;
-	} else {
-		return 0;
+		return true;
 	}
+	if (ret == AVERROR(EAGAIN))
+		return true;
+
+	return false;
 }
 
 bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
@@ -261,35 +256,39 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
 static void poll_av_packet_audio(decoder_t *decoder, AVPacket *packet) {
 	decoder_state_t *av = &(decoder->state);
 
-	int frame_size, frame_sample_count;
-	uint8_t *buffer[1];
+	int frame_size;
 
-	if (decode_frame(av->audio_codec_context, av->frame, &frame_size, packet)) {
-		size_t buffer_size = sizeof(int16_t) * av->sample_count_mul * swr_get_out_samples(av->resampler, av->frame->nb_samples);
+	if (!decode_frame(av->audio_codec_context, av->frame, &frame_size, packet))
+		return;
 
-		buffer[0] = malloc(buffer_size);
-		memset(buffer[0], 0, buffer_size);
+	int frame_sample_count = swr_get_out_samples(av->resampler, av->frame->nb_samples);
 
-		frame_sample_count = swr_convert(
-			av->resampler,
-			buffer,
-			av->frame->nb_samples,
-			(const uint8_t**)av->frame->data,
-			av->frame->nb_samples
-		);
+	if (frame_sample_count == 0)
+		return;
 
-		decoder->audio_samples = realloc(
-			decoder->audio_samples,
-			(decoder->audio_sample_count + ((frame_sample_count + 4032) * av->sample_count_mul)) * sizeof(int16_t)
-		);
-		memmove(
-			&(decoder->audio_samples[decoder->audio_sample_count]),
-			buffer[0],
-			sizeof(int16_t) * frame_sample_count * av->sample_count_mul
-		);
-		decoder->audio_sample_count += frame_sample_count * av->sample_count_mul;
-		free(buffer[0]);
-	}
+	size_t buffer_size = sizeof(int16_t) * av->sample_count_mul * frame_sample_count;
+	uint8_t *buffer = malloc(buffer_size);
+	memset(buffer, 0, buffer_size);
+
+	frame_sample_count = swr_convert(
+		av->resampler,
+		&buffer,
+		frame_sample_count,
+		(const uint8_t**)av->frame->data,
+		av->frame->nb_samples
+	);
+
+	decoder->audio_samples = realloc(
+		decoder->audio_samples,
+		(decoder->audio_sample_count + ((frame_sample_count + 4032) * av->sample_count_mul)) * sizeof(int16_t)
+	);
+	memmove(
+		&(decoder->audio_samples[decoder->audio_sample_count]),
+		buffer,
+		sizeof(int16_t) * frame_sample_count * av->sample_count_mul
+	);
+	decoder->audio_sample_count += frame_sample_count * av->sample_count_mul;
+	free(buffer);
 }
 
 static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) {
@@ -303,63 +302,63 @@ static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) {
 		decoder->video_width, decoder->video_width
 	};
 
-	if (decode_frame(av->video_codec_context, av->frame, &frame_size, packet)) {
-		if (!av->frame->width || !av->frame->height || !av->frame->data[0])
-			return;
+	if (!decode_frame(av->video_codec_context, av->frame, &frame_size, packet))
+		return;
+	if (!av->frame->width || !av->frame->height || !av->frame->data[0])
+		return;
 
-		// Some files seem to have timestamps starting from a negative value
-		// (but otherwise valid) for whatever reason.
-		double pts =
-			((double)av->frame->pts * (double)av->video_stream->time_base.num)
-			/ av->video_stream->time_base.den;
+	// Some files seem to have timestamps starting from a negative value
+	// (but otherwise valid) for whatever reason.
+	double pts =
+		((double)av->frame->pts * (double)av->video_stream->time_base.num)
+		/ av->video_stream->time_base.den;
 #if 0
-		if (pts < 0.0)
-			return;
+	if (pts < 0.0)
+		return;
 #endif
-		if (decoder->video_frame_count >= 1 && pts < av->video_next_pts)
-			return;
-		if (decoder->video_frame_count < 1)
-			av->video_next_pts = pts;
-		else
-			av->video_next_pts += pts_step;
+	if (decoder->video_frame_count >= 1 && pts < av->video_next_pts)
+		return;
+	if (decoder->video_frame_count < 1)
+		av->video_next_pts = pts;
+	else
+		av->video_next_pts += pts_step;
 
-		//fprintf(stderr, "%d %f %f %f\n", decoder->video_frame_count, pts, av->video_next_pts, pts_step);
+	//fprintf(stderr, "%d %f %f %f\n", decoder->video_frame_count, pts, av->video_next_pts, pts_step);
 
-		// Insert duplicate frames if the frame rate of the input stream is
-		// lower than the target frame rate.
-		int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step);
-		if (dupe_frames < 0) dupe_frames = 0;
-		decoder->video_frames = realloc(
-			decoder->video_frames,
-			(decoder->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size
+	// Insert duplicate frames if the frame rate of the input stream is
+	// lower than the target frame rate.
+	int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step);
+	if (dupe_frames < 0) dupe_frames = 0;
+	decoder->video_frames = realloc(
+		decoder->video_frames,
+		(decoder->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size
+	);
+
+	for (; dupe_frames; dupe_frames--) {
+		memcpy(
+			(decoder->video_frames) + av->video_frame_dst_size * decoder->video_frame_count,
+			(decoder->video_frames) + av->video_frame_dst_size * (decoder->video_frame_count - 1),
+			av->video_frame_dst_size
 		);
-
-		for (; dupe_frames; dupe_frames--) {
-			memcpy(
-				(decoder->video_frames) + av->video_frame_dst_size * decoder->video_frame_count,
-				(decoder->video_frames) + av->video_frame_dst_size * (decoder->video_frame_count - 1),
-				av->video_frame_dst_size
-			);
-			decoder->video_frame_count += 1;
-			av->video_next_pts += pts_step;
-		}
-
-		uint8_t *dst_frame = decoder->video_frames + av->video_frame_dst_size * decoder->video_frame_count;
-		uint8_t *dst_pointers[2] = {
-			dst_frame, dst_frame + plane_size
-		};
-		sws_scale(
-			av->scaler,
-			(const uint8_t *const *) av->frame->data,
-			av->frame->linesize,
-			0,
-			av->frame->height,
-			dst_pointers,
-			dst_strides
-		);
-
 		decoder->video_frame_count += 1;
+		av->video_next_pts += pts_step;
 	}
+
+	uint8_t *dst_frame = decoder->video_frames + av->video_frame_dst_size * decoder->video_frame_count;
+	uint8_t *dst_pointers[2] = {
+		dst_frame, dst_frame + plane_size
+	};
+	sws_scale(
+		av->scaler,
+		(const uint8_t *const *) av->frame->data,
+		av->frame->linesize,
+		0,
+		av->frame->height,
+		dst_pointers,
+		dst_strides
+	);
+
+	decoder->video_frame_count += 1;
 }
 
 bool poll_av_data(decoder_t *decoder) {
diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c
index e5d930b..3f6ce45 100644
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -104,15 +104,15 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *
 	if (args->format == FORMAT_VAGI) {
 		header[0x08] = (uint8_t)args->audio_interleave;
 		header[0x09] = (uint8_t)(args->audio_interleave >> 8);
-		header[0x0a] = (uint8_t)(args->audio_interleave >> 16);
-		header[0x0b] = (uint8_t)(args->audio_interleave >> 24);
+		header[0x0A] = (uint8_t)(args->audio_interleave >> 16);
+		header[0x0B] = (uint8_t)(args->audio_interleave >> 24);
 	}
 
 	// Length of data for each channel (big-endian)
-	header[0x0c] = (uint8_t)(size_per_channel >> 24);
-	header[0x0d] = (uint8_t)(size_per_channel >> 16);
-	header[0x0e] = (uint8_t)(size_per_channel >> 8);
-	header[0x0f] = (uint8_t)size_per_channel;
+	header[0x0C] = (uint8_t)(size_per_channel >> 24);
+	header[0x0D] = (uint8_t)(size_per_channel >> 16);
+	header[0x0E] = (uint8_t)(size_per_channel >> 8);
+	header[0x0F] = (uint8_t)size_per_channel;
 
 	// Sample rate (big-endian)
 	header[0x10] = (uint8_t)(args->audio_frequency >> 24);
@@ -121,8 +121,8 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *
 	header[0x13] = (uint8_t)args->audio_frequency;
 
 	// Number of channels (little-endian)
-	header[0x1e] = (uint8_t)args->audio_channels;
-	header[0x1f] = 0x00;
+	header[0x1E] = (uint8_t)args->audio_channels;
+	header[0x1F] = 0x00;
 
 	// Filename
 	int name_offset = strlen(args->output_file);
@@ -213,7 +213,7 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
 	int loop_start_block = -1;
 
 	if (args->audio_loop_point >= 0)
-		loop_start_block = (args->audio_loop_point * args->audio_frequency) / (PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * 1000);
+		loop_start_block = block_count + (args->audio_loop_point * args->audio_frequency) / (PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * 1000);
 
 	for (; ensure_av_data(decoder, PSX_AUDIO_SPU_SAMPLES_PER_BLOCK, 0); block_count++) {
 		int samples_length = decoder->audio_sample_count;
@@ -279,7 +279,7 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
 	// NOTE: since the interleaved .vag format is not standardized, some tools
 	// (such as vgmstream) will not properly play files with interleave < 2048,
 	// alignment != 2048 or channels != 2.
-	int buffer_size = args->audio_interleave + args->alignment - 1;
+	int buffer_size = args->audio_interleave * args->audio_channels + args->alignment - 1;
 	buffer_size -= buffer_size % args->alignment;
 
 	int header_size = VAG_HEADER_SIZE + args->alignment - 1;
@@ -297,30 +297,30 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
 
 	for (; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) {
 		int samples_length = decoder->audio_sample_count / args->audio_channels;
-		int buffer_offset = 0;
 
 		if (samples_length > audio_samples_per_chunk)
 			samples_length = audio_samples_per_chunk;
 
+		memset(buffer, 0, buffer_size);
+		uint8_t *buffer_ptr = buffer;
+
 		// Insert leading silent block
 		if (chunk_count == 0 && !(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) {
-			buffer_offset = PSX_AUDIO_SPU_BLOCK_SIZE;
-			samples_length -= PSX_AUDIO_SPU_BLOCK_SIZE;
+			buffer_ptr += PSX_AUDIO_SPU_BLOCK_SIZE;
+			samples_length -= PSX_AUDIO_SPU_SAMPLES_PER_BLOCK;
 		}
 
-		for (int ch = 0; ch < args->audio_channels; ch++) {
-			memset(buffer, 0, buffer_size);
-
+		for (int ch = 0; ch < args->audio_channels; ch++, buffer_ptr += args->audio_interleave) {
 			int length = psx_audio_spu_encode(
 				audio_state + ch,
 				decoder->audio_samples + ch,
 				samples_length,
 				args->audio_channels,
-				buffer + buffer_offset
+				buffer_ptr
 			);
 
 			if (length > 0) {
-				uint8_t *last_block = buffer + length - PSX_AUDIO_SPU_BLOCK_SIZE;
+				uint8_t *last_block = buffer_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE;
 
 				if (args->flags & FLAG_SPU_LOOP_END) {
 					last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT;
@@ -332,24 +332,27 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
 					last_block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
 				}
 			}
-
-			fwrite(buffer, buffer_size, 1, output);
-
-			time_t t = get_elapsed_time();
-
-			if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
-				fprintf(
-					stderr,
-					"\rChunk: %6d | Encoding speed: %5.2fx",
-					chunk_count,
-					(double)(chunk_count * audio_samples_per_chunk) / (double)(args->audio_frequency * t)
-				);
-			}
 		}
 
 		retire_av_data(decoder, samples_length * args->audio_channels, 0);
+		fwrite(buffer, buffer_size, 1, output);
+
+		time_t t = get_elapsed_time();
+
+		if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+			fprintf(
+				stderr,
+				"\rChunk: %6d | Encoding speed: %5.2fx",
+				chunk_count,
+				(double)(chunk_count * audio_samples_per_chunk) / (double)(args->audio_frequency * t)
+			);
+		}
+
 	}
 
+	free(audio_state);
+	free(buffer);
+
 	if (args->format == FORMAT_VAGI) {
 		uint8_t *header = malloc(header_size);
 		memset(header, 0, header_size);
@@ -359,32 +362,20 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
 		fwrite(header, header_size, 1, output);
 		free(header);
 	}
-
-	free(audio_state);
-	free(buffer);
 }
 
 void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 	psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args);
-	int audio_samples_per_sector;
-
-	int offset, sector_size;
-
-	if (args->format == FORMAT_STRV) {
-		sector_size = 2048;
-		offset = 0x18;
-	} else {
-		sector_size = psx_audio_xa_get_buffer_size_per_sector(xa_settings);
-		offset = PSX_CDROM_SECTOR_SIZE - sector_size;
-	}
+	int sector_size = psx_audio_xa_get_buffer_size_per_sector(xa_settings);
 
 	int interleave;
+	int audio_samples_per_sector;
 	int video_sectors_per_block;
 
 	if (decoder->state.audio_stream != NULL) {
 		// 1/N audio, (N-1)/N video
-		audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
 		interleave = psx_audio_xa_get_sector_interleave(xa_settings) * args->str_cd_speed;
+		audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
 		video_sectors_per_block = interleave - 1;
 
 		if (!(args->flags & FLAG_QUIET))
@@ -398,8 +389,8 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 			);
 	} else {
 		// 0/1 audio, 1/1 video
-		audio_samples_per_sector = 0;
 		interleave = 1;
+		audio_samples_per_sector = 0;
 		video_sectors_per_block = 1;
 	}
 
@@ -426,7 +417,9 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 
 	// FIXME: this needs an extra frame to prevent A/V desync
 	int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size);
-	if (frames_needed < 2) frames_needed = 2;
+
+	if (frames_needed < 2)
+		frames_needed = 2;
 
 	for (int j = 0; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; j++) {
 		ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed);
@@ -440,9 +433,16 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 			is_video_sector = (j % interleave) > 0;
 
 		if (is_video_sector) {
-			init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*) buffer, j);
+			init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*)buffer, j);
+
+			int frames_used = encode_sector_str(
+				&encoder,
+				args->format,
+				args->str_video_id,
+				decoder->video_frames,
+				buffer
+			);
 
-			int frames_used = encode_sector_str(&encoder, args->format, decoder->video_frames, buffer);
 			retire_av_data(decoder, 0, frames_used);
 		} else {
 			int samples_length = decoder->audio_sample_count / args->audio_channels;
@@ -481,7 +481,7 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 		if (is_video_sector)
 			psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)buffer, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1);
 
-		fwrite(buffer + offset, sector_size, 1, output);
+		fwrite(buffer + PSX_CDROM_SECTOR_SIZE - sector_size, sector_size, 1, output);
 
 		time_t t = get_elapsed_time();
 
diff --git a/psxavenc/main.c b/psxavenc/main.c
index 277aa26..0f5e225 100644
--- a/psxavenc/main.c
+++ b/psxavenc/main.c
@@ -120,7 +120,6 @@ int main(int argc, const char **argv) {
 
 		case FORMAT_STR:
 		case FORMAT_STRCD:
-		case FORMAT_STRV:
 			if (!(args.flags & FLAG_QUIET)) {
 				if (decoder.state.audio_stream)
 					fprintf(
@@ -147,6 +146,7 @@ int main(int argc, const char **argv) {
 			break;
 
 		case FORMAT_STRSPU:
+		case FORMAT_STRV:
 			if (!(args.flags & FLAG_QUIET)) {
 				if (decoder.state.audio_stream)
 					fprintf(
diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c
index 2221764..3587ce1 100644
--- a/psxavenc/mdec.c
+++ b/psxavenc/mdec.c
@@ -32,13 +32,6 @@ freely, subject to the following restrictions:
 #include "args.h"
 #include "mdec.h"
 
-// https://stackoverflow.com/a/60011209
-#if 0
-#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? (((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d)))
-#else
-#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d)))
-#endif
-
 #define AC_PAIR(zeroes, value) \
 	(((zeroes) << 10) | ((+(value)) & 0x3FF)), \
 	(((zeroes) << 10) | ((-(value)) & 0x3FF))
@@ -166,39 +159,31 @@ static const struct {
 static const struct {
 	int c_bits;
 	uint32_t c_value;
-	int sign_bits;
-	int value_bits;
+	int dc_bits;
 } dc_c_huffman_tree[] = {
-	{2, 0x0,  0, 0},
-	{2, 0x1,  1, 0},
-	{2, 0x2,  1, 1},
-	{3, 0x6,  1, 2},
-	{4, 0xE,  1, 3},
-	{5, 0x1E, 1, 4},
-	{6, 0x3E, 1, 5},
-	{7, 0x7E, 1, 6},
-	{8, 0xFE, 1, 7},
+	{2, 0x1,  0},
+	{2, 0x2,  1},
+	{3, 0x6,  2},
+	{4, 0xE,  3},
+	{5, 0x1E, 4},
+	{6, 0x3E, 5},
+	{7, 0x7E, 6},
+	{8, 0xFE, 7}
 };
 
 static const struct {
 	int c_bits;
 	uint32_t c_value;
-	int sign_bits;
-	int value_bits;
+	int dc_bits;
 } dc_y_huffman_tree[] = {
-	{3, 0x4,  0, 0},
-	{2, 0x0,  1, 0},
-	{2, 0x1,  1, 1},
-	{3, 0x5,  1, 2},
-	{3, 0x6,  1, 3},
-	{4, 0xE,  1, 4},
-	{5, 0x1E, 1, 5},
-	{6, 0x3E, 1, 6},
-	{7, 0x7E, 1, 7},
-};
-
-static const uint8_t dc_coeff_indices[6] = {
-	0, 1, 2, 2, 2, 2
+	{2, 0x0,  0},
+	{2, 0x1,  1},
+	{3, 0x5,  2},
+	{3, 0x6,  3},
+	{4, 0xE,  4},
+	{5, 0x1E, 5},
+	{6, 0x3E, 6},
+	{7, 0x7E, 7}
 };
 
 static const uint8_t quant_dec[8*8] = {
@@ -260,82 +245,75 @@ static const int16_t dct_scale_table[8*8] = {
 };
 #endif
 
+enum {
+	INDEX_CR,
+	INDEX_CB,
+	INDEX_Y
+};
+
+#define HUFFMAN_CODE(bits, value) (((bits) << 24) | (value))
+
 static void init_dct_data(mdec_encoder_state_t *state, bs_codec_t codec) {
 	for(int i = 0; i <= 0xFFFF; i++) {
-		// high 8 bits = bit count
-		// low 24 bits = value
-		state->ac_huffman_map[i] = ((6+16)<<24)|((0x01<<16)|(i));
+		state->ac_huffman_map[i] = HUFFMAN_CODE(6 + 16, (0x1 << 16) | i);
 
 		int16_t coeff = (int16_t)i;
+
 		if (coeff < -0x200)
 			coeff = -0x200;
 		else if (coeff > +0x1FE)
 			coeff = +0x1FE; // 0x1FF = v2 end of frame
 
 		state->coeff_clamp_map[i] = coeff;
-
-		int16_t delta = (int16_t)DIVIDE_ROUNDED(i, 4);
-		if (delta < -0xFF)
-			delta = -0xFF;
-		else if (delta > +0xFF)
-			delta = +0xFF;
-
-		// Some versions of Sony's BS v3 decoder compute each DC coefficient as
-		// ((last + delta * 4) & 0x3FF) instead of just (last + delta * 4). The
-		// encoder can leverage this behavior to represent large coefficient
-		// differences as smaller deltas that cause the decoder to overflow and
-		// wrap around (e.g. -1 to encode -512 -> 511 as opposed to +1023). This
-		// saves some space as larger DC values take up more bits.
-		if (codec == BS_CODEC_V3DC) {
-			if (delta > +0x80)
-				delta -= 0x100;
-		}
-
-		state->delta_clamp_map[i] = delta;
 	}
 
+	state->dc_huffman_map[(INDEX_CR << 9) | 0] = HUFFMAN_CODE(2, 0x0);
+	state->dc_huffman_map[(INDEX_CB << 9) | 0] = HUFFMAN_CODE(2, 0x0);
+	state->dc_huffman_map[(INDEX_Y  << 9) | 0] = HUFFMAN_CODE(3, 0x4);
+
 	int ac_tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]);
 	int dc_c_tree_item_count = sizeof(dc_c_huffman_tree) / sizeof(dc_c_huffman_tree[0]);
 	int dc_y_tree_item_count = sizeof(dc_y_huffman_tree) / sizeof(dc_y_huffman_tree[0]);
 
 	for (int i = 0; i < ac_tree_item_count; i++) {
-		int bits = ac_huffman_tree[i].c_bits+1;
+		int bits = ac_huffman_tree[i].c_bits + 1;
 		uint32_t base_value = ac_huffman_tree[i].c_value;
 
-		state->ac_huffman_map[ac_huffman_tree[i].u_hword_pos] = (bits << 24) | (base_value << 1) | 0;
-		state->ac_huffman_map[ac_huffman_tree[i].u_hword_neg] = (bits << 24) | (base_value << 1) | 1;
+		state->ac_huffman_map[ac_huffman_tree[i].u_hword_pos] = HUFFMAN_CODE(bits, (base_value << 1) | 0);
+		state->ac_huffman_map[ac_huffman_tree[i].u_hword_neg] = HUFFMAN_CODE(bits, (base_value << 1) | 1);
 	}
 	for (int i = 0; i < dc_c_tree_item_count; i++) {
-		int dc_bits = dc_c_huffman_tree[i].sign_bits + dc_c_huffman_tree[i].value_bits;
-		int bits = dc_c_huffman_tree[i].c_bits + dc_bits;
-		uint32_t base_value = dc_c_huffman_tree[i].c_value << dc_bits;
+		int dc_bits = dc_c_huffman_tree[i].dc_bits;
+		int bits = dc_c_huffman_tree[i].c_bits + 1 + dc_bits;
+		uint32_t base_value = dc_c_huffman_tree[i].c_value;
+
+		int pos_offset = 1 << dc_bits;
+		int neg_offset = 1 - (1 << (dc_bits + 1));
 
 		for (int j = 0; j < (1 << dc_bits); j++) {
-			int delta = j;
+			int pos = (j + pos_offset) & 0x1FF;
+			int neg = (j + neg_offset) & 0x1FF;
 
-			if ((j >> dc_c_huffman_tree[i].value_bits) == 0) {
-				delta -= (1 << dc_bits) - 1;
-				delta &= 0x1FF;
-			}
-
-			state->dc_huffman_map[(0 << 9) | delta] = (bits << 24) | base_value | j;
-			state->dc_huffman_map[(1 << 9) | delta] = (bits << 24) | base_value | j;
+			state->dc_huffman_map[(INDEX_CR << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j);
+			state->dc_huffman_map[(INDEX_CR << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j);
+			state->dc_huffman_map[(INDEX_CB << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j);
+			state->dc_huffman_map[(INDEX_CB << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j);
 		}
 	}
 	for (int i = 0; i < dc_y_tree_item_count; i++) {
-		int dc_bits = dc_y_huffman_tree[i].sign_bits + dc_y_huffman_tree[i].value_bits;
-		int bits = dc_y_huffman_tree[i].c_bits + dc_bits;
-		uint32_t base_value = dc_y_huffman_tree[i].c_value << dc_bits;
+		int dc_bits = dc_y_huffman_tree[i].dc_bits;
+		int bits = dc_y_huffman_tree[i].c_bits + 1 + dc_bits;
+		uint32_t base_value = dc_y_huffman_tree[i].c_value;
+
+		int pos_offset = 1 << dc_bits;
+		int neg_offset = 1 - (1 << (dc_bits + 1));
 
 		for (int j = 0; j < (1 << dc_bits); j++) {
-			int delta = j;
+			int pos = (j + pos_offset) & 0x1FF;
+			int neg = (j + neg_offset) & 0x1FF;
 
-			if ((j >> dc_y_huffman_tree[i].value_bits) == 0) {
-				delta -= (1 << dc_bits) - 1;
-				delta &= 0x1FF;
-			}
-
-			state->dc_huffman_map[(2 << 9) | delta] = (bits << 24) | base_value | j;
+			state->dc_huffman_map[(INDEX_Y << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j);
+			state->dc_huffman_map[(INDEX_Y << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j);
 		}
 	}
 }
@@ -453,6 +431,13 @@ static int reduce_dct_block(mdec_encoder_state_t *state, int32_t *block, int32_t
 }
 #endif
 
+// https://stackoverflow.com/a/60011209
+#if 0
+#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? (((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d)))
+#else
+#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d)))
+#endif
+
 static bool encode_dct_block(
 	mdec_encoder_state_t *state,
 	bs_codec_t codec,
@@ -467,11 +452,26 @@ static bool encode_dct_block(
 		if (!encode_bits(state, 10, dc & 0x3FF))
 			return false;
 	} else {
-		int index = dc_coeff_indices[state->block_type];
-		int last = state->last_dc_values[index];
+		int index = state->block_type;
 
-		int delta = state->delta_clamp_map[(dc - last) & 0xFFFF];
-		state->last_dc_values[index] = (last + delta * 4) & 0x3FF;
+		if (index > INDEX_Y)
+			index = INDEX_Y;
+
+		int delta = DIVIDE_ROUNDED(dc - state->last_dc_values[index], 4);
+		state->last_dc_values[index] += delta * 4;
+
+		// Some versions of Sony's BS v3 decoder compute each DC coefficient as
+		// ((last + delta * 4) & 0x3FF) instead of just (last + delta * 4). The
+		// encoder can leverage this behavior to represent large coefficient
+		// differences as smaller deltas that cause the decoder to overflow and
+		// wrap around (e.g. -1 to encode -512 -> 511 as opposed to +1023). This
+		// saves some space as larger DC values take up more bits.
+		if (codec == BS_CODEC_V3DC) {
+			if (delta < -0x80)
+				delta += 0x100;
+			else if (delta > +0x80)
+				delta -= 0x100;
+		}
 
 		uint32_t outword = state->dc_huffman_map[(index << 9) | (delta & 0x1FF)];
 
@@ -488,7 +488,7 @@ static bool encode_dct_block(
 		if (ac == 0) {
 			zeroes++;
 		} else {
-			uint32_t outword = state->ac_huffman_map[(zeroes << 10) | ac];
+			uint32_t outword = state->ac_huffman_map[(zeroes << 10) | (ac & 0x3FF)];
 
 			if (!encode_bits(state, outword >> 24, outword & 0xFFFFFF))
 				return false;
@@ -516,21 +516,21 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int vide
 
 	mdec_encoder_state_t *state = &(encoder->state);
 
+#if 0
 	if (state->dct_context != NULL)
 		return true;
+#endif
 
 	state->dct_context = avcodec_dct_alloc();
 	state->ac_huffman_map = malloc(0x10000 * sizeof(uint32_t));
-	state->dc_huffman_map = malloc(0x600 * sizeof(uint32_t));
+	state->dc_huffman_map = malloc(0x200 * 3 * sizeof(uint32_t));
 	state->coeff_clamp_map = malloc(0x10000 * sizeof(int16_t));
-	state->delta_clamp_map = malloc(0x10000 * sizeof(int16_t));
 
 	if (
 		state->dct_context == NULL ||
 		state->ac_huffman_map == NULL ||
 		state->dc_huffman_map == NULL ||
-		state->coeff_clamp_map == NULL ||
-		state->delta_clamp_map == NULL
+		state->coeff_clamp_map == NULL
 	)
 		return false;
 
@@ -569,12 +569,8 @@ void destroy_mdec_encoder(mdec_encoder_t *encoder) {
 		free(state->coeff_clamp_map);
 		state->coeff_clamp_map = NULL;
 	}
-	if (state->delta_clamp_map) {
-		free(state->delta_clamp_map);
-		state->delta_clamp_map = NULL;
-	}
 	for (int i = 0; i < 6; i++) {
-		if (state->dct_block_lists[i]) {
+		if (state->dct_block_lists[i] != NULL) {
 			free(state->dct_block_lists[i]);
 			state->dct_block_lists[i] = NULL;
 		}
@@ -653,7 +649,6 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
 	} else {
 		end_of_block = 0x3FF;
 		assert(state->dc_huffman_map);
-		assert(state->delta_clamp_map);
 	}
 
 	assert(state->ac_huffman_map);
@@ -681,9 +676,9 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
 		memset(state->frame_output, 0, state->frame_max_size);
 
 		state->block_type = 0;
-		state->last_dc_values[0] = 0;
-		state->last_dc_values[1] = 0;
-		state->last_dc_values[2] = 0;
+		state->last_dc_values[INDEX_CR] = 0;
+		state->last_dc_values[INDEX_CB] = 0;
+		state->last_dc_values[INDEX_Y] = 0;
 
 		state->bits_value = 0;
 		state->bits_left = 16;
@@ -759,7 +754,13 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
 	state->frame_output[0x007] = 0x00;
 }
 
-int encode_sector_str(mdec_encoder_t *encoder, format_t format, uint8_t *video_frames, uint8_t *output) {
+int encode_sector_str(
+	mdec_encoder_t *encoder,
+	format_t format,
+	uint16_t str_video_id,
+	uint8_t *video_frames,
+	uint8_t *output
+) {
 	mdec_encoder_state_t *state = &(encoder->state);
 	int last_frame_index = state->frame_index;
 	int frame_size = encoder->video_width * encoder->video_height * 2;
@@ -784,9 +785,9 @@ int encode_sector_str(mdec_encoder_t *encoder, format_t format, uint8_t *video_f
 	header[0x000] = 0x60;
 	header[0x001] = 0x01;
 
-	// Chunk type: MDEC data
-	header[0x002] = 0x01;
-	header[0x003] = 0x80;
+	// Chunk type
+	header[0x002] = (uint8_t)str_video_id;
+	header[0x003] = (uint8_t)(str_video_id >> 8);
 
 	// Muxed chunk index/count
 	int chunk_index = state->frame_data_offset / 2016;
diff --git a/psxavenc/mdec.h b/psxavenc/mdec.h
index 3d1e4dc..4b8e026 100644
--- a/psxavenc/mdec.h
+++ b/psxavenc/mdec.h
@@ -51,7 +51,6 @@ typedef struct {
 	uint32_t *ac_huffman_map;
 	uint32_t *dc_huffman_map;
 	int16_t *coeff_clamp_map;
-	int16_t *delta_clamp_map;
 	int16_t *dct_block_lists[6];
 } mdec_encoder_state_t;
 
@@ -66,4 +65,10 @@ typedef struct {
 bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int video_width, int video_height);
 void destroy_mdec_encoder(mdec_encoder_t *encoder);
 void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame);
-int encode_sector_str(mdec_encoder_t *encoder, format_t format, uint8_t *video_frames, uint8_t *output);
+int encode_sector_str(
+	mdec_encoder_t *encoder,
+	format_t format,
+	uint16_t str_video_id,
+	uint8_t *video_frames,
+	uint8_t *output
+);

From 60cbaca2b294d2044478927729a01dfd8963efd2 Mon Sep 17 00:00:00 2001
From: spicyjpeg <thatspicyjpeg@gmail.com>
Date: Wed, 5 Mar 2025 01:32:35 +0100
Subject: [PATCH 7/8] Fix str subheader corruption, update README

---
 README.md           |  93 +++++++++++++++++++++++---------
 libpsxav/adpcm.c    |  68 ++++++++++++++++--------
 libpsxav/cdrom.c    |  21 ++++----
 libpsxav/libpsxav.h |  26 +++++++--
 psxavenc/filefmt.c  | 127 +++++++++++++++++++++-----------------------
 psxavenc/mdec.c     |  10 ++--
 6 files changed, 215 insertions(+), 130 deletions(-)

diff --git a/README.md b/README.md
index 411c595..12f3218 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 # psxavenc
 
 psxavenc is an open-source command-line tool for encoding audio and video data
-into formats commonly used on the original PlayStation.
+into formats commonly used on the original PlayStation and PlayStation 2.
 
 ## Installation
 
@@ -14,22 +14,22 @@ Requirements:
 
 ```shell
 $ meson setup build
-$ cd build
-$ ninja install
+$ meson compile -C build
+$ meson install -C build
 ```
 
 ## Usage
 
-Run `psxavenc`.
+Run `psxavenc -h`.
 
 ### Examples
 
 Rescale a video file to ≤320x240 pixels (preserving aspect ratio) and encode it
-into a 15fps .STR file with 37800 Hz 4-bit stereo audio and 2352-byte sectors,
-meant to be played at 2x CD-ROM speed:
+into a 15 fps version 2 .str file with 37800 Hz 4-bit stereo audio and 2352-byte
+sectors, meant to be played at 2x CD-ROM speed:
 
 ```shell
-$ psxavenc -t str2cd -f 37800 -b 4 -c 2 -s 320x240 -r 15 -x 2 in.mp4 out.str
+$ psxavenc -t strcd -v v2 -f 37800 -b 4 -c 2 -s 320x240 -r 15 -x 2 in.mp4 out.str
 ```
 
 Convert a mono audio sample to 22050 Hz raw SPU-ADPCM data:
@@ -38,36 +38,77 @@ Convert a mono audio sample to 22050 Hz raw SPU-ADPCM data:
 $ psxavenc -t spu -f 22050 in.ogg out.snd
 ```
 
-Convert a stereo audio file to a 44100 Hz interleaved .VAG file with 8192-byte
+Convert a stereo audio file to a 44100 Hz interleaved .vag file with 2048-byte
 interleave and loop flags set at the end of each interleaved chunk:
 
 ```shell
-$ psxavenc -t vagi -f 44100 -c 2 -L -i 8192 in.wav out.vag
+$ psxavenc -t vagi -f 44100 -c 2 -L -i 2048 in.wav out.vag
 ```
 
-## Supported formats
+## Supported output formats
 
-| Format   | Audio            | Channels | Video | Sector size |
-| :------- | :--------------- | :------- | :---- | :---------- |
-| `xa`     | XA-ADPCM         | 1 or 2   | None  | 2336 bytes  |
-| `xacd`   | XA-ADPCM         | 1 or 2   | None  | 2352 bytes  |
-| `spu`    | SPU-ADPCM        | 1        | None  |             |
-| `spui`   | SPU-ADPCM        | Any      | None  | Any         |
-| `vag`    | SPU-ADPCM        | 1        | None  |             |
-| `vagi`   | SPU-ADPCM        | Any      | None  | Any         |
-| `str2`   | None or XA-ADPCM | 1 or 2   | BS v2 | 2336 bytes  |
-| `str2cd` | None or XA-ADPCM | 1 or 2   | BS v2 | 2352 bytes  |
-| `str2v`  | None             |          | BS v2 |             |
-| `sbs2`   | None             |          | BS v2 | Any         |
+The output format must be set using the `-t` option.
+
+| Format   | Audio codec          | Audio channels | Video codec   | Sector size |
+| :------- | :------------------- | :------------- | :------------ | :---------- |
+| `xa`     | XA-ADPCM             | 1 or 2         |               | 2336 bytes  |
+| `xacd`   | XA-ADPCM             | 1 or 2         |               | 2352 bytes  |
+| `spu`    | SPU-ADPCM            | 1              |               |             |
+| `vag`    | SPU-ADPCM            | 1              |               |             |
+| `spui`   | SPU-ADPCM            | Any            |               |             |
+| `vagi`   | SPU-ADPCM            | Any            |               |             |
+| `str`    | XA-ADPCM (optional)  | 1 or 2         | BS v2/v3/v3dc | 2336 bytes  |
+| `strcd`  | XA-ADPCM (optional)  | 1 or 2         | BS v2/v3/v3dc | 2352 bytes  |
+| `strspu` | SPU-ADPCM (optional) | Any            | BS v2/v3/v3dc | 2048 bytes  |
+| `strv`   |                      |                | BS v2/v3/v3dc | 2048 bytes  |
+| `sbs`    |                      |                | BS v2/v3/v3dc |             |
 
 Notes:
 
-- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .VAG
+- The `xa`, `xacd`, `str` and `strcd` formats will output files with 2336- or
+  2352-byte CD-ROM sectors, containing the appropriate CD-XA subheaders and
+  dummy EDC/ECC placeholders in addition to the actual sector data. Such files
+  **cannot be added to a disc image as-is** and must instead be parsed by an
+  authoring tool capable of rebuilding the EDC/ECC data (as it is dependent on
+  the file's absolute location on the disc) and generating a Mode 2 CD-ROM image
+  with "native" 2352-byte sectors.
+- Similarly, files generated with `-t xa` or `-t xacd` **must be interleaved**
+  **with other XA-ADPCM tracks or empty padding using an external tool** before
+  they can be played.
+- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .vag
   header at the beginning of the file. The header is always 48 bytes long for
   `vag` files, while in the case of `vagi` files it is padded to the size
   specified using the `-a` option (2048 bytes by default). Note that `vagi`
   files with more than 2 channels and/or alignment other than 2048 bytes are not
   standardized.
-- The `sbs2` format (used in some System 573 games) is simply a series of
-  concatenated BS v2 frames, each padded to the size specified by the `-a`
-  option, with no additional headers besides the BS frame headers.
+- The `strspu` format encodes the input file's audio track as a series of custom
+  .str chunks (type ID `0x0001` by default) holding interleaved SPU-ADPCM data
+  in the same format as `spui`, rather than XA-ADPCM. As .str chunks do not
+  require custom XA subheaders, a file with standard 2048-byte sectors that does
+  not need any special handling will be generated.
+- The `strv` format disables audio altogether and is equivalent to `strspu` on
+  an input file with no audio track.
+- The `sbs` format (used in some System 573 games) consists of a series of
+  concatenated BS frames, each padded to the size specified by the `-a` option
+  (the default setting is 8192 bytes), with no additional headers besides the BS
+  frame headers.
+
+## Supported video codecs
+
+All formats with a video track (`str`, `strcd`, `strspu`, `strv` and `sbs`) can
+use any of the codecs listed below. The codec can be set using the `-v` option.
+
+| Codec          | Supported by          | Typ. decoder CPU usage |
+| :------------- | :-------------------- | :--------------------- |
+| `v2` (default) | All players/decoders  | Medium                 |
+| `v3`           | Most players/decoders | High                   |
+| `v3dc`         | Few players/decoders  | High                   |
+
+Notes:
+
+- The `v3dc` codec is a variant of `v3` with a slightly better compression
+  ratio; however, most tools and playback libraries (including FFmpeg, jPSXdec
+  and earlier versions of Sony's own BS decoder) are unable to decode it
+  correctly; its use is thus highly discouraged. Refer to
+  [the psx-spx section on DC coefficient encoding](https://psx-spx.consoledev.net/cdromfileformats/#dc-v3)
+  for more details.
diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c
index ecc7264..2d5dbc5 100644
--- a/libpsxav/adpcm.c
+++ b/libpsxav/adpcm.c
@@ -228,14 +228,9 @@ static inline void psx_audio_xa_sync_subheader_copy(psx_cdrom_sector_mode2_t *bu
 	memcpy(buffer->subheader + 1, buffer->subheader, sizeof(psx_cdrom_sector_xa_subheader_t));
 }
 
-static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, psx_audio_xa_settings_t settings) {
-	if (settings.format == PSX_AUDIO_XA_FORMAT_XACD) {
-		memset(buffer, 0, PSX_CDROM_SECTOR_SIZE);
-		memset(buffer->sync + 1, 0xFF, 10);
-		buffer->header.mode = 0x02;
-	} else {
-		memset(buffer->subheader, 0, PSX_CDROM_SECTOR_SIZE - 16);
-	}
+static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, int lba, psx_audio_xa_settings_t settings) {
+	if (settings.format == PSX_AUDIO_XA_FORMAT_XACD)
+		psx_cdrom_init_sector((psx_cdrom_sector_t *)buffer, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2);
 
 	buffer->subheader[0].file = settings.file_number;
 	buffer->subheader[0].channel = settings.channel_number & PSX_CDROM_SECTOR_XA_CHANNEL_MASK;
@@ -243,28 +238,46 @@ static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, ps
 		PSX_CDROM_SECTOR_XA_SUBMODE_AUDIO
 		| PSX_CDROM_SECTOR_XA_SUBMODE_FORM2
 		| PSX_CDROM_SECTOR_XA_SUBMODE_RT;
-	buffer->subheader[0].coding =
-		(settings.stereo ? PSX_CDROM_SECTOR_XA_CODING_STEREO : PSX_CDROM_SECTOR_XA_CODING_MONO)
-		| (settings.frequency >= PSX_AUDIO_XA_FREQ_DOUBLE ? PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE : PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE)
-		| (settings.bits_per_sample >= 8 ? PSX_CDROM_SECTOR_XA_CODING_BITS_8 : PSX_CDROM_SECTOR_XA_CODING_BITS_4);
+
+	// Assign (not OR) the first coding flag: psx_cdrom_init_sector() above only
+	// runs for XACD, so nothing here has zeroed this byte for raw XA sectors.
+	buffer->subheader[0].coding = settings.stereo
+		? PSX_CDROM_SECTOR_XA_CODING_STEREO : PSX_CDROM_SECTOR_XA_CODING_MONO;
+	if (settings.frequency == PSX_AUDIO_XA_FREQ_DOUBLE)
+		buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE;
+	else
+		buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE;
+	if (settings.bits_per_sample == 8)
+		buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_BITS_8;
+	else
+		buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_BITS_4;
+
 	psx_audio_xa_sync_subheader_copy(buffer);
 }
 
-int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output) {
+int psx_audio_xa_encode(
+	psx_audio_xa_settings_t settings,
+	psx_audio_encoder_state_t *state,
+	int16_t* samples,
+	int sample_count,
+	int lba,
+	uint8_t *output
+) {
 	int sample_jump = (settings.bits_per_sample == 8) ? 112 : 224;
 	int i, j;
-	int xa_sector_size = settings.format == PSX_AUDIO_XA_FORMAT_XA ? 2336 : 2352;
-	int xa_offset = 2352 - xa_sector_size;
+	int xa_sector_size = psx_audio_xa_get_buffer_size_per_sector(settings);
+	int xa_offset = PSX_CDROM_SECTOR_SIZE - xa_sector_size;
 	uint8_t init_sector = 1;
 
-	if (settings.stereo) { sample_count <<= 1; }
+	if (settings.stereo)
+		sample_count *= 2;
 
 	for (i = 0, j = 0; i < sample_count || ((j % 18) != 0); i += sample_jump, j++) {
 		psx_cdrom_sector_mode2_t *sector_data = (psx_cdrom_sector_mode2_t*) (output + ((j/18) * xa_sector_size) - xa_offset);
 		uint8_t *block_data = sector_data->data + ((j%18) * 0x80);
 
 		if (init_sector) {
-			psx_audio_xa_encode_init_sector(sector_data, settings);
+			psx_audio_xa_encode_init_sector(sector_data, lba, settings);
 			init_sector = 0;
 		}
 
@@ -276,6 +289,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat
 		if ((j+1)%18 == 0) {
 			psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)sector_data, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2);
 			init_sector = 1;
+			lba++;
 		}
 	}
 
@@ -284,21 +298,33 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat
 
 void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length) {
 	if (output_length >= 2336) {
-		psx_cdrom_sector_mode2_t *sector = (psx_cdrom_sector_mode2_t*) &output[output_length - 2352];
+		psx_cdrom_sector_mode2_t *sector = (psx_cdrom_sector_mode2_t*) &output[output_length - PSX_CDROM_SECTOR_SIZE];
 		sector->subheader[0].submode |= PSX_CDROM_SECTOR_XA_SUBMODE_EOF;
 		psx_audio_xa_sync_subheader_copy(sector);
 	}
 }
 
-int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output) {
+int psx_audio_xa_encode_simple(
+	psx_audio_xa_settings_t settings,
+	int16_t* samples,
+	int sample_count,
+	int lba,
+	uint8_t *output
+) {
 	psx_audio_encoder_state_t state;
 	memset(&state, 0, sizeof(psx_audio_encoder_state_t));
-	int length = psx_audio_xa_encode(settings, &state, samples, sample_count, output);
+	int length = psx_audio_xa_encode(settings, &state, samples, sample_count, lba, output);
 	psx_audio_xa_encode_finalize(settings, output, length);
 	return length;
 }
 
-int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output) {
+int psx_audio_spu_encode(
+	psx_audio_encoder_channel_state_t *state,
+	int16_t* samples,
+	int sample_count,
+	int pitch,
+	uint8_t *output
+) {
 	uint8_t prebuf[PSX_AUDIO_SPU_SAMPLES_PER_BLOCK];
 	uint8_t *buffer = output;
 
diff --git a/libpsxav/cdrom.c b/libpsxav/cdrom.c
index ac9de32..509e525 100644
--- a/libpsxav/cdrom.c
+++ b/libpsxav/cdrom.c
@@ -42,11 +42,21 @@ static uint32_t edc_crc32(uint8_t *data, int length) {
 
 #define TO_BCD(x) ((x) + ((x) / 10) * 6)
 
+void psx_cdrom_init_xa_subheader(psx_cdrom_sector_xa_subheader_t *subheader, psx_cdrom_sector_type_t type) {
+	memset(subheader, 0, sizeof(psx_cdrom_sector_xa_subheader_t) * 2);
+	subheader->submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA;
+
+	if (type == PSX_CDROM_SECTOR_TYPE_MODE2_FORM2)
+		subheader->submode |= PSX_CDROM_SECTOR_XA_SUBMODE_FORM2;
+
+	memcpy(subheader + 1, subheader, sizeof(psx_cdrom_sector_xa_subheader_t));
+}
+
 void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type) {
 	// Sync sequence
 	memset(sector->mode1.sync + 1, 0xff, 10);
 	sector->mode1.sync[0x0] = 0x00;
-	sector->mode1.sync[0xb] = 0x00;
+	sector->mode1.sync[0xB] = 0x00;
 
 	// Timecode
 	lba += 150;
@@ -59,14 +69,7 @@ void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector
 		sector->mode1.header.mode = 0x01;
 	} else {
 		sector->mode2.header.mode = 0x02;
-
-		memset(sector->mode2.subheader, 0, sizeof(psx_cdrom_sector_xa_subheader_t));
-		sector->mode2.subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA;
-
-		if (type == PSX_CDROM_SECTOR_TYPE_MODE2_FORM2)
-			sector->mode2.subheader[0].submode |= PSX_CDROM_SECTOR_XA_SUBMODE_FORM2;
-
-		memcpy(sector->mode2.subheader + 1, sector->mode2.subheader, sizeof(psx_cdrom_sector_xa_subheader_t));
+		psx_cdrom_init_xa_subheader(sector->mode2.subheader, type);
 	}
 }
 
diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h
index 1b8aaa1..0d9d171 100644
--- a/libpsxav/libpsxav.h
+++ b/libpsxav/libpsxav.h
@@ -72,9 +72,28 @@ uint32_t psx_audio_spu_get_buffer_size(int sample_count);
 uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t settings);
 uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings);
 uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings);
-int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output);
-int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output);
-int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output);
+int psx_audio_xa_encode(
+	psx_audio_xa_settings_t settings,
+	psx_audio_encoder_state_t *state,
+	int16_t* samples,
+	int sample_count,
+	int lba,
+	uint8_t *output
+);
+int psx_audio_xa_encode_simple(
+	psx_audio_xa_settings_t settings,
+	int16_t* samples,
+	int sample_count,
+	int lba,
+	uint8_t *output
+);
+int psx_audio_spu_encode(
+	psx_audio_encoder_channel_state_t *state,
+	int16_t* samples,
+	int sample_count,
+	int pitch,
+	uint8_t *output
+);
 int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start);
 void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length);
 
@@ -149,5 +168,6 @@ typedef enum {
 	PSX_CDROM_SECTOR_TYPE_MODE2_FORM2
 } psx_cdrom_sector_type_t;
 
+void psx_cdrom_init_xa_subheader(psx_cdrom_sector_xa_subheader_t *subheader, psx_cdrom_sector_type_t type);
 void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type);
 void psx_cdrom_calculate_checksums(psx_cdrom_sector_t *sector, psx_cdrom_sector_type_t type);
diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c
index 3f6ce45..1150364 100644
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -68,15 +68,24 @@ static psx_audio_xa_settings_t args_to_libpsxav_xa_audio(const args_t *args) {
 	return settings;
 };
 
-static void init_sector_buffer_video(const args_t *args, psx_cdrom_sector_mode2_t *sector, int lba) {
-	psx_cdrom_init_sector((psx_cdrom_sector_t *)sector, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1);
+static void init_sector_buffer_video(const args_t *args, uint8_t *sector, int lba) {
+	psx_cdrom_sector_xa_subheader_t *subheader = NULL;
 
-	sector->subheader[0].file = args->audio_xa_file;
-	sector->subheader[0].channel = args->audio_xa_channel & PSX_CDROM_SECTOR_XA_CHANNEL_MASK;
-	sector->subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA | PSX_CDROM_SECTOR_XA_SUBMODE_RT;
-	sector->subheader[0].coding = 0;
+	if (args->format == FORMAT_STRCD) {
+		psx_cdrom_init_sector((psx_cdrom_sector_t *)sector, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1);
+		subheader = ((psx_cdrom_sector_t *)sector)->mode2.subheader;
+	} else if (args->format == FORMAT_STR) {
+		subheader = (psx_cdrom_sector_xa_subheader_t *)sector;
+	}
 
-	memcpy(sector->subheader + 1, sector->subheader, sizeof(psx_cdrom_sector_xa_subheader_t));
+	if (subheader != NULL) {
+		subheader->file = args->audio_xa_file;
+		subheader->channel = args->audio_xa_channel & PSX_CDROM_SECTOR_XA_CHANNEL_MASK;
+		subheader->submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA | PSX_CDROM_SECTOR_XA_SUBMODE_RT;
+		subheader->coding = 0;
+
+		memcpy(subheader + 1, subheader, sizeof(psx_cdrom_sector_xa_subheader_t));
+	}
 }
 
 #define VAG_HEADER_SIZE 0x30
@@ -147,35 +156,29 @@ void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output) {
 	psx_audio_encoder_state_t audio_state;
 	memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
 
-	for (int j = 0; ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, 0); j++) {
+	int sector_count = 0;
+
+	for (; ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, 0); sector_count++) {
 		int samples_length = decoder->audio_sample_count / args->audio_channels;
 
 		if (samples_length > audio_samples_per_sector)
 			samples_length = audio_samples_per_sector;
 
-		uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
+		uint8_t sector[PSX_CDROM_SECTOR_SIZE];
 		int length = psx_audio_xa_encode(
 			xa_settings,
 			&audio_state,
 			decoder->audio_samples,
 			samples_length,
-			buffer
+			sector_count,
+			sector
 		);
 
 		if (decoder->end_of_input)
-			psx_audio_xa_encode_finalize(xa_settings, buffer, length);
-
-		if (args->format == FORMAT_XACD) {
-			int t = j + 75*2;
-
-			// Put the time in
-			buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4);
-			buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4);
-			buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
-		}
+			psx_audio_xa_encode_finalize(xa_settings, sector, length);
 
 		retire_av_data(decoder, samples_length * args->audio_channels, 0);
-		fwrite(buffer, length, 1, output);
+		fwrite(sector, length, 1, output);
 
 		time_t t = get_elapsed_time();
 
@@ -183,8 +186,8 @@ void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output) {
 			fprintf(
 				stderr,
 				"\rLBA: %6d | Encoding speed: %5.2fx",
-				j,
-				(double)(j * audio_samples_per_sector) / (double)(args->audio_frequency * t)
+				sector_count,
+				(double)(sector_count * audio_samples_per_sector) / (double)(args->audio_frequency * t)
 			);
 		}
 	}
@@ -199,14 +202,14 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
 	if (args->format == FORMAT_VAG)
 		fseek(output, VAG_HEADER_SIZE, SEEK_SET);
 
-	uint8_t buffer[PSX_AUDIO_SPU_BLOCK_SIZE];
+	uint8_t block[PSX_AUDIO_SPU_BLOCK_SIZE];
 	int block_count = 0;
 
 	if (!(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) {
 		// Insert leading silent block
-		memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
+		memset(block, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
 
-		fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output);
+		fwrite(block, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output);
 		block_count++;
 	}
 
@@ -226,16 +229,16 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
 			decoder->audio_samples,
 			samples_length,
 			1,
-			buffer
+			block
 		);
 
 		if (block_count == loop_start_block)
-			buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
+			block[1] |= PSX_AUDIO_SPU_LOOP_START;
 		if ((args->flags & FLAG_SPU_LOOP_END) && decoder->end_of_input)
-			buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
+			block[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
 
 		retire_av_data(decoder, samples_length, 0);
-		fwrite(buffer, length, 1, output);
+		fwrite(block, length, 1, output);
 
 		time_t t = get_elapsed_time();
 
@@ -251,10 +254,10 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
 
 	if (!(args->flags & FLAG_SPU_LOOP_END)) {
 		// Insert trailing looping block
-		memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
-		buffer[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+		memset(block, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
+		block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
 
-		fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output);
+		fwrite(block, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output);
 		block_count++;
 	}
 
@@ -279,8 +282,8 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
 	// NOTE: since the interleaved .vag format is not standardized, some tools
 	// (such as vgmstream) will not properly play files with interleave < 2048,
 	// alignment != 2048 or channels != 2.
-	int buffer_size = args->audio_interleave * args->audio_channels + args->alignment - 1;
-	buffer_size -= buffer_size % args->alignment;
+	int chunk_size = args->audio_interleave * args->audio_channels + args->alignment - 1;
+	chunk_size -= chunk_size % args->alignment;
 
 	int header_size = VAG_HEADER_SIZE + args->alignment - 1;
 	header_size -= header_size % args->alignment;
@@ -292,7 +295,7 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
 	psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size);
 	memset(audio_state, 0, audio_state_size);
 
-	uint8_t *buffer = malloc(buffer_size);
+	uint8_t *chunk = malloc(chunk_size);
 	int chunk_count = 0;
 
 	for (; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) {
@@ -301,26 +304,26 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
 		if (samples_length > audio_samples_per_chunk)
 			samples_length = audio_samples_per_chunk;
 
-		memset(buffer, 0, buffer_size);
-		uint8_t *buffer_ptr = buffer;
+		memset(chunk, 0, chunk_size);
+		uint8_t *chunk_ptr = chunk;
 
 		// Insert leading silent block
 		if (chunk_count == 0 && !(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) {
-			buffer_ptr += PSX_AUDIO_SPU_BLOCK_SIZE;
+			chunk_ptr += PSX_AUDIO_SPU_BLOCK_SIZE;
 			samples_length -= PSX_AUDIO_SPU_SAMPLES_PER_BLOCK;
 		}
 
-		for (int ch = 0; ch < args->audio_channels; ch++, buffer_ptr += args->audio_interleave) {
+		for (int ch = 0; ch < args->audio_channels; ch++, chunk_ptr += args->audio_interleave) {
 			int length = psx_audio_spu_encode(
 				audio_state + ch,
 				decoder->audio_samples + ch,
 				samples_length,
 				args->audio_channels,
-				buffer_ptr
+				chunk_ptr
 			);
 
 			if (length > 0) {
-				uint8_t *last_block = buffer_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE;
+				uint8_t *last_block = chunk_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE;
 
 				if (args->flags & FLAG_SPU_LOOP_END) {
 					last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT;
@@ -335,7 +338,7 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
 		}
 
 		retire_av_data(decoder, samples_length * args->audio_channels, 0);
-		fwrite(buffer, buffer_size, 1, output);
+		fwrite(chunk, chunk_size, 1, output);
 
 		time_t t = get_elapsed_time();
 
@@ -351,7 +354,7 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
 	}
 
 	free(audio_state);
-	free(buffer);
+	free(chunk);
 
 	if (args->format == FORMAT_VAGI) {
 		uint8_t *header = malloc(header_size);
@@ -421,28 +424,31 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 	if (frames_needed < 2)
 		frames_needed = 2;
 
-	for (int j = 0; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; j++) {
+	int sector_count = 0;
+
+	for (; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; sector_count++) {
 		ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed);
 
-		uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
+		uint8_t sector[PSX_CDROM_SECTOR_SIZE];
 		bool is_video_sector;
 
 		if (args->flags & FLAG_STR_TRAILING_AUDIO)
-			is_video_sector = (j % interleave) < video_sectors_per_block;
+			is_video_sector = (sector_count % interleave) < video_sectors_per_block;
 		else
-			is_video_sector = (j % interleave) > 0;
+			is_video_sector = (sector_count % interleave) > 0;
 
 		if (is_video_sector) {
-			init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*)buffer, j);
+			init_sector_buffer_video(args, sector, sector_count);
 
 			int frames_used = encode_sector_str(
 				&encoder,
 				args->format,
 				args->str_video_id,
 				decoder->video_frames,
-				buffer
+				sector
 			);
 
+			psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)sector, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1);
 			retire_av_data(decoder, 0, frames_used);
 		} else {
 			int samples_length = decoder->audio_sample_count / args->audio_channels;
@@ -460,28 +466,17 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 				&audio_state,
 				decoder->audio_samples,
 				samples_length,
-				buffer
+				sector_count,
+				sector
 			);
 
 			if (decoder->end_of_input)
-				psx_audio_xa_encode_finalize(xa_settings, buffer, length);
+				psx_audio_xa_encode_finalize(xa_settings, sector, length);
 
 			retire_av_data(decoder, samples_length * args->audio_channels, 0);
 		}
 
-		if (args->format == FORMAT_STRCD) {
-			int t = j + 75*2;
-
-			// Put the time in
-			buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4);
-			buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4);
-			buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
-		}
-
-		if (is_video_sector)
-			psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)buffer, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1);
-
-		fwrite(buffer + PSX_CDROM_SECTOR_SIZE - sector_size, sector_size, 1, output);
+		fwrite(sector, sector_size, 1, output);
 
 		time_t t = get_elapsed_time();
 
@@ -490,7 +485,7 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 				stderr,
 				"\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
 				encoder.state.frame_index,
-				j,
+				sector_count,
 				(double)encoder.state.quant_scale_sum / (double)encoder.state.frame_index,
 				(double)(encoder.state.frame_index * args->str_fps_den) / (double)(t * args->str_fps_num)
 			);
diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c
index 3587ce1..602e4cc 100644
--- a/psxavenc/mdec.c
+++ b/psxavenc/mdec.c
@@ -288,11 +288,11 @@ static void init_dct_data(mdec_encoder_state_t *state, bs_codec_t codec) {
 		uint32_t base_value = dc_c_huffman_tree[i].c_value;
 
 		int pos_offset = 1 << dc_bits;
-		int neg_offset = 1 - (1 << (dc_bits + 1));
+		int neg_offset = pos_offset * 2 - 1;
 
 		for (int j = 0; j < (1 << dc_bits); j++) {
 			int pos = (j + pos_offset) & 0x1FF;
-			int neg = (j + neg_offset) & 0x1FF;
+			int neg = (j - neg_offset) & 0x1FF;
 
 			state->dc_huffman_map[(INDEX_CR << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j);
 			state->dc_huffman_map[(INDEX_CR << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j);
@@ -306,11 +306,11 @@ static void init_dct_data(mdec_encoder_state_t *state, bs_codec_t codec) {
 		uint32_t base_value = dc_y_huffman_tree[i].c_value;
 
 		int pos_offset = 1 << dc_bits;
-		int neg_offset = 1 - (1 << (dc_bits + 1));
+		int neg_offset = pos_offset * 2 - 1;
 
 		for (int j = 0; j < (1 << dc_bits); j++) {
 			int pos = (j + pos_offset) & 0x1FF;
-			int neg = (j + neg_offset) & 0x1FF;
+			int neg = (j - neg_offset) & 0x1FF;
 
 			state->dc_huffman_map[(INDEX_Y << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j);
 			state->dc_huffman_map[(INDEX_Y << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j);
@@ -657,7 +657,7 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
 	// Attempt encoding the frame at the maximum quality. If the result is too
 	// large, increase the quantization scale and try again.
 	// TODO: if a frame encoded at scale N is too large but the same frame
-	// encoded at scale N-1 leaves a significant amount of free space, attempt
+	// encoded at scale N+1 leaves a significant amount of free space, attempt
 	// compressing at scale N but optimizing coefficients away until it fits
 	// (like the old algorithm did)
 	for (

From 801d70e22e317e2e48423d0c44e4f08490e89680 Mon Sep 17 00:00:00 2001
From: spicyjpeg <thatspicyjpeg@gmail.com>
Date: Sat, 8 Mar 2025 01:10:42 +0100
Subject: [PATCH 8/8] Disable unimplemented formats, add missing const
 qualifiers

---
 README.md           |  40 ++++++++--------
 libpsxav/adpcm.c    |  53 ++++++++++++++++----
 libpsxav/libpsxav.h |   8 ++--
 psxavenc/args.c     |   4 +-
 psxavenc/filefmt.c  | 114 +++++++++++++++++++++++++++++++++++++++++++-
 psxavenc/main.c     |   4 ++
 psxavenc/mdec.c     |  15 +++---
 psxavenc/mdec.h     |   4 +-
 8 files changed, 196 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index 12f3218..007bd35 100644
--- a/README.md
+++ b/README.md
@@ -49,19 +49,18 @@ $ psxavenc -t vagi -f 44100 -c 2 -L -i 2048 in.wav out.vag
 
 The output format must be set using the `-t` option.
 
-| Format   | Audio codec          | Audio channels | Video codec   | Sector size |
-| :------- | :------------------- | :------------- | :------------ | :---------- |
-| `xa`     | XA-ADPCM             | 1 or 2         |               | 2336 bytes  |
-| `xacd`   | XA-ADPCM             | 1 or 2         |               | 2352 bytes  |
-| `spu`    | SPU-ADPCM            | 1              |               |             |
-| `vag`    | SPU-ADPCM            | 1              |               |             |
-| `spui`   | SPU-ADPCM            | Any            |               |             |
-| `vagi`   | SPU-ADPCM            | Any            |               |             |
-| `str`    | XA-ADPCM (optional)  | 1 or 2         | BS v2/v3/v3dc | 2336 bytes  |
-| `strcd`  | XA-ADPCM (optional)  | 1 or 2         | BS v2/v3/v3dc | 2352 bytes  |
-| `strspu` | SPU-ADPCM (optional) | Any            | BS v2/v3/v3dc | 2048 bytes  |
-| `strv`   |                      |                | BS v2/v3/v3dc | 2048 bytes  |
-| `sbs`    |                      |                | BS v2/v3/v3dc |             |
+| Format  | Audio codec          | Audio channels | Video codec   | Sector size |
+| :------ | :------------------- | :------------- | :------------ | :---------- |
+| `xa`    | XA-ADPCM             | 1 or 2         |               | 2336 bytes  |
+| `xacd`  | XA-ADPCM             | 1 or 2         |               | 2352 bytes  |
+| `spu`   | SPU-ADPCM            | 1              |               |             |
+| `vag`   | SPU-ADPCM            | 1              |               |             |
+| `spui`  | SPU-ADPCM            | Any            |               |             |
+| `vagi`  | SPU-ADPCM            | Any            |               |             |
+| `str`   | XA-ADPCM (optional)  | 1 or 2         | BS v2/v3/v3dc | 2336 bytes  |
+| `strcd` | XA-ADPCM (optional)  | 1 or 2         | BS v2/v3/v3dc | 2352 bytes  |
+| `strv`  |                      |                | BS v2/v3/v3dc | 2048 bytes  |
+| `sbs`   |                      |                | BS v2/v3/v3dc |             |
 
 Notes:
 
@@ -81,11 +80,12 @@ Notes:
   specified using the `-a` option (2048 bytes by default). Note that `vagi`
   files with more than 2 channels and/or alignment other than 2048 bytes are not
   standardized.
-- The `strspu` format encodes the input file's audio track as a series of custom
-  .str chunks (type ID `0x0001` by default) holding interleaved SPU-ADPCM data
-  in the same format as `spui`, rather than XA-ADPCM. As .str chunks do not
-  require custom XA subheaders, a file with standard 2048-byte sectors that does
-  not need any special handling will be generated.
+- ~~The `strspu` format encodes the input file's audio track as a series of~~
+  ~~custom .str chunks (type ID `0x0001` by default) holding interleaved~~
+  ~~SPU-ADPCM data in the same format as `spui`, rather than XA-ADPCM. As .str~~
+  ~~chunks do not require custom XA subheaders, a file with standard 2048-byte~~
+  ~~sectors that does not need any special handling will be generated.~~ *This*
+  *format has not yet been implemented.*
 - The `strv` format disables audio altogether and is equivalent to `strspu` on
   an input file with no audio track.
 - The `sbs` format (used in some System 573 games) consists of a series of
@@ -95,8 +95,8 @@ Notes:
 
 ## Supported video codecs
 
-All formats with a video track (`str`, `strcd`, `strspu`, `strv` and `sbs`) can
-use any of the codecs listed below. The codec can be set using the `-v` option.
+All formats with a video track (`str`, `strcd`, `strv` and `sbs`) can use any of
+the codecs listed below. The codec can be set using the `-v` option.
 
 | Codec          | Supported by          | Typ. decoder CPU usage |
 | :------------- | :-------------------- | :--------------------- |
diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c
index 2d5dbc5..80e3413 100644
--- a/libpsxav/adpcm.c
+++ b/libpsxav/adpcm.c
@@ -36,7 +36,14 @@ freely, subject to the following restrictions:
 static const int16_t filter_k1[ADPCM_FILTER_COUNT] = {0, 60, 115, 98, 122};
 static const int16_t filter_k2[ADPCM_FILTER_COUNT] = {0, 0, -52, -55, -60};
 
-static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, int filter, int shift_range) {
+static int find_min_shift(
+	const psx_audio_encoder_channel_state_t *state,
+	const int16_t *samples,
+	int sample_limit,
+	int pitch,
+	int filter,
+	int shift_range
+) {
 	// Assumption made:
 	//
 	// There is value in shifting right one step further to allow the nibbles to clip.
@@ -71,7 +78,19 @@ static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_
 	return min_shift;
 }
 
-static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, const psx_audio_encoder_channel_state_t *instate, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter, int sample_shift, int shift_range) {
+static uint8_t attempt_to_encode(
+	psx_audio_encoder_channel_state_t *outstate,
+	const psx_audio_encoder_channel_state_t *instate,
+	const int16_t *samples,
+	int sample_limit,
+	int pitch,
+	uint8_t *data,
+	int data_shift,
+	int data_pitch,
+	int filter,
+	int sample_shift,
+	int shift_range
+) {
 	uint8_t sample_mask = 0xFFFF >> shift_range;
 	uint8_t nondata_mask = ~(sample_mask << data_shift);
 
@@ -120,8 +139,18 @@ static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, co
 	return hdr;
 }
 
-static uint8_t encode(psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter_count, int shift_range) {
-    psx_audio_encoder_channel_state_t proposed;
+static uint8_t encode(
+	psx_audio_encoder_channel_state_t *state,
+	const int16_t *samples,
+	int sample_limit,
+	int pitch,
+	uint8_t *data,
+	int data_shift,
+	int data_pitch,
+	int filter_count,
+	int shift_range
+) {
+	psx_audio_encoder_channel_state_t proposed;
 	int64_t best_mse = ((int64_t)1<<(int64_t)50);
 	int best_filter = 0;
 	int best_sample_shift = 0;
@@ -161,7 +190,13 @@ static uint8_t encode(psx_audio_encoder_channel_state_t *state, int16_t *samples
 		best_filter, best_sample_shift, shift_range);
 }
 
-static void encode_block_xa(int16_t *audio_samples, int audio_samples_limit, uint8_t *data, psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state) {
+static void encode_block_xa(
+	const int16_t *audio_samples,
+	int audio_samples_limit,
+	uint8_t *data,
+	psx_audio_xa_settings_t settings,
+	psx_audio_encoder_state_t *state
+) {
 	if (settings.bits_per_sample == 4) {
 		if (settings.stereo) {
 			data[0]  = encode(&(state->left),  audio_samples,            audio_samples_limit,        2, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
@@ -258,7 +293,7 @@ static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, in
 int psx_audio_xa_encode(
 	psx_audio_xa_settings_t settings,
 	psx_audio_encoder_state_t *state,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int lba,
 	uint8_t *output
@@ -306,7 +341,7 @@ void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *out
 
 int psx_audio_xa_encode_simple(
 	psx_audio_xa_settings_t settings,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int lba,
 	uint8_t *output
@@ -320,7 +355,7 @@ int psx_audio_xa_encode_simple(
 
 int psx_audio_spu_encode(
 	psx_audio_encoder_channel_state_t *state,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int pitch,
 	uint8_t *output
@@ -340,7 +375,7 @@ int psx_audio_spu_encode(
 	return buffer - output;
 }
 
-int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start) {
+int psx_audio_spu_encode_simple(const int16_t *samples, int sample_count, uint8_t *output, int loop_start) {
 	psx_audio_encoder_channel_state_t state;
 	memset(&state, 0, sizeof(psx_audio_encoder_channel_state_t));
 	int length = psx_audio_spu_encode(&state, samples, sample_count, 1, output);
diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h
index 0d9d171..67733dd 100644
--- a/libpsxav/libpsxav.h
+++ b/libpsxav/libpsxav.h
@@ -75,26 +75,26 @@ uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings);
 int psx_audio_xa_encode(
 	psx_audio_xa_settings_t settings,
 	psx_audio_encoder_state_t *state,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int lba,
 	uint8_t *output
 );
 int psx_audio_xa_encode_simple(
 	psx_audio_xa_settings_t settings,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int lba,
 	uint8_t *output
 );
 int psx_audio_spu_encode(
 	psx_audio_encoder_channel_state_t *state,
-	int16_t* samples,
+	const int16_t *samples,
 	int sample_count,
 	int pitch,
 	uint8_t *output
 );
-int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start);
+int psx_audio_spu_encode_simple(const int16_t *samples, int sample_count, uint8_t *output, int loop_start);
 void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length);
 
 // cdrom.c
diff --git a/psxavenc/args.c b/psxavenc/args.c
index fb74a1f..93c3ef0 100644
--- a/psxavenc/args.c
+++ b/psxavenc/args.c
@@ -125,7 +125,7 @@ static const char *const general_options_help =
 	"                        vagi:   [A.] .vag SPU-ADPCM interleaved\n"
 	"                        str:    [AV] .str video + XA-ADPCM, 2336-byte sectors\n"
 	"                        strcd:  [AV] .str video + XA-ADPCM, 2352-byte sectors\n"
-	"                        strspu: [AV] .str video + SPU-ADPCM, 2048-byte sectors\n"
+	//"                        strspu: [AV] .str video + SPU-ADPCM, 2048-byte sectors\n"
 	"                        strv:   [.V] .str video, 2048-byte sectors\n"
 	"                        sbs:    [.V] .sbs video\n"
 	"    -R key=value,...  Pass custom options to libswresample (see FFmpeg docs)\n"
@@ -498,7 +498,7 @@ static const char *const general_usage =
 	"    psxavenc -t spu|vag   [spu-options]                             <in> <out.vag>\n"
 	"    psxavenc -t spui|vagi [spui-options]                            <in> <out.vag>\n"
 	"    psxavenc -t str|strcd [xa-options]   [bs-options] [str-options] <in> <out.str>\n"
-	"    psxavenc -t strspu    [spui-options] [bs-options] [str-options] <in> <out.str>\n"
+	//"    psxavenc -t strspu    [spui-options] [bs-options] [str-options] <in> <out.str>\n"
 	"    psxavenc -t strv                     [bs-options] [str-options] <in> <out.str>\n"
 	"    psxavenc -t sbs                      [bs-options] [sbs-options] <in> <out.sbs>\n"
 	"\n";
diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c
index 1150364..cb446df 100644
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -22,6 +22,7 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
+#include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
@@ -432,7 +433,9 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 		uint8_t sector[PSX_CDROM_SECTOR_SIZE];
 		bool is_video_sector;
 
-		if (args->flags & FLAG_STR_TRAILING_AUDIO)
+		if (audio_samples_per_sector == 0)
+			is_video_sector = true;
+		else if (args->flags & FLAG_STR_TRAILING_AUDIO)
 			is_video_sector = (sector_count % interleave) < video_sectors_per_block;
 		else
 			is_video_sector = (sector_count % interleave) > 0;
@@ -497,7 +500,114 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 }
 
 void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output) {
-	// TODO: implement
+	// Writes the decoder's video frames to output as 2048-byte .str sectors.
+	// The layout defaults to video-only (0/1 audio, 1/1 video); initializing
+	// it here means nothing is read uninitialized if the assert below is
+	// compiled out (NDEBUG) while the input has an audio stream.
+	int interleave = 1;
+	int audio_samples_per_sector = 0;
+	int video_sectors_per_block = 1;
+
+	if (decoder->state.audio_stream != NULL) {
+		// Audio sector layout is not implemented; debug builds stop here.
+		assert(false); // TODO: implement
+
+		if (!(args->flags & FLAG_QUIET))
+			fprintf(
+				stderr,
+				"Interleave: %d/%d audio, %d/%d video\n",
+				interleave - video_sectors_per_block,
+				interleave,
+				video_sectors_per_block,
+				interleave
+			);
+	}
+
+	mdec_encoder_t encoder;
+	init_mdec_encoder(&encoder, args->video_codec, args->video_width, args->video_height);
+
+	// Sectors per frame, e.g. 2x speed at 15fps, video-only = (150*1/1/15) = 10
+	encoder.state.frame_block_base_overflow = (75 * args->str_cd_speed) * video_sectors_per_block * args->str_fps_den;
+	encoder.state.frame_block_overflow_den = interleave * args->str_fps_num;
+	double frame_size = (double)encoder.state.frame_block_base_overflow / (double)encoder.state.frame_block_overflow_den;
+
+	if (!(args->flags & FLAG_QUIET))
+		fprintf(stderr, "Frame size: %.2f sectors\n", frame_size);
+
+	encoder.state.frame_output = malloc(2016 * (int)ceil(frame_size));
+	encoder.state.frame_index = 0;
+	encoder.state.frame_data_offset = 0;
+	encoder.state.frame_max_size = 0;
+	encoder.state.frame_block_overflow_num = 0;
+	encoder.state.quant_scale_sum = 0;
+
+	// FIXME: this needs an extra frame to prevent A/V desync
+	int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size);
+
+	if (frames_needed < 2)
+		frames_needed = 2;
+
+	int sector_count = 0;
+
+	for (; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; sector_count++) {
+		ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed);
+
+		uint8_t sector[2048];
+		bool is_video_sector;
+
+		if (audio_samples_per_sector == 0)
+			is_video_sector = true;
+		else if (args->flags & FLAG_STR_TRAILING_AUDIO)
+			is_video_sector = (sector_count % interleave) < video_sectors_per_block;
+		else
+			is_video_sector = (sector_count % interleave) > 0;
+
+		if (is_video_sector) {
+			init_sector_buffer_video(args, sector, sector_count);
+
+			int frames_used = encode_sector_str(
+				&encoder,
+				args->format,
+				args->str_video_id,
+				decoder->video_frames,
+				sector
+			);
+
+			retire_av_data(decoder, 0, frames_used);
+		} else {
+			int samples_length = decoder->audio_sample_count / args->audio_channels;
+
+			if (samples_length > audio_samples_per_sector)
+				samples_length = audio_samples_per_sector;
+
+			// FIXME: this is an extremely hacky way to handle audio tracks
+			// shorter than the video track
+			if (!samples_length)
+				video_sectors_per_block++;
+
+			assert(false); // TODO: implement
+
+			retire_av_data(decoder, samples_length * args->audio_channels, 0);
+		}
+
+		fwrite(sector, 2048, 1, output);
+
+		time_t t = get_elapsed_time();
+
+		if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+			fprintf(
+				stderr,
+				"\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
+				encoder.state.frame_index,
+				sector_count,
+				(double)encoder.state.quant_scale_sum / (double)encoder.state.frame_index,
+				(double)(encoder.state.frame_index * args->str_fps_den) / (double)(t * args->str_fps_num)
+			);
+		}
+	}
+
+	free(encoder.state.frame_output);
+	destroy_mdec_encoder(&encoder);
 }
 
 void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output) {
diff --git a/psxavenc/main.c b/psxavenc/main.c
index 0f5e225..9e584c2 100644
--- a/psxavenc/main.c
+++ b/psxavenc/main.c
@@ -146,6 +146,10 @@ int main(int argc, const char **argv) {
 			break;
 
 		case FORMAT_STRSPU:
+			// TODO: implement and remove this check
+			fprintf(stderr, "This format is not currently supported\n");
+			break;
+
 		case FORMAT_STRV:
 			if (!(args.flags & FLAG_QUIET)) {
 				if (decoder.state.audio_stream)
diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c
index 602e4cc..ba3d043 100644
--- a/psxavenc/mdec.c
+++ b/psxavenc/mdec.c
@@ -577,7 +577,7 @@ void destroy_mdec_encoder(mdec_encoder_t *encoder) {
 	}
 }
 
-void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
+void encode_frame_bs(mdec_encoder_t *encoder, const uint8_t *video_frame) {
 	mdec_encoder_state_t *state = &(encoder->state);
 
 	assert(state->dct_context);
@@ -758,15 +758,12 @@ int encode_sector_str(
 	mdec_encoder_t *encoder,
 	format_t format,
 	uint16_t str_video_id,
-	uint8_t *video_frames,
+	const uint8_t *video_frames,
 	uint8_t *output
 ) {
 	mdec_encoder_state_t *state = &(encoder->state);
-	int last_frame_index = state->frame_index;
 	int frame_size = encoder->video_width * encoder->video_height * 2;
-
-	uint8_t header[32];
-	memset(header, 0, sizeof(header));
+	int frames_used = 0;
 
 	while (state->frame_data_offset >= state->frame_max_size) {
 		state->frame_index++;
@@ -779,8 +776,12 @@ int encode_sector_str(
 
 		encode_frame_bs(encoder, video_frames);
 		video_frames += frame_size;
+		frames_used++;
 	}
 
+	uint8_t header[32];
+	memset(header, 0, sizeof(header));
+
 	// STR version
 	header[0x000] = 0x60;
 	header[0x001] = 0x01;
@@ -831,5 +832,5 @@ int encode_sector_str(
 	memcpy(output + offset + 0x020, state->frame_output + state->frame_data_offset, 2016);
 
 	state->frame_data_offset += 2016;
-	return state->frame_index - last_frame_index;
+	return frames_used;
 }
diff --git a/psxavenc/mdec.h b/psxavenc/mdec.h
index 4b8e026..ed94f2e 100644
--- a/psxavenc/mdec.h
+++ b/psxavenc/mdec.h
@@ -64,11 +64,11 @@ typedef struct {
 
 bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int video_width, int video_height);
 void destroy_mdec_encoder(mdec_encoder_t *encoder);
-void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame);
+void encode_frame_bs(mdec_encoder_t *encoder, const uint8_t *video_frame);
 int encode_sector_str(
 	mdec_encoder_t *encoder,
 	format_t format,
 	uint16_t str_video_id,
-	uint8_t *video_frames,
+	const uint8_t *video_frames,
 	uint8_t *output
 );