diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c
index 96c0ad0..03d298f 100644
--- a/libpsxav/adpcm.c
+++ b/libpsxav/adpcm.c
@@ -266,7 +266,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat
 	uint8_t init_sector = 1;
 
 	if (settings.stereo) { sample_count <<= 1; }
-	
+
 	for (i = 0, j = 0; i < sample_count || ((j % 18) != 0); i += sample_jump, j++) {
 		psx_cdrom_sector_mode2_t *sector_data = (psx_cdrom_sector_mode2_t*) (output + ((j/18) * xa_sector_size) - xa_offset);
 		uint8_t *block_data = sector_data->data + ((j%18) * 0x80);
@@ -282,7 +282,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat
 		memcpy(block_data + 12, block_data + 8, 4);
 
 		if ((j+1)%18 == 0) {
-			psx_cdrom_calculate_checksums((uint8_t*) sector_data, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2);
+			psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)sector_data, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2);
 			init_sector = 1;
 		}
 	}
diff --git a/libpsxav/cdrom.c b/libpsxav/cdrom.c
index f6b0144..ac9de32 100644
--- a/libpsxav/cdrom.c
+++ b/libpsxav/cdrom.c
@@ -21,49 +21,88 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
+#include <stdint.h>
 #include <string.h>
 #include "libpsxav.h"
 
-static uint32_t psx_cdrom_calculate_edc(uint8_t *sector, uint32_t offset, uint32_t size)
-{
+#define EDC_CRC32_POLYNOMIAL 0xD8018001 // XORed into the register whenever a 1 bit is shifted out
+
+static uint32_t edc_crc32(uint8_t *data, int length) { // returns the EDC checksum of data[0..length-1], starting from 0
 	uint32_t edc = 0;
-	for (int i = offset; i < offset+size; i++) {
-		edc ^= 0xFF&(uint32_t)sector[i];
-		for (int ibit = 0; ibit < 8; ibit++) {
-			edc = (edc>>1)^(0xD8018001*(edc&0x1));
-		}
+
+	for (int i = 0; i < length; i++) {
+		edc ^= 0xFF & (uint32_t)data[i]; // fold the next byte into the low 8 bits
+
+		for (int j = 0; j < 8; j++) // one shift per bit of the byte, least significant bit first
+			edc = (edc >> 1) ^ (EDC_CRC32_POLYNOMIAL * (edc & 0x1)); // multiplying by 0 or 1 selects the polynomial without a branch
 	}
+
 	return edc;
 }
 
-void psx_cdrom_calculate_checksums(uint8_t *sector, psx_cdrom_sector_type_t type)
-{
-	switch (type) {
-		case PSX_CDROM_SECTOR_TYPE_MODE1: {
-			uint32_t edc = psx_cdrom_calculate_edc(sector, 0x0, 0x810);
-			sector[0x810] = (uint8_t)(edc);
-			sector[0x811] = (uint8_t)(edc >> 8);
-			sector[0x812] = (uint8_t)(edc >> 16);
-			sector[0x813] = (uint8_t)(edc >> 24);
+#define TO_BCD(x) ((x) + ((x) / 10) * 6)
 
+void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type) { // writes sync, timecode, mode and (for mode 2) both subheaders; no other bytes are written
+	// Sync sequence
+	memset(sector->mode1.sync + 1, 0xff, 10);
+	sector->mode1.sync[0x0] = 0x00;
+	sector->mode1.sync[0xb] = 0x00;
+
+	// Timecode
+	lba += 150; // 150 sectors = 2 seconds at the 75 sectors/second used below
+	sector->mode1.header.minute = TO_BCD(lba / 4500); // 4500 = 75 sectors * 60 seconds
+	sector->mode1.header.second = TO_BCD((lba / 75) % 60);
+	sector->mode1.header.sector = TO_BCD(lba % 75);
+
+	// Mode
+	if (type == PSX_CDROM_SECTOR_TYPE_MODE1) {
+		sector->mode1.header.mode = 0x01;
+	} else {
+		sector->mode2.header.mode = 0x02;
+
+		memset(sector->mode2.subheader, 0, sizeof(psx_cdrom_sector_xa_subheader_t)); // clears the first subheader copy only
+		sector->mode2.subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA;
+
+		if (type == PSX_CDROM_SECTOR_TYPE_MODE2_FORM2)
+			sector->mode2.subheader[0].submode |= PSX_CDROM_SECTOR_XA_SUBMODE_FORM2;
+
+		memcpy(sector->mode2.subheader + 1, sector->mode2.subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); // duplicate the first copy into the second
+	}
+}
+
+void psx_cdrom_calculate_checksums(psx_cdrom_sector_t *sector, psx_cdrom_sector_type_t type) { // computes the sector's EDC and stores it, low byte first, at the type-specific offset
+	uint8_t *data = (uint8_t *)sector; // byte view: every offset below is in bytes from the sector start
+	uint32_t edc;
+
+	switch (type) {
+		case PSX_CDROM_SECTOR_TYPE_MODE1:
+			edc = edc_crc32(data, 0x810);
+
+			data[0x810] = (uint8_t)(edc);
+			data[0x811] = (uint8_t)(edc >> 8);
+			data[0x812] = (uint8_t)(edc >> 16);
+			data[0x813] = (uint8_t)(edc >> 24);
-			memset(sector + 0x814, 0, 8);
+			memset(data + 0x814, 0, 8); // offset the byte pointer: `sector + 0x814` would advance by 0x814 whole sectors
 			// TODO: ECC
-		} break;
-		case PSX_CDROM_SECTOR_TYPE_MODE2_FORM1: {
-			uint32_t edc = psx_cdrom_calculate_edc(sector, 0x10, 0x808);
-			sector[0x818] = (uint8_t)(edc);
-			sector[0x819] = (uint8_t)(edc >> 8);
-			sector[0x81A] = (uint8_t)(edc >> 16);
-			sector[0x81B] = (uint8_t)(edc >> 24);
+			break;
 
+		case PSX_CDROM_SECTOR_TYPE_MODE2_FORM1:
+			edc = edc_crc32(data + 0x10, 0x808);
+
+			data[0x818] = (uint8_t)(edc);
+			data[0x819] = (uint8_t)(edc >> 8);
+			data[0x81A] = (uint8_t)(edc >> 16);
+			data[0x81B] = (uint8_t)(edc >> 24);
 			// TODO: ECC
-		} break;
-		case PSX_CDROM_SECTOR_TYPE_MODE2_FORM2: {
-			uint32_t edc = psx_cdrom_calculate_edc(sector, 0x10, 0x91C);
-			sector[0x92C] = (uint8_t)(edc);
-			sector[0x92D] = (uint8_t)(edc >> 8);
-			sector[0x92E] = (uint8_t)(edc >> 16);
-			sector[0x92F] = (uint8_t)(edc >> 24);
-		} break;
+			break;
+
+		case PSX_CDROM_SECTOR_TYPE_MODE2_FORM2:
+			edc = edc_crc32(data + 0x10, 0x91C);
+
+			data[0x92C] = (uint8_t)(edc);
+			data[0x92D] = (uint8_t)(edc >> 8);
+			data[0x92E] = (uint8_t)(edc >> 16);
+			data[0x92F] = (uint8_t)(edc >> 24);
+			break;
 	}
-}
\ No newline at end of file
+}
diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h
index e20138e..32eabaf 100644
--- a/libpsxav/libpsxav.h
+++ b/libpsxav/libpsxav.h
@@ -21,8 +21,7 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef __LIBPSXAV_H__
-#define __LIBPSXAV_H__
+#pragma once
 
 #include <stdbool.h>
 #include <stdint.h>
@@ -106,6 +105,11 @@ typedef struct {
 	uint8_t data[0x918];
 } psx_cdrom_sector_mode2_t;
 
+typedef union {
+	psx_cdrom_sector_mode1_t mode1;
+	psx_cdrom_sector_mode2_t mode2;
+} psx_cdrom_sector_t;
+
 _Static_assert(sizeof(psx_cdrom_sector_mode1_t) == PSX_CDROM_SECTOR_SIZE, "Invalid Mode1 sector size");
 _Static_assert(sizeof(psx_cdrom_sector_mode2_t) == PSX_CDROM_SECTOR_SIZE, "Invalid Mode2 sector size");
 
@@ -137,6 +141,5 @@ typedef enum {
 	PSX_CDROM_SECTOR_TYPE_MODE2_FORM2
 } psx_cdrom_sector_type_t;
 
-void psx_cdrom_calculate_checksums(uint8_t *sector, psx_cdrom_sector_type_t type);
-
-#endif /* __LIBPSXAV_H__ */
+void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type);
+void psx_cdrom_calculate_checksums(psx_cdrom_sector_t *sector, psx_cdrom_sector_type_t type);
diff --git a/meson.build b/meson.build
index abd8a35..4061200 100644
--- a/meson.build
+++ b/meson.build
@@ -25,9 +25,8 @@ libpsxav_dep = declare_dependency(include_directories: include_directories('libp
 
 executable('psxavenc', [
 	'psxavenc/args.c',
-	'psxavenc/cdrom.c',
 	'psxavenc/decoding.c',
 	'psxavenc/filefmt.c',
-	'psxavenc/mdec.c',
-	'psxavenc/psxavenc.c'
+	'psxavenc/main.c',
+	'psxavenc/mdec.c'
 ], dependencies: [libm_dep, ffmpeg, libpsxav_dep], install: true)
diff --git a/psxavenc/args.c b/psxavenc/args.c
index 03d0695..8c92346 100644
--- a/psxavenc/args.c
+++ b/psxavenc/args.c
@@ -27,6 +27,7 @@ freely, subject to the following restrictions:
 #include <stdlib.h>
 #include <string.h>
 #include "args.h"
+#include "config.h"
 
 #define INVALID_PARAM -1
 
@@ -146,13 +147,6 @@ static const char *const format_names[NUM_FORMATS] = {
 };
 
 static void init_default_args(args_t *args) {
-	args->flags = 0;
-
-	args->input_file = NULL;
-	args->output_file = NULL;
-	args->swresample_options = NULL;
-	args->swscale_options = NULL;
-
 	if (
 		args->format == FORMAT_XA || args->format == FORMAT_XACD ||
 		args->format == FORMAT_STR || args->format == FORMAT_STRCD
@@ -694,6 +688,10 @@ bool parse_args(args_t *args, const char *const *options, int count) {
 		print_help(args->format);
 		return false;
 	}
+	if (args->flags & FLAG_PRINT_VERSION) {
+		printf("psxavenc " VERSION "\n");
+		return false;
+	}
 	if (args->format == FORMAT_INVALID || args->input_file == NULL || args->output_file == NULL) {
 		fprintf(
 			stderr,
diff --git a/psxavenc/cdrom.c b/psxavenc/cdrom.c
deleted file mode 100644
index d391e12..0000000
--- a/psxavenc/cdrom.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
-psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
-
-Copyright (c) 2019, 2020 Adrian "asie" Siekierka
-Copyright (c) 2019 Ben "GreaseMonkey" Russell
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not
-   claim that you wrote the original software. If you use this software
-   in a product, an acknowledgment in the product documentation would be
-   appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be
-   misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "common.h"
-
-void init_sector_buffer_video(psx_cdrom_sector_mode2_t *buffer, settings_t *settings) {
-	if (settings->format == FORMAT_STR2CD) {
-		memset(buffer, 0, PSX_CDROM_SECTOR_SIZE);
-		memset(buffer->sync + 1, 0xFF, 10);
-		buffer->header.mode = 0x02;
-	} else if (settings->format == FORMAT_STR2V) {
-		memset(buffer->data, 0, 2048);
-	} else {
-		memset(buffer->subheader, 0, PSX_CDROM_SECTOR_SIZE - 16);
-	}
-
-	buffer->subheader[0].file = settings->file_number;
-	buffer->subheader[0].channel = settings->channel_number & PSX_CDROM_SECTOR_XA_CHANNEL_MASK;
-	buffer->subheader[0].submode =
-		PSX_CDROM_SECTOR_XA_SUBMODE_DATA
-		| PSX_CDROM_SECTOR_XA_SUBMODE_RT;
-	buffer->subheader[0].coding = 0;
-	memcpy(buffer->subheader + 1, buffer->subheader, sizeof(psx_cdrom_sector_xa_subheader_t));
-}
-
-void calculate_edc_data(uint8_t *buffer)
-{
-	uint32_t edc = 0;
-	for (int i = 0x010; i < 0x818; i++) {
-		edc ^= 0xFF&(uint32_t)buffer[i];
-		for (int ibit = 0; ibit < 8; ibit++) {
-			edc = (edc>>1)^(0xD8018001*(edc&0x1));
-		}
-	}
-	buffer[0x818] = (uint8_t)(edc);
-	buffer[0x819] = (uint8_t)(edc >> 8);
-	buffer[0x81A] = (uint8_t)(edc >> 16);
-	buffer[0x81B] = (uint8_t)(edc >> 24);
-
-	// TODO: ECC
-}
diff --git a/psxavenc/common.h b/psxavenc/common.h
deleted file mode 100644
index 6cf39f9..0000000
--- a/psxavenc/common.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
-psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
-
-Copyright (c) 2019, 2020 Adrian "asie" Siekierka
-Copyright (c) 2019 Ben "GreaseMonkey" Russell
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not
-   claim that you wrote the original software. If you use this software
-   in a product, an acknowledgment in the product documentation would be
-   appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be
-   misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include <assert.h>
-#include <getopt.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <time.h>
-#include <unistd.h>
-
-#include <libavutil/opt.h>
-#include <libavcodec/avcodec.h>
-#include <libavcodec/avdct.h>
-#include <libavformat/avformat.h>
-#include <libswscale/swscale.h>
-#include <libswresample/swresample.h>
-#include <libpsxav.h>
-
-typedef enum {
-	FORMAT_XA,
-	FORMAT_XACD,
-	FORMAT_SPU,
-	FORMAT_SPUI,
-	FORMAT_VAG,
-	FORMAT_VAGI,
-	FORMAT_STR2,
-	FORMAT_STR2CD,
-	FORMAT_STR2V,
-	FORMAT_SBS2,
-	NUM_FORMATS
-} psxavenc_format_t;
-
-typedef struct {
-	int frame_index;
-	int frame_data_offset;
-	int frame_max_size;
-	int frame_block_base_overflow;
-	int frame_block_overflow_num;
-	int frame_block_overflow_den;
-	uint16_t bits_value;
-	int bits_left;
-	uint8_t *frame_output;
-	int bytes_used;
-	int blocks_used;
-	int uncomp_hwords_used;
-	int quant_scale;
-	int quant_scale_sum;
-
-	uint32_t *huffman_encoding_map;
-	int16_t *coeff_clamp_map;
-	int16_t *dct_block_lists[6];
-	AVDCT *dct_context;
-} vid_encoder_state_t;
-
-typedef struct {
-	int video_frame_dst_size;
-	int audio_stream_index;
-	int video_stream_index;
-	AVFormatContext* format;
-	AVStream* audio_stream;
-	AVStream* video_stream;
-	AVCodecContext* audio_codec_context;
-	AVCodecContext* video_codec_context;
-	struct SwrContext* resampler;
-	struct SwsContext* scaler;
-	AVFrame* frame;
-
-	int sample_count_mul;
-
-	double video_next_pts;
-} av_decoder_state_t;
-
-typedef struct {
-	bool quiet;
-	bool show_progress;
-
-	int format; // FORMAT_*
-	int channels;
-	int cd_speed; // 1 or 2
-	int frequency; // 18900 or 37800 Hz
-	int bits_per_sample; // 4 or 8
-	int file_number; // 00-FF
-	int channel_number; // 00-1F
-	int interleave;
-	int alignment;
-	bool loop;
-
-	int video_width;
-	int video_height;
-	int video_fps_num; // FPS numerator
-	int video_fps_den; // FPS denominator
-	bool ignore_aspect_ratio;
-
-	char *swresample_options;
-	char *swscale_options;
-
-	int16_t *audio_samples;
-	int audio_sample_count;
-	uint8_t *video_frames;
-	int video_frame_count;
-
-	av_decoder_state_t decoder_state_av;
-	vid_encoder_state_t state_vid;
-	bool end_of_input;
-
-	time_t start_time;
-	time_t last_progress_update;
-} settings_t;
-
-// cdrom.c
-void init_sector_buffer_video(psx_cdrom_sector_mode2_t *buffer, settings_t *settings);
-void calculate_edc_data(uint8_t *buffer);
-
-// decoding.c
-bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required);
-bool poll_av_data(settings_t *settings);
-bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames);
-void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames);
-void close_av_data(settings_t *settings);
-
-// filefmt.c
-void encode_file_spu(settings_t *settings, FILE *output);
-void encode_file_spu_interleaved(settings_t *settings, FILE *output);
-void encode_file_xa(settings_t *settings, FILE *output);
-void encode_file_str(settings_t *settings, FILE *output);
-void encode_file_sbs(settings_t *settings, FILE *output);
-
-// mdec.c
-bool init_encoder_state(settings_t *settings);
-void destroy_encoder_state(settings_t *settings);
-void encode_frame_bs(uint8_t *video_frame, settings_t *settings);
-void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings);
diff --git a/psxavenc/decoding.c b/psxavenc/decoding.c
index 54a9124..a29e90a 100644
--- a/psxavenc/decoding.c
+++ b/psxavenc/decoding.c
@@ -22,30 +22,57 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#include "common.h"
-
-int decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
-	int ret;
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libavutil/opt.h>
+#include <libavcodec/avcodec.h>
+#include <libavcodec/avdct.h>
+#include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
+#include <libswscale/swscale.h>
+#include "args.h"
+#include "decoding.h"
 
+static int decode_frame(
+	AVCodecContext *codec,
+	AVFrame *frame,
+	int *frame_size,
+	AVPacket *packet
+) {
 	if (packet != NULL) {
-		ret = avcodec_send_packet(codec, packet);
-		if (ret != 0) {
+		if (avcodec_send_packet(codec, packet) != 0)
 			return 0;
-		}
 	}
 
-	ret = avcodec_receive_frame(codec, frame);
+	int ret = avcodec_receive_frame(codec, frame);
+
 	if (ret >= 0) {
 		*frame_size = ret;
 		return 1;
+	} else if (ret == AVERROR(EAGAIN)) {
+		return 1;
 	} else {
-		return ret == AVERROR(EAGAIN) ? 1 : 0;
+		return 0;
 	}
 }
 
-bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
+bool open_av_data(decoder_t *decoder, const args_t *args, int flags) {
+	decoder->audio_samples = NULL;
+	decoder->audio_sample_count = 0;
+	decoder->video_frames = NULL;
+	decoder->video_frame_count = 0;
+
+	decoder->video_width = args->video_width;
+	decoder->video_height = args->video_height;
+	decoder->video_fps_num = args->str_fps_num;
+	decoder->video_fps_den = args->str_fps_den;
+	decoder->end_of_input = false;
+
+	decoder_state_t *av = &(decoder->state);
+
 	av->video_next_pts = 0.0;
 	av->frame = NULL;
 	av->video_frame_dst_size = 0;
@@ -59,19 +86,17 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 	av->resampler = NULL;
 	av->scaler = NULL;
 
-	if (settings->quiet) {
+	if (args->flags & FLAG_QUIET)
 		av_log_set_level(AV_LOG_QUIET);
-	}
 
 	av->format = avformat_alloc_context();
-	if (avformat_open_input(&(av->format), filename, NULL, NULL)) {
-		return false;
-	}
-	if (avformat_find_stream_info(av->format, NULL) < 0) {
-		return false;
-	}
 
-	if (use_audio) {
+	if (avformat_open_input(&(av->format), args->input_file, NULL, NULL))
+		return false;
+	if (avformat_find_stream_info(av->format, NULL) < 0)
+		return false;
+
+	if (flags & DECODER_USE_AUDIO) {
 		for (int i = 0; i < av->format->nb_streams; i++) {
 			if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
 				if (av->audio_stream_index >= 0) {
@@ -81,13 +106,14 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 				av->audio_stream_index = i;
 			}
 		}
-		if (audio_required && av->audio_stream_index == -1) {
+
+		if ((flags & DECODER_AUDIO_REQUIRED) && av->audio_stream_index == -1) {
 			fprintf(stderr, "Input file has no audio data\n");
 			return false;
 		}
 	}
 
-	if (use_video) {
+	if (flags & DECODER_USE_VIDEO) {
 		for (int i = 0; i < av->format->nb_streams; i++) {
 			if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
 				if (av->video_stream_index >= 0) {
@@ -97,7 +123,8 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 				av->video_stream_index = i;
 			}
 		}
-		if (video_required && av->video_stream_index == -1) {
+
+		if ((flags & DECODER_VIDEO_REQUIRED) && av->video_stream_index == -1) {
 			fprintf(stderr, "Input file has no video data\n");
 			return false;
 		}
@@ -109,34 +136,39 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 	if (av->audio_stream != NULL) {
 		const AVCodec *codec = avcodec_find_decoder(av->audio_stream->codecpar->codec_id);
 		av->audio_codec_context = avcodec_alloc_context3(codec);
-		if (av->audio_codec_context == NULL) {
+
+		if (av->audio_codec_context == NULL)
 			return false;
-		}
-		if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0) {
+		if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0)
 			return false;
-		}
-		if (avcodec_open2(av->audio_codec_context, codec, NULL) < 0) {
+		if (avcodec_open2(av->audio_codec_context, codec, NULL) < 0)
 			return false;
-		}
 
 		AVChannelLayout layout;
-		layout.nb_channels = settings->channels;
-		if (settings->channels <= 2) {
+		layout.nb_channels = args->audio_channels;
+
+		if (args->audio_channels == 1) {
 			layout.order = AV_CHANNEL_ORDER_NATIVE;
-			layout.u.mask = (settings->channels == 2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
+			layout.u.mask = AV_CH_LAYOUT_MONO;
+		} else if (args->audio_channels == 2) {
+			layout.order = AV_CHANNEL_ORDER_NATIVE;
+			layout.u.mask = AV_CH_LAYOUT_STEREO;
 		} else {
 			layout.order = AV_CHANNEL_ORDER_UNSPEC;
 		}
-		if (!settings->quiet && settings->channels > av->audio_codec_context->ch_layout.nb_channels) {
-			fprintf(stderr, "Warning: input file has less than %d channels\n", settings->channels);
+
+		if (!(args->flags & FLAG_QUIET)) {
+			if (args->audio_channels > av->audio_codec_context->ch_layout.nb_channels)
+				fprintf(stderr, "Warning: input file has less than %d channels\n", args->audio_channels);
 		}
 
-		av->sample_count_mul = settings->channels;
+		av->sample_count_mul = args->audio_channels;
+
 		if (swr_alloc_set_opts2(
 			&av->resampler,
 			&layout,
 			AV_SAMPLE_FMT_S16,
-			settings->frequency,
+			args->audio_frequency,
 			&av->audio_codec_context->ch_layout,
 			av->audio_codec_context->sample_fmt,
 			av->audio_codec_context->sample_rate,
@@ -145,47 +177,43 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 		) < 0) {
 			return false;
 		}
-		if (settings->swresample_options) {
-			if (av_opt_set_from_string(av->resampler, settings->swresample_options, NULL, "=", ":,") < 0) {
+		if (args->swresample_options) {
+			if (av_opt_set_from_string(av->resampler, args->swresample_options, NULL, "=", ":,") < 0)
 				return false;
-			}
 		}
-
-		if (swr_init(av->resampler) < 0) {
+		if (swr_init(av->resampler) < 0)
 			return false;
-		}
 	}
 
 	if (av->video_stream != NULL) {
 		const AVCodec *codec = avcodec_find_decoder(av->video_stream->codecpar->codec_id);
 		av->video_codec_context = avcodec_alloc_context3(codec);
-		if(av->video_codec_context == NULL) {
+
+		if (av->video_codec_context == NULL)
 			return false;
-		}
-		if (avcodec_parameters_to_context(av->video_codec_context, av->video_stream->codecpar) < 0) {
+		if (avcodec_parameters_to_context(av->video_codec_context, av->video_stream->codecpar) < 0)
 			return false;
-		}
-		if (avcodec_open2(av->video_codec_context, codec, NULL) < 0) {
+		if (avcodec_open2(av->video_codec_context, codec, NULL) < 0)
 			return false;
+
+		if (!(args->flags & FLAG_QUIET)) {
+			if (
+				decoder->video_width > av->video_codec_context->width ||
+				decoder->video_height > av->video_codec_context->height
+			)
+				fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n", decoder->video_width, decoder->video_height);
 		}
 
-		if (!settings->quiet && (
-			settings->video_width > av->video_codec_context->width ||
-			settings->video_height > av->video_codec_context->height
-		)) {
-			fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n",
-				settings->video_width, settings->video_height
-			);
-		}
-		if (!settings->ignore_aspect_ratio) {
+		if (!(args->flags & FLAG_BS_IGNORE_ASPECT)) {
 			// Reduce the provided size so that it matches the input file's
 			// aspect ratio.
 			double src_ratio = (double)av->video_codec_context->width / (double)av->video_codec_context->height;
-			double dst_ratio = (double)settings->video_width / (double)settings->video_height;
+			double dst_ratio = (double)decoder->video_width / (double)decoder->video_height;
+
 			if (src_ratio < dst_ratio) {
-				settings->video_width = (int)((double)settings->video_height * src_ratio + 15.0) & ~15;
+				decoder->video_width = (int)((double)decoder->video_height * src_ratio + 15.0) & ~15;
 			} else {
-				settings->video_height = (int)((double)settings->video_width / src_ratio + 15.0) & ~15;
+				decoder->video_height = (int)((double)decoder->video_width / src_ratio + 15.0) & ~15;
 			}
 		}
 
@@ -193,17 +221,16 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 			av->video_codec_context->width,
 			av->video_codec_context->height,
 			av->video_codec_context->pix_fmt,
-			settings->video_width,
-			settings->video_height,
+			decoder->video_width,
+			decoder->video_height,
 			AV_PIX_FMT_NV21,
 			SWS_BICUBIC,
 			NULL,
 			NULL,
 			NULL
 		);
-		if (av->scaler == NULL) {
+		if (av->scaler == NULL)
 			return false;
-		}
 		if (sws_setColorspaceDetails(
 			av->scaler,
 			sws_getCoefficients(av->video_codec_context->colorspace),
@@ -213,189 +240,211 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo
 			0,
 			1 << 16,
 			1 << 16
-		) < 0) {
+		) < 0)
 			return false;
-		}
-		if (settings->swscale_options) {
-			if (av_opt_set_from_string(av->scaler, settings->swscale_options, NULL, "=", ":,") < 0) {
+		if (args->swscale_options) {
+			if (av_opt_set_from_string(av->scaler, args->swscale_options, NULL, "=", ":,") < 0)
 				return false;
-			}
 		}
 
-		av->video_frame_dst_size = 3*settings->video_width*settings->video_height/2;
+		av->video_frame_dst_size = 3 * decoder->video_width * decoder->video_height / 2;
 	}
 
 	av->frame = av_frame_alloc();
-	if (av->frame == NULL) {
-		return false;
-	}
 
-	settings->audio_samples = NULL;
-	settings->audio_sample_count = 0;
-	settings->video_frames = NULL;
-	settings->video_frame_count = 0;
-	settings->end_of_input = false;
+	if (av->frame == NULL)
+		return false;
 
 	return true;
 }
 
-static void poll_av_packet_audio(settings_t *settings, AVPacket *packet)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
+static void poll_av_packet_audio(decoder_t *decoder, AVPacket *packet) {
+	decoder_state_t *av = &(decoder->state);
 
 	int frame_size, frame_sample_count;
 	uint8_t *buffer[1];
 
 	if (decode_frame(av->audio_codec_context, av->frame, &frame_size, packet)) {
 		size_t buffer_size = sizeof(int16_t) * av->sample_count_mul * swr_get_out_samples(av->resampler, av->frame->nb_samples);
+
 		buffer[0] = malloc(buffer_size);
 		memset(buffer[0], 0, buffer_size);
-		frame_sample_count = swr_convert(av->resampler, buffer, av->frame->nb_samples, (const uint8_t**)av->frame->data, av->frame->nb_samples);
-		settings->audio_samples = realloc(settings->audio_samples, (settings->audio_sample_count + ((frame_sample_count + 4032) * av->sample_count_mul)) * sizeof(int16_t));
-		memmove(&(settings->audio_samples[settings->audio_sample_count]), buffer[0], sizeof(int16_t) * frame_sample_count * av->sample_count_mul);
-		settings->audio_sample_count += frame_sample_count * av->sample_count_mul;
+
+		frame_sample_count = swr_convert(
+			av->resampler,
+			buffer,
+			av->frame->nb_samples,
+			(const uint8_t**)av->frame->data,
+			av->frame->nb_samples
+		);
+
+		decoder->audio_samples = realloc(
+			decoder->audio_samples,
+			(decoder->audio_sample_count + ((frame_sample_count + 4032) * av->sample_count_mul)) * sizeof(int16_t)
+		);
+		memmove(
+			&(decoder->audio_samples[decoder->audio_sample_count]),
+			buffer[0],
+			sizeof(int16_t) * frame_sample_count * av->sample_count_mul
+		);
+		decoder->audio_sample_count += frame_sample_count * av->sample_count_mul;
 		free(buffer[0]);
 	}
 }
 
-static void poll_av_packet_video(settings_t *settings, AVPacket *packet)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
+static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) {
+	decoder_state_t *av = &(decoder->state);
 
 	int frame_size;
-	double pts_step = ((double)1.0*(double)settings->video_fps_den)/(double)settings->video_fps_num;
+	double pts_step = (double)decoder->video_fps_den / (double)decoder->video_fps_num;
 
-	int plane_size = settings->video_width*settings->video_height;
+	int plane_size = decoder->video_width * decoder->video_height;
 	int dst_strides[2] = {
-		settings->video_width, settings->video_width
+		decoder->video_width, decoder->video_width
 	};
 
 	if (decode_frame(av->video_codec_context, av->frame, &frame_size, packet)) {
-		if (!av->frame->width || !av->frame->height || !av->frame->data[0]) {
+		if (!av->frame->width || !av->frame->height || !av->frame->data[0])
 			return;
-		}
 
 		// Some files seem to have timestamps starting from a negative value
 		// (but otherwise valid) for whatever reason.
-		double pts = (((double)av->frame->pts)*(double)av->video_stream->time_base.num)/av->video_stream->time_base.den;
-		//if (pts < 0.0) {
-			//return;
-		//}
-		if (settings->video_frame_count >= 1 && pts < av->video_next_pts) {
+		double pts =
+			((double)av->frame->pts * (double)av->video_stream->time_base.num)
+			/ av->video_stream->time_base.den;
+#if 0
+		if (pts < 0.0)
 			return;
-		}
-		if ((settings->video_frame_count) < 1) {
+#endif
+		if (decoder->video_frame_count >= 1 && pts < av->video_next_pts)
+			return;
+		if (decoder->video_frame_count < 1)
 			av->video_next_pts = pts;
-		} else {
+		else
 			av->video_next_pts += pts_step;
-		}
 
-		//fprintf(stderr, "%d %f %f %f\n", (settings->video_frame_count), pts, av->video_next_pts, pts_step);
+		//fprintf(stderr, "%d %f %f %f\n", decoder->video_frame_count, pts, av->video_next_pts, pts_step);
 
 		// Insert duplicate frames if the frame rate of the input stream is
 		// lower than the target frame rate.
 		int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step);
 		if (dupe_frames < 0) dupe_frames = 0;
-		settings->video_frames = realloc(
-			settings->video_frames,
-			(settings->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size
+		decoder->video_frames = realloc(
+			decoder->video_frames,
+			(decoder->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size
 		);
 
 		for (; dupe_frames; dupe_frames--) {
 			memcpy(
-				(settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count),
-				(settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count-1),
+				(decoder->video_frames) + av->video_frame_dst_size * decoder->video_frame_count,
+				(decoder->video_frames) + av->video_frame_dst_size * (decoder->video_frame_count - 1),
 				av->video_frame_dst_size
 			);
-			settings->video_frame_count += 1;
+			decoder->video_frame_count += 1;
 			av->video_next_pts += pts_step;
 		}
 
-		uint8_t *dst_frame = (settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count);
+		uint8_t *dst_frame = decoder->video_frames + av->video_frame_dst_size * decoder->video_frame_count;
 		uint8_t *dst_pointers[2] = {
 			dst_frame, dst_frame + plane_size
 		};
-		sws_scale(av->scaler, (const uint8_t *const *) av->frame->data, av->frame->linesize, 0, av->frame->height, dst_pointers, dst_strides);
+		sws_scale(
+			av->scaler,
+			(const uint8_t *const *) av->frame->data,
+			av->frame->linesize,
+			0,
+			av->frame->height,
+			dst_pointers,
+			dst_strides
+		);
 
-		settings->video_frame_count += 1;
+		decoder->video_frame_count += 1;
 	}
 }
 
-bool poll_av_data(settings_t *settings)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
+bool poll_av_data(decoder_t *decoder) { // reads and dispatches one packet; returns false once the input is exhausted
+	decoder_state_t *av = &(decoder->state);
+
+	if (decoder->end_of_input)
+		return false;
+
 	AVPacket packet;
 
-	if (settings->end_of_input) {
-		return false;
-	}
-
 	if (av_read_frame(av->format, &packet) >= 0) {
-		if (packet.stream_index == av->audio_stream_index) {
-			poll_av_packet_audio(settings, &packet);
-		} else if (packet.stream_index == av->video_stream_index) {
-			poll_av_packet_video(settings, &packet);
-		}
+		if (packet.stream_index == av->audio_stream_index)
+			poll_av_packet_audio(decoder, &packet);
+		else if (packet.stream_index == av->video_stream_index)
+			poll_av_packet_video(decoder, &packet);
+
 		av_packet_unref(&packet);
 		return true;
 	} else {
 		// out is always padded out with 4032 "0" samples, this makes calculations elsewhere easier
-		if (av->audio_stream) {
-			memset((settings->audio_samples) + (settings->audio_sample_count), 0, 4032 * av->sample_count_mul * sizeof(int16_t));
-		}
+		if (av->audio_stream && decoder->audio_samples) // buffer is still NULL if no audio packet was ever read; skip instead of writing through NULL
+			memset(
+				decoder->audio_samples + decoder->audio_sample_count,
+				0,
+				4032 * av->sample_count_mul * sizeof(int16_t)
+			);
 
-		settings->end_of_input = true;
+		decoder->end_of_input = true;
 		return false;
 	}
 }
 
-bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames)
-{
-	// HACK: in order to update settings->end_of_input as soon as all data has
+bool ensure_av_data(decoder_t *decoder, int needed_audio_samples, int needed_video_frames) {
+	// HACK: in order to update decoder->end_of_input as soon as all data has
 	// been read from the input file, this loop waits for more data than
 	// strictly needed.
-	//while (settings->audio_sample_count < needed_audio_samples || settings->video_frame_count < needed_video_frames) {
+#if 0
+	while (decoder->audio_sample_count < needed_audio_samples || decoder->video_frame_count < needed_video_frames) {
+#else
 	while (
-		(needed_audio_samples && settings->audio_sample_count <= needed_audio_samples) ||
-		(needed_video_frames && settings->video_frame_count <= needed_video_frames)
+		(needed_audio_samples && decoder->audio_sample_count <= needed_audio_samples) ||
+		(needed_video_frames && decoder->video_frame_count <= needed_video_frames)
 	) {
-		//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames);
-		if (!poll_av_data(settings)) {
+#endif
+		//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", decoder->audio_sample_count, needed_audio_samples, decoder->video_frame_count, needed_video_frames);
+		if (!poll_av_data(decoder)) {
 			// Keep returning true even if the end of the input file has been
 			// reached, if the buffer is not yet completely empty.
-			return (settings->audio_sample_count || !needed_audio_samples)
-				&& (settings->video_frame_count || !needed_video_frames);
+			return
+				(decoder->audio_sample_count || !needed_audio_samples) &&
+				(decoder->video_frame_count || !needed_video_frames);
 		}
 	}
-	//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames);
+	//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", decoder->audio_sample_count, needed_audio_samples, decoder->video_frame_count, needed_video_frames);
 
 	return true;
 }
 
-void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
-
-	//fprintf(stderr, "retire %d -> %d, %d -> %d\n", settings->audio_sample_count, retired_audio_samples, settings->video_frame_count, retired_video_frames);
-	assert(retired_audio_samples <= settings->audio_sample_count);
-	assert(retired_video_frames <= settings->video_frame_count);
+void retire_av_data(decoder_t *decoder, int retired_audio_samples, int retired_video_frames) {
+	//fprintf(stderr, "retire %d -> %d, %d -> %d\n", decoder->audio_sample_count, retired_audio_samples, decoder->video_frame_count, retired_video_frames);
+	assert(retired_audio_samples <= decoder->audio_sample_count);
+	assert(retired_video_frames <= decoder->video_frame_count);
 
 	int sample_size = sizeof(int16_t);
-	if (settings->audio_sample_count > retired_audio_samples) {
-		memmove(settings->audio_samples, settings->audio_samples + retired_audio_samples, (settings->audio_sample_count - retired_audio_samples)*sample_size);
-	}
-	settings->audio_sample_count -= retired_audio_samples;
+	int frame_size = decoder->state.video_frame_dst_size;
 
-	int frame_size = av->video_frame_dst_size;
-	if (settings->video_frame_count > retired_video_frames) {
-		memmove(settings->video_frames, settings->video_frames + retired_video_frames*frame_size, (settings->video_frame_count - retired_video_frames)*frame_size);
-	}
-	settings->video_frame_count -= retired_video_frames;
+	if (decoder->audio_sample_count > retired_audio_samples)
+		memmove(
+			decoder->audio_samples,
+			decoder->audio_samples + retired_audio_samples,
+			(decoder->audio_sample_count - retired_audio_samples) * sample_size
+		);
+	if (decoder->video_frame_count > retired_video_frames)
+		memmove(
+			decoder->video_frames,
+			decoder->video_frames + retired_video_frames * frame_size,
+			(decoder->video_frame_count - retired_video_frames) * frame_size
+		);
+
+	decoder->audio_sample_count -= retired_audio_samples;
+	decoder->video_frame_count -= retired_video_frames;
 }
 
-void close_av_data(settings_t *settings)
-{
-	av_decoder_state_t* av = &(settings->decoder_state_av);
+void close_av_data(decoder_t *decoder) {
+	decoder_state_t *av = &(decoder->state);
 
 	av_frame_free(&(av->frame));
 	swr_free(&(av->resampler));
@@ -404,12 +453,12 @@ void close_av_data(settings_t *settings)
 	avcodec_free_context(&(av->audio_codec_context));
 	avformat_free_context(av->format);
 
-	if(settings->audio_samples != NULL) {
-		free(settings->audio_samples);
-		settings->audio_samples = NULL;
+	if(decoder->audio_samples != NULL) {
+		free(decoder->audio_samples);
+		decoder->audio_samples = NULL;
 	}
-	if(settings->video_frames != NULL) {
-		free(settings->video_frames);
-		settings->video_frames = NULL;
+	if(decoder->video_frames != NULL) {
+		free(decoder->video_frames);
+		decoder->video_frames = NULL;
 	}
 }
diff --git a/psxavenc/decoding.h b/psxavenc/decoding.h
new file mode 100644
index 0000000..ccf0b65
--- /dev/null
+++ b/psxavenc/decoding.h
@@ -0,0 +1,80 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+Copyright (c) 2023, 2025 spicyjpeg
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#pragma once
+
+#include <stdbool.h>
+#include <libavutil/opt.h>
+#include <libavcodec/avcodec.h>
+#include <libavcodec/avdct.h>
+#include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
+#include <libswscale/swscale.h>
+#include "args.h"
+
+typedef struct { // libav-side decoding state; embedded in decoder_t as `state`
+	int video_frame_dst_size; // size in bytes of one frame stored in decoder_t.video_frames
+	int audio_stream_index; // packets with this stream_index are handed to the audio path
+	int video_stream_index; // packets with this stream_index are handed to the video path
+	AVFormatContext* format; // input container, read with av_read_frame()
+	AVStream* audio_stream; // tested against NULL to tell whether audio is being decoded
+	AVStream* video_stream;
+	AVCodecContext* audio_codec_context;
+	AVCodecContext* video_codec_context;
+	struct SwrContext* resampler;
+	struct SwsContext* scaler;
+	AVFrame* frame;
+
+	int sample_count_mul; // scales the trailing zero padding; presumably the channel count (TODO confirm)
+
+	double video_next_pts;
+} decoder_state_t;
+
+typedef struct { // decoded audio/video buffers plus the libav state that fills them
+	int16_t *audio_samples; // buffered PCM samples; consumed from the front by retire_av_data()
+	int audio_sample_count; // number of int16_t values currently held in audio_samples
+	uint8_t *video_frames; // buffered frames, state.video_frame_dst_size bytes each
+	int video_frame_count; // number of frames currently held in video_frames
+
+	int video_width;
+	int video_height;
+	int video_fps_num;
+	int video_fps_den;
+	bool end_of_input; // set by poll_av_data() once av_read_frame() stops returning packets
+
+	decoder_state_t state;
+} decoder_t;
+
+enum { // bit flags; presumably the `flags` argument of open_av_data() -- confirm against its definition
+	DECODER_USE_AUDIO = 1 << 0,
+	DECODER_USE_VIDEO = 1 << 1,
+	DECODER_AUDIO_REQUIRED = 1 << 2,
+	DECODER_VIDEO_REQUIRED = 1 << 3
+};
+
+bool open_av_data(decoder_t *decoder, const args_t *args, int flags);
+bool poll_av_data(decoder_t *decoder); // reads one packet; returns false once the input is exhausted
+bool ensure_av_data(decoder_t *decoder, int needed_audio_samples, int needed_video_frames); // polls until more than the requested amounts are buffered; false if input ended and a requested buffer is empty
+void retire_av_data(decoder_t *decoder, int retired_audio_samples, int retired_video_frames); // drops that many samples/frames from the front of the buffers
+void close_av_data(decoder_t *decoder); // frees the libav contexts and both sample/frame buffers
diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c
index 803ac2d..b00a29b 100644
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -3,7 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
 
 Copyright (c) 2019, 2020 Adrian "asie" Siekierka
 Copyright (c) 2019 Ben "GreaseMonkey" Russell
-Copyright (c) 2023 spicyjpeg
+Copyright (c) 2023, 2025 spicyjpeg
 
 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
@@ -22,48 +22,77 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#include "common.h"
-#include "libpsxav.h"
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <libpsxav.h>
+#include "args.h"
+#include "decoding.h"
+#include "mdec.h"
 
-static time_t get_elapsed_time(settings_t *settings) {
-	if (!settings->show_progress) {
-		return 0;
+static time_t start_time = 0;
+static time_t last_progress_update = 0;
+
+static time_t get_elapsed_time(void) {
+	time_t t;
+
+	if (start_time > 0) {
+		t = time(NULL) - start_time;
+	} else {
+		t = 0;
+		start_time = time(NULL);
 	}
-	time_t t = time(NULL) - settings->start_time;
-	if (t <= settings->last_progress_update) {
+
+	if (t <= last_progress_update)
 		return 0;
-	}
-	settings->last_progress_update = t;
+
+	last_progress_update = t;
 	return t;
 }
 
-static psx_audio_xa_settings_t settings_to_libpsxav_xa_audio(settings_t *settings) {
-	psx_audio_xa_settings_t new_settings;
-	new_settings.bits_per_sample = settings->bits_per_sample;
-	new_settings.frequency = settings->frequency;
-	new_settings.stereo = settings->channels == 2;
-	new_settings.file_number = settings->file_number;
-	new_settings.channel_number = settings->channel_number;
+static psx_audio_xa_settings_t args_to_libpsxav_xa_audio(const args_t *args) {
+	psx_audio_xa_settings_t settings;
 
-	switch (settings->format) {
-		case FORMAT_XA:
-		case FORMAT_STR2:
-			new_settings.format = PSX_AUDIO_XA_FORMAT_XA;
-			break;
-		default:
-			new_settings.format = PSX_AUDIO_XA_FORMAT_XACD;
-			break;
-	}
+	settings.bits_per_sample = args->audio_bit_depth;
+	settings.frequency = args->audio_frequency;
+	settings.stereo = (args->audio_channels == 2);
+	settings.file_number = args->audio_xa_file;
+	settings.channel_number = args->audio_xa_channel;
 
-	return new_settings;
+	if (args->format == FORMAT_XACD || args->format == FORMAT_STRCD)
+		settings.format = PSX_AUDIO_XA_FORMAT_XACD;
+	else
+		settings.format = PSX_AUDIO_XA_FORMAT_XA;
+
+	return settings;
 };
 
-void write_vag_header(int size_per_channel, uint8_t *header, settings_t *settings) {
+static void init_sector_buffer_video(const args_t *args, psx_cdrom_sector_mode2_t *sector, int lba) {
+	psx_cdrom_init_sector((psx_cdrom_sector_t *)sector, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1);
+	// Video sectors are Mode 2 Form 1; the subheader takes file/channel from args and the DATA|RT submode.
+	sector->subheader[0].file = args->audio_xa_file;
+	sector->subheader[0].channel = args->audio_xa_channel & PSX_CDROM_SECTOR_XA_CHANNEL_MASK;
+	sector->subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA | PSX_CDROM_SECTOR_XA_SUBMODE_RT;
+	sector->subheader[0].coding = 0;
+	// The second subheader entry is a byte-for-byte copy of the first.
+	memcpy(sector->subheader + 1, sector->subheader, sizeof(psx_cdrom_sector_xa_subheader_t));
+}
+
+#define VAG_HEADER_SIZE 0x30
+
+static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *header) {
+	memset(header, 0, VAG_HEADER_SIZE);
+
 	// Magic
 	header[0x00] = 'V';
 	header[0x01] = 'A';
 	header[0x02] = 'G';
-	header[0x03] = settings->interleave ? 'i' : 'p';
+
+	if (args->format == FORMAT_VAGI)
+		header[0x03] = 'i';
+	else
+	 	header[0x03] = 'p';
 
 	// Version (big-endian)
 	header[0x04] = 0x00;
@@ -72,150 +101,228 @@ void write_vag_header(int size_per_channel, uint8_t *header, settings_t *setting
 	header[0x07] = 0x20;
 
 	// Interleave (little-endian)
-	header[0x08] = (uint8_t)settings->interleave;
-	header[0x09] = (uint8_t)(settings->interleave>>8);
-	header[0x0a] = (uint8_t)(settings->interleave>>16);
-	header[0x0b] = (uint8_t)(settings->interleave>>24);
+	if (args->format == FORMAT_VAGI) {
+		header[0x08] = (uint8_t)args->audio_interleave;
+		header[0x09] = (uint8_t)(args->audio_interleave >> 8);
+		header[0x0a] = (uint8_t)(args->audio_interleave >> 16);
+		header[0x0b] = (uint8_t)(args->audio_interleave >> 24);
+	}
 
 	// Length of data for each channel (big-endian)
-	header[0x0c] = (uint8_t)(size_per_channel>>24);
-	header[0x0d] = (uint8_t)(size_per_channel>>16);
-	header[0x0e] = (uint8_t)(size_per_channel>>8);
+	header[0x0c] = (uint8_t)(size_per_channel >> 24);
+	header[0x0d] = (uint8_t)(size_per_channel >> 16);
+	header[0x0e] = (uint8_t)(size_per_channel >> 8);
 	header[0x0f] = (uint8_t)size_per_channel;
 
 	// Sample rate (big-endian)
-	header[0x10] = (uint8_t)(settings->frequency>>24);
-	header[0x11] = (uint8_t)(settings->frequency>>16);
-	header[0x12] = (uint8_t)(settings->frequency>>8);
-	header[0x13] = (uint8_t)settings->frequency;
+	header[0x10] = (uint8_t)(args->audio_frequency >> 24);
+	header[0x11] = (uint8_t)(args->audio_frequency >> 16);
+	header[0x12] = (uint8_t)(args->audio_frequency >> 8);
+	header[0x13] = (uint8_t)args->audio_frequency;
 
 	// Number of channels (little-endian)
-	header[0x1e] = (uint8_t)settings->channels;
+	header[0x1e] = (uint8_t)args->audio_channels;
 	header[0x1f] = 0x00;
 
 	// Filename
-	//strncpy(header + 0x20, "psxavenc", 16);
-	memset(header + 0x20, 0, 16);
+	int name_offset = strlen(args->output_file);
+	while (
+		name_offset > 0 &&
+		args->output_file[name_offset - 1] != '/' &&
+		args->output_file[name_offset - 1] != '\\'
+	)
+		name_offset--;
+
+	strncpy((char*)(header + 0x20), &args->output_file[name_offset], 16);
 }
 
-void encode_file_spu(settings_t *settings, FILE *output) {
-	psx_audio_encoder_channel_state_t audio_state;	
+void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output) {
+	psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args);
+	// Writes the decoder's audio to output as XA-ADPCM sectors; args is only read (matches filefmt.h).
+	int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
+
+	psx_audio_encoder_state_t audio_state;
+	memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
+	// One sector per iteration, until ensure_av_data() reports that no audio is left.
+	for (int j = 0; ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, 0); j++) {
+		int samples_length = decoder->audio_sample_count / args->audio_channels;
+
+		if (samples_length > audio_samples_per_sector)
+			samples_length = audio_samples_per_sector;
+
+		uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
+		int length = psx_audio_xa_encode(
+			xa_settings,
+			&audio_state,
+			decoder->audio_samples,
+			samples_length,
+			buffer
+		);
+
+		if (decoder->end_of_input)
+			psx_audio_xa_encode_finalize(xa_settings, buffer, length);
+
+		if (args->format == FORMAT_XACD) {
+			int t = j + 75*2;
+
+			// Store t as three BCD bytes: t/75/60, (t/75)%60 and t%75
+			buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4);
+			buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4);
+			buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
+		}
+
+		retire_av_data(decoder, samples_length * args->audio_channels, 0);
+		fwrite(buffer, length, 1, output);
+
+		time_t t = get_elapsed_time();
+
+		if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+			fprintf(
+				stderr,
+				"\rLBA: %6d | Encoding speed: %5.2fx",
+				j,
+				(double)(j * audio_samples_per_sector) / (double)(args->audio_frequency * t)
+			);
+		}
+	}
+}
+
+void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
+	psx_audio_encoder_channel_state_t audio_state;
+	memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t));
+	// Encodes the sample buffer as a single channel of SPU-ADPCM blocks; args is only read (matches filefmt.h).
 	int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
 	int block_size = psx_audio_spu_get_buffer_size_per_block();
-	uint8_t buffer[16];
 	int block_count;
 
-	memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t));
-
 	// The header must be written after the data as we don't yet know the
 	// number of audio samples.
-	if (settings->format == FORMAT_VAG) {
-		fseek(output, 48, SEEK_SET);
-	}
+	if (args->format == FORMAT_VAG)
+		fseek(output, VAG_HEADER_SIZE, SEEK_SET);
 
-	for (block_count = 0; ensure_av_data(settings, audio_samples_per_block, 0); block_count++) {
-		int samples_length = settings->audio_sample_count;
-		if (samples_length > audio_samples_per_block) samples_length = audio_samples_per_block;
+	for (block_count = 0; ensure_av_data(decoder, audio_samples_per_block, 0); block_count++) {
+		int samples_length = decoder->audio_sample_count;
 
-		int length = psx_audio_spu_encode(&audio_state, settings->audio_samples, samples_length, 1, buffer);
-		if (!block_count) {
-			// This flag is not required as the SPU already resets the loop
-			// address when starting playback of a sample.
-			//buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
-		}
-		if (settings->end_of_input) {
-			buffer[1] |= settings->loop ? PSX_AUDIO_SPU_LOOP_REPEAT : PSX_AUDIO_SPU_LOOP_END;
+		if (samples_length > audio_samples_per_block)
+			samples_length = audio_samples_per_block;
+
+		uint8_t buffer[16];
+		int length = psx_audio_spu_encode(
+			&audio_state,
+			decoder->audio_samples,
+			samples_length,
+			1,
+			buffer
+		);
+
+		// TODO: implement proper loop flag support
+		if (false)
+			buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
+		if (decoder->end_of_input) {
+			if (args->flags & FLAG_SPU_LOOP_END)
+				buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
+			else
+				buffer[1] |= PSX_AUDIO_SPU_LOOP_END;
 		}
 
-		retire_av_data(settings, samples_length, 0);
+		retire_av_data(decoder, samples_length, 0);
 		fwrite(buffer, length, 1, output);
 
-		time_t t = get_elapsed_time(settings);
-		if (t) {
-			fprintf(stderr, "\rBlock: %6d | Encoding speed: %5.2fx",
+		time_t t = get_elapsed_time();
+
+		if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+			fprintf(
+				stderr,
+				"\rBlock: %6d | Encoding speed: %5.2fx",
 				block_count,
-				(double)(block_count*audio_samples_per_block) / (double)(settings->frequency*t)
+				(double)(block_count * audio_samples_per_block) / (double)(args->audio_frequency * t)
 			);
 		}
 	}
 
-	int padding_size = (block_count*block_size) % settings->alignment;
-	if (padding_size) {
-		padding_size = settings->alignment - padding_size;
-		uint8_t *padding = malloc(padding_size);
-		memset(padding, 0, padding_size);
-		fwrite(padding, padding_size, 1, output);
-		free(padding);
-	}
+	int overflow = (block_count * block_size) % args->alignment;
+
+	if (overflow) {
+		for (int i = 0; i < (args->alignment - overflow); i++)
+			fputc(0, output);
+	}
+	if (args->format == FORMAT_VAG) {
+		uint8_t header[VAG_HEADER_SIZE];
+		write_vag_header(args, block_count * block_size, header);
 
-	if (settings->format == FORMAT_VAG) {
-		uint8_t header[48];
-		memset(header, 0, 48);
-		write_vag_header(block_count*block_size, header, settings);
 		fseek(output, 0, SEEK_SET);
-		fwrite(header, 48, 1, output);
+		fwrite(header, VAG_HEADER_SIZE, 1, output);
 	}
 }
 
-void encode_file_spu_interleaved(settings_t *settings, FILE *output) {
-	int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * settings->channels;
+void encode_file_spui(args_t *args, decoder_t *decoder, FILE *output) {
+	int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels;
 
 	// NOTE: since the interleaved .vag format is not standardized, some tools
 	// (such as vgmstream) will not properly play files with interleave < 2048,
 	// alignment != 2048 or channels != 2.
-	int buffer_size = settings->interleave + settings->alignment - 1;
-	buffer_size -= buffer_size % settings->alignment;
-	int header_size = 48 + settings->alignment - 1;
-	header_size -= header_size % settings->alignment;
+	int buffer_size = args->audio_interleave + args->alignment - 1;
+	buffer_size -= buffer_size % args->alignment;
+
+	int header_size = VAG_HEADER_SIZE + args->alignment - 1;
+	header_size -= header_size % args->alignment;
+
+	int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
+	int block_size = psx_audio_spu_get_buffer_size_per_block();
+	int audio_samples_per_chunk = args->audio_interleave / block_size * audio_samples_per_block;
+	int chunk_count;
+
+	if (args->format == FORMAT_VAGI)
+		fseek(output, header_size, SEEK_SET);
 
 	psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size);
 	uint8_t *buffer = malloc(buffer_size);
-	int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
-	int block_size = psx_audio_spu_get_buffer_size_per_block();
-	int audio_samples_per_chunk = settings->interleave / block_size * audio_samples_per_block;
-	int chunk_count;
-
 	memset(audio_state, 0, audio_state_size);
 
-	if (settings->format == FORMAT_VAGI) {
-		fseek(output, header_size, SEEK_SET);
-	}
-
-	for (chunk_count = 0; ensure_av_data(settings, audio_samples_per_chunk*settings->channels, 0); chunk_count++) {
-		int samples_length = settings->audio_sample_count / settings->channels;
+	for (chunk_count = 0; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) {
+		int samples_length = decoder->audio_sample_count / args->audio_channels;
 		if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk;
 
-		for (int ch = 0; ch < settings->channels; ch++) {
+		for (int ch = 0; ch < args->audio_channels; ch++) {
 			memset(buffer, 0, buffer_size);
-			int length = psx_audio_spu_encode(audio_state + ch, settings->audio_samples + ch, samples_length, settings->channels, buffer);
+			int length = psx_audio_spu_encode(
+				audio_state + ch,
+				decoder->audio_samples + ch,
+				samples_length,
+				args->audio_channels,
+				buffer
+			);
+
 			if (length) {
-				//buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
-				if (settings->loop) {
+				// TODO: implement proper loop flag support
+				if (args->flags & FLAG_SPU_LOOP_END)
 					buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
-				}
-				if (settings->end_of_input) {
+				else if (decoder->end_of_input)
 					buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_END;
-				}
 			}
 
 			fwrite(buffer, buffer_size, 1, output);
 
-			time_t t = get_elapsed_time(settings);
-			if (t) {
-				fprintf(stderr, "\rChunk: %6d | Encoding speed: %5.2fx",
+			time_t t = get_elapsed_time();
+
+			if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+				fprintf(
+					stderr,
+					"\rChunk: %6d | Encoding speed: %5.2fx",
 					chunk_count,
-					(double)(chunk_count*audio_samples_per_chunk) / (double)(settings->frequency*t)
+					(double)(chunk_count * audio_samples_per_chunk) / (double)(args->audio_frequency * t)
 				);
 			}
 		}
 
-		retire_av_data(settings, samples_length*settings->channels, 0);
+		retire_av_data(decoder, samples_length * args->audio_channels, 0);
 	}
 
-	if (settings->format == FORMAT_VAGI) {
+	if (args->format == FORMAT_VAGI) {
 		uint8_t *header = malloc(header_size);
 		memset(header, 0, header_size);
-		write_vag_header(chunk_count*settings->interleave, header, settings);
+		write_vag_header(args, chunk_count * args->audio_interleave, header);
+
 		fseek(output, 0, SEEK_SET);
 		fwrite(header, header_size, 1, output);
 		free(header);
@@ -225,52 +332,14 @@ void encode_file_spu_interleaved(settings_t *settings, FILE *output) {
 	free(buffer);
 }
 
-void encode_file_xa(settings_t *settings, FILE *output) {
-	psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings);
-	psx_audio_encoder_state_t audio_state;	
-	int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
-	uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
-
-	memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
-
-	for (int j = 0; ensure_av_data(settings, audio_samples_per_sector*settings->channels, 0); j++) {
-		int samples_length = settings->audio_sample_count / settings->channels;
-		if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector;
-		int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer);
-		if (settings->end_of_input) {
-			psx_audio_xa_encode_finalize(xa_settings, buffer, length);
-		}
-
-		if (settings->format == FORMAT_XACD) {
-			int t = j + 75*2;
-
-			// Put the time in
-			buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4);
-			buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4);
-			buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
-		}
-
-		retire_av_data(settings, samples_length*settings->channels, 0);
-		fwrite(buffer, length, 1, output);
-
-		time_t t = get_elapsed_time(settings);
-		if (t) {
-			fprintf(stderr, "\rLBA: %6d | Encoding speed: %5.2fx",
-				j,
-				(double)(j*audio_samples_per_sector) / (double)(settings->frequency*t)
-			);
-		}
-	}
-}
-
-void encode_file_str(settings_t *settings, FILE *output) {
-	psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings);
-	psx_audio_encoder_state_t audio_state;
+void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
+	psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args);
 	int audio_samples_per_sector;
 	uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
 
 	int offset, sector_size;
-	if (settings->format == FORMAT_STR2V) {
+
+	if (args->format == FORMAT_STRV) {
 		sector_size = 2048;
 		offset = 0x18;
 	} else {
@@ -280,16 +349,21 @@ void encode_file_str(settings_t *settings, FILE *output) {
 
 	int interleave;
 	int video_sectors_per_block;
-	if (settings->decoder_state_av.audio_stream) {
+	if (decoder->state.audio_stream) {
 		// 1/N audio, (N-1)/N video
 		audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
-		interleave = psx_audio_xa_get_sector_interleave(xa_settings) * settings->cd_speed;
+		interleave = psx_audio_xa_get_sector_interleave(xa_settings) * args->str_cd_speed;
 		video_sectors_per_block = interleave - 1;
 
-		if (!settings->quiet) {
-			fprintf(stderr, "Interleave: %d/%d audio, %d/%d video\n",
-				interleave - video_sectors_per_block, interleave, video_sectors_per_block, interleave);
-		}
+		if (!(args->flags & FLAG_QUIET))
+			fprintf(
+				stderr,
+				"Interleave: %d/%d audio, %d/%d video\n",
+				interleave - video_sectors_per_block,
+				interleave,
+				video_sectors_per_block,
+				interleave
+			);
 	} else {
 		// 0/1 audio, 1/1 video
 		audio_samples_per_sector = 0;
@@ -297,54 +371,65 @@ void encode_file_str(settings_t *settings, FILE *output) {
 		video_sectors_per_block = 1;
 	}
 
+	psx_audio_encoder_state_t audio_state;
 	memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
 
-	// e.g. 15fps = (150*7/8/15) = 8.75 blocks per frame
-	settings->state_vid.frame_block_base_overflow = (75*settings->cd_speed) * video_sectors_per_block * settings->video_fps_den;
-	settings->state_vid.frame_block_overflow_den = interleave * settings->video_fps_num;
-	double frame_size = (double)settings->state_vid.frame_block_base_overflow / (double)settings->state_vid.frame_block_overflow_den;
-	if (!settings->quiet) {
-		fprintf(stderr, "Frame size: %.2f sectors\n", frame_size);
-	}
+	mdec_encoder_t encoder;
+	init_mdec_encoder(&encoder, args->video_width, args->video_height);
 
-	init_encoder_state(settings);
-	settings->state_vid.frame_output = malloc(2016 * (int)ceil(frame_size));
-	settings->state_vid.frame_index = 0;
-	settings->state_vid.frame_data_offset = 0;
-	settings->state_vid.frame_max_size = 0;
-	settings->state_vid.frame_block_overflow_num = 0;
-	settings->state_vid.quant_scale_sum = 0;
+	// e.g. 15fps = (150*7/8/15) = 8.75 blocks per frame
+	encoder.state.frame_block_base_overflow = (75 * args->str_cd_speed) * video_sectors_per_block * args->str_fps_den;
+	encoder.state.frame_block_overflow_den = interleave * args->str_fps_num;
+	double frame_size = (double)encoder.state.frame_block_base_overflow / (double)encoder.state.frame_block_overflow_den;
+
+	if (!(args->flags & FLAG_QUIET))
+		fprintf(stderr, "Frame size: %.2f sectors\n", frame_size);
+
+	encoder.state.frame_output = malloc(2016 * (int)ceil(frame_size));
+	encoder.state.frame_index = 0;
+	encoder.state.frame_data_offset = 0;
+	encoder.state.frame_max_size = 0;
+	encoder.state.frame_block_overflow_num = 0;
+	encoder.state.quant_scale_sum = 0;
 
 	// FIXME: this needs an extra frame to prevent A/V desync
 	int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size);
 	if (frames_needed < 2) frames_needed = 2;
 
-	for (int j = 0; !settings->end_of_input || settings->state_vid.frame_data_offset < settings->state_vid.frame_max_size; j++) {
-		ensure_av_data(settings, audio_samples_per_sector*settings->channels, frames_needed);
+	for (int j = 0; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; j++) {
+		ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed);
 
 		if ((j%interleave) < video_sectors_per_block) {
 			// Video sector
-			init_sector_buffer_video((psx_cdrom_sector_mode2_t*) buffer, settings);
-			encode_sector_str(settings->video_frames, buffer, settings);
+			init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*) buffer, j);
+
+			int frames_used = encode_sector_str(&encoder, decoder->video_frames, buffer);
+			retire_av_data(decoder, 0, frames_used);
 		} else {
 			// Audio sector
-			int samples_length = settings->audio_sample_count / settings->channels;
+			int samples_length = decoder->audio_sample_count / args->audio_channels;
 			if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector;
 
 			// FIXME: this is an extremely hacky way to handle audio tracks
 			// shorter than the video track
-			if (!samples_length) {
+			if (!samples_length)
 				video_sectors_per_block++;
-			}
 
-			int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer);
-			if (settings->end_of_input) {
+			int length = psx_audio_xa_encode(
+				xa_settings,
+				&audio_state,
+				decoder->audio_samples,
+				samples_length,
+				buffer
+			);
+
+			if (decoder->end_of_input)
 				psx_audio_xa_encode_finalize(xa_settings, buffer, length);
-			}
-			retire_av_data(settings, samples_length*settings->channels, 0);
+
+			retire_av_data(decoder, samples_length * args->audio_channels, 0);
 		}
 
-		if (settings->format == FORMAT_STR2CD) {
+		if (args->format == FORMAT_STRCD) {
 			int t = j + 75*2;
 
 			// Put the time in
@@ -353,48 +438,57 @@ void encode_file_str(settings_t *settings, FILE *output) {
 			buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
 		}
 
-		if((j%interleave) < video_sectors_per_block) {
-			calculate_edc_data(buffer);
-		}
+		if((j%interleave) < video_sectors_per_block)
+			psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)buffer, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1);
 
 		fwrite(buffer + offset, sector_size, 1, output);
 
-		time_t t = get_elapsed_time(settings);
-		if (t) {
-			fprintf(stderr, "\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
-				settings->state_vid.frame_index,
+		time_t t = get_elapsed_time();
+
+		if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+			fprintf(
+				stderr,
+				"\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
+				encoder.state.frame_index,
 				j,
-				(double)settings->state_vid.quant_scale_sum / (double)settings->state_vid.frame_index,
-				(double)(settings->state_vid.frame_index*settings->video_fps_den) / (double)(t*settings->video_fps_num)
+				(double)encoder.state.quant_scale_sum / (double)encoder.state.frame_index,
+				(double)(encoder.state.frame_index * args->str_fps_den) / (double)(t * args->str_fps_num)
 			);
 		}
 	}
 
-	free(settings->state_vid.frame_output);
-	destroy_encoder_state(settings);
+	free(encoder.state.frame_output);
+	destroy_mdec_encoder(&encoder);
 }
 
-void encode_file_sbs(settings_t *settings, FILE *output) {
-	init_encoder_state(settings);
-	settings->state_vid.frame_output = malloc(settings->alignment);
-	settings->state_vid.frame_data_offset = 0;
-	settings->state_vid.frame_max_size = settings->alignment;
-	settings->state_vid.quant_scale_sum = 0;
+void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output) { // args is only read (matches filefmt.h)
+	mdec_encoder_t encoder;
+	init_mdec_encoder(&encoder, args->video_width, args->video_height);
 
-	for (int j = 0; ensure_av_data(settings, 0, 1); j++) {
-		encode_frame_bs(settings->video_frames, settings);
-		fwrite(settings->state_vid.frame_output, settings->alignment, 1, output);
+	encoder.state.frame_output = malloc(args->alignment);
+	encoder.state.frame_data_offset = 0;
+	encoder.state.frame_max_size = args->alignment;
+	encoder.state.quant_scale_sum = 0;
 
-		time_t t = get_elapsed_time(settings);
-		if (t) {
-			fprintf(stderr, "\rFrame: %4d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
+	for (int j = 0; ensure_av_data(decoder, 0, 1); j++) {
+		encode_frame_bs(&encoder, decoder->video_frames);
+		// The frame has been encoded; drop it, then emit one alignment-sized chunk.
+		retire_av_data(decoder, 0, 1);
+		fwrite(encoder.state.frame_output, args->alignment, 1, output);
+
+		time_t t = get_elapsed_time();
+
+		if (!(args->flags & FLAG_HIDE_PROGRESS) && t) {
+			fprintf(
+				stderr,
+				"\rFrame: %4d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
 				j,
-				(double)settings->state_vid.quant_scale_sum / (double)j,
-				(double)(j*settings->video_fps_den) / (double)(t*settings->video_fps_num)
+				(double)encoder.state.quant_scale_sum / (double)j,
+				(double)(j * args->str_fps_den) / (double)(t * args->str_fps_num)
 			);
 		}
 	}
 
-	free(settings->state_vid.frame_output);
-	destroy_encoder_state(settings);
+	free(encoder.state.frame_output);
+	destroy_mdec_encoder(&encoder);
 }
diff --git a/psxavenc/filefmt.h b/psxavenc/filefmt.h
new file mode 100644
index 0000000..5f8eb38
--- /dev/null
+++ b/psxavenc/filefmt.h
@@ -0,0 +1,35 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+Copyright (c) 2023, 2025 spicyjpeg
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#pragma once
+
+#include <stdio.h>
+#include "args.h"
+#include "decoding.h"
+
+void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output); // main() calls this for FORMAT_XA, FORMAT_XACD
+void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output); // main() calls this for FORMAT_SPU, FORMAT_VAG
+void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output); // main() calls this for FORMAT_SPUI, FORMAT_VAGI
+void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output); // main() calls this for FORMAT_STR, STRCD, STRSPU, STRV
+void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output); // main() calls this for FORMAT_SBS
diff --git a/psxavenc/main.c b/psxavenc/main.c
new file mode 100644
index 0000000..78c0935
--- /dev/null
+++ b/psxavenc/main.c
@@ -0,0 +1,174 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+Copyright (c) 2023 spicyjpeg
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <stdint.h>
+#include <stdio.h>
+#include "args.h"
+#include "decoding.h"
+#include "filefmt.h"
+
+static const char *const bs_codec_names[NUM_BS_CODECS] = { // display names, indexed by args.video_codec
+	"BS v2",
+	"BS v3",
+	"BS v3 (with DC wrapping)"
+};
+
+static const uint8_t decoder_flags[NUM_FORMATS] = { // open_av_data() flags for each output format
+	[FORMAT_XA]     = DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED,
+	[FORMAT_XACD]   = DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED,
+	[FORMAT_SPU]    = DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED,
+	[FORMAT_VAG]    = DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED,
+	[FORMAT_SPUI]   = DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED,
+	[FORMAT_VAGI]   = DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED,
+	[FORMAT_STR]    = DECODER_USE_AUDIO | DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED,
+	[FORMAT_STRCD]  = DECODER_USE_AUDIO | DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED,
+	[FORMAT_STRSPU] = DECODER_USE_AUDIO | DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED,
+	[FORMAT_STRV]   = DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED,
+	[FORMAT_SBS]    = DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED
+};
+
+// Parses the command line, opens the input and output files and runs the
+// encoder selected by args.format. Returns 0 on success, 1 on failure.
+int main(int argc, const char **argv) {
+	args_t args;
+	decoder_t decoder;
+	FILE *output;
+
+	args.flags = 0;
+	args.format = FORMAT_INVALID;
+	args.input_file = NULL;
+	args.output_file = NULL;
+	args.swresample_options = NULL;
+	args.swscale_options = NULL;
+
+	if (!parse_args(&args, argv + 1, argc - 1))
+		return 1;
+	if (!open_av_data(&decoder, &args, decoder_flags[args.format])) {
+		fprintf(stderr, "Failed to open input file: %s\n", args.input_file);
+		return 1;
+	}
+
+	output = fopen(args.output_file, "wb");
+	if (output == NULL) {
+		fprintf(stderr, "Failed to open output file: %s\n", args.output_file);
+		close_av_data(&decoder); // release the input that was already opened
+		return 1;
+	}
+
+	// Announce the output format (unless quiet), then run the matching encoder.
+	switch (args.format) {
+		case FORMAT_XA:
+		case FORMAT_XACD:
+			if (!(args.flags & FLAG_QUIET))
+				fprintf(
+					stderr,
+					"Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
+					args.audio_frequency,
+					args.audio_bit_depth,
+					(args.audio_channels == 2) ? "stereo" : "mono",
+					args.audio_xa_file,
+					args.audio_xa_channel
+				);
+			encode_file_xa(&args, &decoder, output);
+			break;
+
+		case FORMAT_SPU:
+		case FORMAT_VAG:
+			if (!(args.flags & FLAG_QUIET))
+				fprintf(
+					stderr,
+					"Audio format: SPU-ADPCM, %d Hz mono\n",
+					args.audio_frequency
+				);
+			encode_file_spu(&args, &decoder, output);
+			break;
+
+		case FORMAT_SPUI:
+		case FORMAT_VAGI:
+			if (!(args.flags & FLAG_QUIET))
+				fprintf(
+					stderr,
+					"Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n",
+					args.audio_frequency,
+					args.audio_channels,
+					args.audio_interleave
+				);
+			encode_file_spui(&args, &decoder, output);
+			break;
+
+		case FORMAT_STR:
+		case FORMAT_STRCD:
+		case FORMAT_STRSPU:
+		case FORMAT_STRV:
+			if (!(args.flags & FLAG_QUIET)) {
+				if (decoder.state.audio_stream)
+					fprintf(
+						stderr,
+						"Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
+						args.audio_frequency,
+						args.audio_bit_depth,
+						(args.audio_channels == 2) ? "stereo" : "mono",
+						args.audio_xa_file,
+						args.audio_xa_channel
+					);
+
+				fprintf(
+					stderr,
+					"Video format: %s, %dx%d, %.2f fps\n",
+					bs_codec_names[args.video_codec],
+					args.video_width,
+					args.video_height,
+					(double)args.str_fps_num / (double)args.str_fps_den
+				);
+			}
+			encode_file_str(&args, &decoder, output);
+			break;
+
+		case FORMAT_SBS:
+			if (!(args.flags & FLAG_QUIET))
+				fprintf(
+					stderr,
+					"Video format: %s, %dx%d, %.2f fps\n",
+					bs_codec_names[args.video_codec],
+					args.video_width,
+					args.video_height,
+					(double)args.str_fps_num / (double)args.str_fps_den
+				);
+			encode_file_sbs(&args, &decoder, output);
+			break;
+
+		default:
+			;
+	}
+
+	// fclose() flushes buffered data, so a failure here means a bad output file.
+	int failed = (fclose(output) != 0);
+	if (failed)
+		fprintf(stderr, "Failed to write output file: %s\n", args.output_file);
+	else if (!(args.flags & FLAG_HIDE_PROGRESS))
+		fprintf(stderr, "\nDone.\n");
+
+	close_av_data(&decoder);
+	return failed ? 1 : 0;
+}
diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c
index 6abffea..095bb33 100644
--- a/psxavenc/mdec.c
+++ b/psxavenc/mdec.c
@@ -22,131 +22,139 @@ freely, subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#include "common.h"
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libavcodec/avdct.h>
+#include "mdec.h"
 
-#define MAKE_HUFFMAN_PAIR(zeroes, value) (((zeroes)<<10)|((+(value))&0x3FF)),(((zeroes)<<10)|((-(value))&0x3FF))
-const struct {
+#define AC_PAIR(zeroes, value) \
+	(((zeroes) << 10) | ((+(value)) & 0x3FF)), \
+	(((zeroes) << 10) | ((-(value)) & 0x3FF))
+
+static const struct {
 	int c_bits;
 	uint32_t c_value;
 	uint16_t u_hword_pos;
 	uint16_t u_hword_neg;
-} huffman_lookup[] = {
+} ac_huffman_tree[] = {
 	// Fuck this Huffman tree in particular --GM
-	{2,0x3,MAKE_HUFFMAN_PAIR(0,1)},
-	{3,0x3,MAKE_HUFFMAN_PAIR(1,1)},
-	{4,0x4,MAKE_HUFFMAN_PAIR(0,2)},
-	{4,0x5,MAKE_HUFFMAN_PAIR(2,1)},
-	{5,0x05,MAKE_HUFFMAN_PAIR(0,3)},
-	{5,0x06,MAKE_HUFFMAN_PAIR(4,1)},
-	{5,0x07,MAKE_HUFFMAN_PAIR(3,1)},
-	{6,0x04,MAKE_HUFFMAN_PAIR(7,1)},
-	{6,0x05,MAKE_HUFFMAN_PAIR(6,1)},
-	{6,0x06,MAKE_HUFFMAN_PAIR(1,2)},
-	{6,0x07,MAKE_HUFFMAN_PAIR(5,1)},
-	{7,0x04,MAKE_HUFFMAN_PAIR(2,2)},
-	{7,0x05,MAKE_HUFFMAN_PAIR(9,1)},
-	{7,0x06,MAKE_HUFFMAN_PAIR(0,4)},
-	{7,0x07,MAKE_HUFFMAN_PAIR(8,1)},
-	{8,0x20,MAKE_HUFFMAN_PAIR(13,1)},
-	{8,0x21,MAKE_HUFFMAN_PAIR(0,6)},
-	{8,0x22,MAKE_HUFFMAN_PAIR(12,1)},
-	{8,0x23,MAKE_HUFFMAN_PAIR(11,1)},
-	{8,0x24,MAKE_HUFFMAN_PAIR(3,2)},
-	{8,0x25,MAKE_HUFFMAN_PAIR(1,3)},
-	{8,0x26,MAKE_HUFFMAN_PAIR(0,5)},
-	{8,0x27,MAKE_HUFFMAN_PAIR(10,1)},
-	{10,0x008,MAKE_HUFFMAN_PAIR(16,1)},
-	{10,0x009,MAKE_HUFFMAN_PAIR(5,2)},
-	{10,0x00A,MAKE_HUFFMAN_PAIR(0,7)},
-	{10,0x00B,MAKE_HUFFMAN_PAIR(2,3)},
-	{10,0x00C,MAKE_HUFFMAN_PAIR(1,4)},
-	{10,0x00D,MAKE_HUFFMAN_PAIR(15,1)},
-	{10,0x00E,MAKE_HUFFMAN_PAIR(14,1)},
-	{10,0x00F,MAKE_HUFFMAN_PAIR(4,2)},
-	{12,0x010,MAKE_HUFFMAN_PAIR(0,11)},
-	{12,0x011,MAKE_HUFFMAN_PAIR(8,2)},
-	{12,0x012,MAKE_HUFFMAN_PAIR(4,3)},
-	{12,0x013,MAKE_HUFFMAN_PAIR(0,10)},
-	{12,0x014,MAKE_HUFFMAN_PAIR(2,4)},
-	{12,0x015,MAKE_HUFFMAN_PAIR(7,2)},
-	{12,0x016,MAKE_HUFFMAN_PAIR(21,1)},
-	{12,0x017,MAKE_HUFFMAN_PAIR(20,1)},
-	{12,0x018,MAKE_HUFFMAN_PAIR(0,9)},
-	{12,0x019,MAKE_HUFFMAN_PAIR(19,1)},
-	{12,0x01A,MAKE_HUFFMAN_PAIR(18,1)},
-	{12,0x01B,MAKE_HUFFMAN_PAIR(1,5)},
-	{12,0x01C,MAKE_HUFFMAN_PAIR(3,3)},
-	{12,0x01D,MAKE_HUFFMAN_PAIR(0,8)},
-	{12,0x01E,MAKE_HUFFMAN_PAIR(6,2)},
-	{12,0x01F,MAKE_HUFFMAN_PAIR(17,1)},
-	{13,0x0010,MAKE_HUFFMAN_PAIR(10,2)},
-	{13,0x0011,MAKE_HUFFMAN_PAIR(9,2)},
-	{13,0x0012,MAKE_HUFFMAN_PAIR(5,3)},
-	{13,0x0013,MAKE_HUFFMAN_PAIR(3,4)},
-	{13,0x0014,MAKE_HUFFMAN_PAIR(2,5)},
-	{13,0x0015,MAKE_HUFFMAN_PAIR(1,7)},
-	{13,0x0016,MAKE_HUFFMAN_PAIR(1,6)},
-	{13,0x0017,MAKE_HUFFMAN_PAIR(0,15)},
-	{13,0x0018,MAKE_HUFFMAN_PAIR(0,14)},
-	{13,0x0019,MAKE_HUFFMAN_PAIR(0,13)},
-	{13,0x001A,MAKE_HUFFMAN_PAIR(0,12)},
-	{13,0x001B,MAKE_HUFFMAN_PAIR(26,1)},
-	{13,0x001C,MAKE_HUFFMAN_PAIR(25,1)},
-	{13,0x001D,MAKE_HUFFMAN_PAIR(24,1)},
-	{13,0x001E,MAKE_HUFFMAN_PAIR(23,1)},
-	{13,0x001F,MAKE_HUFFMAN_PAIR(22,1)},
-	{14,0x0010,MAKE_HUFFMAN_PAIR(0,31)},
-	{14,0x0011,MAKE_HUFFMAN_PAIR(0,30)},
-	{14,0x0012,MAKE_HUFFMAN_PAIR(0,29)},
-	{14,0x0013,MAKE_HUFFMAN_PAIR(0,28)},
-	{14,0x0014,MAKE_HUFFMAN_PAIR(0,27)},
-	{14,0x0015,MAKE_HUFFMAN_PAIR(0,26)},
-	{14,0x0016,MAKE_HUFFMAN_PAIR(0,25)},
-	{14,0x0017,MAKE_HUFFMAN_PAIR(0,24)},
-	{14,0x0018,MAKE_HUFFMAN_PAIR(0,23)},
-	{14,0x0019,MAKE_HUFFMAN_PAIR(0,22)},
-	{14,0x001A,MAKE_HUFFMAN_PAIR(0,21)},
-	{14,0x001B,MAKE_HUFFMAN_PAIR(0,20)},
-	{14,0x001C,MAKE_HUFFMAN_PAIR(0,19)},
-	{14,0x001D,MAKE_HUFFMAN_PAIR(0,18)},
-	{14,0x001E,MAKE_HUFFMAN_PAIR(0,17)},
-	{14,0x001F,MAKE_HUFFMAN_PAIR(0,16)},
-	{15,0x0010,MAKE_HUFFMAN_PAIR(0,40)},
-	{15,0x0011,MAKE_HUFFMAN_PAIR(0,39)},
-	{15,0x0012,MAKE_HUFFMAN_PAIR(0,38)},
-	{15,0x0013,MAKE_HUFFMAN_PAIR(0,37)},
-	{15,0x0014,MAKE_HUFFMAN_PAIR(0,36)},
-	{15,0x0015,MAKE_HUFFMAN_PAIR(0,35)},
-	{15,0x0016,MAKE_HUFFMAN_PAIR(0,34)},
-	{15,0x0017,MAKE_HUFFMAN_PAIR(0,33)},
-	{15,0x0018,MAKE_HUFFMAN_PAIR(0,32)},
-	{15,0x0019,MAKE_HUFFMAN_PAIR(1,14)},
-	{15,0x001A,MAKE_HUFFMAN_PAIR(1,13)},
-	{15,0x001B,MAKE_HUFFMAN_PAIR(1,12)},
-	{15,0x001C,MAKE_HUFFMAN_PAIR(1,11)},
-	{15,0x001D,MAKE_HUFFMAN_PAIR(1,10)},
-	{15,0x001E,MAKE_HUFFMAN_PAIR(1,9)},
-	{15,0x001F,MAKE_HUFFMAN_PAIR(1,8)},
-	{16,0x0010,MAKE_HUFFMAN_PAIR(1,18)},
-	{16,0x0011,MAKE_HUFFMAN_PAIR(1,17)},
-	{16,0x0012,MAKE_HUFFMAN_PAIR(1,16)},
-	{16,0x0013,MAKE_HUFFMAN_PAIR(1,15)},
-	{16,0x0014,MAKE_HUFFMAN_PAIR(6,3)},
-	{16,0x0015,MAKE_HUFFMAN_PAIR(16,2)},
-	{16,0x0016,MAKE_HUFFMAN_PAIR(15,2)},
-	{16,0x0017,MAKE_HUFFMAN_PAIR(14,2)},
-	{16,0x0018,MAKE_HUFFMAN_PAIR(13,2)},
-	{16,0x0019,MAKE_HUFFMAN_PAIR(12,2)},
-	{16,0x001A,MAKE_HUFFMAN_PAIR(11,2)},
-	{16,0x001B,MAKE_HUFFMAN_PAIR(31,1)},
-	{16,0x001C,MAKE_HUFFMAN_PAIR(30,1)},
-	{16,0x001D,MAKE_HUFFMAN_PAIR(29,1)},
-	{16,0x001E,MAKE_HUFFMAN_PAIR(28,1)},
-	{16,0x001F,MAKE_HUFFMAN_PAIR(27,1)},
+	{ 2, 0x3,    AC_PAIR( 0,  1)},
+	{ 3, 0x3,    AC_PAIR( 1,  1)},
+	{ 4, 0x4,    AC_PAIR( 0,  2)},
+	{ 4, 0x5,    AC_PAIR( 2,  1)},
+	{ 5, 0x05,   AC_PAIR( 0,  3)},
+	{ 5, 0x06,   AC_PAIR( 4,  1)},
+	{ 5, 0x07,   AC_PAIR( 3,  1)},
+	{ 6, 0x04,   AC_PAIR( 7,  1)},
+	{ 6, 0x05,   AC_PAIR( 6,  1)},
+	{ 6, 0x06,   AC_PAIR( 1,  2)},
+	{ 6, 0x07,   AC_PAIR( 5,  1)},
+	{ 7, 0x04,   AC_PAIR( 2,  2)},
+	{ 7, 0x05,   AC_PAIR( 9,  1)},
+	{ 7, 0x06,   AC_PAIR( 0,  4)},
+	{ 7, 0x07,   AC_PAIR( 8,  1)},
+	{ 8, 0x20,   AC_PAIR(13,  1)},
+	{ 8, 0x21,   AC_PAIR( 0,  6)},
+	{ 8, 0x22,   AC_PAIR(12,  1)},
+	{ 8, 0x23,   AC_PAIR(11,  1)},
+	{ 8, 0x24,   AC_PAIR( 3,  2)},
+	{ 8, 0x25,   AC_PAIR( 1,  3)},
+	{ 8, 0x26,   AC_PAIR( 0,  5)},
+	{ 8, 0x27,   AC_PAIR(10,  1)},
+	{10, 0x008,  AC_PAIR(16,  1)},
+	{10, 0x009,  AC_PAIR( 5,  2)},
+	{10, 0x00A,  AC_PAIR( 0,  7)},
+	{10, 0x00B,  AC_PAIR( 2,  3)},
+	{10, 0x00C,  AC_PAIR( 1,  4)},
+	{10, 0x00D,  AC_PAIR(15,  1)},
+	{10, 0x00E,  AC_PAIR(14,  1)},
+	{10, 0x00F,  AC_PAIR( 4,  2)},
+	{12, 0x010,  AC_PAIR( 0, 11)},
+	{12, 0x011,  AC_PAIR( 8,  2)},
+	{12, 0x012,  AC_PAIR( 4,  3)},
+	{12, 0x013,  AC_PAIR( 0, 10)},
+	{12, 0x014,  AC_PAIR( 2,  4)},
+	{12, 0x015,  AC_PAIR( 7,  2)},
+	{12, 0x016,  AC_PAIR(21,  1)},
+	{12, 0x017,  AC_PAIR(20,  1)},
+	{12, 0x018,  AC_PAIR( 0,  9)},
+	{12, 0x019,  AC_PAIR(19,  1)},
+	{12, 0x01A,  AC_PAIR(18,  1)},
+	{12, 0x01B,  AC_PAIR( 1,  5)},
+	{12, 0x01C,  AC_PAIR( 3,  3)},
+	{12, 0x01D,  AC_PAIR( 0,  8)},
+	{12, 0x01E,  AC_PAIR( 6,  2)},
+	{12, 0x01F,  AC_PAIR(17,  1)},
+	{13, 0x0010, AC_PAIR(10,  2)},
+	{13, 0x0011, AC_PAIR( 9,  2)},
+	{13, 0x0012, AC_PAIR( 5,  3)},
+	{13, 0x0013, AC_PAIR( 3,  4)},
+	{13, 0x0014, AC_PAIR( 2,  5)},
+	{13, 0x0015, AC_PAIR( 1,  7)},
+	{13, 0x0016, AC_PAIR( 1,  6)},
+	{13, 0x0017, AC_PAIR( 0, 15)},
+	{13, 0x0018, AC_PAIR( 0, 14)},
+	{13, 0x0019, AC_PAIR( 0, 13)},
+	{13, 0x001A, AC_PAIR( 0, 12)},
+	{13, 0x001B, AC_PAIR(26,  1)},
+	{13, 0x001C, AC_PAIR(25,  1)},
+	{13, 0x001D, AC_PAIR(24,  1)},
+	{13, 0x001E, AC_PAIR(23,  1)},
+	{13, 0x001F, AC_PAIR(22,  1)},
+	{14, 0x0010, AC_PAIR( 0, 31)},
+	{14, 0x0011, AC_PAIR( 0, 30)},
+	{14, 0x0012, AC_PAIR( 0, 29)},
+	{14, 0x0013, AC_PAIR( 0, 28)},
+	{14, 0x0014, AC_PAIR( 0, 27)},
+	{14, 0x0015, AC_PAIR( 0, 26)},
+	{14, 0x0016, AC_PAIR( 0, 25)},
+	{14, 0x0017, AC_PAIR( 0, 24)},
+	{14, 0x0018, AC_PAIR( 0, 23)},
+	{14, 0x0019, AC_PAIR( 0, 22)},
+	{14, 0x001A, AC_PAIR( 0, 21)},
+	{14, 0x001B, AC_PAIR( 0, 20)},
+	{14, 0x001C, AC_PAIR( 0, 19)},
+	{14, 0x001D, AC_PAIR( 0, 18)},
+	{14, 0x001E, AC_PAIR( 0, 17)},
+	{14, 0x001F, AC_PAIR( 0, 16)},
+	{15, 0x0010, AC_PAIR( 0, 40)},
+	{15, 0x0011, AC_PAIR( 0, 39)},
+	{15, 0x0012, AC_PAIR( 0, 38)},
+	{15, 0x0013, AC_PAIR( 0, 37)},
+	{15, 0x0014, AC_PAIR( 0, 36)},
+	{15, 0x0015, AC_PAIR( 0, 35)},
+	{15, 0x0016, AC_PAIR( 0, 34)},
+	{15, 0x0017, AC_PAIR( 0, 33)},
+	{15, 0x0018, AC_PAIR( 0, 32)},
+	{15, 0x0019, AC_PAIR( 1, 14)},
+	{15, 0x001A, AC_PAIR( 1, 13)},
+	{15, 0x001B, AC_PAIR( 1, 12)},
+	{15, 0x001C, AC_PAIR( 1, 11)},
+	{15, 0x001D, AC_PAIR( 1, 10)},
+	{15, 0x001E, AC_PAIR( 1,  9)},
+	{15, 0x001F, AC_PAIR( 1,  8)},
+	{16, 0x0010, AC_PAIR( 1, 18)},
+	{16, 0x0011, AC_PAIR( 1, 17)},
+	{16, 0x0012, AC_PAIR( 1, 16)},
+	{16, 0x0013, AC_PAIR( 1, 15)},
+	{16, 0x0014, AC_PAIR( 6,  3)},
+	{16, 0x0015, AC_PAIR(16,  2)},
+	{16, 0x0016, AC_PAIR(15,  2)},
+	{16, 0x0017, AC_PAIR(14,  2)},
+	{16, 0x0018, AC_PAIR(13,  2)},
+	{16, 0x0019, AC_PAIR(12,  2)},
+	{16, 0x001A, AC_PAIR(11,  2)},
+	{16, 0x001B, AC_PAIR(31,  1)},
+	{16, 0x001C, AC_PAIR(30,  1)},
+	{16, 0x001D, AC_PAIR(29,  1)},
+	{16, 0x001E, AC_PAIR(28,  1)},
+	{16, 0x001F, AC_PAIR(27,  1)}
 };
-#undef MAKE_HUFFMAN_PAIR
 
-const uint8_t quant_dec[8*8] = {
+static const uint8_t quant_dec[8*8] = {
 	 2, 16, 19, 22, 26, 27, 29, 34,
 	16, 16, 22, 24, 27, 29, 34, 37,
 	19, 22, 26, 27, 29, 34, 34, 38,
@@ -154,96 +162,110 @@ const uint8_t quant_dec[8*8] = {
 	22, 26, 27, 29, 32, 35, 40, 48,
 	26, 27, 29, 32, 35, 40, 48, 58,
 	26, 27, 29, 34, 38, 46, 56, 69,
-	27, 29, 35, 38, 46, 56, 69, 83,
+	27, 29, 35, 38, 46, 56, 69, 83
 };
 
-const uint8_t dct_zigzag_table[8*8] = {
-	0x00,0x01,0x05,0x06,0x0E,0x0F,0x1B,0x1C,
-	0x02,0x04,0x07,0x0D,0x10,0x1A,0x1D,0x2A,
-	0x03,0x08,0x0C,0x11,0x19,0x1E,0x29,0x2B,
-	0x09,0x0B,0x12,0x18,0x1F,0x28,0x2C,0x35,
-	0x0A,0x13,0x17,0x20,0x27,0x2D,0x34,0x36,
-	0x14,0x16,0x21,0x26,0x2E,0x33,0x37,0x3C,
-	0x15,0x22,0x25,0x2F,0x32,0x38,0x3B,0x3D,
-	0x23,0x24,0x30,0x31,0x39,0x3A,0x3E,0x3F,
+static const uint8_t dct_zigzag_table[8*8] = {
+	 0,  1,  5,  6, 14, 15, 27, 28,
+	 2,  4,  7, 13, 16, 26, 29, 42,
+	 3,  8, 12, 17, 25, 30, 41, 43,
+	 9, 11, 18, 24, 31, 40, 44, 53,
+	10, 19, 23, 32, 39, 45, 52, 54,
+	20, 22, 33, 38, 46, 51, 55, 60,
+	21, 34, 37, 47, 50, 56, 59, 61,
+	35, 36, 48, 49, 57, 58, 62, 63
 };
 
-const uint8_t dct_zagzig_table[8*8] = {
-	0x00,0x01,0x08,0x10,0x09,0x02,0x03,0x0A,
-	0x11,0x18,0x20,0x19,0x12,0x0B,0x04,0x05,
-	0x0C,0x13,0x1A,0x21,0x28,0x30,0x29,0x22,
-	0x1B,0x14,0x0D,0x06,0x07,0x0E,0x15,0x1C,
-	0x23,0x2A,0x31,0x38,0x39,0x32,0x2B,0x24,
-	0x1D,0x16,0x0F,0x17,0x1E,0x25,0x2C,0x33,
-	0x3A,0x3B,0x34,0x2D,0x26,0x1F,0x27,0x2E,
-	0x35,0x3C,0x3D,0x36,0x2F,0x37,0x3E,0x3F,
+static const uint8_t dct_zagzig_table[8*8] = {
+	 0,  1,  8, 16,  9,  2,  3, 10,
+	17, 24, 32, 25, 18, 11,  4,  5,
+	12, 19, 26, 33, 40, 48, 41, 34,
+	27, 20, 13,  6,  7, 14, 21, 28,
+	35, 42, 49, 56, 57, 50, 43, 36,
+	29, 22, 15, 23, 30, 37, 44, 51,
+	58, 59, 52, 45, 38, 31, 39, 46,
+	53, 60, 61, 54, 47, 55, 62, 63
 };
 
-const int16_t dct_scale_table[8*8] = {
-	+0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82,
-	+0x7D8A, +0x6A6D, +0x471C, +0x18F8, -0x18F9, -0x471D, -0x6A6E, -0x7D8B,
-	+0x7641, +0x30FB, -0x30FC, -0x7642, -0x7642, -0x30FC, +0x30FB, +0x7641,
-	+0x6A6D, -0x18F9, -0x7D8B, -0x471D, +0x471C, +0x7D8A, +0x18F8, -0x6A6E,
-	+0x5A82, -0x5A83, -0x5A83, +0x5A82, +0x5A82, -0x5A83, -0x5A83, +0x5A82,
-	+0x471C, -0x7D8B, +0x18F8, +0x6A6D, -0x6A6E, -0x18F9, +0x7D8A, -0x471D,
-	+0x30FB, -0x7642, +0x7641, -0x30FC, -0x30FC, +0x7641, -0x7642, +0x30FB,
-	+0x18F8, -0x471D, +0x6A6D, -0x7D8B, +0x7D8A, -0x6A6E, +0x471C, -0x18F9,
-};
+#if 0
+#define SF0 0x5a82 // cos(0/16 * pi) * sqrt(2)
+#define SF1 0x7d8a // cos(1/16 * pi) * 2
+#define SF2 0x7641 // cos(2/16 * pi) * 2
+#define SF3 0x6a6d // cos(3/16 * pi) * 2
+#define SF4 0x5a82 // cos(4/16 * pi) * 2
+#define SF5 0x471c // cos(5/16 * pi) * 2
+#define SF6 0x30fb // cos(6/16 * pi) * 2
+#define SF7 0x18f8 // cos(7/16 * pi) * 2
 
-static void init_dct_data(vid_encoder_state_t *state)
-{
+static const int16_t dct_scale_table[8*8] = {
+	SF0,  SF0,  SF0,  SF0,  SF0,  SF0,  SF0,  SF0,
+	SF1,  SF3,  SF5,  SF7, -SF7, -SF5, -SF3, -SF1,
+	SF2,  SF6, -SF6, -SF2, -SF2, -SF6,  SF6,  SF2,
+	SF3, -SF7, -SF1, -SF5,  SF5,  SF1,  SF7, -SF3,
+	SF4, -SF4, -SF4,  SF4,  SF4, -SF4, -SF4,  SF4,
+	SF5, -SF1,  SF7,  SF3, -SF3, -SF7,  SF1, -SF5,
+	SF6, -SF2,  SF2, -SF6, -SF6,  SF2, -SF2,  SF6,
+	SF7, -SF5,  SF3, -SF1,  SF1, -SF3,  SF5, -SF7
+};
+#endif
+
+static void init_dct_data(mdec_encoder_state_t *state) {
 	for(int i = 0; i <= 0xFFFF; i++) {
 		// high 8 bits = bit count
 		// low 24 bits = value
-		state->huffman_encoding_map[i] = ((6+16)<<24)|((0x01<<16)|(i));
+		state->ac_huffman_map[i] = ((6+16) << 24) | (0x01 << 16) | i;
 
 		int16_t coeff = (int16_t)i;
-		if (coeff < -0x200) { coeff = -0x200; }
-		if (coeff > +0x1FF) { coeff = +0x1FF; }
-		state->coeff_clamp_map[i] = coeff&0x3FF;
+
+		if (coeff < -0x200)
+			coeff = -0x200;
+		else if (coeff > +0x1FF)
+			coeff = +0x1FF;
+
+		state->coeff_clamp_map[i] = coeff & 0x3FF;
 	}
 
-	for(int i = 0; i < sizeof(huffman_lookup)/sizeof(huffman_lookup[0]); i++) {
-		int bits = huffman_lookup[i].c_bits+1;
-		uint32_t base_value = huffman_lookup[i].c_value;
-		state->huffman_encoding_map[huffman_lookup[i].u_hword_pos] = (bits<<24)|(base_value<<1)|0;
-		state->huffman_encoding_map[huffman_lookup[i].u_hword_neg] = (bits<<24)|(base_value<<1)|1;
-	}
+	int tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]);
 
+	for(int i = 0; i < tree_item_count; i++) {
+		int bits = ac_huffman_tree[i].c_bits+1;
+		uint32_t base_value = ac_huffman_tree[i].c_value;
+
+		state->ac_huffman_map[ac_huffman_tree[i].u_hword_pos] = (bits << 24) | (base_value << 1) | 0;
+		state->ac_huffman_map[ac_huffman_tree[i].u_hword_neg] = (bits << 24) | (base_value << 1) | 1;
+	}
 }
 
-static bool flush_bits(vid_encoder_state_t *state)
-{
+static bool flush_bits(mdec_encoder_state_t *state) {
 	if(state->bits_left < 16) {
 		state->frame_output[state->bytes_used++] = (uint8_t)state->bits_value;
-		if (state->bytes_used >= state->frame_max_size) {
+		if (state->bytes_used >= state->frame_max_size)
 			return false;
-		}
+
 		state->frame_output[state->bytes_used++] = (uint8_t)(state->bits_value>>8);
 	}
+
 	state->bits_left = 16;
 	state->bits_value = 0;
 	return true;
 }
 
-static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
-{
+static bool encode_bits(mdec_encoder_state_t *state, int bits, uint32_t val) {
 	assert(val < (1<<bits));
 
 	// FIXME: for some reason the main logic breaks when bits > 16
 	// and I have no idea why, so I have to split this up --GM
 	if (bits > 16) {
-		if (!encode_bits(state, bits-16, val>>16)) {
+		if (!encode_bits(state, bits-16, val>>16))
 			return false;
-		}
+
 		bits = 16;
 		val &= 0xFFFF;
 	}
 
 	if (state->bits_left == 0) {
-		if (!flush_bits(state)) {
+		if (!flush_bits(state))
 			return false;
-		}
 	}
 
 	while (bits > state->bits_left) {
@@ -260,9 +282,8 @@ static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
 		val &= mask;
 		assert(mask >= 1);
 		assert(val < (1<<bits));
-		if (!flush_bits(state)) {
+		if (!flush_bits(state))
 			return false;
-		}
 	}
 
 	if (bits >= 1) {
@@ -281,31 +302,31 @@ static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
 	return true;
 }
 
-static bool encode_ac_value(vid_encoder_state_t *state, uint16_t value)
-{
+static bool encode_ac_value(mdec_encoder_state_t *state, uint16_t value) {
 	assert(0 <= value && value <= 0xFFFF);
 
 #if 0
-	for(int i = 0; i < sizeof(huffman_lookup)/sizeof(huffman_lookup[0]); i++) {
-		if(value == huffman_lookup[i].u_hword_pos) {
-			return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|0);
-		}
-		else if(value == huffman_lookup[i].u_hword_neg) {
-			return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|1);
+	int tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]);
+
+	for (int i = 0; i < tree_item_count; i++) {
+		if (value == ac_huffman_tree[i].u_hword_pos) {
+			return encode_bits(state, ac_huffman_tree[i].c_bits+1, ((uint32_t)ac_huffman_tree[i].c_value << 1) | 0);
+		} else if (value == ac_huffman_tree[i].u_hword_neg) {
+			return encode_bits(state, ac_huffman_tree[i].c_bits+1, ((uint32_t)ac_huffman_tree[i].c_value << 1) | 1);
 		}
 	}
 
 	// Use an escape
-	return encode_bits(state, 6+16, (0x01<<16)|(0xFFFF&(uint32_t)value));
+	return encode_bits(state, 6+16, (0x01 << 16) | (0xFFFF & (uint32_t)value));
 #else
-	uint32_t outword = state->huffman_encoding_map[value];
-	return encode_bits(state, outword>>24, outword&0xFFFFFF);
+	uint32_t outword = state->ac_huffman_map[value];
+
+	return encode_bits(state, outword >> 24, outword & 0xFFFFFF);
 #endif
 }
 
-static void transform_dct_block(vid_encoder_state_t *state, int16_t *block)
-{
 #if 0
+static void transform_dct_block(int16_t *block) {
 	// Apply DCT to block
 	int midblock[8*8];
 
@@ -327,55 +348,9 @@ static void transform_dct_block(vid_encoder_state_t *state, int16_t *block)
 		block[8*i+j] = (int16_t)((v + 0xFFF) >> 13);
 	}
 	}
-#else
-	state->dct_context->fdct(block);
-#endif
 }
 
-// https://stackoverflow.com/a/60011209
-//#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? (((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d)))
-#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d)))
-
-static bool encode_dct_block(vid_encoder_state_t *state, const int16_t *block, const int16_t *quant_table)
-{
-	int dc = DIVIDE_ROUNDED(block[0], quant_table[0]);
-	dc = state->coeff_clamp_map[dc&0xFFFF];
-
-	if (!encode_bits(state, 10, dc)) {
-		return false;
-	}
-
-	for (int i = 1, zeroes = 0; i < 64; i++) {
-		int ri = dct_zagzig_table[i];
-		int ac = DIVIDE_ROUNDED(block[ri], quant_table[ri]);
-		ac = state->coeff_clamp_map[ac&0xFFFF];
-
-		if (ac == 0) {
-			zeroes++;
-		} else {
-			if (!encode_ac_value(state, (zeroes<<10)|ac)) {
-				return false;
-			}
-			zeroes = 0;
-			state->uncomp_hwords_used += 1;
-		}
-	}
-
-	//fprintf(stderr, "dc %08X rles %2d\n", dc, zero_rle_words);
-	//assert(dc >= -0x200); assert(dc <  +0x200);
-
-	// Store end of block
-	if (!encode_bits(state, 2, 0x2)) {
-		return false;
-	}
-	state->uncomp_hwords_used += 2;
-	//state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF;
-	return true;
-}
-
-#if 0
-static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t min_val, int *values_to_shed)
-{
+static int reduce_dct_block(mdec_encoder_state_t *state, int32_t *block, int32_t min_val, int *values_to_shed) {
 	// Reduce so it can all fit
 	int nonzeroes = 0;
 
@@ -397,117 +372,178 @@ static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t
 }
 #endif
 
-bool init_encoder_state(settings_t *settings)
-{
-	if (settings->state_vid.huffman_encoding_map) {
-		return true;
-	}
+// https://stackoverflow.com/a/60011209
+#if 0
+#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? (((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d)))
+#else
+#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d)))
+#endif
 
-	settings->state_vid.huffman_encoding_map = malloc(0x10000*sizeof(uint32_t));
-	settings->state_vid.coeff_clamp_map = malloc(0x10000*sizeof(int16_t));
-	if (!settings->state_vid.huffman_encoding_map || !settings->state_vid.coeff_clamp_map) {
+static bool encode_dct_block(mdec_encoder_state_t *state, const int16_t *block, const int16_t *quant_table) {
+	int dc = DIVIDE_ROUNDED(block[0], quant_table[0]);
+	dc = state->coeff_clamp_map[dc&0xFFFF];
+
+	if (!encode_bits(state, 10, dc))
 		return false;
-	}
-	init_dct_data(&(settings->state_vid));
 
-	settings->state_vid.dct_context = avcodec_dct_alloc();
-	if (!settings->state_vid.dct_context) {
-		return false;
-	}
-	avcodec_dct_init(settings->state_vid.dct_context);
+	for (int i = 1, zeroes = 0; i < 64; i++) {
+		int ri = dct_zagzig_table[i];
+		int ac = DIVIDE_ROUNDED(block[ri], quant_table[ri]);
+		ac = state->coeff_clamp_map[ac&0xFFFF];
 
-	int dct_block_count_x = (settings->video_width+15)/16;
-	int dct_block_count_y = (settings->video_height+15)/16;
+		if (ac == 0) {
+			zeroes++;
+		} else {
+			if (!encode_ac_value(state, (zeroes<<10)|ac))
+				return false;
 
-	int dct_block_size = dct_block_count_x*dct_block_count_y*sizeof(int16_t)*8*8;
-	for (int i = 0; i < 6; i++) {
-		settings->state_vid.dct_block_lists[i] = malloc(dct_block_size);
-		if (!settings->state_vid.dct_block_lists[i]) {
-			return false;
+			zeroes = 0;
+			state->uncomp_hwords_used += 1;
 		}
 	}
 
+	//fprintf(stderr, "dc %08X rles %2d\n", dc, zero_rle_words);
+	//assert(dc >= -0x200); assert(dc <  +0x200);
+
+	// Store end of block
+	if (!encode_bits(state, 2, 0x2))
+		return false;
+
+	state->uncomp_hwords_used += 2;
+	//state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF;
 	return true;
 }
 
-void destroy_encoder_state(settings_t *settings)
-{
-	if (settings->state_vid.huffman_encoding_map) {
-		free(settings->state_vid.huffman_encoding_map);
-		settings->state_vid.huffman_encoding_map = NULL;
+// Sets up *encoder (zero it before the first call); returns false if an allocation fails.
+bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_height) {
+	mdec_encoder_state_t *state = &(encoder->state);
+
+	if (state->dct_context != NULL)
+		return true;
+
+	// encode_frame_bs() reads the frame dimensions back from the encoder.
+	encoder->video_width = video_width;
+	encoder->video_height = video_height;
+	state->dct_context = avcodec_dct_alloc();
+	state->ac_huffman_map = malloc(0x10000 * sizeof(uint32_t));
+	state->dc_huffman_map = NULL;
+	state->coeff_clamp_map = malloc(0x10000 * sizeof(int16_t));
+	state->delta_clamp_map = NULL;
+
+	int dct_block_count_x = (video_width + 15) / 16;
+	int dct_block_count_y = (video_height + 15) / 16;
+	int dct_block_size = dct_block_count_x * dct_block_count_y * sizeof(int16_t) * 8*8;
+
+	// Fill every slot before bailing out so destroy_mdec_encoder() stays safe.
+	bool ok = state->dct_context && state->ac_huffman_map && state->coeff_clamp_map;
+	for (int i = 0; i < 6; i++) {
+		state->dct_block_lists[i] = malloc(dct_block_size);
+		if (!state->dct_block_lists[i])
+			ok = false;
 	}
-	if (settings->state_vid.coeff_clamp_map) {
-		free(settings->state_vid.coeff_clamp_map);
-		settings->state_vid.coeff_clamp_map = NULL;
+	if (!ok)
+		return false;
+
+	avcodec_dct_init(state->dct_context);
+	init_dct_data(state);
+	return true;
+}
+
+void destroy_mdec_encoder(mdec_encoder_t *encoder) {
+	mdec_encoder_state_t *state = &(encoder->state);
+
+	if (state->dct_context) {
+		av_free(state->dct_context);
+		state->dct_context = NULL;
 	}
-	if (settings->state_vid.dct_context) {
-		av_free(settings->state_vid.dct_context);
-		settings->state_vid.dct_context = NULL;
+	if (state->ac_huffman_map) {
+		free(state->ac_huffman_map);
+		state->ac_huffman_map = NULL;
 	}
-	if (settings->state_vid.dct_block_lists[0]) {
-		for (int i = 0; i < 6; i++) {
-			free(settings->state_vid.dct_block_lists[i]);
-			settings->state_vid.dct_block_lists[i] = NULL;
+	if (state->dc_huffman_map) {
+		free(state->dc_huffman_map);
+		state->dc_huffman_map = NULL;
+	}
+	if (state->coeff_clamp_map) {
+		free(state->coeff_clamp_map);
+		state->coeff_clamp_map = NULL;
+	}
+	if (state->delta_clamp_map) {
+		free(state->delta_clamp_map);
+		state->delta_clamp_map = NULL;
+	}
+	for (int i = 0; i < 6; i++) {
+		if (state->dct_block_lists[i]) {
+			free(state->dct_block_lists[i]);
+			state->dct_block_lists[i] = NULL;
 		}
 	}
 }
 
-void encode_frame_bs(uint8_t *video_frame, settings_t *settings)
-{
-	int pitch = settings->video_width;
-	/*int real_index = (settings->state_vid.frame_index-1);
-	if (real_index > video_frame_count-1) {
-		real_index = video_frame_count-1;
-	}
-	uint8_t *y_plane = video_frames + settings->video_width*settings->video_height*3/2*real_index;*/
+void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) {
+	mdec_encoder_state_t *state = &(encoder->state);
+
+	assert(state->dct_context);
+
+	int pitch = encoder->video_width;
+#if 0
+	int real_index = state->frame_index - 1;
+	if (real_index > (video_frame_count - 1))
+		real_index = video_frame_count - 1;
+
+	uint8_t *y_plane = video_frames + encoder->video_width * encoder->video_height * 3/2 * real_index;
+#else
 	uint8_t *y_plane = video_frame;
-	uint8_t *c_plane = y_plane + (settings->video_width*settings->video_height);
+	uint8_t *c_plane = y_plane + (encoder->video_width * encoder->video_height);
+#endif
 
-	assert(settings->state_vid.huffman_encoding_map);
-
-	int dct_block_count_x = (settings->video_width+15)/16;
-	int dct_block_count_y = (settings->video_height+15)/16;
+	int dct_block_count_x = (encoder->video_width + 15) / 16;
+	int dct_block_count_y = (encoder->video_height + 15) / 16;
 
 	// TODO: non-16x16-aligned videos
-	assert((settings->video_width % 16) == 0);
-	assert((settings->video_height % 16) == 0);
+	assert((encoder->video_width % 16) == 0);
+	assert((encoder->video_height % 16) == 0);
 
 	// Rearrange the Y/C planes returned by libswscale into macroblocks.
-	for(int fx = 0; fx < dct_block_count_x; fx++) {
-	for(int fy = 0; fy < dct_block_count_y; fy++) {
-		// Order: Cr Cb [Y1|Y2\nY3|Y4]
-		int block_offs = 64 * (fy*dct_block_count_x + fx);
-		int16_t *blocks[6] = {
-			settings->state_vid.dct_block_lists[0] + block_offs,
-			settings->state_vid.dct_block_lists[1] + block_offs,
-			settings->state_vid.dct_block_lists[2] + block_offs,
-			settings->state_vid.dct_block_lists[3] + block_offs,
-			settings->state_vid.dct_block_lists[4] + block_offs,
-			settings->state_vid.dct_block_lists[5] + block_offs,
-		};
+	for (int fx = 0; fx < dct_block_count_x; fx++) {
+		for (int fy = 0; fy < dct_block_count_y; fy++) {
+			// Order: Cr Cb [Y1|Y2]
+			//              [Y3|Y4]
+			int block_offs = 64 * (fy*dct_block_count_x + fx);
+			int16_t *blocks[6] = {
+				state->dct_block_lists[0] + block_offs,
+				state->dct_block_lists[1] + block_offs,
+				state->dct_block_lists[2] + block_offs,
+				state->dct_block_lists[3] + block_offs,
+				state->dct_block_lists[4] + block_offs,
+				state->dct_block_lists[5] + block_offs
+			};
 
-		for(int y = 0; y < 8; y++) {
-		for(int x = 0; x < 8; x++) {
-			int k = y*8 + x;
-			int cx = fx*8 + x;
-			int cy = fy*8 + y;
-			int lx = fx*16 + x;
-			int ly = fy*16 + y;
+			for (int y = 0; y < 8; y++) {
+				for (int x = 0; x < 8; x++) {
+					int k = y*8 + x;
+					int cx = fx*8 + x;
+					int cy = fy*8 + y;
+					int lx = fx*16 + x;
+					int ly = fy*16 + y;
 
-			blocks[0][k] = (int16_t)c_plane[pitch*cy + 2*cx + 0] - 128;
-			blocks[1][k] = (int16_t)c_plane[pitch*cy + 2*cx + 1] - 128;
-			blocks[2][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+0)] - 128;
-			blocks[3][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+8)] - 128;
-			blocks[4][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+0)] - 128;
-			blocks[5][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+8)] - 128;
-		}
-		}
+					blocks[0][k] = (int16_t)c_plane[pitch*cy + 2*cx + 0] - 128;
+					blocks[1][k] = (int16_t)c_plane[pitch*cy + 2*cx + 1] - 128;
+					blocks[2][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+0)] - 128;
+					blocks[3][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+8)] - 128;
+					blocks[4][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+0)] - 128;
+					blocks[5][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+8)] - 128;
+				}
+			}
 
-		for(int i = 0; i < 6; i++) {
-			transform_dct_block(&(settings->state_vid), blocks[i]);
+			for (int i = 0; i < 6; i++)
+#if 0
+				transform_dct_block(blocks[i]);
+#else
+				state->dct_context->fdct(blocks[i]);
+#endif
 		}
 	}
-	}
 
 	// Attempt encoding the frame at the maximum quality. If the result is too
 	// large, increase the quantization scale and try again.
@@ -516,100 +552,107 @@ void encode_frame_bs(uint8_t *video_frame, settings_t *settings)
 	// compressing at scale N but optimizing coefficients away until it fits
 	// (like the old algorithm did)
 	for (
-		settings->state_vid.quant_scale = 1;
-		settings->state_vid.quant_scale < 64;
-		settings->state_vid.quant_scale++
+		state->quant_scale = 1;
+		state->quant_scale < 64;
+		state->quant_scale++
 	) {
 		int16_t quant_table[8*8];
 
 		// The DC coefficient's quantization scale is always 8.
 		quant_table[0] = quant_dec[0] * 8;
-		for (int i = 1; i < 64; i++) {
-			quant_table[i] = quant_dec[i] * settings->state_vid.quant_scale;
-		}
 
-		memset(settings->state_vid.frame_output, 0, settings->state_vid.frame_max_size);
+		for (int i = 1; i < 64; i++)
+			quant_table[i] = quant_dec[i] * state->quant_scale;
 
-		settings->state_vid.bits_value = 0;
-		settings->state_vid.bits_left = 16;
-		settings->state_vid.uncomp_hwords_used = 0;
-		settings->state_vid.bytes_used = 8;
+		memset(state->frame_output, 0, state->frame_max_size);
+
+		state->bits_value = 0;
+		state->bits_left = 16;
+		state->uncomp_hwords_used = 0;
+		state->bytes_used = 8;
 
 		bool ok = true;
-		for(int fx = 0; ok && (fx < dct_block_count_x); fx++) {
-		for(int fy = 0; ok && (fy < dct_block_count_y); fy++) {
-			// Order: Cr Cb [Y1|Y2\nY3|Y4]
-			int block_offs = 64 * (fy*dct_block_count_x + fx);
-			int16_t *blocks[6] = {
-				settings->state_vid.dct_block_lists[0] + block_offs,
-				settings->state_vid.dct_block_lists[1] + block_offs,
-				settings->state_vid.dct_block_lists[2] + block_offs,
-				settings->state_vid.dct_block_lists[3] + block_offs,
-				settings->state_vid.dct_block_lists[4] + block_offs,
-				settings->state_vid.dct_block_lists[5] + block_offs,
-			};
+		for (int fx = 0; ok && (fx < dct_block_count_x); fx++) {
+			for (int fy = 0; ok && (fy < dct_block_count_y); fy++) {
+				// Order: Cr Cb [Y1|Y2]
+				//              [Y3|Y4]
+				int block_offs = 64 * (fy*dct_block_count_x + fx);
+				int16_t *blocks[6] = {
+					state->dct_block_lists[0] + block_offs,
+					state->dct_block_lists[1] + block_offs,
+					state->dct_block_lists[2] + block_offs,
+					state->dct_block_lists[3] + block_offs,
+					state->dct_block_lists[4] + block_offs,
+					state->dct_block_lists[5] + block_offs
+				};
 
-			for(int i = 0; ok && (i < 6); i++) {
-				ok = encode_dct_block(&(settings->state_vid), blocks[i], quant_table);
+				for(int i = 0; ok && (i < 6); i++)
+					ok = encode_dct_block(state, blocks[i], quant_table);
 			}
 		}
-		}
 
-		if (!ok) { continue; }
-		if (!encode_bits(&(settings->state_vid), 10, 0x1FF)) { continue; }
-		if (!encode_bits(&(settings->state_vid), 2, 0x2)) { continue; }
-		if (!flush_bits(&(settings->state_vid))) { continue; }
+		if (!ok)
+			continue;
+		if (!encode_bits(state, 10, 0x1FF))
+			continue;
+		if (!encode_bits(state, 2, 0x2))
+			continue;
+		if (!flush_bits(state))
+			continue;
 
-		settings->state_vid.uncomp_hwords_used += 2;
-		settings->state_vid.quant_scale_sum += settings->state_vid.quant_scale;
+		state->uncomp_hwords_used += 2;
+		state->quant_scale_sum += state->quant_scale;
 		break;
 	}
-	assert(settings->state_vid.quant_scale < 64);
+	assert(state->quant_scale < 64);
 
 	// MDEC DMA is usually configured to transfer data in 32-word chunks.
-	settings->state_vid.uncomp_hwords_used = (settings->state_vid.uncomp_hwords_used+0x3F)&~0x3F;
+	state->uncomp_hwords_used = (state->uncomp_hwords_used+0x3F)&~0x3F;
 
 	// This is not the number of 32-byte blocks required for uncompressed data
 	// as jPSXdec docs say, but rather the number of 32-*bit* words required.
 	// The first 4 bytes of the frame header are in fact the MDEC command to
 	// start decoding, which contains the data length in words in the lower 16
 	// bits.
-	settings->state_vid.blocks_used = (settings->state_vid.uncomp_hwords_used+1)>>1;
+	state->blocks_used = (state->uncomp_hwords_used+1)>>1;
 
 	// We need a multiple of 4
-	settings->state_vid.bytes_used = (settings->state_vid.bytes_used+0x3)&~0x3;
+	state->bytes_used = (state->bytes_used+0x3)&~0x3;
 
 	// MDEC command (size of decompressed MDEC data)
-	settings->state_vid.frame_output[0x000] = (uint8_t)settings->state_vid.blocks_used;
-	settings->state_vid.frame_output[0x001] = (uint8_t)(settings->state_vid.blocks_used>>8);
-	settings->state_vid.frame_output[0x002] = (uint8_t)0x00;
-	settings->state_vid.frame_output[0x003] = (uint8_t)0x38;
+	state->frame_output[0x000] = (uint8_t)state->blocks_used;
+	state->frame_output[0x001] = (uint8_t)(state->blocks_used>>8);
+	state->frame_output[0x002] = (uint8_t)0x00;
+	state->frame_output[0x003] = (uint8_t)0x38;
 
 	// Quantization scale
-	settings->state_vid.frame_output[0x004] = (uint8_t)settings->state_vid.quant_scale;
-	settings->state_vid.frame_output[0x005] = (uint8_t)(settings->state_vid.quant_scale>>8);
+	state->frame_output[0x004] = (uint8_t)state->quant_scale;
+	state->frame_output[0x005] = (uint8_t)(state->quant_scale>>8);
 
 	// BS version
-	settings->state_vid.frame_output[0x006] = 0x02;
-	settings->state_vid.frame_output[0x007] = 0x00;
-
-	retire_av_data(settings, 0, 1);
+	state->frame_output[0x006] = 0x02;
+	state->frame_output[0x007] = 0x00;
 }
 
-void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings)
-{
+int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *output) {
+	mdec_encoder_state_t *state = &(encoder->state);
+	int last_frame_index = state->frame_index;
+	int frame_size = encoder->video_width * encoder->video_height * 2;
+
 	uint8_t header[32];
 	memset(header, 0, sizeof(header));
 
-	while(settings->state_vid.frame_data_offset >= settings->state_vid.frame_max_size) {
-		settings->state_vid.frame_index++;
+	while (state->frame_data_offset >= state->frame_max_size) {
+		state->frame_index++;
 		// TODO: work out an optimal block count for this
 		// TODO: calculate this all based on FPS
-		settings->state_vid.frame_block_overflow_num += settings->state_vid.frame_block_base_overflow;
-		settings->state_vid.frame_max_size = settings->state_vid.frame_block_overflow_num / settings->state_vid.frame_block_overflow_den * 2016;
-		settings->state_vid.frame_block_overflow_num %= settings->state_vid.frame_block_overflow_den;
-		settings->state_vid.frame_data_offset = 0;
-		encode_frame_bs(video_frames, settings);
+		state->frame_block_overflow_num += state->frame_block_base_overflow;
+		state->frame_max_size = state->frame_block_overflow_num / state->frame_block_overflow_den * 2016;
+		state->frame_block_overflow_num %= state->frame_block_overflow_den;
+		state->frame_data_offset = 0;
+
+		encode_frame_bs(encoder, video_frames);
+		video_frames += frame_size;
 	}
 
 	// STR version
@@ -621,47 +664,48 @@ void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *setti
 	header[0x003] = 0x80;
 
 	// Muxed chunk index/count
-	int chunk_index = settings->state_vid.frame_data_offset/2016;
-	int chunk_count = settings->state_vid.frame_max_size/2016;
+	int chunk_index = state->frame_data_offset / 2016;
+	int chunk_count = state->frame_max_size / 2016;
 	header[0x004] = (uint8_t)chunk_index;
-	header[0x005] = (uint8_t)(chunk_index>>8);
+	header[0x005] = (uint8_t)(chunk_index >> 8);
 	header[0x006] = (uint8_t)chunk_count;
-	header[0x007] = (uint8_t)(chunk_count>>8);
+	header[0x007] = (uint8_t)(chunk_count >> 8);
 
 	// Frame index
-	header[0x008] = (uint8_t)settings->state_vid.frame_index;
-	header[0x009] = (uint8_t)(settings->state_vid.frame_index>>8);
-	header[0x00A] = (uint8_t)(settings->state_vid.frame_index>>16);
-	header[0x00B] = (uint8_t)(settings->state_vid.frame_index>>24);
+	header[0x008] = (uint8_t)state->frame_index;
+	header[0x009] = (uint8_t)(state->frame_index >> 8);
+	header[0x00A] = (uint8_t)(state->frame_index >> 16);
+	header[0x00B] = (uint8_t)(state->frame_index >> 24);
 
 	// Video frame size
-	header[0x010] = (uint8_t)settings->video_width;
-	header[0x011] = (uint8_t)(settings->video_width>>8);
-	header[0x012] = (uint8_t)settings->video_height;
-	header[0x013] = (uint8_t)(settings->video_height>>8);
+	header[0x010] = (uint8_t)encoder->video_width;
+	header[0x011] = (uint8_t)(encoder->video_width >> 8);
+	header[0x012] = (uint8_t)encoder->video_height;
+	header[0x013] = (uint8_t)(encoder->video_height >> 8);
 
 	// MDEC command (size of decompressed MDEC data)
-	header[0x014] = (uint8_t)settings->state_vid.blocks_used;
-	header[0x015] = (uint8_t)(settings->state_vid.blocks_used>>8);
+	header[0x014] = (uint8_t)state->blocks_used;
+	header[0x015] = (uint8_t)(state->blocks_used >> 8);
 	header[0x016] = 0x00;
 	header[0x017] = 0x38;
 
 	// Quantization scale
-	header[0x018] = (uint8_t)settings->state_vid.quant_scale;
-	header[0x019] = (uint8_t)(settings->state_vid.quant_scale>>8);
+	header[0x018] = (uint8_t)state->quant_scale;
+	header[0x019] = (uint8_t)(state->quant_scale >> 8);
 
 	// BS version
 	header[0x01A] = 0x02;
 	header[0x01B] = 0x00;
 
 	// Demuxed bytes used as a multiple of 4
-	header[0x00C] = (uint8_t)settings->state_vid.bytes_used;
-	header[0x00D] = (uint8_t)(settings->state_vid.bytes_used>>8);
-	header[0x00E] = (uint8_t)(settings->state_vid.bytes_used>>16);
-	header[0x00F] = (uint8_t)(settings->state_vid.bytes_used>>24);
+	header[0x00C] = (uint8_t)state->bytes_used;
+	header[0x00D] = (uint8_t)(state->bytes_used >> 8);
+	header[0x00E] = (uint8_t)(state->bytes_used >> 16);
+	header[0x00F] = (uint8_t)(state->bytes_used >> 24);
 
 	memcpy(output + 0x018, header, sizeof(header));
-	memcpy(output + 0x018 + 0x020, settings->state_vid.frame_output + settings->state_vid.frame_data_offset, 2016);
+	memcpy(output + 0x018 + 0x020, state->frame_output + state->frame_data_offset, 2016);
 
-	settings->state_vid.frame_data_offset += 2016;
+	state->frame_data_offset += 2016;
+	return state->frame_index - last_frame_index;
 }
diff --git a/psxavenc/mdec.h b/psxavenc/mdec.h
new file mode 100644
index 0000000..6b22e20
--- /dev/null
+++ b/psxavenc/mdec.h
@@ -0,0 +1,67 @@
+/*
+psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
+
+Copyright (c) 2019, 2020 Adrian "asie" Siekierka
+Copyright (c) 2019 Ben "GreaseMonkey" Russell
+Copyright (c) 2023, 2025 spicyjpeg
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <libavcodec/avdct.h>
+
+typedef struct { // Working state shared by encode_frame_bs() and encode_sector_str()
+	int frame_index; // Current frame number; incremented by encode_sector_str() per encoded frame and stored in the sector header
+	int frame_data_offset; // Byte offset into frame_output of the next 2016-byte chunk to emit; reset to 0 for each new frame
+	int frame_max_size; // Byte budget of the current frame; encode_sector_str() sets it to a multiple of 2016
+	int frame_block_base_overflow; // Amount added to frame_block_overflow_num once per frame
+	int frame_block_overflow_num; // Accumulator: quotient by the denominator gives the frame's chunk count, remainder carries over
+	int frame_block_overflow_den; // Denominator for the accumulator above
+	int block_type;
+	int16_t last_dc_values[3];
+	uint16_t bits_value; // Reset to 0 before every encoding attempt; presumably the pending output bits - confirm in encode_bits()
+	int bits_left; // Reset to 16 before every encoding attempt
+	uint8_t *frame_output; // Encoded frame: 8-byte header (MDEC command, quant scale, BS version) followed by the bitstream
+	int bytes_used; // Bytes of frame_output in use; starts at 8 and is rounded up to a multiple of 4
+	int blocks_used; // (uncomp_hwords_used + 1) >> 1, i.e. 32-bit words; low 16 bits of the MDEC command
+	int uncomp_hwords_used; // Uncompressed halfword count, rounded up to a multiple of 0x40 after encoding
+	int quant_scale; // Scale chosen for the current frame; encode_frame_bs() tries 1..63 in ascending order
+	int quant_scale_sum; // Running sum of the quant_scale chosen for each encoded frame
+
+	AVDCT *dct_context; // libavcodec DCT context; its fdct() is applied to every 8x8 block
+	uint32_t *ac_huffman_map;
+	uint32_t *dc_huffman_map;
+	int16_t *coeff_clamp_map;
+	int16_t *delta_clamp_map;
+	int16_t *dct_block_lists[6]; // Block arrays in Cr, Cb, Y1, Y2, Y3, Y4 order; 64 coefficients per block
+} mdec_encoder_state_t;
+
+typedef struct { // Encoder instance: frame dimensions plus the mutable encoding state
+	int video_width; // Frame width; written as a 16-bit little-endian value at offset 0x10 of each STR sector header
+	int video_height; // Frame height; written as a 16-bit little-endian value at offset 0x12 of each STR sector header
+
+	mdec_encoder_state_t state; // Per-stream working state, updated by every encode call
+} mdec_encoder_t;
+
+bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_height);
+void destroy_mdec_encoder(mdec_encoder_t *encoder);
+void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame); // Encodes one frame into state.frame_output, raising quant_scale from 1 until the result fits
+int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *output); // Writes a 32-byte header at output + 0x18 and a 2016-byte chunk after it; returns the number of new frames encoded
diff --git a/psxavenc/psxavenc.c b/psxavenc/psxavenc.c
deleted file mode 100644
index d980f1d..0000000
--- a/psxavenc/psxavenc.c
+++ /dev/null
@@ -1,495 +0,0 @@
-/*
-psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
-
-Copyright (c) 2019, 2020 Adrian "asie" Siekierka
-Copyright (c) 2019 Ben "GreaseMonkey" Russell
-Copyright (c) 2023 spicyjpeg
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not
-   claim that you wrote the original software. If you use this software
-   in a product, an acknowledgment in the product documentation would be
-   appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be
-   misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "common.h"
-#include "config.h"
-
-const char *format_names[NUM_FORMATS] = {
-	"xa", "xacd",
-	"spu", "spui",
-	"vag", "vagi",
-	"str2", "str2cd", "str2v",
-	"sbs2"
-};
-
-void print_help(void) {
-	fprintf(stderr,
-		"Usage:\n"
-		"    psxavenc -t <xa|xacd>     [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] <in> <out.xa>\n"
-		"    psxavenc -t <str2|str2cd> [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] [-s WxH] [-I] [-r num/den] [-x 1|2] <in> <out.str>\n"
-		"    psxavenc -t str2v         [-s WxH] [-I] [-r num/den] [-x 1|2] <in> <out.str>\n"
-		"    psxavenc -t sbs2          [-s WxH] [-I] [-r num/den] [-a size] <in> <out.str>\n"
-		"    psxavenc -t <spu|vag>     [-f freq] [-L] [-a size] <in> <out.vag>\n"
-		"    psxavenc -t <spui|vagi>   [-f freq] [-c 1-24] [-L] [-i size] [-a size] <in> <out.vag>\n"
-		"\nTool options:\n"
-		"    -h               Show this help message and exit\n"
-		"    -V               Show version information and exit\n"
-		"    -q               Suppress all non-error messages\n"
-		"\n"
-		"Output options:\n"
-		"    -t format        Use specified output type\n"
-		"                       xa     [A.] XA-ADPCM, 2336-byte sectors\n"
-		"                       xacd   [A.] XA-ADPCM, 2352-byte sectors\n"
-		"                       spu    [A.] raw SPU-ADPCM mono data\n"
-		"                       spui   [A.] raw SPU-ADPCM interleaved data\n"
-		"                       vag    [A.] .vag SPU-ADPCM mono\n"
-		"                       vagi   [A.] .vag SPU-ADPCM interleaved\n"
-		"                       str2   [AV] v2 .str video, 2336-byte sectors\n"
-		"                       str2cd [AV] v2 .str video, 2352-byte sectors\n"
-		"                       str2v  [.V] v2 .str video file\n"
-		"                       sbs2   [.V] v2 .sbs video, 2048-byte sectors\n"
-		"    -F num           xa/str2: Set the XA file number\n"
-		"                       0-255, default 0\n"
-		"    -C num           xa/str2: Set the XA channel number\n"
-		"                       0-31, default 0\n"
-		"\n"
-		"Audio options:\n"
-		"    -f freq          Use specified sample rate\n"
-		"                       xa/str2:   18900 or 37800, default 37800\n"
-		"                       spu/vag:   any value, default 44100\n"
-		"                       spui/vagi: any value, default 44100\n"
-		"    -b bitdepth      Use specified bit depth\n"
-		"                       xa/str2:   4 or 8, default 4\n"
-		"                       spu/vag:   must be 4\n"
-		"                       spui/vagi: must be 4\n"
-		"    -c channels      Use specified channel count\n"
-		"                       xa/str2:   1 or 2, default 2\n"
-		"                       spu/vag:   must be 1\n"
-		"                       spui/vagi: any value, default 2\n"
-		"    -R key=value,... Pass custom options to libswresample (see FFmpeg docs)\n"
-		"\n"
-		"SPU-ADPCM options (spu/spui/vag/vagi formats):\n"
-		"    -L               spu/vag:   Add a loop marker at the end of sample data\n"
-		"                     spui/vagi: Add a loop marker at the end of each chunk\n"
-		"    -i size          spui/vagi: Use specified channel interleave\n"
-		"                       Any multiple of 16, default 2048\n"
-		"    -a size          spu/vag:   Pad sample data to multiple of specified size\n"
-		"                       Any value >= 16, default 64\n"
-		"                     spui/vagi: Pad header and each chunk to multiple of specified size\n"
-		"                       Any value >= 16, default 2048\n"
-		"\n"
-		"Video options:\n"
-		"    -s WxH           Rescale input file to fit within specified size\n"
-		"                       16x16-320x256 in 16-pixel increments, default 320x240\n"
-		"    -I               Force stretching to given size without preserving aspect ratio\n"
-		"    -r num[/den]     Set frame rate to specified integer or fraction\n"
-		"                       1-30, default 15\n"
-		"    -x speed         str2: Set the CD-ROM speed the file is meant to played at\n"
-		"                       1 or 2, default 2\n"
-		"    -a size          sbs2: Set the size of each frame\n"
-		"                       Any value >= 256, default 8192\n"
-		"    -S key=value,... Pass custom options to libswscale (see FFmpeg docs)\n"
-		"\n"
-	);
-}
-
-void print_version(void) {
-	printf("psxavenc " VERSION "\n");
-}
-
-int parse_args_old(settings_t* settings, int argc, char** argv) {
-	int c, i;
-	char *next;
-	while ((c = getopt(argc, argv, "?hVqt:F:C:f:b:c:LR:i:a:s:IS:r:x:")) != -1) {
-		switch (c) {
-			case '?':
-			case 'h': {
-				print_help();
-				return -1;
-			} break;
-			case 'V': {
-				print_version();
-				return -1;
-			} break;
-			case 'q': {
-				settings->quiet = true;
-				settings->show_progress = false;
-			} break;
-			case 't': {
-				settings->format = -1;
-				for (i = 0; i < NUM_FORMATS; i++) {
-					if (!strcmp(optarg, format_names[i])) {
-						settings->format = i;
-						break;
-					}
-				}
-				if (settings->format < 0) {
-					fprintf(stderr, "Invalid format: %s\n", optarg);
-					return -1;
-				}
-			} break;
-			case 'F': {
-				settings->file_number = strtol(optarg, NULL, 0);
-				if (settings->file_number < 0 || settings->file_number > 255) {
-					fprintf(stderr, "Invalid file number: %d (must be in 0-255 range)\n", settings->file_number);
-					return -1;
-				}
-			} break;
-			case 'C': {
-				settings->channel_number = strtol(optarg, NULL, 0);
-				if (settings->channel_number < 0 || settings->channel_number > 31) {
-					fprintf(stderr, "Invalid channel number: %d (must be in 0-31 range)\n", settings->channel_number);
-					return -1;
-				}
-			} break;
-			case 'f': {
-				settings->frequency = strtol(optarg, NULL, 0);
-				if (settings->frequency < 1000) {
-					fprintf(stderr, "Invalid frequency: %d (must be at least 1000)\n", settings->frequency);
-					return -1;
-				}
-			} break;
-			case 'b': {
-				settings->bits_per_sample = strtol(optarg, NULL, 0);
-				if (settings->bits_per_sample != 4 && settings->bits_per_sample != 8) {
-					fprintf(stderr, "Invalid bit depth: %d (must be 4 or 8)\n", settings->bits_per_sample);
-					return -1;
-				}
-			} break;
-			case 'c': {
-				settings->channels = strtol(optarg, NULL, 0);
-				if (settings->channels < 1) {
-					fprintf(stderr, "Invalid channel count: %d (must be at least 1)\n", settings->channels);
-					return -1;
-				}
-			} break;
-			case 'L': {
-				settings->loop = true;
-			} break;
-			case 'R': {
-				settings->swresample_options = optarg;
-			} break;
-			case 'i': {
-				settings->interleave = (strtol(optarg, NULL, 0) + 15) & ~15;
-				if (settings->interleave < 16) {
-					fprintf(stderr, "Invalid interleave: %d (must be at least 16)\n", settings->interleave);
-					return -1;
-				}
-			} break;
-			case 'a': {
-				settings->alignment = strtol(optarg, NULL, 0);
-				if (settings->alignment < 16) {
-					fprintf(stderr, "Invalid alignment: %d (must be at least 16)\n", settings->alignment);
-					return -1;
-				}
-			} break;
-			case 's': {
-				settings->video_width = (strtol(optarg, &next, 0) + 15) & ~15;
-				if (*next != 'x') {
-					fprintf(stderr, "Invalid video size (must be specified as <width>x<height>)\n");
-					return -1;
-				}
-				settings->video_height = (strtol(next + 1, NULL, 0) + 15) & ~15;
-
-				if (settings->video_width < 16 || settings->video_width > 320) {
-					fprintf(stderr, "Invalid video width: %d (must be in 16-320 range)\n", settings->video_width);
-					return -1;
-				}
-				if (settings->video_height < 16 || settings->video_height > 256) {
-					fprintf(stderr, "Invalid video height: %d (must be in 16-256 range)\n", settings->video_height);
-					return -1;
-				}
-			} break;
-			case 'I': {
-				settings->ignore_aspect_ratio = true;
-			} break;
-			case 'S': {
-				settings->swscale_options = optarg;
-			} break;
-			case 'r': {
-				settings->video_fps_num = strtol(optarg, &next, 0);
-				if (*next == '/') {
-					settings->video_fps_den = strtol(next + 1, NULL, 0);
-				} else {
-					settings->video_fps_den = 1;
-				}
-
-				if (!settings->video_fps_den) {
-					fprintf(stderr, "Invalid frame rate denominator\n");
-					return -1;
-				}
-				i = settings->video_fps_num / settings->video_fps_den;
-				if (i < 1 || i > 60) {
-					fprintf(stderr, "Invalid frame rate: %d/%d (must be in 1-60 range)\n", settings->video_fps_num, settings->video_fps_den);
-					return -1;
-				}
-			} break;
-			case 'x': {
-				settings->cd_speed = strtol(optarg, NULL, 0);
-				if (settings->cd_speed < 1 || settings->cd_speed > 2) {
-					fprintf(stderr, "Invalid CD-ROM speed: %d (must be 1 or 2)\n", settings->cd_speed);
-					return -1;
-				}
-			} break;
-		}
-	}
-
-	// Some settings' (frequency, channels, interleave and alignment) default
-	// values are initialized here as they depend on the chosen format.
-	switch (settings->format) {
-		case FORMAT_XA:
-		case FORMAT_XACD:
-		case FORMAT_STR2:
-		case FORMAT_STR2CD:
-		case FORMAT_STR2V:
-			if (!settings->frequency) {
-				settings->frequency = PSX_AUDIO_XA_FREQ_DOUBLE;
-			} else if (settings->frequency != PSX_AUDIO_XA_FREQ_SINGLE && settings->frequency != PSX_AUDIO_XA_FREQ_DOUBLE) {
-				fprintf(
-					stderr, "Invalid XA-ADPCM frequency: %d Hz (must be %d or %d Hz)\n", settings->frequency,
-					PSX_AUDIO_XA_FREQ_SINGLE, PSX_AUDIO_XA_FREQ_DOUBLE
-				);
-				return -1;
-			}
-			if (!settings->channels) {
-				settings->channels = 2;
-			} else if (settings->channels > 2) {
-				fprintf(stderr, "Invalid XA-ADPCM channel count: %d (must be 1 or 2)\n", settings->channels);
-				return -1;
-			}
-			if (settings->interleave || settings->alignment) {
-				fprintf(stderr, "Interleave and frame size cannot be specified for this format\n");
-				return -1;
-			}
-			if (settings->loop) {
-				fprintf(stderr, "XA-ADPCM does not support loop markers\n");
-				return -1;
-			}
-			break;
-		case FORMAT_SPU:
-		case FORMAT_VAG:
-			if (!settings->frequency) {
-				settings->frequency = 44100;
-			}
-			if (settings->bits_per_sample != 4) {
-				fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample);
-				return -1;
-			}
-			if (!settings->channels) {
-				settings->channels = 1;
-			} else if (settings->channels > 1) {
-				fprintf(stderr, "Invalid SPU-ADPCM channel count: %d (must be 1)\n", settings->channels);
-				return -1;
-			}
-			if (settings->interleave) {
-				fprintf(stderr, "Interleave cannot be specified for this format\n");
-				return -1;
-			}
-			if (!settings->alignment) {
-				settings->alignment = 64;
-			}
-			break;
-		case FORMAT_SPUI:
-		case FORMAT_VAGI:
-			if (!settings->frequency) {
-				settings->frequency = 44100;
-			}
-			if (settings->bits_per_sample != 4) {
-				fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample);
-				return -1;
-			}
-			if (!settings->channels) {
-				settings->channels = 2;
-			}
-			if (!settings->interleave) {
-				settings->interleave = 2048;
-			}
-			if (!settings->alignment) {
-				settings->alignment = 2048;
-			}
-			break;
-		case FORMAT_SBS2:
-			if (settings->interleave) {
-				fprintf(stderr, "Interleave cannot be specified for this format\n");
-				return -1;
-			}
-			if (!settings->alignment) {
-				settings->alignment = 8192;
-			} else if (settings->alignment < 256) {
-				fprintf(stderr, "Invalid frame size: %d (must be at least 256)\n", settings->alignment);
-				return -1;
-			}
-			break;
-		default:
-			fprintf(stderr, "Output format must be specified\n");
-			return -1;
-	}
-
-	return optind;
-}
-
-int main(int argc, char **argv) {
-	settings_t settings;
-	int arg_offset;
-	FILE* output;
-
-	memset(&settings,0,sizeof(settings_t));
-
-	settings.quiet = false;
-	settings.show_progress = isatty(fileno(stderr));
-
-	settings.format = -1;
-	settings.file_number = 0;
-	settings.channel_number = 0;
-	settings.cd_speed = 2;
-	settings.channels = 0;
-	settings.frequency = 0;
-	settings.bits_per_sample = 4;
-	settings.interleave = 0;
-	settings.alignment = 0;
-	settings.loop = false;
-
-	// NOTE: ffmpeg/ffplay's .str demuxer has the frame rate hardcoded to 15fps
-	// so if you're messing around with this make sure you test generated files
-	// with another player and/or in an emulator.
-	settings.video_width = 320;
-	settings.video_height = 240;
-	settings.video_fps_num = 15;
-	settings.video_fps_den = 1;
-	settings.ignore_aspect_ratio = false;
-
-	settings.swresample_options = NULL;
-	settings.swscale_options = NULL;
-
-	settings.audio_samples = NULL;
-	settings.audio_sample_count = 0;
-	settings.video_frames = NULL;
-	settings.video_frame_count = 0;
-
-	settings.state_vid.huffman_encoding_map = NULL;
-	settings.state_vid.coeff_clamp_map = NULL;
-	settings.state_vid.dct_context = NULL;
-	for(int i = 0; i < 6; i++) {
-		settings.state_vid.dct_block_lists[i] = NULL;
-	}
-
-	if (argc < 2) {
-		print_help();
-		return 1;
-	}
-
-	arg_offset = parse_args_old(&settings, argc, argv);
-	if (arg_offset < 0) {
-		return 1;
-	} else if (argc < arg_offset + 2) {
-		print_help();
-		return 1;
-	}
-
-	bool has_audio =
-		(settings.format != FORMAT_STR2V) &&
-		(settings.format != FORMAT_SBS2);
-	bool has_video =
-		(settings.format == FORMAT_STR2) ||
-		(settings.format == FORMAT_STR2CD) ||
-		(settings.format == FORMAT_STR2V) ||
-		(settings.format == FORMAT_SBS2);
-
-	bool did_open_data = open_av_data(argv[arg_offset + 0], &settings,
-		has_audio, has_video, !has_video, has_video);
-	if (!did_open_data) {
-		fprintf(stderr, "Could not open input file!\n");
-		return 1;
-	}
-
-	output = fopen(argv[arg_offset + 1], "wb");
-	if (output == NULL) {
-		fprintf(stderr, "Could not open output file!\n");
-		return 1;
-	}
-
-	settings.start_time = time(NULL);
-	settings.last_progress_update = 0;
-
-	switch (settings.format) {
-		case FORMAT_XA:
-		case FORMAT_XACD:
-			if (!settings.quiet) {
-				fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
-					settings.frequency, settings.bits_per_sample,
-					(settings.channels == 2) ? "stereo" : "mono",
-					settings.file_number, settings.channel_number
-				);
-			}
-
-			encode_file_xa(&settings, output);
-			break;
-		case FORMAT_SPU:
-		case FORMAT_VAG:
-			if (!settings.quiet) {
-				fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz mono\n",
-					settings.frequency
-				);
-			}
-
-			encode_file_spu(&settings, output);
-			break;
-		case FORMAT_SPUI:
-		case FORMAT_VAGI:
-			if (!settings.quiet) {
-				fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n",
-					settings.frequency, settings.channels, settings.interleave
-				);
-			}
-
-			encode_file_spu_interleaved(&settings, output);
-			break;
-		case FORMAT_STR2:
-		case FORMAT_STR2CD:
-		case FORMAT_STR2V:
-			if (!settings.quiet) {
-				if (settings.decoder_state_av.audio_stream) {
-					fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
-						settings.frequency, settings.bits_per_sample,
-						(settings.channels == 2) ? "stereo" : "mono",
-						settings.file_number, settings.channel_number
-					);
-				}
-				fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n",
-					settings.video_width, settings.video_height,
-					(double)settings.video_fps_num / (double)settings.video_fps_den
-				);
-			}
-
-			encode_file_str(&settings, output);
-			break;
-		case FORMAT_SBS2:
-			if (!settings.quiet) {
-				fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n",
-					settings.video_width, settings.video_height,
-					(double)settings.video_fps_num / (double)settings.video_fps_den
-				);
-			}
-
-			encode_file_sbs(&settings, output);
-			break;
-	}
-
-	if (settings.show_progress) {
-		fprintf(stderr, "\nDone.\n");
-	}
-	fclose(output);
-	close_av_data(&settings);
-	return 0;
-}