diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c
index 03d298f..ecc7264 100644
--- a/libpsxav/adpcm.c
+++ b/libpsxav/adpcm.c
@@ -29,8 +29,8 @@ freely, subject to the following restrictions:
 #define SHIFT_RANGE_4BPS 12
 #define SHIFT_RANGE_8BPS 8
 
-#define ADPCM_FILTER_COUNT 5
-#define XA_ADPCM_FILTER_COUNT 4
+#define ADPCM_FILTER_COUNT     5
+#define XA_ADPCM_FILTER_COUNT  4
 #define SPU_ADPCM_FILTER_COUNT 5
 
 static const int16_t filter_k1[ADPCM_FILTER_COUNT] = {0, 60, 115, 98, 122};
@@ -54,7 +54,7 @@ static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_
 
 	int32_t s_min = 0;
 	int32_t s_max = 0;
-	for (int i = 0; i < 28; i++) {
+	for (int i = 0; i < PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; i++) {
 		int32_t raw_sample = (i >= sample_limit) ? 0 : samples[i * pitch];
 		int32_t previous_values = (k1*prev1 + k2*prev2 + (1<<5))>>6;
 		int32_t sample = raw_sample - previous_values;
@@ -87,7 +87,7 @@ static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, co
 
 	outstate->mse = 0;
 
-	for (int i = 0; i < 28; i++) {
+	for (int i = 0; i < PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; i++) {
 		int32_t sample = ((i >= sample_limit) ? 0 : samples[i * pitch]) + outstate->qerr;
 		int32_t previous_values = (k1*outstate->prev1 + k2*outstate->prev2 + (1<<5))>>6;
 		int32_t sample_enc = sample - previous_values;
@@ -205,25 +205,17 @@ uint32_t psx_audio_xa_get_buffer_size(psx_audio_xa_settings_t settings, int samp
 }
 
 uint32_t psx_audio_spu_get_buffer_size(int sample_count) {
-	return ((sample_count + 27) / 28) << 4;
+	return ((sample_count + PSX_AUDIO_SPU_SAMPLES_PER_BLOCK - 1) / PSX_AUDIO_SPU_SAMPLES_PER_BLOCK + 1) * PSX_AUDIO_SPU_BLOCK_SIZE; // +1 block: psx_audio_spu_encode_simple() may append a trailing loop block
 }
 
 uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t settings) {
 	return settings.format == PSX_AUDIO_XA_FORMAT_XA ? 2336 : 2352;
 }
 
-uint32_t psx_audio_spu_get_buffer_size_per_block(void) {
-	return 16;
-}
-
 uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings) {
 	return (((settings.bits_per_sample == 8) ? 112 : 224) >> (settings.stereo ? 1 : 0)) * 18;
 }
 
-uint32_t psx_audio_spu_get_samples_per_block(void) {
-	return 28;
-}
-
 uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings) {
 	// 1/2 interleave for 37800 Hz 8-bit stereo at 1x speed
 	int interleave = settings.stereo ? 2 : 4;
@@ -307,14 +299,14 @@ int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* sample
 }
 
 int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output) {
-	uint8_t prebuf[28];
+	uint8_t prebuf[PSX_AUDIO_SPU_SAMPLES_PER_BLOCK];
 	uint8_t *buffer = output;
 
-	for (int i = 0; i < sample_count; i += 28, buffer += 16) {
+	for (int i = 0; i < sample_count; i += PSX_AUDIO_SPU_SAMPLES_PER_BLOCK, buffer += PSX_AUDIO_SPU_BLOCK_SIZE) {
 		buffer[0] = encode(state, samples + i * pitch, sample_count - i, pitch, prebuf, 0, 1, SPU_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
 		buffer[1] = 0;
 
-		for (int j = 0; j < 28; j+=2) {
+		for (int j = 0; j < PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; j+=2) {
 			buffer[2 + (j>>1)] = (prebuf[j] & 0x0F) | (prebuf[j+1] << 4);
 		}
 	}
@@ -327,24 +319,24 @@ int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *out
 	memset(&state, 0, sizeof(psx_audio_encoder_channel_state_t));
 	int length = psx_audio_spu_encode(&state, samples, sample_count, 1, output);
 
-	if (length >= 32) {
+	if (length >= PSX_AUDIO_SPU_BLOCK_SIZE) {
+		uint8_t *last_block = output + length - PSX_AUDIO_SPU_BLOCK_SIZE;
+
 		if (loop_start < 0) {
-			//output[1] = PSX_AUDIO_SPU_LOOP_START;
-			output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_END;
+			last_block[1] |= PSX_AUDIO_SPU_LOOP_END;
+
+			// Insert trailing looping block
+			memset(output + length, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
+			output[length + 1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+
+			length += PSX_AUDIO_SPU_BLOCK_SIZE;
 		} else {
-			psx_audio_spu_set_flag_at_sample(output, loop_start, PSX_AUDIO_SPU_LOOP_START);
-			output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_REPEAT;
+			int loop_start_offset = loop_start / PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * PSX_AUDIO_SPU_BLOCK_SIZE;
+
+			last_block[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
+			output[loop_start_offset + 1] |= PSX_AUDIO_SPU_LOOP_START;
 		}
-	} else if (length >= 16) {
-		output[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
-		if (loop_start >= 0)
-			output[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
 	}
 
 	return length;
 }
-
-void psx_audio_spu_set_flag_at_sample(uint8_t* spu_data, int sample_pos, int flag) {
-	int buffer_pos = (sample_pos / 28) << 4;
-	spu_data[buffer_pos + 1] = flag;
-}
diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h
index 32eabaf..1b8aaa1 100644
--- a/libpsxav/libpsxav.h
+++ b/libpsxav/libpsxav.h
@@ -28,8 +28,13 @@ freely, subject to the following restrictions:
 
 // audio.c
 
-#define PSX_AUDIO_XA_FREQ_SINGLE 18900
-#define PSX_AUDIO_XA_FREQ_DOUBLE 37800
+#define PSX_AUDIO_SPU_BLOCK_SIZE        16
+#define PSX_AUDIO_SPU_SAMPLES_PER_BLOCK 28
+
+enum {
+	PSX_AUDIO_XA_FREQ_SINGLE = 18900,
+	PSX_AUDIO_XA_FREQ_DOUBLE = 37800
+};
 
 typedef enum {
 	PSX_AUDIO_XA_FORMAT_XA, // .xa file
@@ -56,23 +61,22 @@ typedef struct {
 	psx_audio_encoder_channel_state_t right;
 } psx_audio_encoder_state_t;
 
-#define PSX_AUDIO_SPU_LOOP_END 1
-#define PSX_AUDIO_SPU_LOOP_REPEAT 3
-#define PSX_AUDIO_SPU_LOOP_START 4
+enum { // Flag bits that adpcm.c ORs into byte 1 of an SPU-ADPCM block
+	PSX_AUDIO_SPU_LOOP_END    = 1 << 0, // bit 0
+	PSX_AUDIO_SPU_LOOP_REPEAT = 3 << 0, // bits 0 and 1 (includes the PSX_AUDIO_SPU_LOOP_END bit)
+	PSX_AUDIO_SPU_LOOP_START  = 1 << 2  // bit 2
+};
 
 uint32_t psx_audio_xa_get_buffer_size(psx_audio_xa_settings_t settings, int sample_count);
 uint32_t psx_audio_spu_get_buffer_size(int sample_count);
 uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t settings);
-uint32_t psx_audio_spu_get_buffer_size_per_block(void);
 uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings);
-uint32_t psx_audio_spu_get_samples_per_block(void);
 uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings);
 int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output);
 int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output);
 int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output);
 int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start);
 void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length);
-void psx_audio_spu_set_flag_at_sample(uint8_t* spu_data, int sample_pos, int flag);
 
 // cdrom.c
 
@@ -115,25 +119,29 @@ _Static_assert(sizeof(psx_cdrom_sector_mode2_t) == PSX_CDROM_SECTOR_SIZE, "Inval
 
 #define PSX_CDROM_SECTOR_XA_CHANNEL_MASK 0x1F
 
-#define PSX_CDROM_SECTOR_XA_SUBMODE_EOR     0x01
-#define PSX_CDROM_SECTOR_XA_SUBMODE_VIDEO   0x02
-#define PSX_CDROM_SECTOR_XA_SUBMODE_AUDIO   0x04
-#define PSX_CDROM_SECTOR_XA_SUBMODE_DATA    0x08
-#define PSX_CDROM_SECTOR_XA_SUBMODE_TRIGGER 0x10
-#define PSX_CDROM_SECTOR_XA_SUBMODE_FORM2   0x20
-#define PSX_CDROM_SECTOR_XA_SUBMODE_RT      0x40
-#define PSX_CDROM_SECTOR_XA_SUBMODE_EOF     0x80
+enum {
+	PSX_CDROM_SECTOR_XA_SUBMODE_EOR     = 1 << 0,
+	PSX_CDROM_SECTOR_XA_SUBMODE_VIDEO   = 1 << 1,
+	PSX_CDROM_SECTOR_XA_SUBMODE_AUDIO   = 1 << 2,
+	PSX_CDROM_SECTOR_XA_SUBMODE_DATA    = 1 << 3,
+	PSX_CDROM_SECTOR_XA_SUBMODE_TRIGGER = 1 << 4,
+	PSX_CDROM_SECTOR_XA_SUBMODE_FORM2   = 1 << 5,
+	PSX_CDROM_SECTOR_XA_SUBMODE_RT      = 1 << 6,
+	PSX_CDROM_SECTOR_XA_SUBMODE_EOF     = 1 << 7
+};
 
-#define PSX_CDROM_SECTOR_XA_CODING_MONO         0x00
-#define PSX_CDROM_SECTOR_XA_CODING_STEREO       0x01
-#define PSX_CDROM_SECTOR_XA_CODING_CHANNEL_MASK 0x03
-#define PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE  0x00
-#define PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE  0x04
-#define PSX_CDROM_SECTOR_XA_CODING_FREQ_MASK    0x0C
-#define PSX_CDROM_SECTOR_XA_CODING_BITS_4       0x00
-#define PSX_CDROM_SECTOR_XA_CODING_BITS_8       0x10
-#define PSX_CDROM_SECTOR_XA_CODING_BITS_MASK    0x30
-#define PSX_CDROM_SECTOR_XA_CODING_EMPHASIS     0x40
+enum {
+	PSX_CDROM_SECTOR_XA_CODING_MONO         = 0 << 0,
+	PSX_CDROM_SECTOR_XA_CODING_STEREO       = 1 << 0,
+	PSX_CDROM_SECTOR_XA_CODING_CHANNEL_MASK = 3 << 0,
+	PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE  = 0 << 2,
+	PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE  = 1 << 2,
+	PSX_CDROM_SECTOR_XA_CODING_FREQ_MASK    = 3 << 2,
+	PSX_CDROM_SECTOR_XA_CODING_BITS_4       = 0 << 4,
+	PSX_CDROM_SECTOR_XA_CODING_BITS_8       = 1 << 4,
+	PSX_CDROM_SECTOR_XA_CODING_BITS_MASK    = 3 << 4,
+	PSX_CDROM_SECTOR_XA_CODING_EMPHASIS     = 1 << 6
+};
 
 typedef enum {
 	PSX_CDROM_SECTOR_TYPE_MODE1,
diff --git a/psxavenc/args.h b/psxavenc/args.h
index 9249290..f0fab88 100644
--- a/psxavenc/args.h
+++ b/psxavenc/args.h
@@ -26,19 +26,19 @@ freely, subject to the following restrictions:
 
 #include <stdbool.h>
 
-#define NUM_FORMATS 11
+#define NUM_FORMATS   11
 #define NUM_BS_CODECS 3
 
 enum {
-	FLAG_IGNORE_OPTIONS = 1 << 0,
-	FLAG_QUIET = 1 << 1,
-	FLAG_HIDE_PROGRESS = 1 << 2,
-	FLAG_PRINT_HELP = 1 << 3,
-	FLAG_PRINT_VERSION = 1 << 4,
-	FLAG_SPU_LOOP_END = 1 << 5,
+	FLAG_IGNORE_OPTIONS       = 1 << 0,
+	FLAG_QUIET                = 1 << 1,
+	FLAG_HIDE_PROGRESS        = 1 << 2,
+	FLAG_PRINT_HELP           = 1 << 3,
+	FLAG_PRINT_VERSION        = 1 << 4,
+	FLAG_SPU_LOOP_END         = 1 << 5,
 	FLAG_SPU_NO_LEADING_DUMMY = 1 << 6,
-	FLAG_BS_IGNORE_ASPECT = 1 << 7,
-	FLAG_STR_TRAILING_AUDIO = 1 << 8
+	FLAG_BS_IGNORE_ASPECT     = 1 << 7,
+	FLAG_STR_TRAILING_AUDIO   = 1 << 8
 };
 
 typedef enum {
diff --git a/psxavenc/decoding.h b/psxavenc/decoding.h
index ccf0b65..311cb69 100644
--- a/psxavenc/decoding.h
+++ b/psxavenc/decoding.h
@@ -67,8 +67,8 @@ typedef struct {
 } decoder_t;
 
 enum {
-	DECODER_USE_AUDIO = 1 << 0,
-	DECODER_USE_VIDEO = 1 << 1,
+	DECODER_USE_AUDIO      = 1 << 0,
+	DECODER_USE_VIDEO      = 1 << 1,
 	DECODER_AUDIO_REQUIRED = 1 << 2,
 	DECODER_VIDEO_REQUIRED = 1 << 3
 };
diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c
index 7e508f5..e5d930b 100644
--- a/psxavenc/filefmt.c
+++ b/psxavenc/filefmt.c
@@ -136,7 +136,10 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *
 	strncpy((char*)(header + 0x20), &args->output_file[name_offset], 16);
 }
 
-void encode_file_xa(args_t *args, decoder_t *decoder, FILE *output) {
+// The functions below are some peak spaghetti code I would rewrite if that
+// didn't also require scrapping the rest of the codebase. -- spicyjpeg
+
+void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output) {
 	psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args);
 
 	int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
@@ -187,26 +190,37 @@ void encode_file_xa(args_t *args, decoder_t *decoder, FILE *output) {
 	}
 }
 
-void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) {
+void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) {
 	psx_audio_encoder_channel_state_t audio_state;
 	memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t));
 
-	int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
-	int block_size = psx_audio_spu_get_buffer_size_per_block();
-	int block_count;
-
 	// The header must be written after the data as we don't yet know the
 	// number of audio samples.
 	if (args->format == FORMAT_VAG)
 		fseek(output, VAG_HEADER_SIZE, SEEK_SET);
 
-	for (block_count = 0; ensure_av_data(decoder, audio_samples_per_block, 0); block_count++) {
+	uint8_t buffer[PSX_AUDIO_SPU_BLOCK_SIZE];
+	int block_count = 0;
+
+	if (!(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) {
+		// Insert leading silent block
+		memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
+
+		fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output);
+		block_count++;
+	}
+
+	int loop_start_block = -1;
+	// Offset by the leading dummy block already counted in block_count; the 64-bit product avoids int overflow
+	if (args->audio_loop_point >= 0)
+		loop_start_block = block_count + (int)(((int64_t)args->audio_loop_point * args->audio_frequency) / (PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * 1000));
+
+	for (; ensure_av_data(decoder, PSX_AUDIO_SPU_SAMPLES_PER_BLOCK, 0); block_count++) {
 		int samples_length = decoder->audio_sample_count;
 
-		if (samples_length > audio_samples_per_block)
-			samples_length = audio_samples_per_block;
+		if (samples_length > PSX_AUDIO_SPU_SAMPLES_PER_BLOCK)
+			samples_length = PSX_AUDIO_SPU_SAMPLES_PER_BLOCK;
 
-		uint8_t buffer[16];
 		int length = psx_audio_spu_encode(
 			&audio_state,
 			decoder->audio_samples,
@@ -215,15 +229,10 @@ void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) {
 			buffer
 		);
 
-		// TODO: implement proper loop flag support
-		if (false)
+		if (block_count == loop_start_block)
 			buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
-		if (decoder->end_of_input) {
-			if (args->flags & FLAG_SPU_LOOP_END)
-				buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
-			else
-			 	buffer[1] |= PSX_AUDIO_SPU_LOOP_END;
-		}
+		if ((args->flags & FLAG_SPU_LOOP_END) && decoder->end_of_input)
+			buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
 
 		retire_av_data(decoder, samples_length, 0);
 		fwrite(buffer, length, 1, output);
@@ -235,12 +244,21 @@ void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) {
 				stderr,
 				"\rBlock: %6d | Encoding speed: %5.2fx",
 				block_count,
-				(double)(block_count * audio_samples_per_block) / (double)(args->audio_frequency * t)
+				(double)(block_count * PSX_AUDIO_SPU_SAMPLES_PER_BLOCK) / (double)(args->audio_frequency * t)
 			);
 		}
 	}
 
-	int overflow = (block_count * block_size) % args->alignment;
+	if (!(args->flags & FLAG_SPU_LOOP_END)) {
+		// Insert trailing looping block
+		memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
+		buffer[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+
+		fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output);
+		block_count++;
+	}
+
+	int overflow = (block_count * PSX_AUDIO_SPU_BLOCK_SIZE) % args->alignment;
 
 	if (overflow) {
 		for (int i = 0; i < (args->alignment - overflow); i++)
@@ -248,15 +266,15 @@ void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) {
 	}
 	if (args->format == FORMAT_VAG) {
 		uint8_t header[VAG_HEADER_SIZE];
-		write_vag_header(args, block_count * block_size, header);
+		write_vag_header(args, block_count * PSX_AUDIO_SPU_BLOCK_SIZE, header);
 
 		fseek(output, 0, SEEK_SET);
 		fwrite(header, VAG_HEADER_SIZE, 1, output);
 	}
 }
 
-void encode_file_spui(args_t *args, decoder_t *decoder, FILE *output) {
-	int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels;
+void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) {
+	int audio_samples_per_chunk = args->audio_interleave / PSX_AUDIO_SPU_BLOCK_SIZE * PSX_AUDIO_SPU_SAMPLES_PER_BLOCK;
 
 	// NOTE: since the interleaved .vag format is not standardized, some tools
 	// (such as vgmstream) will not properly play files with interleave < 2048,
@@ -267,38 +285,52 @@ void encode_file_spui(args_t *args, decoder_t *decoder, FILE *output) {
 	int header_size = VAG_HEADER_SIZE + args->alignment - 1;
 	header_size -= header_size % args->alignment;
 
-	int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
-	int block_size = psx_audio_spu_get_buffer_size_per_block();
-	int audio_samples_per_chunk = args->audio_interleave / block_size * audio_samples_per_block;
-	int chunk_count;
-
 	if (args->format == FORMAT_VAGI)
 		fseek(output, header_size, SEEK_SET);
 
+	int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels;
 	psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size);
-	uint8_t *buffer = malloc(buffer_size);
 	memset(audio_state, 0, audio_state_size);
 
-	for (chunk_count = 0; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) {
+	uint8_t *buffer = malloc(buffer_size);
+	int chunk_count = 0;
+
+	for (; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) {
 		int samples_length = decoder->audio_sample_count / args->audio_channels;
-		if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk;
+		int buffer_offset = 0;
+
+		if (samples_length > audio_samples_per_chunk)
+			samples_length = audio_samples_per_chunk;
+
+		// Insert leading silent block: it takes one block of the first chunk, so encode one block's worth of samples less
+		if (chunk_count == 0 && !(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) {
+			buffer_offset = PSX_AUDIO_SPU_BLOCK_SIZE;
+			samples_length -= PSX_AUDIO_SPU_SAMPLES_PER_BLOCK;
+		}
 
 		for (int ch = 0; ch < args->audio_channels; ch++) {
 			memset(buffer, 0, buffer_size);
+
 			int length = psx_audio_spu_encode(
 				audio_state + ch,
 				decoder->audio_samples + ch,
 				samples_length,
 				args->audio_channels,
-				buffer
+				buffer + buffer_offset
 			);
 
-			if (length) {
-				// TODO: implement proper loop flag support
-				if (args->flags & FLAG_SPU_LOOP_END)
-					buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
-				else if (decoder->end_of_input)
-					buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_END;
+			if (length > 0) {
+				uint8_t *last_block = buffer + buffer_offset + length - PSX_AUDIO_SPU_BLOCK_SIZE; // length is relative to buffer + buffer_offset
+
+				if (args->flags & FLAG_SPU_LOOP_END) {
+					last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT;
+				} else if (decoder->end_of_input) {
+					// HACK: the trailing block should in theory be appended to
+					// the existing data, but it's easier to just zerofill and
+					// repurpose the last encoded block
+					memset(last_block, 0, PSX_AUDIO_SPU_BLOCK_SIZE);
+					last_block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
+				}
 			}
 
 			fwrite(buffer, buffer_size, 1, output);
@@ -332,10 +364,9 @@ void encode_file_spui(args_t *args, decoder_t *decoder, FILE *output) {
 	free(buffer);
 }
 
-void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
+void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) {
 	psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args);
 	int audio_samples_per_sector;
-	uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
 
 	int offset, sector_size;
 
@@ -349,7 +380,8 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
 
 	int interleave;
 	int video_sectors_per_block;
-	if (decoder->state.audio_stream) {
+
+	if (decoder->state.audio_stream != NULL) {
 		// 1/N audio, (N-1)/N video
 		audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
 		interleave = psx_audio_xa_get_sector_interleave(xa_settings) * args->str_cd_speed;
@@ -399,16 +431,24 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
 	for (int j = 0; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; j++) {
 		ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed);
 
-		if ((j%interleave) < video_sectors_per_block) {
-			// Video sector
+		uint8_t buffer[PSX_CDROM_SECTOR_SIZE];
+		bool is_video_sector;
+		// Either layout yields video_sectors_per_block video sectors per group of interleave sectors
+		if (args->flags & FLAG_STR_TRAILING_AUDIO)
+			is_video_sector = (j % interleave) < video_sectors_per_block;
+		else
+			is_video_sector = (j % interleave) >= (interleave - video_sectors_per_block);
+
+		if (is_video_sector) {
 			init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*) buffer, j);
 
 			int frames_used = encode_sector_str(&encoder, args->format, decoder->video_frames, buffer);
 			retire_av_data(decoder, 0, frames_used);
 		} else {
-			// Audio sector
 			int samples_length = decoder->audio_sample_count / args->audio_channels;
-			if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector;
+
+			if (samples_length > audio_samples_per_sector)
+				samples_length = audio_samples_per_sector;
 
 			// FIXME: this is an extremely hacky way to handle audio tracks
 			// shorter than the video track
@@ -438,7 +478,7 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
 			buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
 		}
 
-		if((j%interleave) < video_sectors_per_block)
+		if (is_video_sector)
 			psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)buffer, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1);
 
 		fwrite(buffer + offset, sector_size, 1, output);
@@ -461,7 +501,11 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) {
 	destroy_mdec_encoder(&encoder);
 }
 
-void encode_file_sbs(args_t *args, decoder_t *decoder, FILE *output) {
+void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output) {
+	fprintf(stderr, "Error: .str encoding with SPU-ADPCM audio is not implemented yet\n"); // TODO: implement; nothing is written to output
+}
+
+void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output) {
 	mdec_encoder_t encoder;
 	init_mdec_encoder(&encoder, args->video_codec, args->video_width, args->video_height);
 
diff --git a/psxavenc/filefmt.h b/psxavenc/filefmt.h
index 5f8eb38..9276160 100644
--- a/psxavenc/filefmt.h
+++ b/psxavenc/filefmt.h
@@ -32,4 +32,5 @@ void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output);
 void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output);
 void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output);
 void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output);
+void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output);
 void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output);
diff --git a/psxavenc/main.c b/psxavenc/main.c
index 78c0935..277aa26 100644
--- a/psxavenc/main.c
+++ b/psxavenc/main.c
@@ -120,7 +120,6 @@ int main(int argc, const char **argv) {
 
 		case FORMAT_STR:
 		case FORMAT_STRCD:
-		case FORMAT_STRSPU:
 		case FORMAT_STRV:
 			if (!(args.flags & FLAG_QUIET)) {
 				if (decoder.state.audio_stream)
@@ -147,6 +146,30 @@ int main(int argc, const char **argv) {
 			encode_file_str(&args, &decoder, output);
 			break;
 
+		case FORMAT_STRSPU:
+			if (!(args.flags & FLAG_QUIET)) {
+				if (decoder.state.audio_stream)
+					fprintf(
+						stderr,
+						"Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n",
+						args.audio_frequency,
+						args.audio_channels,
+						args.audio_interleave
+					);
+
+				fprintf(
+					stderr,
+					"Video format: %s, %dx%d, %.2f fps\n",
+					bs_codec_names[args.video_codec],
+					args.video_width,
+					args.video_height,
+					(double)args.str_fps_num / (double)args.str_fps_den
+				);
+			}
+
+			encode_file_strspu(&args, &decoder, output);
+			break;
+
 		case FORMAT_SBS:
 			if (!(args.flags & FLAG_QUIET))
 				fprintf(
diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c
index 0d945c3..2221764 100644
--- a/psxavenc/mdec.c
+++ b/psxavenc/mdec.c
@@ -23,6 +23,7 @@ freely, subject to the following restrictions:
 */
 
 #include <assert.h>
+#include <math.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -236,14 +237,16 @@ static const uint8_t dct_zagzig_table[8*8] = {
 };
 
 #if 0
-#define SF0 0x5a82 // cos(0/16 * pi) * sqrt(2)
-#define SF1 0x7d8a // cos(1/16 * pi) * 2
-#define SF2 0x7641 // cos(2/16 * pi) * 2
-#define SF3 0x6a6d // cos(3/16 * pi) * 2
-#define SF4 0x5a82 // cos(4/16 * pi) * 2
-#define SF5 0x471c // cos(5/16 * pi) * 2
-#define SF6 0x30fb // cos(6/16 * pi) * 2
-#define SF7 0x18f8 // cos(7/16 * pi) * 2
+enum {
+	SF0 = 0x5a82, // cos(0/16 * pi) * sqrt(2)
+	SF1 = 0x7d8a, // cos(1/16 * pi) * 2
+	SF2 = 0x7641, // cos(2/16 * pi) * 2
+	SF3 = 0x6a6d, // cos(3/16 * pi) * 2
+	SF4 = 0x5a82, // cos(4/16 * pi) * 2
+	SF5 = 0x471c, // cos(5/16 * pi) * 2
+	SF6 = 0x30fb, // cos(6/16 * pi) * 2
+	SF7 = 0x18f8  // cos(7/16 * pi) * 2
+};
 
 static const int16_t dct_scale_table[8*8] = {
 	SF0,  SF0,  SF0,  SF0,  SF0,  SF0,  SF0,  SF0,
@@ -525,7 +528,9 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int vide
 	if (
 		state->dct_context == NULL ||
 		state->ac_huffman_map == NULL ||
-		state->coeff_clamp_map == NULL
+		state->dc_huffman_map == NULL ||
+		state->coeff_clamp_map == NULL ||
+		state->delta_clamp_map == NULL
 	)
 		return false;
 
@@ -536,7 +541,7 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int vide
 	for (int i = 0; i < 6; i++) {
 		state->dct_block_lists[i] = malloc(dct_block_size);
 
-		if (!state->dct_block_lists[i])
+		if (state->dct_block_lists[i] == NULL)
 			return false;
 	}