From e457d59bb13e68102ca585b1f59c0343b57e23b2 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Mon, 15 May 2023 16:47:58 +0200 Subject: [PATCH 1/2] Move changes over from old PR --- libpsxav/adpcm.c | 132 +++++++------ libpsxav/libpsxav.h | 3 +- psxavenc/cdrom.c | 23 ++- psxavenc/common.h | 53 +++-- psxavenc/decoding.c | 325 ++++++++++++++++++------------- psxavenc/filefmt.c | 345 ++++++++++++++++++++++++++++----- psxavenc/mdec.c | 456 +++++++++++++++++++++----------------------- psxavenc/psxavenc.c | 370 ++++++++++++++++++++++++++++------- 8 files changed, 1134 insertions(+), 573 deletions(-) diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c index 931f876..2baa393 100644 --- a/libpsxav/adpcm.c +++ b/libpsxav/adpcm.c @@ -3,6 +3,7 @@ libpsxav: MDEC video + SPU/XA-ADPCM audio library Copyright (c) 2019, 2020 Adrian "asie" Siekierka Copyright (c) 2019 Ben "GreaseMonkey" Russell +Copyright (c) 2023 spicyjpeg This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -25,6 +26,9 @@ freely, subject to the following restrictions: #include #include "libpsxav.h" +#define SHIFT_RANGE_4BPS 12 +#define SHIFT_RANGE_8BPS 8 + #define ADPCM_FILTER_COUNT 5 #define XA_ADPCM_FILTER_COUNT 4 #define SPU_ADPCM_FILTER_COUNT 5 @@ -32,7 +36,7 @@ freely, subject to the following restrictions: static const int16_t filter_k1[ADPCM_FILTER_COUNT] = {0, 60, 115, 98, 122}; static const int16_t filter_k2[ADPCM_FILTER_COUNT] = {0, 0, -52, -55, -60}; -static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_t *samples, int pitch, int filter) { +static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, int filter, int shift_range) { // Assumption made: // // There is value in shifting right one step further to allow the nibbles to clip. 
@@ -51,7 +55,7 @@ static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_ int32_t s_min = 0; int32_t s_max = 0; for (int i = 0; i < 28; i++) { - int32_t raw_sample = samples[i * pitch]; + int32_t raw_sample = (i >= sample_limit) ? 0 : samples[i * pitch]; int32_t previous_values = (k1*prev1 + k2*prev2 + (1<<5))>>6; int32_t sample = raw_sample - previous_values; if (sample < s_min) { s_min = sample; } @@ -59,16 +63,18 @@ static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_ prev2 = prev1; prev1 = raw_sample; } - while(right_shift < 12 && (s_max>>right_shift) > +0x7) { right_shift += 1; }; - while(right_shift < 12 && (s_min>>right_shift) < -0x8) { right_shift += 1; }; + while(right_shift < shift_range && (s_max>>right_shift) > (+0x7FFF >> shift_range)) { right_shift += 1; }; + while(right_shift < shift_range && (s_min>>right_shift) < (-0x8000 >> shift_range)) { right_shift += 1; }; - int min_shift = 12 - right_shift; - assert(0 <= min_shift && min_shift <= 12); + int min_shift = shift_range - right_shift; + assert(0 <= min_shift && min_shift <= shift_range); return min_shift; } -static uint8_t attempt_to_encode_nibbles(psx_audio_encoder_channel_state_t *outstate, const psx_audio_encoder_channel_state_t *instate, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter, int sample_shift) { - uint8_t nondata_mask = ~(0x0F << data_shift); +static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, const psx_audio_encoder_channel_state_t *instate, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter, int sample_shift, int shift_range) { + uint8_t sample_mask = 0xFFFF >> shift_range; + uint8_t nondata_mask = ~(sample_mask << data_shift); + int min_shift = sample_shift; int k1 = filter_k1[filter]; int k2 = filter_k2[filter]; @@ -82,17 +88,17 @@ static uint8_t 
attempt_to_encode_nibbles(psx_audio_encoder_channel_state_t *outs outstate->mse = 0; for (int i = 0; i < 28; i++) { - int32_t sample = ((i * pitch) >= sample_limit ? 0 : samples[i * pitch]) + outstate->qerr; + int32_t sample = ((i >= sample_limit) ? 0 : samples[i * pitch]) + outstate->qerr; int32_t previous_values = (k1*outstate->prev1 + k2*outstate->prev2 + (1<<5))>>6; int32_t sample_enc = sample - previous_values; sample_enc <<= min_shift; - sample_enc += (1<<(12-1)); - sample_enc >>= 12; - if(sample_enc < -8) { sample_enc = -8; } - if(sample_enc > +7) { sample_enc = +7; } - sample_enc &= 0xF; + sample_enc += (1<<(shift_range-1)); + sample_enc >>= shift_range; + if(sample_enc < (-0x8000 >> shift_range)) { sample_enc = -0x8000 >> shift_range; } + if(sample_enc > (+0x7FFF >> shift_range)) { sample_enc = +0x7FFF >> shift_range; } + sample_enc &= sample_mask; - int32_t sample_dec = (int16_t) ((sample_enc&0xF) << 12); + int32_t sample_dec = (int16_t) ((sample_enc & sample_mask) << shift_range); sample_dec >>= min_shift; sample_dec += previous_values; if (sample_dec > +0x7FFF) { sample_dec = +0x7FFF; } @@ -114,14 +120,14 @@ static uint8_t attempt_to_encode_nibbles(psx_audio_encoder_channel_state_t *outs return hdr; } -static uint8_t encode_nibbles(psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter_count) { +static uint8_t encode(psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter_count, int shift_range) { psx_audio_encoder_channel_state_t proposed; int64_t best_mse = ((int64_t)1<<(int64_t)50); int best_filter = 0; int best_sample_shift = 0; for (int filter = 0; filter < filter_count; filter++) { - int true_min_shift = find_min_shift(state, samples, pitch, filter); + int true_min_shift = find_min_shift(state, samples, sample_limit, pitch, filter, shift_range); // Testing has shown 
that the optimal shift can be off the true minimum shift // by 1 in *either* direction. @@ -129,15 +135,15 @@ static uint8_t encode_nibbles(psx_audio_encoder_channel_state_t *state, int16_t int min_shift = true_min_shift - 1; int max_shift = true_min_shift + 1; if (min_shift < 0) { min_shift = 0; } - if (max_shift > 12) { max_shift = 12; } + if (max_shift > shift_range) { max_shift = shift_range; } for (int sample_shift = min_shift; sample_shift <= max_shift; sample_shift++) { // ignore header here - attempt_to_encode_nibbles( + attempt_to_encode( &proposed, state, samples, sample_limit, pitch, data, data_shift, data_pitch, - filter, sample_shift); + filter, sample_shift, shift_range); if (best_mse > proposed.mse) { best_mse = proposed.mse; @@ -148,46 +154,46 @@ static uint8_t encode_nibbles(psx_audio_encoder_channel_state_t *state, int16_t } // now go with the encoder - return attempt_to_encode_nibbles( + return attempt_to_encode( state, state, samples, sample_limit, pitch, data, data_shift, data_pitch, - best_filter, best_sample_shift); + best_filter, best_sample_shift, shift_range); } static void encode_block_xa(int16_t *audio_samples, int audio_samples_limit, uint8_t *data, psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state) { if (settings.bits_per_sample == 4) { if (settings.stereo) { - data[0] = encode_nibbles(&(state->left), audio_samples, audio_samples_limit, 2, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT); - data[1] = encode_nibbles(&(state->right), audio_samples + 1, audio_samples_limit - 1, 2, data + 0x10, 4, 4, XA_ADPCM_FILTER_COUNT); - data[2] = encode_nibbles(&(state->left), audio_samples + 56, audio_samples_limit - 56, 2, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT); - data[3] = encode_nibbles(&(state->right), audio_samples + 56 + 1, audio_samples_limit - 56 - 1, 2, data + 0x11, 4, 4, XA_ADPCM_FILTER_COUNT); - data[8] = encode_nibbles(&(state->left), audio_samples + 56*2, audio_samples_limit - 56*2, 2, data + 0x12, 0, 4, 
XA_ADPCM_FILTER_COUNT); - data[9] = encode_nibbles(&(state->right), audio_samples + 56*2 + 1, audio_samples_limit - 56*2 - 1, 2, data + 0x12, 4, 4, XA_ADPCM_FILTER_COUNT); - data[10] = encode_nibbles(&(state->left), audio_samples + 56*3, audio_samples_limit - 56*3, 2, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT); - data[11] = encode_nibbles(&(state->right), audio_samples + 56*3 + 1, audio_samples_limit - 56*3 - 1, 2, data + 0x13, 4, 4, XA_ADPCM_FILTER_COUNT); + data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 2, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[1] = encode(&(state->right), audio_samples + 1, audio_samples_limit, 2, data + 0x10, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[2] = encode(&(state->left), audio_samples + 56, audio_samples_limit - 28, 2, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[3] = encode(&(state->right), audio_samples + 56 + 1, audio_samples_limit - 28, 2, data + 0x11, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[8] = encode(&(state->left), audio_samples + 56*2, audio_samples_limit - 28*2, 2, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[9] = encode(&(state->right), audio_samples + 56*2 + 1, audio_samples_limit - 28*2, 2, data + 0x12, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[10] = encode(&(state->left), audio_samples + 56*3, audio_samples_limit - 28*3, 2, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[11] = encode(&(state->right), audio_samples + 56*3 + 1, audio_samples_limit - 28*3, 2, data + 0x13, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); } else { - data[0] = encode_nibbles(&(state->left), audio_samples, audio_samples_limit, 1, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT); - data[1] = encode_nibbles(&(state->right), audio_samples + 28, audio_samples_limit - 28, 1, data + 0x10, 4, 4, XA_ADPCM_FILTER_COUNT); - data[2] = encode_nibbles(&(state->left), audio_samples + 28*2, audio_samples_limit - 
28*2, 1, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT); - data[3] = encode_nibbles(&(state->right), audio_samples + 28*3, audio_samples_limit - 28*3, 1, data + 0x11, 4, 4, XA_ADPCM_FILTER_COUNT); - data[8] = encode_nibbles(&(state->left), audio_samples + 28*4, audio_samples_limit - 28*4, 1, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT); - data[9] = encode_nibbles(&(state->right), audio_samples + 28*5, audio_samples_limit - 28*5, 1, data + 0x12, 4, 4, XA_ADPCM_FILTER_COUNT); - data[10] = encode_nibbles(&(state->left), audio_samples + 28*6, audio_samples_limit - 28*6, 1, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT); - data[11] = encode_nibbles(&(state->right), audio_samples + 28*7, audio_samples_limit - 28*7, 1, data + 0x13, 4, 4, XA_ADPCM_FILTER_COUNT); + data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 1, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[1] = encode(&(state->left), audio_samples + 28, audio_samples_limit - 28, 1, data + 0x10, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[2] = encode(&(state->left), audio_samples + 28*2, audio_samples_limit - 28*2, 1, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[3] = encode(&(state->left), audio_samples + 28*3, audio_samples_limit - 28*3, 1, data + 0x11, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[8] = encode(&(state->left), audio_samples + 28*4, audio_samples_limit - 28*4, 1, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[9] = encode(&(state->left), audio_samples + 28*5, audio_samples_limit - 28*5, 1, data + 0x12, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[10] = encode(&(state->left), audio_samples + 28*6, audio_samples_limit - 28*6, 1, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); + data[11] = encode(&(state->left), audio_samples + 28*7, audio_samples_limit - 28*7, 1, data + 0x13, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); } } else { -/* if (settings->stereo) { - data[0] = 
encode_bytes(audio_samples, 2, data + 0x10); - data[1] = encode_bytes(audio_samples + 1, 2, data + 0x11); - data[2] = encode_bytes(audio_samples + 56, 2, data + 0x12); - data[3] = encode_bytes(audio_samples + 57, 2, data + 0x13); + if (settings.stereo) { + data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 2, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS); + data[1] = encode(&(state->right), audio_samples + 1, audio_samples_limit, 2, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS); + data[2] = encode(&(state->left), audio_samples + 56, audio_samples_limit - 28, 2, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS); + data[3] = encode(&(state->right), audio_samples + 56 + 1, audio_samples_limit - 28, 2, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS); } else { - data[0] = encode_bytes(audio_samples, 1, data + 0x10); - data[1] = encode_bytes(audio_samples + 28, 1, data + 0x11); - data[2] = encode_bytes(audio_samples + 56, 1, data + 0x12); - data[3] = encode_bytes(audio_samples + 84, 1, data + 0x13); - } */ + data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 1, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS); + data[1] = encode(&(state->left), audio_samples + 28, audio_samples_limit - 28, 1, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS); + data[2] = encode(&(state->left), audio_samples + 28*2, audio_samples_limit - 28*2, 1, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS); + data[3] = encode(&(state->left), audio_samples + 28*3, audio_samples_limit - 28*3, 1, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS); + } } } @@ -218,6 +224,14 @@ uint32_t psx_audio_spu_get_samples_per_block(void) { return 28; } +uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings) { + // 1/2 interleave for 37800 Hz 8-bit stereo at 1x speed + int interleave = settings.stereo ? 
2 : 4; + if (settings.frequency == PSX_AUDIO_XA_FREQ_SINGLE) { interleave <<= 1; } + if (settings.bits_per_sample == 4) { interleave <<= 1; } + return interleave; +} + static void psx_audio_xa_encode_init_sector(uint8_t *buffer, psx_audio_xa_settings_t settings) { if (settings.format == PSX_AUDIO_XA_FORMAT_XACD) { memset(buffer, 0, 2352); @@ -284,13 +298,13 @@ int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* sample return length; } -int psx_audio_spu_encode(psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output) { +int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output) { uint8_t prebuf[28]; uint8_t *buffer = output; uint8_t *data; for (int i = 0; i < sample_count; i += 28, buffer += 16) { - buffer[0] = encode_nibbles(&(state->left), samples + i, sample_count - i, 1, prebuf, 0, 1, SPU_ADPCM_FILTER_COUNT); + buffer[0] = encode(state, samples + i * pitch, sample_count - i, pitch, prebuf, 0, 1, SPU_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); buffer[1] = 0; for (int j = 0; j < 28; j+=2) { @@ -302,20 +316,22 @@ int psx_audio_spu_encode(psx_audio_encoder_state_t *state, int16_t* samples, int } int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start) { - psx_audio_encoder_state_t state; - memset(&state, 0, sizeof(psx_audio_encoder_state_t)); - int length = psx_audio_spu_encode(&state, samples, sample_count, output); + psx_audio_encoder_channel_state_t state; + memset(&state, 0, sizeof(psx_audio_encoder_channel_state_t)); + int length = psx_audio_spu_encode(&state, samples, sample_count, 1, output); if (length >= 32) { if (loop_start < 0) { - output[1] = 4; - output[length - 16 + 1] = 1; + //output[1] = PSX_AUDIO_SPU_LOOP_START; + output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_END; } else { - psx_audio_spu_set_flag_at_sample(output, loop_start, 4); - output[length - 16 + 1] = 3; + 
psx_audio_spu_set_flag_at_sample(output, loop_start, PSX_AUDIO_SPU_LOOP_START); + output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_REPEAT; } } else if (length >= 16) { - output[1] = loop_start >= 0 ? 7 : 5; + output[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; + if (loop_start >= 0) + output[1] |= PSX_AUDIO_SPU_LOOP_REPEAT; } return length; diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h index 828d8f8..5725df9 100644 --- a/libpsxav/libpsxav.h +++ b/libpsxav/libpsxav.h @@ -67,9 +67,10 @@ uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t setting uint32_t psx_audio_spu_get_buffer_size_per_block(void); uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings); uint32_t psx_audio_spu_get_samples_per_block(void); +uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings); int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output); int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output); -int psx_audio_spu_encode(psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output); +int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output); int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start); int psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length); void psx_audio_spu_set_flag_at_sample(uint8_t* spu_data, int sample_pos, int flag); diff --git a/psxavenc/cdrom.c b/psxavenc/cdrom.c index fe6a83f..c73aac4 100644 --- a/psxavenc/cdrom.c +++ b/psxavenc/cdrom.c @@ -24,15 +24,22 @@ freely, subject to the following restrictions: #include "common.h" void init_sector_buffer_video(uint8_t *buffer, settings_t *settings) { - memset(buffer,0,2352); - memset(buffer+0x001,0xFF,10); + 
int offset; + if (settings->format == FORMAT_STR2CD) { + memset(buffer, 0, 2352); + memset(buffer+0x001, 0xFF, 10); + buffer[0x00F] = 0x02; + offset = 0x10; + } else { + memset(buffer, 0, 2336); + offset = 0; + } - buffer[0x00F] = 0x02; - buffer[0x010] = settings->file_number; - buffer[0x011] = settings->channel_number & 0x1F; - buffer[0x012] = 0x08 | 0x40; - buffer[0x013] = 0x00; - memcpy(buffer + 0x014, buffer + 0x010, 4); + buffer[offset+0] = settings->file_number; + buffer[offset+1] = settings->channel_number & 0x1F; + buffer[offset+2] = 0x08 | 0x40; + buffer[offset+3] = 0x00; + memcpy(buffer + offset + 4, buffer + offset, 4); } void calculate_edc_data(uint8_t *buffer) diff --git a/psxavenc/common.h b/psxavenc/common.h index e43dbd7..857031c 100644 --- a/psxavenc/common.h +++ b/psxavenc/common.h @@ -28,6 +28,9 @@ freely, subject to the following restrictions: #include #include #include +#include +#include +#include #include #include @@ -36,27 +39,33 @@ freely, subject to the following restrictions: #include #include +#define NUM_FORMATS 9 #define FORMAT_XA 0 #define FORMAT_XACD 1 #define FORMAT_SPU 2 -#define FORMAT_STR2 3 +#define FORMAT_SPUI 3 +#define FORMAT_VAG 4 +#define FORMAT_VAGI 5 +#define FORMAT_STR2 6 +#define FORMAT_STR2CD 7 +#define FORMAT_SBS2 8 -#define MAX_UNMUXED_BLOCKS 9 typedef struct { int frame_index; - int frame_block_index; - int frame_block_count; + int frame_data_offset; + int frame_max_size; int frame_block_base_overflow; int frame_block_overflow_num; int frame_block_overflow_den; uint16_t bits_value; int bits_left; - uint8_t unmuxed[2016*MAX_UNMUXED_BLOCKS]; + uint8_t *frame_output; int bytes_used; int blocks_used; int uncomp_hwords_used; int quant_scale; - int32_t *dct_block_lists[6]; + int quant_scale_sum; + float *dct_block_lists[6]; } vid_encoder_state_t; typedef struct { @@ -69,8 +78,6 @@ typedef struct { AVStream* video_stream; AVCodecContext* audio_codec_context; AVCodecContext* video_codec_context; - AVCodec* audio_codec; - 
AVCodec* video_codec; struct SwrContext* resampler; struct SwsContext* scaler; AVFrame* frame; @@ -81,17 +88,28 @@ typedef struct { } av_decoder_state_t; typedef struct { + bool quiet; + bool show_progress; + int format; // FORMAT_* - bool stereo; // false or true + int channels; + int cd_speed; // 1 or 2 int frequency; // 18900 or 37800 Hz int bits_per_sample; // 4 or 8 int file_number; // 00-FF int channel_number; // 00-1F + int interleave; + int alignment; + bool loop; int video_width; int video_height; int video_fps_num; // FPS numerator int video_fps_den; // FPS denominator + bool ignore_aspect_ratio; + + char *swresample_options; + char *swscale_options; int16_t *audio_samples; int audio_sample_count; @@ -99,8 +117,11 @@ typedef struct { int video_frame_count; av_decoder_state_t decoder_state_av; - vid_encoder_state_t state_vid; + bool end_of_input; + + time_t start_time; + time_t last_progress_update; } settings_t; // cdrom.c @@ -108,17 +129,19 @@ void init_sector_buffer_video(uint8_t *buffer, settings_t *settings); void calculate_edc_data(uint8_t *buffer); // decoding.c -bool open_av_data(const char *filename, settings_t *settings); +bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required); bool poll_av_data(settings_t *settings); bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames); -void pull_all_av_data(settings_t *settings); void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames); void close_av_data(settings_t *settings); // filefmt.c -void encode_file_spu(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output); -void encode_file_xa(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output); +void encode_file_spu(settings_t *settings, FILE *output); +void encode_file_spu_interleaved(settings_t *settings, FILE *output); +void 
encode_file_xa(settings_t *settings, FILE *output); void encode_file_str(settings_t *settings, FILE *output); +void encode_file_sbs(settings_t *settings, FILE *output); // mdec.c -void encode_block_str(uint8_t *video_frames, int video_frame_count, uint8_t *output, settings_t *settings); +void encode_frame_bs(uint8_t *video_frame, settings_t *settings); +void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings); diff --git a/psxavenc/decoding.c b/psxavenc/decoding.c index 48aff71..3c573d6 100644 --- a/psxavenc/decoding.c +++ b/psxavenc/decoding.c @@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend Copyright (c) 2019, 2020 Adrian "asie" Siekierka Copyright (c) 2019 Ben "GreaseMonkey" Russell +Copyright (c) 2023 spicyjpeg This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -25,7 +26,7 @@ freely, subject to the following restrictions: static void poll_av_packet(settings_t *settings, AVPacket *packet); -int decode_audio_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) { +int decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) { int ret; if (packet != NULL) { @@ -44,29 +45,8 @@ int decode_audio_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, A } } -int decode_video_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) { - int ret; - - if (packet != NULL) { - ret = avcodec_send_packet(codec, packet); - if (ret != 0) { - return 0; - } - } - - ret = avcodec_receive_frame(codec, frame); - if (ret >= 0) { - *frame_size = ret; - return 1; - } else { - return ret == AVERROR(EAGAIN) ? 
1 : 0; - } -} - -bool open_av_data(const char *filename, settings_t *settings) +bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required) { - AVPacket packet; - av_decoder_state_t* av = &(settings->decoder_state_av); av->video_next_pts = 0.0; av->frame = NULL; @@ -79,11 +59,13 @@ bool open_av_data(const char *filename, settings_t *settings) av->video_stream = NULL; av->audio_codec_context = NULL; av->video_codec_context = NULL; - av->audio_codec = NULL; - av->video_codec = NULL; av->resampler = NULL; av->scaler = NULL; + if (settings->quiet) { + av_log_set_level(AV_LOG_QUIET); + } + av->format = avformat_alloc_context(); if (avformat_open_input(&(av->format), filename, NULL, NULL)) { return false; @@ -92,89 +74,157 @@ bool open_av_data(const char *filename, settings_t *settings) return false; } - for (int i = 0; i < av->format->nb_streams; i++) { - if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { - if (av->audio_stream_index >= 0) { - fprintf(stderr, "open_av_data: found multiple audio tracks?\n"); - return false; + if (use_audio) { + for (int i = 0; i < av->format->nb_streams; i++) { + if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { + if (av->audio_stream_index >= 0) { + fprintf(stderr, "Input file must have a single audio track\n"); + return false; + } + av->audio_stream_index = i; } - av->audio_stream_index = i; } - } - if (av->audio_stream_index == -1) { - return false; - } - - for (int i = 0; i < av->format->nb_streams; i++) { - if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { - if (av->video_stream_index >= 0) { - fprintf(stderr, "open_av_data: found multiple video tracks?\n"); - return false; - } - av->video_stream_index = i; + if (audio_required && av->audio_stream_index == -1) { + fprintf(stderr, "Input file has no audio data\n"); + return false; } } - av->audio_stream = 
av->format->streams[av->audio_stream_index]; + if (use_video) { + for (int i = 0; i < av->format->nb_streams; i++) { + if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { + if (av->video_stream_index >= 0) { + fprintf(stderr, "Input file must have a single video track\n"); + return false; + } + av->video_stream_index = i; + } + } + if (video_required && av->video_stream_index == -1) { + fprintf(stderr, "Input file has no video data\n"); + return false; + } + } + + av->audio_stream = (av->audio_stream_index != -1 ? av->format->streams[av->audio_stream_index] : NULL); av->video_stream = (av->video_stream_index != -1 ? av->format->streams[av->video_stream_index] : NULL); - av->audio_codec = avcodec_find_decoder(av->audio_stream->codecpar->codec_id); - av->audio_codec_context = avcodec_alloc_context3(av->audio_codec); - if (av->audio_codec_context == NULL) { - return false; - } - if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0) { - return false; - } - if (avcodec_open2(av->audio_codec_context, av->audio_codec, NULL) < 0) { - return false; - } - av->resampler = swr_alloc(); - av_opt_set_int(av->resampler, "in_channel_count", av->audio_codec_context->channels, 0); - av_opt_set_int(av->resampler, "in_channel_layout", av->audio_codec_context->channel_layout, 0); - av_opt_set_int(av->resampler, "in_sample_rate", av->audio_codec_context->sample_rate, 0); - av_opt_set_sample_fmt(av->resampler, "in_sample_fmt", av->audio_codec_context->sample_fmt, 0); + if (av->audio_stream != NULL) { + const AVCodec *codec = avcodec_find_decoder(av->audio_stream->codecpar->codec_id); + av->audio_codec_context = avcodec_alloc_context3(codec); + if (av->audio_codec_context == NULL) { + return false; + } + if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0) { + return false; + } + if (avcodec_open2(av->audio_codec_context, codec, NULL) < 0) { + return false; + } - av->sample_count_mul = 
settings->stereo ? 2 : 1; - av_opt_set_int(av->resampler, "out_channel_count", settings->stereo ? 2 : 1, 0); - av_opt_set_int(av->resampler, "out_channel_layout", settings->stereo ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO, 0); - av_opt_set_int(av->resampler, "out_sample_rate", settings->frequency, 0); - av_opt_set_sample_fmt(av->resampler, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0); + AVChannelLayout layout; + layout.nb_channels = settings->channels; + if (settings->channels <= 2) { + layout.order = AV_CHANNEL_ORDER_NATIVE; + layout.u.mask = (settings->channels == 2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO; + } else { + layout.order = AV_CHANNEL_ORDER_UNSPEC; + } + if (!settings->quiet && settings->channels > av->audio_codec_context->ch_layout.nb_channels) { + fprintf(stderr, "Warning: input file has less than %d channels\n", settings->channels); + } - if (swr_init(av->resampler) < 0) { - return false; + av->sample_count_mul = settings->channels; + if (swr_alloc_set_opts2( + &av->resampler, + &layout, + AV_SAMPLE_FMT_S16, + settings->frequency, + &av->audio_codec_context->ch_layout, + av->audio_codec_context->sample_fmt, + av->audio_codec_context->sample_rate, + 0, + NULL + ) < 0) { + return false; + } + if (settings->swresample_options) { + if (av_opt_set_from_string(av->resampler, settings->swresample_options, NULL, "=", ":,") < 0) { + return false; + } + } + + if (swr_init(av->resampler) < 0) { + return false; + } } if (av->video_stream != NULL) { - av->video_codec = avcodec_find_decoder(av->video_stream->codecpar->codec_id); - av->video_codec_context = avcodec_alloc_context3(av->video_codec); + const AVCodec *codec = avcodec_find_decoder(av->video_stream->codecpar->codec_id); + av->video_codec_context = avcodec_alloc_context3(codec); if(av->video_codec_context == NULL) { return false; } if (avcodec_parameters_to_context(av->video_codec_context, av->video_stream->codecpar) < 0) { return false; } - if (avcodec_open2(av->video_codec_context, av->video_codec, NULL) 
< 0) { + if (avcodec_open2(av->video_codec_context, codec, NULL) < 0) { return false; } + if (!settings->quiet && ( + settings->video_width > av->video_codec_context->width || + settings->video_height > av->video_codec_context->height + )) { + fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n", + settings->video_width, settings->video_height + ); + } + if (!settings->ignore_aspect_ratio) { + // Reduce the provided size so that it matches the input file's + // aspect ratio. + double src_ratio = (double)av->video_codec_context->width / (double)av->video_codec_context->height; + double dst_ratio = (double)settings->video_width / (double)settings->video_height; + if (src_ratio < dst_ratio) { + settings->video_width = (int)((double)settings->video_height * src_ratio + 15.0) & ~15; + } else { + settings->video_height = (int)((double)settings->video_width / src_ratio + 15.0) & ~15; + } + } + av->scaler = sws_getContext( av->video_codec_context->width, av->video_codec_context->height, av->video_codec_context->pix_fmt, settings->video_width, settings->video_height, - AV_PIX_FMT_RGBA, + AV_PIX_FMT_NV21, SWS_BICUBIC, NULL, NULL, - NULL); - + NULL + ); + // Is this even necessary? 
-- spicyjpeg + sws_setColorspaceDetails( + av->scaler, + sws_getCoefficients(av->video_codec_context->colorspace), + (av->video_codec_context->color_range == AVCOL_RANGE_JPEG), + sws_getCoefficients(SWS_CS_ITU601), + true, + 0, + 0, + 0 + ); + if (settings->swscale_options) { + if (av_opt_set_from_string(av->scaler, settings->swscale_options, NULL, "=", ":,") < 0) { + return false; + } + } + av->video_frame_src_size = 4*av->video_codec_context->width*av->video_codec_context->height; - av->video_frame_dst_size = 4*settings->video_width*settings->video_height; + av->video_frame_dst_size = 3*settings->video_width*settings->video_height/2; } - av_init_packet(&packet); av->frame = av_frame_alloc(); if (av->frame == NULL) { return false; @@ -184,6 +234,7 @@ bool open_av_data(const char *filename, settings_t *settings) settings->audio_sample_count = 0; settings->video_frames = NULL; settings->video_frame_count = 0; + settings->end_of_input = false; return true; } @@ -195,7 +246,7 @@ static void poll_av_packet_audio(settings_t *settings, AVPacket *packet) int frame_size, frame_sample_count; uint8_t *buffer[1]; - if (decode_audio_frame(av->audio_codec_context, av->frame, &frame_size, packet)) { + if (decode_frame(av->audio_codec_context, av->frame, &frame_size, packet)) { size_t buffer_size = sizeof(int16_t) * av->sample_count_mul * swr_get_out_samples(av->resampler, av->frame->nb_samples); buffer[0] = malloc(buffer_size); memset(buffer[0], 0, buffer_size); @@ -212,54 +263,61 @@ static void poll_av_packet_video(settings_t *settings, AVPacket *packet) av_decoder_state_t* av = &(settings->decoder_state_av); int frame_size; + double pts_step = ((double)1.0*(double)settings->video_fps_den)/(double)settings->video_fps_num; - if (decode_video_frame(av->video_codec_context, av->frame, &frame_size, packet)) { + int plane_size = settings->video_width*settings->video_height; + int dst_strides[2] = { + settings->video_width, settings->video_width + }; + + if 
(decode_frame(av->video_codec_context, av->frame, &frame_size, packet)) { + if (!av->frame->width || !av->frame->height || !av->frame->data) { + return; + } + + // Some files seem to have timestamps starting from a negative value + // (but otherwise valid) for whatever reason. double pts = (((double)av->frame->pts)*(double)av->video_stream->time_base.num)/av->video_stream->time_base.den; - //fprintf(stderr, "%f\n", pts); - // Drop frames with negative PTS values - if(pts < 0.0) { - // do nothing + //if (pts < 0.0) { + //return; + //} + if (settings->video_frame_count >= 1 && pts < av->video_next_pts) { return; } - if((settings->video_frame_count) >= 1 && pts < av->video_next_pts) { - // do nothing - return; - } - if((settings->video_frame_count) < 1) { + if ((settings->video_frame_count) < 1) { av->video_next_pts = pts; + } else { + av->video_next_pts += pts_step; } - double pts_step = ((double)1.0*(double)settings->video_fps_den)/(double)settings->video_fps_num; //fprintf(stderr, "%d %f %f %f\n", (settings->video_frame_count), pts, av->video_next_pts, pts_step); - av->video_next_pts += pts_step; - // FIXME: increasing framerate doesn't fill it in with duplicate frames! - assert(av->video_next_pts > pts); - //size_t buffer_size = frame_count_mul; - //buffer[0] = malloc(buffer_size); - //memset(buffer[0], 0, buffer_size); - settings->video_frames = realloc(settings->video_frames, (settings->video_frame_count + 1) * av->video_frame_dst_size); - int dst_strides[1] = { - settings->video_width*4, + + // Insert duplicate frames if the frame rate of the input stream is + // lower than the target frame rate. 
+ int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step); + if (dupe_frames < 0) dupe_frames = 0; + settings->video_frames = realloc( + settings->video_frames, + (settings->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size + ); + + for (; dupe_frames; dupe_frames--) { + memcpy( + (settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count), + (settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count-1), + av->video_frame_dst_size + ); + settings->video_frame_count += 1; + av->video_next_pts += pts_step; + } + + uint8_t *dst_frame = (settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count); + uint8_t *dst_pointers[2] = { + dst_frame, dst_frame + plane_size }; - uint8_t *dst_pointers[1] = { - (settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count), - }; - sws_scale(av->scaler, av->frame->data, av->frame->linesize, 0, av->frame->height, dst_pointers, dst_strides); + sws_scale(av->scaler, (const uint8_t *const *) av->frame->data, av->frame->linesize, 0, av->frame->height, dst_pointers, dst_strides); settings->video_frame_count += 1; - //free(buffer[0]); - } -} - -static void poll_av_packet(settings_t *settings, AVPacket *packet) -{ - av_decoder_state_t* av = &(settings->decoder_state_av); - - if (packet->stream_index == av->audio_stream_index) { - poll_av_packet_audio(settings, packet); - } - else if (packet->stream_index == av->video_stream_index) { - poll_av_packet_video(settings, packet); } } @@ -268,29 +326,38 @@ bool poll_av_data(settings_t *settings) av_decoder_state_t* av = &(settings->decoder_state_av); AVPacket packet; + if (settings->end_of_input) { + return false; + } + if (av_read_frame(av->format, &packet) >= 0) { - poll_av_packet(settings, &packet); + if (packet.stream_index == av->audio_stream_index) { + poll_av_packet_audio(settings, &packet); + } else if (packet.stream_index == av->video_stream_index) { + 
poll_av_packet_video(settings, &packet); + } av_packet_unref(&packet); return true; } else { // out is always padded out with 4032 "0" samples, this makes calculations elsewhere easier - memset((settings->audio_samples) + (settings->audio_sample_count), 0, 4032 * av->sample_count_mul * sizeof(int16_t)); + if (av->audio_stream) { + memset((settings->audio_samples) + (settings->audio_sample_count), 0, 4032 * av->sample_count_mul * sizeof(int16_t)); + } + settings->end_of_input = true; return false; } } bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames) { - // - av_decoder_state_t* av = &(settings->decoder_state_av); - - while (settings->audio_sample_count < needed_audio_samples || settings->video_frame_count < needed_video_frames) { //fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames); - if(!poll_av_data(settings)) { - //fprintf(stderr, "cannot ensure\n"); - return false; + if (!poll_av_data(settings)) { + // Keep returning true even if the end of the input file has been + // reached, if the buffer is not yet completely empty. 
+ return (settings->audio_sample_count || !needed_audio_samples) + && (settings->video_frame_count || !needed_video_frames); } } //fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames); @@ -298,16 +365,6 @@ bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_v return true; } -void pull_all_av_data(settings_t *settings) -{ - while (poll_av_data(settings)) { - // do nothing - } - - fprintf(stderr, "Loaded %d samples.\n", settings->audio_sample_count); - fprintf(stderr, "Loaded %d frames.\n", settings->video_frame_count); -} - void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames) { av_decoder_state_t* av = &(settings->decoder_state_av); @@ -319,14 +376,14 @@ void retire_av_data(settings_t *settings, int retired_audio_samples, int retired int sample_size = sizeof(int16_t); if (settings->audio_sample_count > retired_audio_samples) { memmove(settings->audio_samples, settings->audio_samples + retired_audio_samples, (settings->audio_sample_count - retired_audio_samples)*sample_size); - settings->audio_sample_count -= retired_audio_samples; } + settings->audio_sample_count -= retired_audio_samples; int frame_size = av->video_frame_dst_size; if (settings->video_frame_count > retired_video_frames) { memmove(settings->video_frames, settings->video_frames + retired_video_frames*frame_size, (settings->video_frame_count - retired_video_frames)*frame_size); - settings->video_frame_count -= retired_video_frames; } + settings->video_frame_count -= retired_video_frames; } void close_av_data(settings_t *settings) diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c index 51db14e..873b558 100644 --- a/psxavenc/filefmt.c +++ b/psxavenc/filefmt.c @@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend Copyright (c) 2019, 2020 Adrian "asie" Siekierka Copyright (c) 2019 Ben "GreaseMonkey" Russell +Copyright (c) 
2023 spicyjpeg This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -24,16 +25,29 @@ freely, subject to the following restrictions: #include "common.h" #include "libpsxav.h" +static time_t get_elapsed_time(settings_t *settings) { + if (!settings->show_progress) { + return 0; + } + time_t t = time(NULL) - settings->start_time; + if (t <= settings->last_progress_update) { + return 0; + } + settings->last_progress_update = t; + return t; +} + static psx_audio_xa_settings_t settings_to_libpsxav_xa_audio(settings_t *settings) { psx_audio_xa_settings_t new_settings; new_settings.bits_per_sample = settings->bits_per_sample; new_settings.frequency = settings->frequency; - new_settings.stereo = settings->stereo; + new_settings.stereo = settings->channels == 2; new_settings.file_number = settings->file_number; new_settings.channel_number = settings->channel_number; switch (settings->format) { case FORMAT_XA: + case FORMAT_STR2: new_settings.format = PSX_AUDIO_XA_FORMAT_XA; break; default: @@ -44,93 +58,324 @@ static psx_audio_xa_settings_t settings_to_libpsxav_xa_audio(settings_t *setting return new_settings; }; -void encode_file_spu(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output) { - psx_audio_encoder_state_t audio_state; +void write_vag_header(int size_per_channel, uint8_t *header, settings_t *settings) { + // Magic + header[0x00] = 'V'; + header[0x01] = 'A'; + header[0x02] = 'G'; + header[0x03] = settings->interleave ? 
'i' : 'p'; + + // Version (big-endian) + header[0x04] = 0x00; + header[0x05] = 0x00; + header[0x06] = 0x00; + header[0x07] = 0x20; + + // Interleave (little-endian) + header[0x08] = (uint8_t)settings->interleave; + header[0x09] = (uint8_t)(settings->interleave>>8); + header[0x0a] = (uint8_t)(settings->interleave>>16); + header[0x0b] = (uint8_t)(settings->interleave>>24); + + // Length of data for each channel (big-endian) + header[0x0c] = (uint8_t)(size_per_channel>>24); + header[0x0d] = (uint8_t)(size_per_channel>>16); + header[0x0e] = (uint8_t)(size_per_channel>>8); + header[0x0f] = (uint8_t)size_per_channel; + + // Sample rate (big-endian) + header[0x10] = (uint8_t)(settings->frequency>>24); + header[0x11] = (uint8_t)(settings->frequency>>16); + header[0x12] = (uint8_t)(settings->frequency>>8); + header[0x13] = (uint8_t)settings->frequency; + + // Number of channels (little-endian) + header[0x1e] = (uint8_t)settings->channels; + header[0x1f] = 0x00; + + // Filename + //strncpy(header + 0x20, "psxavenc", 16); + memset(header + 0x20, 0, 16); +} + +void encode_file_spu(settings_t *settings, FILE *output) { + psx_audio_encoder_channel_state_t audio_state; int audio_samples_per_block = psx_audio_spu_get_samples_per_block(); + int block_size = psx_audio_spu_get_buffer_size_per_block(); uint8_t buffer[16]; + int block_count; - memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); + memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t)); - for (int i = 0; i < audio_sample_count; i += audio_samples_per_block) { - int samples_length = audio_sample_count - i; + // The header must be written after the data as we don't yet know the + // number of audio samples. 
+ if (settings->format == FORMAT_VAG) { + fseek(output, 48, SEEK_SET); + } + + for (block_count = 0; ensure_av_data(settings, audio_samples_per_block, 0); block_count++) { + int samples_length = settings->audio_sample_count; if (samples_length > audio_samples_per_block) samples_length = audio_samples_per_block; - int length = psx_audio_spu_encode(&audio_state, audio_samples + i, samples_length, buffer); - if (i == 0) { - buffer[1] = PSX_AUDIO_SPU_LOOP_START; - } else if ((i + audio_samples_per_block) >= audio_sample_count) { - buffer[1] = PSX_AUDIO_SPU_LOOP_END; + + int length = psx_audio_spu_encode(&audio_state, settings->audio_samples, samples_length, 1, buffer); + if (!block_count) { + // This flag is not required as the SPU already resets the loop + // address when starting playback of a sample. + //buffer[1] |= PSX_AUDIO_SPU_LOOP_START; } + if (settings->end_of_input) { + buffer[1] |= settings->loop ? PSX_AUDIO_SPU_LOOP_REPEAT : PSX_AUDIO_SPU_LOOP_END; + } + + retire_av_data(settings, samples_length, 0); fwrite(buffer, length, 1, output); + + time_t t = get_elapsed_time(settings); + if (t) { + fprintf(stderr, "\rBlock: %6d | Encoding speed: %5.2fx", + block_count, + (double)(block_count*audio_samples_per_block) / (double)(settings->frequency*t) + ); + } + } + + if (settings->format == FORMAT_VAG) { + uint8_t header[48]; + memset(header, 0, 48); + write_vag_header(block_count*block_size, header, settings); + fseek(output, 0, SEEK_SET); + fwrite(header, 48, 1, output); } } -void encode_file_xa(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output) { +void encode_file_spu_interleaved(settings_t *settings, FILE *output) { + int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * settings->channels; + + // NOTE: since the interleaved .vag format is not standardized, some tools + // (such as vgmstream) will not properly play files with interleave < 2048, + // alignment != 2048 or channels != 2. 
+ int buffer_size = settings->interleave + settings->alignment - 1; + buffer_size -= buffer_size % settings->alignment; + int header_size = 48 + settings->alignment - 1; + header_size -= header_size % settings->alignment; + + psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size); + uint8_t *buffer = malloc(buffer_size); + int audio_samples_per_block = psx_audio_spu_get_samples_per_block(); + int block_size = psx_audio_spu_get_buffer_size_per_block(); + int audio_samples_per_chunk = settings->interleave / block_size * audio_samples_per_block; + int chunk_count; + + memset(audio_state, 0, audio_state_size); + + if (settings->format == FORMAT_VAGI) { + fseek(output, header_size, SEEK_SET); + } + + for (chunk_count = 0; ensure_av_data(settings, audio_samples_per_chunk*settings->channels, 0); chunk_count++) { + int samples_length = settings->audio_sample_count / settings->channels; + if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk; + + for (int ch = 0; ch < settings->channels; ch++) { + memset(buffer, 0, buffer_size); + int length = psx_audio_spu_encode(audio_state + ch, settings->audio_samples + ch, samples_length, settings->channels, buffer); + if (length) { + //buffer[1] |= PSX_AUDIO_SPU_LOOP_START; + if (settings->loop) { + buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_REPEAT; + } + if (settings->end_of_input) { + buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_END; + } + } + + fwrite(buffer, buffer_size, 1, output); + + time_t t = get_elapsed_time(settings); + if (t) { + fprintf(stderr, "\rChunk: %6d | Encoding speed: %5.2fx", + chunk_count, + (double)(chunk_count*audio_samples_per_chunk) / (double)(settings->frequency*t) + ); + } + } + + retire_av_data(settings, samples_length*settings->channels, 0); + } + + if (settings->format == FORMAT_VAGI) { + uint8_t *header = malloc(header_size); + memset(header, 0, header_size); + write_vag_header(chunk_count*settings->interleave, header, settings); + 
fseek(output, 0, SEEK_SET); + fwrite(header, header_size, 1, output); + free(header); + } + + free(audio_state); + free(buffer); +} + +void encode_file_xa(settings_t *settings, FILE *output) { psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings); psx_audio_encoder_state_t audio_state; int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); - int av_sample_mul = settings->stereo ? 2 : 1; uint8_t buffer[2352]; memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); - for (int i = 0; i < audio_sample_count; i += audio_samples_per_sector) { - int samples_length = audio_sample_count - i; + for (int j = 0; ensure_av_data(settings, audio_samples_per_sector*settings->channels, 0); j++) { + int samples_length = settings->audio_sample_count / settings->channels; if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector; - int length = psx_audio_xa_encode(xa_settings, &audio_state, audio_samples + (i * av_sample_mul), samples_length, buffer); - if ((i + audio_samples_per_sector) >= audio_sample_count) { + int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer); + if (settings->end_of_input) { psx_audio_xa_encode_finalize(xa_settings, buffer, length); } + + if (settings->format == FORMAT_XACD) { + int t = j + 75*2; + + // Put the time in + buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4); + buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4); + buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4); + } + + retire_av_data(settings, samples_length*settings->channels, 0); fwrite(buffer, length, 1, output); + + time_t t = get_elapsed_time(settings); + if (t) { + fprintf(stderr, "\rLBA: %6d | Encoding speed: %5.2fx", + j, + (double)(j*audio_samples_per_sector) / (double)(settings->frequency*t) + ); + } } } void encode_file_str(settings_t *settings, FILE *output) { - uint8_t buffer[2352*8]; psx_audio_xa_settings_t xa_settings = 
settings_to_libpsxav_xa_audio(settings); - psx_audio_encoder_state_t audio_state; - int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); - int av_sample_mul = settings->stereo ? 2 : 1; + psx_audio_encoder_state_t audio_state; + int audio_samples_per_sector; + uint8_t buffer[2352]; + + int interleave; + int video_sectors_per_block; + if (settings->decoder_state_av.audio_stream) { + // 1/N audio, (N-1)/N video + audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); + interleave = psx_audio_xa_get_sector_interleave(xa_settings) * settings->cd_speed; + video_sectors_per_block = interleave - 1; + } else { + // 0/1 audio, 1/1 video + audio_samples_per_sector = 0; + interleave = 1; + video_sectors_per_block = 1; + } + + if (!settings->quiet) { + fprintf(stderr, "Interleave: %d/%d audio, %d/%d video\n", + interleave - video_sectors_per_block, interleave, video_sectors_per_block, interleave); + } memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); + // e.g. 
15fps = (150*7/8/15) = 8.75 blocks per frame + settings->state_vid.frame_block_base_overflow = (75*settings->cd_speed) * video_sectors_per_block * settings->video_fps_den; + settings->state_vid.frame_block_overflow_den = interleave * settings->video_fps_num; + double frame_size = (double)settings->state_vid.frame_block_base_overflow / (double)settings->state_vid.frame_block_overflow_den; + if (!settings->quiet) { + fprintf(stderr, "Frame size: %.2f sectors\n", frame_size); + } + + settings->state_vid.frame_output = malloc(2016 * (int)ceil(frame_size)); settings->state_vid.frame_index = 0; - settings->state_vid.bits_value = 0; - settings->state_vid.bits_left = 16; - settings->state_vid.frame_block_index = 0; - settings->state_vid.frame_block_count = 0; - + settings->state_vid.frame_data_offset = 0; + settings->state_vid.frame_max_size = 0; settings->state_vid.frame_block_overflow_num = 0; - - // Number of total sectors per second: 150 - // Proportion of sectors for video due to A/V interleave: 7/8 - // 15FPS = (150*7/8/15) = 8.75 blocks per frame - settings->state_vid.frame_block_base_overflow = 150*7*settings->video_fps_den; - settings->state_vid.frame_block_overflow_den = 8*settings->video_fps_num; - //fprintf(stderr, "%f\n", ((double)settings->state_vid.frame_block_base_overflow)/((double)settings->state_vid.frame_block_overflow_den)); abort(); + settings->state_vid.quant_scale_sum = 0; // FIXME: this needs an extra frame to prevent A/V desync - const int frames_needed = 2; - for (int j = 0; ensure_av_data(settings, audio_samples_per_sector*av_sample_mul*frames_needed, 1*frames_needed); j+=18) { - psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, audio_samples_per_sector, buffer + 2352 * 7); - - // TODO: the final buffer - for(int k = 0; k < 7; k++) { - init_sector_buffer_video(buffer + 2352*k, settings); + int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size); + if (frames_needed < 2) frames_needed = 2; + + for (int j 
= 0; !settings->end_of_input || settings->state_vid.frame_data_offset < settings->state_vid.frame_max_size; j++) { + ensure_av_data(settings, audio_samples_per_sector*settings->channels, frames_needed); + + if ((j%interleave) < video_sectors_per_block) { + // Video sector + init_sector_buffer_video(buffer, settings); + encode_sector_str(settings->video_frames, buffer, settings); + } else { + // Audio sector + int samples_length = settings->audio_sample_count / settings->channels; + if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector; + + // FIXME: this is an extremely hacky way to handle audio tracks + // shorter than the video track + if (!samples_length) { + video_sectors_per_block++; + } + + int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer); + if (settings->end_of_input) { + psx_audio_xa_encode_finalize(xa_settings, buffer, length); + } + retire_av_data(settings, samples_length*settings->channels, 0); } - encode_block_str(settings->video_frames, settings->video_frame_count, buffer, settings); - for(int k = 0; k < 8; k++) { - int t = k + (j/18)*8 + 75*2; + + if (settings->format == FORMAT_STR2CD) { + int t = j + 75*2; // Put the time in - buffer[0x00C + 2352*k] = ((t/75/60)%10)|(((t/75/60)/10)<<4); - buffer[0x00D + 2352*k] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4); - buffer[0x00E + 2352*k] = ((t%75)%10)|(((t%75)/10)<<4); + buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4); + buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4); + buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4); - if(k != 7) { - calculate_edc_data(buffer + 2352*k); + // FIXME: EDC is not calculated in 2336-byte sector mode (shouldn't + // matter anyway, any CD image builder will have to recalculate it + // due to the sector's MSF changing) + if((j%interleave) < video_sectors_per_block) { + calculate_edc_data(buffer); } } - retire_av_data(settings, audio_samples_per_sector*av_sample_mul, 0); - 
fwrite(buffer, 2352*8, 1, output); + + fwrite(buffer, 2352, 1, output); + + time_t t = get_elapsed_time(settings); + if (t) { + fprintf(stderr, "\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx", + settings->state_vid.frame_index, + j, + (double)settings->state_vid.quant_scale_sum / (double)settings->state_vid.frame_index, + (double)(settings->state_vid.frame_index*settings->video_fps_den) / (double)(t*settings->video_fps_num) + ); + } } + + free(settings->state_vid.frame_output); +} + +void encode_file_sbs(settings_t *settings, FILE *output) { + settings->state_vid.frame_output = malloc(settings->alignment); + settings->state_vid.frame_data_offset = 0; + settings->state_vid.frame_max_size = settings->alignment; + settings->state_vid.quant_scale_sum = 0; + + for (int j = 0; ensure_av_data(settings, 0, 1); j++) { + encode_frame_bs(settings->video_frames, settings); + fwrite(settings->state_vid.frame_output, settings->alignment, 1, output); + + time_t t = get_elapsed_time(settings); + if (t) { + fprintf(stderr, "\rFrame: %4d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx", + j, + (double)settings->state_vid.quant_scale_sum / (double)j, + (double)(j*settings->video_fps_den) / (double)(t*settings->video_fps_num) + ); + } + } + + free(settings->state_vid.frame_output); } diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c index 1e35bbd..9822efc 100644 --- a/psxavenc/mdec.c +++ b/psxavenc/mdec.c @@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend Copyright (c) 2019, 2020 Adrian "asie" Siekierka Copyright (c) 2019 Ben "GreaseMonkey" Russell +Copyright (c) 2023 spicyjpeg This software is provided 'as-is', without any express or implied warranty. 
In no event will the authors be held liable for any damages @@ -209,33 +210,38 @@ static void init_dct_data(void) } -static void flush_bits(vid_encoder_state_t *state) +static bool flush_bits(vid_encoder_state_t *state) { if(state->bits_left < 16) { - assert(state->bytes_used < sizeof(state->unmuxed)); - state->unmuxed[state->bytes_used++] = (uint8_t)state->bits_value; - assert(state->bytes_used < sizeof(state->unmuxed)); - assert(state->bytes_used < 2016*state->frame_block_count); - state->unmuxed[state->bytes_used++] = (uint8_t)(state->bits_value>>8); + state->frame_output[state->bytes_used++] = (uint8_t)state->bits_value; + if (state->bytes_used >= state->frame_max_size) { + return false; + } + state->frame_output[state->bytes_used++] = (uint8_t)(state->bits_value>>8); } state->bits_left = 16; state->bits_value = 0; + return true; } -static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val) +static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val) { assert(val < (1< 16 // and I have no idea why, so I have to split this up --GM if (bits > 16) { - encode_bits(state, bits-16, val>>16); + if (!encode_bits(state, bits-16, val>>16)) { + return false; + } bits = 16; val &= 0xFFFF; } if (state->bits_left == 0) { - flush_bits(state); + if (!flush_bits(state)) { + return false; + } } while (bits > state->bits_left) { @@ -252,7 +258,9 @@ static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val) val &= mask; assert(mask >= 1); assert(val < (1<= 1) { @@ -267,81 +275,76 @@ static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val) //fprintf(stderr, "plop %2d %2d %08X %04X %04X\n", bits, state->bits_left, val, state->bits_value); state->bits_left -= bits; } + + return true; } -static void encode_ac_value(vid_encoder_state_t *state, uint16_t value) +static bool encode_ac_value(vid_encoder_state_t *state, uint16_t value) { assert(0 <= value && value <= 0xFFFF); #if 0 for(int i = 0; i < 
sizeof(huffman_lookup)/sizeof(huffman_lookup[0]); i++) { if(value == huffman_lookup[i].u_hword_pos) { - encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|0); - return; + return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|0); } else if(value == huffman_lookup[i].u_hword_neg) { - encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|1); - return; + return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|1); } } // Use an escape - encode_bits(state, 6+16, (0x01<<16)|(0xFFFF&(uint32_t)value)); + return encode_bits(state, 6+16, (0x01<<16)|(0xFFFF&(uint32_t)value)); #else uint32_t outword = huffman_encoding_map[value]; - encode_bits(state, outword>>24, outword&0xFFFFFF); + return encode_bits(state, outword>>24, outword&0xFFFFFF); #endif } -static void transform_dct_block(vid_encoder_state_t *state, int32_t *block) +static void transform_dct_block(vid_encoder_state_t *state, float *block) { // Apply DCT to block - int32_t midblock[8*8]; + float midblock[8*8]; - for (int reps = 0; reps < 2; reps++) { - for (int i = 0; i < 8; i++) { - for (int j = 0; j < 8; j++) { - int32_t v = 0; - for(int k = 0; k < 8; k++) { - v += block[8*j+k]*dct_scale_table[8*i+k]; - } - midblock[8*i+j] = (v + (1<<((14)-1)))>>(14); + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + float v = 0.0f; + for(int k = 0; k < 8; k++) { + v += block[8*j+k] * (float)dct_scale_table[8*i+k] / (float)(1 << 16); } - } - memcpy(block, midblock, sizeof(midblock)); + midblock[8*i+j] = v; + } + } + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + float v = 0.0f; + for(int k = 0; k < 8; k++) { + v += midblock[8*j+k] * (float)dct_scale_table[8*i+k] / (float)(1 << 16); + } + block[8*i+j] = v; } - - // FIXME: Work out why the math has to go this way - block[0] /= 8; - for (int i = 0; i < 64; i++) { - // Finish reducing it - block[i] 
/= 4; - - // If it's below the quantisation threshold, zero it - if(abs(block[i]) < quant_dec[i]) { - block[i] = 0; - } } - } -static void encode_dct_block(vid_encoder_state_t *state, int32_t *block) +static bool encode_dct_block(vid_encoder_state_t *state, float *block) { - int dc_value = 0; + int16_t coeffs[64]; + float scale = 8.0f / (float)state->quant_scale; for (int i = 0; i < 64; i++) { - // Quantise it - block[i] = (block[i])/quant_dec[i]; + // The DC coefficient is not affected by the quantization scale. + float x = block[i]; + if (i) { x *= scale; } - // Clamp it - if (block[i] < -0x200) { block[i] = -0x200; } - if (block[i] > +0x1FF) { block[i] = +0x1FF; } + int v = (int)roundf(x / (float)quant_dec[i]); + if (v < -0x200) { v = -0x200; } + if (v > +0x1FF) { v = +0x1FF; } + coeffs[i] = v; } - // Get DC value - dc_value = block[0]; - //dc_value = 0; - encode_bits(state, 10, dc_value&0x3FF); + if (!encode_bits(state, 10, coeffs[0]&0x3FF)) { + return false; + } // Build RLE output uint16_t zero_rle_data[8*8]; @@ -349,10 +352,10 @@ static void encode_dct_block(vid_encoder_state_t *state, int32_t *block) for (int i = 1, zeroes = 0; i < 64; i++) { int ri = dct_zagzig_table[i]; //int ri = dct_zigzag_table[i]; - if (block[ri] == 0) { + if (coeffs[ri] == 0) { zeroes++; } else { - zero_rle_data[zero_rle_words++] = (zeroes<<10)|(block[ri]&0x3FF); + zero_rle_data[zero_rle_words++] = (zeroes<<10)|(coeffs[ri]&0x3FF); zeroes = 0; state->uncomp_hwords_used += 1; } @@ -360,19 +363,24 @@ static void encode_dct_block(vid_encoder_state_t *state, int32_t *block) // Now Huffman-code the data for (int i = 0; i < zero_rle_words; i++) { - encode_ac_value(state, zero_rle_data[i]); + if (!encode_ac_value(state, zero_rle_data[i])) { + return false; + } } - //fprintf(stderr, "dc %08X rles %2d\n", dc_value, zero_rle_words); - //assert(dc_value >= -0x200); assert(dc_value < +0x200); + //fprintf(stderr, "dc %08X rles %2d\n", coeffs[0], zero_rle_words); + //assert(coeffs[0] >= -0x200); 
assert(coeffs[0] < +0x200); // Store end of block - encode_bits(state, 2, 0x2); + if (!encode_bits(state, 2, 0x2)) { + return false; + } state->uncomp_hwords_used += 2; - - state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF; + //state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF; + return true; } +#if 0 static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t min_val, int *values_to_shed) { // Reduce so it can all fit @@ -394,48 +402,44 @@ static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t // Factor in DC + EOF values return nonzeroes+2; } +#endif -static void encode_frame_str(uint8_t *video_frames, int video_frame_count, uint8_t *output, settings_t *settings) +void encode_frame_bs(uint8_t *video_frame, settings_t *settings) { - int pitch = settings->video_width*4; - int real_index = (settings->state_vid.frame_index-1); + int pitch = settings->video_width; + /*int real_index = (settings->state_vid.frame_index-1); if (real_index > video_frame_count-1) { real_index = video_frame_count-1; } - //uint8_t *video_frame = video_frames + settings->video_width*settings->video_height*4*real_index; - uint8_t *video_frame = video_frames; + uint8_t *y_plane = video_frames + settings->video_width*settings->video_height*3/2*real_index;*/ + uint8_t *y_plane = video_frame; + uint8_t *c_plane = y_plane + (settings->video_width*settings->video_height); if (!dct_done_init) { init_dct_data(); dct_done_init = true; } + int dct_block_count_x = (settings->video_width+15)/16; + int dct_block_count_y = (settings->video_height+15)/16; + if (settings->state_vid.dct_block_lists[0] == NULL) { - int dct_block_count_x = (settings->video_width+15)/16; - int dct_block_count_y = (settings->video_height+15)/16; - int dct_block_size = dct_block_count_x*dct_block_count_y*sizeof(int32_t)*8*8; + int dct_block_size = dct_block_count_x*dct_block_count_y*sizeof(float)*8*8; for (int i = 0; i < 6; i++) { 
settings->state_vid.dct_block_lists[i] = malloc(dct_block_size); } } - memset(settings->state_vid.unmuxed, 0, sizeof(settings->state_vid.unmuxed)); - - settings->state_vid.quant_scale = 1; - settings->state_vid.uncomp_hwords_used = 0; - settings->state_vid.bytes_used = 8; - settings->state_vid.blocks_used = 0; - // TODO: non-16x16-aligned videos assert((settings->video_width % 16) == 0); assert((settings->video_height % 16) == 0); - // Do the initial transform - for(int fx = 0; fx < settings->video_width; fx += 16) { - for(int fy = 0; fy < settings->video_height; fy += 16) { + // Rearrange the Y/C planes returned by libswscale into macroblocks. + for(int fx = 0; fx < dct_block_count_x; fx++) { + for(int fy = 0; fy < dct_block_count_y; fy++) { // Order: Cr Cb [Y1|Y2\nY3|Y4] - int block_offs = 8*8*((fy>>4)*((settings->video_width+15)/16)+(fx>>4)); - int32_t *blocks[6] = { + int block_offs = 64 * (fy*dct_block_count_x + fx); + float *blocks[6] = { settings->state_vid.dct_block_lists[0] + block_offs, settings->state_vid.dct_block_lists[1] + block_offs, settings->state_vid.dct_block_lists[2] + block_offs, @@ -446,66 +450,51 @@ static void encode_frame_str(uint8_t *video_frames, int video_frame_count, uint8 for(int y = 0; y < 8; y++) { for(int x = 0; x < 8; x++) { - int k = y*8+x; + int k = y*8 + x; + int cx = fx*8 + x; + int cy = fy*8 + y; + int lx = fx*16 + x; + int ly = fy*16 + y; - int cr = 0; - int cg = 0; - int cb = 0; - for(int cy = 0; cy < 2; cy++) { - for(int cx = 0; cx < 2; cx++) { - int coffs = pitch*(fy+y*2+cy) + 4*(fx+x*2+cx); - cr += video_frame[coffs+0]; - cg += video_frame[coffs+1]; - cb += video_frame[coffs+2]; - } - } - - // TODO: Get the real math for this - int cluma = cr+cg*2+cb; -#if 1 - blocks[0][k] = ((cr<<2) - cluma + (1<<(4-1)))>>4; - blocks[1][k] = ((cb<<2) - cluma + (1<<(4-1)))>>4; -#else - blocks[0][k] = 0; - blocks[1][k] = 0; -#endif - - for(int ly = 0; ly < 2; ly++) { - for(int lx = 0; lx < 2; lx++) { - int loffs = pitch*(fy+ly*8+y) + 
4*(fx+lx*8+x); - int lr = video_frame[loffs+0]; - int lg = video_frame[loffs+1]; - int lb = video_frame[loffs+2]; - - // TODO: Get the real math for this - int lluma = (lr+lg*2+lb+2)-0x200; - if(lluma < -0x200) { lluma = -0x200; } - if(lluma > +0x1FF) { lluma = +0x1FF; } - lluma >>= 1; - blocks[2+2*ly+lx][k] = lluma; - } - } + blocks[0][k] = (float)c_plane[pitch*cy + 2*cx + 0] - 128.0f; + blocks[1][k] = (float)c_plane[pitch*cy + 2*cx + 1] - 128.0f; + blocks[2][k] = (float)y_plane[pitch*(ly+0) + (lx+0)] - 128.0f; + blocks[3][k] = (float)y_plane[pitch*(ly+0) + (lx+8)] - 128.0f; + blocks[4][k] = (float)y_plane[pitch*(ly+8) + (lx+0)] - 128.0f; + blocks[5][k] = (float)y_plane[pitch*(ly+8) + (lx+8)] - 128.0f; } } + for(int i = 0; i < 6; i++) { transform_dct_block(&(settings->state_vid), blocks[i]); } } } - // Now reduce all the blocks - // TODO: Base this on actual bit count - //const int accum_threshold = 6500; - const int accum_threshold = 1025*settings->state_vid.frame_block_count; - //const int accum_threshold = 900*settings->state_vid.frame_block_count; - int values_to_shed = 0; - for(int min_val = 0;; min_val += 1) { - int accum = 0; - for(int fx = 0; fx < settings->video_width; fx += 16) { - for(int fy = 0; fy < settings->video_height; fy += 16) { + // Attempt encoding the frame at the maximum quality. If the result is too + // large, increase the quantization scale and try again. 
+ // TODO: if a frame encoded at scale N is too large but the same frame + // encoded at scale N-1 leaves a significant amount of free space, attempt + // compressing at scale N but optimizing coefficients away until it fits + // (like the old algorithm did) + for ( + settings->state_vid.quant_scale = 1; + settings->state_vid.quant_scale < 64; + settings->state_vid.quant_scale++ + ) { + memset(settings->state_vid.frame_output, 0, settings->state_vid.frame_max_size); + + settings->state_vid.bits_value = 0; + settings->state_vid.bits_left = 16; + settings->state_vid.uncomp_hwords_used = 0; + settings->state_vid.bytes_used = 8; + + bool ok = true; + for(int fx = 0; ok && (fx < dct_block_count_x); fx++) { + for(int fy = 0; ok && (fy < dct_block_count_y); fy++) { // Order: Cr Cb [Y1|Y2\nY3|Y4] - int block_offs = 8*8*((fy>>4)*((settings->video_width+15)/16)+(fx>>4)); - int32_t *blocks[6] = { + int block_offs = 64 * (fy*dct_block_count_x + fx); + float *blocks[6] = { settings->state_vid.dct_block_lists[0] + block_offs, settings->state_vid.dct_block_lists[1] + block_offs, settings->state_vid.dct_block_lists[2] + block_offs, @@ -513,132 +502,125 @@ static void encode_frame_str(uint8_t *video_frames, int video_frame_count, uint8 settings->state_vid.dct_block_lists[4] + block_offs, settings->state_vid.dct_block_lists[5] + block_offs, }; - const int luma_reduce_mul = 8; - const int chroma_reduce_mul = 8; - for(int i = 6-1; i >= 0; i--) { - accum += reduce_dct_block(&(settings->state_vid), blocks[i], (i < 2 ? 
min_val*luma_reduce_mul+1 : min_val*chroma_reduce_mul+1), &values_to_shed); + + for(int i = 0; ok && (i < 6); i++) { + ok = encode_dct_block(&(settings->state_vid), blocks[i]); } } } - if(accum <= accum_threshold) { - break; - } + if (!ok) { continue; } + if (!encode_bits(&(settings->state_vid), 10, 0x1FF)) { continue; } + if (!encode_bits(&(settings->state_vid), 2, 0x2)) { continue; } + if (!flush_bits(&(settings->state_vid))) { continue; } - values_to_shed = accum - accum_threshold; + settings->state_vid.uncomp_hwords_used += 2; + settings->state_vid.quant_scale_sum += settings->state_vid.quant_scale; + break; } + assert(settings->state_vid.quant_scale < 64); - // Now encode all the blocks - for(int fx = 0; fx < settings->video_width; fx += 16) { - for(int fy = 0; fy < settings->video_height; fy += 16) { - // Order: Cr Cb [Y1|Y2\nY3|Y4] - int block_offs = 8*8*((fy>>4)*((settings->video_width+15)/16)+(fx>>4)); - int32_t *blocks[6] = { - settings->state_vid.dct_block_lists[0] + block_offs, - settings->state_vid.dct_block_lists[1] + block_offs, - settings->state_vid.dct_block_lists[2] + block_offs, - settings->state_vid.dct_block_lists[3] + block_offs, - settings->state_vid.dct_block_lists[4] + block_offs, - settings->state_vid.dct_block_lists[5] + block_offs, - }; - for(int i = 0; i < 6; i++) { - encode_dct_block(&(settings->state_vid), blocks[i]); - } - } - } + // MDEC DMA is usually configured to transfer data in 32-word chunks. 
+ settings->state_vid.uncomp_hwords_used = (settings->state_vid.uncomp_hwords_used+0x3F)&~0x3F; - encode_bits(&(settings->state_vid), 10, 0x1FF); - encode_bits(&(settings->state_vid), 2, 0x2); - settings->state_vid.uncomp_hwords_used += 2; - settings->state_vid.uncomp_hwords_used = (settings->state_vid.uncomp_hwords_used+0xF)&~0xF; - - flush_bits(&(settings->state_vid)); - - settings->state_vid.blocks_used = ((settings->state_vid.uncomp_hwords_used+0xF)&~0xF)>>4; + // This is not the number of 32-byte blocks required for uncompressed data + // as jPSXdec docs say, but rather the number of 32-*bit* words required. + // The first 4 bytes of the frame header are in fact the MDEC command to + // start decoding, which contains the data length in words in the lower 16 + // bits. + settings->state_vid.blocks_used = (settings->state_vid.uncomp_hwords_used+1)>>1; // We need a multiple of 4 settings->state_vid.bytes_used = (settings->state_vid.bytes_used+0x3)&~0x3; - // Build the demuxed header - settings->state_vid.unmuxed[0x000] = (uint8_t)settings->state_vid.blocks_used; - settings->state_vid.unmuxed[0x001] = (uint8_t)(settings->state_vid.blocks_used>>8); - settings->state_vid.unmuxed[0x002] = (uint8_t)0x00; - settings->state_vid.unmuxed[0x003] = (uint8_t)0x38; - settings->state_vid.unmuxed[0x004] = (uint8_t)settings->state_vid.quant_scale; - settings->state_vid.unmuxed[0x005] = (uint8_t)(settings->state_vid.quant_scale>>8); - settings->state_vid.unmuxed[0x006] = 0x02; // Version 2 - settings->state_vid.unmuxed[0x007] = 0x00; + // MDEC command (size of decompressed MDEC data) + settings->state_vid.frame_output[0x000] = (uint8_t)settings->state_vid.blocks_used; + settings->state_vid.frame_output[0x001] = (uint8_t)(settings->state_vid.blocks_used>>8); + settings->state_vid.frame_output[0x002] = (uint8_t)0x00; + settings->state_vid.frame_output[0x003] = (uint8_t)0x38; + + // Quantization scale + settings->state_vid.frame_output[0x004] = 
(uint8_t)settings->state_vid.quant_scale; + settings->state_vid.frame_output[0x005] = (uint8_t)(settings->state_vid.quant_scale>>8); + + // BS version + settings->state_vid.frame_output[0x006] = 0x02; + settings->state_vid.frame_output[0x007] = 0x00; retire_av_data(settings, 0, 1); } -void encode_block_str(uint8_t *video_frames, int video_frame_count, uint8_t *output, settings_t *settings) +void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings) { uint8_t header[32]; memset(header, 0, sizeof(header)); - for(int i = 0; i < 7; i++) { - while(settings->state_vid.frame_block_index >= settings->state_vid.frame_block_count) { - settings->state_vid.frame_index++; - // TODO: work out an optimal block count for this - // TODO: calculate this all based on FPS - settings->state_vid.frame_block_overflow_num += settings->state_vid.frame_block_base_overflow; - settings->state_vid.frame_block_count = settings->state_vid.frame_block_overflow_num / settings->state_vid.frame_block_overflow_den; - settings->state_vid.frame_block_overflow_num %= settings->state_vid.frame_block_overflow_den; - settings->state_vid.frame_block_index = 0; - encode_frame_str(video_frames, video_frame_count, output, settings); - } - // Header: MDEC0 register - header[0x000] = 0x60; - header[0x001] = 0x01; - header[0x002] = 0x01; - header[0x003] = 0x80; - - // Muxed chunk index/count - int chunk_index = settings->state_vid.frame_block_index; - int chunk_count = settings->state_vid.frame_block_count; - header[0x004] = (uint8_t)chunk_index; - header[0x005] = (uint8_t)(chunk_index>>8); - header[0x006] = (uint8_t)chunk_count; - header[0x007] = (uint8_t)(chunk_count>>8); - - // Frame index - header[0x008] = (uint8_t)settings->state_vid.frame_index; - header[0x009] = (uint8_t)(settings->state_vid.frame_index>>8); - header[0x00A] = (uint8_t)(settings->state_vid.frame_index>>16); - header[0x00B] = (uint8_t)(settings->state_vid.frame_index>>24); - - // Video frame size - header[0x010] = 
(uint8_t)settings->video_width; - header[0x011] = (uint8_t)(settings->video_width>>8); - header[0x012] = (uint8_t)settings->video_height; - header[0x013] = (uint8_t)(settings->video_height>>8); - - // 32-byte blocks required for MDEC data - header[0x014] = (uint8_t)settings->state_vid.blocks_used; - header[0x015] = (uint8_t)(settings->state_vid.blocks_used>>8); - - // Some weird thing - header[0x016] = 0x00; - header[0x017] = 0x38; - - // Quantization scale - header[0x018] = (uint8_t)settings->state_vid.quant_scale; - header[0x019] = (uint8_t)(settings->state_vid.quant_scale>>8); - - // Version - header[0x01A] = 0x02; // Version 2 - header[0x01B] = 0x00; - - // Demuxed bytes used as a multiple of 4 - header[0x00C] = (uint8_t)settings->state_vid.bytes_used; - header[0x00D] = (uint8_t)(settings->state_vid.bytes_used>>8); - header[0x00E] = (uint8_t)(settings->state_vid.bytes_used>>16); - header[0x00F] = (uint8_t)(settings->state_vid.bytes_used>>24); - - memcpy(output + 2352*i + 0x018, header, sizeof(header)); - memcpy(output + 2352*i + 0x018 + 0x020, settings->state_vid.unmuxed + 2016*settings->state_vid.frame_block_index, 2016); - - settings->state_vid.frame_block_index++; + while(settings->state_vid.frame_data_offset >= settings->state_vid.frame_max_size) { + settings->state_vid.frame_index++; + // TODO: work out an optimal block count for this + // TODO: calculate this all based on FPS + settings->state_vid.frame_block_overflow_num += settings->state_vid.frame_block_base_overflow; + settings->state_vid.frame_max_size = settings->state_vid.frame_block_overflow_num / settings->state_vid.frame_block_overflow_den * 2016; + settings->state_vid.frame_block_overflow_num %= settings->state_vid.frame_block_overflow_den; + settings->state_vid.frame_data_offset = 0; + encode_frame_bs(video_frames, settings); } + + // STR version + header[0x000] = 0x60; + header[0x001] = 0x01; + + // Chunk type: MDEC data + header[0x002] = 0x01; + header[0x003] = 0x80; + + // Muxed chunk 
index/count + int chunk_index = settings->state_vid.frame_data_offset/2016; + int chunk_count = settings->state_vid.frame_max_size/2016; + header[0x004] = (uint8_t)chunk_index; + header[0x005] = (uint8_t)(chunk_index>>8); + header[0x006] = (uint8_t)chunk_count; + header[0x007] = (uint8_t)(chunk_count>>8); + + // Frame index + header[0x008] = (uint8_t)settings->state_vid.frame_index; + header[0x009] = (uint8_t)(settings->state_vid.frame_index>>8); + header[0x00A] = (uint8_t)(settings->state_vid.frame_index>>16); + header[0x00B] = (uint8_t)(settings->state_vid.frame_index>>24); + + // Video frame size + header[0x010] = (uint8_t)settings->video_width; + header[0x011] = (uint8_t)(settings->video_width>>8); + header[0x012] = (uint8_t)settings->video_height; + header[0x013] = (uint8_t)(settings->video_height>>8); + + // MDEC command (size of decompressed MDEC data) + header[0x014] = (uint8_t)settings->state_vid.blocks_used; + header[0x015] = (uint8_t)(settings->state_vid.blocks_used>>8); + header[0x016] = 0x00; + header[0x017] = 0x38; + + // Quantization scale + header[0x018] = (uint8_t)settings->state_vid.quant_scale; + header[0x019] = (uint8_t)(settings->state_vid.quant_scale>>8); + + // BS version + header[0x01A] = 0x02; + header[0x01B] = 0x00; + + // Demuxed bytes used as a multiple of 4 + header[0x00C] = (uint8_t)settings->state_vid.bytes_used; + header[0x00D] = (uint8_t)(settings->state_vid.bytes_used>>8); + header[0x00E] = (uint8_t)(settings->state_vid.bytes_used>>16); + header[0x00F] = (uint8_t)(settings->state_vid.bytes_used>>24); + + if (settings->format == FORMAT_STR2CD) { + memcpy(output + 0x018, header, sizeof(header)); + memcpy(output + 0x018 + 0x020, settings->state_vid.frame_output + settings->state_vid.frame_data_offset, 2016); + } else { + memcpy(output + 0x008, header, sizeof(header)); + memcpy(output + 0x008 + 0x020, settings->state_vid.frame_output + settings->state_vid.frame_data_offset, 2016); + } + + settings->state_vid.frame_data_offset += 2016; 
} diff --git a/psxavenc/psxavenc.c b/psxavenc/psxavenc.c index 8ee15ba..0e34126 100644 --- a/psxavenc/psxavenc.c +++ b/psxavenc/psxavenc.c @@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend Copyright (c) 2019, 2020 Adrian "asie" Siekierka Copyright (c) 2019 Ben "GreaseMonkey" Russell +Copyright (c) 2023 spicyjpeg This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -23,87 +24,247 @@ freely, subject to the following restrictions: #include "common.h" +const char *format_names[NUM_FORMATS] = { + "xa", "xacd", + "spu", "spui", + "vag", "vagi", + "str2", "str2cd", + "sbs2" +}; + void print_help(void) { - fprintf(stderr, "Usage: psxavenc [-f freq] [-b bitdepth] [-c channels] [-F num] [-C num] [-t xa|xacd|spu|str2] \n\n"); - fprintf(stderr, " -f freq Use specified frequency\n"); - fprintf(stderr, " -t format Use specified output type:\n"); - fprintf(stderr, " xa [A.] .xa 2336-byte sectors\n"); - fprintf(stderr, " xacd [A.] .xa 2352-byte sectors\n"); - fprintf(stderr, " spu [A.] 
raw SPU-ADPCM data\n"); - fprintf(stderr, " str2 [AV] v2 .str video 2352-byte sectors\n"); - fprintf(stderr, " -b bitdepth Use specified bit depth (only 4 bits supported)\n"); - fprintf(stderr, " -c channels Use specified channel count (1 or 2)\n"); - fprintf(stderr, " -F num [.xa] Set the file number to num (0-255)\n"); - fprintf(stderr, " -C num [.xa] Set the channel number to num (0-31)\n"); + fprintf(stderr, + "Usage:\n" + " psxavenc -t [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] \n" + " psxavenc -t [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] [-s WxH] [-I] [-r num/den] [-x 1|2] \n" + " psxavenc -t sbs2 [-s WxH] [-I] [-r num/den] [-a size] \n" + " psxavenc -t [-f freq] [-L] \n" + " psxavenc -t [-f freq] [-c 1-24] [-L] [-i size] [-a size] \n" + "\nTool options:\n" + " -h Show this help message and exit\n" + " -q Suppress all non-error messages\n" + "\nOutput options:\n" + " -t format Use specified output type:\n" + " xa [A.] .xa, 2336-byte sectors\n" + " xacd [A.] .xa, 2352-byte sectors\n" + " spu [A.] raw SPU-ADPCM mono data\n" + " spui [A.] raw SPU-ADPCM interleaved data\n" + " vag [A.] .vag SPU-ADPCM mono\n" + " vagi [A.] .vag SPU-ADPCM interleaved\n" + " str2 [AV] v2 .str video, 2336-byte sectors\n" + " str2cd [AV] v2 .str video, 2352-byte sectors\n" + " sbs2 [.V] v2 .sbs video, 2048-byte sectors\n" + " -F num Set the XA file number for xa/str2 (0-255)\n" + " -C num Set the XA channel number for xa/str2 (0-31)\n" + "\nAudio options:\n" + " -f freq Use specified sample rate (must be 18900 or 37800 for xa/str2)\n" + " -b bitdepth Use specified bit depth for xa/str2 (4 or 8)\n" + " -c channels Use specified channel count (1-2 for xa/str2, any for spui/vagi)\n" + " -L Add a loop marker at the end of SPU-ADPCM data\n" + " -R key=value,... 
Pass custom options to libswresample (see ffmpeg docs)\n" + "\nSPU interleaving options (spui/vagi format):\n" + " -i size Use specified interleave\n" + " -a size Pad header and each interleaved chunk to specified size\n" + "\nVideo options (str2/str2cd/sbs2 format):\n" + " -s WxH Rescale input file to fit within specified size (default 320x240)\n" + " -I Force stretching to given size without preserving aspect ratio\n" + " -S key=value,... Pass custom options to libswscale (see ffmpeg docs)\n" + " -r num/den Set frame rate to specified integer or fraction (default 15)\n" + " -x speed Set the CD-ROM speed the file is meant to be played at (1-2)\n" + " -a size Set the size of each frame for sbs2\n" + ); } int parse_args(settings_t* settings, int argc, char** argv) { - int c; - while ((c = getopt(argc, argv, "t:f:b:c:F:C:")) != -1) { + int c, i; + char *next; + while ((c = getopt(argc, argv, "?hqt:F:C:f:b:c:LR:i:a:s:IS:r:x:")) != -1) { switch (c) { + case '?': + case 'h': { + print_help(); + return -1; + } break; + case 'q': { + settings->quiet = true; + settings->show_progress = false; + } break; case 't': { - if (strcmp(optarg, "xa") == 0) { - settings->format = FORMAT_XA; - } else if (strcmp(optarg, "xacd") == 0) { - settings->format = FORMAT_XACD; - } else if (strcmp(optarg, "spu") == 0) { - settings->format = FORMAT_SPU; - } else if (strcmp(optarg, "str2") == 0) { - settings->format = FORMAT_STR2; - } else { + settings->format = -1; + for (i = 0; i < NUM_FORMATS; i++) { + if (!strcmp(optarg, format_names[i])) { + settings->format = i; + break; + } + } + if (settings->format < 0) { fprintf(stderr, "Invalid format: %s\n", optarg); return -1; } } break; - case 'f': { - settings->frequency = atoi(optarg); - } break; - case 'b': { - settings->bits_per_sample = atoi(optarg); - if (settings->bits_per_sample != 4) { - fprintf(stderr, "Invalid bit depth: %d\n", settings->frequency); - return -1; - } - } break; - case 'c': { - int ch = atoi(optarg); - if (ch <= 0 || ch > 2) 
{ - fprintf(stderr, "Invalid channel count: %d\n", ch); - return -1; - } - settings->stereo = (ch == 2 ? 1 : 0); - } break; case 'F': { - settings->file_number = atoi(optarg); + settings->file_number = strtol(optarg, NULL, 0); if (settings->file_number < 0 || settings->file_number > 255) { fprintf(stderr, "Invalid file number: %d\n", settings->file_number); return -1; } } break; case 'C': { - settings->channel_number = atoi(optarg); + settings->channel_number = strtol(optarg, NULL, 0); if (settings->channel_number < 0 || settings->channel_number > 31) { fprintf(stderr, "Invalid channel number: %d\n", settings->channel_number); return -1; } } break; - case '?': - case 'h': { - print_help(); - return -1; + case 'f': { + settings->frequency = strtol(optarg, NULL, 0); + } break; + case 'b': { + settings->bits_per_sample = strtol(optarg, NULL, 0); + if (settings->bits_per_sample != 4 && settings->bits_per_sample != 8) { + fprintf(stderr, "Invalid bit depth: %d\n", settings->bits_per_sample); + return -1; + } + } break; + case 'c': { + settings->channels = strtol(optarg, NULL, 0); + if (settings->channels < 1 || settings->channels > 24) { + fprintf(stderr, "Invalid channel count: %d\n", settings->channels); + return -1; + } + } break; + case 'L': { + settings->loop = true; + } break; + case 'R': { + settings->swresample_options = optarg; + } break; + case 'i': { + settings->interleave = (strtol(optarg, NULL, 0) + 15) & ~15; + if (settings->interleave < 16) { + fprintf(stderr, "Invalid interleave: %d\n", settings->interleave); + return -1; + } + } break; + case 'a': { + settings->alignment = strtol(optarg, NULL, 0); + if (settings->alignment < 1) { + fprintf(stderr, "Invalid alignment: %d\n", settings->alignment); + return -1; + } + } break; + case 's': { + settings->video_width = (strtol(optarg, &next, 0) + 15) & ~15; + if (*next != 'x') { + fprintf(stderr, "Invalid video size (must be specified as x)\n"); + return -1; + } + settings->video_height = (strtol(next + 1, NULL, 0) 
+ 15) & ~15; + + if (settings->video_width < 16 || settings->video_width > 320) { + fprintf(stderr, "Invalid video width: %d\n", settings->video_width); + return -1; + } + if (settings->video_height < 16 || settings->video_height > 240) { + fprintf(stderr, "Invalid video height: %d\n", settings->video_height); + return -1; + } + } break; + case 'I': { + settings->ignore_aspect_ratio = true; + } break; + case 'S': { + settings->swscale_options = optarg; + } break; + case 'r': { + settings->video_fps_num = strtol(optarg, &next, 0); + if (*next == '/') { + settings->video_fps_den = strtol(next + 1, NULL, 0); + } else { + settings->video_fps_den = 1; + } + + if (!settings->video_fps_den) { + fprintf(stderr, "Invalid frame rate denominator\n"); + return -1; + } + i = settings->video_fps_num / settings->video_fps_den; + if (i < 1 || i > 30) { + fprintf(stderr, "Invalid frame rate: %d/%d\n", settings->video_fps_num, settings->video_fps_den); + return -1; + } + } break; + case 'x': { + settings->cd_speed = strtol(optarg, NULL, 0); + if (settings->cd_speed < 1 || settings->cd_speed > 2) { + fprintf(stderr, "Invalid CD-ROM speed: %d\n", settings->cd_speed); + return -1; + } } break; } } - if (settings->format == FORMAT_XA || settings->format == FORMAT_XACD) { - if (settings->frequency != PSX_AUDIO_XA_FREQ_SINGLE && settings->frequency != PSX_AUDIO_XA_FREQ_DOUBLE) { - fprintf(stderr, "Invalid frequency: %d Hz\n", settings->frequency); + // Validate settings + switch (settings->format) { + case FORMAT_XA: + case FORMAT_XACD: + case FORMAT_STR2: + case FORMAT_STR2CD: + if (settings->frequency != PSX_AUDIO_XA_FREQ_SINGLE && settings->frequency != PSX_AUDIO_XA_FREQ_DOUBLE) { + fprintf( + stderr, "Invalid XA-ADPCM frequency: %d Hz (must be %d or %d Hz)\n", settings->frequency, + PSX_AUDIO_XA_FREQ_SINGLE, PSX_AUDIO_XA_FREQ_DOUBLE + ); + return -1; + } + if (settings->channels > 2) { + fprintf(stderr, "Invalid XA-ADPCM channel count: %d (must be 1 or 2)\n", settings->channels); + 
return -1; + } + if (settings->loop) { + fprintf(stderr, "XA-ADPCM does not support loop markers\n"); + return -1; + } + break; + case FORMAT_SPU: + case FORMAT_VAG: + if (settings->bits_per_sample != 4) { + fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample); + return -1; + } + if (settings->channels != 1) { + fprintf(stderr, "Invalid SPU-ADPCM channel count: %d (must be 1)\n", settings->channels); + return -1; + } + if (settings->interleave) { + fprintf(stderr, "Interleave cannot be specified for mono SPU-ADPCM\n"); + return -1; + } + break; + case FORMAT_SPUI: + case FORMAT_VAGI: + if (settings->bits_per_sample != 4) { + fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample); + return -1; + } + if (!settings->interleave) { + fprintf(stderr, "Interleave must be specified for interleaved SPU-ADPCM\n"); + return -1; + } + break; + case FORMAT_SBS2: + if (!settings->alignment) { + fprintf(stderr, "Alignment (frame size) must be specified\n"); + return -1; + } + if (settings->alignment < 256) { + fprintf(stderr, "Invalid frame size: %d (must be at least 256)\n", settings->alignment); + return -1; + } + break; + default: + fprintf(stderr, "Output format must be specified\n"); return -1; - } - } - - if (settings->format == FORMAT_SPU) { - settings->stereo = false; } return optind; @@ -116,28 +277,46 @@ int main(int argc, char **argv) { memset(&settings,0,sizeof(settings_t)); + settings.quiet = false; + settings.show_progress = isatty(fileno(stderr)); + + settings.format = -1; settings.file_number = 0; settings.channel_number = 0; - settings.stereo = true; + settings.cd_speed = 2; + settings.channels = 1; settings.frequency = PSX_AUDIO_XA_FREQ_DOUBLE; settings.bits_per_sample = 4; + settings.interleave = 0; + settings.alignment = 2048; + settings.loop = false; + // NOTE: ffmpeg/ffplay's .str demuxer has the frame rate hardcoded to 15fps + // so if you're messing around with this make sure you 
test generated files + // with another player and/or in an emulator. settings.video_width = 320; settings.video_height = 240; + settings.video_fps_num = 15; + settings.video_fps_den = 1; + settings.ignore_aspect_ratio = false; + + settings.swresample_options = NULL; + settings.swscale_options = NULL; settings.audio_samples = NULL; settings.audio_sample_count = 0; settings.video_frames = NULL; settings.video_frame_count = 0; - // TODO: make this adjustable - // also for some reason ffmpeg seems to hard-code the framerate to 15fps - settings.video_fps_num = 15; - settings.video_fps_den = 1; for(int i = 0; i < 6; i++) { settings.state_vid.dct_block_lists[i] = NULL; } + if (argc < 2) { + print_help(); + return 1; + } + arg_offset = parse_args(&settings, argc, argv); if (arg_offset < 0) { return 1; @@ -146,13 +325,12 @@ int main(int argc, char **argv) { return 1; } - fprintf(stderr, "Using settings: %d Hz @ %d bit depth, %s. F%d C%d\n", - settings.frequency, settings.bits_per_sample, - settings.stereo ? "stereo" : "mono", - settings.file_number, settings.channel_number - ); + bool has_audio = (settings.format != FORMAT_SBS2); + bool has_video = (settings.format == FORMAT_STR2) || + (settings.format == FORMAT_STR2CD) || (settings.format == FORMAT_SBS2); - bool did_open_data = open_av_data(argv[arg_offset + 0], &settings); + bool did_open_data = open_av_data(argv[arg_offset + 0], &settings, + has_audio, has_video, !has_video, has_video); if (!did_open_data) { fprintf(stderr, "Could not open input file!\n"); return 1; @@ -164,23 +342,75 @@ int main(int argc, char **argv) { return 1; } - int av_sample_mul = settings.stereo ? 
2 : 1; + settings.start_time = time(NULL); + settings.last_progress_update = 0; switch (settings.format) { case FORMAT_XA: case FORMAT_XACD: - pull_all_av_data(&settings); - encode_file_xa(settings.audio_samples, settings.audio_sample_count / av_sample_mul, &settings, output); + if (!settings.quiet) { + fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n", + settings.frequency, settings.bits_per_sample, + (settings.channels == 2) ? "stereo" : "mono", + settings.file_number, settings.channel_number + ); + } + + encode_file_xa(&settings, output); break; case FORMAT_SPU: - pull_all_av_data(&settings); - encode_file_spu(settings.audio_samples, settings.audio_sample_count / av_sample_mul, &settings, output); + case FORMAT_VAG: + if (!settings.quiet) { + fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz mono\n", + settings.frequency + ); + } + + encode_file_spu(&settings, output); + break; + case FORMAT_SPUI: + case FORMAT_VAGI: + if (!settings.quiet) { + fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n", + settings.frequency, settings.channels, settings.interleave + ); + } + + encode_file_spu_interleaved(&settings, output); break; case FORMAT_STR2: + case FORMAT_STR2CD: + if (!settings.quiet) { + if (settings.decoder_state_av.audio_stream) { + fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n", + settings.frequency, settings.bits_per_sample, + (settings.channels == 2) ? 
"stereo" : "mono", + settings.file_number, settings.channel_number + ); + } + fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n", + settings.video_width, settings.video_height, + (double)settings.video_fps_num / (double)settings.video_fps_den + ); + } + encode_file_str(&settings, output); break; + case FORMAT_SBS2: + if (!settings.quiet) { + fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n", + settings.video_width, settings.video_height, + (double)settings.video_fps_num / (double)settings.video_fps_den + ); + } + + encode_file_sbs(&settings, output); + break; } + if (settings.show_progress) { + fprintf(stderr, "\nDone.\n"); + } fclose(output); close_av_data(&settings); return 0; From e31da8a4a6977519d7bfca2403d7a2eaacca3f88 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Mon, 15 May 2023 18:12:07 +0200 Subject: [PATCH 2/2] Fix compile errors and warnings, update readme --- README.md | 68 ++++++++++--- libpsxav/adpcm.c | 3 +- libpsxav/libpsxav.h | 2 +- meson.build | 4 +- psxavenc/decoding.c | 4 +- psxavenc/mdec.c | 226 ++++++++++++++++++++++---------------------- 6 files changed, 175 insertions(+), 132 deletions(-) diff --git a/README.md b/README.md index 3bece70..57afdd5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,22 @@ + # psxavenc -psxavenc is an open-source command-line tool allowing for the encoding of PS1-format audio and video data. +psxavenc is an open-source command-line tool for encoding audio and video data +into formats commonly used on the original PlayStation. + +## Installation + +Requirements: + +- a recent version of FFmpeg libraries (`libavformat`, `libavcodec`, + `libavutil`, `libswresample`, `libswscale`); +- a recent version of Meson. + +```shell +$ meson setup build +$ cd build +$ ninja install +``` ## Usage @@ -8,21 +24,49 @@ Run `psxavenc`. 
### Examples -Converting a sound file to a 22050Hz SPU sample: +Rescale a video file to ≤320x240 pixels (preserving aspect ratio) and encode it +into a 15fps .STR file with 37800 Hz 4-bit stereo audio and 2352-byte sectors, +meant to be played at 2x CD-ROM speed: ```shell -$ psxavenc -f 22050 -t spu -c 1 -b 4 sound_file.ogg sound_file.snd +$ psxavenc -t str2cd -f 37800 -b 4 -c 2 -s 320x240 -r 15 -x 2 in.mp4 out.str ``` -## Installation - -Requirements: - -* a recent version of FFmpeg, -* a recent version of Meson. +Convert a mono audio sample to 22050 Hz raw SPU-ADPCM data: ```shell -$ meson setup build -$ cd build -$ ninja install +$ psxavenc -t spu -f 22050 in.ogg out.snd ``` + +Convert a stereo audio file to a 44100 Hz interleaved .VAG file with 8192-byte +interleave and loop flags set at the end of each interleaved chunk: + +```shell +$ psxavenc -t vagi -f 44100 -c 2 -L -i 8192 in.wav out.vag +``` + +## Supported formats + +| Format | Audio | Channels | Video | Sector size | +| :------- | :--------------- | :------- | :---- | :---------- | +| `xa` | XA-ADPCM | 1 or 2 | None | 2336 bytes | +| `xacd` | XA-ADPCM | 1 or 2 | None | 2352 bytes | +| `spu` | SPU-ADPCM | 1 | None | | +| `spui` | SPU-ADPCM | Any | None | Any | +| `vag` | SPU-ADPCM | 1 | None | | +| `vagi` | SPU-ADPCM | Any | None | Any | +| `str2` | None or XA-ADPCM | 1 or 2 | BS v2 | 2336 bytes | +| `str2cd` | None or XA-ADPCM | 1 or 2 | BS v2 | 2352 bytes | +| `sbs2` | None | | BS v2 | Any | + +Notes: + +- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .VAG + header at the beginning of the file. The header is always 48 bytes long for + `vag` files, while in the case of `vagi` files it is padded to the size + specified using the `-a` option (2048 bytes by default). Note that `vagi` + files with more than 2 channels and/or alignment other than 2048 bytes are not + standardized. 
+- The `sbs2` format (used in some System 573 games) is simply a series of + concatenated BS v2 frames, each padded to the size specified by the `-a` + option, with no additional headers besides the BS frame headers. diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c index 2baa393..631f17f 100644 --- a/libpsxav/adpcm.c +++ b/libpsxav/adpcm.c @@ -283,7 +283,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat return (((j + 17) / 18) * xa_sector_size); } -int psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length) { +void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length) { if (output_length >= 2336) { output[output_length - 2352 + 0x12] |= 0x80; output[output_length - 2352 + 0x18] |= 0x80; @@ -301,7 +301,6 @@ int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* sample int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output) { uint8_t prebuf[28]; uint8_t *buffer = output; - uint8_t *data; for (int i = 0; i < sample_count; i += 28, buffer += 16) { buffer[0] = encode(state, samples + i * pitch, sample_count - i, pitch, prebuf, 0, 1, SPU_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h index 5725df9..5558200 100644 --- a/libpsxav/libpsxav.h +++ b/libpsxav/libpsxav.h @@ -72,7 +72,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output); int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output); int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start); -int psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length); 
+void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length); void psx_audio_spu_set_flag_at_sample(uint8_t* spu_data, int sample_pos, int flag); // cdrom.c diff --git a/meson.build b/meson.build index 59d113c..f50c3ef 100644 --- a/meson.build +++ b/meson.build @@ -1,5 +1,7 @@ project('psxavenc', 'c', default_options: ['c_std=c11']) +libm_dep = meson.get_compiler('c').find_library('m') + ffmpeg = [ dependency('libavformat'), dependency('libavcodec'), @@ -21,4 +23,4 @@ executable('psxavenc', [ 'psxavenc/filefmt.c', 'psxavenc/mdec.c', 'psxavenc/psxavenc.c' -], dependencies: [ffmpeg, libpsxav_dep], install: true) +], dependencies: [libm_dep, ffmpeg, libpsxav_dep], install: true) diff --git a/psxavenc/decoding.c b/psxavenc/decoding.c index 3c573d6..ac84e1c 100644 --- a/psxavenc/decoding.c +++ b/psxavenc/decoding.c @@ -24,8 +24,6 @@ freely, subject to the following restrictions: #include "common.h" -static void poll_av_packet(settings_t *settings, AVPacket *packet); - int decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) { int ret; @@ -271,7 +269,7 @@ static void poll_av_packet_video(settings_t *settings, AVPacket *packet) }; if (decode_frame(av->video_codec_context, av->frame, &frame_size, packet)) { - if (!av->frame->width || !av->frame->height || !av->frame->data) { + if (!av->frame->width || !av->frame->height || !av->frame->data[0]) { return; } diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c index 9822efc..b7f7157 100644 --- a/psxavenc/mdec.c +++ b/psxavenc/mdec.c @@ -37,117 +37,117 @@ const struct { uint16_t u_hword_neg; } huffman_lookup[] = { // Fuck this Huffman tree in particular --GM - 2,0x3,MAKE_HUFFMAN_PAIR(0,1), - 3,0x3,MAKE_HUFFMAN_PAIR(1,1), - 4,0x4,MAKE_HUFFMAN_PAIR(0,2), - 4,0x5,MAKE_HUFFMAN_PAIR(2,1), - 5,0x05,MAKE_HUFFMAN_PAIR(0,3), - 5,0x06,MAKE_HUFFMAN_PAIR(4,1), - 5,0x07,MAKE_HUFFMAN_PAIR(3,1), - 6,0x04,MAKE_HUFFMAN_PAIR(7,1), - 6,0x05,MAKE_HUFFMAN_PAIR(6,1), - 
6,0x06,MAKE_HUFFMAN_PAIR(1,2), - 6,0x07,MAKE_HUFFMAN_PAIR(5,1), - 7,0x04,MAKE_HUFFMAN_PAIR(2,2), - 7,0x05,MAKE_HUFFMAN_PAIR(9,1), - 7,0x06,MAKE_HUFFMAN_PAIR(0,4), - 7,0x07,MAKE_HUFFMAN_PAIR(8,1), - 8,0x20,MAKE_HUFFMAN_PAIR(13,1), - 8,0x21,MAKE_HUFFMAN_PAIR(0,6), - 8,0x22,MAKE_HUFFMAN_PAIR(12,1), - 8,0x23,MAKE_HUFFMAN_PAIR(11,1), - 8,0x24,MAKE_HUFFMAN_PAIR(3,2), - 8,0x25,MAKE_HUFFMAN_PAIR(1,3), - 8,0x26,MAKE_HUFFMAN_PAIR(0,5), - 8,0x27,MAKE_HUFFMAN_PAIR(10,1), - 10,0x008,MAKE_HUFFMAN_PAIR(16,1), - 10,0x009,MAKE_HUFFMAN_PAIR(5,2), - 10,0x00A,MAKE_HUFFMAN_PAIR(0,7), - 10,0x00B,MAKE_HUFFMAN_PAIR(2,3), - 10,0x00C,MAKE_HUFFMAN_PAIR(1,4), - 10,0x00D,MAKE_HUFFMAN_PAIR(15,1), - 10,0x00E,MAKE_HUFFMAN_PAIR(14,1), - 10,0x00F,MAKE_HUFFMAN_PAIR(4,2), - 12,0x010,MAKE_HUFFMAN_PAIR(0,11), - 12,0x011,MAKE_HUFFMAN_PAIR(8,2), - 12,0x012,MAKE_HUFFMAN_PAIR(4,3), - 12,0x013,MAKE_HUFFMAN_PAIR(0,10), - 12,0x014,MAKE_HUFFMAN_PAIR(2,4), - 12,0x015,MAKE_HUFFMAN_PAIR(7,2), - 12,0x016,MAKE_HUFFMAN_PAIR(21,1), - 12,0x017,MAKE_HUFFMAN_PAIR(20,1), - 12,0x018,MAKE_HUFFMAN_PAIR(0,9), - 12,0x019,MAKE_HUFFMAN_PAIR(19,1), - 12,0x01A,MAKE_HUFFMAN_PAIR(18,1), - 12,0x01B,MAKE_HUFFMAN_PAIR(1,5), - 12,0x01C,MAKE_HUFFMAN_PAIR(3,3), - 12,0x01D,MAKE_HUFFMAN_PAIR(0,8), - 12,0x01E,MAKE_HUFFMAN_PAIR(6,2), - 12,0x01F,MAKE_HUFFMAN_PAIR(17,1), - 13,0x0010,MAKE_HUFFMAN_PAIR(10,2), - 13,0x0011,MAKE_HUFFMAN_PAIR(9,2), - 13,0x0012,MAKE_HUFFMAN_PAIR(5,3), - 13,0x0013,MAKE_HUFFMAN_PAIR(3,4), - 13,0x0014,MAKE_HUFFMAN_PAIR(2,5), - 13,0x0015,MAKE_HUFFMAN_PAIR(1,7), - 13,0x0016,MAKE_HUFFMAN_PAIR(1,6), - 13,0x0017,MAKE_HUFFMAN_PAIR(0,15), - 13,0x0018,MAKE_HUFFMAN_PAIR(0,14), - 13,0x0019,MAKE_HUFFMAN_PAIR(0,13), - 13,0x001A,MAKE_HUFFMAN_PAIR(0,12), - 13,0x001B,MAKE_HUFFMAN_PAIR(26,1), - 13,0x001C,MAKE_HUFFMAN_PAIR(25,1), - 13,0x001D,MAKE_HUFFMAN_PAIR(24,1), - 13,0x001E,MAKE_HUFFMAN_PAIR(23,1), - 13,0x001F,MAKE_HUFFMAN_PAIR(22,1), - 14,0x0010,MAKE_HUFFMAN_PAIR(0,31), - 14,0x0011,MAKE_HUFFMAN_PAIR(0,30), - 
14,0x0012,MAKE_HUFFMAN_PAIR(0,29), - 14,0x0013,MAKE_HUFFMAN_PAIR(0,28), - 14,0x0014,MAKE_HUFFMAN_PAIR(0,27), - 14,0x0015,MAKE_HUFFMAN_PAIR(0,26), - 14,0x0016,MAKE_HUFFMAN_PAIR(0,25), - 14,0x0017,MAKE_HUFFMAN_PAIR(0,24), - 14,0x0018,MAKE_HUFFMAN_PAIR(0,23), - 14,0x0019,MAKE_HUFFMAN_PAIR(0,22), - 14,0x001A,MAKE_HUFFMAN_PAIR(0,21), - 14,0x001B,MAKE_HUFFMAN_PAIR(0,20), - 14,0x001C,MAKE_HUFFMAN_PAIR(0,19), - 14,0x001D,MAKE_HUFFMAN_PAIR(0,18), - 14,0x001E,MAKE_HUFFMAN_PAIR(0,17), - 14,0x001F,MAKE_HUFFMAN_PAIR(0,16), - 15,0x0010,MAKE_HUFFMAN_PAIR(0,40), - 15,0x0011,MAKE_HUFFMAN_PAIR(0,39), - 15,0x0012,MAKE_HUFFMAN_PAIR(0,38), - 15,0x0013,MAKE_HUFFMAN_PAIR(0,37), - 15,0x0014,MAKE_HUFFMAN_PAIR(0,36), - 15,0x0015,MAKE_HUFFMAN_PAIR(0,35), - 15,0x0016,MAKE_HUFFMAN_PAIR(0,34), - 15,0x0017,MAKE_HUFFMAN_PAIR(0,33), - 15,0x0018,MAKE_HUFFMAN_PAIR(0,32), - 15,0x0019,MAKE_HUFFMAN_PAIR(1,14), - 15,0x001A,MAKE_HUFFMAN_PAIR(1,13), - 15,0x001B,MAKE_HUFFMAN_PAIR(1,12), - 15,0x001C,MAKE_HUFFMAN_PAIR(1,11), - 15,0x001D,MAKE_HUFFMAN_PAIR(1,10), - 15,0x001E,MAKE_HUFFMAN_PAIR(1,9), - 15,0x001F,MAKE_HUFFMAN_PAIR(1,8), - 16,0x0010,MAKE_HUFFMAN_PAIR(1,18), - 16,0x0011,MAKE_HUFFMAN_PAIR(1,17), - 16,0x0012,MAKE_HUFFMAN_PAIR(1,16), - 16,0x0013,MAKE_HUFFMAN_PAIR(1,15), - 16,0x0014,MAKE_HUFFMAN_PAIR(6,3), - 16,0x0015,MAKE_HUFFMAN_PAIR(16,2), - 16,0x0016,MAKE_HUFFMAN_PAIR(15,2), - 16,0x0017,MAKE_HUFFMAN_PAIR(14,2), - 16,0x0018,MAKE_HUFFMAN_PAIR(13,2), - 16,0x0019,MAKE_HUFFMAN_PAIR(12,2), - 16,0x001A,MAKE_HUFFMAN_PAIR(11,2), - 16,0x001B,MAKE_HUFFMAN_PAIR(31,1), - 16,0x001C,MAKE_HUFFMAN_PAIR(30,1), - 16,0x001D,MAKE_HUFFMAN_PAIR(29,1), - 16,0x001E,MAKE_HUFFMAN_PAIR(28,1), - 16,0x001F,MAKE_HUFFMAN_PAIR(27,1), + {2,0x3,MAKE_HUFFMAN_PAIR(0,1)}, + {3,0x3,MAKE_HUFFMAN_PAIR(1,1)}, + {4,0x4,MAKE_HUFFMAN_PAIR(0,2)}, + {4,0x5,MAKE_HUFFMAN_PAIR(2,1)}, + {5,0x05,MAKE_HUFFMAN_PAIR(0,3)}, + {5,0x06,MAKE_HUFFMAN_PAIR(4,1)}, + {5,0x07,MAKE_HUFFMAN_PAIR(3,1)}, + {6,0x04,MAKE_HUFFMAN_PAIR(7,1)}, + 
{6,0x05,MAKE_HUFFMAN_PAIR(6,1)}, + {6,0x06,MAKE_HUFFMAN_PAIR(1,2)}, + {6,0x07,MAKE_HUFFMAN_PAIR(5,1)}, + {7,0x04,MAKE_HUFFMAN_PAIR(2,2)}, + {7,0x05,MAKE_HUFFMAN_PAIR(9,1)}, + {7,0x06,MAKE_HUFFMAN_PAIR(0,4)}, + {7,0x07,MAKE_HUFFMAN_PAIR(8,1)}, + {8,0x20,MAKE_HUFFMAN_PAIR(13,1)}, + {8,0x21,MAKE_HUFFMAN_PAIR(0,6)}, + {8,0x22,MAKE_HUFFMAN_PAIR(12,1)}, + {8,0x23,MAKE_HUFFMAN_PAIR(11,1)}, + {8,0x24,MAKE_HUFFMAN_PAIR(3,2)}, + {8,0x25,MAKE_HUFFMAN_PAIR(1,3)}, + {8,0x26,MAKE_HUFFMAN_PAIR(0,5)}, + {8,0x27,MAKE_HUFFMAN_PAIR(10,1)}, + {10,0x008,MAKE_HUFFMAN_PAIR(16,1)}, + {10,0x009,MAKE_HUFFMAN_PAIR(5,2)}, + {10,0x00A,MAKE_HUFFMAN_PAIR(0,7)}, + {10,0x00B,MAKE_HUFFMAN_PAIR(2,3)}, + {10,0x00C,MAKE_HUFFMAN_PAIR(1,4)}, + {10,0x00D,MAKE_HUFFMAN_PAIR(15,1)}, + {10,0x00E,MAKE_HUFFMAN_PAIR(14,1)}, + {10,0x00F,MAKE_HUFFMAN_PAIR(4,2)}, + {12,0x010,MAKE_HUFFMAN_PAIR(0,11)}, + {12,0x011,MAKE_HUFFMAN_PAIR(8,2)}, + {12,0x012,MAKE_HUFFMAN_PAIR(4,3)}, + {12,0x013,MAKE_HUFFMAN_PAIR(0,10)}, + {12,0x014,MAKE_HUFFMAN_PAIR(2,4)}, + {12,0x015,MAKE_HUFFMAN_PAIR(7,2)}, + {12,0x016,MAKE_HUFFMAN_PAIR(21,1)}, + {12,0x017,MAKE_HUFFMAN_PAIR(20,1)}, + {12,0x018,MAKE_HUFFMAN_PAIR(0,9)}, + {12,0x019,MAKE_HUFFMAN_PAIR(19,1)}, + {12,0x01A,MAKE_HUFFMAN_PAIR(18,1)}, + {12,0x01B,MAKE_HUFFMAN_PAIR(1,5)}, + {12,0x01C,MAKE_HUFFMAN_PAIR(3,3)}, + {12,0x01D,MAKE_HUFFMAN_PAIR(0,8)}, + {12,0x01E,MAKE_HUFFMAN_PAIR(6,2)}, + {12,0x01F,MAKE_HUFFMAN_PAIR(17,1)}, + {13,0x0010,MAKE_HUFFMAN_PAIR(10,2)}, + {13,0x0011,MAKE_HUFFMAN_PAIR(9,2)}, + {13,0x0012,MAKE_HUFFMAN_PAIR(5,3)}, + {13,0x0013,MAKE_HUFFMAN_PAIR(3,4)}, + {13,0x0014,MAKE_HUFFMAN_PAIR(2,5)}, + {13,0x0015,MAKE_HUFFMAN_PAIR(1,7)}, + {13,0x0016,MAKE_HUFFMAN_PAIR(1,6)}, + {13,0x0017,MAKE_HUFFMAN_PAIR(0,15)}, + {13,0x0018,MAKE_HUFFMAN_PAIR(0,14)}, + {13,0x0019,MAKE_HUFFMAN_PAIR(0,13)}, + {13,0x001A,MAKE_HUFFMAN_PAIR(0,12)}, + {13,0x001B,MAKE_HUFFMAN_PAIR(26,1)}, + {13,0x001C,MAKE_HUFFMAN_PAIR(25,1)}, + {13,0x001D,MAKE_HUFFMAN_PAIR(24,1)}, + 
{13,0x001E,MAKE_HUFFMAN_PAIR(23,1)}, + {13,0x001F,MAKE_HUFFMAN_PAIR(22,1)}, + {14,0x0010,MAKE_HUFFMAN_PAIR(0,31)}, + {14,0x0011,MAKE_HUFFMAN_PAIR(0,30)}, + {14,0x0012,MAKE_HUFFMAN_PAIR(0,29)}, + {14,0x0013,MAKE_HUFFMAN_PAIR(0,28)}, + {14,0x0014,MAKE_HUFFMAN_PAIR(0,27)}, + {14,0x0015,MAKE_HUFFMAN_PAIR(0,26)}, + {14,0x0016,MAKE_HUFFMAN_PAIR(0,25)}, + {14,0x0017,MAKE_HUFFMAN_PAIR(0,24)}, + {14,0x0018,MAKE_HUFFMAN_PAIR(0,23)}, + {14,0x0019,MAKE_HUFFMAN_PAIR(0,22)}, + {14,0x001A,MAKE_HUFFMAN_PAIR(0,21)}, + {14,0x001B,MAKE_HUFFMAN_PAIR(0,20)}, + {14,0x001C,MAKE_HUFFMAN_PAIR(0,19)}, + {14,0x001D,MAKE_HUFFMAN_PAIR(0,18)}, + {14,0x001E,MAKE_HUFFMAN_PAIR(0,17)}, + {14,0x001F,MAKE_HUFFMAN_PAIR(0,16)}, + {15,0x0010,MAKE_HUFFMAN_PAIR(0,40)}, + {15,0x0011,MAKE_HUFFMAN_PAIR(0,39)}, + {15,0x0012,MAKE_HUFFMAN_PAIR(0,38)}, + {15,0x0013,MAKE_HUFFMAN_PAIR(0,37)}, + {15,0x0014,MAKE_HUFFMAN_PAIR(0,36)}, + {15,0x0015,MAKE_HUFFMAN_PAIR(0,35)}, + {15,0x0016,MAKE_HUFFMAN_PAIR(0,34)}, + {15,0x0017,MAKE_HUFFMAN_PAIR(0,33)}, + {15,0x0018,MAKE_HUFFMAN_PAIR(0,32)}, + {15,0x0019,MAKE_HUFFMAN_PAIR(1,14)}, + {15,0x001A,MAKE_HUFFMAN_PAIR(1,13)}, + {15,0x001B,MAKE_HUFFMAN_PAIR(1,12)}, + {15,0x001C,MAKE_HUFFMAN_PAIR(1,11)}, + {15,0x001D,MAKE_HUFFMAN_PAIR(1,10)}, + {15,0x001E,MAKE_HUFFMAN_PAIR(1,9)}, + {15,0x001F,MAKE_HUFFMAN_PAIR(1,8)}, + {16,0x0010,MAKE_HUFFMAN_PAIR(1,18)}, + {16,0x0011,MAKE_HUFFMAN_PAIR(1,17)}, + {16,0x0012,MAKE_HUFFMAN_PAIR(1,16)}, + {16,0x0013,MAKE_HUFFMAN_PAIR(1,15)}, + {16,0x0014,MAKE_HUFFMAN_PAIR(6,3)}, + {16,0x0015,MAKE_HUFFMAN_PAIR(16,2)}, + {16,0x0016,MAKE_HUFFMAN_PAIR(15,2)}, + {16,0x0017,MAKE_HUFFMAN_PAIR(14,2)}, + {16,0x0018,MAKE_HUFFMAN_PAIR(13,2)}, + {16,0x0019,MAKE_HUFFMAN_PAIR(12,2)}, + {16,0x001A,MAKE_HUFFMAN_PAIR(11,2)}, + {16,0x001B,MAKE_HUFFMAN_PAIR(31,1)}, + {16,0x001C,MAKE_HUFFMAN_PAIR(30,1)}, + {16,0x001D,MAKE_HUFFMAN_PAIR(29,1)}, + {16,0x001E,MAKE_HUFFMAN_PAIR(28,1)}, + {16,0x001F,MAKE_HUFFMAN_PAIR(27,1)}, }; #undef MAKE_HUFFMAN_PAIR @@ -249,7 +249,7 @@ static 
bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val) uint32_t outval = val; outval >>= bits - state->bits_left; assert(outval < (1<<16)); - uint16_t old_value = state->bits_value; + //uint16_t old_value = state->bits_value; assert((state->bits_value & outval) == 0); state->bits_value |= (uint16_t)outval; //fprintf(stderr, "trunc %2d %2d %08X %04X %04X\n", bits, state->bits_left, val, old_value, state->bits_value); @@ -269,7 +269,7 @@ static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val) uint32_t outval = val; outval <<= state->bits_left - bits; assert(outval < (1<<16)); - uint16_t old_value = state->bits_value; + //uint16_t old_value = state->bits_value; assert((state->bits_value & outval) == 0); state->bits_value |= (uint16_t)outval; //fprintf(stderr, "plop %2d %2d %08X %04X %04X\n", bits, state->bits_left, val, state->bits_value);