From 60cbaca2b294d2044478927729a01dfd8963efd2 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Wed, 5 Mar 2025 01:32:35 +0100 Subject: [PATCH] Fix str subheader corruption, update README --- README.md | 93 +++++++++++++++++++++++--------- libpsxav/adpcm.c | 68 ++++++++++++++++-------- libpsxav/cdrom.c | 21 ++++---- libpsxav/libpsxav.h | 26 +++++++-- psxavenc/filefmt.c | 127 +++++++++++++++++++++----------------------- psxavenc/mdec.c | 10 ++-- 6 files changed, 215 insertions(+), 130 deletions(-) diff --git a/README.md b/README.md index 411c595..12f3218 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # psxavenc psxavenc is an open-source command-line tool for encoding audio and video data -into formats commonly used on the original PlayStation. +into formats commonly used on the original PlayStation and PlayStation 2. ## Installation @@ -14,22 +14,22 @@ Requirements: ```shell $ meson setup build -$ cd build -$ ninja install +$ meson compile -C build +$ meson install -C build ``` ## Usage -Run `psxavenc`. +Run `psxavenc -h`. 
### Examples Rescale a video file to ≤320x240 pixels (preserving aspect ratio) and encode it -into a 15fps .STR file with 37800 Hz 4-bit stereo audio and 2352-byte sectors, -meant to be played at 2x CD-ROM speed: +into a 15 fps version 2 .str file with 37800 Hz 4-bit stereo audio and 2352-byte +sectors, meant to be played at 2x CD-ROM speed: ```shell -$ psxavenc -t str2cd -f 37800 -b 4 -c 2 -s 320x240 -r 15 -x 2 in.mp4 out.str +$ psxavenc -t strcd -v v2 -f 37800 -b 4 -c 2 -s 320x240 -r 15 -x 2 in.mp4 out.str ``` Convert a mono audio sample to 22050 Hz raw SPU-ADPCM data: @@ -38,36 +38,77 @@ Convert a mono audio sample to 22050 Hz raw SPU-ADPCM data: $ psxavenc -t spu -f 22050 in.ogg out.snd ``` -Convert a stereo audio file to a 44100 Hz interleaved .VAG file with 8192-byte +Convert a stereo audio file to a 44100 Hz interleaved .vag file with 2048-byte interleave and loop flags set at the end of each interleaved chunk: ```shell -$ psxavenc -t vagi -f 44100 -c 2 -L -i 8192 in.wav out.vag +$ psxavenc -t vagi -f 44100 -c 2 -L -i 2048 in.wav out.vag ``` -## Supported formats +## Supported output formats -| Format | Audio | Channels | Video | Sector size | -| :------- | :--------------- | :------- | :---- | :---------- | -| `xa` | XA-ADPCM | 1 or 2 | None | 2336 bytes | -| `xacd` | XA-ADPCM | 1 or 2 | None | 2352 bytes | -| `spu` | SPU-ADPCM | 1 | None | | -| `spui` | SPU-ADPCM | Any | None | Any | -| `vag` | SPU-ADPCM | 1 | None | | -| `vagi` | SPU-ADPCM | Any | None | Any | -| `str2` | None or XA-ADPCM | 1 or 2 | BS v2 | 2336 bytes | -| `str2cd` | None or XA-ADPCM | 1 or 2 | BS v2 | 2352 bytes | -| `str2v` | None | | BS v2 | | -| `sbs2` | None | | BS v2 | Any | +The output format must be set using the `-t` option. 
+ +| Format | Audio codec | Audio channels | Video codec | Sector size | +| :------- | :------------------- | :------------- | :------------ | :---------- | +| `xa` | XA-ADPCM | 1 or 2 | | 2336 bytes | +| `xacd` | XA-ADPCM | 1 or 2 | | 2352 bytes | +| `spu` | SPU-ADPCM | 1 | | | +| `vag` | SPU-ADPCM | 1 | | | +| `spui` | SPU-ADPCM | Any | | | +| `vagi` | SPU-ADPCM | Any | | | +| `str` | XA-ADPCM (optional) | 1 or 2 | BS v2/v3/v3dc | 2336 bytes | +| `strcd` | XA-ADPCM (optional) | 1 or 2 | BS v2/v3/v3dc | 2352 bytes | +| `strspu` | SPU-ADPCM (optional) | Any | BS v2/v3/v3dc | 2048 bytes | +| `strv` | | | BS v2/v3/v3dc | 2048 bytes | +| `sbs` | | | BS v2/v3/v3dc | | Notes: -- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .VAG +- The `xa`, `xacd`, `str` and `strcd` formats will output files with 2336- or + 2352-byte CD-ROM sectors, containing the appropriate CD-XA subheaders and + dummy EDC/ECC placeholders in addition to the actual sector data. Such files + **cannot be added to a disc image as-is** and must instead be parsed by an + authoring tool capable of rebuilding the EDC/ECC data (as it is dependent on + the file's absolute location on the disc) and generating a Mode 2 CD-ROM image + with "native" 2352-byte sectors. +- Similarly, files generated with `-t xa` or `-t xacd` **must be interleaved** + **with other XA-ADPCM tracks or empty padding using an external tool** before + they can be played. +- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .vag header at the beginning of the file. The header is always 48 bytes long for `vag` files, while in the case of `vagi` files it is padded to the size specified using the `-a` option (2048 bytes by default). Note that `vagi` files with more than 2 channels and/or alignment other than 2048 bytes are not standardized. 
- The `sbs2` format (used in some System 573 games) is simply a series of - concatenated BS v2 frames, each padded to the size specified by the `-a` - option, with no additional headers besides the BS frame headers. +- The `strspu` format encodes the input file's audio track as a series of custom + .str chunks (type ID `0x0001` by default) holding interleaved SPU-ADPCM data + in the same format as `spui`, rather than XA-ADPCM. As .str chunks do not + require custom XA subheaders, a file with standard 2048-byte sectors that does + not need any special handling will be generated. +- The `strv` format disables audio altogether and is equivalent to `strspu` on + an input file with no audio track. +- The `sbs` format (used in some System 573 games) consists of a series of + concatenated BS frames, each padded to the size specified by the `-a` option + (the default setting is 8192 bytes), with no additional headers besides the BS + frame headers. + +## Supported video codecs + +All formats with a video track (`str`, `strcd`, `strspu`, `strv` and `sbs`) can +use any of the codecs listed below. The codec can be set using the `-v` option. + +| Codec | Supported by | Typ. decoder CPU usage | +| :------------- | :-------------------- | :--------------------- | +| `v2` (default) | All players/decoders | Medium | +| `v3` | Most players/decoders | High | +| `v3dc` | Few players/decoders | High | + +Notes: + +- The `v3dc` codec is a variant of `v3` with a slightly better compression + ratio; however, most tools and playback libraries (including FFmpeg, jPSXdec + and earlier versions of Sony's own BS decoder) are unable to decode it + correctly; its use is thus highly discouraged. Refer to + [the psx-spx section on DC coefficient encoding](https://psx-spx.consoledev.net/cdromfileformats/#dc-v3) + for more details. 
diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c index ecc7264..2d5dbc5 100644 --- a/libpsxav/adpcm.c +++ b/libpsxav/adpcm.c @@ -228,14 +228,9 @@ static inline void psx_audio_xa_sync_subheader_copy(psx_cdrom_sector_mode2_t *bu memcpy(buffer->subheader + 1, buffer->subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); } -static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, psx_audio_xa_settings_t settings) { - if (settings.format == PSX_AUDIO_XA_FORMAT_XACD) { - memset(buffer, 0, PSX_CDROM_SECTOR_SIZE); - memset(buffer->sync + 1, 0xFF, 10); - buffer->header.mode = 0x02; - } else { - memset(buffer->subheader, 0, PSX_CDROM_SECTOR_SIZE - 16); - } +static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, int lba, psx_audio_xa_settings_t settings) { + if (settings.format == PSX_AUDIO_XA_FORMAT_XACD) + psx_cdrom_init_sector((psx_cdrom_sector_t *)buffer, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2); buffer->subheader[0].file = settings.file_number; buffer->subheader[0].channel = settings.channel_number & PSX_CDROM_SECTOR_XA_CHANNEL_MASK; @@ -243,28 +238,46 @@ static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, ps PSX_CDROM_SECTOR_XA_SUBMODE_AUDIO | PSX_CDROM_SECTOR_XA_SUBMODE_FORM2 | PSX_CDROM_SECTOR_XA_SUBMODE_RT; - buffer->subheader[0].coding = - (settings.stereo ? PSX_CDROM_SECTOR_XA_CODING_STEREO : PSX_CDROM_SECTOR_XA_CODING_MONO) - | (settings.frequency >= PSX_AUDIO_XA_FREQ_DOUBLE ? PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE : PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE) - | (settings.bits_per_sample >= 8 ? 
PSX_CDROM_SECTOR_XA_CODING_BITS_8 : PSX_CDROM_SECTOR_XA_CODING_BITS_4); + + if (settings.stereo) + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_STEREO; + else + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_MONO; + if (settings.frequency == PSX_AUDIO_XA_FREQ_DOUBLE) + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE; + else + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE; + if (settings.bits_per_sample == 8) + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_BITS_8; + else + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_BITS_4; + psx_audio_xa_sync_subheader_copy(buffer); } -int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output) { +int psx_audio_xa_encode( + psx_audio_xa_settings_t settings, + psx_audio_encoder_state_t *state, + int16_t* samples, + int sample_count, + int lba, + uint8_t *output +) { int sample_jump = (settings.bits_per_sample == 8) ? 112 : 224; int i, j; - int xa_sector_size = settings.format == PSX_AUDIO_XA_FORMAT_XA ? 
2336 : 2352; - int xa_offset = 2352 - xa_sector_size; + int xa_sector_size = psx_audio_xa_get_buffer_size_per_sector(settings); + int xa_offset = PSX_CDROM_SECTOR_SIZE - xa_sector_size; uint8_t init_sector = 1; - if (settings.stereo) { sample_count <<= 1; } + if (settings.stereo) + sample_count *= 2; for (i = 0, j = 0; i < sample_count || ((j % 18) != 0); i += sample_jump, j++) { psx_cdrom_sector_mode2_t *sector_data = (psx_cdrom_sector_mode2_t*) (output + ((j/18) * xa_sector_size) - xa_offset); uint8_t *block_data = sector_data->data + ((j%18) * 0x80); if (init_sector) { - psx_audio_xa_encode_init_sector(sector_data, settings); + psx_audio_xa_encode_init_sector(sector_data, lba, settings); init_sector = 0; } @@ -276,6 +289,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat if ((j+1)%18 == 0) { psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)sector_data, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2); init_sector = 1; + lba++; } } @@ -284,21 +298,33 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length) { if (output_length >= 2336) { - psx_cdrom_sector_mode2_t *sector = (psx_cdrom_sector_mode2_t*) &output[output_length - 2352]; + psx_cdrom_sector_mode2_t *sector = (psx_cdrom_sector_mode2_t*) &output[output_length - PSX_CDROM_SECTOR_SIZE]; sector->subheader[0].submode |= PSX_CDROM_SECTOR_XA_SUBMODE_EOF; psx_audio_xa_sync_subheader_copy(sector); } } -int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output) { +int psx_audio_xa_encode_simple( + psx_audio_xa_settings_t settings, + int16_t* samples, + int sample_count, + int lba, + uint8_t *output +) { psx_audio_encoder_state_t state; memset(&state, 0, sizeof(psx_audio_encoder_state_t)); - int length = psx_audio_xa_encode(settings, &state, samples, sample_count, output); + int length = 
psx_audio_xa_encode(settings, &state, samples, sample_count, lba, output); psx_audio_xa_encode_finalize(settings, output, length); return length; } -int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output) { +int psx_audio_spu_encode( + psx_audio_encoder_channel_state_t *state, + int16_t* samples, + int sample_count, + int pitch, + uint8_t *output +) { uint8_t prebuf[PSX_AUDIO_SPU_SAMPLES_PER_BLOCK]; uint8_t *buffer = output; diff --git a/libpsxav/cdrom.c b/libpsxav/cdrom.c index ac9de32..509e525 100644 --- a/libpsxav/cdrom.c +++ b/libpsxav/cdrom.c @@ -42,11 +42,21 @@ static uint32_t edc_crc32(uint8_t *data, int length) { #define TO_BCD(x) ((x) + ((x) / 10) * 6) +void psx_cdrom_init_xa_subheader(psx_cdrom_sector_xa_subheader_t *subheader, psx_cdrom_sector_type_t type) { + memset(subheader, 0, sizeof(psx_cdrom_sector_xa_subheader_t) * 2); + subheader->submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA; + + if (type == PSX_CDROM_SECTOR_TYPE_MODE2_FORM2) + subheader->submode |= PSX_CDROM_SECTOR_XA_SUBMODE_FORM2; + + memcpy(subheader + 1, subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); +} + void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type) { // Sync sequence memset(sector->mode1.sync + 1, 0xff, 10); sector->mode1.sync[0x0] = 0x00; - sector->mode1.sync[0xb] = 0x00; + sector->mode1.sync[0xB] = 0x00; // Timecode lba += 150; @@ -59,14 +69,7 @@ void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector sector->mode1.header.mode = 0x01; } else { sector->mode2.header.mode = 0x02; - - memset(sector->mode2.subheader, 0, sizeof(psx_cdrom_sector_xa_subheader_t)); - sector->mode2.subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA; - - if (type == PSX_CDROM_SECTOR_TYPE_MODE2_FORM2) - sector->mode2.subheader[0].submode |= PSX_CDROM_SECTOR_XA_SUBMODE_FORM2; - - memcpy(sector->mode2.subheader + 1, sector->mode2.subheader, 
sizeof(psx_cdrom_sector_xa_subheader_t)); + psx_cdrom_init_xa_subheader(sector->mode2.subheader, type); } } diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h index 1b8aaa1..0d9d171 100644 --- a/libpsxav/libpsxav.h +++ b/libpsxav/libpsxav.h @@ -72,9 +72,28 @@ uint32_t psx_audio_spu_get_buffer_size(int sample_count); uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t settings); uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings); uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings); -int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output); -int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output); -int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output); +int psx_audio_xa_encode( + psx_audio_xa_settings_t settings, + psx_audio_encoder_state_t *state, + int16_t* samples, + int sample_count, + int lba, + uint8_t *output +); +int psx_audio_xa_encode_simple( + psx_audio_xa_settings_t settings, + int16_t* samples, + int sample_count, + int lba, + uint8_t *output +); +int psx_audio_spu_encode( + psx_audio_encoder_channel_state_t *state, + int16_t* samples, + int sample_count, + int pitch, + uint8_t *output +); int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start); void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length); @@ -149,5 +168,6 @@ typedef enum { PSX_CDROM_SECTOR_TYPE_MODE2_FORM2 } psx_cdrom_sector_type_t; +void psx_cdrom_init_xa_subheader(psx_cdrom_sector_xa_subheader_t *subheader, psx_cdrom_sector_type_t type); void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type); void psx_cdrom_calculate_checksums(psx_cdrom_sector_t *sector, 
psx_cdrom_sector_type_t type); diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c index 3f6ce45..1150364 100644 --- a/psxavenc/filefmt.c +++ b/psxavenc/filefmt.c @@ -68,15 +68,24 @@ static psx_audio_xa_settings_t args_to_libpsxav_xa_audio(const args_t *args) { return settings; }; -static void init_sector_buffer_video(const args_t *args, psx_cdrom_sector_mode2_t *sector, int lba) { - psx_cdrom_init_sector((psx_cdrom_sector_t *)sector, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); +static void init_sector_buffer_video(const args_t *args, uint8_t *sector, int lba) { + psx_cdrom_sector_xa_subheader_t *subheader = NULL; - sector->subheader[0].file = args->audio_xa_file; - sector->subheader[0].channel = args->audio_xa_channel & PSX_CDROM_SECTOR_XA_CHANNEL_MASK; - sector->subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA | PSX_CDROM_SECTOR_XA_SUBMODE_RT; - sector->subheader[0].coding = 0; + if (args->format == FORMAT_STRCD) { + psx_cdrom_init_sector((psx_cdrom_sector_t *)sector, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); + subheader = ((psx_cdrom_sector_t *)sector)->mode2.subheader; + } else if (args->format == FORMAT_STR) { + subheader = (psx_cdrom_sector_xa_subheader_t *)sector; + } - memcpy(sector->subheader + 1, sector->subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); + if (subheader != NULL) { + subheader->file = args->audio_xa_file; + subheader->channel = args->audio_xa_channel & PSX_CDROM_SECTOR_XA_CHANNEL_MASK; + subheader->submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA | PSX_CDROM_SECTOR_XA_SUBMODE_RT; + subheader->coding = 0; + + memcpy(subheader + 1, subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); + } } #define VAG_HEADER_SIZE 0x30 @@ -147,35 +156,29 @@ void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output) { psx_audio_encoder_state_t audio_state; memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); - for (int j = 0; ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, 0); j++) { + int sector_count = 0; + + 
for (; ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, 0); sector_count++) { int samples_length = decoder->audio_sample_count / args->audio_channels; if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector; - uint8_t buffer[PSX_CDROM_SECTOR_SIZE]; + uint8_t sector[PSX_CDROM_SECTOR_SIZE]; int length = psx_audio_xa_encode( xa_settings, &audio_state, decoder->audio_samples, samples_length, - buffer + sector_count, + sector ); if (decoder->end_of_input) - psx_audio_xa_encode_finalize(xa_settings, buffer, length); - - if (args->format == FORMAT_XACD) { - int t = j + 75*2; - - // Put the time in - buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4); - buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4); - buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4); - } + psx_audio_xa_encode_finalize(xa_settings, sector, length); retire_av_data(decoder, samples_length * args->audio_channels, 0); - fwrite(buffer, length, 1, output); + fwrite(sector, length, 1, output); time_t t = get_elapsed_time(); @@ -183,8 +186,8 @@ void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output) { fprintf( stderr, "\rLBA: %6d | Encoding speed: %5.2fx", - j, - (double)(j * audio_samples_per_sector) / (double)(args->audio_frequency * t) + sector_count, + (double)(sector_count * audio_samples_per_sector) / (double)(args->audio_frequency * t) ); } } @@ -199,14 +202,14 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) { if (args->format == FORMAT_VAG) fseek(output, VAG_HEADER_SIZE, SEEK_SET); - uint8_t buffer[PSX_AUDIO_SPU_BLOCK_SIZE]; + uint8_t block[PSX_AUDIO_SPU_BLOCK_SIZE]; int block_count = 0; if (!(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) { // Insert leading silent block - memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE); + memset(block, 0, PSX_AUDIO_SPU_BLOCK_SIZE); - fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output); + fwrite(block, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output); block_count++; } @@ -226,16 
+229,16 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) { decoder->audio_samples, samples_length, 1, - buffer + block ); if (block_count == loop_start_block) - buffer[1] |= PSX_AUDIO_SPU_LOOP_START; + block[1] |= PSX_AUDIO_SPU_LOOP_START; if ((args->flags & FLAG_SPU_LOOP_END) && decoder->end_of_input) - buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT; + block[1] |= PSX_AUDIO_SPU_LOOP_REPEAT; retire_av_data(decoder, samples_length, 0); - fwrite(buffer, length, 1, output); + fwrite(block, length, 1, output); time_t t = get_elapsed_time(); @@ -251,10 +254,10 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) { if (!(args->flags & FLAG_SPU_LOOP_END)) { // Insert trailing looping block - memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE); - buffer[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; + memset(block, 0, PSX_AUDIO_SPU_BLOCK_SIZE); + block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; - fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output); + fwrite(block, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output); block_count++; } @@ -279,8 +282,8 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { // NOTE: since the interleaved .vag format is not standardized, some tools // (such as vgmstream) will not properly play files with interleave < 2048, // alignment != 2048 or channels != 2. 
- int buffer_size = args->audio_interleave * args->audio_channels + args->alignment - 1; - buffer_size -= buffer_size % args->alignment; + int chunk_size = args->audio_interleave * args->audio_channels + args->alignment - 1; + chunk_size -= chunk_size % args->alignment; int header_size = VAG_HEADER_SIZE + args->alignment - 1; header_size -= header_size % args->alignment; @@ -292,7 +295,7 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size); memset(audio_state, 0, audio_state_size); - uint8_t *buffer = malloc(buffer_size); + uint8_t *chunk = malloc(chunk_size); int chunk_count = 0; for (; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) { @@ -301,26 +304,26 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk; - memset(buffer, 0, buffer_size); - uint8_t *buffer_ptr = buffer; + memset(chunk, 0, chunk_size); + uint8_t *chunk_ptr = chunk; // Insert leading silent block if (chunk_count == 0 && !(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) { - buffer_ptr += PSX_AUDIO_SPU_BLOCK_SIZE; + chunk_ptr += PSX_AUDIO_SPU_BLOCK_SIZE; samples_length -= PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; } - for (int ch = 0; ch < args->audio_channels; ch++, buffer_ptr += args->audio_interleave) { + for (int ch = 0; ch < args->audio_channels; ch++, chunk_ptr += args->audio_interleave) { int length = psx_audio_spu_encode( audio_state + ch, decoder->audio_samples + ch, samples_length, args->audio_channels, - buffer_ptr + chunk_ptr ); if (length > 0) { - uint8_t *last_block = buffer_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE; + uint8_t *last_block = chunk_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE; if (args->flags & FLAG_SPU_LOOP_END) { last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT; @@ -335,7 +338,7 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, 
FILE *output) { } retire_av_data(decoder, samples_length * args->audio_channels, 0); - fwrite(buffer, buffer_size, 1, output); + fwrite(chunk, chunk_size, 1, output); time_t t = get_elapsed_time(); @@ -351,7 +354,7 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { } free(audio_state); - free(buffer); + free(chunk); if (args->format == FORMAT_VAGI) { uint8_t *header = malloc(header_size); @@ -421,28 +424,31 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { if (frames_needed < 2) frames_needed = 2; - for (int j = 0; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; j++) { + int sector_count = 0; + + for (; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; sector_count++) { ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed); - uint8_t buffer[PSX_CDROM_SECTOR_SIZE]; + uint8_t sector[PSX_CDROM_SECTOR_SIZE]; bool is_video_sector; if (args->flags & FLAG_STR_TRAILING_AUDIO) - is_video_sector = (j % interleave) < video_sectors_per_block; + is_video_sector = (sector_count % interleave) < video_sectors_per_block; else - is_video_sector = (j % interleave) > 0; + is_video_sector = (sector_count % interleave) > 0; if (is_video_sector) { - init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*)buffer, j); + init_sector_buffer_video(args, sector, sector_count); int frames_used = encode_sector_str( &encoder, args->format, args->str_video_id, decoder->video_frames, - buffer + sector ); + psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)sector, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); retire_av_data(decoder, 0, frames_used); } else { int samples_length = decoder->audio_sample_count / args->audio_channels; @@ -460,28 +466,17 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { &audio_state, decoder->audio_samples, samples_length, - buffer + sector_count, + sector ); if 
(decoder->end_of_input) - psx_audio_xa_encode_finalize(xa_settings, buffer, length); + psx_audio_xa_encode_finalize(xa_settings, sector, length); retire_av_data(decoder, samples_length * args->audio_channels, 0); } - if (args->format == FORMAT_STRCD) { - int t = j + 75*2; - - // Put the time in - buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4); - buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4); - buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4); - } - - if (is_video_sector) - psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)buffer, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); - - fwrite(buffer + PSX_CDROM_SECTOR_SIZE - sector_size, sector_size, 1, output); + fwrite(sector, sector_size, 1, output); time_t t = get_elapsed_time(); @@ -490,7 +485,7 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { stderr, "\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx", encoder.state.frame_index, - j, + sector_count, (double)encoder.state.quant_scale_sum / (double)encoder.state.frame_index, (double)(encoder.state.frame_index * args->str_fps_den) / (double)(t * args->str_fps_num) ); diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c index 3587ce1..602e4cc 100644 --- a/psxavenc/mdec.c +++ b/psxavenc/mdec.c @@ -288,11 +288,11 @@ static void init_dct_data(mdec_encoder_state_t *state, bs_codec_t codec) { uint32_t base_value = dc_c_huffman_tree[i].c_value; int pos_offset = 1 << dc_bits; - int neg_offset = 1 - (1 << (dc_bits + 1)); + int neg_offset = pos_offset * 2 - 1; for (int j = 0; j < (1 << dc_bits); j++) { int pos = (j + pos_offset) & 0x1FF; - int neg = (j + neg_offset) & 0x1FF; + int neg = (j - neg_offset) & 0x1FF; state->dc_huffman_map[(INDEX_CR << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j); state->dc_huffman_map[(INDEX_CR << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j); @@ -306,11 +306,11 @@ static void init_dct_data(mdec_encoder_state_t *state, bs_codec_t 
codec) { uint32_t base_value = dc_y_huffman_tree[i].c_value; int pos_offset = 1 << dc_bits; - int neg_offset = 1 - (1 << (dc_bits + 1)); + int neg_offset = pos_offset * 2 - 1; for (int j = 0; j < (1 << dc_bits); j++) { int pos = (j + pos_offset) & 0x1FF; - int neg = (j + neg_offset) & 0x1FF; + int neg = (j - neg_offset) & 0x1FF; state->dc_huffman_map[(INDEX_Y << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j); state->dc_huffman_map[(INDEX_Y << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j); @@ -657,7 +657,7 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) { // Attempt encoding the frame at the maximum quality. If the result is too // large, increase the quantization scale and try again. // TODO: if a frame encoded at scale N is too large but the same frame - // encoded at scale N-1 leaves a significant amount of free space, attempt + // encoded at scale N+1 leaves a significant amount of free space, attempt // compressing at scale N but optimizing coefficients away until it fits // (like the old algorithm did) for (