Merge pull request #1 from spicyjpeg/old-pr

.STR conversion bugfix, support for generating mono and interleaved .VAG files
Adrian Siekierka 2023-05-15 18:52:34 +02:00 committed by GitHub
commit 2482bc14db
10 changed files with 1308 additions and 704 deletions


@@ -1,6 +1,22 @@

# psxavenc

psxavenc is an open-source command-line tool for encoding audio and video data
into formats commonly used on the original PlayStation.

## Installation

Requirements:

- a recent version of FFmpeg libraries (`libavformat`, `libavcodec`,
  `libavutil`, `libswresample`, `libswscale`);
- a recent version of Meson.

```shell
$ meson setup build
$ cd build
$ ninja install
```

## Usage

@@ -8,21 +24,49 @@ Run `psxavenc`.

### Examples

Rescale a video file to ≤320x240 pixels (preserving aspect ratio) and encode it
into a 15fps .STR file with 37800 Hz 4-bit stereo audio and 2352-byte sectors,
meant to be played at 2x CD-ROM speed:

```shell
$ psxavenc -t str2cd -f 37800 -b 4 -c 2 -s 320x240 -r 15 -x 2 in.mp4 out.str
```

Convert a mono audio sample to 22050 Hz raw SPU-ADPCM data:

```shell
$ psxavenc -t spu -f 22050 in.ogg out.snd
```

Convert a stereo audio file to a 44100 Hz interleaved .VAG file with 8192-byte
interleave and loop flags set at the end of each interleaved chunk:

```shell
$ psxavenc -t vagi -f 44100 -c 2 -L -i 8192 in.wav out.vag
```

## Supported formats

| Format   | Audio            | Channels | Video | Sector size |
| :------- | :--------------- | :------- | :---- | :---------- |
| `xa`     | XA-ADPCM         | 1 or 2   | None  | 2336 bytes  |
| `xacd`   | XA-ADPCM         | 1 or 2   | None  | 2352 bytes  |
| `spu`    | SPU-ADPCM        | 1        | None  |             |
| `spui`   | SPU-ADPCM        | Any      | None  | Any         |
| `vag`    | SPU-ADPCM        | 1        | None  |             |
| `vagi`   | SPU-ADPCM        | Any      | None  | Any         |
| `str2`   | None or XA-ADPCM | 1 or 2   | BS v2 | 2336 bytes  |
| `str2cd` | None or XA-ADPCM | 1 or 2   | BS v2 | 2352 bytes  |
| `sbs2`   | None             |          | BS v2 | Any         |

Notes:

- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .VAG
  header at the beginning of the file. The header is always 48 bytes long for
  `vag` files, while in the case of `vagi` files it is padded to the size
  specified using the `-a` option (2048 bytes by default). Note that `vagi`
  files with more than 2 channels and/or alignment other than 2048 bytes are
  not standardized.
- The `sbs2` format (used in some System 573 games) is simply a series of
  concatenated BS v2 frames, each padded to the size specified by the `-a`
  option, with no additional headers besides the BS frame headers (a minimal
  reader sketch follows below).
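
The `sbs2` layout described above is simple enough to walk without an index. A
minimal reader sketch in C, assuming the chunk size is known out of band (it
must match the `-a` value used when encoding and is not stored in the file):

```c
/*
 * Sketch only: walks an .sbs file as described above. The chunk size is an
 * assumption and must match the encoder's -a option.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv) {
	if (argc < 2)
		return 1;

	FILE *sbs = fopen(argv[1], "rb");
	if (sbs == NULL)
		return 1;

	size_t frame_size = 8192; // assumption: the -a value used when encoding
	uint8_t *frame = malloc(frame_size);

	// Each chunk holds exactly one BS v2 frame, zero-padded up to frame_size.
	while (fread(frame, 1, frame_size, sbs) == frame_size) {
		// ...hand the frame to a BS/MDEC decoder here...
	}

	free(frame);
	fclose(sbs);
	return 0;
}
```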


@@ -3,6 +3,7 @@ libpsxav: MDEC video + SPU/XA-ADPCM audio library

Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg

This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages

@@ -25,6 +26,9 @@ freely, subject to the following restrictions:

#include <string.h>
#include "libpsxav.h"

#define SHIFT_RANGE_4BPS 12
#define SHIFT_RANGE_8BPS 8

#define ADPCM_FILTER_COUNT 5
#define XA_ADPCM_FILTER_COUNT 4
#define SPU_ADPCM_FILTER_COUNT 5
@@ -32,7 +36,7 @@ freely, subject to the following restrictions:

static const int16_t filter_k1[ADPCM_FILTER_COUNT] = {0, 60, 115, 98, 122};
static const int16_t filter_k2[ADPCM_FILTER_COUNT] = {0, 0, -52, -55, -60};

static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, int filter, int shift_range) {
	// Assumption made:
	//
	// There is value in shifting right one step further to allow the nibbles to clip.

@@ -51,7 +55,7 @@

	int32_t s_min = 0;
	int32_t s_max = 0;
	for (int i = 0; i < 28; i++) {
		int32_t raw_sample = (i >= sample_limit) ? 0 : samples[i * pitch];
		int32_t previous_values = (k1*prev1 + k2*prev2 + (1<<5))>>6;
		int32_t sample = raw_sample - previous_values;
		if (sample < s_min) { s_min = sample; }

@@ -59,16 +63,18 @@

		prev2 = prev1;
		prev1 = raw_sample;
	}
	while(right_shift < shift_range && (s_max>>right_shift) > (+0x7FFF >> shift_range)) { right_shift += 1; };
	while(right_shift < shift_range && (s_min>>right_shift) < (-0x8000 >> shift_range)) { right_shift += 1; };
	int min_shift = shift_range - right_shift;
	assert(0 <= min_shift && min_shift <= shift_range);
	return min_shift;
}
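
// With the shift_range parameter the clamp bounds above cover both encoding
// modes: for SHIFT_RANGE_4BPS (12), +0x7FFF >> 12 == +7 and -0x8000 >> 12 == -8,
// matching the old hardcoded 4-bit nibble limits; for SHIFT_RANGE_8BPS (8) the
// limits become +0x7F and -0x80, as needed for 8-bit XA-ADPCM samples.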

static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, const psx_audio_encoder_channel_state_t *instate, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter, int sample_shift, int shift_range) {
	uint8_t sample_mask = 0xFFFF >> shift_range;
	uint8_t nondata_mask = ~(sample_mask << data_shift);

	int min_shift = sample_shift;
	int k1 = filter_k1[filter];
	int k2 = filter_k2[filter];

@@ -82,17 +88,17 @@

	outstate->mse = 0;

	for (int i = 0; i < 28; i++) {
		int32_t sample = ((i >= sample_limit) ? 0 : samples[i * pitch]) + outstate->qerr;
		int32_t previous_values = (k1*outstate->prev1 + k2*outstate->prev2 + (1<<5))>>6;
		int32_t sample_enc = sample - previous_values;
		sample_enc <<= min_shift;
		sample_enc += (1<<(shift_range-1));
		sample_enc >>= shift_range;
		if(sample_enc < (-0x8000 >> shift_range)) { sample_enc = -0x8000 >> shift_range; }
		if(sample_enc > (+0x7FFF >> shift_range)) { sample_enc = +0x7FFF >> shift_range; }
		sample_enc &= sample_mask;

		int32_t sample_dec = (int16_t) ((sample_enc & sample_mask) << shift_range);
		sample_dec >>= min_shift;
		sample_dec += previous_values;
		if (sample_dec > +0x7FFF) { sample_dec = +0x7FFF; }

@@ -114,14 +120,14 @@

	return hdr;
}

static uint8_t encode(psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter_count, int shift_range) {
	psx_audio_encoder_channel_state_t proposed;
	int64_t best_mse = ((int64_t)1<<(int64_t)50);
	int best_filter = 0;
	int best_sample_shift = 0;

	for (int filter = 0; filter < filter_count; filter++) {
		int true_min_shift = find_min_shift(state, samples, sample_limit, pitch, filter, shift_range);

		// Testing has shown that the optimal shift can be off the true minimum shift
		// by 1 in *either* direction.

@@ -129,15 +135,15 @@

		int min_shift = true_min_shift - 1;
		int max_shift = true_min_shift + 1;
		if (min_shift < 0) { min_shift = 0; }
		if (max_shift > shift_range) { max_shift = shift_range; }

		for (int sample_shift = min_shift; sample_shift <= max_shift; sample_shift++) {
			// ignore header here
			attempt_to_encode(
				&proposed, state,
				samples, sample_limit, pitch,
				data, data_shift, data_pitch,
				filter, sample_shift, shift_range);

			if (best_mse > proposed.mse) {
				best_mse = proposed.mse;

@@ -148,46 +154,46 @@

	}

	// now go with the encoder
	return attempt_to_encode(
		state, state,
		samples, sample_limit, pitch,
		data, data_shift, data_pitch,
		best_filter, best_sample_shift, shift_range);
}
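
// The search above tries every filter with up to three candidate shifts
// (true_min_shift - 1 .. true_min_shift + 1), i.e. at most 4 * 3 = 12 trial
// encodes per 28-sample block for XA-ADPCM and 5 * 3 = 15 for SPU-ADPCM,
// keeping the filter/shift pair with the lowest mean squared error.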

static void encode_block_xa(int16_t *audio_samples, int audio_samples_limit, uint8_t *data, psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state) {
	if (settings.bits_per_sample == 4) {
		if (settings.stereo) {
			data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 2, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[1] = encode(&(state->right), audio_samples + 1, audio_samples_limit, 2, data + 0x10, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[2] = encode(&(state->left), audio_samples + 56, audio_samples_limit - 28, 2, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[3] = encode(&(state->right), audio_samples + 56 + 1, audio_samples_limit - 28, 2, data + 0x11, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[8] = encode(&(state->left), audio_samples + 56*2, audio_samples_limit - 28*2, 2, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[9] = encode(&(state->right), audio_samples + 56*2 + 1, audio_samples_limit - 28*2, 2, data + 0x12, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[10] = encode(&(state->left), audio_samples + 56*3, audio_samples_limit - 28*3, 2, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[11] = encode(&(state->right), audio_samples + 56*3 + 1, audio_samples_limit - 28*3, 2, data + 0x13, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
		} else {
			data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 1, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[1] = encode(&(state->left), audio_samples + 28, audio_samples_limit - 28, 1, data + 0x10, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[2] = encode(&(state->left), audio_samples + 28*2, audio_samples_limit - 28*2, 1, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[3] = encode(&(state->left), audio_samples + 28*3, audio_samples_limit - 28*3, 1, data + 0x11, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[8] = encode(&(state->left), audio_samples + 28*4, audio_samples_limit - 28*4, 1, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[9] = encode(&(state->left), audio_samples + 28*5, audio_samples_limit - 28*5, 1, data + 0x12, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[10] = encode(&(state->left), audio_samples + 28*6, audio_samples_limit - 28*6, 1, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
			data[11] = encode(&(state->left), audio_samples + 28*7, audio_samples_limit - 28*7, 1, data + 0x13, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
		}
	} else {
		if (settings.stereo) {
			data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 2, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
			data[1] = encode(&(state->right), audio_samples + 1, audio_samples_limit, 2, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
			data[2] = encode(&(state->left), audio_samples + 56, audio_samples_limit - 28, 2, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
			data[3] = encode(&(state->right), audio_samples + 56 + 1, audio_samples_limit - 28, 2, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
		} else {
			data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 1, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
			data[1] = encode(&(state->left), audio_samples + 28, audio_samples_limit - 28, 1, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
			data[2] = encode(&(state->left), audio_samples + 28*2, audio_samples_limit - 28*2, 1, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
			data[3] = encode(&(state->left), audio_samples + 28*3, audio_samples_limit - 28*3, 1, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
		}
	}
}
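
// Each encode() call above fills one 28-sample block of a 128-byte XA sound
// group: the byte at data[n] is that block's filter/shift header (the CD-ROM
// XA spec mirrors bytes 0-3 at 4-7 and 8-11 at 12-15) and the 112 bytes from
// data + 0x10 onwards hold the sample data interleaved with a stride of 4.
// In 4-bit mode a group thus carries 8 * 28 = 224 samples (112 per channel in
// stereo); in 8-bit mode it carries 4 * 28 = 112. A 2336/2352-byte sector
// holds 18 such groups.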

@@ -218,6 +224,14 @@ uint32_t psx_audio_spu_get_samples_per_block(void) {

	return 28;
}

uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings) {
	// 1/2 interleave for 37800 Hz 8-bit stereo at 1x speed
	int interleave = settings.stereo ? 2 : 4;

	if (settings.frequency == PSX_AUDIO_XA_FREQ_SINGLE) { interleave <<= 1; }
	if (settings.bits_per_sample == 4) { interleave <<= 1; }

	return interleave;
}
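
// Returned values, following the 1x-speed rule above:
//   37800 Hz, 8-bit, stereo -> 2   (audio occupies every 2nd sector)
//   37800 Hz, 4-bit, stereo -> 4   (the typical .STR/.XA configuration)
//   18900 Hz, 4-bit, mono   -> 16  (lowest bitrate, most room left for data)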

static void psx_audio_xa_encode_init_sector(uint8_t *buffer, psx_audio_xa_settings_t settings) {
	if (settings.format == PSX_AUDIO_XA_FORMAT_XACD) {
		memset(buffer, 0, 2352);

@@ -269,7 +283,7 @@

	return (((j + 17) / 18) * xa_sector_size);
}

void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length) {
	if (output_length >= 2336) {
		output[output_length - 2352 + 0x12] |= 0x80;
		output[output_length - 2352 + 0x18] |= 0x80;

@@ -284,13 +298,12 @@

	return length;
}

int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output) {
	uint8_t prebuf[28];
	uint8_t *buffer = output;

	for (int i = 0; i < sample_count; i += 28, buffer += 16) {
		buffer[0] = encode(state, samples + i * pitch, sample_count - i, pitch, prebuf, 0, 1, SPU_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
		buffer[1] = 0;

		for (int j = 0; j < 28; j+=2) {

@@ -302,20 +315,22 @@

}

int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start) {
	psx_audio_encoder_channel_state_t state;
	memset(&state, 0, sizeof(psx_audio_encoder_channel_state_t));
	int length = psx_audio_spu_encode(&state, samples, sample_count, 1, output);

	if (length >= 32) {
		if (loop_start < 0) {
			//output[1] = PSX_AUDIO_SPU_LOOP_START;
			output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_END;
		} else {
			psx_audio_spu_set_flag_at_sample(output, loop_start, PSX_AUDIO_SPU_LOOP_START);
			output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_REPEAT;
		}
	} else if (length >= 16) {
		output[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
		if (loop_start >= 0)
			output[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
	}

	return length;
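
// Each 16-byte block emitted above holds a filter/shift byte, the flags byte
// and 14 data bytes packing 28 4-bit samples. Judging by the literals this
// change replaces (4, 1, 3, 5, 7), the PSX_AUDIO_SPU_LOOP_* constants map to
// the SPU's per-block flag bits: bit 0 = loop end, bit 1 = repeat (jump back
// to the loop address instead of going silent) and bit 2 = loop start.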


@@ -67,11 +67,12 @@ uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t settings);

uint32_t psx_audio_spu_get_buffer_size_per_block(void);
uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings);
uint32_t psx_audio_spu_get_samples_per_block(void);
uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings);
int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output);
int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output);
int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output);
int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start);
void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length);
void psx_audio_spu_set_flag_at_sample(uint8_t* spu_data, int sample_pos, int flag);

// cdrom.c


@@ -1,5 +1,7 @@

project('psxavenc', 'c', default_options: ['c_std=c11'])

libm_dep = meson.get_compiler('c').find_library('m')

ffmpeg = [
	dependency('libavformat'),
	dependency('libavcodec'),

@@ -21,4 +23,4 @@ executable('psxavenc', [

	'psxavenc/filefmt.c',
	'psxavenc/mdec.c',
	'psxavenc/psxavenc.c'
], dependencies: [libm_dep, ffmpeg, libpsxav_dep], install: true)


@@ -24,15 +24,22 @@ freely, subject to the following restrictions:

#include "common.h"

void init_sector_buffer_video(uint8_t *buffer, settings_t *settings) {
	int offset;

	if (settings->format == FORMAT_STR2CD) {
		memset(buffer, 0, 2352);
		memset(buffer+0x001, 0xFF, 10);
		buffer[0x00F] = 0x02;
		offset = 0x10;
	} else {
		memset(buffer, 0, 2336);
		offset = 0;
	}

	buffer[offset+0] = settings->file_number;
	buffer[offset+1] = settings->channel_number & 0x1F;
	buffer[offset+2] = 0x08 | 0x40;
	buffer[offset+3] = 0x00;
	memcpy(buffer + offset + 4, buffer + offset, 4);
}
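
// The four bytes written at offset+0..3 (and copied to offset+4..7) form the
// CD-ROM XA subheader: file number, channel number, submode and coding info.
// Submode 0x08 | 0x40 flags the sector as real-time data; the audio-side
// counterpart, psx_audio_xa_encode_finalize() shown earlier, marks the last
// sector of a stream by ORing 0x80 into its subheader.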
void calculate_edc_data(uint8_t *buffer)


@@ -28,6 +28,9 @@ freely, subject to the following restrictions:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <unistd.h>

#include <libavutil/opt.h>
#include <libavcodec/avcodec.h>

@@ -36,27 +39,33 @@ freely, subject to the following restrictions:

#include <libswresample/swresample.h>
#include <libpsxav.h>

#define NUM_FORMATS 9

#define FORMAT_XA 0
#define FORMAT_XACD 1
#define FORMAT_SPU 2
#define FORMAT_SPUI 3
#define FORMAT_VAG 4
#define FORMAT_VAGI 5
#define FORMAT_STR2 6
#define FORMAT_STR2CD 7
#define FORMAT_SBS2 8

typedef struct {
	int frame_index;
	int frame_data_offset;
	int frame_max_size;
	int frame_block_base_overflow;
	int frame_block_overflow_num;
	int frame_block_overflow_den;
	uint16_t bits_value;
	int bits_left;
	uint8_t *frame_output;
	int bytes_used;
	int blocks_used;
	int uncomp_hwords_used;
	int quant_scale;
	int quant_scale_sum;
	float *dct_block_lists[6];
} vid_encoder_state_t;

typedef struct {

@@ -69,8 +78,6 @@ typedef struct {

	AVStream* video_stream;
	AVCodecContext* audio_codec_context;
	AVCodecContext* video_codec_context;
	struct SwrContext* resampler;
	struct SwsContext* scaler;
	AVFrame* frame;

@@ -81,17 +88,28 @@ typedef struct {

} av_decoder_state_t;

typedef struct {
	bool quiet;
	bool show_progress;

	int format; // FORMAT_*
	int channels;
	int cd_speed; // 1 or 2
	int frequency; // 18900 or 37800 Hz
	int bits_per_sample; // 4 or 8
	int file_number; // 00-FF
	int channel_number; // 00-1F
	int interleave;
	int alignment;
	bool loop;
	int video_width;
	int video_height;
	int video_fps_num; // FPS numerator
	int video_fps_den; // FPS denominator
	bool ignore_aspect_ratio;

	char *swresample_options;
	char *swscale_options;

	int16_t *audio_samples;
	int audio_sample_count;

@@ -99,8 +117,11 @@ typedef struct {

	int video_frame_count;

	av_decoder_state_t decoder_state_av;
	vid_encoder_state_t state_vid;

	bool end_of_input;
	time_t start_time;
	time_t last_progress_update;
} settings_t;
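
// Example: the README's str2cd invocation (psxavenc -t str2cd -f 37800 -b 4
// -c 2 -s 320x240 -r 15 -x 2) corresponds roughly to format=FORMAT_STR2CD,
// frequency=37800, bits_per_sample=4, channels=2, video_width=320,
// video_height=240, video_fps_num=15, video_fps_den=1 and cd_speed=2 (the
// -x/cd_speed and -r/FPS pairings are inferred from the field comments above,
// not spelled out in this diff).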

// cdrom.c

@@ -108,17 +129,19 @@ void init_sector_buffer_video(uint8_t *buffer, settings_t *settings);

void calculate_edc_data(uint8_t *buffer);

// decoding.c

bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required);
bool poll_av_data(settings_t *settings);
bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames);
void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames);
void close_av_data(settings_t *settings);

// filefmt.c

void encode_file_spu(settings_t *settings, FILE *output);
void encode_file_spu_interleaved(settings_t *settings, FILE *output);
void encode_file_xa(settings_t *settings, FILE *output);
void encode_file_str(settings_t *settings, FILE *output);
void encode_file_sbs(settings_t *settings, FILE *output);

// mdec.c

void encode_frame_bs(uint8_t *video_frame, settings_t *settings);
void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings);


@@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend

Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg

This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages

@@ -23,9 +24,7 @@ freely, subject to the following restrictions:

#include "common.h"
static void poll_av_packet(settings_t *settings, AVPacket *packet); int decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
int decode_audio_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
int ret; int ret;
if (packet != NULL) { if (packet != NULL) {
@ -44,29 +43,8 @@ int decode_audio_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, A
} }
} }
int decode_video_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) { bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required)
int ret;
if (packet != NULL) {
ret = avcodec_send_packet(codec, packet);
if (ret != 0) {
return 0;
}
}
ret = avcodec_receive_frame(codec, frame);
if (ret >= 0) {
*frame_size = ret;
return 1;
} else {
return ret == AVERROR(EAGAIN) ? 1 : 0;
}
}
bool open_av_data(const char *filename, settings_t *settings)
{ {
AVPacket packet;
av_decoder_state_t* av = &(settings->decoder_state_av); av_decoder_state_t* av = &(settings->decoder_state_av);
av->video_next_pts = 0.0; av->video_next_pts = 0.0;
av->frame = NULL; av->frame = NULL;
@ -79,11 +57,13 @@ bool open_av_data(const char *filename, settings_t *settings)
av->video_stream = NULL; av->video_stream = NULL;
av->audio_codec_context = NULL; av->audio_codec_context = NULL;
av->video_codec_context = NULL; av->video_codec_context = NULL;
av->audio_codec = NULL;
av->video_codec = NULL;
av->resampler = NULL; av->resampler = NULL;
av->scaler = NULL; av->scaler = NULL;
if (settings->quiet) {
av_log_set_level(AV_LOG_QUIET);
}
av->format = avformat_alloc_context(); av->format = avformat_alloc_context();
if (avformat_open_input(&(av->format), filename, NULL, NULL)) { if (avformat_open_input(&(av->format), filename, NULL, NULL)) {
return false; return false;
@ -92,89 +72,157 @@ bool open_av_data(const char *filename, settings_t *settings)
return false; return false;
} }
for (int i = 0; i < av->format->nb_streams; i++) { if (use_audio) {
if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { for (int i = 0; i < av->format->nb_streams; i++) {
if (av->audio_stream_index >= 0) { if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
fprintf(stderr, "open_av_data: found multiple audio tracks?\n"); if (av->audio_stream_index >= 0) {
return false; fprintf(stderr, "Input file must have a single audio track\n");
return false;
}
av->audio_stream_index = i;
} }
av->audio_stream_index = i;
} }
} if (audio_required && av->audio_stream_index == -1) {
if (av->audio_stream_index == -1) { fprintf(stderr, "Input file has no audio data\n");
return false; return false;
}
for (int i = 0; i < av->format->nb_streams; i++) {
if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
if (av->video_stream_index >= 0) {
fprintf(stderr, "open_av_data: found multiple video tracks?\n");
return false;
}
av->video_stream_index = i;
} }
} }
av->audio_stream = av->format->streams[av->audio_stream_index]; if (use_video) {
for (int i = 0; i < av->format->nb_streams; i++) {
if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
if (av->video_stream_index >= 0) {
fprintf(stderr, "Input file must have a single video track\n");
return false;
}
av->video_stream_index = i;
}
}
if (video_required && av->video_stream_index == -1) {
fprintf(stderr, "Input file has no video data\n");
return false;
}
}
av->audio_stream = (av->audio_stream_index != -1 ? av->format->streams[av->audio_stream_index] : NULL);
av->video_stream = (av->video_stream_index != -1 ? av->format->streams[av->video_stream_index] : NULL); av->video_stream = (av->video_stream_index != -1 ? av->format->streams[av->video_stream_index] : NULL);
av->audio_codec = avcodec_find_decoder(av->audio_stream->codecpar->codec_id);
av->audio_codec_context = avcodec_alloc_context3(av->audio_codec);
if (av->audio_codec_context == NULL) {
return false;
}
if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0) {
return false;
}
if (avcodec_open2(av->audio_codec_context, av->audio_codec, NULL) < 0) {
return false;
}
av->resampler = swr_alloc(); if (av->audio_stream != NULL) {
av_opt_set_int(av->resampler, "in_channel_count", av->audio_codec_context->channels, 0); const AVCodec *codec = avcodec_find_decoder(av->audio_stream->codecpar->codec_id);
av_opt_set_int(av->resampler, "in_channel_layout", av->audio_codec_context->channel_layout, 0); av->audio_codec_context = avcodec_alloc_context3(codec);
av_opt_set_int(av->resampler, "in_sample_rate", av->audio_codec_context->sample_rate, 0); if (av->audio_codec_context == NULL) {
av_opt_set_sample_fmt(av->resampler, "in_sample_fmt", av->audio_codec_context->sample_fmt, 0); return false;
}
if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0) {
return false;
}
if (avcodec_open2(av->audio_codec_context, codec, NULL) < 0) {
return false;
}
av->sample_count_mul = settings->stereo ? 2 : 1; AVChannelLayout layout;
av_opt_set_int(av->resampler, "out_channel_count", settings->stereo ? 2 : 1, 0); layout.nb_channels = settings->channels;
av_opt_set_int(av->resampler, "out_channel_layout", settings->stereo ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO, 0); if (settings->channels <= 2) {
av_opt_set_int(av->resampler, "out_sample_rate", settings->frequency, 0); layout.order = AV_CHANNEL_ORDER_NATIVE;
av_opt_set_sample_fmt(av->resampler, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0); layout.u.mask = (settings->channels == 2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
} else {
layout.order = AV_CHANNEL_ORDER_UNSPEC;
}
if (!settings->quiet && settings->channels > av->audio_codec_context->ch_layout.nb_channels) {
fprintf(stderr, "Warning: input file has less than %d channels\n", settings->channels);
}
if (swr_init(av->resampler) < 0) { av->sample_count_mul = settings->channels;
return false; if (swr_alloc_set_opts2(
&av->resampler,
&layout,
AV_SAMPLE_FMT_S16,
settings->frequency,
&av->audio_codec_context->ch_layout,
av->audio_codec_context->sample_fmt,
av->audio_codec_context->sample_rate,
0,
NULL
) < 0) {
return false;
}
if (settings->swresample_options) {
if (av_opt_set_from_string(av->resampler, settings->swresample_options, NULL, "=", ":,") < 0) {
return false;
}
}
if (swr_init(av->resampler) < 0) {
return false;
}
} }
if (av->video_stream != NULL) { if (av->video_stream != NULL) {
av->video_codec = avcodec_find_decoder(av->video_stream->codecpar->codec_id); const AVCodec *codec = avcodec_find_decoder(av->video_stream->codecpar->codec_id);
av->video_codec_context = avcodec_alloc_context3(av->video_codec); av->video_codec_context = avcodec_alloc_context3(codec);
if(av->video_codec_context == NULL) { if(av->video_codec_context == NULL) {
return false; return false;
} }
if (avcodec_parameters_to_context(av->video_codec_context, av->video_stream->codecpar) < 0) { if (avcodec_parameters_to_context(av->video_codec_context, av->video_stream->codecpar) < 0) {
return false; return false;
} }
if (avcodec_open2(av->video_codec_context, av->video_codec, NULL) < 0) { if (avcodec_open2(av->video_codec_context, codec, NULL) < 0) {
return false; return false;
} }
if (!settings->quiet && (
settings->video_width > av->video_codec_context->width ||
settings->video_height > av->video_codec_context->height
)) {
fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n",
settings->video_width, settings->video_height
);
}
if (!settings->ignore_aspect_ratio) {
// Reduce the provided size so that it matches the input file's
// aspect ratio.
double src_ratio = (double)av->video_codec_context->width / (double)av->video_codec_context->height;
double dst_ratio = (double)settings->video_width / (double)settings->video_height;
if (src_ratio < dst_ratio) {
settings->video_width = (int)((double)settings->video_height * src_ratio + 15.0) & ~15;
} else {
settings->video_height = (int)((double)settings->video_width / src_ratio + 15.0) & ~15;
}
}
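// Example: a 1920x816 (~2.35:1) input scaled towards 320x240 falls into the
// else branch, so the height becomes 320 / 2.35... = 136, which the
// "+ 15.0) & ~15" step rounds up to the next multiple of 16, i.e. 144,
// giving a 320x144 output. Keeping both dimensions multiples of 16 matches
// the 16x16 macroblocks used by the BS/MDEC encoder.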
av->scaler = sws_getContext( av->scaler = sws_getContext(
av->video_codec_context->width, av->video_codec_context->width,
av->video_codec_context->height, av->video_codec_context->height,
av->video_codec_context->pix_fmt, av->video_codec_context->pix_fmt,
settings->video_width, settings->video_width,
settings->video_height, settings->video_height,
AV_PIX_FMT_RGBA, AV_PIX_FMT_NV21,
SWS_BICUBIC, SWS_BICUBIC,
NULL, NULL,
NULL, NULL,
NULL); NULL
);
// Is this even necessary? -- spicyjpeg
sws_setColorspaceDetails(
av->scaler,
sws_getCoefficients(av->video_codec_context->colorspace),
(av->video_codec_context->color_range == AVCOL_RANGE_JPEG),
sws_getCoefficients(SWS_CS_ITU601),
true,
0,
0,
0
);
if (settings->swscale_options) {
if (av_opt_set_from_string(av->scaler, settings->swscale_options, NULL, "=", ":,") < 0) {
return false;
}
}
av->video_frame_src_size = 4*av->video_codec_context->width*av->video_codec_context->height; av->video_frame_src_size = 4*av->video_codec_context->width*av->video_codec_context->height;
av->video_frame_dst_size = 4*settings->video_width*settings->video_height; av->video_frame_dst_size = 3*settings->video_width*settings->video_height/2;
} }
av_init_packet(&packet);
av->frame = av_frame_alloc(); av->frame = av_frame_alloc();
if (av->frame == NULL) { if (av->frame == NULL) {
return false; return false;
@ -184,6 +232,7 @@ bool open_av_data(const char *filename, settings_t *settings)
settings->audio_sample_count = 0; settings->audio_sample_count = 0;
settings->video_frames = NULL; settings->video_frames = NULL;
settings->video_frame_count = 0; settings->video_frame_count = 0;
settings->end_of_input = false;
return true; return true;
} }
@ -195,7 +244,7 @@ static void poll_av_packet_audio(settings_t *settings, AVPacket *packet)
int frame_size, frame_sample_count; int frame_size, frame_sample_count;
uint8_t *buffer[1]; uint8_t *buffer[1];
if (decode_audio_frame(av->audio_codec_context, av->frame, &frame_size, packet)) { if (decode_frame(av->audio_codec_context, av->frame, &frame_size, packet)) {
size_t buffer_size = sizeof(int16_t) * av->sample_count_mul * swr_get_out_samples(av->resampler, av->frame->nb_samples); size_t buffer_size = sizeof(int16_t) * av->sample_count_mul * swr_get_out_samples(av->resampler, av->frame->nb_samples);
buffer[0] = malloc(buffer_size); buffer[0] = malloc(buffer_size);
memset(buffer[0], 0, buffer_size); memset(buffer[0], 0, buffer_size);
@ -212,54 +261,61 @@ static void poll_av_packet_video(settings_t *settings, AVPacket *packet)
av_decoder_state_t* av = &(settings->decoder_state_av); av_decoder_state_t* av = &(settings->decoder_state_av);
int frame_size; int frame_size;
double pts_step = ((double)1.0*(double)settings->video_fps_den)/(double)settings->video_fps_num;
if (decode_video_frame(av->video_codec_context, av->frame, &frame_size, packet)) { int plane_size = settings->video_width*settings->video_height;
int dst_strides[2] = {
settings->video_width, settings->video_width
};
if (decode_frame(av->video_codec_context, av->frame, &frame_size, packet)) {
if (!av->frame->width || !av->frame->height || !av->frame->data[0]) {
return;
}
// Some files seem to have timestamps starting from a negative value
// (but otherwise valid) for whatever reason.
double pts = (((double)av->frame->pts)*(double)av->video_stream->time_base.num)/av->video_stream->time_base.den; double pts = (((double)av->frame->pts)*(double)av->video_stream->time_base.num)/av->video_stream->time_base.den;
//fprintf(stderr, "%f\n", pts); //if (pts < 0.0) {
// Drop frames with negative PTS values //return;
if(pts < 0.0) { //}
// do nothing if (settings->video_frame_count >= 1 && pts < av->video_next_pts) {
return; return;
} }
if((settings->video_frame_count) >= 1 && pts < av->video_next_pts) { if ((settings->video_frame_count) < 1) {
// do nothing
return;
}
if((settings->video_frame_count) < 1) {
av->video_next_pts = pts; av->video_next_pts = pts;
} else {
av->video_next_pts += pts_step;
} }
double pts_step = ((double)1.0*(double)settings->video_fps_den)/(double)settings->video_fps_num;
//fprintf(stderr, "%d %f %f %f\n", (settings->video_frame_count), pts, av->video_next_pts, pts_step); //fprintf(stderr, "%d %f %f %f\n", (settings->video_frame_count), pts, av->video_next_pts, pts_step);
av->video_next_pts += pts_step;
// FIXME: increasing framerate doesn't fill it in with duplicate frames! // Insert duplicate frames if the frame rate of the input stream is
assert(av->video_next_pts > pts); // lower than the target frame rate.
//size_t buffer_size = frame_count_mul; int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step);
//buffer[0] = malloc(buffer_size); if (dupe_frames < 0) dupe_frames = 0;
//memset(buffer[0], 0, buffer_size); settings->video_frames = realloc(
settings->video_frames = realloc(settings->video_frames, (settings->video_frame_count + 1) * av->video_frame_dst_size); settings->video_frames,
int dst_strides[1] = { (settings->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size
settings->video_width*4, );
for (; dupe_frames; dupe_frames--) {
memcpy(
(settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count),
(settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count-1),
av->video_frame_dst_size
);
settings->video_frame_count += 1;
av->video_next_pts += pts_step;
}
uint8_t *dst_frame = (settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count);
uint8_t *dst_pointers[2] = {
dst_frame, dst_frame + plane_size
}; };
uint8_t *dst_pointers[1] = { sws_scale(av->scaler, (const uint8_t *const *) av->frame->data, av->frame->linesize, 0, av->frame->height, dst_pointers, dst_strides);
(settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count),
};
sws_scale(av->scaler, av->frame->data, av->frame->linesize, 0, av->frame->height, dst_pointers, dst_strides);
settings->video_frame_count += 1; settings->video_frame_count += 1;
//free(buffer[0]);
}
}
static void poll_av_packet(settings_t *settings, AVPacket *packet)
{
av_decoder_state_t* av = &(settings->decoder_state_av);
if (packet->stream_index == av->audio_stream_index) {
poll_av_packet_audio(settings, packet);
}
else if (packet->stream_index == av->video_stream_index) {
poll_av_packet_video(settings, packet);
} }
} }
@ -268,29 +324,38 @@ bool poll_av_data(settings_t *settings)
av_decoder_state_t* av = &(settings->decoder_state_av); av_decoder_state_t* av = &(settings->decoder_state_av);
AVPacket packet; AVPacket packet;
if (settings->end_of_input) {
return false;
}
if (av_read_frame(av->format, &packet) >= 0) { if (av_read_frame(av->format, &packet) >= 0) {
poll_av_packet(settings, &packet); if (packet.stream_index == av->audio_stream_index) {
poll_av_packet_audio(settings, &packet);
} else if (packet.stream_index == av->video_stream_index) {
poll_av_packet_video(settings, &packet);
}
av_packet_unref(&packet); av_packet_unref(&packet);
return true; return true;
} else { } else {
// out is always padded out with 4032 "0" samples, this makes calculations elsewhere easier // out is always padded out with 4032 "0" samples, this makes calculations elsewhere easier
memset((settings->audio_samples) + (settings->audio_sample_count), 0, 4032 * av->sample_count_mul * sizeof(int16_t)); if (av->audio_stream) {
memset((settings->audio_samples) + (settings->audio_sample_count), 0, 4032 * av->sample_count_mul * sizeof(int16_t));
}
settings->end_of_input = true;
return false; return false;
} }
} }
bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames) bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames)
{ {
//
av_decoder_state_t* av = &(settings->decoder_state_av);
while (settings->audio_sample_count < needed_audio_samples || settings->video_frame_count < needed_video_frames) { while (settings->audio_sample_count < needed_audio_samples || settings->video_frame_count < needed_video_frames) {
//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames); //fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames);
if(!poll_av_data(settings)) { if (!poll_av_data(settings)) {
//fprintf(stderr, "cannot ensure\n"); // Keep returning true even if the end of the input file has been
return false; // reached, if the buffer is not yet completely empty.
return (settings->audio_sample_count || !needed_audio_samples)
&& (settings->video_frame_count || !needed_video_frames);
} }
} }
//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames); //fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames);
@ -298,16 +363,6 @@ bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_v
return true; return true;
} }
void pull_all_av_data(settings_t *settings)
{
while (poll_av_data(settings)) {
// do nothing
}
fprintf(stderr, "Loaded %d samples.\n", settings->audio_sample_count);
fprintf(stderr, "Loaded %d frames.\n", settings->video_frame_count);
}
void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames) void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames)
{ {
av_decoder_state_t* av = &(settings->decoder_state_av); av_decoder_state_t* av = &(settings->decoder_state_av);
@ -319,14 +374,14 @@ void retire_av_data(settings_t *settings, int retired_audio_samples, int retired
int sample_size = sizeof(int16_t); int sample_size = sizeof(int16_t);
if (settings->audio_sample_count > retired_audio_samples) { if (settings->audio_sample_count > retired_audio_samples) {
memmove(settings->audio_samples, settings->audio_samples + retired_audio_samples, (settings->audio_sample_count - retired_audio_samples)*sample_size); memmove(settings->audio_samples, settings->audio_samples + retired_audio_samples, (settings->audio_sample_count - retired_audio_samples)*sample_size);
settings->audio_sample_count -= retired_audio_samples;
} }
settings->audio_sample_count -= retired_audio_samples;
int frame_size = av->video_frame_dst_size; int frame_size = av->video_frame_dst_size;
if (settings->video_frame_count > retired_video_frames) { if (settings->video_frame_count > retired_video_frames) {
memmove(settings->video_frames, settings->video_frames + retired_video_frames*frame_size, (settings->video_frame_count - retired_video_frames)*frame_size); memmove(settings->video_frames, settings->video_frames + retired_video_frames*frame_size, (settings->video_frame_count - retired_video_frames)*frame_size);
settings->video_frame_count -= retired_video_frames;
} }
settings->video_frame_count -= retired_video_frames;
} }
void close_av_data(settings_t *settings) void close_av_data(settings_t *settings)


@@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend

Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg

This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages

@@ -24,16 +25,29 @@ freely, subject to the following restrictions:

#include "common.h"
#include "libpsxav.h"
static time_t get_elapsed_time(settings_t *settings) {
if (!settings->show_progress) {
return 0;
}
time_t t = time(NULL) - settings->start_time;
if (t <= settings->last_progress_update) {
return 0;
}
settings->last_progress_update = t;
return t;
}
static psx_audio_xa_settings_t settings_to_libpsxav_xa_audio(settings_t *settings) { static psx_audio_xa_settings_t settings_to_libpsxav_xa_audio(settings_t *settings) {
psx_audio_xa_settings_t new_settings; psx_audio_xa_settings_t new_settings;
new_settings.bits_per_sample = settings->bits_per_sample; new_settings.bits_per_sample = settings->bits_per_sample;
new_settings.frequency = settings->frequency; new_settings.frequency = settings->frequency;
new_settings.stereo = settings->stereo; new_settings.stereo = settings->channels == 2;
new_settings.file_number = settings->file_number; new_settings.file_number = settings->file_number;
new_settings.channel_number = settings->channel_number; new_settings.channel_number = settings->channel_number;
switch (settings->format) { switch (settings->format) {
case FORMAT_XA: case FORMAT_XA:
case FORMAT_STR2:
new_settings.format = PSX_AUDIO_XA_FORMAT_XA; new_settings.format = PSX_AUDIO_XA_FORMAT_XA;
break; break;
default: default:
@ -44,93 +58,324 @@ static psx_audio_xa_settings_t settings_to_libpsxav_xa_audio(settings_t *setting
return new_settings; return new_settings;
}; };
void encode_file_spu(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output) { void write_vag_header(int size_per_channel, uint8_t *header, settings_t *settings) {
psx_audio_encoder_state_t audio_state; // Magic
header[0x00] = 'V';
header[0x01] = 'A';
header[0x02] = 'G';
header[0x03] = settings->interleave ? 'i' : 'p';
// Version (big-endian)
header[0x04] = 0x00;
header[0x05] = 0x00;
header[0x06] = 0x00;
header[0x07] = 0x20;
// Interleave (little-endian)
header[0x08] = (uint8_t)settings->interleave;
header[0x09] = (uint8_t)(settings->interleave>>8);
header[0x0a] = (uint8_t)(settings->interleave>>16);
header[0x0b] = (uint8_t)(settings->interleave>>24);
// Length of data for each channel (big-endian)
header[0x0c] = (uint8_t)(size_per_channel>>24);
header[0x0d] = (uint8_t)(size_per_channel>>16);
header[0x0e] = (uint8_t)(size_per_channel>>8);
header[0x0f] = (uint8_t)size_per_channel;
// Sample rate (big-endian)
header[0x10] = (uint8_t)(settings->frequency>>24);
header[0x11] = (uint8_t)(settings->frequency>>16);
header[0x12] = (uint8_t)(settings->frequency>>8);
header[0x13] = (uint8_t)settings->frequency;
// Number of channels (little-endian)
header[0x1e] = (uint8_t)settings->channels;
header[0x1f] = 0x00;
// Filename
//strncpy(header + 0x20, "psxavenc", 16);
memset(header + 0x20, 0, 16);
}
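// For example, the README's vagi invocation (44100 Hz, 2 channels, 8192-byte
// interleave) yields a header beginning with "VAGi", version 00 00 00 20,
// interleave 00 20 00 00 (8192, little-endian), the per-channel data size,
// sample rate 00 00 AC 44 (44100, big-endian) and 02 at offset 0x1E for the
// channel count.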
void encode_file_spu(settings_t *settings, FILE *output) {
psx_audio_encoder_channel_state_t audio_state;
int audio_samples_per_block = psx_audio_spu_get_samples_per_block(); int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
int block_size = psx_audio_spu_get_buffer_size_per_block();
uint8_t buffer[16]; uint8_t buffer[16];
int block_count;
memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t));
for (int i = 0; i < audio_sample_count; i += audio_samples_per_block) { // The header must be written after the data as we don't yet know the
int samples_length = audio_sample_count - i; // number of audio samples.
if (settings->format == FORMAT_VAG) {
fseek(output, 48, SEEK_SET);
}
for (block_count = 0; ensure_av_data(settings, audio_samples_per_block, 0); block_count++) {
int samples_length = settings->audio_sample_count;
if (samples_length > audio_samples_per_block) samples_length = audio_samples_per_block; if (samples_length > audio_samples_per_block) samples_length = audio_samples_per_block;
int length = psx_audio_spu_encode(&audio_state, audio_samples + i, samples_length, buffer);
if (i == 0) { int length = psx_audio_spu_encode(&audio_state, settings->audio_samples, samples_length, 1, buffer);
buffer[1] = PSX_AUDIO_SPU_LOOP_START; if (!block_count) {
} else if ((i + audio_samples_per_block) >= audio_sample_count) { // This flag is not required as the SPU already resets the loop
buffer[1] = PSX_AUDIO_SPU_LOOP_END; // address when starting playback of a sample.
//buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
} }
if (settings->end_of_input) {
buffer[1] |= settings->loop ? PSX_AUDIO_SPU_LOOP_REPEAT : PSX_AUDIO_SPU_LOOP_END;
}
retire_av_data(settings, samples_length, 0);
fwrite(buffer, length, 1, output); fwrite(buffer, length, 1, output);
time_t t = get_elapsed_time(settings);
if (t) {
fprintf(stderr, "\rBlock: %6d | Encoding speed: %5.2fx",
block_count,
(double)(block_count*audio_samples_per_block) / (double)(settings->frequency*t)
);
}
}
if (settings->format == FORMAT_VAG) {
uint8_t header[48];
memset(header, 0, 48);
write_vag_header(block_count*block_size, header, settings);
fseek(output, 0, SEEK_SET);
fwrite(header, 48, 1, output);
} }
} }
void encode_file_xa(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output) { void encode_file_spu_interleaved(settings_t *settings, FILE *output) {
int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * settings->channels;
// NOTE: since the interleaved .vag format is not standardized, some tools
// (such as vgmstream) will not properly play files with interleave < 2048,
// alignment != 2048 or channels != 2.
int buffer_size = settings->interleave + settings->alignment - 1;
buffer_size -= buffer_size % settings->alignment;
int header_size = 48 + settings->alignment - 1;
header_size -= header_size % settings->alignment;
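// Both sizes are rounded up to the next multiple of the alignment: with the
// README's example values (-i 8192 and -a left at its 2048 default) this gives
// buffer_size = 8192 and header_size = 2048; with -a 16 it would instead give
// buffer_size = 8192 and header_size = 48.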
psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size);
uint8_t *buffer = malloc(buffer_size);
int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
int block_size = psx_audio_spu_get_buffer_size_per_block();
int audio_samples_per_chunk = settings->interleave / block_size * audio_samples_per_block;
int chunk_count;
memset(audio_state, 0, audio_state_size);
if (settings->format == FORMAT_VAGI) {
fseek(output, header_size, SEEK_SET);
}
for (chunk_count = 0; ensure_av_data(settings, audio_samples_per_chunk*settings->channels, 0); chunk_count++) {
int samples_length = settings->audio_sample_count / settings->channels;
if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk;
for (int ch = 0; ch < settings->channels; ch++) {
memset(buffer, 0, buffer_size);
int length = psx_audio_spu_encode(audio_state + ch, settings->audio_samples + ch, samples_length, settings->channels, buffer);
if (length) {
//buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
if (settings->loop) {
buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
}
if (settings->end_of_input) {
buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_END;
}
}
fwrite(buffer, buffer_size, 1, output);
time_t t = get_elapsed_time(settings);
if (t) {
fprintf(stderr, "\rChunk: %6d | Encoding speed: %5.2fx",
chunk_count,
(double)(chunk_count*audio_samples_per_chunk) / (double)(settings->frequency*t)
);
}
}
retire_av_data(settings, samples_length*settings->channels, 0);
}
if (settings->format == FORMAT_VAGI) {
uint8_t *header = malloc(header_size);
memset(header, 0, header_size);
write_vag_header(chunk_count*settings->interleave, header, settings);
fseek(output, 0, SEEK_SET);
fwrite(header, header_size, 1, output);
free(header);
}
free(audio_state);
free(buffer);
}
void encode_file_xa(settings_t *settings, FILE *output) {
psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings); psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings);
psx_audio_encoder_state_t audio_state; psx_audio_encoder_state_t audio_state;
int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
int av_sample_mul = settings->stereo ? 2 : 1;
uint8_t buffer[2352]; uint8_t buffer[2352];
memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
for (int i = 0; i < audio_sample_count; i += audio_samples_per_sector) { for (int j = 0; ensure_av_data(settings, audio_samples_per_sector*settings->channels, 0); j++) {
int samples_length = audio_sample_count - i; int samples_length = settings->audio_sample_count / settings->channels;
if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector; if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector;
int length = psx_audio_xa_encode(xa_settings, &audio_state, audio_samples + (i * av_sample_mul), samples_length, buffer); int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer);
if ((i + audio_samples_per_sector) >= audio_sample_count) { if (settings->end_of_input) {
psx_audio_xa_encode_finalize(xa_settings, buffer, length); psx_audio_xa_encode_finalize(xa_settings, buffer, length);
} }
if (settings->format == FORMAT_XACD) {
int t = j + 75*2;
// Put the time in
buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4);
buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4);
buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
}
retire_av_data(settings, samples_length*settings->channels, 0);
fwrite(buffer, length, 1, output); fwrite(buffer, length, 1, output);
time_t t = get_elapsed_time(settings);
if (t) {
fprintf(stderr, "\rLBA: %6d | Encoding speed: %5.2fx",
j,
(double)(j*audio_samples_per_sector) / (double)(settings->frequency*t)
);
}
} }
} }
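/*
 * The timestamp math used above (and again in encode_file_str() below),
 * restated as a stand-alone helper for clarity; this is equivalent logic,
 * not a function that exists in psxavenc. A sector index is converted to the
 * BCD minute/second/frame timestamp stored at offsets 0x0C-0x0E of a full
 * 2352-byte sector, counting from the standard 2-second (150 sector) pregap.
 */
static inline void sector_msf_to_bcd(int sector, uint8_t *msf) {
	int t = sector + 75*2;
	int m = t / 75 / 60;
	int s = (t / 75) % 60;
	int f = t % 75;
	msf[0] = (uint8_t)((m % 10) | ((m / 10) << 4));
	msf[1] = (uint8_t)((s % 10) | ((s / 10) << 4));
	msf[2] = (uint8_t)((f % 10) | ((f / 10) << 4));
}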
void encode_file_str(settings_t *settings, FILE *output) { void encode_file_str(settings_t *settings, FILE *output) {
uint8_t buffer[2352*8];
psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings); psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings);
psx_audio_encoder_state_t audio_state; psx_audio_encoder_state_t audio_state;
int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); int audio_samples_per_sector;
int av_sample_mul = settings->stereo ? 2 : 1; uint8_t buffer[2352];
int interleave;
int video_sectors_per_block;
if (settings->decoder_state_av.audio_stream) {
// 1/N audio, (N-1)/N video
audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
interleave = psx_audio_xa_get_sector_interleave(xa_settings) * settings->cd_speed;
video_sectors_per_block = interleave - 1;
} else {
// 0/1 audio, 1/1 video
audio_samples_per_sector = 0;
interleave = 1;
video_sectors_per_block = 1;
}
if (!settings->quiet) {
fprintf(stderr, "Interleave: %d/%d audio, %d/%d video\n",
interleave - video_sectors_per_block, interleave, video_sectors_per_block, interleave);
}
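	// Worked example: with the default 37800 Hz 4-bit stereo XA audio at 2x
	// speed, one sector in eight carries audio, so interleave = 8 and
	// video_sectors_per_block = 7, matching the 7/8 video proportion the
	// previous version of this code hard-coded.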
memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
// e.g. 15fps = (150*7/8/15) = 8.75 blocks per frame
settings->state_vid.frame_block_base_overflow = (75*settings->cd_speed) * video_sectors_per_block * settings->video_fps_den;
settings->state_vid.frame_block_overflow_den = interleave * settings->video_fps_num;
double frame_size = (double)settings->state_vid.frame_block_base_overflow / (double)settings->state_vid.frame_block_overflow_den;
if (!settings->quiet) {
fprintf(stderr, "Frame size: %.2f sectors\n", frame_size);
}
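	// The fractional frame size is consumed later in encode_sector_str():
	// frame_block_overflow_num is bumped by frame_block_base_overflow once
	// per frame, divided by frame_block_overflow_den to get that frame's
	// sector budget, and the remainder carries over. For the 8.75-sector
	// example above this yields budgets of 8, 9, 9, 9, 8, 9, 9, 9, ... with
	// no long-term drift.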
settings->state_vid.frame_output = malloc(2016 * (int)ceil(frame_size));
settings->state_vid.frame_index = 0; settings->state_vid.frame_index = 0;
settings->state_vid.bits_value = 0; settings->state_vid.frame_data_offset = 0;
settings->state_vid.bits_left = 16; settings->state_vid.frame_max_size = 0;
settings->state_vid.frame_block_index = 0;
settings->state_vid.frame_block_count = 0;
settings->state_vid.frame_block_overflow_num = 0; settings->state_vid.frame_block_overflow_num = 0;
settings->state_vid.quant_scale_sum = 0;
// Number of total sectors per second: 150
// Proportion of sectors for video due to A/V interleave: 7/8
// 15FPS = (150*7/8/15) = 8.75 blocks per frame
settings->state_vid.frame_block_base_overflow = 150*7*settings->video_fps_den;
settings->state_vid.frame_block_overflow_den = 8*settings->video_fps_num;
//fprintf(stderr, "%f\n", ((double)settings->state_vid.frame_block_base_overflow)/((double)settings->state_vid.frame_block_overflow_den)); abort();
// FIXME: this needs an extra frame to prevent A/V desync // FIXME: this needs an extra frame to prevent A/V desync
const int frames_needed = 2; int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size);
for (int j = 0; ensure_av_data(settings, audio_samples_per_sector*av_sample_mul*frames_needed, 1*frames_needed); j+=18) { if (frames_needed < 2) frames_needed = 2;
psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, audio_samples_per_sector, buffer + 2352 * 7);
// TODO: the final buffer for (int j = 0; !settings->end_of_input || settings->state_vid.frame_data_offset < settings->state_vid.frame_max_size; j++) {
for(int k = 0; k < 7; k++) { ensure_av_data(settings, audio_samples_per_sector*settings->channels, frames_needed);
init_sector_buffer_video(buffer + 2352*k, settings);
if ((j%interleave) < video_sectors_per_block) {
// Video sector
init_sector_buffer_video(buffer, settings);
encode_sector_str(settings->video_frames, buffer, settings);
} else {
// Audio sector
int samples_length = settings->audio_sample_count / settings->channels;
if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector;
// FIXME: this is an extremely hacky way to handle audio tracks
// shorter than the video track
if (!samples_length) {
video_sectors_per_block++;
}
int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer);
if (settings->end_of_input) {
psx_audio_xa_encode_finalize(xa_settings, buffer, length);
}
retire_av_data(settings, samples_length*settings->channels, 0);
} }
encode_block_str(settings->video_frames, settings->video_frame_count, buffer, settings);
for(int k = 0; k < 8; k++) { if (settings->format == FORMAT_STR2CD) {
int t = k + (j/18)*8 + 75*2; int t = j + 75*2;
// Put the time in // Put the time in
buffer[0x00C + 2352*k] = ((t/75/60)%10)|(((t/75/60)/10)<<4); buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4);
buffer[0x00D + 2352*k] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4); buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4);
buffer[0x00E + 2352*k] = ((t%75)%10)|(((t%75)/10)<<4); buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
if(k != 7) { // FIXME: EDC is not calculated in 2336-byte sector mode (shouldn't
calculate_edc_data(buffer + 2352*k); // matter anyway, any CD image builder will have to recalculate it
// due to the sector's MSF changing)
if((j%interleave) < video_sectors_per_block) {
calculate_edc_data(buffer);
} }
} }
retire_av_data(settings, audio_samples_per_sector*av_sample_mul, 0);
fwrite(buffer, 2352*8, 1, output); fwrite(buffer, 2352, 1, output);
time_t t = get_elapsed_time(settings);
if (t) {
fprintf(stderr, "\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
settings->state_vid.frame_index,
j,
(double)settings->state_vid.quant_scale_sum / (double)settings->state_vid.frame_index,
(double)(settings->state_vid.frame_index*settings->video_fps_den) / (double)(t*settings->video_fps_num)
);
}
} }
free(settings->state_vid.frame_output);
}
void encode_file_sbs(settings_t *settings, FILE *output) {
settings->state_vid.frame_output = malloc(settings->alignment);
settings->state_vid.frame_data_offset = 0;
settings->state_vid.frame_max_size = settings->alignment;
settings->state_vid.quant_scale_sum = 0;
for (int j = 0; ensure_av_data(settings, 0, 1); j++) {
encode_frame_bs(settings->video_frames, settings);
fwrite(settings->state_vid.frame_output, settings->alignment, 1, output);
time_t t = get_elapsed_time(settings);
if (t) {
fprintf(stderr, "\rFrame: %4d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
j,
(double)settings->state_vid.quant_scale_sum / (double)j,
(double)(j*settings->video_fps_den) / (double)(t*settings->video_fps_num)
);
}
}
free(settings->state_vid.frame_output);
} }
View File
@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
Copyright (c) 2019, 2020 Adrian "asie" Siekierka Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg
This software is provided 'as-is', without any express or implied This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages warranty. In no event will the authors be held liable for any damages
@ -36,117 +37,117 @@ const struct {
uint16_t u_hword_neg; uint16_t u_hword_neg;
} huffman_lookup[] = { } huffman_lookup[] = {
// Fuck this Huffman tree in particular --GM // Fuck this Huffman tree in particular --GM
2,0x3,MAKE_HUFFMAN_PAIR(0,1), {2,0x3,MAKE_HUFFMAN_PAIR(0,1)},
3,0x3,MAKE_HUFFMAN_PAIR(1,1), {3,0x3,MAKE_HUFFMAN_PAIR(1,1)},
4,0x4,MAKE_HUFFMAN_PAIR(0,2), {4,0x4,MAKE_HUFFMAN_PAIR(0,2)},
4,0x5,MAKE_HUFFMAN_PAIR(2,1), {4,0x5,MAKE_HUFFMAN_PAIR(2,1)},
5,0x05,MAKE_HUFFMAN_PAIR(0,3), {5,0x05,MAKE_HUFFMAN_PAIR(0,3)},
5,0x06,MAKE_HUFFMAN_PAIR(4,1), {5,0x06,MAKE_HUFFMAN_PAIR(4,1)},
5,0x07,MAKE_HUFFMAN_PAIR(3,1), {5,0x07,MAKE_HUFFMAN_PAIR(3,1)},
6,0x04,MAKE_HUFFMAN_PAIR(7,1), {6,0x04,MAKE_HUFFMAN_PAIR(7,1)},
6,0x05,MAKE_HUFFMAN_PAIR(6,1), {6,0x05,MAKE_HUFFMAN_PAIR(6,1)},
6,0x06,MAKE_HUFFMAN_PAIR(1,2), {6,0x06,MAKE_HUFFMAN_PAIR(1,2)},
6,0x07,MAKE_HUFFMAN_PAIR(5,1), {6,0x07,MAKE_HUFFMAN_PAIR(5,1)},
7,0x04,MAKE_HUFFMAN_PAIR(2,2), {7,0x04,MAKE_HUFFMAN_PAIR(2,2)},
7,0x05,MAKE_HUFFMAN_PAIR(9,1), {7,0x05,MAKE_HUFFMAN_PAIR(9,1)},
7,0x06,MAKE_HUFFMAN_PAIR(0,4), {7,0x06,MAKE_HUFFMAN_PAIR(0,4)},
7,0x07,MAKE_HUFFMAN_PAIR(8,1), {7,0x07,MAKE_HUFFMAN_PAIR(8,1)},
8,0x20,MAKE_HUFFMAN_PAIR(13,1), {8,0x20,MAKE_HUFFMAN_PAIR(13,1)},
8,0x21,MAKE_HUFFMAN_PAIR(0,6), {8,0x21,MAKE_HUFFMAN_PAIR(0,6)},
8,0x22,MAKE_HUFFMAN_PAIR(12,1), {8,0x22,MAKE_HUFFMAN_PAIR(12,1)},
8,0x23,MAKE_HUFFMAN_PAIR(11,1), {8,0x23,MAKE_HUFFMAN_PAIR(11,1)},
8,0x24,MAKE_HUFFMAN_PAIR(3,2), {8,0x24,MAKE_HUFFMAN_PAIR(3,2)},
8,0x25,MAKE_HUFFMAN_PAIR(1,3), {8,0x25,MAKE_HUFFMAN_PAIR(1,3)},
8,0x26,MAKE_HUFFMAN_PAIR(0,5), {8,0x26,MAKE_HUFFMAN_PAIR(0,5)},
8,0x27,MAKE_HUFFMAN_PAIR(10,1), {8,0x27,MAKE_HUFFMAN_PAIR(10,1)},
10,0x008,MAKE_HUFFMAN_PAIR(16,1), {10,0x008,MAKE_HUFFMAN_PAIR(16,1)},
10,0x009,MAKE_HUFFMAN_PAIR(5,2), {10,0x009,MAKE_HUFFMAN_PAIR(5,2)},
10,0x00A,MAKE_HUFFMAN_PAIR(0,7), {10,0x00A,MAKE_HUFFMAN_PAIR(0,7)},
10,0x00B,MAKE_HUFFMAN_PAIR(2,3), {10,0x00B,MAKE_HUFFMAN_PAIR(2,3)},
10,0x00C,MAKE_HUFFMAN_PAIR(1,4), {10,0x00C,MAKE_HUFFMAN_PAIR(1,4)},
10,0x00D,MAKE_HUFFMAN_PAIR(15,1), {10,0x00D,MAKE_HUFFMAN_PAIR(15,1)},
10,0x00E,MAKE_HUFFMAN_PAIR(14,1), {10,0x00E,MAKE_HUFFMAN_PAIR(14,1)},
10,0x00F,MAKE_HUFFMAN_PAIR(4,2), {10,0x00F,MAKE_HUFFMAN_PAIR(4,2)},
12,0x010,MAKE_HUFFMAN_PAIR(0,11), {12,0x010,MAKE_HUFFMAN_PAIR(0,11)},
12,0x011,MAKE_HUFFMAN_PAIR(8,2), {12,0x011,MAKE_HUFFMAN_PAIR(8,2)},
12,0x012,MAKE_HUFFMAN_PAIR(4,3), {12,0x012,MAKE_HUFFMAN_PAIR(4,3)},
12,0x013,MAKE_HUFFMAN_PAIR(0,10), {12,0x013,MAKE_HUFFMAN_PAIR(0,10)},
12,0x014,MAKE_HUFFMAN_PAIR(2,4), {12,0x014,MAKE_HUFFMAN_PAIR(2,4)},
12,0x015,MAKE_HUFFMAN_PAIR(7,2), {12,0x015,MAKE_HUFFMAN_PAIR(7,2)},
12,0x016,MAKE_HUFFMAN_PAIR(21,1), {12,0x016,MAKE_HUFFMAN_PAIR(21,1)},
12,0x017,MAKE_HUFFMAN_PAIR(20,1), {12,0x017,MAKE_HUFFMAN_PAIR(20,1)},
12,0x018,MAKE_HUFFMAN_PAIR(0,9), {12,0x018,MAKE_HUFFMAN_PAIR(0,9)},
12,0x019,MAKE_HUFFMAN_PAIR(19,1), {12,0x019,MAKE_HUFFMAN_PAIR(19,1)},
12,0x01A,MAKE_HUFFMAN_PAIR(18,1), {12,0x01A,MAKE_HUFFMAN_PAIR(18,1)},
12,0x01B,MAKE_HUFFMAN_PAIR(1,5), {12,0x01B,MAKE_HUFFMAN_PAIR(1,5)},
12,0x01C,MAKE_HUFFMAN_PAIR(3,3), {12,0x01C,MAKE_HUFFMAN_PAIR(3,3)},
12,0x01D,MAKE_HUFFMAN_PAIR(0,8), {12,0x01D,MAKE_HUFFMAN_PAIR(0,8)},
12,0x01E,MAKE_HUFFMAN_PAIR(6,2), {12,0x01E,MAKE_HUFFMAN_PAIR(6,2)},
12,0x01F,MAKE_HUFFMAN_PAIR(17,1), {12,0x01F,MAKE_HUFFMAN_PAIR(17,1)},
13,0x0010,MAKE_HUFFMAN_PAIR(10,2), {13,0x0010,MAKE_HUFFMAN_PAIR(10,2)},
13,0x0011,MAKE_HUFFMAN_PAIR(9,2), {13,0x0011,MAKE_HUFFMAN_PAIR(9,2)},
13,0x0012,MAKE_HUFFMAN_PAIR(5,3), {13,0x0012,MAKE_HUFFMAN_PAIR(5,3)},
13,0x0013,MAKE_HUFFMAN_PAIR(3,4), {13,0x0013,MAKE_HUFFMAN_PAIR(3,4)},
13,0x0014,MAKE_HUFFMAN_PAIR(2,5), {13,0x0014,MAKE_HUFFMAN_PAIR(2,5)},
13,0x0015,MAKE_HUFFMAN_PAIR(1,7), {13,0x0015,MAKE_HUFFMAN_PAIR(1,7)},
13,0x0016,MAKE_HUFFMAN_PAIR(1,6), {13,0x0016,MAKE_HUFFMAN_PAIR(1,6)},
13,0x0017,MAKE_HUFFMAN_PAIR(0,15), {13,0x0017,MAKE_HUFFMAN_PAIR(0,15)},
13,0x0018,MAKE_HUFFMAN_PAIR(0,14), {13,0x0018,MAKE_HUFFMAN_PAIR(0,14)},
13,0x0019,MAKE_HUFFMAN_PAIR(0,13), {13,0x0019,MAKE_HUFFMAN_PAIR(0,13)},
13,0x001A,MAKE_HUFFMAN_PAIR(0,12), {13,0x001A,MAKE_HUFFMAN_PAIR(0,12)},
13,0x001B,MAKE_HUFFMAN_PAIR(26,1), {13,0x001B,MAKE_HUFFMAN_PAIR(26,1)},
13,0x001C,MAKE_HUFFMAN_PAIR(25,1), {13,0x001C,MAKE_HUFFMAN_PAIR(25,1)},
13,0x001D,MAKE_HUFFMAN_PAIR(24,1), {13,0x001D,MAKE_HUFFMAN_PAIR(24,1)},
13,0x001E,MAKE_HUFFMAN_PAIR(23,1), {13,0x001E,MAKE_HUFFMAN_PAIR(23,1)},
13,0x001F,MAKE_HUFFMAN_PAIR(22,1), {13,0x001F,MAKE_HUFFMAN_PAIR(22,1)},
14,0x0010,MAKE_HUFFMAN_PAIR(0,31), {14,0x0010,MAKE_HUFFMAN_PAIR(0,31)},
14,0x0011,MAKE_HUFFMAN_PAIR(0,30), {14,0x0011,MAKE_HUFFMAN_PAIR(0,30)},
14,0x0012,MAKE_HUFFMAN_PAIR(0,29), {14,0x0012,MAKE_HUFFMAN_PAIR(0,29)},
14,0x0013,MAKE_HUFFMAN_PAIR(0,28), {14,0x0013,MAKE_HUFFMAN_PAIR(0,28)},
14,0x0014,MAKE_HUFFMAN_PAIR(0,27), {14,0x0014,MAKE_HUFFMAN_PAIR(0,27)},
14,0x0015,MAKE_HUFFMAN_PAIR(0,26), {14,0x0015,MAKE_HUFFMAN_PAIR(0,26)},
14,0x0016,MAKE_HUFFMAN_PAIR(0,25), {14,0x0016,MAKE_HUFFMAN_PAIR(0,25)},
14,0x0017,MAKE_HUFFMAN_PAIR(0,24), {14,0x0017,MAKE_HUFFMAN_PAIR(0,24)},
14,0x0018,MAKE_HUFFMAN_PAIR(0,23), {14,0x0018,MAKE_HUFFMAN_PAIR(0,23)},
14,0x0019,MAKE_HUFFMAN_PAIR(0,22), {14,0x0019,MAKE_HUFFMAN_PAIR(0,22)},
14,0x001A,MAKE_HUFFMAN_PAIR(0,21), {14,0x001A,MAKE_HUFFMAN_PAIR(0,21)},
14,0x001B,MAKE_HUFFMAN_PAIR(0,20), {14,0x001B,MAKE_HUFFMAN_PAIR(0,20)},
14,0x001C,MAKE_HUFFMAN_PAIR(0,19), {14,0x001C,MAKE_HUFFMAN_PAIR(0,19)},
14,0x001D,MAKE_HUFFMAN_PAIR(0,18), {14,0x001D,MAKE_HUFFMAN_PAIR(0,18)},
14,0x001E,MAKE_HUFFMAN_PAIR(0,17), {14,0x001E,MAKE_HUFFMAN_PAIR(0,17)},
14,0x001F,MAKE_HUFFMAN_PAIR(0,16), {14,0x001F,MAKE_HUFFMAN_PAIR(0,16)},
15,0x0010,MAKE_HUFFMAN_PAIR(0,40), {15,0x0010,MAKE_HUFFMAN_PAIR(0,40)},
15,0x0011,MAKE_HUFFMAN_PAIR(0,39), {15,0x0011,MAKE_HUFFMAN_PAIR(0,39)},
15,0x0012,MAKE_HUFFMAN_PAIR(0,38), {15,0x0012,MAKE_HUFFMAN_PAIR(0,38)},
15,0x0013,MAKE_HUFFMAN_PAIR(0,37), {15,0x0013,MAKE_HUFFMAN_PAIR(0,37)},
15,0x0014,MAKE_HUFFMAN_PAIR(0,36), {15,0x0014,MAKE_HUFFMAN_PAIR(0,36)},
15,0x0015,MAKE_HUFFMAN_PAIR(0,35), {15,0x0015,MAKE_HUFFMAN_PAIR(0,35)},
15,0x0016,MAKE_HUFFMAN_PAIR(0,34), {15,0x0016,MAKE_HUFFMAN_PAIR(0,34)},
15,0x0017,MAKE_HUFFMAN_PAIR(0,33), {15,0x0017,MAKE_HUFFMAN_PAIR(0,33)},
15,0x0018,MAKE_HUFFMAN_PAIR(0,32), {15,0x0018,MAKE_HUFFMAN_PAIR(0,32)},
15,0x0019,MAKE_HUFFMAN_PAIR(1,14), {15,0x0019,MAKE_HUFFMAN_PAIR(1,14)},
15,0x001A,MAKE_HUFFMAN_PAIR(1,13), {15,0x001A,MAKE_HUFFMAN_PAIR(1,13)},
15,0x001B,MAKE_HUFFMAN_PAIR(1,12), {15,0x001B,MAKE_HUFFMAN_PAIR(1,12)},
15,0x001C,MAKE_HUFFMAN_PAIR(1,11), {15,0x001C,MAKE_HUFFMAN_PAIR(1,11)},
15,0x001D,MAKE_HUFFMAN_PAIR(1,10), {15,0x001D,MAKE_HUFFMAN_PAIR(1,10)},
15,0x001E,MAKE_HUFFMAN_PAIR(1,9), {15,0x001E,MAKE_HUFFMAN_PAIR(1,9)},
15,0x001F,MAKE_HUFFMAN_PAIR(1,8), {15,0x001F,MAKE_HUFFMAN_PAIR(1,8)},
16,0x0010,MAKE_HUFFMAN_PAIR(1,18), {16,0x0010,MAKE_HUFFMAN_PAIR(1,18)},
16,0x0011,MAKE_HUFFMAN_PAIR(1,17), {16,0x0011,MAKE_HUFFMAN_PAIR(1,17)},
16,0x0012,MAKE_HUFFMAN_PAIR(1,16), {16,0x0012,MAKE_HUFFMAN_PAIR(1,16)},
16,0x0013,MAKE_HUFFMAN_PAIR(1,15), {16,0x0013,MAKE_HUFFMAN_PAIR(1,15)},
16,0x0014,MAKE_HUFFMAN_PAIR(6,3), {16,0x0014,MAKE_HUFFMAN_PAIR(6,3)},
16,0x0015,MAKE_HUFFMAN_PAIR(16,2), {16,0x0015,MAKE_HUFFMAN_PAIR(16,2)},
16,0x0016,MAKE_HUFFMAN_PAIR(15,2), {16,0x0016,MAKE_HUFFMAN_PAIR(15,2)},
16,0x0017,MAKE_HUFFMAN_PAIR(14,2), {16,0x0017,MAKE_HUFFMAN_PAIR(14,2)},
16,0x0018,MAKE_HUFFMAN_PAIR(13,2), {16,0x0018,MAKE_HUFFMAN_PAIR(13,2)},
16,0x0019,MAKE_HUFFMAN_PAIR(12,2), {16,0x0019,MAKE_HUFFMAN_PAIR(12,2)},
16,0x001A,MAKE_HUFFMAN_PAIR(11,2), {16,0x001A,MAKE_HUFFMAN_PAIR(11,2)},
16,0x001B,MAKE_HUFFMAN_PAIR(31,1), {16,0x001B,MAKE_HUFFMAN_PAIR(31,1)},
16,0x001C,MAKE_HUFFMAN_PAIR(30,1), {16,0x001C,MAKE_HUFFMAN_PAIR(30,1)},
16,0x001D,MAKE_HUFFMAN_PAIR(29,1), {16,0x001D,MAKE_HUFFMAN_PAIR(29,1)},
16,0x001E,MAKE_HUFFMAN_PAIR(28,1), {16,0x001E,MAKE_HUFFMAN_PAIR(28,1)},
16,0x001F,MAKE_HUFFMAN_PAIR(27,1), {16,0x001F,MAKE_HUFFMAN_PAIR(27,1)},
}; };
#undef MAKE_HUFFMAN_PAIR #undef MAKE_HUFFMAN_PAIR
@ -209,33 +210,38 @@ static void init_dct_data(void)
} }
static void flush_bits(vid_encoder_state_t *state) static bool flush_bits(vid_encoder_state_t *state)
{ {
if(state->bits_left < 16) { if(state->bits_left < 16) {
assert(state->bytes_used < sizeof(state->unmuxed)); state->frame_output[state->bytes_used++] = (uint8_t)state->bits_value;
state->unmuxed[state->bytes_used++] = (uint8_t)state->bits_value; if (state->bytes_used >= state->frame_max_size) {
assert(state->bytes_used < sizeof(state->unmuxed)); return false;
assert(state->bytes_used < 2016*state->frame_block_count); }
state->unmuxed[state->bytes_used++] = (uint8_t)(state->bits_value>>8); state->frame_output[state->bytes_used++] = (uint8_t)(state->bits_value>>8);
} }
state->bits_left = 16; state->bits_left = 16;
state->bits_value = 0; state->bits_value = 0;
return true;
} }
static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val) static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
{ {
assert(val < (1<<bits)); assert(val < (1<<bits));
// FIXME: for some reason the main logic breaks when bits > 16 // FIXME: for some reason the main logic breaks when bits > 16
// and I have no idea why, so I have to split this up --GM // and I have no idea why, so I have to split this up --GM
if (bits > 16) { if (bits > 16) {
encode_bits(state, bits-16, val>>16); if (!encode_bits(state, bits-16, val>>16)) {
return false;
}
bits = 16; bits = 16;
val &= 0xFFFF; val &= 0xFFFF;
} }
if (state->bits_left == 0) { if (state->bits_left == 0) {
flush_bits(state); if (!flush_bits(state)) {
return false;
}
} }
while (bits > state->bits_left) { while (bits > state->bits_left) {
@ -243,7 +249,7 @@ static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
uint32_t outval = val; uint32_t outval = val;
outval >>= bits - state->bits_left; outval >>= bits - state->bits_left;
assert(outval < (1<<16)); assert(outval < (1<<16));
uint16_t old_value = state->bits_value; //uint16_t old_value = state->bits_value;
assert((state->bits_value & outval) == 0); assert((state->bits_value & outval) == 0);
state->bits_value |= (uint16_t)outval; state->bits_value |= (uint16_t)outval;
//fprintf(stderr, "trunc %2d %2d %08X %04X %04X\n", bits, state->bits_left, val, old_value, state->bits_value); //fprintf(stderr, "trunc %2d %2d %08X %04X %04X\n", bits, state->bits_left, val, old_value, state->bits_value);
@ -252,7 +258,9 @@ static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
val &= mask; val &= mask;
assert(mask >= 1); assert(mask >= 1);
assert(val < (1<<bits)); assert(val < (1<<bits));
flush_bits(state); if (!flush_bits(state)) {
return false;
}
} }
if (bits >= 1) { if (bits >= 1) {
@ -261,87 +269,82 @@ static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
uint32_t outval = val; uint32_t outval = val;
outval <<= state->bits_left - bits; outval <<= state->bits_left - bits;
assert(outval < (1<<16)); assert(outval < (1<<16));
uint16_t old_value = state->bits_value; //uint16_t old_value = state->bits_value;
assert((state->bits_value & outval) == 0); assert((state->bits_value & outval) == 0);
state->bits_value |= (uint16_t)outval; state->bits_value |= (uint16_t)outval;
//fprintf(stderr, "plop %2d %2d %08X %04X %04X\n", bits, state->bits_left, val, state->bits_value); //fprintf(stderr, "plop %2d %2d %08X %04X %04X\n", bits, state->bits_left, val, state->bits_value);
state->bits_left -= bits; state->bits_left -= bits;
} }
return true;
} }
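/*
 * Packing sketch: encode_bits() accumulates codes MSB-first into the 16-bit
 * bits_value. Writing the 3-bit value 0b101 into an empty accumulator, for
 * instance, sets bits 15..13 (bits_value = 0xA000, bits_left = 13). Once the
 * accumulator is full, flush_bits() emits it into the frame buffer low byte
 * first, i.e. as a little-endian halfword.
 */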
static void encode_ac_value(vid_encoder_state_t *state, uint16_t value) static bool encode_ac_value(vid_encoder_state_t *state, uint16_t value)
{ {
assert(0 <= value && value <= 0xFFFF); assert(0 <= value && value <= 0xFFFF);
#if 0 #if 0
for(int i = 0; i < sizeof(huffman_lookup)/sizeof(huffman_lookup[0]); i++) { for(int i = 0; i < sizeof(huffman_lookup)/sizeof(huffman_lookup[0]); i++) {
if(value == huffman_lookup[i].u_hword_pos) { if(value == huffman_lookup[i].u_hword_pos) {
encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|0); return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|0);
return;
} }
else if(value == huffman_lookup[i].u_hword_neg) { else if(value == huffman_lookup[i].u_hword_neg) {
encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|1); return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|1);
return;
} }
} }
// Use an escape // Use an escape
encode_bits(state, 6+16, (0x01<<16)|(0xFFFF&(uint32_t)value)); return encode_bits(state, 6+16, (0x01<<16)|(0xFFFF&(uint32_t)value));
#else #else
uint32_t outword = huffman_encoding_map[value]; uint32_t outword = huffman_encoding_map[value];
encode_bits(state, outword>>24, outword&0xFFFFFF); return encode_bits(state, outword>>24, outword&0xFFFFFF);
#endif #endif
} }
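/*
 * To summarise the two paths above: each AC value is a halfword of the form
 * (zero_run << 10) | (level & 0x3FF). huffman_encoding_map[] stores, for
 * every possible halfword, the code length in its top 8 bits and the code
 * itself in the low 24 bits; halfwords with no dedicated Huffman code fall
 * back to the 22-bit escape (6-bit prefix 0b000001 followed by the raw
 * 16-bit value), which is what the #if 0 branch spells out explicitly.
 */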
static void transform_dct_block(vid_encoder_state_t *state, int32_t *block) static void transform_dct_block(vid_encoder_state_t *state, float *block)
{ {
// Apply DCT to block // Apply DCT to block
int32_t midblock[8*8]; float midblock[8*8];
for (int reps = 0; reps < 2; reps++) { for (int i = 0; i < 8; i++) {
for (int i = 0; i < 8; i++) { for (int j = 0; j < 8; j++) {
for (int j = 0; j < 8; j++) { float v = 0.0f;
int32_t v = 0; for(int k = 0; k < 8; k++) {
for(int k = 0; k < 8; k++) { v += block[8*j+k] * (float)dct_scale_table[8*i+k] / (float)(1 << 16);
v += block[8*j+k]*dct_scale_table[8*i+k];
}
midblock[8*i+j] = (v + (1<<((14)-1)))>>(14);
} }
} midblock[8*i+j] = v;
memcpy(block, midblock, sizeof(midblock)); }
}
for (int i = 0; i < 8; i++) {
for (int j = 0; j < 8; j++) {
float v = 0.0f;
for(int k = 0; k < 8; k++) {
v += midblock[8*j+k] * (float)dct_scale_table[8*i+k] / (float)(1 << 16);
}
block[8*i+j] = v;
} }
// FIXME: Work out why the math has to go this way
block[0] /= 8;
for (int i = 0; i < 64; i++) {
// Finish reducing it
block[i] /= 4;
// If it's below the quantisation threshold, zero it
if(abs(block[i]) < quant_dec[i]) {
block[i] = 0;
}
} }
} }
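/*
 * The two passes above are the separable form of the 8x8 2D DCT: with C
 * being the DCT basis matrix stored in dct_scale_table as 16.16 fixed point,
 * the first loop computes M = C * X^T (rescaled to floats) and the second
 * computes C * M^T = C * X * C^T, i.e. the full two-dimensional transform of
 * the input block.
 */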
static void encode_dct_block(vid_encoder_state_t *state, int32_t *block) static bool encode_dct_block(vid_encoder_state_t *state, float *block)
{ {
int dc_value = 0; int16_t coeffs[64];
float scale = 8.0f / (float)state->quant_scale;
for (int i = 0; i < 64; i++) { for (int i = 0; i < 64; i++) {
// Quantise it // The DC coefficient is not affected by the quantization scale.
block[i] = (block[i])/quant_dec[i]; float x = block[i];
if (i) { x *= scale; }
// Clamp it int v = (int)roundf(x / (float)quant_dec[i]);
if (block[i] < -0x200) { block[i] = -0x200; } if (v < -0x200) { v = -0x200; }
if (block[i] > +0x1FF) { block[i] = +0x1FF; } if (v > +0x1FF) { v = +0x1FF; }
coeffs[i] = v;
} }
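	// Example of the mapping above, assuming a hypothetical quant_dec[] entry
	// of 16: at quant_scale = 4 an AC coefficient of 100.0 becomes
	// round(100 * (8/4) / 16) = 13, while the DC coefficient skips the
	// 8/quant_scale factor and is only divided by its table entry before the
	// clamp to the signed 10-bit range [-0x200, +0x1FF].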
// Get DC value if (!encode_bits(state, 10, coeffs[0]&0x3FF)) {
dc_value = block[0]; return false;
//dc_value = 0; }
encode_bits(state, 10, dc_value&0x3FF);
// Build RLE output // Build RLE output
uint16_t zero_rle_data[8*8]; uint16_t zero_rle_data[8*8];
@ -349,10 +352,10 @@ static void encode_dct_block(vid_encoder_state_t *state, int32_t *block)
for (int i = 1, zeroes = 0; i < 64; i++) { for (int i = 1, zeroes = 0; i < 64; i++) {
int ri = dct_zagzig_table[i]; int ri = dct_zagzig_table[i];
//int ri = dct_zigzag_table[i]; //int ri = dct_zigzag_table[i];
if (block[ri] == 0) { if (coeffs[ri] == 0) {
zeroes++; zeroes++;
} else { } else {
zero_rle_data[zero_rle_words++] = (zeroes<<10)|(block[ri]&0x3FF); zero_rle_data[zero_rle_words++] = (zeroes<<10)|(coeffs[ri]&0x3FF);
zeroes = 0; zeroes = 0;
state->uncomp_hwords_used += 1; state->uncomp_hwords_used += 1;
} }
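	// Example of the run-length packing above: three zero coefficients
	// followed by a value of -2 (in zigzag order) become the single halfword
	// (3 << 10) | (-2 & 0x3FF) = 0x0FFE, which encode_ac_value() then
	// Huffman-codes below.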
@ -360,19 +363,24 @@ static void encode_dct_block(vid_encoder_state_t *state, int32_t *block)
// Now Huffman-code the data // Now Huffman-code the data
for (int i = 0; i < zero_rle_words; i++) { for (int i = 0; i < zero_rle_words; i++) {
encode_ac_value(state, zero_rle_data[i]); if (!encode_ac_value(state, zero_rle_data[i])) {
return false;
}
} }
//fprintf(stderr, "dc %08X rles %2d\n", dc_value, zero_rle_words); //fprintf(stderr, "dc %08X rles %2d\n", coeffs[0], zero_rle_words);
//assert(dc_value >= -0x200); assert(dc_value < +0x200); //assert(coeffs[0] >= -0x200); assert(coeffs[0] < +0x200);
// Store end of block // Store end of block
encode_bits(state, 2, 0x2); if (!encode_bits(state, 2, 0x2)) {
return false;
}
state->uncomp_hwords_used += 2; state->uncomp_hwords_used += 2;
//state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF;
state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF; return true;
} }
#if 0
static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t min_val, int *values_to_shed) static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t min_val, int *values_to_shed)
{ {
// Reduce so it can all fit // Reduce so it can all fit
@ -394,48 +402,44 @@ static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t
// Factor in DC + EOF values // Factor in DC + EOF values
return nonzeroes+2; return nonzeroes+2;
} }
#endif
static void encode_frame_str(uint8_t *video_frames, int video_frame_count, uint8_t *output, settings_t *settings) void encode_frame_bs(uint8_t *video_frame, settings_t *settings)
{ {
int pitch = settings->video_width*4; int pitch = settings->video_width;
int real_index = (settings->state_vid.frame_index-1); /*int real_index = (settings->state_vid.frame_index-1);
if (real_index > video_frame_count-1) { if (real_index > video_frame_count-1) {
real_index = video_frame_count-1; real_index = video_frame_count-1;
} }
//uint8_t *video_frame = video_frames + settings->video_width*settings->video_height*4*real_index; uint8_t *y_plane = video_frames + settings->video_width*settings->video_height*3/2*real_index;*/
uint8_t *video_frame = video_frames; uint8_t *y_plane = video_frame;
uint8_t *c_plane = y_plane + (settings->video_width*settings->video_height);
if (!dct_done_init) { if (!dct_done_init) {
init_dct_data(); init_dct_data();
dct_done_init = true; dct_done_init = true;
} }
int dct_block_count_x = (settings->video_width+15)/16;
int dct_block_count_y = (settings->video_height+15)/16;
if (settings->state_vid.dct_block_lists[0] == NULL) { if (settings->state_vid.dct_block_lists[0] == NULL) {
int dct_block_count_x = (settings->video_width+15)/16; int dct_block_size = dct_block_count_x*dct_block_count_y*sizeof(float)*8*8;
int dct_block_count_y = (settings->video_height+15)/16;
int dct_block_size = dct_block_count_x*dct_block_count_y*sizeof(int32_t)*8*8;
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
settings->state_vid.dct_block_lists[i] = malloc(dct_block_size); settings->state_vid.dct_block_lists[i] = malloc(dct_block_size);
} }
} }
memset(settings->state_vid.unmuxed, 0, sizeof(settings->state_vid.unmuxed));
settings->state_vid.quant_scale = 1;
settings->state_vid.uncomp_hwords_used = 0;
settings->state_vid.bytes_used = 8;
settings->state_vid.blocks_used = 0;
// TODO: non-16x16-aligned videos // TODO: non-16x16-aligned videos
assert((settings->video_width % 16) == 0); assert((settings->video_width % 16) == 0);
assert((settings->video_height % 16) == 0); assert((settings->video_height % 16) == 0);
// Do the initial transform // Rearrange the Y/C planes returned by libswscale into macroblocks.
for(int fx = 0; fx < settings->video_width; fx += 16) { for(int fx = 0; fx < dct_block_count_x; fx++) {
for(int fy = 0; fy < settings->video_height; fy += 16) { for(int fy = 0; fy < dct_block_count_y; fy++) {
// Order: Cr Cb [Y1|Y2\nY3|Y4] // Order: Cr Cb [Y1|Y2\nY3|Y4]
int block_offs = 8*8*((fy>>4)*((settings->video_width+15)/16)+(fx>>4)); int block_offs = 64 * (fy*dct_block_count_x + fx);
int32_t *blocks[6] = { float *blocks[6] = {
settings->state_vid.dct_block_lists[0] + block_offs, settings->state_vid.dct_block_lists[0] + block_offs,
settings->state_vid.dct_block_lists[1] + block_offs, settings->state_vid.dct_block_lists[1] + block_offs,
settings->state_vid.dct_block_lists[2] + block_offs, settings->state_vid.dct_block_lists[2] + block_offs,
@ -446,66 +450,51 @@ static void encode_frame_str(uint8_t *video_frames, int video_frame_count, uint8
for(int y = 0; y < 8; y++) { for(int y = 0; y < 8; y++) {
for(int x = 0; x < 8; x++) { for(int x = 0; x < 8; x++) {
int k = y*8+x; int k = y*8 + x;
int cx = fx*8 + x;
int cy = fy*8 + y;
int lx = fx*16 + x;
int ly = fy*16 + y;
int cr = 0; blocks[0][k] = (float)c_plane[pitch*cy + 2*cx + 0] - 128.0f;
int cg = 0; blocks[1][k] = (float)c_plane[pitch*cy + 2*cx + 1] - 128.0f;
int cb = 0; blocks[2][k] = (float)y_plane[pitch*(ly+0) + (lx+0)] - 128.0f;
for(int cy = 0; cy < 2; cy++) { blocks[3][k] = (float)y_plane[pitch*(ly+0) + (lx+8)] - 128.0f;
for(int cx = 0; cx < 2; cx++) { blocks[4][k] = (float)y_plane[pitch*(ly+8) + (lx+0)] - 128.0f;
int coffs = pitch*(fy+y*2+cy) + 4*(fx+x*2+cx); blocks[5][k] = (float)y_plane[pitch*(ly+8) + (lx+8)] - 128.0f;
cr += video_frame[coffs+0];
cg += video_frame[coffs+1];
cb += video_frame[coffs+2];
}
}
// TODO: Get the real math for this
int cluma = cr+cg*2+cb;
#if 1
blocks[0][k] = ((cr<<2) - cluma + (1<<(4-1)))>>4;
blocks[1][k] = ((cb<<2) - cluma + (1<<(4-1)))>>4;
#else
blocks[0][k] = 0;
blocks[1][k] = 0;
#endif
for(int ly = 0; ly < 2; ly++) {
for(int lx = 0; lx < 2; lx++) {
int loffs = pitch*(fy+ly*8+y) + 4*(fx+lx*8+x);
int lr = video_frame[loffs+0];
int lg = video_frame[loffs+1];
int lb = video_frame[loffs+2];
// TODO: Get the real math for this
int lluma = (lr+lg*2+lb+2)-0x200;
if(lluma < -0x200) { lluma = -0x200; }
if(lluma > +0x1FF) { lluma = +0x1FF; }
lluma >>= 1;
blocks[2+2*ly+lx][k] = lluma;
}
}
} }
} }
for(int i = 0; i < 6; i++) { for(int i = 0; i < 6; i++) {
transform_dct_block(&(settings->state_vid), blocks[i]); transform_dct_block(&(settings->state_vid), blocks[i]);
} }
} }
} }
// Now reduce all the blocks // Attempt encoding the frame at the maximum quality. If the result is too
// TODO: Base this on actual bit count // large, increase the quantization scale and try again.
//const int accum_threshold = 6500; // TODO: if a frame encoded at scale N is too large but the same frame
const int accum_threshold = 1025*settings->state_vid.frame_block_count; // encoded at scale N-1 leaves a significant amount of free space, attempt
//const int accum_threshold = 900*settings->state_vid.frame_block_count; // compressing at scale N but optimizing coefficients away until it fits
int values_to_shed = 0; // (like the old algorithm did)
for(int min_val = 0;; min_val += 1) { for (
int accum = 0; settings->state_vid.quant_scale = 1;
for(int fx = 0; fx < settings->video_width; fx += 16) { settings->state_vid.quant_scale < 64;
for(int fy = 0; fy < settings->video_height; fy += 16) { settings->state_vid.quant_scale++
) {
memset(settings->state_vid.frame_output, 0, settings->state_vid.frame_max_size);
settings->state_vid.bits_value = 0;
settings->state_vid.bits_left = 16;
settings->state_vid.uncomp_hwords_used = 0;
settings->state_vid.bytes_used = 8;
bool ok = true;
for(int fx = 0; ok && (fx < dct_block_count_x); fx++) {
for(int fy = 0; ok && (fy < dct_block_count_y); fy++) {
// Order: Cr Cb [Y1|Y2\nY3|Y4] // Order: Cr Cb [Y1|Y2\nY3|Y4]
int block_offs = 8*8*((fy>>4)*((settings->video_width+15)/16)+(fx>>4)); int block_offs = 64 * (fy*dct_block_count_x + fx);
int32_t *blocks[6] = { float *blocks[6] = {
settings->state_vid.dct_block_lists[0] + block_offs, settings->state_vid.dct_block_lists[0] + block_offs,
settings->state_vid.dct_block_lists[1] + block_offs, settings->state_vid.dct_block_lists[1] + block_offs,
settings->state_vid.dct_block_lists[2] + block_offs, settings->state_vid.dct_block_lists[2] + block_offs,
@ -513,132 +502,125 @@ static void encode_frame_str(uint8_t *video_frames, int video_frame_count, uint8
settings->state_vid.dct_block_lists[4] + block_offs, settings->state_vid.dct_block_lists[4] + block_offs,
settings->state_vid.dct_block_lists[5] + block_offs, settings->state_vid.dct_block_lists[5] + block_offs,
}; };
const int luma_reduce_mul = 8;
const int chroma_reduce_mul = 8; for(int i = 0; ok && (i < 6); i++) {
for(int i = 6-1; i >= 0; i--) { ok = encode_dct_block(&(settings->state_vid), blocks[i]);
accum += reduce_dct_block(&(settings->state_vid), blocks[i], (i < 2 ? min_val*luma_reduce_mul+1 : min_val*chroma_reduce_mul+1), &values_to_shed);
} }
} }
} }
if(accum <= accum_threshold) { if (!ok) { continue; }
break; if (!encode_bits(&(settings->state_vid), 10, 0x1FF)) { continue; }
} if (!encode_bits(&(settings->state_vid), 2, 0x2)) { continue; }
if (!flush_bits(&(settings->state_vid))) { continue; }
values_to_shed = accum - accum_threshold; settings->state_vid.uncomp_hwords_used += 2;
settings->state_vid.quant_scale_sum += settings->state_vid.quant_scale;
break;
} }
assert(settings->state_vid.quant_scale < 64);
// Now encode all the blocks // MDEC DMA is usually configured to transfer data in 32-word chunks.
for(int fx = 0; fx < settings->video_width; fx += 16) { settings->state_vid.uncomp_hwords_used = (settings->state_vid.uncomp_hwords_used+0x3F)&~0x3F;
for(int fy = 0; fy < settings->video_height; fy += 16) {
// Order: Cr Cb [Y1|Y2\nY3|Y4]
int block_offs = 8*8*((fy>>4)*((settings->video_width+15)/16)+(fx>>4));
int32_t *blocks[6] = {
settings->state_vid.dct_block_lists[0] + block_offs,
settings->state_vid.dct_block_lists[1] + block_offs,
settings->state_vid.dct_block_lists[2] + block_offs,
settings->state_vid.dct_block_lists[3] + block_offs,
settings->state_vid.dct_block_lists[4] + block_offs,
settings->state_vid.dct_block_lists[5] + block_offs,
};
for(int i = 0; i < 6; i++) {
encode_dct_block(&(settings->state_vid), blocks[i]);
}
}
}
encode_bits(&(settings->state_vid), 10, 0x1FF); // This is not the number of 32-byte blocks required for uncompressed data
encode_bits(&(settings->state_vid), 2, 0x2); // as jPSXdec docs say, but rather the number of 32-*bit* words required.
settings->state_vid.uncomp_hwords_used += 2; // The first 4 bytes of the frame header are in fact the MDEC command to
settings->state_vid.uncomp_hwords_used = (settings->state_vid.uncomp_hwords_used+0xF)&~0xF; // start decoding, which contains the data length in words in the lower 16
// bits.
flush_bits(&(settings->state_vid)); settings->state_vid.blocks_used = (settings->state_vid.uncomp_hwords_used+1)>>1;
settings->state_vid.blocks_used = ((settings->state_vid.uncomp_hwords_used+0xF)&~0xF)>>4;
// We need a multiple of 4 // We need a multiple of 4
settings->state_vid.bytes_used = (settings->state_vid.bytes_used+0x3)&~0x3; settings->state_vid.bytes_used = (settings->state_vid.bytes_used+0x3)&~0x3;
// Build the demuxed header // MDEC command (size of decompressed MDEC data)
settings->state_vid.unmuxed[0x000] = (uint8_t)settings->state_vid.blocks_used; settings->state_vid.frame_output[0x000] = (uint8_t)settings->state_vid.blocks_used;
settings->state_vid.unmuxed[0x001] = (uint8_t)(settings->state_vid.blocks_used>>8); settings->state_vid.frame_output[0x001] = (uint8_t)(settings->state_vid.blocks_used>>8);
settings->state_vid.unmuxed[0x002] = (uint8_t)0x00; settings->state_vid.frame_output[0x002] = (uint8_t)0x00;
settings->state_vid.unmuxed[0x003] = (uint8_t)0x38; settings->state_vid.frame_output[0x003] = (uint8_t)0x38;
settings->state_vid.unmuxed[0x004] = (uint8_t)settings->state_vid.quant_scale;
settings->state_vid.unmuxed[0x005] = (uint8_t)(settings->state_vid.quant_scale>>8); // Quantization scale
settings->state_vid.unmuxed[0x006] = 0x02; // Version 2 settings->state_vid.frame_output[0x004] = (uint8_t)settings->state_vid.quant_scale;
settings->state_vid.unmuxed[0x007] = 0x00; settings->state_vid.frame_output[0x005] = (uint8_t)(settings->state_vid.quant_scale>>8);
// BS version
settings->state_vid.frame_output[0x006] = 0x02;
settings->state_vid.frame_output[0x007] = 0x00;
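	// Recap of the 8-byte BS frame header written above (all fields are
	// little-endian 16-bit values):
	//   +0x00  MDEC data length in 32-bit words (blocks_used)
	//   +0x02  0x3800, the upper half of the MDEC decode command
	//   +0x04  quantization scale used for this frame
	//   +0x06  BS version (2)
	// The bitstream produced by encode_bits() starts right after, at +0x08.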
retire_av_data(settings, 0, 1); retire_av_data(settings, 0, 1);
} }
void encode_block_str(uint8_t *video_frames, int video_frame_count, uint8_t *output, settings_t *settings) void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings)
{ {
uint8_t header[32]; uint8_t header[32];
memset(header, 0, sizeof(header)); memset(header, 0, sizeof(header));
for(int i = 0; i < 7; i++) { while(settings->state_vid.frame_data_offset >= settings->state_vid.frame_max_size) {
while(settings->state_vid.frame_block_index >= settings->state_vid.frame_block_count) { settings->state_vid.frame_index++;
settings->state_vid.frame_index++; // TODO: work out an optimal block count for this
// TODO: work out an optimal block count for this // TODO: calculate this all based on FPS
// TODO: calculate this all based on FPS settings->state_vid.frame_block_overflow_num += settings->state_vid.frame_block_base_overflow;
settings->state_vid.frame_block_overflow_num += settings->state_vid.frame_block_base_overflow; settings->state_vid.frame_max_size = settings->state_vid.frame_block_overflow_num / settings->state_vid.frame_block_overflow_den * 2016;
settings->state_vid.frame_block_count = settings->state_vid.frame_block_overflow_num / settings->state_vid.frame_block_overflow_den; settings->state_vid.frame_block_overflow_num %= settings->state_vid.frame_block_overflow_den;
settings->state_vid.frame_block_overflow_num %= settings->state_vid.frame_block_overflow_den; settings->state_vid.frame_data_offset = 0;
settings->state_vid.frame_block_index = 0; encode_frame_bs(video_frames, settings);
encode_frame_str(video_frames, video_frame_count, output, settings);
}
// Header: MDEC0 register
header[0x000] = 0x60;
header[0x001] = 0x01;
header[0x002] = 0x01;
header[0x003] = 0x80;
// Muxed chunk index/count
int chunk_index = settings->state_vid.frame_block_index;
int chunk_count = settings->state_vid.frame_block_count;
header[0x004] = (uint8_t)chunk_index;
header[0x005] = (uint8_t)(chunk_index>>8);
header[0x006] = (uint8_t)chunk_count;
header[0x007] = (uint8_t)(chunk_count>>8);
// Frame index
header[0x008] = (uint8_t)settings->state_vid.frame_index;
header[0x009] = (uint8_t)(settings->state_vid.frame_index>>8);
header[0x00A] = (uint8_t)(settings->state_vid.frame_index>>16);
header[0x00B] = (uint8_t)(settings->state_vid.frame_index>>24);
// Video frame size
header[0x010] = (uint8_t)settings->video_width;
header[0x011] = (uint8_t)(settings->video_width>>8);
header[0x012] = (uint8_t)settings->video_height;
header[0x013] = (uint8_t)(settings->video_height>>8);
// 32-byte blocks required for MDEC data
header[0x014] = (uint8_t)settings->state_vid.blocks_used;
header[0x015] = (uint8_t)(settings->state_vid.blocks_used>>8);
// Some weird thing
header[0x016] = 0x00;
header[0x017] = 0x38;
// Quantization scale
header[0x018] = (uint8_t)settings->state_vid.quant_scale;
header[0x019] = (uint8_t)(settings->state_vid.quant_scale>>8);
// Version
header[0x01A] = 0x02; // Version 2
header[0x01B] = 0x00;
// Demuxed bytes used as a multiple of 4
header[0x00C] = (uint8_t)settings->state_vid.bytes_used;
header[0x00D] = (uint8_t)(settings->state_vid.bytes_used>>8);
header[0x00E] = (uint8_t)(settings->state_vid.bytes_used>>16);
header[0x00F] = (uint8_t)(settings->state_vid.bytes_used>>24);
memcpy(output + 2352*i + 0x018, header, sizeof(header));
memcpy(output + 2352*i + 0x018 + 0x020, settings->state_vid.unmuxed + 2016*settings->state_vid.frame_block_index, 2016);
settings->state_vid.frame_block_index++;
} }
// STR version
header[0x000] = 0x60;
header[0x001] = 0x01;
// Chunk type: MDEC data
header[0x002] = 0x01;
header[0x003] = 0x80;
// Muxed chunk index/count
int chunk_index = settings->state_vid.frame_data_offset/2016;
int chunk_count = settings->state_vid.frame_max_size/2016;
header[0x004] = (uint8_t)chunk_index;
header[0x005] = (uint8_t)(chunk_index>>8);
header[0x006] = (uint8_t)chunk_count;
header[0x007] = (uint8_t)(chunk_count>>8);
// Frame index
header[0x008] = (uint8_t)settings->state_vid.frame_index;
header[0x009] = (uint8_t)(settings->state_vid.frame_index>>8);
header[0x00A] = (uint8_t)(settings->state_vid.frame_index>>16);
header[0x00B] = (uint8_t)(settings->state_vid.frame_index>>24);
// Video frame size
header[0x010] = (uint8_t)settings->video_width;
header[0x011] = (uint8_t)(settings->video_width>>8);
header[0x012] = (uint8_t)settings->video_height;
header[0x013] = (uint8_t)(settings->video_height>>8);
// MDEC command (size of decompressed MDEC data)
header[0x014] = (uint8_t)settings->state_vid.blocks_used;
header[0x015] = (uint8_t)(settings->state_vid.blocks_used>>8);
header[0x016] = 0x00;
header[0x017] = 0x38;
// Quantization scale
header[0x018] = (uint8_t)settings->state_vid.quant_scale;
header[0x019] = (uint8_t)(settings->state_vid.quant_scale>>8);
// BS version
header[0x01A] = 0x02;
header[0x01B] = 0x00;
// Demuxed bytes used as a multiple of 4
header[0x00C] = (uint8_t)settings->state_vid.bytes_used;
header[0x00D] = (uint8_t)(settings->state_vid.bytes_used>>8);
header[0x00E] = (uint8_t)(settings->state_vid.bytes_used>>16);
header[0x00F] = (uint8_t)(settings->state_vid.bytes_used>>24);
if (settings->format == FORMAT_STR2CD) {
memcpy(output + 0x018, header, sizeof(header));
memcpy(output + 0x018 + 0x020, settings->state_vid.frame_output + settings->state_vid.frame_data_offset, 2016);
} else {
memcpy(output + 0x008, header, sizeof(header));
memcpy(output + 0x008 + 0x020, settings->state_vid.frame_output + settings->state_vid.frame_data_offset, 2016);
}
settings->state_vid.frame_data_offset += 2016;
} }
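/*
 * Layout of the 32-byte .STR chunk header assembled above, and where it ends
 * up in the sector:
 *
 *   +0x00  0x0160   STR chunk magic/version
 *   +0x02  0x8001   chunk type (MDEC video data)
 *   +0x04  chunk index within the current frame
 *   +0x06  chunk count for the current frame
 *   +0x08  frame index (32-bit)
 *   +0x0C  demuxed frame size in bytes, rounded up to a multiple of 4
 *   +0x10  frame width
 *   +0x12  frame height
 *   +0x14  MDEC command (data length in 32-bit words, 0x3800 in the top half)
 *   +0x18  quantization scale
 *   +0x1A  BS version (2)
 *
 * For str2cd the header is copied to offset 0x18 of the 2352-byte sector
 * (right after the sync, header and XA subheader), for str2 to offset 0x08 of
 * the 2336-byte sector (after the subheader); in both cases the 2016-byte
 * chunk payload follows immediately after the 32-byte header.
 */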
View File
@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
Copyright (c) 2019, 2020 Adrian "asie" Siekierka Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg
This software is provided 'as-is', without any express or implied This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages warranty. In no event will the authors be held liable for any damages
@ -23,87 +24,247 @@ freely, subject to the following restrictions:
#include "common.h" #include "common.h"
const char *format_names[NUM_FORMATS] = {
"xa", "xacd",
"spu", "spui",
"vag", "vagi",
"str2", "str2cd",
"sbs2"
};
void print_help(void) { void print_help(void) {
fprintf(stderr, "Usage: psxavenc [-f freq] [-b bitdepth] [-c channels] [-F num] [-C num] [-t xa|xacd|spu|str2] <in> <out>\n\n"); fprintf(stderr,
fprintf(stderr, " -f freq Use specified frequency\n"); "Usage:\n"
fprintf(stderr, " -t format Use specified output type:\n"); " psxavenc -t <xa|xacd> [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] <in> <out.xa>\n"
fprintf(stderr, " xa [A.] .xa 2336-byte sectors\n"); " psxavenc -t <str2|str2cd> [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] [-s WxH] [-I] [-r num/den] [-x 1|2] <in> <out.str>\n"
fprintf(stderr, " xacd [A.] .xa 2352-byte sectors\n"); " psxavenc -t sbs2 [-s WxH] [-I] [-r num/den] [-a size] <in> <out.str>\n"
fprintf(stderr, " spu [A.] raw SPU-ADPCM data\n"); " psxavenc -t <spu|vag> [-f freq] [-L] <in> <out.vag>\n"
fprintf(stderr, " str2 [AV] v2 .str video 2352-byte sectors\n"); " psxavenc -t <spui|vagi> [-f freq] [-c 1-24] [-L] [-i size] [-a size] <in> <out.vag>\n"
fprintf(stderr, " -b bitdepth Use specified bit depth (only 4 bits supported)\n"); "\nTool options:\n"
fprintf(stderr, " -c channels Use specified channel count (1 or 2)\n"); " -h Show this help message and exit\n"
fprintf(stderr, " -F num [.xa] Set the file number to num (0-255)\n"); " -q Suppress all non-error messages\n"
fprintf(stderr, " -C num [.xa] Set the channel number to num (0-31)\n"); "\nOutput options:\n"
" -t format Use specified output type:\n"
" xa [A.] .xa, 2336-byte sectors\n"
" xacd [A.] .xa, 2352-byte sectors\n"
" spu [A.] raw SPU-ADPCM mono data\n"
" spui [A.] raw SPU-ADPCM interleaved data\n"
" vag [A.] .vag SPU-ADPCM mono\n"
" vagi [A.] .vag SPU-ADPCM interleaved\n"
" str2 [AV] v2 .str video, 2336-byte sectors\n"
" str2cd [AV] v2 .str video, 2352-byte sectors\n"
" sbs2 [.V] v2 .sbs video, 2048-byte sectors\n"
" -F num Set the XA file number for xa/str2 (0-255)\n"
" -C num Set the XA channel number for xa/str2 (0-31)\n"
"\nAudio options:\n"
" -f freq Use specified sample rate (must be 18900 or 37800 for xa/str2)\n"
" -b bitdepth Use specified bit depth for xa/str2 (4 or 8)\n"
" -c channels Use specified channel count (1-2 for xa/str2, any for spui/vagi)\n"
" -L Add a loop marker at the end of SPU-ADPCM data\n"
" -R key=value,... Pass custom options to libswresample (see ffmpeg docs)\n"
"\nSPU interleaving options (spui/vagi format):\n"
" -i size Use specified interleave\n"
" -a size Pad header and each interleaved chunk to specified size\n"
"\nVideo options (str2/str2cd/sbs2 format):\n"
" -s WxH Rescale input file to fit within specified size (default 320x240)\n"
" -I Force stretching to given size without preserving aspect ratio\n"
" -S key=value,... Pass custom options to libswscale (see ffmpeg docs)\n"
" -r num/den Set frame rate to specified integer or fraction (default 15)\n"
" -x speed Set the CD-ROM speed the file is meant to played at (1-2)\n"
" -a size Set the size of each frame for sbs2\n"
);
} }
int parse_args(settings_t* settings, int argc, char** argv) { int parse_args(settings_t* settings, int argc, char** argv) {
int c; int c, i;
while ((c = getopt(argc, argv, "t:f:b:c:F:C:")) != -1) { char *next;
while ((c = getopt(argc, argv, "?hqt:F:C:f:b:c:LR:i:a:s:IS:r:x:")) != -1) {
switch (c) { switch (c) {
case '?':
case 'h': {
print_help();
return -1;
} break;
case 'q': {
settings->quiet = true;
settings->show_progress = false;
} break;
case 't': { case 't': {
if (strcmp(optarg, "xa") == 0) { settings->format = -1;
settings->format = FORMAT_XA; for (i = 0; i < NUM_FORMATS; i++) {
} else if (strcmp(optarg, "xacd") == 0) { if (!strcmp(optarg, format_names[i])) {
settings->format = FORMAT_XACD; settings->format = i;
} else if (strcmp(optarg, "spu") == 0) { break;
settings->format = FORMAT_SPU; }
} else if (strcmp(optarg, "str2") == 0) { }
settings->format = FORMAT_STR2; if (settings->format < 0) {
} else {
fprintf(stderr, "Invalid format: %s\n", optarg); fprintf(stderr, "Invalid format: %s\n", optarg);
return -1; return -1;
} }
} break; } break;
case 'f': {
settings->frequency = atoi(optarg);
} break;
case 'b': {
settings->bits_per_sample = atoi(optarg);
if (settings->bits_per_sample != 4) {
fprintf(stderr, "Invalid bit depth: %d\n", settings->frequency);
return -1;
}
} break;
case 'c': {
int ch = atoi(optarg);
if (ch <= 0 || ch > 2) {
fprintf(stderr, "Invalid channel count: %d\n", ch);
return -1;
}
settings->stereo = (ch == 2 ? 1 : 0);
} break;
case 'F': { case 'F': {
settings->file_number = atoi(optarg); settings->file_number = strtol(optarg, NULL, 0);
if (settings->file_number < 0 || settings->file_number > 255) { if (settings->file_number < 0 || settings->file_number > 255) {
fprintf(stderr, "Invalid file number: %d\n", settings->file_number); fprintf(stderr, "Invalid file number: %d\n", settings->file_number);
return -1; return -1;
} }
} break; } break;
case 'C': { case 'C': {
settings->channel_number = atoi(optarg); settings->channel_number = strtol(optarg, NULL, 0);
if (settings->channel_number < 0 || settings->channel_number > 31) { if (settings->channel_number < 0 || settings->channel_number > 31) {
fprintf(stderr, "Invalid channel number: %d\n", settings->channel_number); fprintf(stderr, "Invalid channel number: %d\n", settings->channel_number);
return -1; return -1;
} }
} break; } break;
case '?': case 'f': {
case 'h': { settings->frequency = strtol(optarg, NULL, 0);
print_help(); } break;
return -1; case 'b': {
settings->bits_per_sample = strtol(optarg, NULL, 0);
if (settings->bits_per_sample != 4 && settings->bits_per_sample != 8) {
fprintf(stderr, "Invalid bit depth: %d\n", settings->frequency);
return -1;
}
} break;
case 'c': {
settings->channels = strtol(optarg, NULL, 0);
if (settings->channels < 1 || settings->channels > 24) {
fprintf(stderr, "Invalid channel count: %d\n", settings->channels);
return -1;
}
} break;
case 'L': {
settings->loop = true;
} break;
case 'R': {
settings->swresample_options = optarg;
} break;
case 'i': {
settings->interleave = (strtol(optarg, NULL, 0) + 15) & ~15;
if (settings->interleave < 16) {
fprintf(stderr, "Invalid interleave: %d\n", settings->interleave);
return -1;
}
} break;
case 'a': {
settings->alignment = strtol(optarg, NULL, 0);
if (settings->alignment < 1) {
fprintf(stderr, "Invalid alignment: %d\n", settings->alignment);
return -1;
}
} break;
case 's': {
settings->video_width = (strtol(optarg, &next, 0) + 15) & ~15;
if (*next != 'x') {
fprintf(stderr, "Invalid video size (must be specified as <width>x<height>)\n");
return -1;
}
settings->video_height = (strtol(next + 1, NULL, 0) + 15) & ~15;
if (settings->video_width < 16 || settings->video_width > 320) {
fprintf(stderr, "Invalid video width: %d\n", settings->video_width);
return -1;
}
if (settings->video_height < 16 || settings->video_height > 240) {
fprintf(stderr, "Invalid video height: %d\n", settings->video_height);
return -1;
}
} break;
case 'I': {
settings->ignore_aspect_ratio = true;
} break;
case 'S': {
settings->swscale_options = optarg;
} break;
case 'r': {
settings->video_fps_num = strtol(optarg, &next, 0);
if (*next == '/') {
settings->video_fps_den = strtol(next + 1, NULL, 0);
} else {
settings->video_fps_den = 1;
}
if (!settings->video_fps_den) {
fprintf(stderr, "Invalid frame rate denominator\n");
return -1;
}
i = settings->video_fps_num / settings->video_fps_den;
if (i < 1 || i > 30) {
fprintf(stderr, "Invalid frame rate: %d/%d\n", settings->video_fps_num, settings->video_fps_den);
return -1;
}
} break;
case 'x': {
settings->cd_speed = strtol(optarg, NULL, 0);
if (settings->cd_speed < 1 || settings->cd_speed > 2) {
fprintf(stderr, "Invalid CD-ROM speed: %d\n", settings->cd_speed);
return -1;
}
} break; } break;
} }
} }
if (settings->format == FORMAT_XA || settings->format == FORMAT_XACD) { // Validate settings
if (settings->frequency != PSX_AUDIO_XA_FREQ_SINGLE && settings->frequency != PSX_AUDIO_XA_FREQ_DOUBLE) { switch (settings->format) {
fprintf(stderr, "Invalid frequency: %d Hz\n", settings->frequency); case FORMAT_XA:
case FORMAT_XACD:
case FORMAT_STR2:
case FORMAT_STR2CD:
if (settings->frequency != PSX_AUDIO_XA_FREQ_SINGLE && settings->frequency != PSX_AUDIO_XA_FREQ_DOUBLE) {
fprintf(
stderr, "Invalid XA-ADPCM frequency: %d Hz (must be %d or %d Hz)\n", settings->frequency,
PSX_AUDIO_XA_FREQ_SINGLE, PSX_AUDIO_XA_FREQ_DOUBLE
);
return -1;
}
if (settings->channels > 2) {
fprintf(stderr, "Invalid XA-ADPCM channel count: %d (must be 1 or 2)\n", settings->channels);
return -1;
}
if (settings->loop) {
fprintf(stderr, "XA-ADPCM does not support loop markers\n");
return -1;
}
break;
case FORMAT_SPU:
case FORMAT_VAG:
if (settings->bits_per_sample != 4) {
fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample);
return -1;
}
if (settings->channels != 1) {
fprintf(stderr, "Invalid SPU-ADPCM channel count: %d (must be 1)\n", settings->channels);
return -1;
}
if (settings->interleave) {
fprintf(stderr, "Interleave cannot be specified for mono SPU-ADPCM\n");
return -1;
}
break;
case FORMAT_SPUI:
case FORMAT_VAGI:
if (settings->bits_per_sample != 4) {
fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample);
return -1;
}
if (!settings->interleave) {
fprintf(stderr, "Interleave must be specified for interleaved SPU-ADPCM\n");
return -1;
}
break;
case FORMAT_SBS2:
if (!settings->alignment) {
fprintf(stderr, "Alignment (frame size) must be specified\n");
return -1;
}
if (settings->alignment < 256) {
fprintf(stderr, "Invalid frame size: %d (must be at least 256)\n", settings->alignment);
return -1;
}
break;
default:
fprintf(stderr, "Output format must be specified\n");
return -1; return -1;
}
}
if (settings->format == FORMAT_SPU) {
settings->stereo = false;
} }
return optind; return optind;
@ -116,28 +277,46 @@ int main(int argc, char **argv) {
memset(&settings,0,sizeof(settings_t)); memset(&settings,0,sizeof(settings_t));
settings.quiet = false;
settings.show_progress = isatty(fileno(stderr));
settings.format = -1;
settings.file_number = 0; settings.file_number = 0;
settings.channel_number = 0; settings.channel_number = 0;
settings.stereo = true; settings.cd_speed = 2;
settings.channels = 1;
settings.frequency = PSX_AUDIO_XA_FREQ_DOUBLE; settings.frequency = PSX_AUDIO_XA_FREQ_DOUBLE;
settings.bits_per_sample = 4; settings.bits_per_sample = 4;
settings.interleave = 0;
settings.alignment = 2048;
settings.loop = false;
// NOTE: ffmpeg/ffplay's .str demuxer has the frame rate hardcoded to 15fps
// so if you're messing around with this make sure you test generated files
// with another player and/or in an emulator.
settings.video_width = 320; settings.video_width = 320;
settings.video_height = 240; settings.video_height = 240;
settings.video_fps_num = 15;
settings.video_fps_den = 1;
settings.ignore_aspect_ratio = false;
settings.swresample_options = NULL;
settings.swscale_options = NULL;
settings.audio_samples = NULL; settings.audio_samples = NULL;
settings.audio_sample_count = 0; settings.audio_sample_count = 0;
settings.video_frames = NULL; settings.video_frames = NULL;
settings.video_frame_count = 0; settings.video_frame_count = 0;
// TODO: make this adjustable
// also for some reason ffmpeg seems to hard-code the framerate to 15fps
settings.video_fps_num = 15;
settings.video_fps_den = 1;
for(int i = 0; i < 6; i++) { for(int i = 0; i < 6; i++) {
settings.state_vid.dct_block_lists[i] = NULL; settings.state_vid.dct_block_lists[i] = NULL;
} }
if (argc < 2) {
print_help();
return 1;
}
arg_offset = parse_args(&settings, argc, argv); arg_offset = parse_args(&settings, argc, argv);
if (arg_offset < 0) { if (arg_offset < 0) {
return 1; return 1;
@ -146,13 +325,12 @@ int main(int argc, char **argv) {
return 1; return 1;
} }
fprintf(stderr, "Using settings: %d Hz @ %d bit depth, %s. F%d C%d\n", bool has_audio = (settings.format != FORMAT_SBS2);
settings.frequency, settings.bits_per_sample, bool has_video = (settings.format == FORMAT_STR2) ||
settings.stereo ? "stereo" : "mono", (settings.format == FORMAT_STR2CD) || (settings.format == FORMAT_SBS2);
settings.file_number, settings.channel_number
);
bool did_open_data = open_av_data(argv[arg_offset + 0], &settings); bool did_open_data = open_av_data(argv[arg_offset + 0], &settings,
has_audio, has_video, !has_video, has_video);
if (!did_open_data) { if (!did_open_data) {
fprintf(stderr, "Could not open input file!\n"); fprintf(stderr, "Could not open input file!\n");
return 1; return 1;
@ -164,23 +342,75 @@ int main(int argc, char **argv) {
return 1; return 1;
} }
int av_sample_mul = settings.stereo ? 2 : 1; settings.start_time = time(NULL);
settings.last_progress_update = 0;
switch (settings.format) { switch (settings.format) {
case FORMAT_XA: case FORMAT_XA:
case FORMAT_XACD: case FORMAT_XACD:
pull_all_av_data(&settings); if (!settings.quiet) {
encode_file_xa(settings.audio_samples, settings.audio_sample_count / av_sample_mul, &settings, output); fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
settings.frequency, settings.bits_per_sample,
(settings.channels == 2) ? "stereo" : "mono",
settings.file_number, settings.channel_number
);
}
encode_file_xa(&settings, output);
break; break;
case FORMAT_SPU: case FORMAT_SPU:
pull_all_av_data(&settings); case FORMAT_VAG:
encode_file_spu(settings.audio_samples, settings.audio_sample_count / av_sample_mul, &settings, output); if (!settings.quiet) {
fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz mono\n",
settings.frequency
);
}
encode_file_spu(&settings, output);
break;
case FORMAT_SPUI:
case FORMAT_VAGI:
if (!settings.quiet) {
fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n",
settings.frequency, settings.channels, settings.interleave
);
}
encode_file_spu_interleaved(&settings, output);
break; break;
case FORMAT_STR2: case FORMAT_STR2:
case FORMAT_STR2CD:
if (!settings.quiet) {
if (settings.decoder_state_av.audio_stream) {
fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
settings.frequency, settings.bits_per_sample,
(settings.channels == 2) ? "stereo" : "mono",
settings.file_number, settings.channel_number
);
}
fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n",
settings.video_width, settings.video_height,
(double)settings.video_fps_num / (double)settings.video_fps_den
);
}
encode_file_str(&settings, output); encode_file_str(&settings, output);
break; break;
case FORMAT_SBS2:
if (!settings.quiet) {
fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n",
settings.video_width, settings.video_height,
(double)settings.video_fps_num / (double)settings.video_fps_den
);
}
encode_file_sbs(&settings, output);
break;
} }
if (settings.show_progress) {
fprintf(stderr, "\nDone.\n");
}
fclose(output); fclose(output);
close_av_data(&settings); close_av_data(&settings);
return 0; return 0;