Merge pull request #1 from spicyjpeg/old-pr

.STR conversion bugfix, support for generating mono and interleaved .VAG files
This commit is contained in:
Adrian Siekierka 2023-05-15 18:52:34 +02:00 committed by GitHub
commit 2482bc14db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 1308 additions and 704 deletions

View File

@ -1,6 +1,22 @@
# psxavenc
psxavenc is an open-source command-line tool allowing for the encoding of PS1-format audio and video data.
psxavenc is an open-source command-line tool for encoding audio and video data
into formats commonly used on the original PlayStation.
## Installation
Requirements:
- a recent version of FFmpeg libraries (`libavformat`, `libavcodec`,
`libavutil`, `libswresample`, `libswscale`);
- a recent version of Meson.
```shell
$ meson setup build
$ cd build
$ ninja install
```
## Usage
@ -8,21 +24,49 @@ Run `psxavenc`.
### Examples
Converting a sound file to a 22050Hz SPU sample:
Rescale a video file to ≤320x240 pixels (preserving aspect ratio) and encode it
into a 15fps .STR file with 37800 Hz 4-bit stereo audio and 2352-byte sectors,
meant to be played at 2x CD-ROM speed:
```shell
$ psxavenc -f 22050 -t spu -c 1 -b 4 sound_file.ogg sound_file.snd
$ psxavenc -t str2cd -f 37800 -b 4 -c 2 -s 320x240 -r 15 -x 2 in.mp4 out.str
```
## Installation
Requirements:
* a recent version of FFmpeg,
* a recent version of Meson.
Convert a mono audio sample to 22050 Hz raw SPU-ADPCM data:
```shell
$ meson setup build
$ cd build
$ ninja install
$ psxavenc -t spu -f 22050 in.ogg out.snd
```
Convert a stereo audio file to a 44100 Hz interleaved .VAG file with 8192-byte
interleave and loop flags set at the end of each interleaved chunk:
```shell
$ psxavenc -t vagi -f 44100 -c 2 -L -i 8192 in.wav out.vag
```
## Supported formats
| Format | Audio | Channels | Video | Sector size |
| :------- | :--------------- | :------- | :---- | :---------- |
| `xa` | XA-ADPCM | 1 or 2 | None | 2336 bytes |
| `xacd` | XA-ADPCM | 1 or 2 | None | 2352 bytes |
| `spu` | SPU-ADPCM | 1 | None | |
| `spui` | SPU-ADPCM | Any | None | Any |
| `vag` | SPU-ADPCM | 1 | None | |
| `vagi` | SPU-ADPCM | Any | None | Any |
| `str2` | None or XA-ADPCM | 1 or 2 | BS v2 | 2336 bytes |
| `str2cd` | None or XA-ADPCM | 1 or 2 | BS v2 | 2352 bytes |
| `sbs2` | None | | BS v2 | Any |
Notes:
- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .VAG
header at the beginning of the file. The header is always 48 bytes long for
`vag` files, while in the case of `vagi` files it is padded to the size
specified using the `-a` option (2048 bytes by default). Note that `vagi`
files with more than 2 channels and/or alignment other than 2048 bytes are not
standardized.
- The `sbs2` format (used in some System 573 games) is simply a series of
concatenated BS v2 frames, each padded to the size specified by the `-a`
option, with no additional headers besides the BS frame headers.

View File

@ -3,6 +3,7 @@ libpsxav: MDEC video + SPU/XA-ADPCM audio library
Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
@ -25,6 +26,9 @@ freely, subject to the following restrictions:
#include <string.h>
#include "libpsxav.h"
#define SHIFT_RANGE_4BPS 12
#define SHIFT_RANGE_8BPS 8
#define ADPCM_FILTER_COUNT 5
#define XA_ADPCM_FILTER_COUNT 4
#define SPU_ADPCM_FILTER_COUNT 5
@ -32,7 +36,7 @@ freely, subject to the following restrictions:
static const int16_t filter_k1[ADPCM_FILTER_COUNT] = {0, 60, 115, 98, 122};
static const int16_t filter_k2[ADPCM_FILTER_COUNT] = {0, 0, -52, -55, -60};
static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_t *samples, int pitch, int filter) {
static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, int filter, int shift_range) {
// Assumption made:
//
// There is value in shifting right one step further to allow the nibbles to clip.
@ -51,7 +55,7 @@ static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_
int32_t s_min = 0;
int32_t s_max = 0;
for (int i = 0; i < 28; i++) {
int32_t raw_sample = samples[i * pitch];
int32_t raw_sample = (i >= sample_limit) ? 0 : samples[i * pitch];
int32_t previous_values = (k1*prev1 + k2*prev2 + (1<<5))>>6;
int32_t sample = raw_sample - previous_values;
if (sample < s_min) { s_min = sample; }
@ -59,16 +63,18 @@ static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_
prev2 = prev1;
prev1 = raw_sample;
}
while(right_shift < 12 && (s_max>>right_shift) > +0x7) { right_shift += 1; };
while(right_shift < 12 && (s_min>>right_shift) < -0x8) { right_shift += 1; };
while(right_shift < shift_range && (s_max>>right_shift) > (+0x7FFF >> shift_range)) { right_shift += 1; };
while(right_shift < shift_range && (s_min>>right_shift) < (-0x8000 >> shift_range)) { right_shift += 1; };
int min_shift = 12 - right_shift;
assert(0 <= min_shift && min_shift <= 12);
int min_shift = shift_range - right_shift;
assert(0 <= min_shift && min_shift <= shift_range);
return min_shift;
}
static uint8_t attempt_to_encode_nibbles(psx_audio_encoder_channel_state_t *outstate, const psx_audio_encoder_channel_state_t *instate, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter, int sample_shift) {
uint8_t nondata_mask = ~(0x0F << data_shift);
static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, const psx_audio_encoder_channel_state_t *instate, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter, int sample_shift, int shift_range) {
uint8_t sample_mask = 0xFFFF >> shift_range;
uint8_t nondata_mask = ~(sample_mask << data_shift);
int min_shift = sample_shift;
int k1 = filter_k1[filter];
int k2 = filter_k2[filter];
@ -82,17 +88,17 @@ static uint8_t attempt_to_encode_nibbles(psx_audio_encoder_channel_state_t *outs
outstate->mse = 0;
for (int i = 0; i < 28; i++) {
int32_t sample = ((i * pitch) >= sample_limit ? 0 : samples[i * pitch]) + outstate->qerr;
int32_t sample = ((i >= sample_limit) ? 0 : samples[i * pitch]) + outstate->qerr;
int32_t previous_values = (k1*outstate->prev1 + k2*outstate->prev2 + (1<<5))>>6;
int32_t sample_enc = sample - previous_values;
sample_enc <<= min_shift;
sample_enc += (1<<(12-1));
sample_enc >>= 12;
if(sample_enc < -8) { sample_enc = -8; }
if(sample_enc > +7) { sample_enc = +7; }
sample_enc &= 0xF;
sample_enc += (1<<(shift_range-1));
sample_enc >>= shift_range;
if(sample_enc < (-0x8000 >> shift_range)) { sample_enc = -0x8000 >> shift_range; }
if(sample_enc > (+0x7FFF >> shift_range)) { sample_enc = +0x7FFF >> shift_range; }
sample_enc &= sample_mask;
int32_t sample_dec = (int16_t) ((sample_enc&0xF) << 12);
int32_t sample_dec = (int16_t) ((sample_enc & sample_mask) << shift_range);
sample_dec >>= min_shift;
sample_dec += previous_values;
if (sample_dec > +0x7FFF) { sample_dec = +0x7FFF; }
@ -114,14 +120,14 @@ static uint8_t attempt_to_encode_nibbles(psx_audio_encoder_channel_state_t *outs
return hdr;
}
static uint8_t encode_nibbles(psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter_count) {
static uint8_t encode(psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter_count, int shift_range) {
psx_audio_encoder_channel_state_t proposed;
int64_t best_mse = ((int64_t)1<<(int64_t)50);
int best_filter = 0;
int best_sample_shift = 0;
for (int filter = 0; filter < filter_count; filter++) {
int true_min_shift = find_min_shift(state, samples, pitch, filter);
int true_min_shift = find_min_shift(state, samples, sample_limit, pitch, filter, shift_range);
// Testing has shown that the optimal shift can be off the true minimum shift
// by 1 in *either* direction.
@ -129,15 +135,15 @@ static uint8_t encode_nibbles(psx_audio_encoder_channel_state_t *state, int16_t
int min_shift = true_min_shift - 1;
int max_shift = true_min_shift + 1;
if (min_shift < 0) { min_shift = 0; }
if (max_shift > 12) { max_shift = 12; }
if (max_shift > shift_range) { max_shift = shift_range; }
for (int sample_shift = min_shift; sample_shift <= max_shift; sample_shift++) {
// ignore header here
attempt_to_encode_nibbles(
attempt_to_encode(
&proposed, state,
samples, sample_limit, pitch,
data, data_shift, data_pitch,
filter, sample_shift);
filter, sample_shift, shift_range);
if (best_mse > proposed.mse) {
best_mse = proposed.mse;
@ -148,46 +154,46 @@ static uint8_t encode_nibbles(psx_audio_encoder_channel_state_t *state, int16_t
}
// now go with the encoder
return attempt_to_encode_nibbles(
return attempt_to_encode(
state, state,
samples, sample_limit, pitch,
data, data_shift, data_pitch,
best_filter, best_sample_shift);
best_filter, best_sample_shift, shift_range);
}
static void encode_block_xa(int16_t *audio_samples, int audio_samples_limit, uint8_t *data, psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state) {
if (settings.bits_per_sample == 4) {
if (settings.stereo) {
data[0] = encode_nibbles(&(state->left), audio_samples, audio_samples_limit, 2, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT);
data[1] = encode_nibbles(&(state->right), audio_samples + 1, audio_samples_limit - 1, 2, data + 0x10, 4, 4, XA_ADPCM_FILTER_COUNT);
data[2] = encode_nibbles(&(state->left), audio_samples + 56, audio_samples_limit - 56, 2, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT);
data[3] = encode_nibbles(&(state->right), audio_samples + 56 + 1, audio_samples_limit - 56 - 1, 2, data + 0x11, 4, 4, XA_ADPCM_FILTER_COUNT);
data[8] = encode_nibbles(&(state->left), audio_samples + 56*2, audio_samples_limit - 56*2, 2, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT);
data[9] = encode_nibbles(&(state->right), audio_samples + 56*2 + 1, audio_samples_limit - 56*2 - 1, 2, data + 0x12, 4, 4, XA_ADPCM_FILTER_COUNT);
data[10] = encode_nibbles(&(state->left), audio_samples + 56*3, audio_samples_limit - 56*3, 2, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT);
data[11] = encode_nibbles(&(state->right), audio_samples + 56*3 + 1, audio_samples_limit - 56*3 - 1, 2, data + 0x13, 4, 4, XA_ADPCM_FILTER_COUNT);
data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 2, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[1] = encode(&(state->right), audio_samples + 1, audio_samples_limit, 2, data + 0x10, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[2] = encode(&(state->left), audio_samples + 56, audio_samples_limit - 28, 2, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[3] = encode(&(state->right), audio_samples + 56 + 1, audio_samples_limit - 28, 2, data + 0x11, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[8] = encode(&(state->left), audio_samples + 56*2, audio_samples_limit - 28*2, 2, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[9] = encode(&(state->right), audio_samples + 56*2 + 1, audio_samples_limit - 28*2, 2, data + 0x12, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[10] = encode(&(state->left), audio_samples + 56*3, audio_samples_limit - 28*3, 2, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[11] = encode(&(state->right), audio_samples + 56*3 + 1, audio_samples_limit - 28*3, 2, data + 0x13, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
} else {
data[0] = encode_nibbles(&(state->left), audio_samples, audio_samples_limit, 1, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT);
data[1] = encode_nibbles(&(state->right), audio_samples + 28, audio_samples_limit - 28, 1, data + 0x10, 4, 4, XA_ADPCM_FILTER_COUNT);
data[2] = encode_nibbles(&(state->left), audio_samples + 28*2, audio_samples_limit - 28*2, 1, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT);
data[3] = encode_nibbles(&(state->right), audio_samples + 28*3, audio_samples_limit - 28*3, 1, data + 0x11, 4, 4, XA_ADPCM_FILTER_COUNT);
data[8] = encode_nibbles(&(state->left), audio_samples + 28*4, audio_samples_limit - 28*4, 1, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT);
data[9] = encode_nibbles(&(state->right), audio_samples + 28*5, audio_samples_limit - 28*5, 1, data + 0x12, 4, 4, XA_ADPCM_FILTER_COUNT);
data[10] = encode_nibbles(&(state->left), audio_samples + 28*6, audio_samples_limit - 28*6, 1, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT);
data[11] = encode_nibbles(&(state->right), audio_samples + 28*7, audio_samples_limit - 28*7, 1, data + 0x13, 4, 4, XA_ADPCM_FILTER_COUNT);
data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 1, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[1] = encode(&(state->left), audio_samples + 28, audio_samples_limit - 28, 1, data + 0x10, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[2] = encode(&(state->left), audio_samples + 28*2, audio_samples_limit - 28*2, 1, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[3] = encode(&(state->left), audio_samples + 28*3, audio_samples_limit - 28*3, 1, data + 0x11, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[8] = encode(&(state->left), audio_samples + 28*4, audio_samples_limit - 28*4, 1, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[9] = encode(&(state->left), audio_samples + 28*5, audio_samples_limit - 28*5, 1, data + 0x12, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[10] = encode(&(state->left), audio_samples + 28*6, audio_samples_limit - 28*6, 1, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
data[11] = encode(&(state->left), audio_samples + 28*7, audio_samples_limit - 28*7, 1, data + 0x13, 4, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
}
} else {
/* if (settings->stereo) {
data[0] = encode_bytes(audio_samples, 2, data + 0x10);
data[1] = encode_bytes(audio_samples + 1, 2, data + 0x11);
data[2] = encode_bytes(audio_samples + 56, 2, data + 0x12);
data[3] = encode_bytes(audio_samples + 57, 2, data + 0x13);
if (settings.stereo) {
data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 2, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
data[1] = encode(&(state->right), audio_samples + 1, audio_samples_limit, 2, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
data[2] = encode(&(state->left), audio_samples + 56, audio_samples_limit - 28, 2, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
data[3] = encode(&(state->right), audio_samples + 56 + 1, audio_samples_limit - 28, 2, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
} else {
data[0] = encode_bytes(audio_samples, 1, data + 0x10);
data[1] = encode_bytes(audio_samples + 28, 1, data + 0x11);
data[2] = encode_bytes(audio_samples + 56, 1, data + 0x12);
data[3] = encode_bytes(audio_samples + 84, 1, data + 0x13);
} */
data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 1, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
data[1] = encode(&(state->left), audio_samples + 28, audio_samples_limit - 28, 1, data + 0x11, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
data[2] = encode(&(state->left), audio_samples + 28*2, audio_samples_limit - 28*2, 1, data + 0x12, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
data[3] = encode(&(state->left), audio_samples + 28*3, audio_samples_limit - 28*3, 1, data + 0x13, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS);
}
}
}
@ -218,6 +224,14 @@ uint32_t psx_audio_spu_get_samples_per_block(void) {
return 28;
}
// Computes the sector interleave factor for the given XA settings.
//
// The starting value is 2 for stereo and 4 for mono (i.e. 1/2 interleave for
// 37800 Hz 8-bit stereo at 1x speed). It is doubled when the frequency is
// PSX_AUDIO_XA_FREQ_SINGLE and doubled again when 4 bits per sample are used.
uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings) {
	int factor = 4;

	if (settings.stereo) {
		factor = 2;
	}
	if (settings.frequency == PSX_AUDIO_XA_FREQ_SINGLE) {
		factor *= 2;
	}
	if (settings.bits_per_sample == 4) {
		factor *= 2;
	}

	return factor;
}
static void psx_audio_xa_encode_init_sector(uint8_t *buffer, psx_audio_xa_settings_t settings) {
if (settings.format == PSX_AUDIO_XA_FORMAT_XACD) {
memset(buffer, 0, 2352);
@ -269,7 +283,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat
return (((j + 17) / 18) * xa_sector_size);
}
int psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length) {
void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length) {
if (output_length >= 2336) {
output[output_length - 2352 + 0x12] |= 0x80;
output[output_length - 2352 + 0x18] |= 0x80;
@ -284,13 +298,12 @@ int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* sample
return length;
}
int psx_audio_spu_encode(psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output) {
int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output) {
uint8_t prebuf[28];
uint8_t *buffer = output;
uint8_t *data;
for (int i = 0; i < sample_count; i += 28, buffer += 16) {
buffer[0] = encode_nibbles(&(state->left), samples + i, sample_count - i, 1, prebuf, 0, 1, SPU_ADPCM_FILTER_COUNT);
buffer[0] = encode(state, samples + i * pitch, sample_count - i, pitch, prebuf, 0, 1, SPU_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS);
buffer[1] = 0;
for (int j = 0; j < 28; j+=2) {
@ -302,20 +315,22 @@ int psx_audio_spu_encode(psx_audio_encoder_state_t *state, int16_t* samples, int
}
int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start) {
psx_audio_encoder_state_t state;
memset(&state, 0, sizeof(psx_audio_encoder_state_t));
int length = psx_audio_spu_encode(&state, samples, sample_count, output);
psx_audio_encoder_channel_state_t state;
memset(&state, 0, sizeof(psx_audio_encoder_channel_state_t));
int length = psx_audio_spu_encode(&state, samples, sample_count, 1, output);
if (length >= 32) {
if (loop_start < 0) {
output[1] = 4;
output[length - 16 + 1] = 1;
//output[1] = PSX_AUDIO_SPU_LOOP_START;
output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_END;
} else {
psx_audio_spu_set_flag_at_sample(output, loop_start, 4);
output[length - 16 + 1] = 3;
psx_audio_spu_set_flag_at_sample(output, loop_start, PSX_AUDIO_SPU_LOOP_START);
output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_REPEAT;
}
} else if (length >= 16) {
output[1] = loop_start >= 0 ? 7 : 5;
output[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END;
if (loop_start >= 0)
output[1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
}
return length;

View File

@ -67,11 +67,12 @@ uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t setting
uint32_t psx_audio_spu_get_buffer_size_per_block(void);
uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings);
uint32_t psx_audio_spu_get_samples_per_block(void);
uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings);
int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output);
int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output);
int psx_audio_spu_encode(psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output);
int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output);
int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start);
int psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length);
void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length);
void psx_audio_spu_set_flag_at_sample(uint8_t* spu_data, int sample_pos, int flag);
// cdrom.c

View File

@ -1,5 +1,7 @@
project('psxavenc', 'c', default_options: ['c_std=c11'])
libm_dep = meson.get_compiler('c').find_library('m')
ffmpeg = [
dependency('libavformat'),
dependency('libavcodec'),
@ -21,4 +23,4 @@ executable('psxavenc', [
'psxavenc/filefmt.c',
'psxavenc/mdec.c',
'psxavenc/psxavenc.c'
], dependencies: [ffmpeg, libpsxav_dep], install: true)
], dependencies: [libm_dep, ffmpeg, libpsxav_dep], install: true)

View File

@ -24,15 +24,22 @@ freely, subject to the following restrictions:
#include "common.h"
void init_sector_buffer_video(uint8_t *buffer, settings_t *settings) {
memset(buffer,0,2352);
memset(buffer+0x001,0xFF,10);
int offset;
if (settings->format == FORMAT_STR2CD) {
memset(buffer, 0, 2352);
memset(buffer+0x001, 0xFF, 10);
buffer[0x00F] = 0x02;
offset = 0x10;
} else {
memset(buffer, 0, 2336);
offset = 0;
}
buffer[0x00F] = 0x02;
buffer[0x010] = settings->file_number;
buffer[0x011] = settings->channel_number & 0x1F;
buffer[0x012] = 0x08 | 0x40;
buffer[0x013] = 0x00;
memcpy(buffer + 0x014, buffer + 0x010, 4);
buffer[offset+0] = settings->file_number;
buffer[offset+1] = settings->channel_number & 0x1F;
buffer[offset+2] = 0x08 | 0x40;
buffer[offset+3] = 0x00;
memcpy(buffer + offset + 4, buffer + offset, 4);
}
void calculate_edc_data(uint8_t *buffer)

View File

@ -28,6 +28,9 @@ freely, subject to the following restrictions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <unistd.h>
#include <libavutil/opt.h>
#include <libavcodec/avcodec.h>
@ -36,27 +39,33 @@ freely, subject to the following restrictions:
#include <libswresample/swresample.h>
#include <libpsxav.h>
#define NUM_FORMATS 9
#define FORMAT_XA 0
#define FORMAT_XACD 1
#define FORMAT_SPU 2
#define FORMAT_STR2 3
#define FORMAT_SPUI 3
#define FORMAT_VAG 4
#define FORMAT_VAGI 5
#define FORMAT_STR2 6
#define FORMAT_STR2CD 7
#define FORMAT_SBS2 8
#define MAX_UNMUXED_BLOCKS 9
typedef struct {
int frame_index;
int frame_block_index;
int frame_block_count;
int frame_data_offset;
int frame_max_size;
int frame_block_base_overflow;
int frame_block_overflow_num;
int frame_block_overflow_den;
uint16_t bits_value;
int bits_left;
uint8_t unmuxed[2016*MAX_UNMUXED_BLOCKS];
uint8_t *frame_output;
int bytes_used;
int blocks_used;
int uncomp_hwords_used;
int quant_scale;
int32_t *dct_block_lists[6];
int quant_scale_sum;
float *dct_block_lists[6];
} vid_encoder_state_t;
typedef struct {
@ -69,8 +78,6 @@ typedef struct {
AVStream* video_stream;
AVCodecContext* audio_codec_context;
AVCodecContext* video_codec_context;
AVCodec* audio_codec;
AVCodec* video_codec;
struct SwrContext* resampler;
struct SwsContext* scaler;
AVFrame* frame;
@ -81,17 +88,28 @@ typedef struct {
} av_decoder_state_t;
typedef struct {
bool quiet;
bool show_progress;
int format; // FORMAT_*
bool stereo; // false or true
int channels;
int cd_speed; // 1 or 2
int frequency; // 18900 or 37800 Hz
int bits_per_sample; // 4 or 8
int file_number; // 00-FF
int channel_number; // 00-1F
int interleave;
int alignment;
bool loop;
int video_width;
int video_height;
int video_fps_num; // FPS numerator
int video_fps_den; // FPS denominator
bool ignore_aspect_ratio;
char *swresample_options;
char *swscale_options;
int16_t *audio_samples;
int audio_sample_count;
@ -99,8 +117,11 @@ typedef struct {
int video_frame_count;
av_decoder_state_t decoder_state_av;
vid_encoder_state_t state_vid;
bool end_of_input;
time_t start_time;
time_t last_progress_update;
} settings_t;
// cdrom.c
@ -108,17 +129,19 @@ void init_sector_buffer_video(uint8_t *buffer, settings_t *settings);
void calculate_edc_data(uint8_t *buffer);
// decoding.c
bool open_av_data(const char *filename, settings_t *settings);
bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required);
bool poll_av_data(settings_t *settings);
bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames);
void pull_all_av_data(settings_t *settings);
void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames);
void close_av_data(settings_t *settings);
// filefmt.c
void encode_file_spu(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output);
void encode_file_xa(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output);
void encode_file_spu(settings_t *settings, FILE *output);
void encode_file_spu_interleaved(settings_t *settings, FILE *output);
void encode_file_xa(settings_t *settings, FILE *output);
void encode_file_str(settings_t *settings, FILE *output);
void encode_file_sbs(settings_t *settings, FILE *output);
// mdec.c
void encode_block_str(uint8_t *video_frames, int video_frame_count, uint8_t *output, settings_t *settings);
void encode_frame_bs(uint8_t *video_frame, settings_t *settings);
void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings);

View File

@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
@ -23,9 +24,7 @@ freely, subject to the following restrictions:
#include "common.h"
static void poll_av_packet(settings_t *settings, AVPacket *packet);
int decode_audio_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
int decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
int ret;
if (packet != NULL) {
@ -44,29 +43,8 @@ int decode_audio_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, A
}
}
// Optionally sends `packet` to `codec`, then tries to receive one decoded
// frame into `frame`.
//
// Returns 0 if sending the packet fails, or if receiving fails with any error
// other than AVERROR(EAGAIN). Returns 1 otherwise; note that the EAGAIN case
// also returns 1 but leaves *frame_size untouched.
int decode_video_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) {
	// Only feed the decoder when the caller actually supplied a packet.
	if (packet != NULL && avcodec_send_packet(codec, packet) != 0) {
		return 0;
	}

	int status = avcodec_receive_frame(codec, frame);
	if (status < 0) {
		// EAGAIN is reported as success; every other error as failure.
		return (status == AVERROR(EAGAIN)) ? 1 : 0;
	}

	// Store the (non-negative) return code of avcodec_receive_frame().
	*frame_size = status;
	return 1;
}
bool open_av_data(const char *filename, settings_t *settings)
bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required)
{
AVPacket packet;
av_decoder_state_t* av = &(settings->decoder_state_av);
av->video_next_pts = 0.0;
av->frame = NULL;
@ -79,11 +57,13 @@ bool open_av_data(const char *filename, settings_t *settings)
av->video_stream = NULL;
av->audio_codec_context = NULL;
av->video_codec_context = NULL;
av->audio_codec = NULL;
av->video_codec = NULL;
av->resampler = NULL;
av->scaler = NULL;
if (settings->quiet) {
av_log_set_level(AV_LOG_QUIET);
}
av->format = avformat_alloc_context();
if (avformat_open_input(&(av->format), filename, NULL, NULL)) {
return false;
@ -92,89 +72,157 @@ bool open_av_data(const char *filename, settings_t *settings)
return false;
}
for (int i = 0; i < av->format->nb_streams; i++) {
if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
if (av->audio_stream_index >= 0) {
fprintf(stderr, "open_av_data: found multiple audio tracks?\n");
return false;
if (use_audio) {
for (int i = 0; i < av->format->nb_streams; i++) {
if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
if (av->audio_stream_index >= 0) {
fprintf(stderr, "Input file must have a single audio track\n");
return false;
}
av->audio_stream_index = i;
}
av->audio_stream_index = i;
}
}
if (av->audio_stream_index == -1) {
return false;
}
for (int i = 0; i < av->format->nb_streams; i++) {
if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
if (av->video_stream_index >= 0) {
fprintf(stderr, "open_av_data: found multiple video tracks?\n");
return false;
}
av->video_stream_index = i;
if (audio_required && av->audio_stream_index == -1) {
fprintf(stderr, "Input file has no audio data\n");
return false;
}
}
av->audio_stream = av->format->streams[av->audio_stream_index];
if (use_video) {
for (int i = 0; i < av->format->nb_streams; i++) {
if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
if (av->video_stream_index >= 0) {
fprintf(stderr, "Input file must have a single video track\n");
return false;
}
av->video_stream_index = i;
}
}
if (video_required && av->video_stream_index == -1) {
fprintf(stderr, "Input file has no video data\n");
return false;
}
}
av->audio_stream = (av->audio_stream_index != -1 ? av->format->streams[av->audio_stream_index] : NULL);
av->video_stream = (av->video_stream_index != -1 ? av->format->streams[av->video_stream_index] : NULL);
av->audio_codec = avcodec_find_decoder(av->audio_stream->codecpar->codec_id);
av->audio_codec_context = avcodec_alloc_context3(av->audio_codec);
if (av->audio_codec_context == NULL) {
return false;
}
if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0) {
return false;
}
if (avcodec_open2(av->audio_codec_context, av->audio_codec, NULL) < 0) {
return false;
}
av->resampler = swr_alloc();
av_opt_set_int(av->resampler, "in_channel_count", av->audio_codec_context->channels, 0);
av_opt_set_int(av->resampler, "in_channel_layout", av->audio_codec_context->channel_layout, 0);
av_opt_set_int(av->resampler, "in_sample_rate", av->audio_codec_context->sample_rate, 0);
av_opt_set_sample_fmt(av->resampler, "in_sample_fmt", av->audio_codec_context->sample_fmt, 0);
if (av->audio_stream != NULL) {
const AVCodec *codec = avcodec_find_decoder(av->audio_stream->codecpar->codec_id);
av->audio_codec_context = avcodec_alloc_context3(codec);
if (av->audio_codec_context == NULL) {
return false;
}
if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0) {
return false;
}
if (avcodec_open2(av->audio_codec_context, codec, NULL) < 0) {
return false;
}
av->sample_count_mul = settings->stereo ? 2 : 1;
av_opt_set_int(av->resampler, "out_channel_count", settings->stereo ? 2 : 1, 0);
av_opt_set_int(av->resampler, "out_channel_layout", settings->stereo ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO, 0);
av_opt_set_int(av->resampler, "out_sample_rate", settings->frequency, 0);
av_opt_set_sample_fmt(av->resampler, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
AVChannelLayout layout;
layout.nb_channels = settings->channels;
if (settings->channels <= 2) {
layout.order = AV_CHANNEL_ORDER_NATIVE;
layout.u.mask = (settings->channels == 2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
} else {
layout.order = AV_CHANNEL_ORDER_UNSPEC;
}
if (!settings->quiet && settings->channels > av->audio_codec_context->ch_layout.nb_channels) {
fprintf(stderr, "Warning: input file has less than %d channels\n", settings->channels);
}
if (swr_init(av->resampler) < 0) {
return false;
av->sample_count_mul = settings->channels;
if (swr_alloc_set_opts2(
&av->resampler,
&layout,
AV_SAMPLE_FMT_S16,
settings->frequency,
&av->audio_codec_context->ch_layout,
av->audio_codec_context->sample_fmt,
av->audio_codec_context->sample_rate,
0,
NULL
) < 0) {
return false;
}
if (settings->swresample_options) {
if (av_opt_set_from_string(av->resampler, settings->swresample_options, NULL, "=", ":,") < 0) {
return false;
}
}
if (swr_init(av->resampler) < 0) {
return false;
}
}
if (av->video_stream != NULL) {
av->video_codec = avcodec_find_decoder(av->video_stream->codecpar->codec_id);
av->video_codec_context = avcodec_alloc_context3(av->video_codec);
const AVCodec *codec = avcodec_find_decoder(av->video_stream->codecpar->codec_id);
av->video_codec_context = avcodec_alloc_context3(codec);
if(av->video_codec_context == NULL) {
return false;
}
if (avcodec_parameters_to_context(av->video_codec_context, av->video_stream->codecpar) < 0) {
return false;
}
if (avcodec_open2(av->video_codec_context, av->video_codec, NULL) < 0) {
if (avcodec_open2(av->video_codec_context, codec, NULL) < 0) {
return false;
}
if (!settings->quiet && (
settings->video_width > av->video_codec_context->width ||
settings->video_height > av->video_codec_context->height
)) {
fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n",
settings->video_width, settings->video_height
);
}
if (!settings->ignore_aspect_ratio) {
// Reduce the provided size so that it matches the input file's
// aspect ratio.
double src_ratio = (double)av->video_codec_context->width / (double)av->video_codec_context->height;
double dst_ratio = (double)settings->video_width / (double)settings->video_height;
if (src_ratio < dst_ratio) {
settings->video_width = (int)((double)settings->video_height * src_ratio + 15.0) & ~15;
} else {
settings->video_height = (int)((double)settings->video_width / src_ratio + 15.0) & ~15;
}
}
av->scaler = sws_getContext(
av->video_codec_context->width,
av->video_codec_context->height,
av->video_codec_context->pix_fmt,
settings->video_width,
settings->video_height,
AV_PIX_FMT_RGBA,
AV_PIX_FMT_NV21,
SWS_BICUBIC,
NULL,
NULL,
NULL);
NULL
);
// Is this even necessary? -- spicyjpeg
sws_setColorspaceDetails(
av->scaler,
sws_getCoefficients(av->video_codec_context->colorspace),
(av->video_codec_context->color_range == AVCOL_RANGE_JPEG),
sws_getCoefficients(SWS_CS_ITU601),
true,
0,
0,
0
);
if (settings->swscale_options) {
if (av_opt_set_from_string(av->scaler, settings->swscale_options, NULL, "=", ":,") < 0) {
return false;
}
}
av->video_frame_src_size = 4*av->video_codec_context->width*av->video_codec_context->height;
av->video_frame_dst_size = 4*settings->video_width*settings->video_height;
av->video_frame_dst_size = 3*settings->video_width*settings->video_height/2;
}
av_init_packet(&packet);
av->frame = av_frame_alloc();
if (av->frame == NULL) {
return false;
@ -184,6 +232,7 @@ bool open_av_data(const char *filename, settings_t *settings)
settings->audio_sample_count = 0;
settings->video_frames = NULL;
settings->video_frame_count = 0;
settings->end_of_input = false;
return true;
}
@ -195,7 +244,7 @@ static void poll_av_packet_audio(settings_t *settings, AVPacket *packet)
int frame_size, frame_sample_count;
uint8_t *buffer[1];
if (decode_audio_frame(av->audio_codec_context, av->frame, &frame_size, packet)) {
if (decode_frame(av->audio_codec_context, av->frame, &frame_size, packet)) {
size_t buffer_size = sizeof(int16_t) * av->sample_count_mul * swr_get_out_samples(av->resampler, av->frame->nb_samples);
buffer[0] = malloc(buffer_size);
memset(buffer[0], 0, buffer_size);
@ -212,54 +261,61 @@ static void poll_av_packet_video(settings_t *settings, AVPacket *packet)
av_decoder_state_t* av = &(settings->decoder_state_av);
int frame_size;
double pts_step = ((double)1.0*(double)settings->video_fps_den)/(double)settings->video_fps_num;
if (decode_video_frame(av->video_codec_context, av->frame, &frame_size, packet)) {
int plane_size = settings->video_width*settings->video_height;
int dst_strides[2] = {
settings->video_width, settings->video_width
};
if (decode_frame(av->video_codec_context, av->frame, &frame_size, packet)) {
if (!av->frame->width || !av->frame->height || !av->frame->data[0]) {
return;
}
// Some files seem to have timestamps starting from a negative value
// (but otherwise valid) for whatever reason.
double pts = (((double)av->frame->pts)*(double)av->video_stream->time_base.num)/av->video_stream->time_base.den;
//fprintf(stderr, "%f\n", pts);
// Drop frames with negative PTS values
if(pts < 0.0) {
// do nothing
//if (pts < 0.0) {
//return;
//}
if (settings->video_frame_count >= 1 && pts < av->video_next_pts) {
return;
}
if((settings->video_frame_count) >= 1 && pts < av->video_next_pts) {
// do nothing
return;
}
if((settings->video_frame_count) < 1) {
if ((settings->video_frame_count) < 1) {
av->video_next_pts = pts;
} else {
av->video_next_pts += pts_step;
}
double pts_step = ((double)1.0*(double)settings->video_fps_den)/(double)settings->video_fps_num;
//fprintf(stderr, "%d %f %f %f\n", (settings->video_frame_count), pts, av->video_next_pts, pts_step);
av->video_next_pts += pts_step;
// FIXME: increasing framerate doesn't fill it in with duplicate frames!
assert(av->video_next_pts > pts);
//size_t buffer_size = frame_count_mul;
//buffer[0] = malloc(buffer_size);
//memset(buffer[0], 0, buffer_size);
settings->video_frames = realloc(settings->video_frames, (settings->video_frame_count + 1) * av->video_frame_dst_size);
int dst_strides[1] = {
settings->video_width*4,
// Insert duplicate frames if the frame rate of the input stream is
// lower than the target frame rate.
int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step);
if (dupe_frames < 0) dupe_frames = 0;
settings->video_frames = realloc(
settings->video_frames,
(settings->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size
);
for (; dupe_frames; dupe_frames--) {
memcpy(
(settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count),
(settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count-1),
av->video_frame_dst_size
);
settings->video_frame_count += 1;
av->video_next_pts += pts_step;
}
uint8_t *dst_frame = (settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count);
uint8_t *dst_pointers[2] = {
dst_frame, dst_frame + plane_size
};
uint8_t *dst_pointers[1] = {
(settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count),
};
sws_scale(av->scaler, av->frame->data, av->frame->linesize, 0, av->frame->height, dst_pointers, dst_strides);
sws_scale(av->scaler, (const uint8_t *const *) av->frame->data, av->frame->linesize, 0, av->frame->height, dst_pointers, dst_strides);
settings->video_frame_count += 1;
//free(buffer[0]);
}
}
// Routes a demuxed packet to the audio or video handler, depending on which
// of the two selected streams it belongs to. Packets from any other stream
// are ignored.
static void poll_av_packet(settings_t *settings, AVPacket *packet)
{
	av_decoder_state_t *state = &settings->decoder_state_av;

	if (packet->stream_index == state->audio_stream_index) {
		poll_av_packet_audio(settings, packet);
		return;
	}

	if (packet->stream_index == state->video_stream_index) {
		poll_av_packet_video(settings, packet);
	}
}
@ -268,29 +324,38 @@ bool poll_av_data(settings_t *settings)
av_decoder_state_t* av = &(settings->decoder_state_av);
AVPacket packet;
if (settings->end_of_input) {
return false;
}
if (av_read_frame(av->format, &packet) >= 0) {
poll_av_packet(settings, &packet);
if (packet.stream_index == av->audio_stream_index) {
poll_av_packet_audio(settings, &packet);
} else if (packet.stream_index == av->video_stream_index) {
poll_av_packet_video(settings, &packet);
}
av_packet_unref(&packet);
return true;
} else {
// out is always padded out with 4032 "0" samples, this makes calculations elsewhere easier
memset((settings->audio_samples) + (settings->audio_sample_count), 0, 4032 * av->sample_count_mul * sizeof(int16_t));
if (av->audio_stream) {
memset((settings->audio_samples) + (settings->audio_sample_count), 0, 4032 * av->sample_count_mul * sizeof(int16_t));
}
settings->end_of_input = true;
return false;
}
}
bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames)
{
//
av_decoder_state_t* av = &(settings->decoder_state_av);
while (settings->audio_sample_count < needed_audio_samples || settings->video_frame_count < needed_video_frames) {
//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames);
if(!poll_av_data(settings)) {
//fprintf(stderr, "cannot ensure\n");
return false;
if (!poll_av_data(settings)) {
// Keep returning true even if the end of the input file has been
// reached, if the buffer is not yet completely empty.
return (settings->audio_sample_count || !needed_audio_samples)
&& (settings->video_frame_count || !needed_video_frames);
}
}
//fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames);
@ -298,16 +363,6 @@ bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_v
return true;
}
// Keeps polling the input until poll_av_data() reports false, then prints the
// number of audio samples and video frames currently buffered in settings.
void pull_all_av_data(settings_t *settings)
{
	for (;;) {
		if (!poll_av_data(settings)) {
			break;
		}
	}

	fprintf(stderr, "Loaded %d samples.\n", settings->audio_sample_count);
	fprintf(stderr, "Loaded %d frames.\n", settings->video_frame_count);
}
void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames)
{
av_decoder_state_t* av = &(settings->decoder_state_av);
@ -319,14 +374,14 @@ void retire_av_data(settings_t *settings, int retired_audio_samples, int retired
int sample_size = sizeof(int16_t);
if (settings->audio_sample_count > retired_audio_samples) {
memmove(settings->audio_samples, settings->audio_samples + retired_audio_samples, (settings->audio_sample_count - retired_audio_samples)*sample_size);
settings->audio_sample_count -= retired_audio_samples;
}
settings->audio_sample_count -= retired_audio_samples;
int frame_size = av->video_frame_dst_size;
if (settings->video_frame_count > retired_video_frames) {
memmove(settings->video_frames, settings->video_frames + retired_video_frames*frame_size, (settings->video_frame_count - retired_video_frames)*frame_size);
settings->video_frame_count -= retired_video_frames;
}
settings->video_frame_count -= retired_video_frames;
}
void close_av_data(settings_t *settings)

View File

@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
@ -24,16 +25,29 @@ freely, subject to the following restrictions:
#include "common.h"
#include "libpsxav.h"
// Rate limiter for progress output.
//
// Returns the time elapsed since settings->start_time (in time() units), but
// only if progress display is enabled and the elapsed time has advanced past
// the value returned by the previous successful call; that value is recorded
// in settings->last_progress_update. Returns 0 in every other case, so callers
// can use the result directly as a "print a progress line now" flag.
static time_t get_elapsed_time(settings_t *settings) {
	time_t result = 0;

	if (settings->show_progress) {
		time_t elapsed = time(NULL) - settings->start_time;

		if (elapsed > settings->last_progress_update) {
			settings->last_progress_update = elapsed;
			result = elapsed;
		}
	}

	return result;
}
static psx_audio_xa_settings_t settings_to_libpsxav_xa_audio(settings_t *settings) {
psx_audio_xa_settings_t new_settings;
new_settings.bits_per_sample = settings->bits_per_sample;
new_settings.frequency = settings->frequency;
new_settings.stereo = settings->stereo;
new_settings.stereo = settings->channels == 2;
new_settings.file_number = settings->file_number;
new_settings.channel_number = settings->channel_number;
switch (settings->format) {
case FORMAT_XA:
case FORMAT_STR2:
new_settings.format = PSX_AUDIO_XA_FORMAT_XA;
break;
default:
@ -44,93 +58,324 @@ static psx_audio_xa_settings_t settings_to_libpsxav_xa_audio(settings_t *setting
return new_settings;
};
void encode_file_spu(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output) {
psx_audio_encoder_state_t audio_state;
// Stores the low 32 bits of value at dst, most significant byte first.
static void vag_store_be32(uint8_t *dst, uint32_t value) {
	dst[0] = (uint8_t)(value >> 24);
	dst[1] = (uint8_t)(value >> 16);
	dst[2] = (uint8_t)(value >> 8);
	dst[3] = (uint8_t)value;
}

// Stores the low 32 bits of value at dst, least significant byte first.
static void vag_store_le32(uint8_t *dst, uint32_t value) {
	dst[0] = (uint8_t)value;
	dst[1] = (uint8_t)(value >> 8);
	dst[2] = (uint8_t)(value >> 16);
	dst[3] = (uint8_t)(value >> 24);
}

// Fills in the fields of a .VAG file header.
//
// size_per_channel: length in bytes of the encoded data of each channel.
// header:           destination buffer, at least 0x30 (48) bytes long. Bytes
//                   0x14-0x1d are not written by this function, so the caller
//                   is expected to have zeroed the buffer beforehand.
// settings:         provides the interleave, frequency and channel count.
void write_vag_header(int size_per_channel, uint8_t *header, settings_t *settings) {
	// Magic: "VAGi" if an interleave size is set, "VAGp" otherwise
	memcpy(header, "VAG", 3);
	header[0x03] = settings->interleave ? 'i' : 'p';

	// Version (big-endian)
	vag_store_be32(header + 0x04, 0x00000020);

	// Interleave (little-endian)
	vag_store_le32(header + 0x08, (uint32_t)settings->interleave);

	// Length of data for each channel (big-endian)
	vag_store_be32(header + 0x0c, (uint32_t)size_per_channel);

	// Sample rate (big-endian)
	vag_store_be32(header + 0x10, (uint32_t)settings->frequency);

	// Number of channels (little-endian)
	header[0x1e] = (uint8_t)settings->channels;
	header[0x1f] = 0x00;

	// Filename (16 bytes), left empty
	memset(header + 0x20, 0, 16);
}
void encode_file_spu(settings_t *settings, FILE *output) {
psx_audio_encoder_channel_state_t audio_state;
int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
int block_size = psx_audio_spu_get_buffer_size_per_block();
uint8_t buffer[16];
int block_count;
memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t));
for (int i = 0; i < audio_sample_count; i += audio_samples_per_block) {
int samples_length = audio_sample_count - i;
// The header must be written after the data as we don't yet know the
// number of audio samples.
if (settings->format == FORMAT_VAG) {
fseek(output, 48, SEEK_SET);
}
for (block_count = 0; ensure_av_data(settings, audio_samples_per_block, 0); block_count++) {
int samples_length = settings->audio_sample_count;
if (samples_length > audio_samples_per_block) samples_length = audio_samples_per_block;
int length = psx_audio_spu_encode(&audio_state, audio_samples + i, samples_length, buffer);
if (i == 0) {
buffer[1] = PSX_AUDIO_SPU_LOOP_START;
} else if ((i + audio_samples_per_block) >= audio_sample_count) {
buffer[1] = PSX_AUDIO_SPU_LOOP_END;
int length = psx_audio_spu_encode(&audio_state, settings->audio_samples, samples_length, 1, buffer);
if (!block_count) {
// This flag is not required as the SPU already resets the loop
// address when starting playback of a sample.
//buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
}
if (settings->end_of_input) {
buffer[1] |= settings->loop ? PSX_AUDIO_SPU_LOOP_REPEAT : PSX_AUDIO_SPU_LOOP_END;
}
retire_av_data(settings, samples_length, 0);
fwrite(buffer, length, 1, output);
time_t t = get_elapsed_time(settings);
if (t) {
fprintf(stderr, "\rBlock: %6d | Encoding speed: %5.2fx",
block_count,
(double)(block_count*audio_samples_per_block) / (double)(settings->frequency*t)
);
}
}
if (settings->format == FORMAT_VAG) {
uint8_t header[48];
memset(header, 0, 48);
write_vag_header(block_count*block_size, header, settings);
fseek(output, 0, SEEK_SET);
fwrite(header, 48, 1, output);
}
}
void encode_file_xa(int16_t *audio_samples, int audio_sample_count, settings_t *settings, FILE *output) {
// Encodes the input audio as interleaved SPU-ADPCM data and writes it to
// output.
//
// The audio is processed in chunks. For every chunk, each channel is encoded
// separately into a zero-filled buffer of settings->interleave bytes rounded
// up to a multiple of settings->alignment, and the per-channel buffers are
// written one after another. When settings->format is FORMAT_VAGI, room for a
// header (48 bytes rounded up to the alignment) is left at the start of the
// file and the header is written last, once the number of chunks is known.
//
// If a buffer cannot be allocated, an error is printed to stderr and the
// function returns early; all memory allocated so far is released.
void encode_file_spu_interleaved(settings_t *settings, FILE *output) {
	int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * settings->channels;

	// NOTE: since the interleaved .vag format is not standardized, some tools
	// (such as vgmstream) will not properly play files with interleave < 2048,
	// alignment != 2048 or channels != 2.
	int buffer_size = settings->interleave + settings->alignment - 1;
	buffer_size -= buffer_size % settings->alignment;
	int header_size = 48 + settings->alignment - 1;
	header_size -= header_size % settings->alignment;

	psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size);
	uint8_t *buffer = malloc(buffer_size);
	uint8_t *header = NULL;
	int audio_samples_per_block = psx_audio_spu_get_samples_per_block();
	int block_size = psx_audio_spu_get_buffer_size_per_block();
	int audio_samples_per_chunk = settings->interleave / block_size * audio_samples_per_block;
	int chunk_count;

	if (audio_state == NULL || buffer == NULL) {
		fprintf(stderr, "Error: failed to allocate audio encoder buffers\n");
		goto cleanup;
	}

	memset(audio_state, 0, audio_state_size);

	// The header must be written after the data, as the number of chunks is
	// not known yet; skip over the space reserved for it.
	if (settings->format == FORMAT_VAGI) {
		fseek(output, header_size, SEEK_SET);
	}

	for (chunk_count = 0; ensure_av_data(settings, audio_samples_per_chunk*settings->channels, 0); chunk_count++) {
		// audio_sample_count counts samples across all channels; the last
		// chunk may hold fewer samples than a full one.
		int samples_length = settings->audio_sample_count / settings->channels;
		if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk;

		for (int ch = 0; ch < settings->channels; ch++) {
			memset(buffer, 0, buffer_size);
			// The channel count is passed as the fourth argument, presumably
			// as the stride between consecutive samples of one channel in the
			// interleaved input -- TODO confirm against libpsxav.
			int length = psx_audio_spu_encode(audio_state + ch, settings->audio_samples + ch, samples_length, settings->channels, buffer);

			if (length) {
				//buffer[1] |= PSX_AUDIO_SPU_LOOP_START;
				// The flags go into the second byte of the last encoded block
				// of the chunk.
				if (settings->loop) {
					buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_REPEAT;
				}
				if (settings->end_of_input) {
					buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_END;
				}
			}

			fwrite(buffer, buffer_size, 1, output);

			time_t t = get_elapsed_time(settings);
			if (t) {
				fprintf(stderr, "\rChunk: %6d | Encoding speed: %5.2fx",
					chunk_count,
					(double)(chunk_count*audio_samples_per_chunk) / (double)(settings->frequency*t)
				);
			}
		}

		retire_av_data(settings, samples_length*settings->channels, 0);
	}

	if (settings->format == FORMAT_VAGI) {
		header = malloc(header_size);
		if (header == NULL) {
			fprintf(stderr, "Error: failed to allocate .vag header buffer\n");
			goto cleanup;
		}

		memset(header, 0, header_size);
		write_vag_header(chunk_count*settings->interleave, header, settings);
		fseek(output, 0, SEEK_SET);
		fwrite(header, header_size, 1, output);
	}

cleanup:
	// free(NULL) is a no-op, so this is safe on every path.
	free(header);
	free(audio_state);
	free(buffer);
}
void encode_file_xa(settings_t *settings, FILE *output) {
psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings);
psx_audio_encoder_state_t audio_state;
int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
int av_sample_mul = settings->stereo ? 2 : 1;
uint8_t buffer[2352];
memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
for (int i = 0; i < audio_sample_count; i += audio_samples_per_sector) {
int samples_length = audio_sample_count - i;
for (int j = 0; ensure_av_data(settings, audio_samples_per_sector*settings->channels, 0); j++) {
int samples_length = settings->audio_sample_count / settings->channels;
if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector;
int length = psx_audio_xa_encode(xa_settings, &audio_state, audio_samples + (i * av_sample_mul), samples_length, buffer);
if ((i + audio_samples_per_sector) >= audio_sample_count) {
int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer);
if (settings->end_of_input) {
psx_audio_xa_encode_finalize(xa_settings, buffer, length);
}
if (settings->format == FORMAT_XACD) {
int t = j + 75*2;
// Put the time in
buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4);
buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4);
buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
}
retire_av_data(settings, samples_length*settings->channels, 0);
fwrite(buffer, length, 1, output);
time_t t = get_elapsed_time(settings);
if (t) {
fprintf(stderr, "\rLBA: %6d | Encoding speed: %5.2fx",
j,
(double)(j*audio_samples_per_sector) / (double)(settings->frequency*t)
);
}
}
}
void encode_file_str(settings_t *settings, FILE *output) {
uint8_t buffer[2352*8];
psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings);
psx_audio_encoder_state_t audio_state;
int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
int av_sample_mul = settings->stereo ? 2 : 1;
psx_audio_encoder_state_t audio_state;
int audio_samples_per_sector;
uint8_t buffer[2352];
int interleave;
int video_sectors_per_block;
if (settings->decoder_state_av.audio_stream) {
// 1/N audio, (N-1)/N video
audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings);
interleave = psx_audio_xa_get_sector_interleave(xa_settings) * settings->cd_speed;
video_sectors_per_block = interleave - 1;
} else {
// 0/1 audio, 1/1 video
audio_samples_per_sector = 0;
interleave = 1;
video_sectors_per_block = 1;
}
if (!settings->quiet) {
fprintf(stderr, "Interleave: %d/%d audio, %d/%d video\n",
interleave - video_sectors_per_block, interleave, video_sectors_per_block, interleave);
}
memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t));
// e.g. 15fps = (150*7/8/15) = 8.75 blocks per frame
settings->state_vid.frame_block_base_overflow = (75*settings->cd_speed) * video_sectors_per_block * settings->video_fps_den;
settings->state_vid.frame_block_overflow_den = interleave * settings->video_fps_num;
double frame_size = (double)settings->state_vid.frame_block_base_overflow / (double)settings->state_vid.frame_block_overflow_den;
if (!settings->quiet) {
fprintf(stderr, "Frame size: %.2f sectors\n", frame_size);
}
settings->state_vid.frame_output = malloc(2016 * (int)ceil(frame_size));
settings->state_vid.frame_index = 0;
settings->state_vid.bits_value = 0;
settings->state_vid.bits_left = 16;
settings->state_vid.frame_block_index = 0;
settings->state_vid.frame_block_count = 0;
settings->state_vid.frame_data_offset = 0;
settings->state_vid.frame_max_size = 0;
settings->state_vid.frame_block_overflow_num = 0;
// Number of total sectors per second: 150
// Proportion of sectors for video due to A/V interleave: 7/8
// 15FPS = (150*7/8/15) = 8.75 blocks per frame
settings->state_vid.frame_block_base_overflow = 150*7*settings->video_fps_den;
settings->state_vid.frame_block_overflow_den = 8*settings->video_fps_num;
//fprintf(stderr, "%f\n", ((double)settings->state_vid.frame_block_base_overflow)/((double)settings->state_vid.frame_block_overflow_den)); abort();
settings->state_vid.quant_scale_sum = 0;
// FIXME: this needs an extra frame to prevent A/V desync
const int frames_needed = 2;
for (int j = 0; ensure_av_data(settings, audio_samples_per_sector*av_sample_mul*frames_needed, 1*frames_needed); j+=18) {
psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, audio_samples_per_sector, buffer + 2352 * 7);
// TODO: the final buffer
for(int k = 0; k < 7; k++) {
init_sector_buffer_video(buffer + 2352*k, settings);
int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size);
if (frames_needed < 2) frames_needed = 2;
for (int j = 0; !settings->end_of_input || settings->state_vid.frame_data_offset < settings->state_vid.frame_max_size; j++) {
ensure_av_data(settings, audio_samples_per_sector*settings->channels, frames_needed);
if ((j%interleave) < video_sectors_per_block) {
// Video sector
init_sector_buffer_video(buffer, settings);
encode_sector_str(settings->video_frames, buffer, settings);
} else {
// Audio sector
int samples_length = settings->audio_sample_count / settings->channels;
if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector;
// FIXME: this is an extremely hacky way to handle audio tracks
// shorter than the video track
if (!samples_length) {
video_sectors_per_block++;
}
int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer);
if (settings->end_of_input) {
psx_audio_xa_encode_finalize(xa_settings, buffer, length);
}
retire_av_data(settings, samples_length*settings->channels, 0);
}
encode_block_str(settings->video_frames, settings->video_frame_count, buffer, settings);
for(int k = 0; k < 8; k++) {
int t = k + (j/18)*8 + 75*2;
if (settings->format == FORMAT_STR2CD) {
int t = j + 75*2;
// Put the time in
buffer[0x00C + 2352*k] = ((t/75/60)%10)|(((t/75/60)/10)<<4);
buffer[0x00D + 2352*k] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4);
buffer[0x00E + 2352*k] = ((t%75)%10)|(((t%75)/10)<<4);
buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4);
buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4);
buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4);
if(k != 7) {
calculate_edc_data(buffer + 2352*k);
// FIXME: EDC is not calculated in 2336-byte sector mode (shouldn't
// matter anyway, any CD image builder will have to recalculate it
// due to the sector's MSF changing)
if((j%interleave) < video_sectors_per_block) {
calculate_edc_data(buffer);
}
}
retire_av_data(settings, audio_samples_per_sector*av_sample_mul, 0);
fwrite(buffer, 2352*8, 1, output);
fwrite(buffer, 2352, 1, output);
time_t t = get_elapsed_time(settings);
if (t) {
fprintf(stderr, "\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
settings->state_vid.frame_index,
j,
(double)settings->state_vid.quant_scale_sum / (double)settings->state_vid.frame_index,
(double)(settings->state_vid.frame_index*settings->video_fps_den) / (double)(t*settings->video_fps_num)
);
}
}
free(settings->state_vid.frame_output);
}
// Encodes the input video as a sequence of bitstream frames and writes them
// to output, one fixed-size record of settings->alignment bytes per frame.
//
// The loop runs for as long as ensure_av_data() can provide at least one video
// frame. If the frame buffer cannot be allocated, an error is printed to
// stderr and nothing is written.
void encode_file_sbs(settings_t *settings, FILE *output) {
	settings->state_vid.frame_output = malloc(settings->alignment);
	if (settings->state_vid.frame_output == NULL) {
		fprintf(stderr, "Error: failed to allocate video frame buffer\n");
		return;
	}

	settings->state_vid.frame_data_offset = 0;
	settings->state_vid.frame_max_size = settings->alignment;
	settings->state_vid.quant_scale_sum = 0;

	for (int j = 0; ensure_av_data(settings, 0, 1); j++) {
		encode_frame_bs(settings->video_frames, settings);
		fwrite(settings->state_vid.frame_output, settings->alignment, 1, output);

		time_t t = get_elapsed_time(settings);
		if (t) {
			// j is still 0 if the first progress update happens on the very
			// first frame; avoid dividing by zero in that case.
			double avg_quant_scale = j
				? (double)settings->state_vid.quant_scale_sum / (double)j
				: 0.0;

			fprintf(stderr, "\rFrame: %4d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx",
				j,
				avg_quant_scale,
				(double)(j*settings->video_fps_den) / (double)(t*settings->video_fps_num)
			);
		}
	}

	free(settings->state_vid.frame_output);
	// Do not leave a dangling pointer behind in the shared encoder state.
	settings->state_vid.frame_output = NULL;
}

View File

@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
@ -36,117 +37,117 @@ const struct {
uint16_t u_hword_neg;
} huffman_lookup[] = {
// Fuck this Huffman tree in particular --GM
2,0x3,MAKE_HUFFMAN_PAIR(0,1),
3,0x3,MAKE_HUFFMAN_PAIR(1,1),
4,0x4,MAKE_HUFFMAN_PAIR(0,2),
4,0x5,MAKE_HUFFMAN_PAIR(2,1),
5,0x05,MAKE_HUFFMAN_PAIR(0,3),
5,0x06,MAKE_HUFFMAN_PAIR(4,1),
5,0x07,MAKE_HUFFMAN_PAIR(3,1),
6,0x04,MAKE_HUFFMAN_PAIR(7,1),
6,0x05,MAKE_HUFFMAN_PAIR(6,1),
6,0x06,MAKE_HUFFMAN_PAIR(1,2),
6,0x07,MAKE_HUFFMAN_PAIR(5,1),
7,0x04,MAKE_HUFFMAN_PAIR(2,2),
7,0x05,MAKE_HUFFMAN_PAIR(9,1),
7,0x06,MAKE_HUFFMAN_PAIR(0,4),
7,0x07,MAKE_HUFFMAN_PAIR(8,1),
8,0x20,MAKE_HUFFMAN_PAIR(13,1),
8,0x21,MAKE_HUFFMAN_PAIR(0,6),
8,0x22,MAKE_HUFFMAN_PAIR(12,1),
8,0x23,MAKE_HUFFMAN_PAIR(11,1),
8,0x24,MAKE_HUFFMAN_PAIR(3,2),
8,0x25,MAKE_HUFFMAN_PAIR(1,3),
8,0x26,MAKE_HUFFMAN_PAIR(0,5),
8,0x27,MAKE_HUFFMAN_PAIR(10,1),
10,0x008,MAKE_HUFFMAN_PAIR(16,1),
10,0x009,MAKE_HUFFMAN_PAIR(5,2),
10,0x00A,MAKE_HUFFMAN_PAIR(0,7),
10,0x00B,MAKE_HUFFMAN_PAIR(2,3),
10,0x00C,MAKE_HUFFMAN_PAIR(1,4),
10,0x00D,MAKE_HUFFMAN_PAIR(15,1),
10,0x00E,MAKE_HUFFMAN_PAIR(14,1),
10,0x00F,MAKE_HUFFMAN_PAIR(4,2),
12,0x010,MAKE_HUFFMAN_PAIR(0,11),
12,0x011,MAKE_HUFFMAN_PAIR(8,2),
12,0x012,MAKE_HUFFMAN_PAIR(4,3),
12,0x013,MAKE_HUFFMAN_PAIR(0,10),
12,0x014,MAKE_HUFFMAN_PAIR(2,4),
12,0x015,MAKE_HUFFMAN_PAIR(7,2),
12,0x016,MAKE_HUFFMAN_PAIR(21,1),
12,0x017,MAKE_HUFFMAN_PAIR(20,1),
12,0x018,MAKE_HUFFMAN_PAIR(0,9),
12,0x019,MAKE_HUFFMAN_PAIR(19,1),
12,0x01A,MAKE_HUFFMAN_PAIR(18,1),
12,0x01B,MAKE_HUFFMAN_PAIR(1,5),
12,0x01C,MAKE_HUFFMAN_PAIR(3,3),
12,0x01D,MAKE_HUFFMAN_PAIR(0,8),
12,0x01E,MAKE_HUFFMAN_PAIR(6,2),
12,0x01F,MAKE_HUFFMAN_PAIR(17,1),
13,0x0010,MAKE_HUFFMAN_PAIR(10,2),
13,0x0011,MAKE_HUFFMAN_PAIR(9,2),
13,0x0012,MAKE_HUFFMAN_PAIR(5,3),
13,0x0013,MAKE_HUFFMAN_PAIR(3,4),
13,0x0014,MAKE_HUFFMAN_PAIR(2,5),
13,0x0015,MAKE_HUFFMAN_PAIR(1,7),
13,0x0016,MAKE_HUFFMAN_PAIR(1,6),
13,0x0017,MAKE_HUFFMAN_PAIR(0,15),
13,0x0018,MAKE_HUFFMAN_PAIR(0,14),
13,0x0019,MAKE_HUFFMAN_PAIR(0,13),
13,0x001A,MAKE_HUFFMAN_PAIR(0,12),
13,0x001B,MAKE_HUFFMAN_PAIR(26,1),
13,0x001C,MAKE_HUFFMAN_PAIR(25,1),
13,0x001D,MAKE_HUFFMAN_PAIR(24,1),
13,0x001E,MAKE_HUFFMAN_PAIR(23,1),
13,0x001F,MAKE_HUFFMAN_PAIR(22,1),
14,0x0010,MAKE_HUFFMAN_PAIR(0,31),
14,0x0011,MAKE_HUFFMAN_PAIR(0,30),
14,0x0012,MAKE_HUFFMAN_PAIR(0,29),
14,0x0013,MAKE_HUFFMAN_PAIR(0,28),
14,0x0014,MAKE_HUFFMAN_PAIR(0,27),
14,0x0015,MAKE_HUFFMAN_PAIR(0,26),
14,0x0016,MAKE_HUFFMAN_PAIR(0,25),
14,0x0017,MAKE_HUFFMAN_PAIR(0,24),
14,0x0018,MAKE_HUFFMAN_PAIR(0,23),
14,0x0019,MAKE_HUFFMAN_PAIR(0,22),
14,0x001A,MAKE_HUFFMAN_PAIR(0,21),
14,0x001B,MAKE_HUFFMAN_PAIR(0,20),
14,0x001C,MAKE_HUFFMAN_PAIR(0,19),
14,0x001D,MAKE_HUFFMAN_PAIR(0,18),
14,0x001E,MAKE_HUFFMAN_PAIR(0,17),
14,0x001F,MAKE_HUFFMAN_PAIR(0,16),
15,0x0010,MAKE_HUFFMAN_PAIR(0,40),
15,0x0011,MAKE_HUFFMAN_PAIR(0,39),
15,0x0012,MAKE_HUFFMAN_PAIR(0,38),
15,0x0013,MAKE_HUFFMAN_PAIR(0,37),
15,0x0014,MAKE_HUFFMAN_PAIR(0,36),
15,0x0015,MAKE_HUFFMAN_PAIR(0,35),
15,0x0016,MAKE_HUFFMAN_PAIR(0,34),
15,0x0017,MAKE_HUFFMAN_PAIR(0,33),
15,0x0018,MAKE_HUFFMAN_PAIR(0,32),
15,0x0019,MAKE_HUFFMAN_PAIR(1,14),
15,0x001A,MAKE_HUFFMAN_PAIR(1,13),
15,0x001B,MAKE_HUFFMAN_PAIR(1,12),
15,0x001C,MAKE_HUFFMAN_PAIR(1,11),
15,0x001D,MAKE_HUFFMAN_PAIR(1,10),
15,0x001E,MAKE_HUFFMAN_PAIR(1,9),
15,0x001F,MAKE_HUFFMAN_PAIR(1,8),
16,0x0010,MAKE_HUFFMAN_PAIR(1,18),
16,0x0011,MAKE_HUFFMAN_PAIR(1,17),
16,0x0012,MAKE_HUFFMAN_PAIR(1,16),
16,0x0013,MAKE_HUFFMAN_PAIR(1,15),
16,0x0014,MAKE_HUFFMAN_PAIR(6,3),
16,0x0015,MAKE_HUFFMAN_PAIR(16,2),
16,0x0016,MAKE_HUFFMAN_PAIR(15,2),
16,0x0017,MAKE_HUFFMAN_PAIR(14,2),
16,0x0018,MAKE_HUFFMAN_PAIR(13,2),
16,0x0019,MAKE_HUFFMAN_PAIR(12,2),
16,0x001A,MAKE_HUFFMAN_PAIR(11,2),
16,0x001B,MAKE_HUFFMAN_PAIR(31,1),
16,0x001C,MAKE_HUFFMAN_PAIR(30,1),
16,0x001D,MAKE_HUFFMAN_PAIR(29,1),
16,0x001E,MAKE_HUFFMAN_PAIR(28,1),
16,0x001F,MAKE_HUFFMAN_PAIR(27,1),
{2,0x3,MAKE_HUFFMAN_PAIR(0,1)},
{3,0x3,MAKE_HUFFMAN_PAIR(1,1)},
{4,0x4,MAKE_HUFFMAN_PAIR(0,2)},
{4,0x5,MAKE_HUFFMAN_PAIR(2,1)},
{5,0x05,MAKE_HUFFMAN_PAIR(0,3)},
{5,0x06,MAKE_HUFFMAN_PAIR(4,1)},
{5,0x07,MAKE_HUFFMAN_PAIR(3,1)},
{6,0x04,MAKE_HUFFMAN_PAIR(7,1)},
{6,0x05,MAKE_HUFFMAN_PAIR(6,1)},
{6,0x06,MAKE_HUFFMAN_PAIR(1,2)},
{6,0x07,MAKE_HUFFMAN_PAIR(5,1)},
{7,0x04,MAKE_HUFFMAN_PAIR(2,2)},
{7,0x05,MAKE_HUFFMAN_PAIR(9,1)},
{7,0x06,MAKE_HUFFMAN_PAIR(0,4)},
{7,0x07,MAKE_HUFFMAN_PAIR(8,1)},
{8,0x20,MAKE_HUFFMAN_PAIR(13,1)},
{8,0x21,MAKE_HUFFMAN_PAIR(0,6)},
{8,0x22,MAKE_HUFFMAN_PAIR(12,1)},
{8,0x23,MAKE_HUFFMAN_PAIR(11,1)},
{8,0x24,MAKE_HUFFMAN_PAIR(3,2)},
{8,0x25,MAKE_HUFFMAN_PAIR(1,3)},
{8,0x26,MAKE_HUFFMAN_PAIR(0,5)},
{8,0x27,MAKE_HUFFMAN_PAIR(10,1)},
{10,0x008,MAKE_HUFFMAN_PAIR(16,1)},
{10,0x009,MAKE_HUFFMAN_PAIR(5,2)},
{10,0x00A,MAKE_HUFFMAN_PAIR(0,7)},
{10,0x00B,MAKE_HUFFMAN_PAIR(2,3)},
{10,0x00C,MAKE_HUFFMAN_PAIR(1,4)},
{10,0x00D,MAKE_HUFFMAN_PAIR(15,1)},
{10,0x00E,MAKE_HUFFMAN_PAIR(14,1)},
{10,0x00F,MAKE_HUFFMAN_PAIR(4,2)},
{12,0x010,MAKE_HUFFMAN_PAIR(0,11)},
{12,0x011,MAKE_HUFFMAN_PAIR(8,2)},
{12,0x012,MAKE_HUFFMAN_PAIR(4,3)},
{12,0x013,MAKE_HUFFMAN_PAIR(0,10)},
{12,0x014,MAKE_HUFFMAN_PAIR(2,4)},
{12,0x015,MAKE_HUFFMAN_PAIR(7,2)},
{12,0x016,MAKE_HUFFMAN_PAIR(21,1)},
{12,0x017,MAKE_HUFFMAN_PAIR(20,1)},
{12,0x018,MAKE_HUFFMAN_PAIR(0,9)},
{12,0x019,MAKE_HUFFMAN_PAIR(19,1)},
{12,0x01A,MAKE_HUFFMAN_PAIR(18,1)},
{12,0x01B,MAKE_HUFFMAN_PAIR(1,5)},
{12,0x01C,MAKE_HUFFMAN_PAIR(3,3)},
{12,0x01D,MAKE_HUFFMAN_PAIR(0,8)},
{12,0x01E,MAKE_HUFFMAN_PAIR(6,2)},
{12,0x01F,MAKE_HUFFMAN_PAIR(17,1)},
{13,0x0010,MAKE_HUFFMAN_PAIR(10,2)},
{13,0x0011,MAKE_HUFFMAN_PAIR(9,2)},
{13,0x0012,MAKE_HUFFMAN_PAIR(5,3)},
{13,0x0013,MAKE_HUFFMAN_PAIR(3,4)},
{13,0x0014,MAKE_HUFFMAN_PAIR(2,5)},
{13,0x0015,MAKE_HUFFMAN_PAIR(1,7)},
{13,0x0016,MAKE_HUFFMAN_PAIR(1,6)},
{13,0x0017,MAKE_HUFFMAN_PAIR(0,15)},
{13,0x0018,MAKE_HUFFMAN_PAIR(0,14)},
{13,0x0019,MAKE_HUFFMAN_PAIR(0,13)},
{13,0x001A,MAKE_HUFFMAN_PAIR(0,12)},
{13,0x001B,MAKE_HUFFMAN_PAIR(26,1)},
{13,0x001C,MAKE_HUFFMAN_PAIR(25,1)},
{13,0x001D,MAKE_HUFFMAN_PAIR(24,1)},
{13,0x001E,MAKE_HUFFMAN_PAIR(23,1)},
{13,0x001F,MAKE_HUFFMAN_PAIR(22,1)},
{14,0x0010,MAKE_HUFFMAN_PAIR(0,31)},
{14,0x0011,MAKE_HUFFMAN_PAIR(0,30)},
{14,0x0012,MAKE_HUFFMAN_PAIR(0,29)},
{14,0x0013,MAKE_HUFFMAN_PAIR(0,28)},
{14,0x0014,MAKE_HUFFMAN_PAIR(0,27)},
{14,0x0015,MAKE_HUFFMAN_PAIR(0,26)},
{14,0x0016,MAKE_HUFFMAN_PAIR(0,25)},
{14,0x0017,MAKE_HUFFMAN_PAIR(0,24)},
{14,0x0018,MAKE_HUFFMAN_PAIR(0,23)},
{14,0x0019,MAKE_HUFFMAN_PAIR(0,22)},
{14,0x001A,MAKE_HUFFMAN_PAIR(0,21)},
{14,0x001B,MAKE_HUFFMAN_PAIR(0,20)},
{14,0x001C,MAKE_HUFFMAN_PAIR(0,19)},
{14,0x001D,MAKE_HUFFMAN_PAIR(0,18)},
{14,0x001E,MAKE_HUFFMAN_PAIR(0,17)},
{14,0x001F,MAKE_HUFFMAN_PAIR(0,16)},
{15,0x0010,MAKE_HUFFMAN_PAIR(0,40)},
{15,0x0011,MAKE_HUFFMAN_PAIR(0,39)},
{15,0x0012,MAKE_HUFFMAN_PAIR(0,38)},
{15,0x0013,MAKE_HUFFMAN_PAIR(0,37)},
{15,0x0014,MAKE_HUFFMAN_PAIR(0,36)},
{15,0x0015,MAKE_HUFFMAN_PAIR(0,35)},
{15,0x0016,MAKE_HUFFMAN_PAIR(0,34)},
{15,0x0017,MAKE_HUFFMAN_PAIR(0,33)},
{15,0x0018,MAKE_HUFFMAN_PAIR(0,32)},
{15,0x0019,MAKE_HUFFMAN_PAIR(1,14)},
{15,0x001A,MAKE_HUFFMAN_PAIR(1,13)},
{15,0x001B,MAKE_HUFFMAN_PAIR(1,12)},
{15,0x001C,MAKE_HUFFMAN_PAIR(1,11)},
{15,0x001D,MAKE_HUFFMAN_PAIR(1,10)},
{15,0x001E,MAKE_HUFFMAN_PAIR(1,9)},
{15,0x001F,MAKE_HUFFMAN_PAIR(1,8)},
{16,0x0010,MAKE_HUFFMAN_PAIR(1,18)},
{16,0x0011,MAKE_HUFFMAN_PAIR(1,17)},
{16,0x0012,MAKE_HUFFMAN_PAIR(1,16)},
{16,0x0013,MAKE_HUFFMAN_PAIR(1,15)},
{16,0x0014,MAKE_HUFFMAN_PAIR(6,3)},
{16,0x0015,MAKE_HUFFMAN_PAIR(16,2)},
{16,0x0016,MAKE_HUFFMAN_PAIR(15,2)},
{16,0x0017,MAKE_HUFFMAN_PAIR(14,2)},
{16,0x0018,MAKE_HUFFMAN_PAIR(13,2)},
{16,0x0019,MAKE_HUFFMAN_PAIR(12,2)},
{16,0x001A,MAKE_HUFFMAN_PAIR(11,2)},
{16,0x001B,MAKE_HUFFMAN_PAIR(31,1)},
{16,0x001C,MAKE_HUFFMAN_PAIR(30,1)},
{16,0x001D,MAKE_HUFFMAN_PAIR(29,1)},
{16,0x001E,MAKE_HUFFMAN_PAIR(28,1)},
{16,0x001F,MAKE_HUFFMAN_PAIR(27,1)},
};
#undef MAKE_HUFFMAN_PAIR
@ -209,33 +210,38 @@ static void init_dct_data(void)
}
// Writes the pending 16-bit accumulator (state->bits_value) to the output
// buffer, low byte first, if any bits have been placed in it
// (bits_left < 16), then resets the accumulator to empty
// (bits_left = 16, bits_value = 0).
//
// The bool-returning variant returns false when bytes_used has reached
// frame_max_size after the low byte was stored, and true otherwise.
//
// NOTE(review): this text is a rendered diff without +/- markers, so the
// pre-change lines (void signature, asserts, writes to state->unmuxed) appear
// next to their replacements (bool signature, writes to state->frame_output).
// NOTE(review): the size check runs only after the low byte has been stored;
// confirm frame_output has room for that byte when bytes_used already equals
// frame_max_size on entry.
static void flush_bits(vid_encoder_state_t *state)
static bool flush_bits(vid_encoder_state_t *state)
{
if(state->bits_left < 16) {
assert(state->bytes_used < sizeof(state->unmuxed));
state->unmuxed[state->bytes_used++] = (uint8_t)state->bits_value;
assert(state->bytes_used < sizeof(state->unmuxed));
assert(state->bytes_used < 2016*state->frame_block_count);
state->unmuxed[state->bytes_used++] = (uint8_t)(state->bits_value>>8);
state->frame_output[state->bytes_used++] = (uint8_t)state->bits_value;
// Out of space: report failure instead of asserting.
if (state->bytes_used >= state->frame_max_size) {
return false;
}
state->frame_output[state->bytes_used++] = (uint8_t)(state->bits_value>>8);
}
state->bits_left = 16;
state->bits_value = 0;
return true;
}
// Appends the low `bits` bits of `val` to the bitstream, most significant bit
// first, using the 16-bit accumulator in `state` and calling flush_bits()
// whenever the accumulator fills up.
//
// Values wider than 16 bits are split: the upper (bits-16) bits are emitted by
// a recursive call, then the lower 16 bits follow.
//
// The bool-returning variant returns false as soon as a recursive call or a
// flush_bits() call fails, and true once all bits have been stored.
//
// NOTE(review): this text is a rendered diff without +/- markers; old and new
// lines appear side by side and the "@ ... @@" lines mark places where
// unchanged lines are not shown.
static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
{
assert(val < (1<<bits));
// FIXME: for some reason the main logic breaks when bits > 16
// and I have no idea why, so I have to split this up --GM
if (bits > 16) {
encode_bits(state, bits-16, val>>16);
if (!encode_bits(state, bits-16, val>>16)) {
return false;
}
bits = 16;
val &= 0xFFFF;
}
if (state->bits_left == 0) {
flush_bits(state);
if (!flush_bits(state)) {
return false;
}
}
// The value does not fit in the free space: store its top bits, flush, and
// carry on with the masked remainder.
while (bits > state->bits_left) {
@ -243,7 +249,7 @@ static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
uint32_t outval = val;
outval >>= bits - state->bits_left;
assert(outval < (1<<16));
uint16_t old_value = state->bits_value;
//uint16_t old_value = state->bits_value;
assert((state->bits_value & outval) == 0);
state->bits_value |= (uint16_t)outval;
//fprintf(stderr, "trunc %2d %2d %08X %04X %04X\n", bits, state->bits_left, val, old_value, state->bits_value);
@ -252,7 +258,9 @@ static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
val &= mask;
assert(mask >= 1);
assert(val < (1<<bits));
flush_bits(state);
if (!flush_bits(state)) {
return false;
}
}
// Whatever is left fits: place it at the top of the free space.
if (bits >= 1) {
@ -261,87 +269,82 @@ static void encode_bits(vid_encoder_state_t *state, int bits, uint32_t val)
uint32_t outval = val;
outval <<= state->bits_left - bits;
assert(outval < (1<<16));
uint16_t old_value = state->bits_value;
//uint16_t old_value = state->bits_value;
assert((state->bits_value & outval) == 0);
state->bits_value |= (uint16_t)outval;
//fprintf(stderr, "plop %2d %2d %08X %04X %04X\n", bits, state->bits_left, val, state->bits_value);
state->bits_left -= bits;
}
return true;
}
// Emits the variable-length code for one 16-bit run/level word.
//
// The active (#else) path looks the word up in huffman_encoding_map: the top
// 8 bits of the entry give the code length and the low 24 bits the code,
// which are passed straight to encode_bits(). The #if 0 path is a disabled
// linear search of huffman_lookup with a 6+16 bit escape as fallback.
//
// The bool-returning variant forwards the result of encode_bits().
//
// NOTE(review): this text is a rendered diff without +/- markers, so each old
// "encode_bits(...); return;" pair is followed by its "return encode_bits(...)"
// replacement.
// NOTE(review): `value` is a uint16_t, so the assert below can never fail.
static void encode_ac_value(vid_encoder_state_t *state, uint16_t value)
static bool encode_ac_value(vid_encoder_state_t *state, uint16_t value)
{
assert(0 <= value && value <= 0xFFFF);
#if 0
for(int i = 0; i < sizeof(huffman_lookup)/sizeof(huffman_lookup[0]); i++) {
if(value == huffman_lookup[i].u_hword_pos) {
encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|0);
return;
return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|0);
}
else if(value == huffman_lookup[i].u_hword_neg) {
encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|1);
return;
return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|1);
}
}
// Use an escape
encode_bits(state, 6+16, (0x01<<16)|(0xFFFF&(uint32_t)value));
return encode_bits(state, 6+16, (0x01<<16)|(0xFFFF&(uint32_t)value));
#else
uint32_t outword = huffman_encoding_map[value];
encode_bits(state, outword>>24, outword&0xFFFFFF);
return encode_bits(state, outword>>24, outword&0xFFFFFF);
#endif
}
// Applies the 8x8 DCT to `block` in place.
//
// The float variant visible here runs two passes. Each pass computes
//   out[8*i+j] = sum over k of in[8*j+k] * dct_scale_table[8*i+k] / 65536
// first from block into midblock, then from midblock back into block. Since
// every pass also swaps the row and column index of its input, the two passes
// together apply the table along both axes.
//
// NOTE(review): this text is a rendered diff without +/- markers. The int32_t
// lines (the `reps` loop, the >>14 rounding, the memcpy and the trailing
// /8, /4 and threshold code) presumably belong to the pre-change integer
// version; confirm against the actual file before relying on them.
static void transform_dct_block(vid_encoder_state_t *state, int32_t *block)
static void transform_dct_block(vid_encoder_state_t *state, float *block)
{
// Apply DCT to block
int32_t midblock[8*8];
float midblock[8*8];
for (int reps = 0; reps < 2; reps++) {
for (int i = 0; i < 8; i++) {
for (int j = 0; j < 8; j++) {
int32_t v = 0;
for(int k = 0; k < 8; k++) {
v += block[8*j+k]*dct_scale_table[8*i+k];
}
midblock[8*i+j] = (v + (1<<((14)-1)))>>(14);
for (int i = 0; i < 8; i++) {
for (int j = 0; j < 8; j++) {
float v = 0.0f;
for(int k = 0; k < 8; k++) {
v += block[8*j+k] * (float)dct_scale_table[8*i+k] / (float)(1 << 16);
}
}
memcpy(block, midblock, sizeof(midblock));
midblock[8*i+j] = v;
}
}
for (int i = 0; i < 8; i++) {
for (int j = 0; j < 8; j++) {
float v = 0.0f;
for(int k = 0; k < 8; k++) {
v += midblock[8*j+k] * (float)dct_scale_table[8*i+k] / (float)(1 << 16);
}
block[8*i+j] = v;
}
// FIXME: Work out why the math has to go this way
block[0] /= 8;
for (int i = 0; i < 64; i++) {
// Finish reducing it
block[i] /= 4;
// If it's below the quantisation threshold, zero it
if(abs(block[i]) < quant_dec[i]) {
block[i] = 0;
}
}
}
// Quantises one transformed 8x8 block and writes it to the bitstream.
//
// Steps in the float variant visible here:
//  1. Every coefficient is divided by quant_dec[i], rounded and clamped to
//     the signed 10-bit range [-0x200, +0x1FF]. All coefficients except the
//     DC one (index 0) are first multiplied by 8 / quant_scale.
//  2. The DC coefficient is emitted as a raw 10-bit value.
//  3. The remaining coefficients are walked in dct_zagzig_table order and
//     turned into run/level words: (zero run << 10) | (level & 0x3FF).
//  4. Each word is coded with encode_ac_value(), followed by the 2-bit
//     end-of-block code 0x2.
// uncomp_hwords_used grows by one per run/level word plus two per block.
//
// The bool-returning variant returns false as soon as any write fails.
//
// NOTE(review): this text is a rendered diff without +/- markers; old and new
// lines appear side by side and the "@ ... @@" lines mark places where
// unchanged lines (such as the declaration of zero_rle_words) are not shown.
static void encode_dct_block(vid_encoder_state_t *state, int32_t *block)
static bool encode_dct_block(vid_encoder_state_t *state, float *block)
{
int dc_value = 0;
int16_t coeffs[64];
float scale = 8.0f / (float)state->quant_scale;
for (int i = 0; i < 64; i++) {
// Quantise it
block[i] = (block[i])/quant_dec[i];
// The DC coefficient is not affected by the quantization scale.
float x = block[i];
if (i) { x *= scale; }
// Clamp it
if (block[i] < -0x200) { block[i] = -0x200; }
if (block[i] > +0x1FF) { block[i] = +0x1FF; }
int v = (int)roundf(x / (float)quant_dec[i]);
if (v < -0x200) { v = -0x200; }
if (v > +0x1FF) { v = +0x1FF; }
coeffs[i] = v;
}
// Get DC value
dc_value = block[0];
//dc_value = 0;
encode_bits(state, 10, dc_value&0x3FF);
if (!encode_bits(state, 10, coeffs[0]&0x3FF)) {
return false;
}
// Build RLE output
uint16_t zero_rle_data[8*8];
@ -349,10 +352,10 @@ static void encode_dct_block(vid_encoder_state_t *state, int32_t *block)
for (int i = 1, zeroes = 0; i < 64; i++) {
int ri = dct_zagzig_table[i];
//int ri = dct_zigzag_table[i];
if (block[ri] == 0) {
if (coeffs[ri] == 0) {
zeroes++;
} else {
zero_rle_data[zero_rle_words++] = (zeroes<<10)|(block[ri]&0x3FF);
zero_rle_data[zero_rle_words++] = (zeroes<<10)|(coeffs[ri]&0x3FF);
zeroes = 0;
state->uncomp_hwords_used += 1;
}
@ -360,19 +363,24 @@ static void encode_dct_block(vid_encoder_state_t *state, int32_t *block)
// Now Huffman-code the data
for (int i = 0; i < zero_rle_words; i++) {
encode_ac_value(state, zero_rle_data[i]);
if (!encode_ac_value(state, zero_rle_data[i])) {
return false;
}
}
//fprintf(stderr, "dc %08X rles %2d\n", dc_value, zero_rle_words);
//assert(dc_value >= -0x200); assert(dc_value < +0x200);
//fprintf(stderr, "dc %08X rles %2d\n", coeffs[0], zero_rle_words);
//assert(coeffs[0] >= -0x200); assert(coeffs[0] < +0x200);
// Store end of block
encode_bits(state, 2, 0x2);
if (!encode_bits(state, 2, 0x2)) {
return false;
}
state->uncomp_hwords_used += 2;
state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF;
//state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF;
return true;
}
// Coefficient-shedding helper, compiled out by the surrounding #if 0 / #endif.
// Only its signature and final return statement are visible in this view; the
// "@ ... @@" line marks where the rest of the body is not shown.
#if 0
static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t min_val, int *values_to_shed)
{
// Reduce so it can all fit
@ -394,48 +402,44 @@ static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t
// Factor in DC + EOF values
return nonzeroes+2;
}
#endif
// Encodes one video frame into settings->state_vid.frame_output as a
// version 2 BS bitstream preceded by an 8-byte header.
//
// Steps in the encode_frame_bs variant visible here:
//  1. On first use, initialise the DCT tables and allocate the six
//     per-macroblock coefficient lists (Cr, Cb, Y1..Y4).
//  2. Split the frame into 16x16 macroblocks, subtract 128 from every sample
//     and run transform_dct_block() on each 8x8 block.
//  3. Starting from quant_scale 1, encode every block. If any write fails
//     (the output reached frame_max_size bytes), start over with the next
//     quant_scale, up to 63.
//  4. Pad the counters, fill in the header and call retire_av_data().
//
// video_frame is read as a width*height luma plane followed by a chroma plane
// whose two components are stored as interleaved byte pairs.
// NOTE(review): that looks like a semi-planar 4:2:0 layout with Cr first -
// confirm against the code that produces the frame.
// NOTE(review): the malloc() results below are not checked before use.
// NOTE(review): this text is a rendered diff without +/- markers; lines from
// the previous encode_frame_str implementation appear next to their
// replacements and the "@ ... @@" lines mark places where unchanged lines are
// not shown.
static void encode_frame_str(uint8_t *video_frames, int video_frame_count, uint8_t *output, settings_t *settings)
void encode_frame_bs(uint8_t *video_frame, settings_t *settings)
{
int pitch = settings->video_width*4;
int real_index = (settings->state_vid.frame_index-1);
int pitch = settings->video_width;
/*int real_index = (settings->state_vid.frame_index-1);
if (real_index > video_frame_count-1) {
real_index = video_frame_count-1;
}
//uint8_t *video_frame = video_frames + settings->video_width*settings->video_height*4*real_index;
uint8_t *video_frame = video_frames;
uint8_t *y_plane = video_frames + settings->video_width*settings->video_height*3/2*real_index;*/
uint8_t *y_plane = video_frame;
uint8_t *c_plane = y_plane + (settings->video_width*settings->video_height);
if (!dct_done_init) {
init_dct_data();
dct_done_init = true;
}
int dct_block_count_x = (settings->video_width+15)/16;
int dct_block_count_y = (settings->video_height+15)/16;
if (settings->state_vid.dct_block_lists[0] == NULL) {
int dct_block_count_x = (settings->video_width+15)/16;
int dct_block_count_y = (settings->video_height+15)/16;
int dct_block_size = dct_block_count_x*dct_block_count_y*sizeof(int32_t)*8*8;
int dct_block_size = dct_block_count_x*dct_block_count_y*sizeof(float)*8*8;
for (int i = 0; i < 6; i++) {
settings->state_vid.dct_block_lists[i] = malloc(dct_block_size);
}
}
memset(settings->state_vid.unmuxed, 0, sizeof(settings->state_vid.unmuxed));
settings->state_vid.quant_scale = 1;
settings->state_vid.uncomp_hwords_used = 0;
settings->state_vid.bytes_used = 8;
settings->state_vid.blocks_used = 0;
// TODO: non-16x16-aligned videos
assert((settings->video_width % 16) == 0);
assert((settings->video_height % 16) == 0);
// Do the initial transform
for(int fx = 0; fx < settings->video_width; fx += 16) {
for(int fy = 0; fy < settings->video_height; fy += 16) {
// Rearrange the Y/C planes returned by libswscale into macroblocks.
for(int fx = 0; fx < dct_block_count_x; fx++) {
for(int fy = 0; fy < dct_block_count_y; fy++) {
// Order: Cr Cb [Y1|Y2\nY3|Y4]
int block_offs = 8*8*((fy>>4)*((settings->video_width+15)/16)+(fx>>4));
int32_t *blocks[6] = {
int block_offs = 64 * (fy*dct_block_count_x + fx);
float *blocks[6] = {
settings->state_vid.dct_block_lists[0] + block_offs,
settings->state_vid.dct_block_lists[1] + block_offs,
settings->state_vid.dct_block_lists[2] + block_offs,
@ -446,66 +450,51 @@ static void encode_frame_str(uint8_t *video_frames, int video_frame_count, uint8
for(int y = 0; y < 8; y++) {
for(int x = 0; x < 8; x++) {
int k = y*8+x;
int k = y*8 + x;
int cx = fx*8 + x;
int cy = fy*8 + y;
int lx = fx*16 + x;
int ly = fy*16 + y;
int cr = 0;
int cg = 0;
int cb = 0;
for(int cy = 0; cy < 2; cy++) {
for(int cx = 0; cx < 2; cx++) {
int coffs = pitch*(fy+y*2+cy) + 4*(fx+x*2+cx);
cr += video_frame[coffs+0];
cg += video_frame[coffs+1];
cb += video_frame[coffs+2];
}
}
// TODO: Get the real math for this
int cluma = cr+cg*2+cb;
#if 1
blocks[0][k] = ((cr<<2) - cluma + (1<<(4-1)))>>4;
blocks[1][k] = ((cb<<2) - cluma + (1<<(4-1)))>>4;
#else
blocks[0][k] = 0;
blocks[1][k] = 0;
#endif
for(int ly = 0; ly < 2; ly++) {
for(int lx = 0; lx < 2; lx++) {
int loffs = pitch*(fy+ly*8+y) + 4*(fx+lx*8+x);
int lr = video_frame[loffs+0];
int lg = video_frame[loffs+1];
int lb = video_frame[loffs+2];
// TODO: Get the real math for this
int lluma = (lr+lg*2+lb+2)-0x200;
if(lluma < -0x200) { lluma = -0x200; }
if(lluma > +0x1FF) { lluma = +0x1FF; }
lluma >>= 1;
blocks[2+2*ly+lx][k] = lluma;
}
}
blocks[0][k] = (float)c_plane[pitch*cy + 2*cx + 0] - 128.0f;
blocks[1][k] = (float)c_plane[pitch*cy + 2*cx + 1] - 128.0f;
blocks[2][k] = (float)y_plane[pitch*(ly+0) + (lx+0)] - 128.0f;
blocks[3][k] = (float)y_plane[pitch*(ly+0) + (lx+8)] - 128.0f;
blocks[4][k] = (float)y_plane[pitch*(ly+8) + (lx+0)] - 128.0f;
blocks[5][k] = (float)y_plane[pitch*(ly+8) + (lx+8)] - 128.0f;
}
}
for(int i = 0; i < 6; i++) {
transform_dct_block(&(settings->state_vid), blocks[i]);
}
}
}
// Now reduce all the blocks
// TODO: Base this on actual bit count
//const int accum_threshold = 6500;
const int accum_threshold = 1025*settings->state_vid.frame_block_count;
//const int accum_threshold = 900*settings->state_vid.frame_block_count;
int values_to_shed = 0;
for(int min_val = 0;; min_val += 1) {
int accum = 0;
for(int fx = 0; fx < settings->video_width; fx += 16) {
for(int fy = 0; fy < settings->video_height; fy += 16) {
// Attempt encoding the frame at the maximum quality. If the result is too
// large, increase the quantization scale and try again.
// TODO: if a frame encoded at scale N is too large but the same frame
// encoded at scale N-1 leaves a significant amount of free space, attempt
// compressing at scale N but optimizing coefficients away until it fits
// (like the old algorithm did)
for (
settings->state_vid.quant_scale = 1;
settings->state_vid.quant_scale < 64;
settings->state_vid.quant_scale++
) {
memset(settings->state_vid.frame_output, 0, settings->state_vid.frame_max_size);
settings->state_vid.bits_value = 0;
settings->state_vid.bits_left = 16;
settings->state_vid.uncomp_hwords_used = 0;
settings->state_vid.bytes_used = 8;
bool ok = true;
for(int fx = 0; ok && (fx < dct_block_count_x); fx++) {
for(int fy = 0; ok && (fy < dct_block_count_y); fy++) {
// Order: Cr Cb [Y1|Y2\nY3|Y4]
int block_offs = 8*8*((fy>>4)*((settings->video_width+15)/16)+(fx>>4));
int32_t *blocks[6] = {
int block_offs = 64 * (fy*dct_block_count_x + fx);
float *blocks[6] = {
settings->state_vid.dct_block_lists[0] + block_offs,
settings->state_vid.dct_block_lists[1] + block_offs,
settings->state_vid.dct_block_lists[2] + block_offs,
@ -513,132 +502,125 @@ static void encode_frame_str(uint8_t *video_frames, int video_frame_count, uint8
settings->state_vid.dct_block_lists[4] + block_offs,
settings->state_vid.dct_block_lists[5] + block_offs,
};
const int luma_reduce_mul = 8;
const int chroma_reduce_mul = 8;
for(int i = 6-1; i >= 0; i--) {
accum += reduce_dct_block(&(settings->state_vid), blocks[i], (i < 2 ? min_val*luma_reduce_mul+1 : min_val*chroma_reduce_mul+1), &values_to_shed);
for(int i = 0; ok && (i < 6); i++) {
ok = encode_dct_block(&(settings->state_vid), blocks[i]);
}
}
}
if(accum <= accum_threshold) {
break;
}
// Each `continue` below moves on to the next quant_scale and re-encodes.
if (!ok) { continue; }
if (!encode_bits(&(settings->state_vid), 10, 0x1FF)) { continue; }
if (!encode_bits(&(settings->state_vid), 2, 0x2)) { continue; }
if (!flush_bits(&(settings->state_vid))) { continue; }
values_to_shed = accum - accum_threshold;
settings->state_vid.uncomp_hwords_used += 2;
settings->state_vid.quant_scale_sum += settings->state_vid.quant_scale;
break;
}
// Fires when the frame did not fit even at quant_scale 63.
assert(settings->state_vid.quant_scale < 64);
// Now encode all the blocks
for(int fx = 0; fx < settings->video_width; fx += 16) {
for(int fy = 0; fy < settings->video_height; fy += 16) {
// Order: Cr Cb [Y1|Y2\nY3|Y4]
int block_offs = 8*8*((fy>>4)*((settings->video_width+15)/16)+(fx>>4));
int32_t *blocks[6] = {
settings->state_vid.dct_block_lists[0] + block_offs,
settings->state_vid.dct_block_lists[1] + block_offs,
settings->state_vid.dct_block_lists[2] + block_offs,
settings->state_vid.dct_block_lists[3] + block_offs,
settings->state_vid.dct_block_lists[4] + block_offs,
settings->state_vid.dct_block_lists[5] + block_offs,
};
for(int i = 0; i < 6; i++) {
encode_dct_block(&(settings->state_vid), blocks[i]);
}
}
}
// MDEC DMA is usually configured to transfer data in 32-word chunks.
settings->state_vid.uncomp_hwords_used = (settings->state_vid.uncomp_hwords_used+0x3F)&~0x3F;
encode_bits(&(settings->state_vid), 10, 0x1FF);
encode_bits(&(settings->state_vid), 2, 0x2);
settings->state_vid.uncomp_hwords_used += 2;
settings->state_vid.uncomp_hwords_used = (settings->state_vid.uncomp_hwords_used+0xF)&~0xF;
flush_bits(&(settings->state_vid));
settings->state_vid.blocks_used = ((settings->state_vid.uncomp_hwords_used+0xF)&~0xF)>>4;
// This is not the number of 32-byte blocks required for uncompressed data
// as jPSXdec docs say, but rather the number of 32-*bit* words required.
// The first 4 bytes of the frame header are in fact the MDEC command to
// start decoding, which contains the data length in words in the lower 16
// bits.
settings->state_vid.blocks_used = (settings->state_vid.uncomp_hwords_used+1)>>1;
// We need a multiple of 4
settings->state_vid.bytes_used = (settings->state_vid.bytes_used+0x3)&~0x3;
// Build the demuxed header
settings->state_vid.unmuxed[0x000] = (uint8_t)settings->state_vid.blocks_used;
settings->state_vid.unmuxed[0x001] = (uint8_t)(settings->state_vid.blocks_used>>8);
settings->state_vid.unmuxed[0x002] = (uint8_t)0x00;
settings->state_vid.unmuxed[0x003] = (uint8_t)0x38;
settings->state_vid.unmuxed[0x004] = (uint8_t)settings->state_vid.quant_scale;
settings->state_vid.unmuxed[0x005] = (uint8_t)(settings->state_vid.quant_scale>>8);
settings->state_vid.unmuxed[0x006] = 0x02; // Version 2
settings->state_vid.unmuxed[0x007] = 0x00;
// MDEC command (size of decompressed MDEC data)
settings->state_vid.frame_output[0x000] = (uint8_t)settings->state_vid.blocks_used;
settings->state_vid.frame_output[0x001] = (uint8_t)(settings->state_vid.blocks_used>>8);
settings->state_vid.frame_output[0x002] = (uint8_t)0x00;
settings->state_vid.frame_output[0x003] = (uint8_t)0x38;
// Quantization scale
settings->state_vid.frame_output[0x004] = (uint8_t)settings->state_vid.quant_scale;
settings->state_vid.frame_output[0x005] = (uint8_t)(settings->state_vid.quant_scale>>8);
// BS version
settings->state_vid.frame_output[0x006] = 0x02;
settings->state_vid.frame_output[0x007] = 0x00;
retire_av_data(settings, 0, 1);
}
// Produces the video payload of one .STR sector in `output`: a 32-byte chunk
// header followed by 2016 bytes of the current frame's bitstream.
//
// In the encode_sector_str variant visible here:
//  - When the current frame has been fully consumed (frame_data_offset has
//    reached frame_max_size), the frame index is advanced, the size budget of
//    the next frame is computed as a whole number of 2016-byte chunks from
//    the overflow numerator/denominator, and encode_frame_bs() is called.
//  - The header carries the chunk index and count, frame index, bitstream
//    length, frame dimensions, and a copy of the frame's MDEC command,
//    quantization scale and BS version.
//  - Header and payload are written at offset 0x018 for FORMAT_STR2CD and at
//    offset 0x008 for every other format.
//  - frame_data_offset then advances by 2016.
//
// NOTE(review): this text is a rendered diff without +/- markers; the
// previous encode_block_str implementation (which looped over 7 sectors of
// 2352 bytes) appears first, immediately followed by its replacement.
void encode_block_str(uint8_t *video_frames, int video_frame_count, uint8_t *output, settings_t *settings)
void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings)
{
uint8_t header[32];
memset(header, 0, sizeof(header));
for(int i = 0; i < 7; i++) {
while(settings->state_vid.frame_block_index >= settings->state_vid.frame_block_count) {
settings->state_vid.frame_index++;
// TODO: work out an optimal block count for this
// TODO: calculate this all based on FPS
settings->state_vid.frame_block_overflow_num += settings->state_vid.frame_block_base_overflow;
settings->state_vid.frame_block_count = settings->state_vid.frame_block_overflow_num / settings->state_vid.frame_block_overflow_den;
settings->state_vid.frame_block_overflow_num %= settings->state_vid.frame_block_overflow_den;
settings->state_vid.frame_block_index = 0;
encode_frame_str(video_frames, video_frame_count, output, settings);
}
// Header: MDEC0 register
header[0x000] = 0x60;
header[0x001] = 0x01;
header[0x002] = 0x01;
header[0x003] = 0x80;
// Muxed chunk index/count
int chunk_index = settings->state_vid.frame_block_index;
int chunk_count = settings->state_vid.frame_block_count;
header[0x004] = (uint8_t)chunk_index;
header[0x005] = (uint8_t)(chunk_index>>8);
header[0x006] = (uint8_t)chunk_count;
header[0x007] = (uint8_t)(chunk_count>>8);
// Frame index
header[0x008] = (uint8_t)settings->state_vid.frame_index;
header[0x009] = (uint8_t)(settings->state_vid.frame_index>>8);
header[0x00A] = (uint8_t)(settings->state_vid.frame_index>>16);
header[0x00B] = (uint8_t)(settings->state_vid.frame_index>>24);
// Video frame size
header[0x010] = (uint8_t)settings->video_width;
header[0x011] = (uint8_t)(settings->video_width>>8);
header[0x012] = (uint8_t)settings->video_height;
header[0x013] = (uint8_t)(settings->video_height>>8);
// 32-byte blocks required for MDEC data
header[0x014] = (uint8_t)settings->state_vid.blocks_used;
header[0x015] = (uint8_t)(settings->state_vid.blocks_used>>8);
// Some weird thing
header[0x016] = 0x00;
header[0x017] = 0x38;
// Quantization scale
header[0x018] = (uint8_t)settings->state_vid.quant_scale;
header[0x019] = (uint8_t)(settings->state_vid.quant_scale>>8);
// Version
header[0x01A] = 0x02; // Version 2
header[0x01B] = 0x00;
// Demuxed bytes used as a multiple of 4
header[0x00C] = (uint8_t)settings->state_vid.bytes_used;
header[0x00D] = (uint8_t)(settings->state_vid.bytes_used>>8);
header[0x00E] = (uint8_t)(settings->state_vid.bytes_used>>16);
header[0x00F] = (uint8_t)(settings->state_vid.bytes_used>>24);
memcpy(output + 2352*i + 0x018, header, sizeof(header));
memcpy(output + 2352*i + 0x018 + 0x020, settings->state_vid.unmuxed + 2016*settings->state_vid.frame_block_index, 2016);
settings->state_vid.frame_block_index++;
// Current frame exhausted: budget and encode the next one.
while(settings->state_vid.frame_data_offset >= settings->state_vid.frame_max_size) {
settings->state_vid.frame_index++;
// TODO: work out an optimal block count for this
// TODO: calculate this all based on FPS
settings->state_vid.frame_block_overflow_num += settings->state_vid.frame_block_base_overflow;
settings->state_vid.frame_max_size = settings->state_vid.frame_block_overflow_num / settings->state_vid.frame_block_overflow_den * 2016;
settings->state_vid.frame_block_overflow_num %= settings->state_vid.frame_block_overflow_den;
settings->state_vid.frame_data_offset = 0;
encode_frame_bs(video_frames, settings);
}
// STR version
header[0x000] = 0x60;
header[0x001] = 0x01;
// Chunk type: MDEC data
header[0x002] = 0x01;
header[0x003] = 0x80;
// Muxed chunk index/count
int chunk_index = settings->state_vid.frame_data_offset/2016;
int chunk_count = settings->state_vid.frame_max_size/2016;
header[0x004] = (uint8_t)chunk_index;
header[0x005] = (uint8_t)(chunk_index>>8);
header[0x006] = (uint8_t)chunk_count;
header[0x007] = (uint8_t)(chunk_count>>8);
// Frame index
header[0x008] = (uint8_t)settings->state_vid.frame_index;
header[0x009] = (uint8_t)(settings->state_vid.frame_index>>8);
header[0x00A] = (uint8_t)(settings->state_vid.frame_index>>16);
header[0x00B] = (uint8_t)(settings->state_vid.frame_index>>24);
// Video frame size
header[0x010] = (uint8_t)settings->video_width;
header[0x011] = (uint8_t)(settings->video_width>>8);
header[0x012] = (uint8_t)settings->video_height;
header[0x013] = (uint8_t)(settings->video_height>>8);
// MDEC command (size of decompressed MDEC data)
header[0x014] = (uint8_t)settings->state_vid.blocks_used;
header[0x015] = (uint8_t)(settings->state_vid.blocks_used>>8);
header[0x016] = 0x00;
header[0x017] = 0x38;
// Quantization scale
header[0x018] = (uint8_t)settings->state_vid.quant_scale;
header[0x019] = (uint8_t)(settings->state_vid.quant_scale>>8);
// BS version
header[0x01A] = 0x02;
header[0x01B] = 0x00;
// Demuxed bytes used as a multiple of 4
header[0x00C] = (uint8_t)settings->state_vid.bytes_used;
header[0x00D] = (uint8_t)(settings->state_vid.bytes_used>>8);
header[0x00E] = (uint8_t)(settings->state_vid.bytes_used>>16);
header[0x00F] = (uint8_t)(settings->state_vid.bytes_used>>24);
if (settings->format == FORMAT_STR2CD) {
memcpy(output + 0x018, header, sizeof(header));
memcpy(output + 0x018 + 0x020, settings->state_vid.frame_output + settings->state_vid.frame_data_offset, 2016);
} else {
memcpy(output + 0x008, header, sizeof(header));
memcpy(output + 0x008 + 0x020, settings->state_vid.frame_output + settings->state_vid.frame_data_offset, 2016);
}
settings->state_vid.frame_data_offset += 2016;
}

View File

@ -3,6 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend
Copyright (c) 2019, 2020 Adrian "asie" Siekierka
Copyright (c) 2019 Ben "GreaseMonkey" Russell
Copyright (c) 2023 spicyjpeg
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
@ -23,87 +24,247 @@ freely, subject to the following restrictions:
#include "common.h"
// Command-line names of the output formats accepted by the -t option.
// parse_args() stores the index of the matching entry directly in
// settings->format, so the order here has to line up with the FORMAT_*
// constants. NOTE(review): those are declared elsewhere - confirm the order.
const char *format_names[NUM_FORMATS] = {
"xa", "xacd",
"spu", "spui",
"vag", "vagi",
"str2", "str2cd",
"sbs2"
};
// Prints the usage summary and the option reference to stderr.
//
// NOTE(review): this text is a rendered diff without +/- markers; the run of
// single-line fprintf calls is presumably the previous help text and the
// multi-line fprintf call its replacement.
// NOTE(review): the "-x speed" line reads "meant to played at"; it presumably
// should read "meant to be played at".
void print_help(void) {
fprintf(stderr, "Usage: psxavenc [-f freq] [-b bitdepth] [-c channels] [-F num] [-C num] [-t xa|xacd|spu|str2] <in> <out>\n\n");
fprintf(stderr, " -f freq Use specified frequency\n");
fprintf(stderr, " -t format Use specified output type:\n");
fprintf(stderr, " xa [A.] .xa 2336-byte sectors\n");
fprintf(stderr, " xacd [A.] .xa 2352-byte sectors\n");
fprintf(stderr, " spu [A.] raw SPU-ADPCM data\n");
fprintf(stderr, " str2 [AV] v2 .str video 2352-byte sectors\n");
fprintf(stderr, " -b bitdepth Use specified bit depth (only 4 bits supported)\n");
fprintf(stderr, " -c channels Use specified channel count (1 or 2)\n");
fprintf(stderr, " -F num [.xa] Set the file number to num (0-255)\n");
fprintf(stderr, " -C num [.xa] Set the channel number to num (0-31)\n");
fprintf(stderr,
"Usage:\n"
" psxavenc -t <xa|xacd> [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] <in> <out.xa>\n"
" psxavenc -t <str2|str2cd> [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] [-s WxH] [-I] [-r num/den] [-x 1|2] <in> <out.str>\n"
" psxavenc -t sbs2 [-s WxH] [-I] [-r num/den] [-a size] <in> <out.str>\n"
" psxavenc -t <spu|vag> [-f freq] [-L] <in> <out.vag>\n"
" psxavenc -t <spui|vagi> [-f freq] [-c 1-24] [-L] [-i size] [-a size] <in> <out.vag>\n"
"\nTool options:\n"
" -h Show this help message and exit\n"
" -q Suppress all non-error messages\n"
"\nOutput options:\n"
" -t format Use specified output type:\n"
" xa [A.] .xa, 2336-byte sectors\n"
" xacd [A.] .xa, 2352-byte sectors\n"
" spu [A.] raw SPU-ADPCM mono data\n"
" spui [A.] raw SPU-ADPCM interleaved data\n"
" vag [A.] .vag SPU-ADPCM mono\n"
" vagi [A.] .vag SPU-ADPCM interleaved\n"
" str2 [AV] v2 .str video, 2336-byte sectors\n"
" str2cd [AV] v2 .str video, 2352-byte sectors\n"
" sbs2 [.V] v2 .sbs video, 2048-byte sectors\n"
" -F num Set the XA file number for xa/str2 (0-255)\n"
" -C num Set the XA channel number for xa/str2 (0-31)\n"
"\nAudio options:\n"
" -f freq Use specified sample rate (must be 18900 or 37800 for xa/str2)\n"
" -b bitdepth Use specified bit depth for xa/str2 (4 or 8)\n"
" -c channels Use specified channel count (1-2 for xa/str2, any for spui/vagi)\n"
" -L Add a loop marker at the end of SPU-ADPCM data\n"
" -R key=value,... Pass custom options to libswresample (see ffmpeg docs)\n"
"\nSPU interleaving options (spui/vagi format):\n"
" -i size Use specified interleave\n"
" -a size Pad header and each interleaved chunk to specified size\n"
"\nVideo options (str2/str2cd/sbs2 format):\n"
" -s WxH Rescale input file to fit within specified size (default 320x240)\n"
" -I Force stretching to given size without preserving aspect ratio\n"
" -S key=value,... Pass custom options to libswscale (see ffmpeg docs)\n"
" -r num/den Set frame rate to specified integer or fraction (default 15)\n"
" -x speed Set the CD-ROM speed the file is meant to played at (1-2)\n"
" -a size Set the size of each frame for sbs2\n"
);
}
// Parses the command-line options with getopt() into *settings, then checks
// that the resulting combination is valid for the selected output format.
//
// Returns optind (the index of the first non-option argument) on success, or
// -1 after printing a message to stderr. -h and -? print the help text and
// also return -1.
//
// NOTE(review): this text is a rendered diff without +/- markers; the
// previous option handling (atoi-based, strcmp chain for -t) appears next to
// its replacement, and the function's closing brace falls in a part of the
// file that is not shown.
// NOTE(review): numeric options are read with strtol(..., NULL, 0), so
// trailing garbage is not detected and 0x / 0 prefixes select hex / octal.
// NOTE(review): both "Invalid bit depth" messages print settings->frequency
// rather than the rejected settings->bits_per_sample.
int parse_args(settings_t* settings, int argc, char** argv) {
int c;
while ((c = getopt(argc, argv, "t:f:b:c:F:C:")) != -1) {
int c, i;
char *next;
while ((c = getopt(argc, argv, "?hqt:F:C:f:b:c:LR:i:a:s:IS:r:x:")) != -1) {
switch (c) {
case '?':
case 'h': {
print_help();
return -1;
} break;
case 'q': {
settings->quiet = true;
settings->show_progress = false;
} break;
case 't': {
if (strcmp(optarg, "xa") == 0) {
settings->format = FORMAT_XA;
} else if (strcmp(optarg, "xacd") == 0) {
settings->format = FORMAT_XACD;
} else if (strcmp(optarg, "spu") == 0) {
settings->format = FORMAT_SPU;
} else if (strcmp(optarg, "str2") == 0) {
settings->format = FORMAT_STR2;
} else {
settings->format = -1;
// The index into format_names doubles as the format identifier.
for (i = 0; i < NUM_FORMATS; i++) {
if (!strcmp(optarg, format_names[i])) {
settings->format = i;
break;
}
}
if (settings->format < 0) {
fprintf(stderr, "Invalid format: %s\n", optarg);
return -1;
}
} break;
case 'f': {
settings->frequency = atoi(optarg);
} break;
case 'b': {
settings->bits_per_sample = atoi(optarg);
if (settings->bits_per_sample != 4) {
fprintf(stderr, "Invalid bit depth: %d\n", settings->frequency);
return -1;
}
} break;
case 'c': {
int ch = atoi(optarg);
if (ch <= 0 || ch > 2) {
fprintf(stderr, "Invalid channel count: %d\n", ch);
return -1;
}
settings->stereo = (ch == 2 ? 1 : 0);
} break;
case 'F': {
settings->file_number = atoi(optarg);
settings->file_number = strtol(optarg, NULL, 0);
if (settings->file_number < 0 || settings->file_number > 255) {
fprintf(stderr, "Invalid file number: %d\n", settings->file_number);
return -1;
}
} break;
case 'C': {
settings->channel_number = atoi(optarg);
settings->channel_number = strtol(optarg, NULL, 0);
if (settings->channel_number < 0 || settings->channel_number > 31) {
fprintf(stderr, "Invalid channel number: %d\n", settings->channel_number);
return -1;
}
} break;
case '?':
case 'h': {
print_help();
return -1;
case 'f': {
settings->frequency = strtol(optarg, NULL, 0);
} break;
case 'b': {
settings->bits_per_sample = strtol(optarg, NULL, 0);
if (settings->bits_per_sample != 4 && settings->bits_per_sample != 8) {
// NOTE(review): prints the frequency, not the rejected bit depth.
fprintf(stderr, "Invalid bit depth: %d\n", settings->frequency);
return -1;
}
} break;
case 'c': {
settings->channels = strtol(optarg, NULL, 0);
if (settings->channels < 1 || settings->channels > 24) {
fprintf(stderr, "Invalid channel count: %d\n", settings->channels);
return -1;
}
} break;
case 'L': {
settings->loop = true;
} break;
case 'R': {
settings->swresample_options = optarg;
} break;
case 'i': {
// Rounded up to a multiple of 16 before the range check.
settings->interleave = (strtol(optarg, NULL, 0) + 15) & ~15;
if (settings->interleave < 16) {
fprintf(stderr, "Invalid interleave: %d\n", settings->interleave);
return -1;
}
} break;
case 'a': {
settings->alignment = strtol(optarg, NULL, 0);
if (settings->alignment < 1) {
fprintf(stderr, "Invalid alignment: %d\n", settings->alignment);
return -1;
}
} break;
case 's': {
// Both dimensions are rounded up to a multiple of 16 before the range
// checks, so e.g. a width of 321 becomes 336 and is rejected.
settings->video_width = (strtol(optarg, &next, 0) + 15) & ~15;
if (*next != 'x') {
fprintf(stderr, "Invalid video size (must be specified as <width>x<height>)\n");
return -1;
}
settings->video_height = (strtol(next + 1, NULL, 0) + 15) & ~15;
if (settings->video_width < 16 || settings->video_width > 320) {
fprintf(stderr, "Invalid video width: %d\n", settings->video_width);
return -1;
}
if (settings->video_height < 16 || settings->video_height > 240) {
fprintf(stderr, "Invalid video height: %d\n", settings->video_height);
return -1;
}
} break;
case 'I': {
settings->ignore_aspect_ratio = true;
} break;
case 'S': {
settings->swscale_options = optarg;
} break;
case 'r': {
settings->video_fps_num = strtol(optarg, &next, 0);
if (*next == '/') {
settings->video_fps_den = strtol(next + 1, NULL, 0);
} else {
settings->video_fps_den = 1;
}
if (!settings->video_fps_den) {
fprintf(stderr, "Invalid frame rate denominator\n");
return -1;
}
// The range check uses the truncated integer quotient.
i = settings->video_fps_num / settings->video_fps_den;
if (i < 1 || i > 30) {
fprintf(stderr, "Invalid frame rate: %d/%d\n", settings->video_fps_num, settings->video_fps_den);
return -1;
}
} break;
case 'x': {
settings->cd_speed = strtol(optarg, NULL, 0);
if (settings->cd_speed < 1 || settings->cd_speed > 2) {
fprintf(stderr, "Invalid CD-ROM speed: %d\n", settings->cd_speed);
return -1;
}
} break;
}
}
if (settings->format == FORMAT_XA || settings->format == FORMAT_XACD) {
if (settings->frequency != PSX_AUDIO_XA_FREQ_SINGLE && settings->frequency != PSX_AUDIO_XA_FREQ_DOUBLE) {
fprintf(stderr, "Invalid frequency: %d Hz\n", settings->frequency);
// Validate settings
switch (settings->format) {
case FORMAT_XA:
case FORMAT_XACD:
case FORMAT_STR2:
case FORMAT_STR2CD:
if (settings->frequency != PSX_AUDIO_XA_FREQ_SINGLE && settings->frequency != PSX_AUDIO_XA_FREQ_DOUBLE) {
fprintf(
stderr, "Invalid XA-ADPCM frequency: %d Hz (must be %d or %d Hz)\n", settings->frequency,
PSX_AUDIO_XA_FREQ_SINGLE, PSX_AUDIO_XA_FREQ_DOUBLE
);
return -1;
}
if (settings->channels > 2) {
fprintf(stderr, "Invalid XA-ADPCM channel count: %d (must be 1 or 2)\n", settings->channels);
return -1;
}
if (settings->loop) {
fprintf(stderr, "XA-ADPCM does not support loop markers\n");
return -1;
}
break;
case FORMAT_SPU:
case FORMAT_VAG:
if (settings->bits_per_sample != 4) {
fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample);
return -1;
}
if (settings->channels != 1) {
fprintf(stderr, "Invalid SPU-ADPCM channel count: %d (must be 1)\n", settings->channels);
return -1;
}
if (settings->interleave) {
fprintf(stderr, "Interleave cannot be specified for mono SPU-ADPCM\n");
return -1;
}
break;
case FORMAT_SPUI:
case FORMAT_VAGI:
if (settings->bits_per_sample != 4) {
fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample);
return -1;
}
if (!settings->interleave) {
fprintf(stderr, "Interleave must be specified for interleaved SPU-ADPCM\n");
return -1;
}
break;
case FORMAT_SBS2:
if (!settings->alignment) {
fprintf(stderr, "Alignment (frame size) must be specified\n");
return -1;
}
if (settings->alignment < 256) {
fprintf(stderr, "Invalid frame size: %d (must be at least 256)\n", settings->alignment);
return -1;
}
break;
default:
fprintf(stderr, "Output format must be specified\n");
return -1;
}
}
if (settings->format == FORMAT_SPU) {
settings->stereo = false;
}
return optind;
@ -116,28 +277,46 @@ int main(int argc, char **argv) {
memset(&settings,0,sizeof(settings_t));
settings.quiet = false;
settings.show_progress = isatty(fileno(stderr));
settings.format = -1;
settings.file_number = 0;
settings.channel_number = 0;
settings.stereo = true;
settings.cd_speed = 2;
settings.channels = 1;
settings.frequency = PSX_AUDIO_XA_FREQ_DOUBLE;
settings.bits_per_sample = 4;
settings.interleave = 0;
settings.alignment = 2048;
settings.loop = false;
// NOTE: ffmpeg/ffplay's .str demuxer has the frame rate hardcoded to 15fps
// so if you're messing around with this make sure you test generated files
// with another player and/or in an emulator.
settings.video_width = 320;
settings.video_height = 240;
settings.video_fps_num = 15;
settings.video_fps_den = 1;
settings.ignore_aspect_ratio = false;
settings.swresample_options = NULL;
settings.swscale_options = NULL;
settings.audio_samples = NULL;
settings.audio_sample_count = 0;
settings.video_frames = NULL;
settings.video_frame_count = 0;
// TODO: make this adjustable
// also for some reason ffmpeg seems to hard-code the framerate to 15fps
settings.video_fps_num = 15;
settings.video_fps_den = 1;
for(int i = 0; i < 6; i++) {
settings.state_vid.dct_block_lists[i] = NULL;
}
if (argc < 2) {
print_help();
return 1;
}
arg_offset = parse_args(&settings, argc, argv);
if (arg_offset < 0) {
return 1;
@ -146,13 +325,12 @@ int main(int argc, char **argv) {
return 1;
}
fprintf(stderr, "Using settings: %d Hz @ %d bit depth, %s. F%d C%d\n",
settings.frequency, settings.bits_per_sample,
settings.stereo ? "stereo" : "mono",
settings.file_number, settings.channel_number
);
bool has_audio = (settings.format != FORMAT_SBS2);
bool has_video = (settings.format == FORMAT_STR2) ||
(settings.format == FORMAT_STR2CD) || (settings.format == FORMAT_SBS2);
bool did_open_data = open_av_data(argv[arg_offset + 0], &settings);
bool did_open_data = open_av_data(argv[arg_offset + 0], &settings,
has_audio, has_video, !has_video, has_video);
if (!did_open_data) {
fprintf(stderr, "Could not open input file!\n");
return 1;
@ -164,23 +342,75 @@ int main(int argc, char **argv) {
return 1;
}
int av_sample_mul = settings.stereo ? 2 : 1;
settings.start_time = time(NULL);
settings.last_progress_update = 0;
switch (settings.format) {
case FORMAT_XA:
case FORMAT_XACD:
pull_all_av_data(&settings);
encode_file_xa(settings.audio_samples, settings.audio_sample_count / av_sample_mul, &settings, output);
if (!settings.quiet) {
fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
settings.frequency, settings.bits_per_sample,
(settings.channels == 2) ? "stereo" : "mono",
settings.file_number, settings.channel_number
);
}
encode_file_xa(&settings, output);
break;
case FORMAT_SPU:
pull_all_av_data(&settings);
encode_file_spu(settings.audio_samples, settings.audio_sample_count / av_sample_mul, &settings, output);
case FORMAT_VAG:
if (!settings.quiet) {
fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz mono\n",
settings.frequency
);
}
encode_file_spu(&settings, output);
break;
case FORMAT_SPUI:
case FORMAT_VAGI:
if (!settings.quiet) {
fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n",
settings.frequency, settings.channels, settings.interleave
);
}
encode_file_spu_interleaved(&settings, output);
break;
case FORMAT_STR2:
case FORMAT_STR2CD:
if (!settings.quiet) {
if (settings.decoder_state_av.audio_stream) {
fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n",
settings.frequency, settings.bits_per_sample,
(settings.channels == 2) ? "stereo" : "mono",
settings.file_number, settings.channel_number
);
}
fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n",
settings.video_width, settings.video_height,
(double)settings.video_fps_num / (double)settings.video_fps_den
);
}
encode_file_str(&settings, output);
break;
case FORMAT_SBS2:
if (!settings.quiet) {
fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n",
settings.video_width, settings.video_height,
(double)settings.video_fps_num / (double)settings.video_fps_den
);
}
encode_file_sbs(&settings, output);
break;
}
if (settings.show_progress) {
fprintf(stderr, "\nDone.\n");
}
fclose(output);
close_av_data(&settings);
return 0;