From 982fad256ef10832f3dfc893b753ea189a05f2c3 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Tue, 25 Feb 2025 18:54:53 +0100 Subject: [PATCH 1/8] Add .editorconfig, .gitignore and FFmpeg deprecation note --- .editorconfig | 9 +++++++++ .gitignore | 6 ++++++ meson.build | 26 +++++++++++++------------- psxavenc/decoding.c | 1 + 4 files changed, 29 insertions(+), 13 deletions(-) create mode 100644 .editorconfig create mode 100644 .gitignore diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..4dd2432 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,9 @@ +root = true + +[*] +indent_style = tab +indent_size = 4 +charset = utf-8 +end_of_line = lf +trim_trailing_whitespace = true +insert_final_newline = true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5feaf27 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +desktop.ini +.DS_Store +.vscode/ +build/ +.cache/ +*.code-workspace diff --git a/meson.build b/meson.build index 99b6249..c70759c 100644 --- a/meson.build +++ b/meson.build @@ -9,24 +9,24 @@ configure_file(output: 'config.h', configuration: conf_data) libm_dep = meson.get_compiler('c').find_library('m') ffmpeg = [ - dependency('libavformat'), - dependency('libavcodec'), - dependency('libavutil'), - dependency('libswresample'), - dependency('libswscale') + dependency('libavformat'), + dependency('libavcodec'), + dependency('libavutil'), + dependency('libswresample'), + dependency('libswscale') ] libpsxav = static_library('psxav', [ - 'libpsxav/adpcm.c', - 'libpsxav/cdrom.c', - 'libpsxav/libpsxav.h' + 'libpsxav/adpcm.c', + 'libpsxav/cdrom.c', + 'libpsxav/libpsxav.h' ]) libpsxav_dep = declare_dependency(include_directories: include_directories('libpsxav'), link_with: libpsxav) executable('psxavenc', [ - 'psxavenc/cdrom.c', - 'psxavenc/decoding.c', - 'psxavenc/filefmt.c', - 'psxavenc/mdec.c', - 'psxavenc/psxavenc.c' + 'psxavenc/cdrom.c', + 'psxavenc/decoding.c', + 'psxavenc/filefmt.c', + 'psxavenc/mdec.c', + 
'psxavenc/psxavenc.c' ], dependencies: [libm_dep, ffmpeg, libpsxav_dep], install: true) diff --git a/psxavenc/decoding.c b/psxavenc/decoding.c index 99895ab..54a9124 100644 --- a/psxavenc/decoding.c +++ b/psxavenc/decoding.c @@ -399,6 +399,7 @@ void close_av_data(settings_t *settings) av_frame_free(&(av->frame)); swr_free(&(av->resampler)); + // Deprecated, kept for compatibility with older FFmpeg versions. avcodec_close(av->audio_codec_context); avcodec_free_context(&(av->audio_codec_context)); avformat_free_context(av->format); From 7b5953322f789e3dea303f39858a51b40dba2057 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Fri, 28 Feb 2025 01:26:41 +0100 Subject: [PATCH 2/8] Add new argument parser --- meson.build | 1 + psxavenc/args.c | 711 ++++++++++++++++++++++++++++++++++++++++++++ psxavenc/args.h | 93 ++++++ psxavenc/psxavenc.c | 4 +- 4 files changed, 807 insertions(+), 2 deletions(-) create mode 100644 psxavenc/args.c create mode 100644 psxavenc/args.h diff --git a/meson.build b/meson.build index c70759c..abd8a35 100644 --- a/meson.build +++ b/meson.build @@ -24,6 +24,7 @@ libpsxav = static_library('psxav', [ libpsxav_dep = declare_dependency(include_directories: include_directories('libpsxav'), link_with: libpsxav) executable('psxavenc', [ + 'psxavenc/args.c', 'psxavenc/cdrom.c', 'psxavenc/decoding.c', 'psxavenc/filefmt.c', diff --git a/psxavenc/args.c b/psxavenc/args.c new file mode 100644 index 0000000..03d0695 --- /dev/null +++ b/psxavenc/args.c @@ -0,0 +1,711 @@ +/* +psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend + +Copyright (c) 2019, 2020 Adrian "asie" Siekierka +Copyright (c) 2019 Ben "GreaseMonkey" Russell +Copyright (c) 2023, 2025 spicyjpeg + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. 
+ +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include +#include +#include +#include +#include "args.h" + +#define INVALID_PARAM -1 + +static int parse_int( + int *output, + const char *name, + const char *value, + int min_value, + int max_value +) { + if (value == NULL) { + fprintf(stderr, "Missing %s value after option\n", name); + return INVALID_PARAM; + } + + *output = strtol(value, NULL, 0); + + if ( + (*output < min_value) || + (max_value >= 0 && *output > max_value) + ) { + if (max_value >= 0) + fprintf(stderr, "Invalid %s: %d (must be in %d-%d range)\n", name, *output, min_value, max_value); + else + fprintf(stderr, "Invalid %s: %d (must be %d or greater)\n", name, *output, min_value); + return INVALID_PARAM; + } + + return 2; +} + +static int parse_int_one_of( + int *output, + const char *name, + const char *value, + int value_a, + int value_b +) { + if (value == NULL) { + fprintf(stderr, "Missing %s value after option\n", name); + return INVALID_PARAM; + } + + *output = strtol(value, NULL, 0); + + if (*output != value_a && *output != value_b) { + fprintf(stderr, "Invalid %s: %d (must be %d or %d)\n", name, *output, value_a, value_b); + return INVALID_PARAM; + } + + return 2; +} + +static int parse_enum( + int *output, + const char *name, + const char *value, + const char *const *choices, + int count +) { + if (value == NULL) { + fprintf(stderr, "Missing %s 
value after option\n", name); + return INVALID_PARAM; + } + for (int i = 0; i < count; i++) { + if (strcmp(value, choices[i]) == 0) { + *output = i; + return 2; + } + } + + fprintf( + stderr, + "Invalid %s: %s\n" + "Must be one of the following values:\n", + name, + value + ); + for (int i = 0; i < count; i++) + fprintf(stderr, " %s\n", choices[i]); + return INVALID_PARAM; +} + +static const char *const general_options_help = + "General options:\n" + " -h Show this help message and exit\n" + " -V Show version information and exit\n" + " -q Suppress all non-error messages\n" + " -t format Use (or show help for) specified output format\n" + " xa: [A.] XA-ADPCM, 2336-byte sectors\n" + " xacd: [A.] XA-ADPCM, 2352-byte sectors\n" + " spu: [A.] raw SPU-ADPCM mono data\n" + " spui: [A.] raw SPU-ADPCM interleaved data\n" + " vag: [A.] .vag SPU-ADPCM mono\n" + " vagi: [A.] .vag SPU-ADPCM interleaved\n" + " str: [AV] .str video, 2336-byte sectors\n" + " strcd: [AV] .str video, 2352-byte sectors\n" + " strspu: [AV] .str video, 2048-byte sectors\n" + " strv: [.V] .str video, 2048-byte sectors\n" + " sbs: [.V] .sbs video\n" + " -R key=value,... Pass custom options to libswresample (see FFmpeg docs)\n" + " -S key=value,... 
Pass custom options to libswscale (see FFmpeg docs)\n" + "\n"; + +static const char *const format_names[NUM_FORMATS] = { + "xa", + "xacd", + "spu", + "vag", + "spui", + "vagi", + "str", + "strcd", + "strspu", + "strv", + "sbs" +}; + +static void init_default_args(args_t *args) { + args->flags = 0; + + args->input_file = NULL; + args->output_file = NULL; + args->swresample_options = NULL; + args->swscale_options = NULL; + + if ( + args->format == FORMAT_XA || args->format == FORMAT_XACD || + args->format == FORMAT_STR || args->format == FORMAT_STRCD + ) + args->audio_frequency = 37800; + else + args->audio_frequency = 44100; + if (args->format == FORMAT_SPU || args->format == FORMAT_VAG) + args->audio_channels = 1; + else + args->audio_channels = 2; + + args->audio_bit_depth = 4; + args->audio_xa_file = 0; + args->audio_xa_channel = 0; + args->audio_interleave = 2048; + args->audio_loop_point = -1; + + args->video_codec = BS_CODEC_V2; + args->video_width = 320; + args->video_height = 240; + + args->str_fps_num = 15; + args->str_fps_den = 1; + args->str_cd_speed = 2; + + if (args->format == FORMAT_SPU || args->format == FORMAT_VAG) + args->alignment = 64; + else if (args->format == FORMAT_SBS) + args->alignment = 8192; + else + args->alignment = 2048; +} + +static int parse_general_option(args_t *args, char option, const char *param) { + int parsed; + + switch (option) { + case '-': + args->flags |= FLAG_IGNORE_OPTIONS; + return 1; + + case 'h': + args->flags |= FLAG_PRINT_HELP; + return 1; + + case 'V': + args->flags |= FLAG_PRINT_VERSION; + return 1; + + case 'q': + args->flags |= FLAG_QUIET | FLAG_HIDE_PROGRESS; + return 1; + + case 't': + parsed = parse_enum(&(args->format), "format", param, format_names, NUM_FORMATS); + if (parsed > 0) + init_default_args(args); + return parsed; + + case 'R': + if (param == NULL) { + fprintf(stderr, "Missing libswresample parameter list after option\n"); + return INVALID_PARAM; + } + + args->swresample_options = param; + return 
2; + + case 'S': + if (param == NULL) { + fprintf(stderr, "Missing libswscale parameter list after option\n"); + return INVALID_PARAM; + } + + args->swscale_options = param; + return 2; + + default: + return 0; + } +} + +static const char *const xa_options_help = + "XA-ADPCM options:\n" + " [-f 18900|37800] [-c 1|2] [-b 4|8] [-F 0-255] [-C 0-31]\n" + "\n" + " -f 18900|37800 Use specified sample rate (default 37800)\n" + " -c 1|2 Use specified channel count (default 2)\n" + " -b 4|8 Use specified bit depth (default 4)\n" + " -F 0-255 Set CD-XA file number (for both audio and video, default 0)\n" + " -C 0-31 Set CD-XA channel number (for both audio and video, default 0)\n" + "\n"; + +static int parse_xa_option(args_t *args, char option, const char *param) { + switch (option) { + case 'f': + return parse_int_one_of(&(args->audio_frequency), "sample rate", param, 18900, 37800); + + case 'c': + return parse_int_one_of(&(args->audio_channels), "channel count", param, 1, 2); + + case 'b': + return parse_int_one_of(&(args->audio_bit_depth), "bit depth", param, 4, 8); + + case 'F': + return parse_int(&(args->audio_xa_file), "file number", param, 0, 255); + + case 'C': + return parse_int(&(args->audio_xa_channel), "channel number", param, 0, 31); + + default: + return 0; + } +} + +static const char *const spu_options_help = + "SPU-ADPCM options:\n" + " [-f freq] [-a size] [-l ms | -L] [-D]\n" + "\n" + " -f freq Use specified sample rate (default 44100)\n" + " -a size Pad audio data excluding header to multiple of given size (default 64)\n" + " -l ms Add loop point at specified offset (in milliseconds)\n" + " -L Set loop end flag at the end of data but do not add a loop point\n" + " -D Do not prepend encoded data with a dummy silent block\n" + "\n"; + +static int parse_spu_option(args_t *args, char option, const char *param) { + switch (option) { + case 'f': + return parse_int(&(args->audio_frequency), "sample rate", param, 1, -1); + + case 'a': + return 
parse_int(&(args->alignment), "alignment", param, 1, -1); + + case 'l': + args->flags |= FLAG_SPU_LOOP_END; + return parse_int(&(args->audio_loop_point), "loop offset", param, 0, -1); + + case 'L': + args->flags |= FLAG_SPU_LOOP_END; + return 1; + + case 'D': + args->flags |= FLAG_SPU_NO_LEADING_DUMMY; + return 1; + + default: + return 0; + } +} + +static const char *const spui_options_help = + "Interleaved SPU-ADPCM options:\n" + " [-f freq] [-c channels] [-i size] [-a size] [-L] [-D]\n" + "\n" + " -f freq Use specified sample rate (default 44100)\n" + " -c channels Use specified channel count (default 2)\n" + " -i size Use specified channel interleave size (default 2048)\n" + " -a size Pad .vag header and each audio chunk to multiples of given size\n" + " (default 2048)\n" + " -L Set loop end flag at the end of each audio chunk\n" + " -D Do not prepend first chunk's data with a dummy silent block\n" + "\n"; + +static int parse_spui_option(args_t *args, char option, const char *param) { + int parsed; + + switch (option) { + case 'f': + return parse_int(&(args->audio_frequency), "sample rate", param, 1, -1); + + case 'c': + return parse_int(&(args->audio_channels), "channel count", param, 1, -1); + + case 'i': + parsed = parse_int(&(args->audio_interleave), "interleave", param, 16, -1); + + // Round up to nearest multiple of 16 + args->audio_interleave = (args->audio_interleave + 15) & ~15; + return parsed; + + case 'a': + return parse_int(&(args->alignment), "alignment", param, 1, -1); + + case 'L': + args->flags |= FLAG_SPU_LOOP_END; + return 1; + + case 'D': + args->flags |= FLAG_SPU_NO_LEADING_DUMMY; + return 1; + + default: + return 0; + } +} + +static const char *const bs_options_help = + "Video options:\n" + " [-v v2|v3|v3dc] [-s WxH] [-I]\n" + "\n" + " -v codec Use specified video codec\n" + " v2: MDEC BS v2 (default)\n" + " v3: MDEC BS v3\n" + " v3dc: MDEC BS v3, expect decoder to wrap DC coefficients\n" + " -s WxH Rescale input file to fit within 
specified size\n" + " (16x16-640x512 in 16-pixel increments, default 320x240)\n" + " -I Force stretching to given size without preserving aspect ratio\n" + "\n"; + +const char *const bs_codec_names[NUM_BS_CODECS] = { + "v2", + "v3", + "v3dc" +}; + +static int parse_bs_option(args_t *args, char option, const char *param) { + char *next = NULL; + + switch (option) { + case 'v': + return parse_enum(&(args->video_codec), "video codec", param, bs_codec_names, NUM_BS_CODECS); + + case 's': + if (param == NULL) { + fprintf(stderr, "Missing video size after option\n"); + return INVALID_PARAM; + } + + args->video_width = strtol(param, &next, 10); + + if (next && *next == 'x') { + args->video_height = strtol(next + 1, NULL, 10); + } else { + fprintf(stderr, "Invalid video size (must be specified as x)\n"); + return INVALID_PARAM; + } + + if (args->video_width < 16 || args->video_width > 640) { + fprintf(stderr, "Invalid video width: %d (must be in 16-640 range)\n", args->video_width); + return INVALID_PARAM; + } + if (args->video_height < 16 || args->video_height > 512) { + fprintf(stderr, "Invalid video height: %d (must be in 16-512 range)\n", args->video_height); + return INVALID_PARAM; + } + + // Round up to nearest multiples of 16 + args->video_width = (args->video_width + 15) & ~15; + args->video_height = (args->video_height + 15) & ~15; + return 2; + + case 'I': + args->flags |= FLAG_BS_IGNORE_ASPECT; + return 1; + + default: + return 0; + } +} + +static const char *const str_options_help = + ".str container options:\n" + " [-r num[/den]] [-x 1|2] [-A]\n" + "\n" + " -r num[/den] Set video frame rate to specified integer or fraction (default 15)\n" + " -x 1|2 Set CD-ROM speed the file is meant to played at (default 2)\n" + " -A Place audio sectors after corresponding video sectors\n" + " (rather than ahead of them)\n" + "\n"; + +static int parse_str_option(args_t *args, char option, const char *param) { + char *next = NULL; + int fps; + + switch (option) { + case 'r': + 
if (param == NULL) { + fprintf(stderr, "Missing frame rate value after option\n"); + return INVALID_PARAM; + } + + args->str_fps_num = strtol(param, &next, 10); + + if (next && *next == '/') + args->str_fps_den = strtol(next + 1, NULL, 10); + else + args->str_fps_den = 1; + + if (args->str_fps_num <= 0 || args->str_fps_den <= 0) { + fprintf(stderr, "Invalid frame rate (must be a non-zero integer or fraction)\n"); + return INVALID_PARAM; + } + + fps = args->str_fps_num / args->str_fps_den; + + if (fps < 1 || fps > 60) { + fprintf(stderr, "Invalid frame rate: %d/%d (must be in 1-60 range)\n", args->str_fps_num, args->str_fps_den); + return INVALID_PARAM; + } + return 2; + + case 'x': + return parse_int_one_of(&(args->str_cd_speed), "CD-ROM speed", param, 1, 2); + + case 'A': + args->flags |= FLAG_STR_TRAILING_AUDIO; + return 1; + + default: + return 0; + } +} + +static const char *const sbs_options_help = + ".sbs container options:\n" + " [-a size]\n" + "\n" + " -a size Set size of each video frame (default 8192)\n" + "\n"; + +static int parse_sbs_option(args_t *args, char option, const char *param) { + switch (option) { + case 'a': + return parse_int(&(args->alignment), "video frame size", param, 256, -1); + + default: + return 0; + } +} + +static const char *const general_usage = + "Usage:\n" + " psxavenc -t xa|xacd [xa-options] \n" + " psxavenc -t spu|vag [spu-options] \n" + " psxavenc -t spui|vagi [spui-options] \n" + " psxavenc -t str|strcd [xa-options] [bs-options] [str-options] \n" + " psxavenc -t strspu [spui-options] [bs-options] [str-options] \n" + " psxavenc -t strv [bs-options] [str-options] \n" + " psxavenc -t sbs [bs-options] [sbs-options] \n" + "\n"; + +static const struct { + const char *usage; + const char *audio_options_help; + const char *video_options_help; + const char *container_options_help; + int (*parse_audio_option)(args_t *, char, const char *); + int (*parse_video_option)(args_t *, char, const char *); + int 
(*parse_container_option)(args_t *, char, const char *); +} format_info[NUM_FORMATS] = { + { + .usage = "psxavenc -t xa [xa-options] ", + .audio_options_help = xa_options_help, + .video_options_help = NULL, + .container_options_help = NULL, + .parse_audio_option = parse_xa_option, + .parse_video_option = NULL, + .parse_container_option = NULL + }, { + .usage = "psxavenc -t xacd [xa-options] ", + .audio_options_help = xa_options_help, + .video_options_help = NULL, + .container_options_help = NULL, + .parse_audio_option = parse_xa_option, + .parse_video_option = NULL, + .parse_container_option = NULL + }, { + .usage = "psxavenc -t spu [spu-options] ", + .audio_options_help = spu_options_help, + .video_options_help = NULL, + .container_options_help = NULL, + .parse_audio_option = parse_spu_option, + .parse_video_option = NULL, + .parse_container_option = NULL + }, { + .usage = "psxavenc -t vag [spu-options] ", + .audio_options_help = spu_options_help, + .video_options_help = NULL, + .container_options_help = NULL, + .parse_audio_option = parse_spu_option, + .parse_video_option = NULL, + .parse_container_option = NULL + }, { + .usage = "psxavenc -t spui [spui-options] ", + .audio_options_help = spui_options_help, + .video_options_help = NULL, + .container_options_help = NULL, + .parse_audio_option = parse_spui_option, + .parse_video_option = NULL, + .parse_container_option = NULL + }, { + .usage = "psxavenc -t vagi [spui-options] ", + .audio_options_help = spui_options_help, + .video_options_help = NULL, + .container_options_help = NULL, + .parse_audio_option = parse_spui_option, + .parse_video_option = NULL, + .parse_container_option = NULL + }, { + .usage = "psxavenc -t str [xa-options] [bs-options] [str-options] ", + .audio_options_help = xa_options_help, + .video_options_help = bs_options_help, + .container_options_help = str_options_help, + .parse_audio_option = parse_xa_option, + .parse_video_option = parse_bs_option, + .parse_container_option = parse_str_option 
+ }, { + .usage = "psxavenc -t strcd [xa-options] [bs-options] [str-options] ", + .audio_options_help = xa_options_help, + .video_options_help = bs_options_help, + .container_options_help = str_options_help, + .parse_audio_option = parse_xa_option, + .parse_video_option = parse_bs_option, + .parse_container_option = parse_str_option + }, { + .usage = "psxavenc -t strspu [spui-options] [bs-options] [str-options] ", + .audio_options_help = spui_options_help, + .video_options_help = bs_options_help, + .container_options_help = str_options_help, + .parse_audio_option = parse_spui_option, + .parse_video_option = parse_bs_option, + .parse_container_option = parse_str_option + }, { + .usage = "psxavenc -t strv [bs-options] [str-options] ", + .audio_options_help = NULL, + .video_options_help = bs_options_help, + .container_options_help = str_options_help, + .parse_audio_option = NULL, + .parse_video_option = parse_bs_option, + .parse_container_option = parse_str_option + }, { + .usage = "psxavenc -t sbs [bs-options] [sbs-options] ", + .audio_options_help = NULL, + .video_options_help = bs_options_help, + .container_options_help = sbs_options_help, + .parse_audio_option = NULL, + .parse_video_option = parse_bs_option, + .parse_container_option = parse_sbs_option + } +}; + +static int parse_option(args_t *args, char option, const char *param) { + int parsed = parse_general_option(args, option, param); + + if (parsed == 0 && args->format != FORMAT_INVALID) { + if (format_info[args->format].parse_audio_option != NULL) + parsed = format_info[args->format].parse_audio_option(args, option, param); + } + if (parsed == 0 && args->format != FORMAT_INVALID) { + if (format_info[args->format].parse_video_option != NULL) + parsed = format_info[args->format].parse_video_option(args, option, param); + } + if (parsed == 0 && args->format != FORMAT_INVALID) { + if (format_info[args->format].parse_container_option != NULL) + parsed = format_info[args->format].parse_container_option(args, 
option, param); + } + if (parsed == 0) { + if (args->format == FORMAT_INVALID) + fprintf( + stderr, + "Unknown general option: -%c\n" + "(if this is a format-specific option, it shall be passed after -t)\n", + option + ); + else + fprintf(stderr, "Unknown option for format %s: -%c\n", format_names[args->format], option); + } + + return parsed; +} + +static void print_help(format_t format) { + if (format == FORMAT_INVALID) { + printf( + "%s%s%s%s%s%s%s%s", + general_usage, + general_options_help, + xa_options_help, + spu_options_help, + spui_options_help, + bs_options_help, + str_options_help, + sbs_options_help + ); + return; + } + + printf( + "Usage:\n" + " %s\n" + "\n" + "%s", + format_info[format].usage, + general_options_help + ); + if (format_info[format].audio_options_help != NULL) + printf("%s", format_info[format].audio_options_help); + if (format_info[format].video_options_help != NULL) + printf("%s", format_info[format].video_options_help); + if (format_info[format].container_options_help != NULL) + printf("%s", format_info[format].container_options_help); +} + +bool parse_args(args_t *args, const char *const *options, int count) { + int arg_index = 0; + + while (arg_index < count) { + const char *option = options[arg_index]; + + if (option[0] == '-' && option[1] != 0 && option[2] == 0 && !(args->flags & FLAG_IGNORE_OPTIONS)) { + const char *param; + if ((arg_index + 1) < count) + param = options[arg_index + 1]; + else + param = NULL; + + int parsed = parse_option(args, option[1], param); + if (parsed <= 0) + return false; + + arg_index += parsed; + continue; + } + + if (args->input_file == NULL) { + args->input_file = option; + } else if (args->output_file == NULL) { + args->output_file = option; + } else { + fprintf(stderr, "There should be no arguments after the output file path\n"); + return false; + } + arg_index++; + } + + if (args->flags & FLAG_PRINT_HELP) { + print_help(args->format); + return false; + } + if (args->format == FORMAT_INVALID || args->input_file == 
NULL || args->output_file == NULL) { + fprintf( + stderr, + "%s" + "For more information about the options supported for a given output format, run:\n" + " psxavenc -t -h\n" + "To view the full list of supported options, run:\n" + " psxavenc -h\n", + general_usage + ); + return false; + } + + return true; +} diff --git a/psxavenc/args.h b/psxavenc/args.h new file mode 100644 index 0000000..9249290 --- /dev/null +++ b/psxavenc/args.h @@ -0,0 +1,93 @@ +/* +psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend + +Copyright (c) 2019, 2020 Adrian "asie" Siekierka +Copyright (c) 2019 Ben "GreaseMonkey" Russell +Copyright (c) 2023, 2025 spicyjpeg + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#pragma once + +#include + +#define NUM_FORMATS 11 +#define NUM_BS_CODECS 3 + +enum { + FLAG_IGNORE_OPTIONS = 1 << 0, + FLAG_QUIET = 1 << 1, + FLAG_HIDE_PROGRESS = 1 << 2, + FLAG_PRINT_HELP = 1 << 3, + FLAG_PRINT_VERSION = 1 << 4, + FLAG_SPU_LOOP_END = 1 << 5, + FLAG_SPU_NO_LEADING_DUMMY = 1 << 6, + FLAG_BS_IGNORE_ASPECT = 1 << 7, + FLAG_STR_TRAILING_AUDIO = 1 << 8 +}; + +typedef enum { + FORMAT_INVALID = -1, + FORMAT_XA, + FORMAT_XACD, + FORMAT_SPU, + FORMAT_VAG, + FORMAT_SPUI, + FORMAT_VAGI, + FORMAT_STR, + FORMAT_STRCD, + FORMAT_STRSPU, + FORMAT_STRV, + FORMAT_SBS +} format_t; + +typedef enum { + BS_CODEC_INVALID = -1, + BS_CODEC_V2, + BS_CODEC_V3, + BS_CODEC_V3DC +} bs_codec_t; + +typedef struct { + int flags; + + format_t format; + const char *input_file; + const char *output_file; + const char *swresample_options; + const char *swscale_options; + + int audio_frequency; // 18900 or 37800 Hz + int audio_channels; + int audio_bit_depth; // 4 or 8 + int audio_xa_file; // 00-FF + int audio_xa_channel; // 00-1F + int audio_interleave; + int audio_loop_point; + + bs_codec_t video_codec; + int video_width; + int video_height; + + int str_fps_num; + int str_fps_den; + int str_cd_speed; // 1 or 2 + int alignment; +} args_t; + +bool parse_args(args_t *args, const char *const *options, int count); diff --git a/psxavenc/psxavenc.c b/psxavenc/psxavenc.c index c64a49b..d980f1d 100644 --- a/psxavenc/psxavenc.c +++ b/psxavenc/psxavenc.c @@ -108,7 +108,7 @@ void print_version(void) { printf("psxavenc " VERSION "\n"); } -int parse_args(settings_t* settings, int argc, char** argv) { +int parse_args_old(settings_t* settings, int argc, char** argv) { int c, i; char *next; while ((c = getopt(argc, argv, "?hVqt:F:C:f:b:c:LR:i:a:s:IS:r:x:")) != -1) { @@ -389,7 +389,7 @@ int main(int argc, char **argv) { return 1; } - arg_offset = parse_args(&settings, argc, argv); + arg_offset = parse_args_old(&settings, argc, argv); if (arg_offset < 0) { return 1; } else if (argc < arg_offset 
+ 2) { From a39f159aaf5c0969f494ee4f0b5f36e25d51285a Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Fri, 28 Feb 2025 02:15:21 +0100 Subject: [PATCH 3/8] Refactor and get rid of common.h --- libpsxav/adpcm.c | 4 +- libpsxav/cdrom.c | 103 ++++-- libpsxav/libpsxav.h | 13 +- meson.build | 5 +- psxavenc/args.c | 12 +- psxavenc/cdrom.c | 61 ---- psxavenc/common.h | 156 --------- psxavenc/decoding.c | 377 +++++++++++--------- psxavenc/decoding.h | 80 +++++ psxavenc/filefmt.c | 518 ++++++++++++++++------------ psxavenc/filefmt.h | 35 ++ psxavenc/main.c | 174 ++++++++++ psxavenc/mdec.c | 822 +++++++++++++++++++++++--------------------- psxavenc/mdec.h | 67 ++++ psxavenc/psxavenc.c | 495 -------------------------- 15 files changed, 1396 insertions(+), 1526 deletions(-) delete mode 100644 psxavenc/cdrom.c delete mode 100644 psxavenc/common.h create mode 100644 psxavenc/decoding.h create mode 100644 psxavenc/filefmt.h create mode 100644 psxavenc/main.c create mode 100644 psxavenc/mdec.h delete mode 100644 psxavenc/psxavenc.c diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c index 96c0ad0..03d298f 100644 --- a/libpsxav/adpcm.c +++ b/libpsxav/adpcm.c @@ -266,7 +266,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat uint8_t init_sector = 1; if (settings.stereo) { sample_count <<= 1; } - + for (i = 0, j = 0; i < sample_count || ((j % 18) != 0); i += sample_jump, j++) { psx_cdrom_sector_mode2_t *sector_data = (psx_cdrom_sector_mode2_t*) (output + ((j/18) * xa_sector_size) - xa_offset); uint8_t *block_data = sector_data->data + ((j%18) * 0x80); @@ -282,7 +282,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat memcpy(block_data + 12, block_data + 8, 4); if ((j+1)%18 == 0) { - psx_cdrom_calculate_checksums((uint8_t*) sector_data, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2); + psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)sector_data, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2); init_sector = 1; } } diff --git a/libpsxav/cdrom.c 
b/libpsxav/cdrom.c index f6b0144..ac9de32 100644 --- a/libpsxav/cdrom.c +++ b/libpsxav/cdrom.c @@ -21,49 +21,88 @@ freely, subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ +#include #include #include "libpsxav.h" -static uint32_t psx_cdrom_calculate_edc(uint8_t *sector, uint32_t offset, uint32_t size) -{ +#define EDC_CRC32_POLYNOMIAL 0xD8018001 + +static uint32_t edc_crc32(uint8_t *data, int length) { uint32_t edc = 0; - for (int i = offset; i < offset+size; i++) { - edc ^= 0xFF&(uint32_t)sector[i]; - for (int ibit = 0; ibit < 8; ibit++) { - edc = (edc>>1)^(0xD8018001*(edc&0x1)); - } + + for (int i = 0; i < length; i++) { + edc ^= 0xFF & (uint32_t)data[i]; + + for (int j = 0; j < 8; j++) + edc = (edc >> 1) ^ (EDC_CRC32_POLYNOMIAL * (edc & 0x1)); } + return edc; } -void psx_cdrom_calculate_checksums(uint8_t *sector, psx_cdrom_sector_type_t type) -{ - switch (type) { - case PSX_CDROM_SECTOR_TYPE_MODE1: { - uint32_t edc = psx_cdrom_calculate_edc(sector, 0x0, 0x810); - sector[0x810] = (uint8_t)(edc); - sector[0x811] = (uint8_t)(edc >> 8); - sector[0x812] = (uint8_t)(edc >> 16); - sector[0x813] = (uint8_t)(edc >> 24); +#define TO_BCD(x) ((x) + ((x) / 10) * 6) +void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type) { + // Sync sequence + memset(sector->mode1.sync + 1, 0xff, 10); + sector->mode1.sync[0x0] = 0x00; + sector->mode1.sync[0xb] = 0x00; + + // Timecode + lba += 150; + sector->mode1.header.minute = TO_BCD(lba / 4500); + sector->mode1.header.second = TO_BCD((lba / 75) % 60); + sector->mode1.header.sector = TO_BCD(lba % 75); + + // Mode + if (type == PSX_CDROM_SECTOR_TYPE_MODE1) { + sector->mode1.header.mode = 0x01; + } else { + sector->mode2.header.mode = 0x02; + + memset(sector->mode2.subheader, 0, sizeof(psx_cdrom_sector_xa_subheader_t)); + sector->mode2.subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA; + + if (type == 
PSX_CDROM_SECTOR_TYPE_MODE2_FORM2) + sector->mode2.subheader[0].submode |= PSX_CDROM_SECTOR_XA_SUBMODE_FORM2; + + memcpy(sector->mode2.subheader + 1, sector->mode2.subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); + } +} + +void psx_cdrom_calculate_checksums(psx_cdrom_sector_t *sector, psx_cdrom_sector_type_t type) { + uint8_t *data = (uint8_t *)sector; + uint32_t edc; + + switch (type) { + case PSX_CDROM_SECTOR_TYPE_MODE1: + edc = edc_crc32(data, 0x810); + + data[0x810] = (uint8_t)(edc); + data[0x811] = (uint8_t)(edc >> 8); + data[0x812] = (uint8_t)(edc >> 16); + data[0x813] = (uint8_t)(edc >> 24); - memset(sector + 0x814, 0, 8); + memset(data + 0x814, 0, 8); // TODO: ECC - } break; - case PSX_CDROM_SECTOR_TYPE_MODE2_FORM1: { - uint32_t edc = psx_cdrom_calculate_edc(sector, 0x10, 0x808); - sector[0x818] = (uint8_t)(edc); - sector[0x819] = (uint8_t)(edc >> 8); - sector[0x81A] = (uint8_t)(edc >> 16); - sector[0x81B] = (uint8_t)(edc >> 24); + break; + case PSX_CDROM_SECTOR_TYPE_MODE2_FORM1: + edc = edc_crc32(data + 0x10, 0x808); + + data[0x818] = (uint8_t)(edc); + data[0x819] = (uint8_t)(edc >> 8); + data[0x81A] = (uint8_t)(edc >> 16); + data[0x81B] = (uint8_t)(edc >> 24); // TODO: ECC - } break; - case PSX_CDROM_SECTOR_TYPE_MODE2_FORM2: { - uint32_t edc = psx_cdrom_calculate_edc(sector, 0x10, 0x91C); - sector[0x92C] = (uint8_t)(edc); - sector[0x92D] = (uint8_t)(edc >> 8); - sector[0x92E] = (uint8_t)(edc >> 16); - sector[0x92F] = (uint8_t)(edc >> 24); - } break; + break; + + case PSX_CDROM_SECTOR_TYPE_MODE2_FORM2: + edc = edc_crc32(data + 0x10, 0x91C); + + data[0x92C] = (uint8_t)(edc); + data[0x92D] = (uint8_t)(edc >> 8); + data[0x92E] = (uint8_t)(edc >> 16); + data[0x92F] = (uint8_t)(edc >> 24); + break; } -} \ No newline at end of file +} diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h index e20138e..32eabaf 100644 --- a/libpsxav/libpsxav.h +++ b/libpsxav/libpsxav.h @@ -21,8 +21,7 @@ freely, subject to the following restrictions: 3. 
This notice may not be removed or altered from any source distribution. */ -#ifndef __LIBPSXAV_H__ -#define __LIBPSXAV_H__ +#pragma once #include #include @@ -106,6 +105,11 @@ typedef struct { uint8_t data[0x918]; } psx_cdrom_sector_mode2_t; +typedef union { + psx_cdrom_sector_mode1_t mode1; + psx_cdrom_sector_mode2_t mode2; +} psx_cdrom_sector_t; + _Static_assert(sizeof(psx_cdrom_sector_mode1_t) == PSX_CDROM_SECTOR_SIZE, "Invalid Mode1 sector size"); _Static_assert(sizeof(psx_cdrom_sector_mode2_t) == PSX_CDROM_SECTOR_SIZE, "Invalid Mode2 sector size"); @@ -137,6 +141,5 @@ typedef enum { PSX_CDROM_SECTOR_TYPE_MODE2_FORM2 } psx_cdrom_sector_type_t; -void psx_cdrom_calculate_checksums(uint8_t *sector, psx_cdrom_sector_type_t type); - -#endif /* __LIBPSXAV_H__ */ +void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type); +void psx_cdrom_calculate_checksums(psx_cdrom_sector_t *sector, psx_cdrom_sector_type_t type); diff --git a/meson.build b/meson.build index abd8a35..4061200 100644 --- a/meson.build +++ b/meson.build @@ -25,9 +25,8 @@ libpsxav_dep = declare_dependency(include_directories: include_directories('libp executable('psxavenc', [ 'psxavenc/args.c', - 'psxavenc/cdrom.c', 'psxavenc/decoding.c', 'psxavenc/filefmt.c', - 'psxavenc/mdec.c', - 'psxavenc/psxavenc.c' + 'psxavenc/main.c', + 'psxavenc/mdec.c' ], dependencies: [libm_dep, ffmpeg, libpsxav_dep], install: true) diff --git a/psxavenc/args.c b/psxavenc/args.c index 03d0695..8c92346 100644 --- a/psxavenc/args.c +++ b/psxavenc/args.c @@ -27,6 +27,7 @@ freely, subject to the following restrictions: #include #include #include "args.h" +#include "config.h" #define INVALID_PARAM -1 @@ -146,13 +147,6 @@ static const char *const format_names[NUM_FORMATS] = { }; static void init_default_args(args_t *args) { - args->flags = 0; - - args->input_file = NULL; - args->output_file = NULL; - args->swresample_options = NULL; - args->swscale_options = NULL; - if ( args->format == FORMAT_XA 
|| args->format == FORMAT_XACD || args->format == FORMAT_STR || args->format == FORMAT_STRCD @@ -694,6 +688,10 @@ bool parse_args(args_t *args, const char *const *options, int count) { print_help(args->format); return false; } + if (args->flags & FLAG_PRINT_VERSION) { + printf("psxavenc " VERSION "\n"); + return false; + } if (args->format == FORMAT_INVALID || args->input_file == NULL || args->output_file == NULL) { fprintf( stderr, diff --git a/psxavenc/cdrom.c b/psxavenc/cdrom.c deleted file mode 100644 index d391e12..0000000 --- a/psxavenc/cdrom.c +++ /dev/null @@ -1,61 +0,0 @@ -/* -psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend - -Copyright (c) 2019, 2020 Adrian "asie" Siekierka -Copyright (c) 2019 Ben "GreaseMonkey" Russell - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -#include "common.h" - -void init_sector_buffer_video(psx_cdrom_sector_mode2_t *buffer, settings_t *settings) { - if (settings->format == FORMAT_STR2CD) { - memset(buffer, 0, PSX_CDROM_SECTOR_SIZE); - memset(buffer->sync + 1, 0xFF, 10); - buffer->header.mode = 0x02; - } else if (settings->format == FORMAT_STR2V) { - memset(buffer->data, 0, 2048); - } else { - memset(buffer->subheader, 0, PSX_CDROM_SECTOR_SIZE - 16); - } - - buffer->subheader[0].file = settings->file_number; - buffer->subheader[0].channel = settings->channel_number & PSX_CDROM_SECTOR_XA_CHANNEL_MASK; - buffer->subheader[0].submode = - PSX_CDROM_SECTOR_XA_SUBMODE_DATA - | PSX_CDROM_SECTOR_XA_SUBMODE_RT; - buffer->subheader[0].coding = 0; - memcpy(buffer->subheader + 1, buffer->subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); -} - -void calculate_edc_data(uint8_t *buffer) -{ - uint32_t edc = 0; - for (int i = 0x010; i < 0x818; i++) { - edc ^= 0xFF&(uint32_t)buffer[i]; - for (int ibit = 0; ibit < 8; ibit++) { - edc = (edc>>1)^(0xD8018001*(edc&0x1)); - } - } - buffer[0x818] = (uint8_t)(edc); - buffer[0x819] = (uint8_t)(edc >> 8); - buffer[0x81A] = (uint8_t)(edc >> 16); - buffer[0x81B] = (uint8_t)(edc >> 24); - - // TODO: ECC -} diff --git a/psxavenc/common.h b/psxavenc/common.h deleted file mode 100644 index 6cf39f9..0000000 --- a/psxavenc/common.h +++ /dev/null @@ -1,156 +0,0 @@ -/* -psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend - -Copyright (c) 2019, 2020 Adrian "asie" Siekierka -Copyright (c) 2019 Ben "GreaseMonkey" Russell - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - -1. 
The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -typedef enum { - FORMAT_XA, - FORMAT_XACD, - FORMAT_SPU, - FORMAT_SPUI, - FORMAT_VAG, - FORMAT_VAGI, - FORMAT_STR2, - FORMAT_STR2CD, - FORMAT_STR2V, - FORMAT_SBS2, - NUM_FORMATS -} psxavenc_format_t; - -typedef struct { - int frame_index; - int frame_data_offset; - int frame_max_size; - int frame_block_base_overflow; - int frame_block_overflow_num; - int frame_block_overflow_den; - uint16_t bits_value; - int bits_left; - uint8_t *frame_output; - int bytes_used; - int blocks_used; - int uncomp_hwords_used; - int quant_scale; - int quant_scale_sum; - - uint32_t *huffman_encoding_map; - int16_t *coeff_clamp_map; - int16_t *dct_block_lists[6]; - AVDCT *dct_context; -} vid_encoder_state_t; - -typedef struct { - int video_frame_dst_size; - int audio_stream_index; - int video_stream_index; - AVFormatContext* format; - AVStream* audio_stream; - AVStream* video_stream; - AVCodecContext* audio_codec_context; - AVCodecContext* video_codec_context; - struct SwrContext* resampler; - struct SwsContext* scaler; - AVFrame* frame; - - int sample_count_mul; - - double video_next_pts; -} av_decoder_state_t; - -typedef struct { - bool quiet; - bool show_progress; - - int format; // FORMAT_* - int channels; - int cd_speed; // 1 or 2 - int frequency; // 18900 or 37800 Hz - int bits_per_sample; // 4 or 8 - int file_number; // 00-FF - int channel_number; // 00-1F - int 
interleave; - int alignment; - bool loop; - - int video_width; - int video_height; - int video_fps_num; // FPS numerator - int video_fps_den; // FPS denominator - bool ignore_aspect_ratio; - - char *swresample_options; - char *swscale_options; - - int16_t *audio_samples; - int audio_sample_count; - uint8_t *video_frames; - int video_frame_count; - - av_decoder_state_t decoder_state_av; - vid_encoder_state_t state_vid; - bool end_of_input; - - time_t start_time; - time_t last_progress_update; -} settings_t; - -// cdrom.c -void init_sector_buffer_video(psx_cdrom_sector_mode2_t *buffer, settings_t *settings); -void calculate_edc_data(uint8_t *buffer); - -// decoding.c -bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required); -bool poll_av_data(settings_t *settings); -bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames); -void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames); -void close_av_data(settings_t *settings); - -// filefmt.c -void encode_file_spu(settings_t *settings, FILE *output); -void encode_file_spu_interleaved(settings_t *settings, FILE *output); -void encode_file_xa(settings_t *settings, FILE *output); -void encode_file_str(settings_t *settings, FILE *output); -void encode_file_sbs(settings_t *settings, FILE *output); - -// mdec.c -bool init_encoder_state(settings_t *settings); -void destroy_encoder_state(settings_t *settings); -void encode_frame_bs(uint8_t *video_frame, settings_t *settings); -void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings); diff --git a/psxavenc/decoding.c b/psxavenc/decoding.c index 54a9124..a29e90a 100644 --- a/psxavenc/decoding.c +++ b/psxavenc/decoding.c @@ -22,30 +22,57 @@ freely, subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. 
*/ -#include "common.h" - -int decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) { - int ret; +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "args.h" +#include "decoding.h" +static int decode_frame( + AVCodecContext *codec, + AVFrame *frame, + int *frame_size, + AVPacket *packet +) { if (packet != NULL) { - ret = avcodec_send_packet(codec, packet); - if (ret != 0) { + if (avcodec_send_packet(codec, packet) != 0) return 0; - } } - ret = avcodec_receive_frame(codec, frame); + int ret = avcodec_receive_frame(codec, frame); + if (ret >= 0) { *frame_size = ret; return 1; + } else if (ret == AVERROR(EAGAIN)) { + return 1; } else { - return ret == AVERROR(EAGAIN) ? 1 : 0; + return 0; } } -bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bool use_video, bool audio_required, bool video_required) -{ - av_decoder_state_t* av = &(settings->decoder_state_av); +bool open_av_data(decoder_t *decoder, const args_t *args, int flags) { + decoder->audio_samples = NULL; + decoder->audio_sample_count = 0; + decoder->video_frames = NULL; + decoder->video_frame_count = 0; + + decoder->video_width = args->video_width; + decoder->video_height = args->video_height; + decoder->video_fps_num = args->str_fps_num; + decoder->video_fps_den = args->str_fps_den; + decoder->end_of_input = false; + + decoder_state_t *av = &(decoder->state); + av->video_next_pts = 0.0; av->frame = NULL; av->video_frame_dst_size = 0; @@ -59,19 +86,17 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo av->resampler = NULL; av->scaler = NULL; - if (settings->quiet) { + if (args->flags & FLAG_QUIET) av_log_set_level(AV_LOG_QUIET); - } av->format = avformat_alloc_context(); - if (avformat_open_input(&(av->format), filename, NULL, NULL)) { - return false; - } - if (avformat_find_stream_info(av->format, NULL) < 0) { - return false; - } - if (use_audio) { + 
if (avformat_open_input(&(av->format), args->input_file, NULL, NULL)) + return false; + if (avformat_find_stream_info(av->format, NULL) < 0) + return false; + + if (flags & DECODER_USE_AUDIO) { for (int i = 0; i < av->format->nb_streams; i++) { if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { if (av->audio_stream_index >= 0) { @@ -81,13 +106,14 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo av->audio_stream_index = i; } } - if (audio_required && av->audio_stream_index == -1) { + + if ((flags & DECODER_AUDIO_REQUIRED) && av->audio_stream_index == -1) { fprintf(stderr, "Input file has no audio data\n"); return false; } } - if (use_video) { + if (flags & DECODER_USE_VIDEO) { for (int i = 0; i < av->format->nb_streams; i++) { if (av->format->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { if (av->video_stream_index >= 0) { @@ -97,7 +123,8 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo av->video_stream_index = i; } } - if (video_required && av->video_stream_index == -1) { + + if ((flags & DECODER_VIDEO_REQUIRED) && av->video_stream_index == -1) { fprintf(stderr, "Input file has no video data\n"); return false; } @@ -109,34 +136,39 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo if (av->audio_stream != NULL) { const AVCodec *codec = avcodec_find_decoder(av->audio_stream->codecpar->codec_id); av->audio_codec_context = avcodec_alloc_context3(codec); - if (av->audio_codec_context == NULL) { + + if (av->audio_codec_context == NULL) return false; - } - if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0) { + if (avcodec_parameters_to_context(av->audio_codec_context, av->audio_stream->codecpar) < 0) return false; - } - if (avcodec_open2(av->audio_codec_context, codec, NULL) < 0) { + if (avcodec_open2(av->audio_codec_context, codec, NULL) < 0) return false; - } AVChannelLayout layout; - 
layout.nb_channels = settings->channels; - if (settings->channels <= 2) { + layout.nb_channels = args->audio_channels; + + if (args->audio_channels == 1) { layout.order = AV_CHANNEL_ORDER_NATIVE; - layout.u.mask = (settings->channels == 2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO; + layout.u.mask = AV_CH_LAYOUT_MONO; + } else if (args->audio_channels == 2) { + layout.order = AV_CHANNEL_ORDER_NATIVE; + layout.u.mask = AV_CH_LAYOUT_STEREO; } else { layout.order = AV_CHANNEL_ORDER_UNSPEC; } - if (!settings->quiet && settings->channels > av->audio_codec_context->ch_layout.nb_channels) { - fprintf(stderr, "Warning: input file has less than %d channels\n", settings->channels); + + if (!(args->flags & FLAG_QUIET)) { + if (args->audio_channels > av->audio_codec_context->ch_layout.nb_channels) + fprintf(stderr, "Warning: input file has less than %d channels\n", args->audio_channels); } - av->sample_count_mul = settings->channels; + av->sample_count_mul = args->audio_channels; + if (swr_alloc_set_opts2( &av->resampler, &layout, AV_SAMPLE_FMT_S16, - settings->frequency, + args->audio_frequency, &av->audio_codec_context->ch_layout, av->audio_codec_context->sample_fmt, av->audio_codec_context->sample_rate, @@ -145,47 +177,43 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo ) < 0) { return false; } - if (settings->swresample_options) { - if (av_opt_set_from_string(av->resampler, settings->swresample_options, NULL, "=", ":,") < 0) { + if (args->swresample_options) { + if (av_opt_set_from_string(av->resampler, args->swresample_options, NULL, "=", ":,") < 0) return false; - } } - - if (swr_init(av->resampler) < 0) { + if (swr_init(av->resampler) < 0) return false; - } } if (av->video_stream != NULL) { const AVCodec *codec = avcodec_find_decoder(av->video_stream->codecpar->codec_id); av->video_codec_context = avcodec_alloc_context3(codec); - if(av->video_codec_context == NULL) { + + if (av->video_codec_context == NULL) return false; - } - if 
(avcodec_parameters_to_context(av->video_codec_context, av->video_stream->codecpar) < 0) { + if (avcodec_parameters_to_context(av->video_codec_context, av->video_stream->codecpar) < 0) return false; - } - if (avcodec_open2(av->video_codec_context, codec, NULL) < 0) { + if (avcodec_open2(av->video_codec_context, codec, NULL) < 0) return false; + + if (!(args->flags & FLAG_QUIET)) { + if ( + decoder->video_width > av->video_codec_context->width || + decoder->video_height > av->video_codec_context->height + ) + fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n", decoder->video_width, decoder->video_height); } - if (!settings->quiet && ( - settings->video_width > av->video_codec_context->width || - settings->video_height > av->video_codec_context->height - )) { - fprintf(stderr, "Warning: input file has resolution lower than %dx%d\n", - settings->video_width, settings->video_height - ); - } - if (!settings->ignore_aspect_ratio) { + if (!(args->flags & FLAG_BS_IGNORE_ASPECT)) { // Reduce the provided size so that it matches the input file's // aspect ratio. 
double src_ratio = (double)av->video_codec_context->width / (double)av->video_codec_context->height; - double dst_ratio = (double)settings->video_width / (double)settings->video_height; + double dst_ratio = (double)decoder->video_width / (double)decoder->video_height; + if (src_ratio < dst_ratio) { - settings->video_width = (int)((double)settings->video_height * src_ratio + 15.0) & ~15; + decoder->video_width = (int)((double)decoder->video_height * src_ratio + 15.0) & ~15; } else { - settings->video_height = (int)((double)settings->video_width / src_ratio + 15.0) & ~15; + decoder->video_height = (int)((double)decoder->video_width / src_ratio + 15.0) & ~15; } } @@ -193,17 +221,16 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo av->video_codec_context->width, av->video_codec_context->height, av->video_codec_context->pix_fmt, - settings->video_width, - settings->video_height, + decoder->video_width, + decoder->video_height, AV_PIX_FMT_NV21, SWS_BICUBIC, NULL, NULL, NULL ); - if (av->scaler == NULL) { + if (av->scaler == NULL) return false; - } if (sws_setColorspaceDetails( av->scaler, sws_getCoefficients(av->video_codec_context->colorspace), @@ -213,189 +240,211 @@ bool open_av_data(const char *filename, settings_t *settings, bool use_audio, bo 0, 1 << 16, 1 << 16 - ) < 0) { + ) < 0) return false; - } - if (settings->swscale_options) { - if (av_opt_set_from_string(av->scaler, settings->swscale_options, NULL, "=", ":,") < 0) { + if (args->swscale_options) { + if (av_opt_set_from_string(av->scaler, args->swscale_options, NULL, "=", ":,") < 0) return false; - } } - av->video_frame_dst_size = 3*settings->video_width*settings->video_height/2; + av->video_frame_dst_size = 3 * decoder->video_width * decoder->video_height / 2; } av->frame = av_frame_alloc(); - if (av->frame == NULL) { - return false; - } - settings->audio_samples = NULL; - settings->audio_sample_count = 0; - settings->video_frames = NULL; - settings->video_frame_count = 0; 
- settings->end_of_input = false; + if (av->frame == NULL) + return false; return true; } -static void poll_av_packet_audio(settings_t *settings, AVPacket *packet) -{ - av_decoder_state_t* av = &(settings->decoder_state_av); +static void poll_av_packet_audio(decoder_t *decoder, AVPacket *packet) { + decoder_state_t *av = &(decoder->state); int frame_size, frame_sample_count; uint8_t *buffer[1]; if (decode_frame(av->audio_codec_context, av->frame, &frame_size, packet)) { size_t buffer_size = sizeof(int16_t) * av->sample_count_mul * swr_get_out_samples(av->resampler, av->frame->nb_samples); + buffer[0] = malloc(buffer_size); memset(buffer[0], 0, buffer_size); - frame_sample_count = swr_convert(av->resampler, buffer, av->frame->nb_samples, (const uint8_t**)av->frame->data, av->frame->nb_samples); - settings->audio_samples = realloc(settings->audio_samples, (settings->audio_sample_count + ((frame_sample_count + 4032) * av->sample_count_mul)) * sizeof(int16_t)); - memmove(&(settings->audio_samples[settings->audio_sample_count]), buffer[0], sizeof(int16_t) * frame_sample_count * av->sample_count_mul); - settings->audio_sample_count += frame_sample_count * av->sample_count_mul; + + frame_sample_count = swr_convert( + av->resampler, + buffer, + av->frame->nb_samples, + (const uint8_t**)av->frame->data, + av->frame->nb_samples + ); + + decoder->audio_samples = realloc( + decoder->audio_samples, + (decoder->audio_sample_count + ((frame_sample_count + 4032) * av->sample_count_mul)) * sizeof(int16_t) + ); + memmove( + &(decoder->audio_samples[decoder->audio_sample_count]), + buffer[0], + sizeof(int16_t) * frame_sample_count * av->sample_count_mul + ); + decoder->audio_sample_count += frame_sample_count * av->sample_count_mul; free(buffer[0]); } } -static void poll_av_packet_video(settings_t *settings, AVPacket *packet) -{ - av_decoder_state_t* av = &(settings->decoder_state_av); +static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) { + decoder_state_t *av = 
&(decoder->state); int frame_size; - double pts_step = ((double)1.0*(double)settings->video_fps_den)/(double)settings->video_fps_num; + double pts_step = (double)decoder->video_fps_den / (double)decoder->video_fps_num; - int plane_size = settings->video_width*settings->video_height; + int plane_size = decoder->video_width * decoder->video_height; int dst_strides[2] = { - settings->video_width, settings->video_width + decoder->video_width, decoder->video_width }; if (decode_frame(av->video_codec_context, av->frame, &frame_size, packet)) { - if (!av->frame->width || !av->frame->height || !av->frame->data[0]) { + if (!av->frame->width || !av->frame->height || !av->frame->data[0]) return; - } // Some files seem to have timestamps starting from a negative value // (but otherwise valid) for whatever reason. - double pts = (((double)av->frame->pts)*(double)av->video_stream->time_base.num)/av->video_stream->time_base.den; - //if (pts < 0.0) { - //return; - //} - if (settings->video_frame_count >= 1 && pts < av->video_next_pts) { + double pts = + ((double)av->frame->pts * (double)av->video_stream->time_base.num) + / av->video_stream->time_base.den; +#if 0 + if (pts < 0.0) return; - } - if ((settings->video_frame_count) < 1) { +#endif + if (decoder->video_frame_count >= 1 && pts < av->video_next_pts) + return; + if (decoder->video_frame_count < 1) av->video_next_pts = pts; - } else { + else av->video_next_pts += pts_step; - } - //fprintf(stderr, "%d %f %f %f\n", (settings->video_frame_count), pts, av->video_next_pts, pts_step); + //fprintf(stderr, "%d %f %f %f\n", decoder->video_frame_count, pts, av->video_next_pts, pts_step); // Insert duplicate frames if the frame rate of the input stream is // lower than the target frame rate. 
int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step); if (dupe_frames < 0) dupe_frames = 0; - settings->video_frames = realloc( - settings->video_frames, - (settings->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size + decoder->video_frames = realloc( + decoder->video_frames, + (decoder->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size ); for (; dupe_frames; dupe_frames--) { memcpy( - (settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count), - (settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count-1), + (decoder->video_frames) + av->video_frame_dst_size * decoder->video_frame_count, + (decoder->video_frames) + av->video_frame_dst_size * (decoder->video_frame_count - 1), av->video_frame_dst_size ); - settings->video_frame_count += 1; + decoder->video_frame_count += 1; av->video_next_pts += pts_step; } - uint8_t *dst_frame = (settings->video_frames) + av->video_frame_dst_size*(settings->video_frame_count); + uint8_t *dst_frame = decoder->video_frames + av->video_frame_dst_size * decoder->video_frame_count; uint8_t *dst_pointers[2] = { dst_frame, dst_frame + plane_size }; - sws_scale(av->scaler, (const uint8_t *const *) av->frame->data, av->frame->linesize, 0, av->frame->height, dst_pointers, dst_strides); + sws_scale( + av->scaler, + (const uint8_t *const *) av->frame->data, + av->frame->linesize, + 0, + av->frame->height, + dst_pointers, + dst_strides + ); - settings->video_frame_count += 1; + decoder->video_frame_count += 1; } } -bool poll_av_data(settings_t *settings) -{ - av_decoder_state_t* av = &(settings->decoder_state_av); +bool poll_av_data(decoder_t *decoder) { + decoder_state_t *av = &(decoder->state); + + if (decoder->end_of_input) + return false; + AVPacket packet; - if (settings->end_of_input) { - return false; - } - if (av_read_frame(av->format, &packet) >= 0) { - if (packet.stream_index == av->audio_stream_index) { - poll_av_packet_audio(settings, &packet); 
- } else if (packet.stream_index == av->video_stream_index) { - poll_av_packet_video(settings, &packet); - } + if (packet.stream_index == av->audio_stream_index) + poll_av_packet_audio(decoder, &packet); + else if (packet.stream_index == av->video_stream_index) + poll_av_packet_video(decoder, &packet); + av_packet_unref(&packet); return true; } else { // out is always padded out with 4032 "0" samples, this makes calculations elsewhere easier - if (av->audio_stream) { - memset((settings->audio_samples) + (settings->audio_sample_count), 0, 4032 * av->sample_count_mul * sizeof(int16_t)); - } + if (av->audio_stream) + memset( + decoder->audio_samples + decoder->audio_sample_count, + 0, + 4032 * av->sample_count_mul * sizeof(int16_t) + ); - settings->end_of_input = true; + decoder->end_of_input = true; return false; } } -bool ensure_av_data(settings_t *settings, int needed_audio_samples, int needed_video_frames) -{ - // HACK: in order to update settings->end_of_input as soon as all data has +bool ensure_av_data(decoder_t *decoder, int needed_audio_samples, int needed_video_frames) { + // HACK: in order to update decoder->end_of_input as soon as all data has // been read from the input file, this loop waits for more data than // strictly needed. 
- //while (settings->audio_sample_count < needed_audio_samples || settings->video_frame_count < needed_video_frames) { +#if 0 + while (decoder->audio_sample_count < needed_audio_samples || decoder->video_frame_count < needed_video_frames) { +#else while ( - (needed_audio_samples && settings->audio_sample_count <= needed_audio_samples) || - (needed_video_frames && settings->video_frame_count <= needed_video_frames) + (needed_audio_samples && decoder->audio_sample_count <= needed_audio_samples) || + (needed_video_frames && decoder->video_frame_count <= needed_video_frames) ) { - //fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames); - if (!poll_av_data(settings)) { +#endif + //fprintf(stderr, "ensure %d -> %d, %d -> %d\n", decoder->audio_sample_count, needed_audio_samples, decoder->video_frame_count, needed_video_frames); + if (!poll_av_data(decoder)) { // Keep returning true even if the end of the input file has been // reached, if the buffer is not yet completely empty. 
- return (settings->audio_sample_count || !needed_audio_samples) - && (settings->video_frame_count || !needed_video_frames); + return + (decoder->audio_sample_count || !needed_audio_samples) && + (decoder->video_frame_count || !needed_video_frames); } } - //fprintf(stderr, "ensure %d -> %d, %d -> %d\n", settings->audio_sample_count, needed_audio_samples, settings->video_frame_count, needed_video_frames); + //fprintf(stderr, "ensure %d -> %d, %d -> %d\n", decoder->audio_sample_count, needed_audio_samples, decoder->video_frame_count, needed_video_frames); return true; } -void retire_av_data(settings_t *settings, int retired_audio_samples, int retired_video_frames) -{ - av_decoder_state_t* av = &(settings->decoder_state_av); - - //fprintf(stderr, "retire %d -> %d, %d -> %d\n", settings->audio_sample_count, retired_audio_samples, settings->video_frame_count, retired_video_frames); - assert(retired_audio_samples <= settings->audio_sample_count); - assert(retired_video_frames <= settings->video_frame_count); +void retire_av_data(decoder_t *decoder, int retired_audio_samples, int retired_video_frames) { + //fprintf(stderr, "retire %d -> %d, %d -> %d\n", decoder->audio_sample_count, retired_audio_samples, decoder->video_frame_count, retired_video_frames); + assert(retired_audio_samples <= decoder->audio_sample_count); + assert(retired_video_frames <= decoder->video_frame_count); int sample_size = sizeof(int16_t); - if (settings->audio_sample_count > retired_audio_samples) { - memmove(settings->audio_samples, settings->audio_samples + retired_audio_samples, (settings->audio_sample_count - retired_audio_samples)*sample_size); - } - settings->audio_sample_count -= retired_audio_samples; + int frame_size = decoder->state.video_frame_dst_size; - int frame_size = av->video_frame_dst_size; - if (settings->video_frame_count > retired_video_frames) { - memmove(settings->video_frames, settings->video_frames + retired_video_frames*frame_size, (settings->video_frame_count - 
retired_video_frames)*frame_size); - } - settings->video_frame_count -= retired_video_frames; + if (decoder->audio_sample_count > retired_audio_samples) + memmove( + decoder->audio_samples, + decoder->audio_samples + retired_audio_samples, + (decoder->audio_sample_count - retired_audio_samples) * sample_size + ); + if (decoder->video_frame_count > retired_video_frames) + memmove( + decoder->video_frames, + decoder->video_frames + retired_video_frames * frame_size, + (decoder->video_frame_count - retired_video_frames) * frame_size + ); + + decoder->audio_sample_count -= retired_audio_samples; + decoder->video_frame_count -= retired_video_frames; } -void close_av_data(settings_t *settings) -{ - av_decoder_state_t* av = &(settings->decoder_state_av); +void close_av_data(decoder_t *decoder) { + decoder_state_t *av = &(decoder->state); av_frame_free(&(av->frame)); swr_free(&(av->resampler)); @@ -404,12 +453,12 @@ void close_av_data(settings_t *settings) avcodec_free_context(&(av->audio_codec_context)); avformat_free_context(av->format); - if(settings->audio_samples != NULL) { - free(settings->audio_samples); - settings->audio_samples = NULL; + if(decoder->audio_samples != NULL) { + free(decoder->audio_samples); + decoder->audio_samples = NULL; } - if(settings->video_frames != NULL) { - free(settings->video_frames); - settings->video_frames = NULL; + if(decoder->video_frames != NULL) { + free(decoder->video_frames); + decoder->video_frames = NULL; } } diff --git a/psxavenc/decoding.h b/psxavenc/decoding.h new file mode 100644 index 0000000..ccf0b65 --- /dev/null +++ b/psxavenc/decoding.h @@ -0,0 +1,80 @@ +/* +psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend + +Copyright (c) 2019, 2020 Adrian "asie" Siekierka +Copyright (c) 2019 Ben "GreaseMonkey" Russell +Copyright (c) 2023, 2025 spicyjpeg + +This software is provided 'as-is', without any express or implied +warranty. 
In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "args.h" + +typedef struct { + int video_frame_dst_size; + int audio_stream_index; + int video_stream_index; + AVFormatContext* format; + AVStream* audio_stream; + AVStream* video_stream; + AVCodecContext* audio_codec_context; + AVCodecContext* video_codec_context; + struct SwrContext* resampler; + struct SwsContext* scaler; + AVFrame* frame; + + int sample_count_mul; + + double video_next_pts; +} decoder_state_t; + +typedef struct { + int16_t *audio_samples; + int audio_sample_count; + uint8_t *video_frames; + int video_frame_count; + + int video_width; + int video_height; + int video_fps_num; + int video_fps_den; + bool end_of_input; + + decoder_state_t state; +} decoder_t; + +enum { + DECODER_USE_AUDIO = 1 << 0, + DECODER_USE_VIDEO = 1 << 1, + DECODER_AUDIO_REQUIRED = 1 << 2, + DECODER_VIDEO_REQUIRED = 1 << 3 +}; + +bool open_av_data(decoder_t *decoder, const args_t *args, int flags); +bool poll_av_data(decoder_t *decoder); +bool ensure_av_data(decoder_t *decoder, int needed_audio_samples, int needed_video_frames); +void retire_av_data(decoder_t *decoder, int retired_audio_samples, int retired_video_frames); +void 
close_av_data(decoder_t *decoder); diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c index 803ac2d..b00a29b 100644 --- a/psxavenc/filefmt.c +++ b/psxavenc/filefmt.c @@ -3,7 +3,7 @@ psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend Copyright (c) 2019, 2020 Adrian "asie" Siekierka Copyright (c) 2019 Ben "GreaseMonkey" Russell -Copyright (c) 2023 spicyjpeg +Copyright (c) 2023, 2025 spicyjpeg This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -22,48 +22,77 @@ freely, subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ -#include "common.h" -#include "libpsxav.h" +#include +#include +#include +#include +#include +#include "args.h" +#include "decoding.h" +#include "mdec.h" -static time_t get_elapsed_time(settings_t *settings) { - if (!settings->show_progress) { - return 0; +static time_t start_time = 0; +static time_t last_progress_update = 0; + +static time_t get_elapsed_time(void) { + time_t t; + + if (start_time > 0) { + t = time(NULL) - start_time; + } else { + t = 0; + start_time = time(NULL); } - time_t t = time(NULL) - settings->start_time; - if (t <= settings->last_progress_update) { + + if (t <= last_progress_update) return 0; - } - settings->last_progress_update = t; + + last_progress_update = t; return t; } -static psx_audio_xa_settings_t settings_to_libpsxav_xa_audio(settings_t *settings) { - psx_audio_xa_settings_t new_settings; - new_settings.bits_per_sample = settings->bits_per_sample; - new_settings.frequency = settings->frequency; - new_settings.stereo = settings->channels == 2; - new_settings.file_number = settings->file_number; - new_settings.channel_number = settings->channel_number; +static psx_audio_xa_settings_t args_to_libpsxav_xa_audio(const args_t *args) { + psx_audio_xa_settings_t settings; - switch (settings->format) { - case FORMAT_XA: - case FORMAT_STR2: - new_settings.format = 
PSX_AUDIO_XA_FORMAT_XA; - break; - default: - new_settings.format = PSX_AUDIO_XA_FORMAT_XACD; - break; - } + settings.bits_per_sample = args->audio_bit_depth; + settings.frequency = args->audio_frequency; + settings.stereo = (args->audio_channels == 2); + settings.file_number = args->audio_xa_file; + settings.channel_number = args->audio_xa_channel; - return new_settings; + if (args->format == FORMAT_XACD || args->format == FORMAT_STRCD) + settings.format = PSX_AUDIO_XA_FORMAT_XACD; + else + settings.format = PSX_AUDIO_XA_FORMAT_XA; + + return settings; }; -void write_vag_header(int size_per_channel, uint8_t *header, settings_t *settings) { +static void init_sector_buffer_video(const args_t *args, psx_cdrom_sector_mode2_t *sector, int lba) { + psx_cdrom_init_sector((psx_cdrom_sector_t *)sector, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); + + sector->subheader[0].file = args->audio_xa_file; + sector->subheader[0].channel = args->audio_xa_channel & PSX_CDROM_SECTOR_XA_CHANNEL_MASK; + sector->subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA | PSX_CDROM_SECTOR_XA_SUBMODE_RT; + sector->subheader[0].coding = 0; + + memcpy(sector->subheader + 1, sector->subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); +} + +#define VAG_HEADER_SIZE 0x30 + +static void write_vag_header(const args_t *args, int size_per_channel, uint8_t *header) { + memset(header, 0, VAG_HEADER_SIZE); + // Magic header[0x00] = 'V'; header[0x01] = 'A'; header[0x02] = 'G'; - header[0x03] = settings->interleave ? 
'i' : 'p'; + + if (args->format == FORMAT_VAGI) + header[0x03] = 'i'; + else + header[0x03] = 'p'; // Version (big-endian) header[0x04] = 0x00; @@ -72,150 +101,228 @@ void write_vag_header(int size_per_channel, uint8_t *header, settings_t *setting header[0x07] = 0x20; // Interleave (little-endian) - header[0x08] = (uint8_t)settings->interleave; - header[0x09] = (uint8_t)(settings->interleave>>8); - header[0x0a] = (uint8_t)(settings->interleave>>16); - header[0x0b] = (uint8_t)(settings->interleave>>24); + if (args->format == FORMAT_VAGI) { + header[0x08] = (uint8_t)args->audio_interleave; + header[0x09] = (uint8_t)(args->audio_interleave >> 8); + header[0x0a] = (uint8_t)(args->audio_interleave >> 16); + header[0x0b] = (uint8_t)(args->audio_interleave >> 24); + } // Length of data for each channel (big-endian) - header[0x0c] = (uint8_t)(size_per_channel>>24); - header[0x0d] = (uint8_t)(size_per_channel>>16); - header[0x0e] = (uint8_t)(size_per_channel>>8); + header[0x0c] = (uint8_t)(size_per_channel >> 24); + header[0x0d] = (uint8_t)(size_per_channel >> 16); + header[0x0e] = (uint8_t)(size_per_channel >> 8); header[0x0f] = (uint8_t)size_per_channel; // Sample rate (big-endian) - header[0x10] = (uint8_t)(settings->frequency>>24); - header[0x11] = (uint8_t)(settings->frequency>>16); - header[0x12] = (uint8_t)(settings->frequency>>8); - header[0x13] = (uint8_t)settings->frequency; + header[0x10] = (uint8_t)(args->audio_frequency >> 24); + header[0x11] = (uint8_t)(args->audio_frequency >> 16); + header[0x12] = (uint8_t)(args->audio_frequency >> 8); + header[0x13] = (uint8_t)args->audio_frequency; // Number of channels (little-endian) - header[0x1e] = (uint8_t)settings->channels; + header[0x1e] = (uint8_t)args->audio_channels; header[0x1f] = 0x00; // Filename - //strncpy(header + 0x20, "psxavenc", 16); - memset(header + 0x20, 0, 16); + int name_offset = strlen(args->output_file); + while ( + name_offset > 0 && + args->output_file[name_offset - 1] != '/' && + 
args->output_file[name_offset - 1] != '\\' + ) + name_offset--; + + strncpy((char*)(header + 0x20), &args->output_file[name_offset], 16); } -void encode_file_spu(settings_t *settings, FILE *output) { - psx_audio_encoder_channel_state_t audio_state; +void encode_file_xa(args_t *args, decoder_t *decoder, FILE *output) { + psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args); + + int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); + + psx_audio_encoder_state_t audio_state; + memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); + + for (int j = 0; ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, 0); j++) { + int samples_length = decoder->audio_sample_count / args->audio_channels; + + if (samples_length > audio_samples_per_sector) + samples_length = audio_samples_per_sector; + + uint8_t buffer[PSX_CDROM_SECTOR_SIZE]; + int length = psx_audio_xa_encode( + xa_settings, + &audio_state, + decoder->audio_samples, + samples_length, + buffer + ); + + if (decoder->end_of_input) + psx_audio_xa_encode_finalize(xa_settings, buffer, length); + + if (args->format == FORMAT_XACD) { + int t = j + 75*2; + + // Put the time in + buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4); + buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4); + buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4); + } + + retire_av_data(decoder, samples_length * args->audio_channels, 0); + fwrite(buffer, length, 1, output); + + time_t t = get_elapsed_time(); + + if (!(args->flags & FLAG_HIDE_PROGRESS) && t) { + fprintf( + stderr, + "\rLBA: %6d | Encoding speed: %5.2fx", + j, + (double)(j * audio_samples_per_sector) / (double)(args->audio_frequency * t) + ); + } + } +} + +void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) { + psx_audio_encoder_channel_state_t audio_state; + memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t)); + int audio_samples_per_block = psx_audio_spu_get_samples_per_block(); int block_size 
= psx_audio_spu_get_buffer_size_per_block(); - uint8_t buffer[16]; int block_count; - memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t)); - // The header must be written after the data as we don't yet know the // number of audio samples. - if (settings->format == FORMAT_VAG) { - fseek(output, 48, SEEK_SET); - } + if (args->format == FORMAT_VAG) + fseek(output, VAG_HEADER_SIZE, SEEK_SET); - for (block_count = 0; ensure_av_data(settings, audio_samples_per_block, 0); block_count++) { - int samples_length = settings->audio_sample_count; - if (samples_length > audio_samples_per_block) samples_length = audio_samples_per_block; + for (block_count = 0; ensure_av_data(decoder, audio_samples_per_block, 0); block_count++) { + int samples_length = decoder->audio_sample_count; - int length = psx_audio_spu_encode(&audio_state, settings->audio_samples, samples_length, 1, buffer); - if (!block_count) { - // This flag is not required as the SPU already resets the loop - // address when starting playback of a sample. - //buffer[1] |= PSX_AUDIO_SPU_LOOP_START; - } - if (settings->end_of_input) { - buffer[1] |= settings->loop ? 
PSX_AUDIO_SPU_LOOP_REPEAT : PSX_AUDIO_SPU_LOOP_END; + if (samples_length > audio_samples_per_block) + samples_length = audio_samples_per_block; + + uint8_t buffer[16]; + int length = psx_audio_spu_encode( + &audio_state, + decoder->audio_samples, + samples_length, + 1, + buffer + ); + + // TODO: implement proper loop flag support + if (false) + buffer[1] |= PSX_AUDIO_SPU_LOOP_START; + if (decoder->end_of_input) { + if (args->flags & FLAG_SPU_LOOP_END) + buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT; + else + buffer[1] |= PSX_AUDIO_SPU_LOOP_END; } - retire_av_data(settings, samples_length, 0); + retire_av_data(decoder, samples_length, 0); fwrite(buffer, length, 1, output); - time_t t = get_elapsed_time(settings); - if (t) { - fprintf(stderr, "\rBlock: %6d | Encoding speed: %5.2fx", + time_t t = get_elapsed_time(); + + if (!(args->flags & FLAG_HIDE_PROGRESS) && t) { + fprintf( + stderr, + "\rBlock: %6d | Encoding speed: %5.2fx", block_count, - (double)(block_count*audio_samples_per_block) / (double)(settings->frequency*t) + (double)(block_count * audio_samples_per_block) / (double)(args->audio_frequency * t) ); } } - int padding_size = (block_count*block_size) % settings->alignment; - if (padding_size) { - padding_size = settings->alignment - padding_size; - uint8_t *padding = malloc(padding_size); - memset(padding, 0, padding_size); - fwrite(padding, padding_size, 1, output); - free(padding); - } + int overflow = (block_count * block_size) % args->alignment; + + if (overflow) { + for (int i = 0; i < (args->alignment - overflow); i++) + fputc(0, output); + } + if (args->format == FORMAT_VAG) { + uint8_t header[VAG_HEADER_SIZE]; + write_vag_header(args, block_count * block_size, header); - if (settings->format == FORMAT_VAG) { - uint8_t header[48]; - memset(header, 0, 48); - write_vag_header(block_count*block_size, header, settings); fseek(output, 0, SEEK_SET); - fwrite(header, 48, 1, output); + fwrite(header, VAG_HEADER_SIZE, 1, output); } } -void 
encode_file_spu_interleaved(settings_t *settings, FILE *output) { - int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * settings->channels; +void encode_file_spui(args_t *args, decoder_t *decoder, FILE *output) { + int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels; // NOTE: since the interleaved .vag format is not standardized, some tools // (such as vgmstream) will not properly play files with interleave < 2048, // alignment != 2048 or channels != 2. - int buffer_size = settings->interleave + settings->alignment - 1; - buffer_size -= buffer_size % settings->alignment; - int header_size = 48 + settings->alignment - 1; - header_size -= header_size % settings->alignment; + int buffer_size = args->audio_interleave + args->alignment - 1; + buffer_size -= buffer_size % args->alignment; + + int header_size = VAG_HEADER_SIZE + args->alignment - 1; + header_size -= header_size % args->alignment; + + int audio_samples_per_block = psx_audio_spu_get_samples_per_block(); + int block_size = psx_audio_spu_get_buffer_size_per_block(); + int audio_samples_per_chunk = args->audio_interleave / block_size * audio_samples_per_block; + int chunk_count; + + if (args->format == FORMAT_VAGI) + fseek(output, header_size, SEEK_SET); psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size); uint8_t *buffer = malloc(buffer_size); - int audio_samples_per_block = psx_audio_spu_get_samples_per_block(); - int block_size = psx_audio_spu_get_buffer_size_per_block(); - int audio_samples_per_chunk = settings->interleave / block_size * audio_samples_per_block; - int chunk_count; - memset(audio_state, 0, audio_state_size); - if (settings->format == FORMAT_VAGI) { - fseek(output, header_size, SEEK_SET); - } - - for (chunk_count = 0; ensure_av_data(settings, audio_samples_per_chunk*settings->channels, 0); chunk_count++) { - int samples_length = settings->audio_sample_count / settings->channels; + for (chunk_count = 0; 
ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) { + int samples_length = decoder->audio_sample_count / args->audio_channels; if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk; - for (int ch = 0; ch < settings->channels; ch++) { + for (int ch = 0; ch < args->audio_channels; ch++) { memset(buffer, 0, buffer_size); - int length = psx_audio_spu_encode(audio_state + ch, settings->audio_samples + ch, samples_length, settings->channels, buffer); + int length = psx_audio_spu_encode( + audio_state + ch, + decoder->audio_samples + ch, + samples_length, + args->audio_channels, + buffer + ); + if (length) { - //buffer[1] |= PSX_AUDIO_SPU_LOOP_START; - if (settings->loop) { + // TODO: implement proper loop flag support + if (args->flags & FLAG_SPU_LOOP_END) buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_REPEAT; - } - if (settings->end_of_input) { + else if (decoder->end_of_input) buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_END; - } } fwrite(buffer, buffer_size, 1, output); - time_t t = get_elapsed_time(settings); - if (t) { - fprintf(stderr, "\rChunk: %6d | Encoding speed: %5.2fx", + time_t t = get_elapsed_time(); + + if (!(args->flags & FLAG_HIDE_PROGRESS) && t) { + fprintf( + stderr, + "\rChunk: %6d | Encoding speed: %5.2fx", chunk_count, - (double)(chunk_count*audio_samples_per_chunk) / (double)(settings->frequency*t) + (double)(chunk_count * audio_samples_per_chunk) / (double)(args->audio_frequency * t) ); } } - retire_av_data(settings, samples_length*settings->channels, 0); + retire_av_data(decoder, samples_length * args->audio_channels, 0); } - if (settings->format == FORMAT_VAGI) { + if (args->format == FORMAT_VAGI) { uint8_t *header = malloc(header_size); memset(header, 0, header_size); - write_vag_header(chunk_count*settings->interleave, header, settings); + write_vag_header(args, chunk_count * args->audio_interleave, header); + fseek(output, 0, SEEK_SET); fwrite(header, 
header_size, 1, output); free(header); @@ -225,52 +332,14 @@ void encode_file_spu_interleaved(settings_t *settings, FILE *output) { free(buffer); } -void encode_file_xa(settings_t *settings, FILE *output) { - psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings); - psx_audio_encoder_state_t audio_state; - int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); - uint8_t buffer[PSX_CDROM_SECTOR_SIZE]; - - memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); - - for (int j = 0; ensure_av_data(settings, audio_samples_per_sector*settings->channels, 0); j++) { - int samples_length = settings->audio_sample_count / settings->channels; - if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector; - int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer); - if (settings->end_of_input) { - psx_audio_xa_encode_finalize(xa_settings, buffer, length); - } - - if (settings->format == FORMAT_XACD) { - int t = j + 75*2; - - // Put the time in - buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4); - buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4); - buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4); - } - - retire_av_data(settings, samples_length*settings->channels, 0); - fwrite(buffer, length, 1, output); - - time_t t = get_elapsed_time(settings); - if (t) { - fprintf(stderr, "\rLBA: %6d | Encoding speed: %5.2fx", - j, - (double)(j*audio_samples_per_sector) / (double)(settings->frequency*t) - ); - } - } -} - -void encode_file_str(settings_t *settings, FILE *output) { - psx_audio_xa_settings_t xa_settings = settings_to_libpsxav_xa_audio(settings); - psx_audio_encoder_state_t audio_state; +void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) { + psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args); int audio_samples_per_sector; uint8_t buffer[PSX_CDROM_SECTOR_SIZE]; int offset, sector_size; - if (settings->format 
== FORMAT_STR2V) { + + if (args->format == FORMAT_STRV) { sector_size = 2048; offset = 0x18; } else { @@ -280,16 +349,21 @@ void encode_file_str(settings_t *settings, FILE *output) { int interleave; int video_sectors_per_block; - if (settings->decoder_state_av.audio_stream) { + if (decoder->state.audio_stream) { // 1/N audio, (N-1)/N video audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); - interleave = psx_audio_xa_get_sector_interleave(xa_settings) * settings->cd_speed; + interleave = psx_audio_xa_get_sector_interleave(xa_settings) * args->str_cd_speed; video_sectors_per_block = interleave - 1; - if (!settings->quiet) { - fprintf(stderr, "Interleave: %d/%d audio, %d/%d video\n", - interleave - video_sectors_per_block, interleave, video_sectors_per_block, interleave); - } + if (!(args->flags & FLAG_QUIET)) + fprintf( + stderr, + "Interleave: %d/%d audio, %d/%d video\n", + interleave - video_sectors_per_block, + interleave, + video_sectors_per_block, + interleave + ); } else { // 0/1 audio, 1/1 video audio_samples_per_sector = 0; @@ -297,54 +371,65 @@ void encode_file_str(settings_t *settings, FILE *output) { video_sectors_per_block = 1; } + psx_audio_encoder_state_t audio_state; memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); - // e.g. 
15fps = (150*7/8/15) = 8.75 blocks per frame - settings->state_vid.frame_block_base_overflow = (75*settings->cd_speed) * video_sectors_per_block * settings->video_fps_den; - settings->state_vid.frame_block_overflow_den = interleave * settings->video_fps_num; - double frame_size = (double)settings->state_vid.frame_block_base_overflow / (double)settings->state_vid.frame_block_overflow_den; - if (!settings->quiet) { - fprintf(stderr, "Frame size: %.2f sectors\n", frame_size); - } + mdec_encoder_t encoder; + init_mdec_encoder(&encoder, args->video_width, args->video_height); - init_encoder_state(settings); - settings->state_vid.frame_output = malloc(2016 * (int)ceil(frame_size)); - settings->state_vid.frame_index = 0; - settings->state_vid.frame_data_offset = 0; - settings->state_vid.frame_max_size = 0; - settings->state_vid.frame_block_overflow_num = 0; - settings->state_vid.quant_scale_sum = 0; + // e.g. 15fps = (150*7/8/15) = 8.75 blocks per frame + encoder.state.frame_block_base_overflow = (75 * args->str_cd_speed) * video_sectors_per_block * args->str_fps_den; + encoder.state.frame_block_overflow_den = interleave * args->str_fps_num; + double frame_size = (double)encoder.state.frame_block_base_overflow / (double)encoder.state.frame_block_overflow_den; + + if (!(args->flags & FLAG_QUIET)) + fprintf(stderr, "Frame size: %.2f sectors\n", frame_size); + + encoder.state.frame_output = malloc(2016 * (int)ceil(frame_size)); + encoder.state.frame_index = 0; + encoder.state.frame_data_offset = 0; + encoder.state.frame_max_size = 0; + encoder.state.frame_block_overflow_num = 0; + encoder.state.quant_scale_sum = 0; // FIXME: this needs an extra frame to prevent A/V desync int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size); if (frames_needed < 2) frames_needed = 2; - for (int j = 0; !settings->end_of_input || settings->state_vid.frame_data_offset < settings->state_vid.frame_max_size; j++) { - ensure_av_data(settings, 
audio_samples_per_sector*settings->channels, frames_needed); + for (int j = 0; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; j++) { + ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed); if ((j%interleave) < video_sectors_per_block) { // Video sector - init_sector_buffer_video((psx_cdrom_sector_mode2_t*) buffer, settings); - encode_sector_str(settings->video_frames, buffer, settings); + init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*) buffer, j); + + int frames_used = encode_sector_str(&encoder, decoder->video_frames, buffer); + retire_av_data(decoder, 0, frames_used); } else { // Audio sector - int samples_length = settings->audio_sample_count / settings->channels; + int samples_length = decoder->audio_sample_count / args->audio_channels; if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector; // FIXME: this is an extremely hacky way to handle audio tracks // shorter than the video track - if (!samples_length) { + if (!samples_length) video_sectors_per_block++; - } - int length = psx_audio_xa_encode(xa_settings, &audio_state, settings->audio_samples, samples_length, buffer); - if (settings->end_of_input) { + int length = psx_audio_xa_encode( + xa_settings, + &audio_state, + decoder->audio_samples, + samples_length, + buffer + ); + + if (decoder->end_of_input) psx_audio_xa_encode_finalize(xa_settings, buffer, length); - } - retire_av_data(settings, samples_length*settings->channels, 0); + + retire_av_data(decoder, samples_length * args->audio_channels, 0); } - if (settings->format == FORMAT_STR2CD) { + if (args->format == FORMAT_STRCD) { int t = j + 75*2; // Put the time in @@ -353,48 +438,57 @@ void encode_file_str(settings_t *settings, FILE *output) { buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4); } - if((j%interleave) < video_sectors_per_block) { - calculate_edc_data(buffer); - } + if((j%interleave) < video_sectors_per_block) + 
psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)buffer, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); fwrite(buffer + offset, sector_size, 1, output); - time_t t = get_elapsed_time(settings); - if (t) { - fprintf(stderr, "\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx", - settings->state_vid.frame_index, + time_t t = get_elapsed_time(); + + if (!(args->flags & FLAG_HIDE_PROGRESS) && t) { + fprintf( + stderr, + "\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx", + encoder.state.frame_index, j, - (double)settings->state_vid.quant_scale_sum / (double)settings->state_vid.frame_index, - (double)(settings->state_vid.frame_index*settings->video_fps_den) / (double)(t*settings->video_fps_num) + (double)encoder.state.quant_scale_sum / (double)encoder.state.frame_index, + (double)(encoder.state.frame_index * args->str_fps_den) / (double)(t * args->str_fps_num) ); } } - free(settings->state_vid.frame_output); - destroy_encoder_state(settings); + free(encoder.state.frame_output); + destroy_mdec_encoder(&encoder); } -void encode_file_sbs(settings_t *settings, FILE *output) { - init_encoder_state(settings); - settings->state_vid.frame_output = malloc(settings->alignment); - settings->state_vid.frame_data_offset = 0; - settings->state_vid.frame_max_size = settings->alignment; - settings->state_vid.quant_scale_sum = 0; +void encode_file_sbs(args_t *args, decoder_t *decoder, FILE *output) { + mdec_encoder_t encoder; + init_mdec_encoder(&encoder, args->video_width, args->video_height); - for (int j = 0; ensure_av_data(settings, 0, 1); j++) { - encode_frame_bs(settings->video_frames, settings); - fwrite(settings->state_vid.frame_output, settings->alignment, 1, output); + encoder.state.frame_output = malloc(args->alignment); + encoder.state.frame_data_offset = 0; + encoder.state.frame_max_size = args->alignment; + encoder.state.quant_scale_sum = 0; - time_t t = get_elapsed_time(settings); - if (t) { - fprintf(stderr, "\rFrame: %4d | Avg. q. 
scale: %5.2f | Encoding speed: %5.2fx", + for (int j = 0; ensure_av_data(decoder, 0, 1); j++) { + encode_frame_bs(&encoder, decoder->video_frames); + + retire_av_data(decoder, 0, 1); + fwrite(encoder.state.frame_output, args->alignment, 1, output); + + time_t t = get_elapsed_time(); + + if (!(args->flags & FLAG_HIDE_PROGRESS) && t) { + fprintf( + stderr, + "\rFrame: %4d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx", j, - (double)settings->state_vid.quant_scale_sum / (double)j, - (double)(j*settings->video_fps_den) / (double)(t*settings->video_fps_num) + (double)encoder.state.quant_scale_sum / (double)j, + (double)(j * args->str_fps_den) / (double)(t * args->str_fps_num) ); } } - free(settings->state_vid.frame_output); - destroy_encoder_state(settings); + free(encoder.state.frame_output); + destroy_mdec_encoder(&encoder); } diff --git a/psxavenc/filefmt.h b/psxavenc/filefmt.h new file mode 100644 index 0000000..5f8eb38 --- /dev/null +++ b/psxavenc/filefmt.h @@ -0,0 +1,35 @@ +/* +psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend + +Copyright (c) 2019, 2020 Adrian "asie" Siekierka +Copyright (c) 2019 Ben "GreaseMonkey" Russell +Copyright (c) 2023, 2025 spicyjpeg + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. 
This notice may not be removed or altered from any source distribution. +*/ + +#pragma once + +#include +#include "args.h" +#include "decoding.h" + +void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output); +void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output); +void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output); +void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output); +void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output); diff --git a/psxavenc/main.c b/psxavenc/main.c new file mode 100644 index 0000000..78c0935 --- /dev/null +++ b/psxavenc/main.c @@ -0,0 +1,174 @@ +/* +psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend + +Copyright (c) 2019, 2020 Adrian "asie" Siekierka +Copyright (c) 2019 Ben "GreaseMonkey" Russell +Copyright (c) 2023 spicyjpeg + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#include +#include +#include "args.h" +#include "decoding.h" +#include "filefmt.h" + +static const char *const bs_codec_names[NUM_BS_CODECS] = { + "BS v2", + "BS v3", + "BS v3 (with DC wrapping)" +}; + +static const uint8_t decoder_flags[NUM_FORMATS] = { + DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // xa + DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // xacd + DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // spu + DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // vag + DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // spui + DECODER_USE_AUDIO | DECODER_AUDIO_REQUIRED, // vagi + DECODER_USE_AUDIO | DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED, // str + DECODER_USE_AUDIO | DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED, // strcd + DECODER_USE_AUDIO | DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED, // strspu + DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED, // strv + DECODER_USE_VIDEO | DECODER_VIDEO_REQUIRED // sbs +}; + +int main(int argc, const char **argv) { + args_t args; + decoder_t decoder; + FILE *output; + + args.flags = 0; + + args.format = FORMAT_INVALID; + args.input_file = NULL; + args.output_file = NULL; + args.swresample_options = NULL; + args.swscale_options = NULL; + + if (!parse_args(&args, argv + 1, argc - 1)) + return 1; + if (!open_av_data(&decoder, &args, decoder_flags[args.format])) { + fprintf(stderr, "Failed to open input file: %s\n", args.input_file); + return 1; + } + + output = fopen(args.output_file, "wb"); + + if (output == NULL) { + fprintf(stderr, "Failed to open output file: %s\n", args.output_file); + return 1; + } + + switch (args.format) { + case FORMAT_XA: + case FORMAT_XACD: + if (!(args.flags & FLAG_QUIET)) + fprintf( + stderr, + "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n", + args.audio_frequency, + args.audio_bit_depth, + (args.audio_channels == 2) ? 
"stereo" : "mono", + args.audio_xa_file, + args.audio_xa_channel + ); + + encode_file_xa(&args, &decoder, output); + break; + + case FORMAT_SPU: + case FORMAT_VAG: + if (!(args.flags & FLAG_QUIET)) + fprintf( + stderr, + "Audio format: SPU-ADPCM, %d Hz mono\n", + args.audio_frequency + ); + + encode_file_spu(&args, &decoder, output); + break; + + case FORMAT_SPUI: + case FORMAT_VAGI: + if (!(args.flags & FLAG_QUIET)) + fprintf( + stderr, + "Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n", + args.audio_frequency, + args.audio_channels, + args.audio_interleave + ); + + encode_file_spui(&args, &decoder, output); + break; + + case FORMAT_STR: + case FORMAT_STRCD: + case FORMAT_STRSPU: + case FORMAT_STRV: + if (!(args.flags & FLAG_QUIET)) { + if (decoder.state.audio_stream) + fprintf( + stderr, + "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n", + args.audio_frequency, + args.audio_bit_depth, + (args.audio_channels == 2) ? "stereo" : "mono", + args.audio_xa_file, + args.audio_xa_channel + ); + + fprintf( + stderr, + "Video format: %s, %dx%d, %.2f fps\n", + bs_codec_names[args.video_codec], + args.video_width, + args.video_height, + (double)args.str_fps_num / (double)args.str_fps_den + ); + } + + encode_file_str(&args, &decoder, output); + break; + + case FORMAT_SBS: + if (!(args.flags & FLAG_QUIET)) + fprintf( + stderr, + "Video format: %s, %dx%d, %.2f fps\n", + bs_codec_names[args.video_codec], + args.video_width, + args.video_height, + (double)args.str_fps_num / (double)args.str_fps_den + ); + + encode_file_sbs(&args, &decoder, output); + break; + + default: + ; + } + + if (!(args.flags & FLAG_HIDE_PROGRESS)) + fprintf(stderr, "\nDone.\n"); + + fclose(output); + close_av_data(&decoder); + return 0; +} diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c index 6abffea..095bb33 100644 --- a/psxavenc/mdec.c +++ b/psxavenc/mdec.c @@ -22,131 +22,139 @@ freely, subject to the following restrictions: 3. 
This notice may not be removed or altered from any source distribution. */ -#include "common.h" +#include +#include +#include +#include +#include +#include +#include "mdec.h" -#define MAKE_HUFFMAN_PAIR(zeroes, value) (((zeroes)<<10)|((+(value))&0x3FF)),(((zeroes)<<10)|((-(value))&0x3FF)) -const struct { +#define AC_PAIR(zeroes, value) \ + (((zeroes) << 10) | ((+(value)) & 0x3FF)), \ + (((zeroes) << 10) | ((-(value)) & 0x3FF)) + +static const struct { int c_bits; uint32_t c_value; uint16_t u_hword_pos; uint16_t u_hword_neg; -} huffman_lookup[] = { +} ac_huffman_tree[] = { // Fuck this Huffman tree in particular --GM - {2,0x3,MAKE_HUFFMAN_PAIR(0,1)}, - {3,0x3,MAKE_HUFFMAN_PAIR(1,1)}, - {4,0x4,MAKE_HUFFMAN_PAIR(0,2)}, - {4,0x5,MAKE_HUFFMAN_PAIR(2,1)}, - {5,0x05,MAKE_HUFFMAN_PAIR(0,3)}, - {5,0x06,MAKE_HUFFMAN_PAIR(4,1)}, - {5,0x07,MAKE_HUFFMAN_PAIR(3,1)}, - {6,0x04,MAKE_HUFFMAN_PAIR(7,1)}, - {6,0x05,MAKE_HUFFMAN_PAIR(6,1)}, - {6,0x06,MAKE_HUFFMAN_PAIR(1,2)}, - {6,0x07,MAKE_HUFFMAN_PAIR(5,1)}, - {7,0x04,MAKE_HUFFMAN_PAIR(2,2)}, - {7,0x05,MAKE_HUFFMAN_PAIR(9,1)}, - {7,0x06,MAKE_HUFFMAN_PAIR(0,4)}, - {7,0x07,MAKE_HUFFMAN_PAIR(8,1)}, - {8,0x20,MAKE_HUFFMAN_PAIR(13,1)}, - {8,0x21,MAKE_HUFFMAN_PAIR(0,6)}, - {8,0x22,MAKE_HUFFMAN_PAIR(12,1)}, - {8,0x23,MAKE_HUFFMAN_PAIR(11,1)}, - {8,0x24,MAKE_HUFFMAN_PAIR(3,2)}, - {8,0x25,MAKE_HUFFMAN_PAIR(1,3)}, - {8,0x26,MAKE_HUFFMAN_PAIR(0,5)}, - {8,0x27,MAKE_HUFFMAN_PAIR(10,1)}, - {10,0x008,MAKE_HUFFMAN_PAIR(16,1)}, - {10,0x009,MAKE_HUFFMAN_PAIR(5,2)}, - {10,0x00A,MAKE_HUFFMAN_PAIR(0,7)}, - {10,0x00B,MAKE_HUFFMAN_PAIR(2,3)}, - {10,0x00C,MAKE_HUFFMAN_PAIR(1,4)}, - {10,0x00D,MAKE_HUFFMAN_PAIR(15,1)}, - {10,0x00E,MAKE_HUFFMAN_PAIR(14,1)}, - {10,0x00F,MAKE_HUFFMAN_PAIR(4,2)}, - {12,0x010,MAKE_HUFFMAN_PAIR(0,11)}, - {12,0x011,MAKE_HUFFMAN_PAIR(8,2)}, - {12,0x012,MAKE_HUFFMAN_PAIR(4,3)}, - {12,0x013,MAKE_HUFFMAN_PAIR(0,10)}, - {12,0x014,MAKE_HUFFMAN_PAIR(2,4)}, - {12,0x015,MAKE_HUFFMAN_PAIR(7,2)}, - {12,0x016,MAKE_HUFFMAN_PAIR(21,1)}, - 
{12,0x017,MAKE_HUFFMAN_PAIR(20,1)}, - {12,0x018,MAKE_HUFFMAN_PAIR(0,9)}, - {12,0x019,MAKE_HUFFMAN_PAIR(19,1)}, - {12,0x01A,MAKE_HUFFMAN_PAIR(18,1)}, - {12,0x01B,MAKE_HUFFMAN_PAIR(1,5)}, - {12,0x01C,MAKE_HUFFMAN_PAIR(3,3)}, - {12,0x01D,MAKE_HUFFMAN_PAIR(0,8)}, - {12,0x01E,MAKE_HUFFMAN_PAIR(6,2)}, - {12,0x01F,MAKE_HUFFMAN_PAIR(17,1)}, - {13,0x0010,MAKE_HUFFMAN_PAIR(10,2)}, - {13,0x0011,MAKE_HUFFMAN_PAIR(9,2)}, - {13,0x0012,MAKE_HUFFMAN_PAIR(5,3)}, - {13,0x0013,MAKE_HUFFMAN_PAIR(3,4)}, - {13,0x0014,MAKE_HUFFMAN_PAIR(2,5)}, - {13,0x0015,MAKE_HUFFMAN_PAIR(1,7)}, - {13,0x0016,MAKE_HUFFMAN_PAIR(1,6)}, - {13,0x0017,MAKE_HUFFMAN_PAIR(0,15)}, - {13,0x0018,MAKE_HUFFMAN_PAIR(0,14)}, - {13,0x0019,MAKE_HUFFMAN_PAIR(0,13)}, - {13,0x001A,MAKE_HUFFMAN_PAIR(0,12)}, - {13,0x001B,MAKE_HUFFMAN_PAIR(26,1)}, - {13,0x001C,MAKE_HUFFMAN_PAIR(25,1)}, - {13,0x001D,MAKE_HUFFMAN_PAIR(24,1)}, - {13,0x001E,MAKE_HUFFMAN_PAIR(23,1)}, - {13,0x001F,MAKE_HUFFMAN_PAIR(22,1)}, - {14,0x0010,MAKE_HUFFMAN_PAIR(0,31)}, - {14,0x0011,MAKE_HUFFMAN_PAIR(0,30)}, - {14,0x0012,MAKE_HUFFMAN_PAIR(0,29)}, - {14,0x0013,MAKE_HUFFMAN_PAIR(0,28)}, - {14,0x0014,MAKE_HUFFMAN_PAIR(0,27)}, - {14,0x0015,MAKE_HUFFMAN_PAIR(0,26)}, - {14,0x0016,MAKE_HUFFMAN_PAIR(0,25)}, - {14,0x0017,MAKE_HUFFMAN_PAIR(0,24)}, - {14,0x0018,MAKE_HUFFMAN_PAIR(0,23)}, - {14,0x0019,MAKE_HUFFMAN_PAIR(0,22)}, - {14,0x001A,MAKE_HUFFMAN_PAIR(0,21)}, - {14,0x001B,MAKE_HUFFMAN_PAIR(0,20)}, - {14,0x001C,MAKE_HUFFMAN_PAIR(0,19)}, - {14,0x001D,MAKE_HUFFMAN_PAIR(0,18)}, - {14,0x001E,MAKE_HUFFMAN_PAIR(0,17)}, - {14,0x001F,MAKE_HUFFMAN_PAIR(0,16)}, - {15,0x0010,MAKE_HUFFMAN_PAIR(0,40)}, - {15,0x0011,MAKE_HUFFMAN_PAIR(0,39)}, - {15,0x0012,MAKE_HUFFMAN_PAIR(0,38)}, - {15,0x0013,MAKE_HUFFMAN_PAIR(0,37)}, - {15,0x0014,MAKE_HUFFMAN_PAIR(0,36)}, - {15,0x0015,MAKE_HUFFMAN_PAIR(0,35)}, - {15,0x0016,MAKE_HUFFMAN_PAIR(0,34)}, - {15,0x0017,MAKE_HUFFMAN_PAIR(0,33)}, - {15,0x0018,MAKE_HUFFMAN_PAIR(0,32)}, - {15,0x0019,MAKE_HUFFMAN_PAIR(1,14)}, - 
{15,0x001A,MAKE_HUFFMAN_PAIR(1,13)}, - {15,0x001B,MAKE_HUFFMAN_PAIR(1,12)}, - {15,0x001C,MAKE_HUFFMAN_PAIR(1,11)}, - {15,0x001D,MAKE_HUFFMAN_PAIR(1,10)}, - {15,0x001E,MAKE_HUFFMAN_PAIR(1,9)}, - {15,0x001F,MAKE_HUFFMAN_PAIR(1,8)}, - {16,0x0010,MAKE_HUFFMAN_PAIR(1,18)}, - {16,0x0011,MAKE_HUFFMAN_PAIR(1,17)}, - {16,0x0012,MAKE_HUFFMAN_PAIR(1,16)}, - {16,0x0013,MAKE_HUFFMAN_PAIR(1,15)}, - {16,0x0014,MAKE_HUFFMAN_PAIR(6,3)}, - {16,0x0015,MAKE_HUFFMAN_PAIR(16,2)}, - {16,0x0016,MAKE_HUFFMAN_PAIR(15,2)}, - {16,0x0017,MAKE_HUFFMAN_PAIR(14,2)}, - {16,0x0018,MAKE_HUFFMAN_PAIR(13,2)}, - {16,0x0019,MAKE_HUFFMAN_PAIR(12,2)}, - {16,0x001A,MAKE_HUFFMAN_PAIR(11,2)}, - {16,0x001B,MAKE_HUFFMAN_PAIR(31,1)}, - {16,0x001C,MAKE_HUFFMAN_PAIR(30,1)}, - {16,0x001D,MAKE_HUFFMAN_PAIR(29,1)}, - {16,0x001E,MAKE_HUFFMAN_PAIR(28,1)}, - {16,0x001F,MAKE_HUFFMAN_PAIR(27,1)}, + { 2, 0x3, AC_PAIR( 0, 1)}, + { 3, 0x3, AC_PAIR( 1, 1)}, + { 4, 0x4, AC_PAIR( 0, 2)}, + { 4, 0x5, AC_PAIR( 2, 1)}, + { 5, 0x05, AC_PAIR( 0, 3)}, + { 5, 0x06, AC_PAIR( 4, 1)}, + { 5, 0x07, AC_PAIR( 3, 1)}, + { 6, 0x04, AC_PAIR( 7, 1)}, + { 6, 0x05, AC_PAIR( 6, 1)}, + { 6, 0x06, AC_PAIR( 1, 2)}, + { 6, 0x07, AC_PAIR( 5, 1)}, + { 7, 0x04, AC_PAIR( 2, 2)}, + { 7, 0x05, AC_PAIR( 9, 1)}, + { 7, 0x06, AC_PAIR( 0, 4)}, + { 7, 0x07, AC_PAIR( 8, 1)}, + { 8, 0x20, AC_PAIR(13, 1)}, + { 8, 0x21, AC_PAIR( 0, 6)}, + { 8, 0x22, AC_PAIR(12, 1)}, + { 8, 0x23, AC_PAIR(11, 1)}, + { 8, 0x24, AC_PAIR( 3, 2)}, + { 8, 0x25, AC_PAIR( 1, 3)}, + { 8, 0x26, AC_PAIR( 0, 5)}, + { 8, 0x27, AC_PAIR(10, 1)}, + {10, 0x008, AC_PAIR(16, 1)}, + {10, 0x009, AC_PAIR( 5, 2)}, + {10, 0x00A, AC_PAIR( 0, 7)}, + {10, 0x00B, AC_PAIR( 2, 3)}, + {10, 0x00C, AC_PAIR( 1, 4)}, + {10, 0x00D, AC_PAIR(15, 1)}, + {10, 0x00E, AC_PAIR(14, 1)}, + {10, 0x00F, AC_PAIR( 4, 2)}, + {12, 0x010, AC_PAIR( 0, 11)}, + {12, 0x011, AC_PAIR( 8, 2)}, + {12, 0x012, AC_PAIR( 4, 3)}, + {12, 0x013, AC_PAIR( 0, 10)}, + {12, 0x014, AC_PAIR( 2, 4)}, + {12, 0x015, AC_PAIR( 7, 2)}, + {12, 0x016, 
AC_PAIR(21, 1)}, + {12, 0x017, AC_PAIR(20, 1)}, + {12, 0x018, AC_PAIR( 0, 9)}, + {12, 0x019, AC_PAIR(19, 1)}, + {12, 0x01A, AC_PAIR(18, 1)}, + {12, 0x01B, AC_PAIR( 1, 5)}, + {12, 0x01C, AC_PAIR( 3, 3)}, + {12, 0x01D, AC_PAIR( 0, 8)}, + {12, 0x01E, AC_PAIR( 6, 2)}, + {12, 0x01F, AC_PAIR(17, 1)}, + {13, 0x0010, AC_PAIR(10, 2)}, + {13, 0x0011, AC_PAIR( 9, 2)}, + {13, 0x0012, AC_PAIR( 5, 3)}, + {13, 0x0013, AC_PAIR( 3, 4)}, + {13, 0x0014, AC_PAIR( 2, 5)}, + {13, 0x0015, AC_PAIR( 1, 7)}, + {13, 0x0016, AC_PAIR( 1, 6)}, + {13, 0x0017, AC_PAIR( 0, 15)}, + {13, 0x0018, AC_PAIR( 0, 14)}, + {13, 0x0019, AC_PAIR( 0, 13)}, + {13, 0x001A, AC_PAIR( 0, 12)}, + {13, 0x001B, AC_PAIR(26, 1)}, + {13, 0x001C, AC_PAIR(25, 1)}, + {13, 0x001D, AC_PAIR(24, 1)}, + {13, 0x001E, AC_PAIR(23, 1)}, + {13, 0x001F, AC_PAIR(22, 1)}, + {14, 0x0010, AC_PAIR( 0, 31)}, + {14, 0x0011, AC_PAIR( 0, 30)}, + {14, 0x0012, AC_PAIR( 0, 29)}, + {14, 0x0013, AC_PAIR( 0, 28)}, + {14, 0x0014, AC_PAIR( 0, 27)}, + {14, 0x0015, AC_PAIR( 0, 26)}, + {14, 0x0016, AC_PAIR( 0, 25)}, + {14, 0x0017, AC_PAIR( 0, 24)}, + {14, 0x0018, AC_PAIR( 0, 23)}, + {14, 0x0019, AC_PAIR( 0, 22)}, + {14, 0x001A, AC_PAIR( 0, 21)}, + {14, 0x001B, AC_PAIR( 0, 20)}, + {14, 0x001C, AC_PAIR( 0, 19)}, + {14, 0x001D, AC_PAIR( 0, 18)}, + {14, 0x001E, AC_PAIR( 0, 17)}, + {14, 0x001F, AC_PAIR( 0, 16)}, + {15, 0x0010, AC_PAIR( 0, 40)}, + {15, 0x0011, AC_PAIR( 0, 39)}, + {15, 0x0012, AC_PAIR( 0, 38)}, + {15, 0x0013, AC_PAIR( 0, 37)}, + {15, 0x0014, AC_PAIR( 0, 36)}, + {15, 0x0015, AC_PAIR( 0, 35)}, + {15, 0x0016, AC_PAIR( 0, 34)}, + {15, 0x0017, AC_PAIR( 0, 33)}, + {15, 0x0018, AC_PAIR( 0, 32)}, + {15, 0x0019, AC_PAIR( 1, 14)}, + {15, 0x001A, AC_PAIR( 1, 13)}, + {15, 0x001B, AC_PAIR( 1, 12)}, + {15, 0x001C, AC_PAIR( 1, 11)}, + {15, 0x001D, AC_PAIR( 1, 10)}, + {15, 0x001E, AC_PAIR( 1, 9)}, + {15, 0x001F, AC_PAIR( 1, 8)}, + {16, 0x0010, AC_PAIR( 1, 18)}, + {16, 0x0011, AC_PAIR( 1, 17)}, + {16, 0x0012, AC_PAIR( 1, 16)}, + {16, 0x0013, AC_PAIR( 1, 15)}, + 
{16, 0x0014, AC_PAIR( 6, 3)}, + {16, 0x0015, AC_PAIR(16, 2)}, + {16, 0x0016, AC_PAIR(15, 2)}, + {16, 0x0017, AC_PAIR(14, 2)}, + {16, 0x0018, AC_PAIR(13, 2)}, + {16, 0x0019, AC_PAIR(12, 2)}, + {16, 0x001A, AC_PAIR(11, 2)}, + {16, 0x001B, AC_PAIR(31, 1)}, + {16, 0x001C, AC_PAIR(30, 1)}, + {16, 0x001D, AC_PAIR(29, 1)}, + {16, 0x001E, AC_PAIR(28, 1)}, + {16, 0x001F, AC_PAIR(27, 1)} }; -#undef MAKE_HUFFMAN_PAIR -const uint8_t quant_dec[8*8] = { +static const uint8_t quant_dec[8*8] = { 2, 16, 19, 22, 26, 27, 29, 34, 16, 16, 22, 24, 27, 29, 34, 37, 19, 22, 26, 27, 29, 34, 34, 38, @@ -154,96 +162,110 @@ const uint8_t quant_dec[8*8] = { 22, 26, 27, 29, 32, 35, 40, 48, 26, 27, 29, 32, 35, 40, 48, 58, 26, 27, 29, 34, 38, 46, 56, 69, - 27, 29, 35, 38, 46, 56, 69, 83, + 27, 29, 35, 38, 46, 56, 69, 83 }; -const uint8_t dct_zigzag_table[8*8] = { - 0x00,0x01,0x05,0x06,0x0E,0x0F,0x1B,0x1C, - 0x02,0x04,0x07,0x0D,0x10,0x1A,0x1D,0x2A, - 0x03,0x08,0x0C,0x11,0x19,0x1E,0x29,0x2B, - 0x09,0x0B,0x12,0x18,0x1F,0x28,0x2C,0x35, - 0x0A,0x13,0x17,0x20,0x27,0x2D,0x34,0x36, - 0x14,0x16,0x21,0x26,0x2E,0x33,0x37,0x3C, - 0x15,0x22,0x25,0x2F,0x32,0x38,0x3B,0x3D, - 0x23,0x24,0x30,0x31,0x39,0x3A,0x3E,0x3F, +static const uint8_t dct_zigzag_table[8*8] = { + 0, 1, 5, 6, 14, 15, 27, 28, + 2, 4, 7, 13, 16, 26, 29, 42, + 3, 8, 12, 17, 25, 30, 41, 43, + 9, 11, 18, 24, 31, 40, 44, 53, + 10, 19, 23, 32, 39, 45, 52, 54, + 20, 22, 33, 38, 46, 51, 55, 60, + 21, 34, 37, 47, 50, 56, 59, 61, + 35, 36, 48, 49, 57, 58, 62, 63 }; -const uint8_t dct_zagzig_table[8*8] = { - 0x00,0x01,0x08,0x10,0x09,0x02,0x03,0x0A, - 0x11,0x18,0x20,0x19,0x12,0x0B,0x04,0x05, - 0x0C,0x13,0x1A,0x21,0x28,0x30,0x29,0x22, - 0x1B,0x14,0x0D,0x06,0x07,0x0E,0x15,0x1C, - 0x23,0x2A,0x31,0x38,0x39,0x32,0x2B,0x24, - 0x1D,0x16,0x0F,0x17,0x1E,0x25,0x2C,0x33, - 0x3A,0x3B,0x34,0x2D,0x26,0x1F,0x27,0x2E, - 0x35,0x3C,0x3D,0x36,0x2F,0x37,0x3E,0x3F, +static const uint8_t dct_zagzig_table[8*8] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 
19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 }; -const int16_t dct_scale_table[8*8] = { - +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, +0x5A82, - +0x7D8A, +0x6A6D, +0x471C, +0x18F8, -0x18F9, -0x471D, -0x6A6E, -0x7D8B, - +0x7641, +0x30FB, -0x30FC, -0x7642, -0x7642, -0x30FC, +0x30FB, +0x7641, - +0x6A6D, -0x18F9, -0x7D8B, -0x471D, +0x471C, +0x7D8A, +0x18F8, -0x6A6E, - +0x5A82, -0x5A83, -0x5A83, +0x5A82, +0x5A82, -0x5A83, -0x5A83, +0x5A82, - +0x471C, -0x7D8B, +0x18F8, +0x6A6D, -0x6A6E, -0x18F9, +0x7D8A, -0x471D, - +0x30FB, -0x7642, +0x7641, -0x30FC, -0x30FC, +0x7641, -0x7642, +0x30FB, - +0x18F8, -0x471D, +0x6A6D, -0x7D8B, +0x7D8A, -0x6A6E, +0x471C, -0x18F9, -}; +#if 0 +#define SF0 0x5a82 // cos(0/16 * pi) * sqrt(2) +#define SF1 0x7d8a // cos(1/16 * pi) * 2 +#define SF2 0x7641 // cos(2/16 * pi) * 2 +#define SF3 0x6a6d // cos(3/16 * pi) * 2 +#define SF4 0x5a82 // cos(4/16 * pi) * 2 +#define SF5 0x471c // cos(5/16 * pi) * 2 +#define SF6 0x30fb // cos(6/16 * pi) * 2 +#define SF7 0x18f8 // cos(7/16 * pi) * 2 -static void init_dct_data(vid_encoder_state_t *state) -{ +static const int16_t dct_scale_table[8*8] = { + SF0, SF0, SF0, SF0, SF0, SF0, SF0, SF0, + SF1, SF3, SF5, SF7, -SF7, -SF5, -SF3, -SF1, + SF2, SF6, -SF6, -SF2, -SF2, -SF6, SF6, SF2, + SF3, -SF7, -SF1, -SF5, SF5, SF1, SF7, -SF3, + SF4, -SF4, -SF4, SF4, SF4, -SF4, -SF4, SF4, + SF5, -SF1, SF7, SF3, -SF3, -SF7, SF1, -SF5, + SF6, -SF2, SF2, -SF6, -SF6, SF2, -SF2, SF6, + SF7, -SF5, SF3, -SF1, SF1, -SF3, SF5, -SF7 +}; +#endif + +static void init_dct_data(mdec_encoder_state_t *state) { for(int i = 0; i <= 0xFFFF; i++) { // high 8 bits = bit count // low 24 bits = value - state->huffman_encoding_map[i] = ((6+16)<<24)|((0x01<<16)|(i)); + state->ac_huffman_map[i] = ((6+16) << 24) | (0x01 << 16) | i; int16_t coeff = (int16_t)i; - if (coeff < -0x200) { coeff = 
-0x200; } - if (coeff > +0x1FF) { coeff = +0x1FF; } - state->coeff_clamp_map[i] = coeff&0x3FF; + + if (coeff < -0x200) + coeff = -0x200; + else if (coeff > +0x1FF) + coeff = +0x1FF; + + state->coeff_clamp_map[i] = coeff & 0x3FF; } - for(int i = 0; i < sizeof(huffman_lookup)/sizeof(huffman_lookup[0]); i++) { - int bits = huffman_lookup[i].c_bits+1; - uint32_t base_value = huffman_lookup[i].c_value; - state->huffman_encoding_map[huffman_lookup[i].u_hword_pos] = (bits<<24)|(base_value<<1)|0; - state->huffman_encoding_map[huffman_lookup[i].u_hword_neg] = (bits<<24)|(base_value<<1)|1; - } + int tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]); + for(int i = 0; i < tree_item_count; i++) { + int bits = ac_huffman_tree[i].c_bits+1; + uint32_t base_value = ac_huffman_tree[i].c_value; + + state->ac_huffman_map[ac_huffman_tree[i].u_hword_pos] = (bits << 24) | (base_value << 1) | 0; + state->ac_huffman_map[ac_huffman_tree[i].u_hword_neg] = (bits << 24) | (base_value << 1) | 1; + } } -static bool flush_bits(vid_encoder_state_t *state) -{ +static bool flush_bits(mdec_encoder_state_t *state) { if(state->bits_left < 16) { state->frame_output[state->bytes_used++] = (uint8_t)state->bits_value; - if (state->bytes_used >= state->frame_max_size) { + if (state->bytes_used >= state->frame_max_size) return false; - } + state->frame_output[state->bytes_used++] = (uint8_t)(state->bits_value>>8); } + state->bits_left = 16; state->bits_value = 0; return true; } -static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val) -{ +static bool encode_bits(mdec_encoder_state_t *state, int bits, uint32_t val) { assert(val < (1< 16 // and I have no idea why, so I have to split this up --GM if (bits > 16) { - if (!encode_bits(state, bits-16, val>>16)) { + if (!encode_bits(state, bits-16, val>>16)) return false; - } + bits = 16; val &= 0xFFFF; } if (state->bits_left == 0) { - if (!flush_bits(state)) { + if (!flush_bits(state)) return false; - } } while (bits > 
state->bits_left) { @@ -260,9 +282,8 @@ static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val) val &= mask; assert(mask >= 1); assert(val < (1<= 1) { @@ -281,31 +302,31 @@ static bool encode_bits(vid_encoder_state_t *state, int bits, uint32_t val) return true; } -static bool encode_ac_value(vid_encoder_state_t *state, uint16_t value) -{ +static bool encode_ac_value(mdec_encoder_state_t *state, uint16_t value) { assert(0 <= value && value <= 0xFFFF); #if 0 - for(int i = 0; i < sizeof(huffman_lookup)/sizeof(huffman_lookup[0]); i++) { - if(value == huffman_lookup[i].u_hword_pos) { - return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|0); - } - else if(value == huffman_lookup[i].u_hword_neg) { - return encode_bits(state, huffman_lookup[i].c_bits+1, (((uint32_t)huffman_lookup[i].c_value)<<1)|1); + int tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]); + + for (int i = 0; i < tree_item_count; i++) { + if (value == ac_huffman_tree[i].u_hword_pos) { + return encode_bits(state, ac_huffman_tree[i].c_bits+1, ((uint32_t)ac_huffman_tree[i].c_value << 1) | 0); + } else if (value == ac_huffman_tree[i].u_hword_neg) { + return encode_bits(state, ac_huffman_tree[i].c_bits+1, ((uint32_t)ac_huffman_tree[i].c_value << 1) | 1); } } // Use an escape - return encode_bits(state, 6+16, (0x01<<16)|(0xFFFF&(uint32_t)value)); + return encode_bits(state, 6+16, (0x01 << 16) | (0xFFFF & (uint32_t)value)); #else - uint32_t outword = state->huffman_encoding_map[value]; - return encode_bits(state, outword>>24, outword&0xFFFFFF); + uint32_t outword = state->ac_huffman_map[value]; + + return encode_bits(state, outword >> 24, outword & 0xFFFFFF); #endif } -static void transform_dct_block(vid_encoder_state_t *state, int16_t *block) -{ #if 0 +static void transform_dct_block(int16_t *block) { // Apply DCT to block int midblock[8*8]; @@ -327,55 +348,9 @@ static void transform_dct_block(vid_encoder_state_t *state, int16_t 
*block) block[8*i+j] = (int16_t)((v + 0xFFF) >> 13); } } -#else - state->dct_context->fdct(block); -#endif } -// https://stackoverflow.com/a/60011209 -//#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? (((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d))) -#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d))) - -static bool encode_dct_block(vid_encoder_state_t *state, const int16_t *block, const int16_t *quant_table) -{ - int dc = DIVIDE_ROUNDED(block[0], quant_table[0]); - dc = state->coeff_clamp_map[dc&0xFFFF]; - - if (!encode_bits(state, 10, dc)) { - return false; - } - - for (int i = 1, zeroes = 0; i < 64; i++) { - int ri = dct_zagzig_table[i]; - int ac = DIVIDE_ROUNDED(block[ri], quant_table[ri]); - ac = state->coeff_clamp_map[ac&0xFFFF]; - - if (ac == 0) { - zeroes++; - } else { - if (!encode_ac_value(state, (zeroes<<10)|ac)) { - return false; - } - zeroes = 0; - state->uncomp_hwords_used += 1; - } - } - - //fprintf(stderr, "dc %08X rles %2d\n", dc, zero_rle_words); - //assert(dc >= -0x200); assert(dc < +0x200); - - // Store end of block - if (!encode_bits(state, 2, 0x2)) { - return false; - } - state->uncomp_hwords_used += 2; - //state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF; - return true; -} - -#if 0 -static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t min_val, int *values_to_shed) -{ +static int reduce_dct_block(mdec_encoder_state_t *state, int32_t *block, int32_t min_val, int *values_to_shed) { // Reduce so it can all fit int nonzeroes = 0; @@ -397,117 +372,178 @@ static int reduce_dct_block(vid_encoder_state_t *state, int32_t *block, int32_t } #endif -bool init_encoder_state(settings_t *settings) -{ - if (settings->state_vid.huffman_encoding_map) { - return true; - } +// https://stackoverflow.com/a/60011209 +#if 0 +#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? 
(((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d))) +#else +#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d))) +#endif - settings->state_vid.huffman_encoding_map = malloc(0x10000*sizeof(uint32_t)); - settings->state_vid.coeff_clamp_map = malloc(0x10000*sizeof(int16_t)); - if (!settings->state_vid.huffman_encoding_map || !settings->state_vid.coeff_clamp_map) { +static bool encode_dct_block(mdec_encoder_state_t *state, const int16_t *block, const int16_t *quant_table) { + int dc = DIVIDE_ROUNDED(block[0], quant_table[0]); + dc = state->coeff_clamp_map[dc&0xFFFF]; + + if (!encode_bits(state, 10, dc)) return false; - } - init_dct_data(&(settings->state_vid)); - settings->state_vid.dct_context = avcodec_dct_alloc(); - if (!settings->state_vid.dct_context) { - return false; - } - avcodec_dct_init(settings->state_vid.dct_context); + for (int i = 1, zeroes = 0; i < 64; i++) { + int ri = dct_zagzig_table[i]; + int ac = DIVIDE_ROUNDED(block[ri], quant_table[ri]); + ac = state->coeff_clamp_map[ac&0xFFFF]; - int dct_block_count_x = (settings->video_width+15)/16; - int dct_block_count_y = (settings->video_height+15)/16; + if (ac == 0) { + zeroes++; + } else { + if (!encode_ac_value(state, (zeroes<<10)|ac)) + return false; - int dct_block_size = dct_block_count_x*dct_block_count_y*sizeof(int16_t)*8*8; - for (int i = 0; i < 6; i++) { - settings->state_vid.dct_block_lists[i] = malloc(dct_block_size); - if (!settings->state_vid.dct_block_lists[i]) { - return false; + zeroes = 0; + state->uncomp_hwords_used += 1; } } + //fprintf(stderr, "dc %08X rles %2d\n", dc, zero_rle_words); + //assert(dc >= -0x200); assert(dc < +0x200); + + // Store end of block + if (!encode_bits(state, 2, 0x2)) + return false; + + state->uncomp_hwords_used += 2; + //state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF; return true; } -void destroy_encoder_state(settings_t *settings) -{ - if (settings->state_vid.huffman_encoding_map) { - free(settings->state_vid.huffman_encoding_map); 
- settings->state_vid.huffman_encoding_map = NULL; +bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_height) { + mdec_encoder_state_t *state = &(encoder->state); + + if (state->dct_context != NULL) + return true; + + state->dct_context = avcodec_dct_alloc(); + state->ac_huffman_map = malloc(0x10000 * sizeof(uint32_t)); + state->dc_huffman_map = NULL; + state->coeff_clamp_map = malloc(0x10000 * sizeof(int16_t)); + state->delta_clamp_map = NULL; + + if ( + state->dct_context == NULL || + state->ac_huffman_map == NULL || + state->coeff_clamp_map == NULL + ) + return false; + + int dct_block_count_x = (video_width + 15) / 16; + int dct_block_count_y = (video_height + 15) / 16; + int dct_block_size = dct_block_count_x * dct_block_count_y * sizeof(int16_t) * 8*8; + + for (int i = 0; i < 6; i++) { + state->dct_block_lists[i] = malloc(dct_block_size); + + if (!state->dct_block_lists[i]) + return false; } - if (settings->state_vid.coeff_clamp_map) { - free(settings->state_vid.coeff_clamp_map); - settings->state_vid.coeff_clamp_map = NULL; + + avcodec_dct_init(state->dct_context); + init_dct_data(state); + return true; +} + +void destroy_mdec_encoder(mdec_encoder_t *encoder) { + mdec_encoder_state_t *state = &(encoder->state); + + if (state->dct_context) { + av_free(state->dct_context); + state->dct_context = NULL; } - if (settings->state_vid.dct_context) { - av_free(settings->state_vid.dct_context); - settings->state_vid.dct_context = NULL; + if (state->ac_huffman_map) { + free(state->ac_huffman_map); + state->ac_huffman_map = NULL; } - if (settings->state_vid.dct_block_lists[0]) { - for (int i = 0; i < 6; i++) { - free(settings->state_vid.dct_block_lists[i]); - settings->state_vid.dct_block_lists[i] = NULL; + if (state->dc_huffman_map) { + free(state->dc_huffman_map); + state->dc_huffman_map = NULL; + } + if (state->coeff_clamp_map) { + free(state->coeff_clamp_map); + state->coeff_clamp_map = NULL; + } + if (state->delta_clamp_map) { + 
free(state->delta_clamp_map); + state->delta_clamp_map = NULL; + } + for (int i = 0; i < 6; i++) { + if (state->dct_block_lists[i]) { + free(state->dct_block_lists[i]); + state->dct_block_lists[i] = NULL; } } } -void encode_frame_bs(uint8_t *video_frame, settings_t *settings) -{ - int pitch = settings->video_width; - /*int real_index = (settings->state_vid.frame_index-1); - if (real_index > video_frame_count-1) { - real_index = video_frame_count-1; - } - uint8_t *y_plane = video_frames + settings->video_width*settings->video_height*3/2*real_index;*/ +void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) { + mdec_encoder_state_t *state = &(encoder->state); + + assert(state->dct_context); + + int pitch = encoder->video_width; +#if 0 + int real_index = state->frame_index - 1; + if (real_index > (video_frame_count - 1)) + real_index = video_frame_count - 1; + + uint8_t *y_plane = video_frames + encoder->video_width * encoder->video_height * 3/2 * real_index; +#else uint8_t *y_plane = video_frame; - uint8_t *c_plane = y_plane + (settings->video_width*settings->video_height); + uint8_t *c_plane = y_plane + (encoder->video_width * encoder->video_height); +#endif - assert(settings->state_vid.huffman_encoding_map); - - int dct_block_count_x = (settings->video_width+15)/16; - int dct_block_count_y = (settings->video_height+15)/16; + int dct_block_count_x = (encoder->video_width + 15) / 16; + int dct_block_count_y = (encoder->video_height + 15) / 16; // TODO: non-16x16-aligned videos - assert((settings->video_width % 16) == 0); - assert((settings->video_height % 16) == 0); + assert((encoder->video_width % 16) == 0); + assert((encoder->video_height % 16) == 0); // Rearrange the Y/C planes returned by libswscale into macroblocks. 
- for(int fx = 0; fx < dct_block_count_x; fx++) { - for(int fy = 0; fy < dct_block_count_y; fy++) { - // Order: Cr Cb [Y1|Y2\nY3|Y4] - int block_offs = 64 * (fy*dct_block_count_x + fx); - int16_t *blocks[6] = { - settings->state_vid.dct_block_lists[0] + block_offs, - settings->state_vid.dct_block_lists[1] + block_offs, - settings->state_vid.dct_block_lists[2] + block_offs, - settings->state_vid.dct_block_lists[3] + block_offs, - settings->state_vid.dct_block_lists[4] + block_offs, - settings->state_vid.dct_block_lists[5] + block_offs, - }; + for (int fx = 0; fx < dct_block_count_x; fx++) { + for (int fy = 0; fy < dct_block_count_y; fy++) { + // Order: Cr Cb [Y1|Y2] + // [Y3|Y4] + int block_offs = 64 * (fy*dct_block_count_x + fx); + int16_t *blocks[6] = { + state->dct_block_lists[0] + block_offs, + state->dct_block_lists[1] + block_offs, + state->dct_block_lists[2] + block_offs, + state->dct_block_lists[3] + block_offs, + state->dct_block_lists[4] + block_offs, + state->dct_block_lists[5] + block_offs + }; - for(int y = 0; y < 8; y++) { - for(int x = 0; x < 8; x++) { - int k = y*8 + x; - int cx = fx*8 + x; - int cy = fy*8 + y; - int lx = fx*16 + x; - int ly = fy*16 + y; + for (int y = 0; y < 8; y++) { + for (int x = 0; x < 8; x++) { + int k = y*8 + x; + int cx = fx*8 + x; + int cy = fy*8 + y; + int lx = fx*16 + x; + int ly = fy*16 + y; - blocks[0][k] = (int16_t)c_plane[pitch*cy + 2*cx + 0] - 128; - blocks[1][k] = (int16_t)c_plane[pitch*cy + 2*cx + 1] - 128; - blocks[2][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+0)] - 128; - blocks[3][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+8)] - 128; - blocks[4][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+0)] - 128; - blocks[5][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+8)] - 128; - } - } + blocks[0][k] = (int16_t)c_plane[pitch*cy + 2*cx + 0] - 128; + blocks[1][k] = (int16_t)c_plane[pitch*cy + 2*cx + 1] - 128; + blocks[2][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+0)] - 128; + blocks[3][k] = (int16_t)y_plane[pitch*(ly+0) + (lx+8)] - 128; + 
blocks[4][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+0)] - 128; + blocks[5][k] = (int16_t)y_plane[pitch*(ly+8) + (lx+8)] - 128; + } + } - for(int i = 0; i < 6; i++) { - transform_dct_block(&(settings->state_vid), blocks[i]); + for (int i = 0; i < 6; i++) +#if 0 + transform_dct_block(blocks[i]); +#else + state->dct_context->fdct(blocks[i]); +#endif } } - } // Attempt encoding the frame at the maximum quality. If the result is too // large, increase the quantization scale and try again. @@ -516,100 +552,107 @@ void encode_frame_bs(uint8_t *video_frame, settings_t *settings) // compressing at scale N but optimizing coefficients away until it fits // (like the old algorithm did) for ( - settings->state_vid.quant_scale = 1; - settings->state_vid.quant_scale < 64; - settings->state_vid.quant_scale++ + state->quant_scale = 1; + state->quant_scale < 64; + state->quant_scale++ ) { int16_t quant_table[8*8]; // The DC coefficient's quantization scale is always 8. quant_table[0] = quant_dec[0] * 8; - for (int i = 1; i < 64; i++) { - quant_table[i] = quant_dec[i] * settings->state_vid.quant_scale; - } - memset(settings->state_vid.frame_output, 0, settings->state_vid.frame_max_size); + for (int i = 1; i < 64; i++) + quant_table[i] = quant_dec[i] * state->quant_scale; - settings->state_vid.bits_value = 0; - settings->state_vid.bits_left = 16; - settings->state_vid.uncomp_hwords_used = 0; - settings->state_vid.bytes_used = 8; + memset(state->frame_output, 0, state->frame_max_size); + + state->bits_value = 0; + state->bits_left = 16; + state->uncomp_hwords_used = 0; + state->bytes_used = 8; bool ok = true; - for(int fx = 0; ok && (fx < dct_block_count_x); fx++) { - for(int fy = 0; ok && (fy < dct_block_count_y); fy++) { - // Order: Cr Cb [Y1|Y2\nY3|Y4] - int block_offs = 64 * (fy*dct_block_count_x + fx); - int16_t *blocks[6] = { - settings->state_vid.dct_block_lists[0] + block_offs, - settings->state_vid.dct_block_lists[1] + block_offs, - settings->state_vid.dct_block_lists[2] + 
block_offs, - settings->state_vid.dct_block_lists[3] + block_offs, - settings->state_vid.dct_block_lists[4] + block_offs, - settings->state_vid.dct_block_lists[5] + block_offs, - }; + for (int fx = 0; ok && (fx < dct_block_count_x); fx++) { + for (int fy = 0; ok && (fy < dct_block_count_y); fy++) { + // Order: Cr Cb [Y1|Y2] + // [Y3|Y4] + int block_offs = 64 * (fy*dct_block_count_x + fx); + int16_t *blocks[6] = { + state->dct_block_lists[0] + block_offs, + state->dct_block_lists[1] + block_offs, + state->dct_block_lists[2] + block_offs, + state->dct_block_lists[3] + block_offs, + state->dct_block_lists[4] + block_offs, + state->dct_block_lists[5] + block_offs + }; - for(int i = 0; ok && (i < 6); i++) { - ok = encode_dct_block(&(settings->state_vid), blocks[i], quant_table); + for(int i = 0; ok && (i < 6); i++) + ok = encode_dct_block(state, blocks[i], quant_table); } } - } - if (!ok) { continue; } - if (!encode_bits(&(settings->state_vid), 10, 0x1FF)) { continue; } - if (!encode_bits(&(settings->state_vid), 2, 0x2)) { continue; } - if (!flush_bits(&(settings->state_vid))) { continue; } + if (!ok) + continue; + if (!encode_bits(state, 10, 0x1FF)) + continue; + if (!encode_bits(state, 2, 0x2)) + continue; + if (!flush_bits(state)) + continue; - settings->state_vid.uncomp_hwords_used += 2; - settings->state_vid.quant_scale_sum += settings->state_vid.quant_scale; + state->uncomp_hwords_used += 2; + state->quant_scale_sum += state->quant_scale; break; } - assert(settings->state_vid.quant_scale < 64); + assert(state->quant_scale < 64); // MDEC DMA is usually configured to transfer data in 32-word chunks. - settings->state_vid.uncomp_hwords_used = (settings->state_vid.uncomp_hwords_used+0x3F)&~0x3F; + state->uncomp_hwords_used = (state->uncomp_hwords_used+0x3F)&~0x3F; // This is not the number of 32-byte blocks required for uncompressed data // as jPSXdec docs say, but rather the number of 32-*bit* words required. 
// The first 4 bytes of the frame header are in fact the MDEC command to // start decoding, which contains the data length in words in the lower 16 // bits. - settings->state_vid.blocks_used = (settings->state_vid.uncomp_hwords_used+1)>>1; + state->blocks_used = (state->uncomp_hwords_used+1)>>1; // We need a multiple of 4 - settings->state_vid.bytes_used = (settings->state_vid.bytes_used+0x3)&~0x3; + state->bytes_used = (state->bytes_used+0x3)&~0x3; // MDEC command (size of decompressed MDEC data) - settings->state_vid.frame_output[0x000] = (uint8_t)settings->state_vid.blocks_used; - settings->state_vid.frame_output[0x001] = (uint8_t)(settings->state_vid.blocks_used>>8); - settings->state_vid.frame_output[0x002] = (uint8_t)0x00; - settings->state_vid.frame_output[0x003] = (uint8_t)0x38; + state->frame_output[0x000] = (uint8_t)state->blocks_used; + state->frame_output[0x001] = (uint8_t)(state->blocks_used>>8); + state->frame_output[0x002] = (uint8_t)0x00; + state->frame_output[0x003] = (uint8_t)0x38; // Quantization scale - settings->state_vid.frame_output[0x004] = (uint8_t)settings->state_vid.quant_scale; - settings->state_vid.frame_output[0x005] = (uint8_t)(settings->state_vid.quant_scale>>8); + state->frame_output[0x004] = (uint8_t)state->quant_scale; + state->frame_output[0x005] = (uint8_t)(state->quant_scale>>8); // BS version - settings->state_vid.frame_output[0x006] = 0x02; - settings->state_vid.frame_output[0x007] = 0x00; - - retire_av_data(settings, 0, 1); + state->frame_output[0x006] = 0x02; + state->frame_output[0x007] = 0x00; } -void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *settings) -{ +int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *output) { + mdec_encoder_state_t *state = &(encoder->state); + int last_frame_index = state->frame_index; + int frame_size = encoder->video_width * encoder->video_height * 2; + uint8_t header[32]; memset(header, 0, sizeof(header)); - 
while(settings->state_vid.frame_data_offset >= settings->state_vid.frame_max_size) { - settings->state_vid.frame_index++; + while (state->frame_data_offset >= state->frame_max_size) { + state->frame_index++; // TODO: work out an optimal block count for this // TODO: calculate this all based on FPS - settings->state_vid.frame_block_overflow_num += settings->state_vid.frame_block_base_overflow; - settings->state_vid.frame_max_size = settings->state_vid.frame_block_overflow_num / settings->state_vid.frame_block_overflow_den * 2016; - settings->state_vid.frame_block_overflow_num %= settings->state_vid.frame_block_overflow_den; - settings->state_vid.frame_data_offset = 0; - encode_frame_bs(video_frames, settings); + state->frame_block_overflow_num += state->frame_block_base_overflow; + state->frame_max_size = state->frame_block_overflow_num / state->frame_block_overflow_den * 2016; + state->frame_block_overflow_num %= state->frame_block_overflow_den; + state->frame_data_offset = 0; + + encode_frame_bs(encoder, video_frames); + video_frames += frame_size; } // STR version @@ -621,47 +664,48 @@ void encode_sector_str(uint8_t *video_frames, uint8_t *output, settings_t *setti header[0x003] = 0x80; // Muxed chunk index/count - int chunk_index = settings->state_vid.frame_data_offset/2016; - int chunk_count = settings->state_vid.frame_max_size/2016; + int chunk_index = state->frame_data_offset / 2016; + int chunk_count = state->frame_max_size / 2016; header[0x004] = (uint8_t)chunk_index; - header[0x005] = (uint8_t)(chunk_index>>8); + header[0x005] = (uint8_t)(chunk_index >> 8); header[0x006] = (uint8_t)chunk_count; - header[0x007] = (uint8_t)(chunk_count>>8); + header[0x007] = (uint8_t)(chunk_count >> 8); // Frame index - header[0x008] = (uint8_t)settings->state_vid.frame_index; - header[0x009] = (uint8_t)(settings->state_vid.frame_index>>8); - header[0x00A] = (uint8_t)(settings->state_vid.frame_index>>16); - header[0x00B] = (uint8_t)(settings->state_vid.frame_index>>24); + 
header[0x008] = (uint8_t)state->frame_index; + header[0x009] = (uint8_t)(state->frame_index >> 8); + header[0x00A] = (uint8_t)(state->frame_index >> 16); + header[0x00B] = (uint8_t)(state->frame_index >> 24); // Video frame size - header[0x010] = (uint8_t)settings->video_width; - header[0x011] = (uint8_t)(settings->video_width>>8); - header[0x012] = (uint8_t)settings->video_height; - header[0x013] = (uint8_t)(settings->video_height>>8); + header[0x010] = (uint8_t)encoder->video_width; + header[0x011] = (uint8_t)(encoder->video_width >> 8); + header[0x012] = (uint8_t)encoder->video_height; + header[0x013] = (uint8_t)(encoder->video_height >> 8); // MDEC command (size of decompressed MDEC data) - header[0x014] = (uint8_t)settings->state_vid.blocks_used; - header[0x015] = (uint8_t)(settings->state_vid.blocks_used>>8); + header[0x014] = (uint8_t)state->blocks_used; + header[0x015] = (uint8_t)(state->blocks_used >> 8); header[0x016] = 0x00; header[0x017] = 0x38; // Quantization scale - header[0x018] = (uint8_t)settings->state_vid.quant_scale; - header[0x019] = (uint8_t)(settings->state_vid.quant_scale>>8); + header[0x018] = (uint8_t)state->quant_scale; + header[0x019] = (uint8_t)(state->quant_scale >> 8); // BS version header[0x01A] = 0x02; header[0x01B] = 0x00; // Demuxed bytes used as a multiple of 4 - header[0x00C] = (uint8_t)settings->state_vid.bytes_used; - header[0x00D] = (uint8_t)(settings->state_vid.bytes_used>>8); - header[0x00E] = (uint8_t)(settings->state_vid.bytes_used>>16); - header[0x00F] = (uint8_t)(settings->state_vid.bytes_used>>24); + header[0x00C] = (uint8_t)state->bytes_used; + header[0x00D] = (uint8_t)(state->bytes_used >> 8); + header[0x00E] = (uint8_t)(state->bytes_used >> 16); + header[0x00F] = (uint8_t)(state->bytes_used >> 24); memcpy(output + 0x018, header, sizeof(header)); - memcpy(output + 0x018 + 0x020, settings->state_vid.frame_output + settings->state_vid.frame_data_offset, 2016); + memcpy(output + 0x018 + 0x020, state->frame_output + 
state->frame_data_offset, 2016); - settings->state_vid.frame_data_offset += 2016; + state->frame_data_offset += 2016; + return state->frame_index - last_frame_index; } diff --git a/psxavenc/mdec.h b/psxavenc/mdec.h new file mode 100644 index 0000000..6b22e20 --- /dev/null +++ b/psxavenc/mdec.h @@ -0,0 +1,67 @@ +/* +psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend + +Copyright (c) 2019, 2020 Adrian "asie" Siekierka +Copyright (c) 2019 Ben "GreaseMonkey" Russell +Copyright (c) 2023, 2025 spicyjpeg + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#pragma once + +#include +#include +#include + +typedef struct { + int frame_index; + int frame_data_offset; + int frame_max_size; + int frame_block_base_overflow; + int frame_block_overflow_num; + int frame_block_overflow_den; + int block_type; + int16_t last_dc_values[3]; + uint16_t bits_value; + int bits_left; + uint8_t *frame_output; + int bytes_used; + int blocks_used; + int uncomp_hwords_used; + int quant_scale; + int quant_scale_sum; + + AVDCT *dct_context; + uint32_t *ac_huffman_map; + uint32_t *dc_huffman_map; + int16_t *coeff_clamp_map; + int16_t *delta_clamp_map; + int16_t *dct_block_lists[6]; +} mdec_encoder_state_t; + +typedef struct { + int video_width; + int video_height; + + mdec_encoder_state_t state; +} mdec_encoder_t; + +bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_height); +void destroy_mdec_encoder(mdec_encoder_t *encoder); +void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame); +int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *output); diff --git a/psxavenc/psxavenc.c b/psxavenc/psxavenc.c deleted file mode 100644 index d980f1d..0000000 --- a/psxavenc/psxavenc.c +++ /dev/null @@ -1,495 +0,0 @@ -/* -psxavenc: MDEC video + SPU/XA-ADPCM audio encoder frontend - -Copyright (c) 2019, 2020 Adrian "asie" Siekierka -Copyright (c) 2019 Ben "GreaseMonkey" Russell -Copyright (c) 2023 spicyjpeg - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. 
If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "common.h" -#include "config.h" - -const char *format_names[NUM_FORMATS] = { - "xa", "xacd", - "spu", "spui", - "vag", "vagi", - "str2", "str2cd", "str2v", - "sbs2" -}; - -void print_help(void) { - fprintf(stderr, - "Usage:\n" - " psxavenc -t [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] \n" - " psxavenc -t [-f 18900|37800] [-b 4|8] [-c 1|2] [-F 0-255] [-C 0-31] [-s WxH] [-I] [-r num/den] [-x 1|2] \n" - " psxavenc -t str2v [-s WxH] [-I] [-r num/den] [-x 1|2] \n" - " psxavenc -t sbs2 [-s WxH] [-I] [-r num/den] [-a size] \n" - " psxavenc -t [-f freq] [-L] [-a size] \n" - " psxavenc -t [-f freq] [-c 1-24] [-L] [-i size] [-a size] \n" - "\nTool options:\n" - " -h Show this help message and exit\n" - " -V Show version information and exit\n" - " -q Suppress all non-error messages\n" - "\n" - "Output options:\n" - " -t format Use specified output type\n" - " xa [A.] XA-ADPCM, 2336-byte sectors\n" - " xacd [A.] XA-ADPCM, 2352-byte sectors\n" - " spu [A.] raw SPU-ADPCM mono data\n" - " spui [A.] raw SPU-ADPCM interleaved data\n" - " vag [A.] .vag SPU-ADPCM mono\n" - " vagi [A.] 
.vag SPU-ADPCM interleaved\n" - " str2 [AV] v2 .str video, 2336-byte sectors\n" - " str2cd [AV] v2 .str video, 2352-byte sectors\n" - " str2v [.V] v2 .str video file\n" - " sbs2 [.V] v2 .sbs video, 2048-byte sectors\n" - " -F num xa/str2: Set the XA file number\n" - " 0-255, default 0\n" - " -C num xa/str2: Set the XA channel number\n" - " 0-31, default 0\n" - "\n" - "Audio options:\n" - " -f freq Use specified sample rate\n" - " xa/str2: 18900 or 37800, default 37800\n" - " spu/vag: any value, default 44100\n" - " spui/vagi: any value, default 44100\n" - " -b bitdepth Use specified bit depth\n" - " xa/str2: 4 or 8, default 4\n" - " spu/vag: must be 4\n" - " spui/vagi: must be 4\n" - " -c channels Use specified channel count\n" - " xa/str2: 1 or 2, default 2\n" - " spu/vag: must be 1\n" - " spui/vagi: any value, default 2\n" - " -R key=value,... Pass custom options to libswresample (see FFmpeg docs)\n" - "\n" - "SPU-ADPCM options (spu/spui/vag/vagi formats):\n" - " -L spu/vag: Add a loop marker at the end of sample data\n" - " spui/vagi: Add a loop marker at the end of each chunk\n" - " -i size spui/vagi: Use specified channel interleave\n" - " Any multiple of 16, default 2048\n" - " -a size spu/vag: Pad sample data to multiple of specified size\n" - " Any value >= 16, default 64\n" - " spui/vagi: Pad header and each chunk to multiple of specified size\n" - " Any value >= 16, default 2048\n" - "\n" - "Video options:\n" - " -s WxH Rescale input file to fit within specified size\n" - " 16x16-320x256 in 16-pixel increments, default 320x240\n" - " -I Force stretching to given size without preserving aspect ratio\n" - " -r num[/den] Set frame rate to specified integer or fraction\n" - " 1-30, default 15\n" - " -x speed str2: Set the CD-ROM speed the file is meant to played at\n" - " 1 or 2, default 2\n" - " -a size sbs2: Set the size of each frame\n" - " Any value >= 256, default 8192\n" - " -S key=value,... 
Pass custom options to libswscale (see FFmpeg docs)\n" - "\n" - ); -} - -void print_version(void) { - printf("psxavenc " VERSION "\n"); -} - -int parse_args_old(settings_t* settings, int argc, char** argv) { - int c, i; - char *next; - while ((c = getopt(argc, argv, "?hVqt:F:C:f:b:c:LR:i:a:s:IS:r:x:")) != -1) { - switch (c) { - case '?': - case 'h': { - print_help(); - return -1; - } break; - case 'V': { - print_version(); - return -1; - } break; - case 'q': { - settings->quiet = true; - settings->show_progress = false; - } break; - case 't': { - settings->format = -1; - for (i = 0; i < NUM_FORMATS; i++) { - if (!strcmp(optarg, format_names[i])) { - settings->format = i; - break; - } - } - if (settings->format < 0) { - fprintf(stderr, "Invalid format: %s\n", optarg); - return -1; - } - } break; - case 'F': { - settings->file_number = strtol(optarg, NULL, 0); - if (settings->file_number < 0 || settings->file_number > 255) { - fprintf(stderr, "Invalid file number: %d (must be in 0-255 range)\n", settings->file_number); - return -1; - } - } break; - case 'C': { - settings->channel_number = strtol(optarg, NULL, 0); - if (settings->channel_number < 0 || settings->channel_number > 31) { - fprintf(stderr, "Invalid channel number: %d (must be in 0-31 range)\n", settings->channel_number); - return -1; - } - } break; - case 'f': { - settings->frequency = strtol(optarg, NULL, 0); - if (settings->frequency < 1000) { - fprintf(stderr, "Invalid frequency: %d (must be at least 1000)\n", settings->frequency); - return -1; - } - } break; - case 'b': { - settings->bits_per_sample = strtol(optarg, NULL, 0); - if (settings->bits_per_sample != 4 && settings->bits_per_sample != 8) { - fprintf(stderr, "Invalid bit depth: %d (must be 4 or 8)\n", settings->bits_per_sample); - return -1; - } - } break; - case 'c': { - settings->channels = strtol(optarg, NULL, 0); - if (settings->channels < 1) { - fprintf(stderr, "Invalid channel count: %d (must be at least 1)\n", settings->channels); - 
return -1; - } - } break; - case 'L': { - settings->loop = true; - } break; - case 'R': { - settings->swresample_options = optarg; - } break; - case 'i': { - settings->interleave = (strtol(optarg, NULL, 0) + 15) & ~15; - if (settings->interleave < 16) { - fprintf(stderr, "Invalid interleave: %d (must be at least 16)\n", settings->interleave); - return -1; - } - } break; - case 'a': { - settings->alignment = strtol(optarg, NULL, 0); - if (settings->alignment < 16) { - fprintf(stderr, "Invalid alignment: %d (must be at least 16)\n", settings->alignment); - return -1; - } - } break; - case 's': { - settings->video_width = (strtol(optarg, &next, 0) + 15) & ~15; - if (*next != 'x') { - fprintf(stderr, "Invalid video size (must be specified as x)\n"); - return -1; - } - settings->video_height = (strtol(next + 1, NULL, 0) + 15) & ~15; - - if (settings->video_width < 16 || settings->video_width > 320) { - fprintf(stderr, "Invalid video width: %d (must be in 16-320 range)\n", settings->video_width); - return -1; - } - if (settings->video_height < 16 || settings->video_height > 256) { - fprintf(stderr, "Invalid video height: %d (must be in 16-256 range)\n", settings->video_height); - return -1; - } - } break; - case 'I': { - settings->ignore_aspect_ratio = true; - } break; - case 'S': { - settings->swscale_options = optarg; - } break; - case 'r': { - settings->video_fps_num = strtol(optarg, &next, 0); - if (*next == '/') { - settings->video_fps_den = strtol(next + 1, NULL, 0); - } else { - settings->video_fps_den = 1; - } - - if (!settings->video_fps_den) { - fprintf(stderr, "Invalid frame rate denominator\n"); - return -1; - } - i = settings->video_fps_num / settings->video_fps_den; - if (i < 1 || i > 60) { - fprintf(stderr, "Invalid frame rate: %d/%d (must be in 1-60 range)\n", settings->video_fps_num, settings->video_fps_den); - return -1; - } - } break; - case 'x': { - settings->cd_speed = strtol(optarg, NULL, 0); - if (settings->cd_speed < 1 || settings->cd_speed > 2) { 
- fprintf(stderr, "Invalid CD-ROM speed: %d (must be 1 or 2)\n", settings->cd_speed); - return -1; - } - } break; - } - } - - // Some settings' (frequency, channels, interleave and alignment) default - // values are initialized here as they depend on the chosen format. - switch (settings->format) { - case FORMAT_XA: - case FORMAT_XACD: - case FORMAT_STR2: - case FORMAT_STR2CD: - case FORMAT_STR2V: - if (!settings->frequency) { - settings->frequency = PSX_AUDIO_XA_FREQ_DOUBLE; - } else if (settings->frequency != PSX_AUDIO_XA_FREQ_SINGLE && settings->frequency != PSX_AUDIO_XA_FREQ_DOUBLE) { - fprintf( - stderr, "Invalid XA-ADPCM frequency: %d Hz (must be %d or %d Hz)\n", settings->frequency, - PSX_AUDIO_XA_FREQ_SINGLE, PSX_AUDIO_XA_FREQ_DOUBLE - ); - return -1; - } - if (!settings->channels) { - settings->channels = 2; - } else if (settings->channels > 2) { - fprintf(stderr, "Invalid XA-ADPCM channel count: %d (must be 1 or 2)\n", settings->channels); - return -1; - } - if (settings->interleave || settings->alignment) { - fprintf(stderr, "Interleave and frame size cannot be specified for this format\n"); - return -1; - } - if (settings->loop) { - fprintf(stderr, "XA-ADPCM does not support loop markers\n"); - return -1; - } - break; - case FORMAT_SPU: - case FORMAT_VAG: - if (!settings->frequency) { - settings->frequency = 44100; - } - if (settings->bits_per_sample != 4) { - fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample); - return -1; - } - if (!settings->channels) { - settings->channels = 1; - } else if (settings->channels > 1) { - fprintf(stderr, "Invalid SPU-ADPCM channel count: %d (must be 1)\n", settings->channels); - return -1; - } - if (settings->interleave) { - fprintf(stderr, "Interleave cannot be specified for this format\n"); - return -1; - } - if (!settings->alignment) { - settings->alignment = 64; - } - break; - case FORMAT_SPUI: - case FORMAT_VAGI: - if (!settings->frequency) { - settings->frequency = 44100; - 
} - if (settings->bits_per_sample != 4) { - fprintf(stderr, "Invalid SPU-ADPCM bit depth: %d (must be 4)\n", settings->bits_per_sample); - return -1; - } - if (!settings->channels) { - settings->channels = 2; - } - if (!settings->interleave) { - settings->interleave = 2048; - } - if (!settings->alignment) { - settings->alignment = 2048; - } - break; - case FORMAT_SBS2: - if (settings->interleave) { - fprintf(stderr, "Interleave cannot be specified for this format\n"); - return -1; - } - if (!settings->alignment) { - settings->alignment = 8192; - } else if (settings->alignment < 256) { - fprintf(stderr, "Invalid frame size: %d (must be at least 256)\n", settings->alignment); - return -1; - } - break; - default: - fprintf(stderr, "Output format must be specified\n"); - return -1; - } - - return optind; -} - -int main(int argc, char **argv) { - settings_t settings; - int arg_offset; - FILE* output; - - memset(&settings,0,sizeof(settings_t)); - - settings.quiet = false; - settings.show_progress = isatty(fileno(stderr)); - - settings.format = -1; - settings.file_number = 0; - settings.channel_number = 0; - settings.cd_speed = 2; - settings.channels = 0; - settings.frequency = 0; - settings.bits_per_sample = 4; - settings.interleave = 0; - settings.alignment = 0; - settings.loop = false; - - // NOTE: ffmpeg/ffplay's .str demuxer has the frame rate hardcoded to 15fps - // so if you're messing around with this make sure you test generated files - // with another player and/or in an emulator. 
- settings.video_width = 320; - settings.video_height = 240; - settings.video_fps_num = 15; - settings.video_fps_den = 1; - settings.ignore_aspect_ratio = false; - - settings.swresample_options = NULL; - settings.swscale_options = NULL; - - settings.audio_samples = NULL; - settings.audio_sample_count = 0; - settings.video_frames = NULL; - settings.video_frame_count = 0; - - settings.state_vid.huffman_encoding_map = NULL; - settings.state_vid.coeff_clamp_map = NULL; - settings.state_vid.dct_context = NULL; - for(int i = 0; i < 6; i++) { - settings.state_vid.dct_block_lists[i] = NULL; - } - - if (argc < 2) { - print_help(); - return 1; - } - - arg_offset = parse_args_old(&settings, argc, argv); - if (arg_offset < 0) { - return 1; - } else if (argc < arg_offset + 2) { - print_help(); - return 1; - } - - bool has_audio = - (settings.format != FORMAT_STR2V) && - (settings.format != FORMAT_SBS2); - bool has_video = - (settings.format == FORMAT_STR2) || - (settings.format == FORMAT_STR2CD) || - (settings.format == FORMAT_STR2V) || - (settings.format == FORMAT_SBS2); - - bool did_open_data = open_av_data(argv[arg_offset + 0], &settings, - has_audio, has_video, !has_video, has_video); - if (!did_open_data) { - fprintf(stderr, "Could not open input file!\n"); - return 1; - } - - output = fopen(argv[arg_offset + 1], "wb"); - if (output == NULL) { - fprintf(stderr, "Could not open output file!\n"); - return 1; - } - - settings.start_time = time(NULL); - settings.last_progress_update = 0; - - switch (settings.format) { - case FORMAT_XA: - case FORMAT_XACD: - if (!settings.quiet) { - fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n", - settings.frequency, settings.bits_per_sample, - (settings.channels == 2) ? 
"stereo" : "mono", - settings.file_number, settings.channel_number - ); - } - - encode_file_xa(&settings, output); - break; - case FORMAT_SPU: - case FORMAT_VAG: - if (!settings.quiet) { - fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz mono\n", - settings.frequency - ); - } - - encode_file_spu(&settings, output); - break; - case FORMAT_SPUI: - case FORMAT_VAGI: - if (!settings.quiet) { - fprintf(stderr, "Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n", - settings.frequency, settings.channels, settings.interleave - ); - } - - encode_file_spu_interleaved(&settings, output); - break; - case FORMAT_STR2: - case FORMAT_STR2CD: - case FORMAT_STR2V: - if (!settings.quiet) { - if (settings.decoder_state_av.audio_stream) { - fprintf(stderr, "Audio format: XA-ADPCM, %d Hz %d-bit %s, F=%d C=%d\n", - settings.frequency, settings.bits_per_sample, - (settings.channels == 2) ? "stereo" : "mono", - settings.file_number, settings.channel_number - ); - } - fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n", - settings.video_width, settings.video_height, - (double)settings.video_fps_num / (double)settings.video_fps_den - ); - } - - encode_file_str(&settings, output); - break; - case FORMAT_SBS2: - if (!settings.quiet) { - fprintf(stderr, "Video format: BS v2, %dx%d, %.2f fps\n", - settings.video_width, settings.video_height, - (double)settings.video_fps_num / (double)settings.video_fps_den - ); - } - - encode_file_sbs(&settings, output); - break; - } - - if (settings.show_progress) { - fprintf(stderr, "\nDone.\n"); - } - fclose(output); - close_av_data(&settings); - return 0; -} From 4a0d0c55fd80dad115a81a6490571505cb7138ce Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Fri, 28 Feb 2025 11:42:23 +0100 Subject: [PATCH 4/8] Add BS v3 encoding support --- psxavenc/filefmt.c | 6 +- psxavenc/mdec.c | 274 ++++++++++++++++++++++++++++++++------------- psxavenc/mdec.h | 6 +- 3 files changed, 203 insertions(+), 83 deletions(-) diff --git a/psxavenc/filefmt.c 
b/psxavenc/filefmt.c index b00a29b..7e508f5 100644 --- a/psxavenc/filefmt.c +++ b/psxavenc/filefmt.c @@ -375,7 +375,7 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) { memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); mdec_encoder_t encoder; - init_mdec_encoder(&encoder, args->video_width, args->video_height); + init_mdec_encoder(&encoder, args->video_codec, args->video_width, args->video_height); // e.g. 15fps = (150*7/8/15) = 8.75 blocks per frame encoder.state.frame_block_base_overflow = (75 * args->str_cd_speed) * video_sectors_per_block * args->str_fps_den; @@ -403,7 +403,7 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) { // Video sector init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*) buffer, j); - int frames_used = encode_sector_str(&encoder, decoder->video_frames, buffer); + int frames_used = encode_sector_str(&encoder, args->format, decoder->video_frames, buffer); retire_av_data(decoder, 0, frames_used); } else { // Audio sector @@ -463,7 +463,7 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) { void encode_file_sbs(args_t *args, decoder_t *decoder, FILE *output) { mdec_encoder_t encoder; - init_mdec_encoder(&encoder, args->video_width, args->video_height); + init_mdec_encoder(&encoder, args->video_codec, args->video_width, args->video_height); encoder.state.frame_output = malloc(args->alignment); encoder.state.frame_data_offset = 0; diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c index 095bb33..0d945c3 100644 --- a/psxavenc/mdec.c +++ b/psxavenc/mdec.c @@ -28,8 +28,16 @@ freely, subject to the following restrictions: #include #include #include +#include "args.h" #include "mdec.h" +// https://stackoverflow.com/a/60011209 +#if 0 +#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? 
(((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d))) +#else +#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d))) +#endif + #define AC_PAIR(zeroes, value) \ (((zeroes) << 10) | ((+(value)) & 0x3FF)), \ (((zeroes) << 10) | ((-(value)) & 0x3FF)) @@ -154,6 +162,44 @@ static const struct { {16, 0x001F, AC_PAIR(27, 1)} }; +static const struct { + int c_bits; + uint32_t c_value; + int sign_bits; + int value_bits; +} dc_c_huffman_tree[] = { + {2, 0x0, 0, 0}, + {2, 0x1, 1, 0}, + {2, 0x2, 1, 1}, + {3, 0x6, 1, 2}, + {4, 0xE, 1, 3}, + {5, 0x1E, 1, 4}, + {6, 0x3E, 1, 5}, + {7, 0x7E, 1, 6}, + {8, 0xFE, 1, 7}, +}; + +static const struct { + int c_bits; + uint32_t c_value; + int sign_bits; + int value_bits; +} dc_y_huffman_tree[] = { + {3, 0x4, 0, 0}, + {2, 0x0, 1, 0}, + {2, 0x1, 1, 1}, + {3, 0x5, 1, 2}, + {3, 0x6, 1, 3}, + {4, 0xE, 1, 4}, + {5, 0x1E, 1, 5}, + {6, 0x3E, 1, 6}, + {7, 0x7E, 1, 7}, +}; + +static const uint8_t dc_coeff_indices[6] = { + 0, 1, 2, 2, 2, 2 +}; + static const uint8_t quant_dec[8*8] = { 2, 16, 19, 22, 26, 27, 29, 34, 16, 16, 22, 24, 27, 29, 34, 37, @@ -165,6 +211,7 @@ static const uint8_t quant_dec[8*8] = { 27, 29, 35, 38, 46, 56, 69, 83 }; +#if 0 static const uint8_t dct_zigzag_table[8*8] = { 0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42, @@ -175,6 +222,7 @@ static const uint8_t dct_zigzag_table[8*8] = { 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63 }; +#endif static const uint8_t dct_zagzig_table[8*8] = { 0, 1, 8, 16, 9, 2, 3, 10, @@ -209,31 +257,84 @@ static const int16_t dct_scale_table[8*8] = { }; #endif -static void init_dct_data(mdec_encoder_state_t *state) { +static void init_dct_data(mdec_encoder_state_t *state, bs_codec_t codec) { for(int i = 0; i <= 0xFFFF; i++) { // high 8 bits = bit count // low 24 bits = value - state->ac_huffman_map[i] = ((6+16) << 24) | (0x01 << 16) | i; + state->ac_huffman_map[i] = ((6+16)<<24)|((0x01<<16)|(i)); int16_t coeff = (int16_t)i; - if (coeff < -0x200) coeff = -0x200; - else if 
(coeff > +0x1FF) - coeff = +0x1FF; + else if (coeff > +0x1FE) + coeff = +0x1FE; // 0x1FF = v2 end of frame - state->coeff_clamp_map[i] = coeff & 0x3FF; + state->coeff_clamp_map[i] = coeff; + + int16_t delta = (int16_t)DIVIDE_ROUNDED(i, 4); + if (delta < -0xFF) + delta = -0xFF; + else if (delta > +0xFF) + delta = +0xFF; + + // Some versions of Sony's BS v3 decoder compute each DC coefficient as + // ((last + delta * 4) & 0x3FF) instead of just (last + delta * 4). The + // encoder can leverage this behavior to represent large coefficient + // differences as smaller deltas that cause the decoder to overflow and + // wrap around (e.g. -1 to encode -512 -> 511 as opposed to +1023). This + // saves some space as larger DC values take up more bits. + if (codec == BS_CODEC_V3DC) { + if (delta > +0x80) + delta -= 0x100; + } + + state->delta_clamp_map[i] = delta; } - int tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]); + int ac_tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]); + int dc_c_tree_item_count = sizeof(dc_c_huffman_tree) / sizeof(dc_c_huffman_tree[0]); + int dc_y_tree_item_count = sizeof(dc_y_huffman_tree) / sizeof(dc_y_huffman_tree[0]); - for(int i = 0; i < tree_item_count; i++) { + for (int i = 0; i < ac_tree_item_count; i++) { int bits = ac_huffman_tree[i].c_bits+1; uint32_t base_value = ac_huffman_tree[i].c_value; state->ac_huffman_map[ac_huffman_tree[i].u_hword_pos] = (bits << 24) | (base_value << 1) | 0; state->ac_huffman_map[ac_huffman_tree[i].u_hword_neg] = (bits << 24) | (base_value << 1) | 1; } + for (int i = 0; i < dc_c_tree_item_count; i++) { + int dc_bits = dc_c_huffman_tree[i].sign_bits + dc_c_huffman_tree[i].value_bits; + int bits = dc_c_huffman_tree[i].c_bits + dc_bits; + uint32_t base_value = dc_c_huffman_tree[i].c_value << dc_bits; + + for (int j = 0; j < (1 << dc_bits); j++) { + int delta = j; + + if ((j >> dc_c_huffman_tree[i].value_bits) == 0) { + delta -= (1 << dc_bits) - 1; + delta &= 0x1FF; + } + 
+ state->dc_huffman_map[(0 << 9) | delta] = (bits << 24) | base_value | j; + state->dc_huffman_map[(1 << 9) | delta] = (bits << 24) | base_value | j; + } + } + for (int i = 0; i < dc_y_tree_item_count; i++) { + int dc_bits = dc_y_huffman_tree[i].sign_bits + dc_y_huffman_tree[i].value_bits; + int bits = dc_y_huffman_tree[i].c_bits + dc_bits; + uint32_t base_value = dc_y_huffman_tree[i].c_value << dc_bits; + + for (int j = 0; j < (1 << dc_bits); j++) { + int delta = j; + + if ((j >> dc_y_huffman_tree[i].value_bits) == 0) { + delta -= (1 << dc_bits) - 1; + delta &= 0x1FF; + } + + state->dc_huffman_map[(2 << 9) | delta] = (bits << 24) | base_value | j; + } + } } static bool flush_bits(mdec_encoder_state_t *state) { @@ -302,29 +403,6 @@ static bool encode_bits(mdec_encoder_state_t *state, int bits, uint32_t val) { return true; } -static bool encode_ac_value(mdec_encoder_state_t *state, uint16_t value) { - assert(0 <= value && value <= 0xFFFF); - -#if 0 - int tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]); - - for (int i = 0; i < tree_item_count; i++) { - if (value == ac_huffman_tree[i].u_hword_pos) { - return encode_bits(state, ac_huffman_tree[i].c_bits+1, ((uint32_t)ac_huffman_tree[i].c_value << 1) | 0); - } else if (value == ac_huffman_tree[i].u_hword_neg) { - return encode_bits(state, ac_huffman_tree[i].c_bits+1, ((uint32_t)ac_huffman_tree[i].c_value << 1) | 1); - } - } - - // Use an escape - return encode_bits(state, 6+16, (0x01 << 16) | (0xFFFF & (uint32_t)value)); -#else - uint32_t outword = state->ac_huffman_map[value]; - - return encode_bits(state, outword >> 24, outword & 0xFFFFFF); -#endif -} - #if 0 static void transform_dct_block(int16_t *block) { // Apply DCT to block @@ -372,49 +450,67 @@ static int reduce_dct_block(mdec_encoder_state_t *state, int32_t *block, int32_t } #endif -// https://stackoverflow.com/a/60011209 -#if 0 -#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? 
(((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d))) -#else -#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d))) -#endif - -static bool encode_dct_block(mdec_encoder_state_t *state, const int16_t *block, const int16_t *quant_table) { +static bool encode_dct_block( + mdec_encoder_state_t *state, + bs_codec_t codec, + const int16_t *block, + const int16_t *quant_table +) { int dc = DIVIDE_ROUNDED(block[0], quant_table[0]); - dc = state->coeff_clamp_map[dc&0xFFFF]; - if (!encode_bits(state, 10, dc)) - return false; + dc = state->coeff_clamp_map[dc & 0xFFFF]; + + if (codec == BS_CODEC_V2) { + if (!encode_bits(state, 10, dc & 0x3FF)) + return false; + } else { + int index = dc_coeff_indices[state->block_type]; + int last = state->last_dc_values[index]; + + int delta = state->delta_clamp_map[(dc - last) & 0xFFFF]; + state->last_dc_values[index] = (last + delta * 4) & 0x3FF; + + uint32_t outword = state->dc_huffman_map[(index << 9) | (delta & 0x1FF)]; + + if (!encode_bits(state, outword >> 24, outword & 0xFFFFFF)) + return false; + } for (int i = 1, zeroes = 0; i < 64; i++) { int ri = dct_zagzig_table[i]; int ac = DIVIDE_ROUNDED(block[ri], quant_table[ri]); - ac = state->coeff_clamp_map[ac&0xFFFF]; + + ac = state->coeff_clamp_map[ac & 0xFFFF]; if (ac == 0) { zeroes++; } else { - if (!encode_ac_value(state, (zeroes<<10)|ac)) + uint32_t outword = state->ac_huffman_map[(zeroes << 10) | ac]; + + if (!encode_bits(state, outword >> 24, outword & 0xFFFFFF)) return false; zeroes = 0; - state->uncomp_hwords_used += 1; + state->uncomp_hwords_used++; } } - //fprintf(stderr, "dc %08X rles %2d\n", dc, zero_rle_words); - //assert(dc >= -0x200); assert(dc < +0x200); - // Store end of block if (!encode_bits(state, 2, 0x2)) return false; + state->block_type++; + state->block_type %= 6; state->uncomp_hwords_used += 2; //state->uncomp_hwords_used = (state->uncomp_hwords_used+0xF)&~0xF; return true; } -bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int 
video_height) { +bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int video_width, int video_height) { + encoder->video_codec = video_codec; + encoder->video_width = video_width; + encoder->video_height = video_height; + mdec_encoder_state_t *state = &(encoder->state); if (state->dct_context != NULL) @@ -422,9 +518,9 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_heigh state->dct_context = avcodec_dct_alloc(); state->ac_huffman_map = malloc(0x10000 * sizeof(uint32_t)); - state->dc_huffman_map = NULL; + state->dc_huffman_map = malloc(0x600 * sizeof(uint32_t)); state->coeff_clamp_map = malloc(0x10000 * sizeof(int16_t)); - state->delta_clamp_map = NULL; + state->delta_clamp_map = malloc(0x10000 * sizeof(int16_t)); if ( state->dct_context == NULL || @@ -445,7 +541,7 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, int video_width, int video_heigh } avcodec_dct_init(state->dct_context); - init_dct_data(state); + init_dct_data(state, video_codec); return true; } @@ -545,6 +641,19 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) { } } + uint32_t end_of_block; + + if (encoder->video_codec == BS_CODEC_V2) { + end_of_block = 0x1FF; + } else { + end_of_block = 0x3FF; + assert(state->dc_huffman_map); + assert(state->delta_clamp_map); + } + + assert(state->ac_huffman_map); + assert(state->coeff_clamp_map); + // Attempt encoding the frame at the maximum quality. If the result is too // large, increase the quantization scale and try again. 
// TODO: if a frame encoded at scale N is too large but the same frame @@ -566,6 +675,11 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) { memset(state->frame_output, 0, state->frame_max_size); + state->block_type = 0; + state->last_dc_values[0] = 0; + state->last_dc_values[1] = 0; + state->last_dc_values[2] = 0; + state->bits_value = 0; state->bits_left = 16; state->uncomp_hwords_used = 0; @@ -587,16 +701,18 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) { }; for(int i = 0; ok && (i < 6); i++) - ok = encode_dct_block(state, blocks[i], quant_table); + ok = encode_dct_block(state, encoder->video_codec, blocks[i], quant_table); } } if (!ok) continue; - if (!encode_bits(state, 10, 0x1FF)) + if (!encode_bits(state, 10, end_of_block)) continue; +#if 0 if (!encode_bits(state, 2, 0x2)) continue; +#endif if (!flush_bits(state)) continue; @@ -630,11 +746,15 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) { state->frame_output[0x005] = (uint8_t)(state->quant_scale>>8); // BS version - state->frame_output[0x006] = 0x02; + if (encoder->video_codec == BS_CODEC_V2) + state->frame_output[0x006] = 0x02; + else + state->frame_output[0x006] = 0x03; + state->frame_output[0x007] = 0x00; } -int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *output) { +int encode_sector_str(mdec_encoder_t *encoder, format_t format, uint8_t *video_frames, uint8_t *output) { mdec_encoder_state_t *state = &(encoder->state); int last_frame_index = state->frame_index; int frame_size = encoder->video_width * encoder->video_height * 2; @@ -677,34 +797,32 @@ int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *o header[0x00A] = (uint8_t)(state->frame_index >> 16); header[0x00B] = (uint8_t)(state->frame_index >> 24); - // Video frame size - header[0x010] = (uint8_t)encoder->video_width; - header[0x011] = (uint8_t)(encoder->video_width >> 8); - header[0x012] = (uint8_t)encoder->video_height; 
- header[0x013] = (uint8_t)(encoder->video_height >> 8); - - // MDEC command (size of decompressed MDEC data) - header[0x014] = (uint8_t)state->blocks_used; - header[0x015] = (uint8_t)(state->blocks_used >> 8); - header[0x016] = 0x00; - header[0x017] = 0x38; - - // Quantization scale - header[0x018] = (uint8_t)state->quant_scale; - header[0x019] = (uint8_t)(state->quant_scale >> 8); - - // BS version - header[0x01A] = 0x02; - header[0x01B] = 0x00; - // Demuxed bytes used as a multiple of 4 header[0x00C] = (uint8_t)state->bytes_used; header[0x00D] = (uint8_t)(state->bytes_used >> 8); header[0x00E] = (uint8_t)(state->bytes_used >> 16); header[0x00F] = (uint8_t)(state->bytes_used >> 24); - memcpy(output + 0x018, header, sizeof(header)); - memcpy(output + 0x018 + 0x020, state->frame_output + state->frame_data_offset, 2016); + // Video frame size + header[0x010] = (uint8_t)encoder->video_width; + header[0x011] = (uint8_t)(encoder->video_width >> 8); + header[0x012] = (uint8_t)encoder->video_height; + header[0x013] = (uint8_t)(encoder->video_height >> 8); + + // Copy of BS header + memcpy(header + 0x014, state->frame_output, 8); + + int offset; + + if (format == FORMAT_STR) + offset = 0x008; + else if (format == FORMAT_STRCD) + offset = 0x018; + else + offset = 0x000; + + memcpy(output + offset, header, sizeof(header)); + memcpy(output + offset + 0x020, state->frame_output + state->frame_data_offset, 2016); state->frame_data_offset += 2016; return state->frame_index - last_frame_index; diff --git a/psxavenc/mdec.h b/psxavenc/mdec.h index 6b22e20..3d1e4dc 100644 --- a/psxavenc/mdec.h +++ b/psxavenc/mdec.h @@ -27,6 +27,7 @@ freely, subject to the following restrictions: #include #include #include +#include "args.h" typedef struct { int frame_index; @@ -55,13 +56,14 @@ typedef struct { } mdec_encoder_state_t; typedef struct { + bs_codec_t video_codec; int video_width; int video_height; mdec_encoder_state_t state; } mdec_encoder_t; -bool init_mdec_encoder(mdec_encoder_t 
*encoder, int video_width, int video_height); +bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int video_width, int video_height); void destroy_mdec_encoder(mdec_encoder_t *encoder); void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame); -int encode_sector_str(mdec_encoder_t *encoder, uint8_t *video_frames, uint8_t *output); +int encode_sector_str(mdec_encoder_t *encoder, format_t format, uint8_t *video_frames, uint8_t *output); From 7d537edffb3ab427ac592ccd56f513cdda78dc14 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Sun, 2 Mar 2025 12:12:51 +0100 Subject: [PATCH 5/8] Clean up, implement new SPU-ADPCM looping options --- libpsxav/adpcm.c | 52 +++++++---------- libpsxav/libpsxav.h | 60 ++++++++++--------- psxavenc/args.h | 18 +++--- psxavenc/decoding.h | 4 +- psxavenc/filefmt.c | 138 +++++++++++++++++++++++++++++--------------- psxavenc/filefmt.h | 1 + psxavenc/main.c | 25 +++++++- psxavenc/mdec.c | 25 ++++---- 8 files changed, 198 insertions(+), 125 deletions(-) diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c index 03d298f..ecc7264 100644 --- a/libpsxav/adpcm.c +++ b/libpsxav/adpcm.c @@ -29,8 +29,8 @@ freely, subject to the following restrictions: #define SHIFT_RANGE_4BPS 12 #define SHIFT_RANGE_8BPS 8 -#define ADPCM_FILTER_COUNT 5 -#define XA_ADPCM_FILTER_COUNT 4 +#define ADPCM_FILTER_COUNT 5 +#define XA_ADPCM_FILTER_COUNT 4 #define SPU_ADPCM_FILTER_COUNT 5 static const int16_t filter_k1[ADPCM_FILTER_COUNT] = {0, 60, 115, 98, 122}; @@ -54,7 +54,7 @@ static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_ int32_t s_min = 0; int32_t s_max = 0; - for (int i = 0; i < 28; i++) { + for (int i = 0; i < PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; i++) { int32_t raw_sample = (i >= sample_limit) ? 
0 : samples[i * pitch]; int32_t previous_values = (k1*prev1 + k2*prev2 + (1<<5))>>6; int32_t sample = raw_sample - previous_values; @@ -87,7 +87,7 @@ static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, co outstate->mse = 0; - for (int i = 0; i < 28; i++) { + for (int i = 0; i < PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; i++) { int32_t sample = ((i >= sample_limit) ? 0 : samples[i * pitch]) + outstate->qerr; int32_t previous_values = (k1*outstate->prev1 + k2*outstate->prev2 + (1<<5))>>6; int32_t sample_enc = sample - previous_values; @@ -205,25 +205,17 @@ uint32_t psx_audio_xa_get_buffer_size(psx_audio_xa_settings_t settings, int samp } uint32_t psx_audio_spu_get_buffer_size(int sample_count) { - return ((sample_count + 27) / 28) << 4; + return ((sample_count + PSX_AUDIO_SPU_SAMPLES_PER_BLOCK - 1) / PSX_AUDIO_SPU_SAMPLES_PER_BLOCK) << 4; } uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t settings) { return settings.format == PSX_AUDIO_XA_FORMAT_XA ? 2336 : 2352; } -uint32_t psx_audio_spu_get_buffer_size_per_block(void) { - return 16; -} - uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings) { return (((settings.bits_per_sample == 8) ? 112 : 224) >> (settings.stereo ? 1 : 0)) * 18; } -uint32_t psx_audio_spu_get_samples_per_block(void) { - return 28; -} - uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings) { // 1/2 interleave for 37800 Hz 8-bit stereo at 1x speed int interleave = settings.stereo ? 
2 : 4; @@ -307,14 +299,14 @@ int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* sample } int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output) { - uint8_t prebuf[28]; + uint8_t prebuf[PSX_AUDIO_SPU_SAMPLES_PER_BLOCK]; uint8_t *buffer = output; - for (int i = 0; i < sample_count; i += 28, buffer += 16) { + for (int i = 0; i < sample_count; i += PSX_AUDIO_SPU_SAMPLES_PER_BLOCK, buffer += PSX_AUDIO_SPU_BLOCK_SIZE) { buffer[0] = encode(state, samples + i * pitch, sample_count - i, pitch, prebuf, 0, 1, SPU_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); buffer[1] = 0; - for (int j = 0; j < 28; j+=2) { + for (int j = 0; j < PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; j+=2) { buffer[2 + (j>>1)] = (prebuf[j] & 0x0F) | (prebuf[j+1] << 4); } } @@ -327,24 +319,24 @@ int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *out memset(&state, 0, sizeof(psx_audio_encoder_channel_state_t)); int length = psx_audio_spu_encode(&state, samples, sample_count, 1, output); - if (length >= 32) { + if (length >= PSX_AUDIO_SPU_BLOCK_SIZE) { + uint8_t *last_block = output + length - PSX_AUDIO_SPU_BLOCK_SIZE; + if (loop_start < 0) { - //output[1] = PSX_AUDIO_SPU_LOOP_START; - output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_END; + last_block[1] |= PSX_AUDIO_SPU_LOOP_END; + + // Insert trailing looping block + memset(output + length, 0, PSX_AUDIO_SPU_BLOCK_SIZE); + output[length + 1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; + + length += PSX_AUDIO_SPU_BLOCK_SIZE; } else { - psx_audio_spu_set_flag_at_sample(output, loop_start, PSX_AUDIO_SPU_LOOP_START); - output[length - 16 + 1] = PSX_AUDIO_SPU_LOOP_REPEAT; + int loop_start_offset = loop_start / PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * PSX_AUDIO_SPU_BLOCK_SIZE; + + last_block[1] |= PSX_AUDIO_SPU_LOOP_REPEAT; + output[loop_start_offset + 1] |= PSX_AUDIO_SPU_LOOP_START; } - } else if (length >= 16) { - output[1] = PSX_AUDIO_SPU_LOOP_START | 
PSX_AUDIO_SPU_LOOP_END; - if (loop_start >= 0) - output[1] |= PSX_AUDIO_SPU_LOOP_REPEAT; } return length; } - -void psx_audio_spu_set_flag_at_sample(uint8_t* spu_data, int sample_pos, int flag) { - int buffer_pos = (sample_pos / 28) << 4; - spu_data[buffer_pos + 1] = flag; -} diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h index 32eabaf..1b8aaa1 100644 --- a/libpsxav/libpsxav.h +++ b/libpsxav/libpsxav.h @@ -28,8 +28,13 @@ freely, subject to the following restrictions: // audio.c -#define PSX_AUDIO_XA_FREQ_SINGLE 18900 -#define PSX_AUDIO_XA_FREQ_DOUBLE 37800 +#define PSX_AUDIO_SPU_BLOCK_SIZE 16 +#define PSX_AUDIO_SPU_SAMPLES_PER_BLOCK 28 + +enum { + PSX_AUDIO_XA_FREQ_SINGLE = 18900, + PSX_AUDIO_XA_FREQ_DOUBLE = 37800 +}; typedef enum { PSX_AUDIO_XA_FORMAT_XA, // .xa file @@ -56,23 +61,22 @@ typedef struct { psx_audio_encoder_channel_state_t right; } psx_audio_encoder_state_t; -#define PSX_AUDIO_SPU_LOOP_END 1 -#define PSX_AUDIO_SPU_LOOP_REPEAT 3 -#define PSX_AUDIO_SPU_LOOP_START 4 +enum { + PSX_AUDIO_SPU_LOOP_END = 1 << 0, + PSX_AUDIO_SPU_LOOP_REPEAT = 3 << 0, + PSX_AUDIO_SPU_LOOP_START = 1 << 2 +}; uint32_t psx_audio_xa_get_buffer_size(psx_audio_xa_settings_t settings, int sample_count); uint32_t psx_audio_spu_get_buffer_size(int sample_count); uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t settings); -uint32_t psx_audio_spu_get_buffer_size_per_block(void); uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings); -uint32_t psx_audio_spu_get_samples_per_block(void); uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings); int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output); int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output); int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int 
pitch, uint8_t *output); int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start); void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length); -void psx_audio_spu_set_flag_at_sample(uint8_t* spu_data, int sample_pos, int flag); // cdrom.c @@ -115,25 +119,29 @@ _Static_assert(sizeof(psx_cdrom_sector_mode2_t) == PSX_CDROM_SECTOR_SIZE, "Inval #define PSX_CDROM_SECTOR_XA_CHANNEL_MASK 0x1F -#define PSX_CDROM_SECTOR_XA_SUBMODE_EOR 0x01 -#define PSX_CDROM_SECTOR_XA_SUBMODE_VIDEO 0x02 -#define PSX_CDROM_SECTOR_XA_SUBMODE_AUDIO 0x04 -#define PSX_CDROM_SECTOR_XA_SUBMODE_DATA 0x08 -#define PSX_CDROM_SECTOR_XA_SUBMODE_TRIGGER 0x10 -#define PSX_CDROM_SECTOR_XA_SUBMODE_FORM2 0x20 -#define PSX_CDROM_SECTOR_XA_SUBMODE_RT 0x40 -#define PSX_CDROM_SECTOR_XA_SUBMODE_EOF 0x80 +enum { + PSX_CDROM_SECTOR_XA_SUBMODE_EOR = 1 << 0, + PSX_CDROM_SECTOR_XA_SUBMODE_VIDEO = 1 << 1, + PSX_CDROM_SECTOR_XA_SUBMODE_AUDIO = 1 << 2, + PSX_CDROM_SECTOR_XA_SUBMODE_DATA = 1 << 3, + PSX_CDROM_SECTOR_XA_SUBMODE_TRIGGER = 1 << 4, + PSX_CDROM_SECTOR_XA_SUBMODE_FORM2 = 1 << 5, + PSX_CDROM_SECTOR_XA_SUBMODE_RT = 1 << 6, + PSX_CDROM_SECTOR_XA_SUBMODE_EOF = 1 << 7 +}; -#define PSX_CDROM_SECTOR_XA_CODING_MONO 0x00 -#define PSX_CDROM_SECTOR_XA_CODING_STEREO 0x01 -#define PSX_CDROM_SECTOR_XA_CODING_CHANNEL_MASK 0x03 -#define PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE 0x00 -#define PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE 0x04 -#define PSX_CDROM_SECTOR_XA_CODING_FREQ_MASK 0x0C -#define PSX_CDROM_SECTOR_XA_CODING_BITS_4 0x00 -#define PSX_CDROM_SECTOR_XA_CODING_BITS_8 0x10 -#define PSX_CDROM_SECTOR_XA_CODING_BITS_MASK 0x30 -#define PSX_CDROM_SECTOR_XA_CODING_EMPHASIS 0x40 +enum { + PSX_CDROM_SECTOR_XA_CODING_MONO = 0 << 0, + PSX_CDROM_SECTOR_XA_CODING_STEREO = 1 << 0, + PSX_CDROM_SECTOR_XA_CODING_CHANNEL_MASK = 3 << 0, + PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE = 0 << 2, + PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE = 1 << 2, + 
PSX_CDROM_SECTOR_XA_CODING_FREQ_MASK = 3 << 2, + PSX_CDROM_SECTOR_XA_CODING_BITS_4 = 0 << 4, + PSX_CDROM_SECTOR_XA_CODING_BITS_8 = 1 << 4, + PSX_CDROM_SECTOR_XA_CODING_BITS_MASK = 3 << 4, + PSX_CDROM_SECTOR_XA_CODING_EMPHASIS = 1 << 6 +}; typedef enum { PSX_CDROM_SECTOR_TYPE_MODE1, diff --git a/psxavenc/args.h b/psxavenc/args.h index 9249290..f0fab88 100644 --- a/psxavenc/args.h +++ b/psxavenc/args.h @@ -26,19 +26,19 @@ freely, subject to the following restrictions: #include -#define NUM_FORMATS 11 +#define NUM_FORMATS 11 #define NUM_BS_CODECS 3 enum { - FLAG_IGNORE_OPTIONS = 1 << 0, - FLAG_QUIET = 1 << 1, - FLAG_HIDE_PROGRESS = 1 << 2, - FLAG_PRINT_HELP = 1 << 3, - FLAG_PRINT_VERSION = 1 << 4, - FLAG_SPU_LOOP_END = 1 << 5, + FLAG_IGNORE_OPTIONS = 1 << 0, + FLAG_QUIET = 1 << 1, + FLAG_HIDE_PROGRESS = 1 << 2, + FLAG_PRINT_HELP = 1 << 3, + FLAG_PRINT_VERSION = 1 << 4, + FLAG_SPU_LOOP_END = 1 << 5, FLAG_SPU_NO_LEADING_DUMMY = 1 << 6, - FLAG_BS_IGNORE_ASPECT = 1 << 7, - FLAG_STR_TRAILING_AUDIO = 1 << 8 + FLAG_BS_IGNORE_ASPECT = 1 << 7, + FLAG_STR_TRAILING_AUDIO = 1 << 8 }; typedef enum { diff --git a/psxavenc/decoding.h b/psxavenc/decoding.h index ccf0b65..311cb69 100644 --- a/psxavenc/decoding.h +++ b/psxavenc/decoding.h @@ -67,8 +67,8 @@ typedef struct { } decoder_t; enum { - DECODER_USE_AUDIO = 1 << 0, - DECODER_USE_VIDEO = 1 << 1, + DECODER_USE_AUDIO = 1 << 0, + DECODER_USE_VIDEO = 1 << 1, DECODER_AUDIO_REQUIRED = 1 << 2, DECODER_VIDEO_REQUIRED = 1 << 3 }; diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c index 7e508f5..e5d930b 100644 --- a/psxavenc/filefmt.c +++ b/psxavenc/filefmt.c @@ -136,7 +136,10 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t * strncpy((char*)(header + 0x20), &args->output_file[name_offset], 16); } -void encode_file_xa(args_t *args, decoder_t *decoder, FILE *output) { +// The functions below are some peak spaghetti code I would rewrite if that +// didn't also require scrapping the rest of the codebase. 
-- spicyjpeg + +void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output) { psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args); int audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); @@ -187,26 +190,37 @@ void encode_file_xa(args_t *args, decoder_t *decoder, FILE *output) { } } -void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) { +void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) { psx_audio_encoder_channel_state_t audio_state; memset(&audio_state, 0, sizeof(psx_audio_encoder_channel_state_t)); - int audio_samples_per_block = psx_audio_spu_get_samples_per_block(); - int block_size = psx_audio_spu_get_buffer_size_per_block(); - int block_count; - // The header must be written after the data as we don't yet know the // number of audio samples. if (args->format == FORMAT_VAG) fseek(output, VAG_HEADER_SIZE, SEEK_SET); - for (block_count = 0; ensure_av_data(decoder, audio_samples_per_block, 0); block_count++) { + uint8_t buffer[PSX_AUDIO_SPU_BLOCK_SIZE]; + int block_count = 0; + + if (!(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) { + // Insert leading silent block + memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE); + + fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output); + block_count++; + } + + int loop_start_block = -1; + + if (args->audio_loop_point >= 0) + loop_start_block = (args->audio_loop_point * args->audio_frequency) / (PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * 1000); + + for (; ensure_av_data(decoder, PSX_AUDIO_SPU_SAMPLES_PER_BLOCK, 0); block_count++) { int samples_length = decoder->audio_sample_count; - if (samples_length > audio_samples_per_block) - samples_length = audio_samples_per_block; + if (samples_length > PSX_AUDIO_SPU_SAMPLES_PER_BLOCK) + samples_length = PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; - uint8_t buffer[16]; int length = psx_audio_spu_encode( &audio_state, decoder->audio_samples, @@ -215,15 +229,10 @@ void encode_file_spu(args_t *args, decoder_t *decoder, 
FILE *output) { buffer ); - // TODO: implement proper loop flag support - if (false) + if (block_count == loop_start_block) buffer[1] |= PSX_AUDIO_SPU_LOOP_START; - if (decoder->end_of_input) { - if (args->flags & FLAG_SPU_LOOP_END) - buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT; - else - buffer[1] |= PSX_AUDIO_SPU_LOOP_END; - } + if ((args->flags & FLAG_SPU_LOOP_END) && decoder->end_of_input) + buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT; retire_av_data(decoder, samples_length, 0); fwrite(buffer, length, 1, output); @@ -235,12 +244,21 @@ void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) { stderr, "\rBlock: %6d | Encoding speed: %5.2fx", block_count, - (double)(block_count * audio_samples_per_block) / (double)(args->audio_frequency * t) + (double)(block_count * PSX_AUDIO_SPU_SAMPLES_PER_BLOCK) / (double)(args->audio_frequency * t) ); } } - int overflow = (block_count * block_size) % args->alignment; + if (!(args->flags & FLAG_SPU_LOOP_END)) { + // Insert trailing looping block + memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE); + buffer[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; + + fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output); + block_count++; + } + + int overflow = (block_count * PSX_AUDIO_SPU_BLOCK_SIZE) % args->alignment; if (overflow) { for (int i = 0; i < (args->alignment - overflow); i++) @@ -248,15 +266,15 @@ void encode_file_spu(args_t *args, decoder_t *decoder, FILE *output) { } if (args->format == FORMAT_VAG) { uint8_t header[VAG_HEADER_SIZE]; - write_vag_header(args, block_count * block_size, header); + write_vag_header(args, block_count * PSX_AUDIO_SPU_BLOCK_SIZE, header); fseek(output, 0, SEEK_SET); fwrite(header, VAG_HEADER_SIZE, 1, output); } } -void encode_file_spui(args_t *args, decoder_t *decoder, FILE *output) { - int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels; +void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { + int audio_samples_per_chunk = 
args->audio_interleave / PSX_AUDIO_SPU_BLOCK_SIZE * PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; // NOTE: since the interleaved .vag format is not standardized, some tools // (such as vgmstream) will not properly play files with interleave < 2048, @@ -267,38 +285,52 @@ void encode_file_spui(args_t *args, decoder_t *decoder, FILE *output) { int header_size = VAG_HEADER_SIZE + args->alignment - 1; header_size -= header_size % args->alignment; - int audio_samples_per_block = psx_audio_spu_get_samples_per_block(); - int block_size = psx_audio_spu_get_buffer_size_per_block(); - int audio_samples_per_chunk = args->audio_interleave / block_size * audio_samples_per_block; - int chunk_count; - if (args->format == FORMAT_VAGI) fseek(output, header_size, SEEK_SET); + int audio_state_size = sizeof(psx_audio_encoder_channel_state_t) * args->audio_channels; psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size); - uint8_t *buffer = malloc(buffer_size); memset(audio_state, 0, audio_state_size); - for (chunk_count = 0; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) { + uint8_t *buffer = malloc(buffer_size); + int chunk_count = 0; + + for (; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) { int samples_length = decoder->audio_sample_count / args->audio_channels; - if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk; + int buffer_offset = 0; + + if (samples_length > audio_samples_per_chunk) + samples_length = audio_samples_per_chunk; + + // Insert leading silent block + if (chunk_count == 0 && !(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) { + buffer_offset = PSX_AUDIO_SPU_BLOCK_SIZE; + samples_length -= PSX_AUDIO_SPU_BLOCK_SIZE; + } for (int ch = 0; ch < args->audio_channels; ch++) { memset(buffer, 0, buffer_size); + int length = psx_audio_spu_encode( audio_state + ch, decoder->audio_samples + ch, samples_length, args->audio_channels, - buffer + buffer + 
buffer_offset ); - if (length) { - // TODO: implement proper loop flag support - if (args->flags & FLAG_SPU_LOOP_END) - buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_REPEAT; - else if (decoder->end_of_input) - buffer[length - block_size + 1] |= PSX_AUDIO_SPU_LOOP_END; + if (length > 0) { + uint8_t *last_block = buffer + length - PSX_AUDIO_SPU_BLOCK_SIZE; + + if (args->flags & FLAG_SPU_LOOP_END) { + last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT; + } else if (decoder->end_of_input) { + // HACK: the trailing block should in theory be appended to + // the existing data, but it's easier to just zerofill and + // repurpose the last encoded block + memset(last_block, 0, PSX_AUDIO_SPU_BLOCK_SIZE); + last_block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; + } } fwrite(buffer, buffer_size, 1, output); @@ -332,10 +364,9 @@ void encode_file_spui(args_t *args, decoder_t *decoder, FILE *output) { free(buffer); } -void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) { +void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args); int audio_samples_per_sector; - uint8_t buffer[PSX_CDROM_SECTOR_SIZE]; int offset, sector_size; @@ -349,7 +380,8 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) { int interleave; int video_sectors_per_block; - if (decoder->state.audio_stream) { + + if (decoder->state.audio_stream != NULL) { // 1/N audio, (N-1)/N video audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); interleave = psx_audio_xa_get_sector_interleave(xa_settings) * args->str_cd_speed; @@ -399,16 +431,24 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) { for (int j = 0; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; j++) { ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed); - if ((j%interleave) < video_sectors_per_block) { - 
// Video sector + uint8_t buffer[PSX_CDROM_SECTOR_SIZE]; + bool is_video_sector; + + if (args->flags & FLAG_STR_TRAILING_AUDIO) + is_video_sector = (j % interleave) < video_sectors_per_block; + else + is_video_sector = (j % interleave) > 0; + + if (is_video_sector) { init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*) buffer, j); int frames_used = encode_sector_str(&encoder, args->format, decoder->video_frames, buffer); retire_av_data(decoder, 0, frames_used); } else { - // Audio sector int samples_length = decoder->audio_sample_count / args->audio_channels; - if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector; + + if (samples_length > audio_samples_per_sector) + samples_length = audio_samples_per_sector; // FIXME: this is an extremely hacky way to handle audio tracks // shorter than the video track @@ -438,7 +478,7 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) { buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4); } - if((j%interleave) < video_sectors_per_block) + if (is_video_sector) psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)buffer, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); fwrite(buffer + offset, sector_size, 1, output); @@ -461,7 +501,11 @@ void encode_file_str(args_t *args, decoder_t *decoder, FILE *output) { destroy_mdec_encoder(&encoder); } -void encode_file_sbs(args_t *args, decoder_t *decoder, FILE *output) { +void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output) { + // TODO: implement +} + +void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output) { mdec_encoder_t encoder; init_mdec_encoder(&encoder, args->video_codec, args->video_width, args->video_height); diff --git a/psxavenc/filefmt.h b/psxavenc/filefmt.h index 5f8eb38..9276160 100644 --- a/psxavenc/filefmt.h +++ b/psxavenc/filefmt.h @@ -32,4 +32,5 @@ void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output); void encode_file_spu(const args_t *args, decoder_t *decoder, FILE 
*output); void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output); void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output); +void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output); void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output); diff --git a/psxavenc/main.c b/psxavenc/main.c index 78c0935..277aa26 100644 --- a/psxavenc/main.c +++ b/psxavenc/main.c @@ -120,7 +120,6 @@ int main(int argc, const char **argv) { case FORMAT_STR: case FORMAT_STRCD: - case FORMAT_STRSPU: case FORMAT_STRV: if (!(args.flags & FLAG_QUIET)) { if (decoder.state.audio_stream) @@ -147,6 +146,30 @@ int main(int argc, const char **argv) { encode_file_str(&args, &decoder, output); break; + case FORMAT_STRSPU: + if (!(args.flags & FLAG_QUIET)) { + if (decoder.state.audio_stream) + fprintf( + stderr, + "Audio format: SPU-ADPCM, %d Hz %d channels, interleave=%d\n", + args.audio_frequency, + args.audio_channels, + args.audio_interleave + ); + + fprintf( + stderr, + "Video format: %s, %dx%d, %.2f fps\n", + bs_codec_names[args.video_codec], + args.video_width, + args.video_height, + (double)args.str_fps_num / (double)args.str_fps_den + ); + } + + encode_file_strspu(&args, &decoder, output); + break; + case FORMAT_SBS: if (!(args.flags & FLAG_QUIET)) fprintf( diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c index 0d945c3..2221764 100644 --- a/psxavenc/mdec.c +++ b/psxavenc/mdec.c @@ -23,6 +23,7 @@ freely, subject to the following restrictions: */ #include +#include #include #include #include @@ -236,14 +237,16 @@ static const uint8_t dct_zagzig_table[8*8] = { }; #if 0 -#define SF0 0x5a82 // cos(0/16 * pi) * sqrt(2) -#define SF1 0x7d8a // cos(1/16 * pi) * 2 -#define SF2 0x7641 // cos(2/16 * pi) * 2 -#define SF3 0x6a6d // cos(3/16 * pi) * 2 -#define SF4 0x5a82 // cos(4/16 * pi) * 2 -#define SF5 0x471c // cos(5/16 * pi) * 2 -#define SF6 0x30fb // cos(6/16 * pi) * 2 -#define SF7 0x18f8 // cos(7/16 * pi) * 2 +enum { + SF0 = 
0x5a82, // cos(0/16 * pi) * sqrt(2) + SF1 = 0x7d8a, // cos(1/16 * pi) * 2 + SF2 = 0x7641, // cos(2/16 * pi) * 2 + SF3 = 0x6a6d, // cos(3/16 * pi) * 2 + SF4 = 0x5a82, // cos(4/16 * pi) * 2 + SF5 = 0x471c, // cos(5/16 * pi) * 2 + SF6 = 0x30fb, // cos(6/16 * pi) * 2 + SF7 = 0x18f8 // cos(7/16 * pi) * 2 +}; static const int16_t dct_scale_table[8*8] = { SF0, SF0, SF0, SF0, SF0, SF0, SF0, SF0, @@ -525,7 +528,9 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int vide if ( state->dct_context == NULL || state->ac_huffman_map == NULL || - state->coeff_clamp_map == NULL + state->dc_huffman_map == NULL || + state->coeff_clamp_map == NULL || + state->delta_clamp_map == NULL ) return false; @@ -536,7 +541,7 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int vide for (int i = 0; i < 6; i++) { state->dct_block_lists[i] = malloc(dct_block_size); - if (!state->dct_block_lists[i]) + if (state->dct_block_lists[i] == NULL) return false; } From 24d37145c60d6a4ffad152c28ff2bebc4730656a Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Sun, 2 Mar 2025 20:15:06 +0100 Subject: [PATCH 6/8] Bugfixes, add -T and -A options --- psxavenc/args.c | 33 ++++--- psxavenc/args.h | 2 + psxavenc/decoding.c | 171 +++++++++++++++++++------------------ psxavenc/filefmt.c | 102 +++++++++++----------- psxavenc/main.c | 2 +- psxavenc/mdec.c | 203 ++++++++++++++++++++++---------------------- psxavenc/mdec.h | 9 +- 7 files changed, 271 insertions(+), 251 deletions(-) diff --git a/psxavenc/args.c b/psxavenc/args.c index 8c92346..fb74a1f 100644 --- a/psxavenc/args.c +++ b/psxavenc/args.c @@ -123,9 +123,9 @@ static const char *const general_options_help = " spui: [A.] raw SPU-ADPCM interleaved data\n" " vag: [A.] .vag SPU-ADPCM mono\n" " vagi: [A.] 
.vag SPU-ADPCM interleaved\n" - " str: [AV] .str video, 2336-byte sectors\n" - " strcd: [AV] .str video, 2352-byte sectors\n" - " strspu: [AV] .str video, 2048-byte sectors\n" + " str: [AV] .str video + XA-ADPCM, 2336-byte sectors\n" + " strcd: [AV] .str video + XA-ADPCM, 2352-byte sectors\n" + " strspu: [AV] .str video + SPU-ADPCM, 2048-byte sectors\n" " strv: [.V] .str video, 2048-byte sectors\n" " sbs: [.V] .sbs video\n" " -R key=value,... Pass custom options to libswresample (see FFmpeg docs)\n" @@ -148,12 +148,15 @@ static const char *const format_names[NUM_FORMATS] = { static void init_default_args(args_t *args) { if ( - args->format == FORMAT_XA || args->format == FORMAT_XACD || - args->format == FORMAT_STR || args->format == FORMAT_STRCD + args->format == FORMAT_XA || + args->format == FORMAT_XACD || + args->format == FORMAT_STR || + args->format == FORMAT_STRCD ) args->audio_frequency = 37800; else args->audio_frequency = 44100; + if (args->format == FORMAT_SPU || args->format == FORMAT_VAG) args->audio_channels = 1; else @@ -172,11 +175,13 @@ static void init_default_args(args_t *args) { args->str_fps_num = 15; args->str_fps_den = 1; args->str_cd_speed = 2; + args->str_video_id = 0x8001; + args->str_audio_id = 0x0001; if (args->format == FORMAT_SPU || args->format == FORMAT_VAG) - args->alignment = 64; + args->alignment = 64; // Default SPU DMA chunk size else if (args->format == FORMAT_SBS) - args->alignment = 8192; + args->alignment = 8192; // Default for System 573 games else args->alignment = 2048; } @@ -264,7 +269,7 @@ static int parse_xa_option(args_t *args, char option, const char *param) { } static const char *const spu_options_help = - "SPU-ADPCM options:\n" + "Mono SPU-ADPCM options:\n" " [-f freq] [-a size] [-l ms | -L] [-D]\n" "\n" " -f freq Use specified sample rate (default 44100)\n" @@ -411,11 +416,13 @@ static int parse_bs_option(args_t *args, char option, const char *param) { static const char *const str_options_help = ".str container 
options:\n" - " [-r num[/den]] [-x 1|2] [-A]\n" + " [-r num[/den]] [-x 1|2] [-T id] [-A id] [-X]\n" "\n" " -r num[/den] Set video frame rate to specified integer or fraction (default 15)\n" " -x 1|2 Set CD-ROM speed the file is meant to played at (default 2)\n" - " -A Place audio sectors after corresponding video sectors\n" + " -T id Tag video sectors with specified .str type ID (default 0x8001)\n" + " -A id Tag SPU-ADPCM sectors with specified .str type ID (default 0x0001)\n" + " -X Place audio sectors after corresponding video sectors\n" " (rather than ahead of them)\n" "\n"; @@ -453,7 +460,13 @@ static int parse_str_option(args_t *args, char option, const char *param) { case 'x': return parse_int_one_of(&(args->str_cd_speed), "CD-ROM speed", param, 1, 2); + case 'T': + return parse_int(&(args->str_video_id), "video track type ID", param, 0x0000, 0xFFFF); + case 'A': + return parse_int(&(args->str_audio_id), "audio track type ID", param, 0x0000, 0xFFFF); + + case 'X': args->flags |= FLAG_STR_TRAILING_AUDIO; return 1; diff --git a/psxavenc/args.h b/psxavenc/args.h index f0fab88..d313646 100644 --- a/psxavenc/args.h +++ b/psxavenc/args.h @@ -87,6 +87,8 @@ typedef struct { int str_fps_num; int str_fps_den; int str_cd_speed; // 1 or 2 + int str_video_id; + int str_audio_id; int alignment; } args_t; diff --git a/psxavenc/decoding.c b/psxavenc/decoding.c index a29e90a..a9cec89 100644 --- a/psxavenc/decoding.c +++ b/psxavenc/decoding.c @@ -36,27 +36,22 @@ freely, subject to the following restrictions: #include "args.h" #include "decoding.h" -static int decode_frame( - AVCodecContext *codec, - AVFrame *frame, - int *frame_size, - AVPacket *packet -) { +static bool decode_frame(AVCodecContext *codec, AVFrame *frame, int *frame_size, AVPacket *packet) { if (packet != NULL) { if (avcodec_send_packet(codec, packet) != 0) - return 0; + return false; } int ret = avcodec_receive_frame(codec, frame); if (ret >= 0) { *frame_size = ret; - return 1; - } else if (ret == 
AVERROR(EAGAIN)) { - return 1; - } else { - return 0; + return true; } + if (ret == AVERROR(EAGAIN)) + return true; + + return false; } bool open_av_data(decoder_t *decoder, const args_t *args, int flags) { @@ -261,35 +256,39 @@ bool open_av_data(decoder_t *decoder, const args_t *args, int flags) { static void poll_av_packet_audio(decoder_t *decoder, AVPacket *packet) { decoder_state_t *av = &(decoder->state); - int frame_size, frame_sample_count; - uint8_t *buffer[1]; + int frame_size; - if (decode_frame(av->audio_codec_context, av->frame, &frame_size, packet)) { - size_t buffer_size = sizeof(int16_t) * av->sample_count_mul * swr_get_out_samples(av->resampler, av->frame->nb_samples); + if (!decode_frame(av->audio_codec_context, av->frame, &frame_size, packet)) + return; - buffer[0] = malloc(buffer_size); - memset(buffer[0], 0, buffer_size); + int frame_sample_count = swr_get_out_samples(av->resampler, av->frame->nb_samples); - frame_sample_count = swr_convert( - av->resampler, - buffer, - av->frame->nb_samples, - (const uint8_t**)av->frame->data, - av->frame->nb_samples - ); + if (frame_sample_count == 0) + return; - decoder->audio_samples = realloc( - decoder->audio_samples, - (decoder->audio_sample_count + ((frame_sample_count + 4032) * av->sample_count_mul)) * sizeof(int16_t) - ); - memmove( - &(decoder->audio_samples[decoder->audio_sample_count]), - buffer[0], - sizeof(int16_t) * frame_sample_count * av->sample_count_mul - ); - decoder->audio_sample_count += frame_sample_count * av->sample_count_mul; - free(buffer[0]); - } + size_t buffer_size = sizeof(int16_t) * av->sample_count_mul * frame_sample_count; + uint8_t *buffer = malloc(buffer_size); + memset(buffer, 0, buffer_size); + + frame_sample_count = swr_convert( + av->resampler, + &buffer, + frame_sample_count, + (const uint8_t**)av->frame->data, + av->frame->nb_samples + ); + + decoder->audio_samples = realloc( + decoder->audio_samples, + (decoder->audio_sample_count + ((frame_sample_count + 4032) * 
av->sample_count_mul)) * sizeof(int16_t) + ); + memmove( + &(decoder->audio_samples[decoder->audio_sample_count]), + buffer, + sizeof(int16_t) * frame_sample_count * av->sample_count_mul + ); + decoder->audio_sample_count += frame_sample_count * av->sample_count_mul; + free(buffer); } static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) { @@ -303,63 +302,63 @@ static void poll_av_packet_video(decoder_t *decoder, AVPacket *packet) { decoder->video_width, decoder->video_width }; - if (decode_frame(av->video_codec_context, av->frame, &frame_size, packet)) { - if (!av->frame->width || !av->frame->height || !av->frame->data[0]) - return; + if (!decode_frame(av->video_codec_context, av->frame, &frame_size, packet)) + return; + if (!av->frame->width || !av->frame->height || !av->frame->data[0]) + return; - // Some files seem to have timestamps starting from a negative value - // (but otherwise valid) for whatever reason. - double pts = - ((double)av->frame->pts * (double)av->video_stream->time_base.num) - / av->video_stream->time_base.den; + // Some files seem to have timestamps starting from a negative value + // (but otherwise valid) for whatever reason. 
+ double pts = + ((double)av->frame->pts * (double)av->video_stream->time_base.num) + / av->video_stream->time_base.den; #if 0 - if (pts < 0.0) - return; + if (pts < 0.0) + return; #endif - if (decoder->video_frame_count >= 1 && pts < av->video_next_pts) - return; - if (decoder->video_frame_count < 1) - av->video_next_pts = pts; - else - av->video_next_pts += pts_step; + if (decoder->video_frame_count >= 1 && pts < av->video_next_pts) + return; + if (decoder->video_frame_count < 1) + av->video_next_pts = pts; + else + av->video_next_pts += pts_step; - //fprintf(stderr, "%d %f %f %f\n", decoder->video_frame_count, pts, av->video_next_pts, pts_step); + //fprintf(stderr, "%d %f %f %f\n", decoder->video_frame_count, pts, av->video_next_pts, pts_step); - // Insert duplicate frames if the frame rate of the input stream is - // lower than the target frame rate. - int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step); - if (dupe_frames < 0) dupe_frames = 0; - decoder->video_frames = realloc( - decoder->video_frames, - (decoder->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size + // Insert duplicate frames if the frame rate of the input stream is + // lower than the target frame rate. 
+ int dupe_frames = (int) ceil((pts - av->video_next_pts) / pts_step); + if (dupe_frames < 0) dupe_frames = 0; + decoder->video_frames = realloc( + decoder->video_frames, + (decoder->video_frame_count + dupe_frames + 1) * av->video_frame_dst_size + ); + + for (; dupe_frames; dupe_frames--) { + memcpy( + (decoder->video_frames) + av->video_frame_dst_size * decoder->video_frame_count, + (decoder->video_frames) + av->video_frame_dst_size * (decoder->video_frame_count - 1), + av->video_frame_dst_size ); - - for (; dupe_frames; dupe_frames--) { - memcpy( - (decoder->video_frames) + av->video_frame_dst_size * decoder->video_frame_count, - (decoder->video_frames) + av->video_frame_dst_size * (decoder->video_frame_count - 1), - av->video_frame_dst_size - ); - decoder->video_frame_count += 1; - av->video_next_pts += pts_step; - } - - uint8_t *dst_frame = decoder->video_frames + av->video_frame_dst_size * decoder->video_frame_count; - uint8_t *dst_pointers[2] = { - dst_frame, dst_frame + plane_size - }; - sws_scale( - av->scaler, - (const uint8_t *const *) av->frame->data, - av->frame->linesize, - 0, - av->frame->height, - dst_pointers, - dst_strides - ); - decoder->video_frame_count += 1; + av->video_next_pts += pts_step; } + + uint8_t *dst_frame = decoder->video_frames + av->video_frame_dst_size * decoder->video_frame_count; + uint8_t *dst_pointers[2] = { + dst_frame, dst_frame + plane_size + }; + sws_scale( + av->scaler, + (const uint8_t *const *) av->frame->data, + av->frame->linesize, + 0, + av->frame->height, + dst_pointers, + dst_strides + ); + + decoder->video_frame_count += 1; } bool poll_av_data(decoder_t *decoder) { diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c index e5d930b..3f6ce45 100644 --- a/psxavenc/filefmt.c +++ b/psxavenc/filefmt.c @@ -104,15 +104,15 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t * if (args->format == FORMAT_VAGI) { header[0x08] = (uint8_t)args->audio_interleave; header[0x09] = 
(uint8_t)(args->audio_interleave >> 8); - header[0x0a] = (uint8_t)(args->audio_interleave >> 16); - header[0x0b] = (uint8_t)(args->audio_interleave >> 24); + header[0x0A] = (uint8_t)(args->audio_interleave >> 16); + header[0x0B] = (uint8_t)(args->audio_interleave >> 24); } // Length of data for each channel (big-endian) - header[0x0c] = (uint8_t)(size_per_channel >> 24); - header[0x0d] = (uint8_t)(size_per_channel >> 16); - header[0x0e] = (uint8_t)(size_per_channel >> 8); - header[0x0f] = (uint8_t)size_per_channel; + header[0x0C] = (uint8_t)(size_per_channel >> 24); + header[0x0D] = (uint8_t)(size_per_channel >> 16); + header[0x0E] = (uint8_t)(size_per_channel >> 8); + header[0x0F] = (uint8_t)size_per_channel; // Sample rate (big-endian) header[0x10] = (uint8_t)(args->audio_frequency >> 24); @@ -121,8 +121,8 @@ static void write_vag_header(const args_t *args, int size_per_channel, uint8_t * header[0x13] = (uint8_t)args->audio_frequency; // Number of channels (little-endian) - header[0x1e] = (uint8_t)args->audio_channels; - header[0x1f] = 0x00; + header[0x1E] = (uint8_t)args->audio_channels; + header[0x1F] = 0x00; // Filename int name_offset = strlen(args->output_file); @@ -213,7 +213,7 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) { int loop_start_block = -1; if (args->audio_loop_point >= 0) - loop_start_block = (args->audio_loop_point * args->audio_frequency) / (PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * 1000); + loop_start_block = block_count + (args->audio_loop_point * args->audio_frequency) / (PSX_AUDIO_SPU_SAMPLES_PER_BLOCK * 1000); for (; ensure_av_data(decoder, PSX_AUDIO_SPU_SAMPLES_PER_BLOCK, 0); block_count++) { int samples_length = decoder->audio_sample_count; @@ -279,7 +279,7 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { // NOTE: since the interleaved .vag format is not standardized, some tools // (such as vgmstream) will not properly play files with interleave < 2048, // alignment != 2048 or 
channels != 2. - int buffer_size = args->audio_interleave + args->alignment - 1; + int buffer_size = args->audio_interleave * args->audio_channels + args->alignment - 1; buffer_size -= buffer_size % args->alignment; int header_size = VAG_HEADER_SIZE + args->alignment - 1; @@ -297,30 +297,30 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { for (; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) { int samples_length = decoder->audio_sample_count / args->audio_channels; - int buffer_offset = 0; if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk; + memset(buffer, 0, buffer_size); + uint8_t *buffer_ptr = buffer; + // Insert leading silent block if (chunk_count == 0 && !(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) { - buffer_offset = PSX_AUDIO_SPU_BLOCK_SIZE; - samples_length -= PSX_AUDIO_SPU_BLOCK_SIZE; + buffer_ptr += PSX_AUDIO_SPU_BLOCK_SIZE; + samples_length -= PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; } - for (int ch = 0; ch < args->audio_channels; ch++) { - memset(buffer, 0, buffer_size); - + for (int ch = 0; ch < args->audio_channels; ch++, buffer_ptr += args->audio_interleave) { int length = psx_audio_spu_encode( audio_state + ch, decoder->audio_samples + ch, samples_length, args->audio_channels, - buffer + buffer_offset + buffer_ptr ); if (length > 0) { - uint8_t *last_block = buffer + length - PSX_AUDIO_SPU_BLOCK_SIZE; + uint8_t *last_block = buffer_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE; if (args->flags & FLAG_SPU_LOOP_END) { last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT; @@ -332,24 +332,27 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { last_block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; } } - - fwrite(buffer, buffer_size, 1, output); - - time_t t = get_elapsed_time(); - - if (!(args->flags & FLAG_HIDE_PROGRESS) && t) { - fprintf( - stderr, - "\rChunk: %6d | Encoding speed: %5.2fx", - chunk_count, - 
(double)(chunk_count * audio_samples_per_chunk) / (double)(args->audio_frequency * t) - ); - } } retire_av_data(decoder, samples_length * args->audio_channels, 0); + fwrite(buffer, buffer_size, 1, output); + + time_t t = get_elapsed_time(); + + if (!(args->flags & FLAG_HIDE_PROGRESS) && t) { + fprintf( + stderr, + "\rChunk: %6d | Encoding speed: %5.2fx", + chunk_count, + (double)(chunk_count * audio_samples_per_chunk) / (double)(args->audio_frequency * t) + ); + } + } + free(audio_state); + free(buffer); + if (args->format == FORMAT_VAGI) { uint8_t *header = malloc(header_size); memset(header, 0, header_size); @@ -359,32 +362,20 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { fwrite(header, header_size, 1, output); free(header); } - - free(audio_state); - free(buffer); } void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { psx_audio_xa_settings_t xa_settings = args_to_libpsxav_xa_audio(args); - int audio_samples_per_sector; - - int offset, sector_size; - - if (args->format == FORMAT_STRV) { - sector_size = 2048; - offset = 0x18; - } else { - sector_size = psx_audio_xa_get_buffer_size_per_sector(xa_settings); - offset = PSX_CDROM_SECTOR_SIZE - sector_size; - } + int sector_size = psx_audio_xa_get_buffer_size_per_sector(xa_settings); int interleave; + int audio_samples_per_sector; int video_sectors_per_block; if (decoder->state.audio_stream != NULL) { // 1/N audio, (N-1)/N video - audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); interleave = psx_audio_xa_get_sector_interleave(xa_settings) * args->str_cd_speed; + audio_samples_per_sector = psx_audio_xa_get_samples_per_sector(xa_settings); video_sectors_per_block = interleave - 1; if (!(args->flags & FLAG_QUIET)) @@ -398,8 +389,8 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { ); } else { // 0/1 audio, 1/1 video - audio_samples_per_sector = 0; interleave = 1; + audio_samples_per_sector = 0; 
video_sectors_per_block = 1; } @@ -426,7 +417,9 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { // FIXME: this needs an extra frame to prevent A/V desync int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size); - if (frames_needed < 2) frames_needed = 2; + + if (frames_needed < 2) + frames_needed = 2; for (int j = 0; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; j++) { ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed); @@ -440,9 +433,16 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { is_video_sector = (j % interleave) > 0; if (is_video_sector) { - init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*) buffer, j); + init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*)buffer, j); + + int frames_used = encode_sector_str( + &encoder, + args->format, + args->str_video_id, + decoder->video_frames, + buffer + ); - int frames_used = encode_sector_str(&encoder, args->format, decoder->video_frames, buffer); retire_av_data(decoder, 0, frames_used); } else { int samples_length = decoder->audio_sample_count / args->audio_channels; @@ -481,7 +481,7 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { if (is_video_sector) psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)buffer, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); - fwrite(buffer + offset, sector_size, 1, output); + fwrite(buffer + PSX_CDROM_SECTOR_SIZE - sector_size, sector_size, 1, output); time_t t = get_elapsed_time(); diff --git a/psxavenc/main.c b/psxavenc/main.c index 277aa26..0f5e225 100644 --- a/psxavenc/main.c +++ b/psxavenc/main.c @@ -120,7 +120,6 @@ int main(int argc, const char **argv) { case FORMAT_STR: case FORMAT_STRCD: - case FORMAT_STRV: if (!(args.flags & FLAG_QUIET)) { if (decoder.state.audio_stream) fprintf( @@ -147,6 +146,7 @@ int main(int argc, const char **argv) { break; case FORMAT_STRSPU: + case 
FORMAT_STRV: if (!(args.flags & FLAG_QUIET)) { if (decoder.state.audio_stream) fprintf( diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c index 2221764..3587ce1 100644 --- a/psxavenc/mdec.c +++ b/psxavenc/mdec.c @@ -32,13 +32,6 @@ freely, subject to the following restrictions: #include "args.h" #include "mdec.h" -// https://stackoverflow.com/a/60011209 -#if 0 -#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? (((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d))) -#else -#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d))) -#endif - #define AC_PAIR(zeroes, value) \ (((zeroes) << 10) | ((+(value)) & 0x3FF)), \ (((zeroes) << 10) | ((-(value)) & 0x3FF)) @@ -166,39 +159,31 @@ static const struct { static const struct { int c_bits; uint32_t c_value; - int sign_bits; - int value_bits; + int dc_bits; } dc_c_huffman_tree[] = { - {2, 0x0, 0, 0}, - {2, 0x1, 1, 0}, - {2, 0x2, 1, 1}, - {3, 0x6, 1, 2}, - {4, 0xE, 1, 3}, - {5, 0x1E, 1, 4}, - {6, 0x3E, 1, 5}, - {7, 0x7E, 1, 6}, - {8, 0xFE, 1, 7}, + {2, 0x1, 0}, + {2, 0x2, 1}, + {3, 0x6, 2}, + {4, 0xE, 3}, + {5, 0x1E, 4}, + {6, 0x3E, 5}, + {7, 0x7E, 6}, + {8, 0xFE, 7} }; static const struct { int c_bits; uint32_t c_value; - int sign_bits; - int value_bits; + int dc_bits; } dc_y_huffman_tree[] = { - {3, 0x4, 0, 0}, - {2, 0x0, 1, 0}, - {2, 0x1, 1, 1}, - {3, 0x5, 1, 2}, - {3, 0x6, 1, 3}, - {4, 0xE, 1, 4}, - {5, 0x1E, 1, 5}, - {6, 0x3E, 1, 6}, - {7, 0x7E, 1, 7}, -}; - -static const uint8_t dc_coeff_indices[6] = { - 0, 1, 2, 2, 2, 2 + {2, 0x0, 0}, + {2, 0x1, 1}, + {3, 0x5, 2}, + {3, 0x6, 3}, + {4, 0xE, 4}, + {5, 0x1E, 5}, + {6, 0x3E, 6}, + {7, 0x7E, 7} }; static const uint8_t quant_dec[8*8] = { @@ -260,82 +245,75 @@ static const int16_t dct_scale_table[8*8] = { }; #endif +enum { + INDEX_CR, + INDEX_CB, + INDEX_Y +}; + +#define HUFFMAN_CODE(bits, value) (((bits) << 24) | (value)) + static void init_dct_data(mdec_encoder_state_t *state, bs_codec_t codec) { for(int i = 0; i <= 0xFFFF; i++) { - // high 8 bits = bit count - // low 24 bits = 
value - state->ac_huffman_map[i] = ((6+16)<<24)|((0x01<<16)|(i)); + state->ac_huffman_map[i] = HUFFMAN_CODE(6 + 16, (0x1 << 16) | i); int16_t coeff = (int16_t)i; + if (coeff < -0x200) coeff = -0x200; else if (coeff > +0x1FE) coeff = +0x1FE; // 0x1FF = v2 end of frame state->coeff_clamp_map[i] = coeff; - - int16_t delta = (int16_t)DIVIDE_ROUNDED(i, 4); - if (delta < -0xFF) - delta = -0xFF; - else if (delta > +0xFF) - delta = +0xFF; - - // Some versions of Sony's BS v3 decoder compute each DC coefficient as - // ((last + delta * 4) & 0x3FF) instead of just (last + delta * 4). The - // encoder can leverage this behavior to represent large coefficient - // differences as smaller deltas that cause the decoder to overflow and - // wrap around (e.g. -1 to encode -512 -> 511 as opposed to +1023). This - // saves some space as larger DC values take up more bits. - if (codec == BS_CODEC_V3DC) { - if (delta > +0x80) - delta -= 0x100; - } - - state->delta_clamp_map[i] = delta; } + state->dc_huffman_map[(INDEX_CR << 9) | 0] = HUFFMAN_CODE(2, 0x0); + state->dc_huffman_map[(INDEX_CB << 9) | 0] = HUFFMAN_CODE(2, 0x0); + state->dc_huffman_map[(INDEX_Y << 9) | 0] = HUFFMAN_CODE(3, 0x4); + int ac_tree_item_count = sizeof(ac_huffman_tree) / sizeof(ac_huffman_tree[0]); int dc_c_tree_item_count = sizeof(dc_c_huffman_tree) / sizeof(dc_c_huffman_tree[0]); int dc_y_tree_item_count = sizeof(dc_y_huffman_tree) / sizeof(dc_y_huffman_tree[0]); for (int i = 0; i < ac_tree_item_count; i++) { - int bits = ac_huffman_tree[i].c_bits+1; + int bits = ac_huffman_tree[i].c_bits + 1; uint32_t base_value = ac_huffman_tree[i].c_value; - state->ac_huffman_map[ac_huffman_tree[i].u_hword_pos] = (bits << 24) | (base_value << 1) | 0; - state->ac_huffman_map[ac_huffman_tree[i].u_hword_neg] = (bits << 24) | (base_value << 1) | 1; + state->ac_huffman_map[ac_huffman_tree[i].u_hword_pos] = HUFFMAN_CODE(bits, (base_value << 1) | 0); + state->ac_huffman_map[ac_huffman_tree[i].u_hword_neg] = HUFFMAN_CODE(bits, 
(base_value << 1) | 1); } for (int i = 0; i < dc_c_tree_item_count; i++) { - int dc_bits = dc_c_huffman_tree[i].sign_bits + dc_c_huffman_tree[i].value_bits; - int bits = dc_c_huffman_tree[i].c_bits + dc_bits; - uint32_t base_value = dc_c_huffman_tree[i].c_value << dc_bits; + int dc_bits = dc_c_huffman_tree[i].dc_bits; + int bits = dc_c_huffman_tree[i].c_bits + 1 + dc_bits; + uint32_t base_value = dc_c_huffman_tree[i].c_value; + + int pos_offset = 1 << dc_bits; + int neg_offset = 1 - (1 << (dc_bits + 1)); for (int j = 0; j < (1 << dc_bits); j++) { - int delta = j; + int pos = (j + pos_offset) & 0x1FF; + int neg = (j + neg_offset) & 0x1FF; - if ((j >> dc_c_huffman_tree[i].value_bits) == 0) { - delta -= (1 << dc_bits) - 1; - delta &= 0x1FF; - } - - state->dc_huffman_map[(0 << 9) | delta] = (bits << 24) | base_value | j; - state->dc_huffman_map[(1 << 9) | delta] = (bits << 24) | base_value | j; + state->dc_huffman_map[(INDEX_CR << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j); + state->dc_huffman_map[(INDEX_CR << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j); + state->dc_huffman_map[(INDEX_CB << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j); + state->dc_huffman_map[(INDEX_CB << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j); } } for (int i = 0; i < dc_y_tree_item_count; i++) { - int dc_bits = dc_y_huffman_tree[i].sign_bits + dc_y_huffman_tree[i].value_bits; - int bits = dc_y_huffman_tree[i].c_bits + dc_bits; - uint32_t base_value = dc_y_huffman_tree[i].c_value << dc_bits; + int dc_bits = dc_y_huffman_tree[i].dc_bits; + int bits = dc_y_huffman_tree[i].c_bits + 1 + dc_bits; + uint32_t base_value = dc_y_huffman_tree[i].c_value; + + int pos_offset = 1 << dc_bits; + int neg_offset = 1 - (1 << (dc_bits + 1)); for (int j = 0; j < (1 << dc_bits); j++) { - int delta = j; + int pos = (j + pos_offset) & 0x1FF; + int neg = (j + 
neg_offset) & 0x1FF; - if ((j >> dc_y_huffman_tree[i].value_bits) == 0) { - delta -= (1 << dc_bits) - 1; - delta &= 0x1FF; - } - - state->dc_huffman_map[(2 << 9) | delta] = (bits << 24) | base_value | j; + state->dc_huffman_map[(INDEX_Y << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j); + state->dc_huffman_map[(INDEX_Y << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j); } } } @@ -453,6 +431,13 @@ static int reduce_dct_block(mdec_encoder_state_t *state, int32_t *block, int32_t } #endif +// https://stackoverflow.com/a/60011209 +#if 0 +#define DIVIDE_ROUNDED(n, d) (((n) >= 0) ? (((n) + (d)/2) / (d)) : (((n) - (d)/2) / (d))) +#else +#define DIVIDE_ROUNDED(n, d) ((int)round((double)(n) / (double)(d))) +#endif + static bool encode_dct_block( mdec_encoder_state_t *state, bs_codec_t codec, @@ -467,11 +452,26 @@ static bool encode_dct_block( if (!encode_bits(state, 10, dc & 0x3FF)) return false; } else { - int index = dc_coeff_indices[state->block_type]; - int last = state->last_dc_values[index]; + int index = state->block_type; - int delta = state->delta_clamp_map[(dc - last) & 0xFFFF]; - state->last_dc_values[index] = (last + delta * 4) & 0x3FF; + if (index > INDEX_Y) + index = INDEX_Y; + + int delta = DIVIDE_ROUNDED(dc - state->last_dc_values[index], 4); + state->last_dc_values[index] += delta * 4; + + // Some versions of Sony's BS v3 decoder compute each DC coefficient as + // ((last + delta * 4) & 0x3FF) instead of just (last + delta * 4). The + // encoder can leverage this behavior to represent large coefficient + // differences as smaller deltas that cause the decoder to overflow and + // wrap around (e.g. -1 to encode -512 -> 511 as opposed to +1023). This + // saves some space as larger DC values take up more bits. 
+ if (codec == BS_CODEC_V3DC) { + if (delta < -0x80) + delta += 0x100; + else if (delta > +0x80) + delta -= 0x100; + } uint32_t outword = state->dc_huffman_map[(index << 9) | (delta & 0x1FF)]; @@ -488,7 +488,7 @@ static bool encode_dct_block( if (ac == 0) { zeroes++; } else { - uint32_t outword = state->ac_huffman_map[(zeroes << 10) | ac]; + uint32_t outword = state->ac_huffman_map[(zeroes << 10) | (ac & 0x3FF)]; if (!encode_bits(state, outword >> 24, outword & 0xFFFFFF)) return false; @@ -516,21 +516,21 @@ bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int vide mdec_encoder_state_t *state = &(encoder->state); +#if 0 if (state->dct_context != NULL) return true; +#endif state->dct_context = avcodec_dct_alloc(); state->ac_huffman_map = malloc(0x10000 * sizeof(uint32_t)); - state->dc_huffman_map = malloc(0x600 * sizeof(uint32_t)); + state->dc_huffman_map = malloc(0x200 * 3 * sizeof(uint32_t)); state->coeff_clamp_map = malloc(0x10000 * sizeof(int16_t)); - state->delta_clamp_map = malloc(0x10000 * sizeof(int16_t)); if ( state->dct_context == NULL || state->ac_huffman_map == NULL || state->dc_huffman_map == NULL || - state->coeff_clamp_map == NULL || - state->delta_clamp_map == NULL + state->coeff_clamp_map == NULL ) return false; @@ -569,12 +569,8 @@ void destroy_mdec_encoder(mdec_encoder_t *encoder) { free(state->coeff_clamp_map); state->coeff_clamp_map = NULL; } - if (state->delta_clamp_map) { - free(state->delta_clamp_map); - state->delta_clamp_map = NULL; - } for (int i = 0; i < 6; i++) { - if (state->dct_block_lists[i]) { + if (state->dct_block_lists[i] != NULL) { free(state->dct_block_lists[i]); state->dct_block_lists[i] = NULL; } @@ -653,7 +649,6 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) { } else { end_of_block = 0x3FF; assert(state->dc_huffman_map); - assert(state->delta_clamp_map); } assert(state->ac_huffman_map); @@ -681,9 +676,9 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) { 
memset(state->frame_output, 0, state->frame_max_size); state->block_type = 0; - state->last_dc_values[0] = 0; - state->last_dc_values[1] = 0; - state->last_dc_values[2] = 0; + state->last_dc_values[INDEX_CR] = 0; + state->last_dc_values[INDEX_CB] = 0; + state->last_dc_values[INDEX_Y] = 0; state->bits_value = 0; state->bits_left = 16; @@ -759,7 +754,13 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) { state->frame_output[0x007] = 0x00; } -int encode_sector_str(mdec_encoder_t *encoder, format_t format, uint8_t *video_frames, uint8_t *output) { +int encode_sector_str( + mdec_encoder_t *encoder, + format_t format, + uint16_t str_video_id, + uint8_t *video_frames, + uint8_t *output +) { mdec_encoder_state_t *state = &(encoder->state); int last_frame_index = state->frame_index; int frame_size = encoder->video_width * encoder->video_height * 2; @@ -784,9 +785,9 @@ int encode_sector_str(mdec_encoder_t *encoder, format_t format, uint8_t *video_f header[0x000] = 0x60; header[0x001] = 0x01; - // Chunk type: MDEC data - header[0x002] = 0x01; - header[0x003] = 0x80; + // Chunk type + header[0x002] = (uint8_t)str_video_id; + header[0x003] = (uint8_t)(str_video_id >> 8); // Muxed chunk index/count int chunk_index = state->frame_data_offset / 2016; diff --git a/psxavenc/mdec.h b/psxavenc/mdec.h index 3d1e4dc..4b8e026 100644 --- a/psxavenc/mdec.h +++ b/psxavenc/mdec.h @@ -51,7 +51,6 @@ typedef struct { uint32_t *ac_huffman_map; uint32_t *dc_huffman_map; int16_t *coeff_clamp_map; - int16_t *delta_clamp_map; int16_t *dct_block_lists[6]; } mdec_encoder_state_t; @@ -66,4 +65,10 @@ typedef struct { bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int video_width, int video_height); void destroy_mdec_encoder(mdec_encoder_t *encoder); void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame); -int encode_sector_str(mdec_encoder_t *encoder, format_t format, uint8_t *video_frames, uint8_t *output); +int encode_sector_str( + 
mdec_encoder_t *encoder, + format_t format, + uint16_t str_video_id, + uint8_t *video_frames, + uint8_t *output +); From 60cbaca2b294d2044478927729a01dfd8963efd2 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Wed, 5 Mar 2025 01:32:35 +0100 Subject: [PATCH 7/8] Fix str subheader corruption, update README --- README.md | 93 +++++++++++++++++++++++--------- libpsxav/adpcm.c | 68 ++++++++++++++++-------- libpsxav/cdrom.c | 21 ++++---- libpsxav/libpsxav.h | 26 +++++++-- psxavenc/filefmt.c | 127 +++++++++++++++++++++----------------------- psxavenc/mdec.c | 10 ++-- 6 files changed, 215 insertions(+), 130 deletions(-) diff --git a/README.md b/README.md index 411c595..12f3218 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # psxavenc psxavenc is an open-source command-line tool for encoding audio and video data -into formats commonly used on the original PlayStation. +into formats commonly used on the original PlayStation and PlayStation 2. ## Installation @@ -14,22 +14,22 @@ Requirements: ```shell $ meson setup build -$ cd build -$ ninja install +$ meson compile -C build +$ meson install -C build ``` ## Usage -Run `psxavenc`. +Run `psxavenc -h`. 
### Examples Rescale a video file to ≤320x240 pixels (preserving aspect ratio) and encode it -into a 15fps .STR file with 37800 Hz 4-bit stereo audio and 2352-byte sectors, -meant to be played at 2x CD-ROM speed: +into a 15 fps version 2 .str file with 37800 Hz 4-bit stereo audio and 2352-byte +sectors, meant to be played at 2x CD-ROM speed: ```shell -$ psxavenc -t str2cd -f 37800 -b 4 -c 2 -s 320x240 -r 15 -x 2 in.mp4 out.str +$ psxavenc -t strcd -v v2 -f 37800 -b 4 -c 2 -s 320x240 -r 15 -x 2 in.mp4 out.str ``` Convert a mono audio sample to 22050 Hz raw SPU-ADPCM data: @@ -38,36 +38,77 @@ Convert a mono audio sample to 22050 Hz raw SPU-ADPCM data: $ psxavenc -t spu -f 22050 in.ogg out.snd ``` -Convert a stereo audio file to a 44100 Hz interleaved .VAG file with 8192-byte +Convert a stereo audio file to a 44100 Hz interleaved .vag file with 2048-byte interleave and loop flags set at the end of each interleaved chunk: ```shell -$ psxavenc -t vagi -f 44100 -c 2 -L -i 8192 in.wav out.vag +$ psxavenc -t vagi -f 44100 -c 2 -L -i 2048 in.wav out.vag ``` -## Supported formats +## Supported output formats -| Format | Audio | Channels | Video | Sector size | -| :------- | :--------------- | :------- | :---- | :---------- | -| `xa` | XA-ADPCM | 1 or 2 | None | 2336 bytes | -| `xacd` | XA-ADPCM | 1 or 2 | None | 2352 bytes | -| `spu` | SPU-ADPCM | 1 | None | | -| `spui` | SPU-ADPCM | Any | None | Any | -| `vag` | SPU-ADPCM | 1 | None | | -| `vagi` | SPU-ADPCM | Any | None | Any | -| `str2` | None or XA-ADPCM | 1 or 2 | BS v2 | 2336 bytes | -| `str2cd` | None or XA-ADPCM | 1 or 2 | BS v2 | 2352 bytes | -| `str2v` | None | | BS v2 | | -| `sbs2` | None | | BS v2 | Any | +The output format must be set using the `-t` option. 
+ +| Format | Audio codec | Audio channels | Video codec | Sector size | +| :------- | :------------------- | :------------- | :------------ | :---------- | +| `xa` | XA-ADPCM | 1 or 2 | | 2336 bytes | +| `xacd` | XA-ADPCM | 1 or 2 | | 2352 bytes | +| `spu` | SPU-ADPCM | 1 | | | +| `vag` | SPU-ADPCM | 1 | | | +| `spui` | SPU-ADPCM | Any | | | +| `vagi` | SPU-ADPCM | Any | | | +| `str` | XA-ADPCM (optional) | 1 or 2 | BS v2/v3/v3dc | 2336 bytes | +| `strcd` | XA-ADPCM (optional) | 1 or 2 | BS v2/v3/v3dc | 2352 bytes | +| `strspu` | SPU-ADPCM (optional) | Any | BS v2/v3/v3dc | 2048 bytes | +| `strv` | | | BS v2/v3/v3dc | 2048 bytes | +| `sbs` | | | BS v2/v3/v3dc | | Notes: -- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .VAG +- The `xa`, `xacd`, `str` and `strcd` formats will output files with 2336- or + 2352-byte CD-ROM sectors, containing the appropriate CD-XA subheaders and + dummy EDC/ECC placeholders in addition to the actual sector data. Such files + **cannot be added to a disc image as-is** and must instead be parsed by an + authoring tool capable of rebuilding the EDC/ECC data (as it is dependent on + the file's absolute location on the disc) and generating a Mode 2 CD-ROM image + with "native" 2352-byte sectors. +- Similarly, files generated with `-t xa` or `-t xacd` **must be interleaved** + **with other XA-ADPCM tracks or empty padding using an external tool** before + they can be played. +- `vag` and `vagi` are similar to `spu` and `spui` respectively, but add a .vag header at the beginning of the file. The header is always 48 bytes long for `vag` files, while in the case of `vagi` files it is padded to the size specified using the `-a` option (2048 bytes by default). Note that `vagi` files with more than 2 channels and/or alignment other than 2048 bytes are not standardized. 
-- The `sbs2` format (used in some System 573 games) is simply a series of - concatenated BS v2 frames, each padded to the size specified by the `-a` - option, with no additional headers besides the BS frame headers. +- The `strspu` format encodes the input file's audio track as a series of custom + .str chunks (type ID `0x0001` by default) holding interleaved SPU-ADPCM data + in the same format as `spui`, rather than XA-ADPCM. As .str chunks do not + require custom XA subheaders, a file with standard 2048-byte sectors that does + not need any special handling will be generated. +- The `strv` format disables audio altogether and is equivalent to `strspu` on + an input file with no audio track. +- The `sbs` format (used in some System 573 games) consists of a series of + concatenated BS frames, each padded to the size specified by the `-a` option + (the default setting is 8192 bytes), with no additional headers besides the BS + frame headers. + +## Supported video codecs + +All formats with a video track (`str`, `strcd`, `strspu`, `strv` and `sbs`) can +use any of the codecs listed below. The codec can be set using the `-v` option. + +| Codec | Supported by | Typ. decoder CPU usage | +| :------------- | :-------------------- | :--------------------- | +| `v2` (default) | All players/decoders | Medium | +| `v3` | Most players/decoders | High | +| `v3dc` | Few players/decoders | High | + +Notes: + +- The `v3dc` format is a variant of `v3` with a slightly better compression + ratio, however most tools and playback libraries (including FFmpeg, jPSXdec + and earlier versions of Sony's own BS decoder) are unable to decode it + correctly; its use is thus highly discouraged. Refer to + [the psx-spx section on DC coefficient encoding](https://psx-spx.consoledev.net/cdromfileformats/#dc-v3) + for more details. 
diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c index ecc7264..2d5dbc5 100644 --- a/libpsxav/adpcm.c +++ b/libpsxav/adpcm.c @@ -228,14 +228,9 @@ static inline void psx_audio_xa_sync_subheader_copy(psx_cdrom_sector_mode2_t *bu memcpy(buffer->subheader + 1, buffer->subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); } -static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, psx_audio_xa_settings_t settings) { - if (settings.format == PSX_AUDIO_XA_FORMAT_XACD) { - memset(buffer, 0, PSX_CDROM_SECTOR_SIZE); - memset(buffer->sync + 1, 0xFF, 10); - buffer->header.mode = 0x02; - } else { - memset(buffer->subheader, 0, PSX_CDROM_SECTOR_SIZE - 16); - } +static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, int lba, psx_audio_xa_settings_t settings) { + if (settings.format == PSX_AUDIO_XA_FORMAT_XACD) + psx_cdrom_init_sector((psx_cdrom_sector_t *)buffer, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2); buffer->subheader[0].file = settings.file_number; buffer->subheader[0].channel = settings.channel_number & PSX_CDROM_SECTOR_XA_CHANNEL_MASK; @@ -243,28 +238,46 @@ static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, ps PSX_CDROM_SECTOR_XA_SUBMODE_AUDIO | PSX_CDROM_SECTOR_XA_SUBMODE_FORM2 | PSX_CDROM_SECTOR_XA_SUBMODE_RT; - buffer->subheader[0].coding = - (settings.stereo ? PSX_CDROM_SECTOR_XA_CODING_STEREO : PSX_CDROM_SECTOR_XA_CODING_MONO) - | (settings.frequency >= PSX_AUDIO_XA_FREQ_DOUBLE ? PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE : PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE) - | (settings.bits_per_sample >= 8 ? 
PSX_CDROM_SECTOR_XA_CODING_BITS_8 : PSX_CDROM_SECTOR_XA_CODING_BITS_4); + + if (settings.stereo) + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_STEREO; + else + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_MONO; + if (settings.frequency == PSX_AUDIO_XA_FREQ_DOUBLE) + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_FREQ_DOUBLE; + else + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_FREQ_SINGLE; + if (settings.bits_per_sample == 8) + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_BITS_8; + else + buffer->subheader[0].coding |= PSX_CDROM_SECTOR_XA_CODING_BITS_4; + psx_audio_xa_sync_subheader_copy(buffer); } -int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output) { +int psx_audio_xa_encode( + psx_audio_xa_settings_t settings, + psx_audio_encoder_state_t *state, + int16_t* samples, + int sample_count, + int lba, + uint8_t *output +) { int sample_jump = (settings.bits_per_sample == 8) ? 112 : 224; int i, j; - int xa_sector_size = settings.format == PSX_AUDIO_XA_FORMAT_XA ? 
2336 : 2352; - int xa_offset = 2352 - xa_sector_size; + int xa_sector_size = psx_audio_xa_get_buffer_size_per_sector(settings); + int xa_offset = PSX_CDROM_SECTOR_SIZE - xa_sector_size; uint8_t init_sector = 1; - if (settings.stereo) { sample_count <<= 1; } + if (settings.stereo) + sample_count *= 2; for (i = 0, j = 0; i < sample_count || ((j % 18) != 0); i += sample_jump, j++) { psx_cdrom_sector_mode2_t *sector_data = (psx_cdrom_sector_mode2_t*) (output + ((j/18) * xa_sector_size) - xa_offset); uint8_t *block_data = sector_data->data + ((j%18) * 0x80); if (init_sector) { - psx_audio_xa_encode_init_sector(sector_data, settings); + psx_audio_xa_encode_init_sector(sector_data, lba, settings); init_sector = 0; } @@ -276,6 +289,7 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat if ((j+1)%18 == 0) { psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)sector_data, PSX_CDROM_SECTOR_TYPE_MODE2_FORM2); init_sector = 1; + lba++; } } @@ -284,21 +298,33 @@ int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_stat void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length) { if (output_length >= 2336) { - psx_cdrom_sector_mode2_t *sector = (psx_cdrom_sector_mode2_t*) &output[output_length - 2352]; + psx_cdrom_sector_mode2_t *sector = (psx_cdrom_sector_mode2_t*) &output[output_length - PSX_CDROM_SECTOR_SIZE]; sector->subheader[0].submode |= PSX_CDROM_SECTOR_XA_SUBMODE_EOF; psx_audio_xa_sync_subheader_copy(sector); } } -int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output) { +int psx_audio_xa_encode_simple( + psx_audio_xa_settings_t settings, + int16_t* samples, + int sample_count, + int lba, + uint8_t *output +) { psx_audio_encoder_state_t state; memset(&state, 0, sizeof(psx_audio_encoder_state_t)); - int length = psx_audio_xa_encode(settings, &state, samples, sample_count, output); + int length = 
psx_audio_xa_encode(settings, &state, samples, sample_count, lba, output); psx_audio_xa_encode_finalize(settings, output, length); return length; } -int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output) { +int psx_audio_spu_encode( + psx_audio_encoder_channel_state_t *state, + int16_t* samples, + int sample_count, + int pitch, + uint8_t *output +) { uint8_t prebuf[PSX_AUDIO_SPU_SAMPLES_PER_BLOCK]; uint8_t *buffer = output; diff --git a/libpsxav/cdrom.c b/libpsxav/cdrom.c index ac9de32..509e525 100644 --- a/libpsxav/cdrom.c +++ b/libpsxav/cdrom.c @@ -42,11 +42,21 @@ static uint32_t edc_crc32(uint8_t *data, int length) { #define TO_BCD(x) ((x) + ((x) / 10) * 6) +void psx_cdrom_init_xa_subheader(psx_cdrom_sector_xa_subheader_t *subheader, psx_cdrom_sector_type_t type) { + memset(subheader, 0, sizeof(psx_cdrom_sector_xa_subheader_t) * 2); + subheader->submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA; + + if (type == PSX_CDROM_SECTOR_TYPE_MODE2_FORM2) + subheader->submode |= PSX_CDROM_SECTOR_XA_SUBMODE_FORM2; + + memcpy(subheader + 1, subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); +} + void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type) { // Sync sequence memset(sector->mode1.sync + 1, 0xff, 10); sector->mode1.sync[0x0] = 0x00; - sector->mode1.sync[0xb] = 0x00; + sector->mode1.sync[0xB] = 0x00; // Timecode lba += 150; @@ -59,14 +69,7 @@ void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector sector->mode1.header.mode = 0x01; } else { sector->mode2.header.mode = 0x02; - - memset(sector->mode2.subheader, 0, sizeof(psx_cdrom_sector_xa_subheader_t)); - sector->mode2.subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA; - - if (type == PSX_CDROM_SECTOR_TYPE_MODE2_FORM2) - sector->mode2.subheader[0].submode |= PSX_CDROM_SECTOR_XA_SUBMODE_FORM2; - - memcpy(sector->mode2.subheader + 1, sector->mode2.subheader, 
sizeof(psx_cdrom_sector_xa_subheader_t)); + psx_cdrom_init_xa_subheader(sector->mode2.subheader, type); } } diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h index 1b8aaa1..0d9d171 100644 --- a/libpsxav/libpsxav.h +++ b/libpsxav/libpsxav.h @@ -72,9 +72,28 @@ uint32_t psx_audio_spu_get_buffer_size(int sample_count); uint32_t psx_audio_xa_get_buffer_size_per_sector(psx_audio_xa_settings_t settings); uint32_t psx_audio_xa_get_samples_per_sector(psx_audio_xa_settings_t settings); uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings); -int psx_audio_xa_encode(psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, int16_t* samples, int sample_count, uint8_t *output); -int psx_audio_xa_encode_simple(psx_audio_xa_settings_t settings, int16_t* samples, int sample_count, uint8_t *output); -int psx_audio_spu_encode(psx_audio_encoder_channel_state_t *state, int16_t* samples, int sample_count, int pitch, uint8_t *output); +int psx_audio_xa_encode( + psx_audio_xa_settings_t settings, + psx_audio_encoder_state_t *state, + int16_t* samples, + int sample_count, + int lba, + uint8_t *output +); +int psx_audio_xa_encode_simple( + psx_audio_xa_settings_t settings, + int16_t* samples, + int sample_count, + int lba, + uint8_t *output +); +int psx_audio_spu_encode( + psx_audio_encoder_channel_state_t *state, + int16_t* samples, + int sample_count, + int pitch, + uint8_t *output +); int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start); void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length); @@ -149,5 +168,6 @@ typedef enum { PSX_CDROM_SECTOR_TYPE_MODE2_FORM2 } psx_cdrom_sector_type_t; +void psx_cdrom_init_xa_subheader(psx_cdrom_sector_xa_subheader_t *subheader, psx_cdrom_sector_type_t type); void psx_cdrom_init_sector(psx_cdrom_sector_t *sector, int lba, psx_cdrom_sector_type_t type); void psx_cdrom_calculate_checksums(psx_cdrom_sector_t *sector, 
psx_cdrom_sector_type_t type); diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c index 3f6ce45..1150364 100644 --- a/psxavenc/filefmt.c +++ b/psxavenc/filefmt.c @@ -68,15 +68,24 @@ static psx_audio_xa_settings_t args_to_libpsxav_xa_audio(const args_t *args) { return settings; }; -static void init_sector_buffer_video(const args_t *args, psx_cdrom_sector_mode2_t *sector, int lba) { - psx_cdrom_init_sector((psx_cdrom_sector_t *)sector, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); +static void init_sector_buffer_video(const args_t *args, uint8_t *sector, int lba) { + psx_cdrom_sector_xa_subheader_t *subheader = NULL; - sector->subheader[0].file = args->audio_xa_file; - sector->subheader[0].channel = args->audio_xa_channel & PSX_CDROM_SECTOR_XA_CHANNEL_MASK; - sector->subheader[0].submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA | PSX_CDROM_SECTOR_XA_SUBMODE_RT; - sector->subheader[0].coding = 0; + if (args->format == FORMAT_STRCD) { + psx_cdrom_init_sector((psx_cdrom_sector_t *)sector, lba, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); + subheader = ((psx_cdrom_sector_t *)sector)->mode2.subheader; + } else if (args->format == FORMAT_STR) { + subheader = (psx_cdrom_sector_xa_subheader_t *)sector; + } - memcpy(sector->subheader + 1, sector->subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); + if (subheader != NULL) { + subheader->file = args->audio_xa_file; + subheader->channel = args->audio_xa_channel & PSX_CDROM_SECTOR_XA_CHANNEL_MASK; + subheader->submode = PSX_CDROM_SECTOR_XA_SUBMODE_DATA | PSX_CDROM_SECTOR_XA_SUBMODE_RT; + subheader->coding = 0; + + memcpy(subheader + 1, subheader, sizeof(psx_cdrom_sector_xa_subheader_t)); + } } #define VAG_HEADER_SIZE 0x30 @@ -147,35 +156,29 @@ void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output) { psx_audio_encoder_state_t audio_state; memset(&audio_state, 0, sizeof(psx_audio_encoder_state_t)); - for (int j = 0; ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, 0); j++) { + int sector_count = 0; + + 
for (; ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, 0); sector_count++) { int samples_length = decoder->audio_sample_count / args->audio_channels; if (samples_length > audio_samples_per_sector) samples_length = audio_samples_per_sector; - uint8_t buffer[PSX_CDROM_SECTOR_SIZE]; + uint8_t sector[PSX_CDROM_SECTOR_SIZE]; int length = psx_audio_xa_encode( xa_settings, &audio_state, decoder->audio_samples, samples_length, - buffer + sector_count, + sector ); if (decoder->end_of_input) - psx_audio_xa_encode_finalize(xa_settings, buffer, length); - - if (args->format == FORMAT_XACD) { - int t = j + 75*2; - - // Put the time in - buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4); - buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4); - buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4); - } + psx_audio_xa_encode_finalize(xa_settings, sector, length); retire_av_data(decoder, samples_length * args->audio_channels, 0); - fwrite(buffer, length, 1, output); + fwrite(sector, length, 1, output); time_t t = get_elapsed_time(); @@ -183,8 +186,8 @@ void encode_file_xa(const args_t *args, decoder_t *decoder, FILE *output) { fprintf( stderr, "\rLBA: %6d | Encoding speed: %5.2fx", - j, - (double)(j * audio_samples_per_sector) / (double)(args->audio_frequency * t) + sector_count, + (double)(sector_count * audio_samples_per_sector) / (double)(args->audio_frequency * t) ); } } @@ -199,14 +202,14 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) { if (args->format == FORMAT_VAG) fseek(output, VAG_HEADER_SIZE, SEEK_SET); - uint8_t buffer[PSX_AUDIO_SPU_BLOCK_SIZE]; + uint8_t block[PSX_AUDIO_SPU_BLOCK_SIZE]; int block_count = 0; if (!(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) { // Insert leading silent block - memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE); + memset(block, 0, PSX_AUDIO_SPU_BLOCK_SIZE); - fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output); + fwrite(block, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output); block_count++; } @@ -226,16 
+229,16 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) { decoder->audio_samples, samples_length, 1, - buffer + block ); if (block_count == loop_start_block) - buffer[1] |= PSX_AUDIO_SPU_LOOP_START; + block[1] |= PSX_AUDIO_SPU_LOOP_START; if ((args->flags & FLAG_SPU_LOOP_END) && decoder->end_of_input) - buffer[1] |= PSX_AUDIO_SPU_LOOP_REPEAT; + block[1] |= PSX_AUDIO_SPU_LOOP_REPEAT; retire_av_data(decoder, samples_length, 0); - fwrite(buffer, length, 1, output); + fwrite(block, length, 1, output); time_t t = get_elapsed_time(); @@ -251,10 +254,10 @@ void encode_file_spu(const args_t *args, decoder_t *decoder, FILE *output) { if (!(args->flags & FLAG_SPU_LOOP_END)) { // Insert trailing looping block - memset(buffer, 0, PSX_AUDIO_SPU_BLOCK_SIZE); - buffer[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; + memset(block, 0, PSX_AUDIO_SPU_BLOCK_SIZE); + block[1] = PSX_AUDIO_SPU_LOOP_START | PSX_AUDIO_SPU_LOOP_END; - fwrite(buffer, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output); + fwrite(block, PSX_AUDIO_SPU_BLOCK_SIZE, 1, output); block_count++; } @@ -279,8 +282,8 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { // NOTE: since the interleaved .vag format is not standardized, some tools // (such as vgmstream) will not properly play files with interleave < 2048, // alignment != 2048 or channels != 2. 
- int buffer_size = args->audio_interleave * args->audio_channels + args->alignment - 1; - buffer_size -= buffer_size % args->alignment; + int chunk_size = args->audio_interleave * args->audio_channels + args->alignment - 1; + chunk_size -= chunk_size % args->alignment; int header_size = VAG_HEADER_SIZE + args->alignment - 1; header_size -= header_size % args->alignment; @@ -292,7 +295,7 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { psx_audio_encoder_channel_state_t *audio_state = malloc(audio_state_size); memset(audio_state, 0, audio_state_size); - uint8_t *buffer = malloc(buffer_size); + uint8_t *chunk = malloc(chunk_size); int chunk_count = 0; for (; ensure_av_data(decoder, audio_samples_per_chunk * args->audio_channels, 0); chunk_count++) { @@ -301,26 +304,26 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { if (samples_length > audio_samples_per_chunk) samples_length = audio_samples_per_chunk; - memset(buffer, 0, buffer_size); - uint8_t *buffer_ptr = buffer; + memset(chunk, 0, chunk_size); + uint8_t *chunk_ptr = chunk; // Insert leading silent block if (chunk_count == 0 && !(args->flags & FLAG_SPU_NO_LEADING_DUMMY)) { - buffer_ptr += PSX_AUDIO_SPU_BLOCK_SIZE; + chunk_ptr += PSX_AUDIO_SPU_BLOCK_SIZE; samples_length -= PSX_AUDIO_SPU_SAMPLES_PER_BLOCK; } - for (int ch = 0; ch < args->audio_channels; ch++, buffer_ptr += args->audio_interleave) { + for (int ch = 0; ch < args->audio_channels; ch++, chunk_ptr += args->audio_interleave) { int length = psx_audio_spu_encode( audio_state + ch, decoder->audio_samples + ch, samples_length, args->audio_channels, - buffer_ptr + chunk_ptr ); if (length > 0) { - uint8_t *last_block = buffer_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE; + uint8_t *last_block = chunk_ptr + length - PSX_AUDIO_SPU_BLOCK_SIZE; if (args->flags & FLAG_SPU_LOOP_END) { last_block[1] = PSX_AUDIO_SPU_LOOP_REPEAT; @@ -335,7 +338,7 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, 
FILE *output) { } retire_av_data(decoder, samples_length * args->audio_channels, 0); - fwrite(buffer, buffer_size, 1, output); + fwrite(chunk, chunk_size, 1, output); time_t t = get_elapsed_time(); @@ -351,7 +354,7 @@ void encode_file_spui(const args_t *args, decoder_t *decoder, FILE *output) { } free(audio_state); - free(buffer); + free(chunk); if (args->format == FORMAT_VAGI) { uint8_t *header = malloc(header_size); @@ -421,28 +424,31 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { if (frames_needed < 2) frames_needed = 2; - for (int j = 0; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; j++) { + int sector_count = 0; + + for (; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; sector_count++) { ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed); - uint8_t buffer[PSX_CDROM_SECTOR_SIZE]; + uint8_t sector[PSX_CDROM_SECTOR_SIZE]; bool is_video_sector; if (args->flags & FLAG_STR_TRAILING_AUDIO) - is_video_sector = (j % interleave) < video_sectors_per_block; + is_video_sector = (sector_count % interleave) < video_sectors_per_block; else - is_video_sector = (j % interleave) > 0; + is_video_sector = (sector_count % interleave) > 0; if (is_video_sector) { - init_sector_buffer_video(args, (psx_cdrom_sector_mode2_t*)buffer, j); + init_sector_buffer_video(args, sector, sector_count); int frames_used = encode_sector_str( &encoder, args->format, args->str_video_id, decoder->video_frames, - buffer + sector ); + psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)sector, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); retire_av_data(decoder, 0, frames_used); } else { int samples_length = decoder->audio_sample_count / args->audio_channels; @@ -460,28 +466,17 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { &audio_state, decoder->audio_samples, samples_length, - buffer + sector_count, + sector ); if 
(decoder->end_of_input) - psx_audio_xa_encode_finalize(xa_settings, buffer, length); + psx_audio_xa_encode_finalize(xa_settings, sector, length); retire_av_data(decoder, samples_length * args->audio_channels, 0); } - if (args->format == FORMAT_STRCD) { - int t = j + 75*2; - - // Put the time in - buffer[0x00C] = ((t/75/60)%10)|(((t/75/60)/10)<<4); - buffer[0x00D] = (((t/75)%60)%10)|((((t/75)%60)/10)<<4); - buffer[0x00E] = ((t%75)%10)|(((t%75)/10)<<4); - } - - if (is_video_sector) - psx_cdrom_calculate_checksums((psx_cdrom_sector_t *)buffer, PSX_CDROM_SECTOR_TYPE_MODE2_FORM1); - - fwrite(buffer + PSX_CDROM_SECTOR_SIZE - sector_size, sector_size, 1, output); + fwrite(sector, sector_size, 1, output); time_t t = get_elapsed_time(); @@ -490,7 +485,7 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { stderr, "\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx", encoder.state.frame_index, - j, + sector_count, (double)encoder.state.quant_scale_sum / (double)encoder.state.frame_index, (double)(encoder.state.frame_index * args->str_fps_den) / (double)(t * args->str_fps_num) ); diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c index 3587ce1..602e4cc 100644 --- a/psxavenc/mdec.c +++ b/psxavenc/mdec.c @@ -288,11 +288,11 @@ static void init_dct_data(mdec_encoder_state_t *state, bs_codec_t codec) { uint32_t base_value = dc_c_huffman_tree[i].c_value; int pos_offset = 1 << dc_bits; - int neg_offset = 1 - (1 << (dc_bits + 1)); + int neg_offset = pos_offset * 2 - 1; for (int j = 0; j < (1 << dc_bits); j++) { int pos = (j + pos_offset) & 0x1FF; - int neg = (j + neg_offset) & 0x1FF; + int neg = (j - neg_offset) & 0x1FF; state->dc_huffman_map[(INDEX_CR << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j); state->dc_huffman_map[(INDEX_CR << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j); @@ -306,11 +306,11 @@ static void init_dct_data(mdec_encoder_state_t *state, bs_codec_t 
codec) { uint32_t base_value = dc_y_huffman_tree[i].c_value; int pos_offset = 1 << dc_bits; - int neg_offset = 1 - (1 << (dc_bits + 1)); + int neg_offset = pos_offset * 2 - 1; for (int j = 0; j < (1 << dc_bits); j++) { int pos = (j + pos_offset) & 0x1FF; - int neg = (j + neg_offset) & 0x1FF; + int neg = (j - neg_offset) & 0x1FF; state->dc_huffman_map[(INDEX_Y << 9) | pos] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (1 << dc_bits) | j); state->dc_huffman_map[(INDEX_Y << 9) | neg] = HUFFMAN_CODE(bits, (base_value << (dc_bits + 1)) | (0 << dc_bits) | j); @@ -657,7 +657,7 @@ void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) { // Attempt encoding the frame at the maximum quality. If the result is too // large, increase the quantization scale and try again. // TODO: if a frame encoded at scale N is too large but the same frame - // encoded at scale N-1 leaves a significant amount of free space, attempt + // encoded at scale N+1 leaves a significant amount of free space, attempt // compressing at scale N but optimizing coefficients away until it fits // (like the old algorithm did) for ( From 801d70e22e317e2e48423d0c44e4f08490e89680 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Sat, 8 Mar 2025 01:10:42 +0100 Subject: [PATCH 8/8] Disable unimplemented formats, add missing const qualifiers --- README.md | 40 ++++++++-------- libpsxav/adpcm.c | 53 ++++++++++++++++---- libpsxav/libpsxav.h | 8 ++-- psxavenc/args.c | 4 +- psxavenc/filefmt.c | 114 +++++++++++++++++++++++++++++++++++++++++++- psxavenc/main.c | 4 ++ psxavenc/mdec.c | 15 +++--- psxavenc/mdec.h | 4 +- 8 files changed, 196 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 12f3218..007bd35 100644 --- a/README.md +++ b/README.md @@ -49,19 +49,18 @@ $ psxavenc -t vagi -f 44100 -c 2 -L -i 2048 in.wav out.vag The output format must be set using the `-t` option. 
-| Format | Audio codec | Audio channels | Video codec | Sector size | -| :------- | :------------------- | :------------- | :------------ | :---------- | -| `xa` | XA-ADPCM | 1 or 2 | | 2336 bytes | -| `xacd` | XA-ADPCM | 1 or 2 | | 2352 bytes | -| `spu` | SPU-ADPCM | 1 | | | -| `vag` | SPU-ADPCM | 1 | | | -| `spui` | SPU-ADPCM | Any | | | -| `vagi` | SPU-ADPCM | Any | | | -| `str` | XA-ADPCM (optional) | 1 or 2 | BS v2/v3/v3dc | 2336 bytes | -| `strcd` | XA-ADPCM (optional) | 1 or 2 | BS v2/v3/v3dc | 2352 bytes | -| `strspu` | SPU-ADPCM (optional) | Any | BS v2/v3/v3dc | 2048 bytes | -| `strv` | | | BS v2/v3/v3dc | 2048 bytes | -| `sbs` | | | BS v2/v3/v3dc | | +| Format | Audio codec | Audio channels | Video codec | Sector size | +| :------ | :------------------- | :------------- | :------------ | :---------- | +| `xa` | XA-ADPCM | 1 or 2 | | 2336 bytes | +| `xacd` | XA-ADPCM | 1 or 2 | | 2352 bytes | +| `spu` | SPU-ADPCM | 1 | | | +| `vag` | SPU-ADPCM | 1 | | | +| `spui` | SPU-ADPCM | Any | | | +| `vagi` | SPU-ADPCM | Any | | | +| `str` | XA-ADPCM (optional) | 1 or 2 | BS v2/v3/v3dc | 2336 bytes | +| `strcd` | XA-ADPCM (optional) | 1 or 2 | BS v2/v3/v3dc | 2352 bytes | +| `strv` | | | BS v2/v3/v3dc | 2048 bytes | +| `sbs` | | | BS v2/v3/v3dc | | Notes: @@ -81,11 +80,12 @@ Notes: specified using the `-a` option (2048 bytes by default). Note that `vagi` files with more than 2 channels and/or alignment other than 2048 bytes are not standardized. -- The `strspu` format encodes the input file's audio track as a series of custom - .str chunks (type ID `0x0001` by default) holding interleaved SPU-ADPCM data - in the same format as `spui`, rather than XA-ADPCM. As .str chunks do not - require custom XA subheaders, a file with standard 2048-byte sectors that does - not need any special handling will be generated. 
+- ~~The `strspu` format encodes the input file's audio track as a series of~~ + ~~custom .str chunks (type ID `0x0001` by default) holding interleaved~~ + ~~SPU-ADPCM data in the same format as `spui`, rather than XA-ADPCM. As .str~~ + ~~chunks do not require custom XA subheaders, a file with standard 2048-byte~~ + ~~sectors that does not need any special handling will be generated.~~ *This* + *format has not yet been implemented.* - The `strv` format disables audio altogether and is equivalent to `strspu` on an input file with no audio track. - The `sbs` format (used in some System 573 games) consists of a series of @@ -95,8 +95,8 @@ Notes: ## Supported video codecs -All formats with a video track (`str`, `strcd`, `strspu`, `strv` and `sbs`) can -use any of the codecs listed below. The codec can be set using the `-v` option. +All formats with a video track (`str`, `strcd`, `strv` and `sbs`) can use any of +the codecs listed below. The codec can be set using the `-v` option. | Codec | Supported by | Typ. decoder CPU usage | | :------------- | :-------------------- | :--------------------- | diff --git a/libpsxav/adpcm.c b/libpsxav/adpcm.c index 2d5dbc5..80e3413 100644 --- a/libpsxav/adpcm.c +++ b/libpsxav/adpcm.c @@ -36,7 +36,14 @@ freely, subject to the following restrictions: static const int16_t filter_k1[ADPCM_FILTER_COUNT] = {0, 60, 115, 98, 122}; static const int16_t filter_k2[ADPCM_FILTER_COUNT] = {0, 0, -52, -55, -60}; -static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, int filter, int shift_range) { +static int find_min_shift( + const psx_audio_encoder_channel_state_t *state, + const int16_t *samples, + int sample_limit, + int pitch, + int filter, + int shift_range +) { // Assumption made: // // There is value in shifting right one step further to allow the nibbles to clip. 
@@ -71,7 +78,19 @@ static int find_min_shift(const psx_audio_encoder_channel_state_t *state, int16_ return min_shift; } -static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, const psx_audio_encoder_channel_state_t *instate, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter, int sample_shift, int shift_range) { +static uint8_t attempt_to_encode( + psx_audio_encoder_channel_state_t *outstate, + const psx_audio_encoder_channel_state_t *instate, + const int16_t *samples, + int sample_limit, + int pitch, + uint8_t *data, + int data_shift, + int data_pitch, + int filter, + int sample_shift, + int shift_range +) { uint8_t sample_mask = 0xFFFF >> shift_range; uint8_t nondata_mask = ~(sample_mask << data_shift); @@ -120,8 +139,18 @@ static uint8_t attempt_to_encode(psx_audio_encoder_channel_state_t *outstate, co return hdr; } -static uint8_t encode(psx_audio_encoder_channel_state_t *state, int16_t *samples, int sample_limit, int pitch, uint8_t *data, int data_shift, int data_pitch, int filter_count, int shift_range) { - psx_audio_encoder_channel_state_t proposed; +static uint8_t encode( + psx_audio_encoder_channel_state_t *state, + const int16_t *samples, + int sample_limit, + int pitch, + uint8_t *data, + int data_shift, + int data_pitch, + int filter_count, + int shift_range +) { + psx_audio_encoder_channel_state_t proposed; int64_t best_mse = ((int64_t)1<<(int64_t)50); int best_filter = 0; int best_sample_shift = 0; @@ -161,7 +190,13 @@ static uint8_t encode(psx_audio_encoder_channel_state_t *state, int16_t *samples best_filter, best_sample_shift, shift_range); } -static void encode_block_xa(int16_t *audio_samples, int audio_samples_limit, uint8_t *data, psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state) { +static void encode_block_xa( + const int16_t *audio_samples, + int audio_samples_limit, + uint8_t *data, + psx_audio_xa_settings_t settings, + psx_audio_encoder_state_t 
*state +) { if (settings.bits_per_sample == 4) { if (settings.stereo) { data[0] = encode(&(state->left), audio_samples, audio_samples_limit, 2, data + 0x10, 0, 4, XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS); @@ -258,7 +293,7 @@ static void psx_audio_xa_encode_init_sector(psx_cdrom_sector_mode2_t *buffer, in int psx_audio_xa_encode( psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, - int16_t* samples, + const int16_t *samples, int sample_count, int lba, uint8_t *output @@ -306,7 +341,7 @@ void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *out int psx_audio_xa_encode_simple( psx_audio_xa_settings_t settings, - int16_t* samples, + const int16_t *samples, int sample_count, int lba, uint8_t *output @@ -320,7 +355,7 @@ int psx_audio_xa_encode_simple( int psx_audio_spu_encode( psx_audio_encoder_channel_state_t *state, - int16_t* samples, + const int16_t *samples, int sample_count, int pitch, uint8_t *output @@ -340,7 +375,7 @@ int psx_audio_spu_encode( return buffer - output; } -int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start) { +int psx_audio_spu_encode_simple(const int16_t *samples, int sample_count, uint8_t *output, int loop_start) { psx_audio_encoder_channel_state_t state; memset(&state, 0, sizeof(psx_audio_encoder_channel_state_t)); int length = psx_audio_spu_encode(&state, samples, sample_count, 1, output); diff --git a/libpsxav/libpsxav.h b/libpsxav/libpsxav.h index 0d9d171..67733dd 100644 --- a/libpsxav/libpsxav.h +++ b/libpsxav/libpsxav.h @@ -75,26 +75,26 @@ uint32_t psx_audio_xa_get_sector_interleave(psx_audio_xa_settings_t settings); int psx_audio_xa_encode( psx_audio_xa_settings_t settings, psx_audio_encoder_state_t *state, - int16_t* samples, + const int16_t *samples, int sample_count, int lba, uint8_t *output ); int psx_audio_xa_encode_simple( psx_audio_xa_settings_t settings, - int16_t* samples, + const int16_t *samples, int sample_count, int lba, uint8_t *output ); 
int psx_audio_spu_encode( psx_audio_encoder_channel_state_t *state, - int16_t* samples, + const int16_t *samples, int sample_count, int pitch, uint8_t *output ); -int psx_audio_spu_encode_simple(int16_t* samples, int sample_count, uint8_t *output, int loop_start); +int psx_audio_spu_encode_simple(const int16_t *samples, int sample_count, uint8_t *output, int loop_start); void psx_audio_xa_encode_finalize(psx_audio_xa_settings_t settings, uint8_t *output, int output_length); // cdrom.c diff --git a/psxavenc/args.c b/psxavenc/args.c index fb74a1f..93c3ef0 100644 --- a/psxavenc/args.c +++ b/psxavenc/args.c @@ -125,7 +125,7 @@ static const char *const general_options_help = " vagi: [A.] .vag SPU-ADPCM interleaved\n" " str: [AV] .str video + XA-ADPCM, 2336-byte sectors\n" " strcd: [AV] .str video + XA-ADPCM, 2352-byte sectors\n" - " strspu: [AV] .str video + SPU-ADPCM, 2048-byte sectors\n" + //" strspu: [AV] .str video + SPU-ADPCM, 2048-byte sectors\n" " strv: [.V] .str video, 2048-byte sectors\n" " sbs: [.V] .sbs video\n" " -R key=value,... Pass custom options to libswresample (see FFmpeg docs)\n" @@ -498,7 +498,7 @@ static const char *const general_usage = " psxavenc -t spu|vag [spu-options] \n" " psxavenc -t spui|vagi [spui-options] \n" " psxavenc -t str|strcd [xa-options] [bs-options] [str-options] \n" - " psxavenc -t strspu [spui-options] [bs-options] [str-options] \n" + //" psxavenc -t strspu [spui-options] [bs-options] [str-options] \n" " psxavenc -t strv [bs-options] [str-options] \n" " psxavenc -t sbs [bs-options] [sbs-options] \n" "\n"; diff --git a/psxavenc/filefmt.c b/psxavenc/filefmt.c index 1150364..cb446df 100644 --- a/psxavenc/filefmt.c +++ b/psxavenc/filefmt.c @@ -22,6 +22,7 @@ freely, subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. 
*/ +#include #include #include #include @@ -432,7 +433,9 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { uint8_t sector[PSX_CDROM_SECTOR_SIZE]; bool is_video_sector; - if (args->flags & FLAG_STR_TRAILING_AUDIO) + if (audio_samples_per_sector == 0) + is_video_sector = true; + else if (args->flags & FLAG_STR_TRAILING_AUDIO) is_video_sector = (sector_count % interleave) < video_sectors_per_block; else is_video_sector = (sector_count % interleave) > 0; @@ -497,7 +500,114 @@ void encode_file_str(const args_t *args, decoder_t *decoder, FILE *output) { } void encode_file_strspu(const args_t *args, decoder_t *decoder, FILE *output) { - // TODO: implement + int interleave; + int audio_samples_per_sector; + int video_sectors_per_block; + + if (decoder->state.audio_stream != NULL) { + assert(false); // TODO: implement + + if (!(args->flags & FLAG_QUIET)) + fprintf( + stderr, + "Interleave: %d/%d audio, %d/%d video\n", + interleave - video_sectors_per_block, + interleave, + video_sectors_per_block, + interleave + ); + } else { + // 0/1 audio, 1/1 video + interleave = 1; + audio_samples_per_sector = 0; + video_sectors_per_block = 1; + } + + mdec_encoder_t encoder; + init_mdec_encoder(&encoder, args->video_codec, args->video_width, args->video_height); + + // e.g. 
15fps = (150*7/8/15) = 8.75 blocks per frame + encoder.state.frame_block_base_overflow = (75 * args->str_cd_speed) * video_sectors_per_block * args->str_fps_den; + encoder.state.frame_block_overflow_den = interleave * args->str_fps_num; + double frame_size = (double)encoder.state.frame_block_base_overflow / (double)encoder.state.frame_block_overflow_den; + + if (!(args->flags & FLAG_QUIET)) + fprintf(stderr, "Frame size: %.2f sectors\n", frame_size); + + encoder.state.frame_output = malloc(2016 * (int)ceil(frame_size)); + encoder.state.frame_index = 0; + encoder.state.frame_data_offset = 0; + encoder.state.frame_max_size = 0; + encoder.state.frame_block_overflow_num = 0; + encoder.state.quant_scale_sum = 0; + + // FIXME: this needs an extra frame to prevent A/V desync + int frames_needed = (int) ceil((double)video_sectors_per_block / frame_size); + + if (frames_needed < 2) + frames_needed = 2; + + int sector_count = 0; + + for (; !decoder->end_of_input || encoder.state.frame_data_offset < encoder.state.frame_max_size; sector_count++) { + ensure_av_data(decoder, audio_samples_per_sector * args->audio_channels, frames_needed); + + uint8_t sector[2048]; + bool is_video_sector; + + if (audio_samples_per_sector == 0) + is_video_sector = true; + else if (args->flags & FLAG_STR_TRAILING_AUDIO) + is_video_sector = (sector_count % interleave) < video_sectors_per_block; + else + is_video_sector = (sector_count % interleave) > 0; + + if (is_video_sector) { + init_sector_buffer_video(args, sector, sector_count); + + int frames_used = encode_sector_str( + &encoder, + args->format, + args->str_video_id, + decoder->video_frames, + sector + ); + + retire_av_data(decoder, 0, frames_used); + } else { + int samples_length = decoder->audio_sample_count / args->audio_channels; + + if (samples_length > audio_samples_per_sector) + samples_length = audio_samples_per_sector; + + // FIXME: this is an extremely hacky way to handle audio tracks + // shorter than the video track + if 
(!samples_length) + video_sectors_per_block++; + + assert(false); // TODO: implement + + retire_av_data(decoder, samples_length * args->audio_channels, 0); + } + + fwrite(sector, 2048, 1, output); + + time_t t = get_elapsed_time(); + + if (!(args->flags & FLAG_HIDE_PROGRESS) && t) { + fprintf( + stderr, + "\rFrame: %4d | LBA: %6d | Avg. q. scale: %5.2f | Encoding speed: %5.2fx", + encoder.state.frame_index, + sector_count, + (double)encoder.state.quant_scale_sum / (double)encoder.state.frame_index, + (double)(encoder.state.frame_index * args->str_fps_den) / (double)(t * args->str_fps_num) + ); + } + } + + free(encoder.state.frame_output); + destroy_mdec_encoder(&encoder); } void encode_file_sbs(const args_t *args, decoder_t *decoder, FILE *output) { diff --git a/psxavenc/main.c b/psxavenc/main.c index 0f5e225..9e584c2 100644 --- a/psxavenc/main.c +++ b/psxavenc/main.c @@ -146,6 +146,10 @@ int main(int argc, const char **argv) { break; case FORMAT_STRSPU: + // TODO: implement and remove this check + fprintf(stderr, "This format is not currently supported\n"); + break; + case FORMAT_STRV: if (!(args.flags & FLAG_QUIET)) { if (decoder.state.audio_stream) diff --git a/psxavenc/mdec.c b/psxavenc/mdec.c index 602e4cc..ba3d043 100644 --- a/psxavenc/mdec.c +++ b/psxavenc/mdec.c @@ -577,7 +577,7 @@ void destroy_mdec_encoder(mdec_encoder_t *encoder) { } } -void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame) { +void encode_frame_bs(mdec_encoder_t *encoder, const uint8_t *video_frame) { mdec_encoder_state_t *state = &(encoder->state); assert(state->dct_context); @@ -758,15 +758,12 @@ int encode_sector_str( mdec_encoder_t *encoder, format_t format, uint16_t str_video_id, - uint8_t *video_frames, + const uint8_t *video_frames, uint8_t *output ) { mdec_encoder_state_t *state = &(encoder->state); - int last_frame_index = state->frame_index; int frame_size = encoder->video_width * encoder->video_height * 2; - - uint8_t header[32]; - memset(header, 0, 
sizeof(header)); + int frames_used = 0; while (state->frame_data_offset >= state->frame_max_size) { state->frame_index++; @@ -779,8 +776,12 @@ int encode_sector_str( encode_frame_bs(encoder, video_frames); video_frames += frame_size; + frames_used++; } + uint8_t header[32]; + memset(header, 0, sizeof(header)); + // STR version header[0x000] = 0x60; header[0x001] = 0x01; @@ -831,5 +832,5 @@ int encode_sector_str( memcpy(output + offset + 0x020, state->frame_output + state->frame_data_offset, 2016); state->frame_data_offset += 2016; - return state->frame_index - last_frame_index; + return frames_used; } diff --git a/psxavenc/mdec.h b/psxavenc/mdec.h index 4b8e026..ed94f2e 100644 --- a/psxavenc/mdec.h +++ b/psxavenc/mdec.h @@ -64,11 +64,11 @@ typedef struct { bool init_mdec_encoder(mdec_encoder_t *encoder, bs_codec_t video_codec, int video_width, int video_height); void destroy_mdec_encoder(mdec_encoder_t *encoder); -void encode_frame_bs(mdec_encoder_t *encoder, uint8_t *video_frame); +void encode_frame_bs(mdec_encoder_t *encoder, const uint8_t *video_frame); int encode_sector_str( mdec_encoder_t *encoder, format_t format, uint16_t str_video_id, - uint8_t *video_frames, + const uint8_t *video_frames, uint8_t *output );