diff --git a/src/Tools/psxfileconv/src/audio/my_xa/mod.rs b/src/Tools/psxfileconv/src/audio/my_xa/mod.rs
index 84f5ba75..78124203 100644
--- a/src/Tools/psxfileconv/src/audio/my_xa/mod.rs
+++ b/src/Tools/psxfileconv/src/audio/my_xa/mod.rs
@@ -9,8 +9,8 @@ use tool_helper::{Error, Input};
 pub struct Arguments {
     #[clap(value_enum, value_parser, default_value_t=Frequency::High)]
     frequency: Frequency,
-    #[clap(value_enum, value_parser, default_value_t=Channels::Stereo)]
-    channels: Channels,
+    #[clap(value_enum, value_parser, default_value_t=Orality::Stereo)]
+    orality: Orality,
     #[clap(value_enum, value_parser, default_value_t=SampleDepth::Normal)]
     sample_depth: SampleDepth,
 }
@@ -21,8 +21,8 @@ pub enum Frequency {
     Low,
 }
 
-#[derive(Copy, Clone, ValueEnum)]
-pub enum Channels {
+#[derive(Copy, Clone, PartialEq, ValueEnum)]
+pub enum Orality {
     Stereo,
     Mono,
 }
@@ -35,5 +35,5 @@ pub enum SampleDepth {
 
 pub fn convert(args: Arguments, input: Input, output: &mut dyn Write) -> Result<(), Error> {
     let prepared_xa_audio = raw_audio::load_as_i16_audio(input, xa_audio::audio_conversion_settings(&args))?;
-    xa_audio::convert(prepared_xa_audio, output, &args)
+    xa_audio::encode(prepared_xa_audio, output, &args)
 }
\ No newline at end of file
diff --git a/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/mod.rs b/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/mod.rs
index 1d661000..297de30a 100644
--- a/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/mod.rs
+++ b/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/mod.rs
@@ -1,45 +1,56 @@
+mod xapcm;
+
+use crate::audio::my_xa::Orality;
+
 use super::Arguments;
 use super::raw_audio::{I16Samples, Settings};
 use std::io::Write;
 use symphonia::core::audio::Layout;
 use tool_helper::Error;
 
-const HIGH_FREQUENCY:u32 = 37_800;
-const LOW_FREQUENCY:u32 = 18_900;
+const BLOCKS_PER_SECTOR:usize = 18;
+const HIGH_FREQUENCY:u32 = 37_800;
+const LOW_FREQUENCY:u32 = 18_900;
 
 pub fn audio_conversion_settings(arguments: &Arguments) -> Settings {
     let frequency = match arguments.frequency {
         super::Frequency::High => HIGH_FREQUENCY,
         super::Frequency::Low => LOW_FREQUENCY,
     };
-    let layout = match arguments.channels {
-        super::Channels::Stereo => Layout::Stereo,
-        super::Channels::Mono => Layout::Mono,
+    let layout = match arguments.orality {
+        super::Orality::Stereo => Layout::Stereo,
+        super::Orality::Mono => Layout::Mono,
     };
 
     Settings::new(frequency, layout)
 }
 
-pub fn convert(input: I16Samples, _output: &mut dyn Write, arguments: &Arguments) -> Result<(), Error> {
+/// Walks over the prepared samples one block at a time and prints the progress.
+///
+/// The encoder is not wired up yet, so this currently ends with a "not implemented" error.
+pub fn encode(input: I16Samples, output: &mut dyn Write, arguments: &Arguments) -> Result<(), Error> {
     let (samples_per_block, sample_count) = init_values(&input, arguments);
 
-    for (iteration, sampled_id) in (0..sample_count).step_by(samples_per_block).enumerate() {
-        print!("\rIteration: {}; Sample: {}", iteration, sampled_id);
+    for (sectors, sampled_id) in (0..sample_count).step_by(samples_per_block).enumerate() {
+        let slice = &input[sampled_id..];
+
+        print!("\rIteration: {}; Sample: {}", sectors, sampled_id);
     }
     println!();
 
     Err(Error::not_implemented("XA conversion"))
 }
 
-fn init_values(input: &I16Samples, arguments: &Arguments) -> (usize, u32) {
+/// Returns `(samples_per_block, sample_count)` for the selected sample depth and orality.
+fn init_values(input: &I16Samples, arguments: &Arguments) -> (usize, usize) {
     let samples_per_block = match arguments.sample_depth {
         super::SampleDepth::Normal => 224,
         super::SampleDepth::High => 112,
     };
-    let sample_count = match arguments.channels {
-        super::Channels::Stereo => input.len() / 2,
-        super::Channels::Mono => input.len(),
+    let sample_count = match arguments.orality {
+        super::Orality::Stereo => input.len()/2,
+        super::Orality::Mono => input.len(),
     };
 
-    (samples_per_block as usize, sample_count as u32)
+    (samples_per_block as usize, sample_count)
 }
\ No newline at end of file
diff --git a/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/xapcm.rs b/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/xapcm.rs
new file mode 100644
index 00000000..3bf26093
--- /dev/null
+++ b/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/xapcm.rs
@@ -0,0 +1,265 @@
+use crate::audio::my_xa::{Arguments, Orality, SampleDepth};
+use tool_helper::Error;
+
+/// XA ADPCM encoder holding one predictor state per channel.
+pub struct Encoder {
+    left: ChannelState,
+    right: ChannelState
+}
+
+impl Encoder {
+    const XA_ADPCM_FILTER_COUNT: i32 = 4;
+    const FILTER_K1: [i16; 5] = [0, 60, 115, 98, 122];
+    const FILTER_K2: [i16; 5] = [0, 0, -52, -55, -60];
+
+    /// Encodes `samples` into `data` using the offset table selected by `args`.
+    ///
+    /// Every table entry is passed to `encode`; the header byte it returns is stored
+    /// twice near the start of `data`.
+    pub fn encode_xa(&mut self, samples: &[i16], sample_limit: i32, data: &mut [u8], args: Arguments) -> Result<(), Error> {
+        const SHIFT_RANGE_4BPS: i32 = 12;
+        const SHIFT_RANGE_8BPS: i32 = 8;
+
+        // NOTE(review): mono entries also alternate between both states via `idx%2` - confirm this is intended.
+        let channels = [&mut self.left, &mut self.right];
+        match args.sample_depth {
+            SampleDepth::Normal => {
+                let offset = if args.orality == Orality::Stereo {&STEREO_4BIT} else {&MONO_4BIT};
+                let (first_offset, second_offset) = offset;
+
+                for (offset_idx, offset_set) in [first_offset, second_offset].iter().enumerate() {
+                    for (idx, offset) in offset_set.iter().enumerate() {
+                        let byte = Self::encode(channels[idx%2], &samples[offset.sample..], sample_limit + offset.sample_limit, offset.pitch, &mut data[offset.data..], offset.data_shift, offset.data_pitch, Self::XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_4BPS)?;
+                        data[idx + (offset_idx*8)] = byte;
+                        data[idx + 4 + (offset_idx*8)] = byte;
+                    }
+                }
+            },
+            SampleDepth::High => {
+                let offset_set = if args.orality == Orality::Stereo {&STEREO_8BIT} else {&MONO_8BIT};
+                for (idx, offset) in offset_set.iter().enumerate() {
+                    let byte = Self::encode(channels[idx%2], &samples[offset.sample..], sample_limit + offset.sample_limit, offset.pitch, &mut data[offset.data..], offset.data_shift, offset.data_pitch, Self::XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS)?;
+                    data[idx] = byte;
+                    data[idx + 4] = byte;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Tries the candidate filter/shift pairs and encodes with the pair that gave the lowest error.
+    ///
+    /// Returns the header byte of the final encoding.
+    fn encode(channel_state: &mut ChannelState, samples: &[i16], sample_limit: i32, pitch: i32, data: &mut [u8], data_shift: i32, data_pitch: i32, filter_count: i32, shift_range: i32) -> Result<u8, Error> {
+        let mut best_mse = 1u64 << 50u64;
+        let mut best_filter = 0;
+        let mut best_sample_shift = 0;
+
+        for filter in 0..filter_count {
+            let true_min_shift = Self::find_min_shift(channel_state, samples, sample_limit, pitch, filter as usize, shift_range)?;
+
+            // Testing has shown that the optimal shift can be off the true minimum shift
+            // by 1 in *either* direction.
+            // This is NOT the case when dither is used.
+            let min_shift = if true_min_shift - 1 < 0 {0} else {true_min_shift - 1};
+            let max_shift = if true_min_shift + 1 > shift_range {shift_range} else {true_min_shift + 1};
+
+            // Inclusive range: `max_shift` itself has to be tried as well.
+            for sample_shift in min_shift..=max_shift {
+                let mut proposed = channel_state.clone();
+                Self::attempt_encode(&mut proposed, samples, sample_limit, pitch, data, data_shift, data_pitch, filter, sample_shift, shift_range)?;
+                if best_mse > proposed.mse {
+                    best_mse = proposed.mse;
+                    best_filter = filter;
+                    best_sample_shift = sample_shift;
+                }
+            }
+        }
+
+        Self::attempt_encode(channel_state, samples, sample_limit, pitch, data, data_shift, data_pitch, best_filter, best_sample_shift, shift_range)
+    }
+
+    /// Encodes 28 samples with the given `filter` and `sample_shift`, writing them into `data`.
+    ///
+    /// Updates `prev1`, `prev2` and `mse` of `out_channel_state` and returns the header byte.
+    fn attempt_encode(out_channel_state: &mut ChannelState, samples: &[i16], sample_limit: i32, pitch: i32, data: &mut [u8], data_shift: i32, data_pitch: i32, filter: i32, sample_shift: i32, shift_range: i32) -> Result<u8, Error> {
+        let sample_mask = (0xFFFF >> shift_range) as u8;
+        let nondata_mask = (!(sample_mask << data_shift)) as u8;
+
+        let min_shift = sample_shift;
+        let k1 = Self::FILTER_K1[filter as usize] as i32;
+        let k2 = Self::FILTER_K2[filter as usize] as i32;
+
+        let hdr = ((min_shift & 0x0F) | ((filter as i32) << 4)) as u8;
+
+        out_channel_state.mse = 0;
+
+        for i in 0..28 {
+            // TODO: Code duplication with `find_min_shift`?
+            let sample = if i >= sample_limit {0} else {samples[(i*pitch) as usize] as i32 + out_channel_state.qerr};
+            let previous_value = (k1*out_channel_state.prev1 + k2*out_channel_state.prev2 + (1 << 5)) >> 6;
+
+            let mut sample_enc = sample - previous_value;
+            sample_enc <<= min_shift;
+            sample_enc += 1 << (shift_range - 1);
+            sample_enc >>= shift_range;
+
+            // TODO: And this?
+            if sample_enc < (-0x8000 >> shift_range) {sample_enc = -0x8000 >> shift_range}
+            if sample_enc > ( 0x7FFF >> shift_range) {sample_enc = 0x7FFF >> shift_range}
+            sample_enc &= sample_mask as i32;
+
+            // Reinterpret the low 16 bits as `i16` so that negative codes are sign-extended.
+            let mut sample_dec = (((sample_enc & sample_mask as i32) << shift_range) as i16) as i32;
+            sample_dec >>= min_shift;
+            sample_dec += previous_value;
+            if sample_dec > 0x7FFF {sample_dec = 0x7FFF}
+            if sample_dec < -0x8000 {sample_dec = -0x8000}
+
+            let sample_error = sample_dec - sample;
+            if sample_error >= (1 << 30) || sample_error <= -(1 << 30) {
+                return Err(Error::from_text(format!("Sample error exceeds 30bit: {}", sample_error)));
+            }
+
+            data[(i*data_pitch) as usize] = ((data[(i*data_pitch) as usize] & nondata_mask) as i32 | (sample_enc << data_shift)) as u8;
+
+            // Square the magnitude: casting a negative error to `u64` would overflow the multiplication.
+            let abs_error = u64::from(sample_error.unsigned_abs());
+            out_channel_state.mse += abs_error*abs_error;
+            out_channel_state.prev2 = out_channel_state.prev1;
+            out_channel_state.prev1 = sample_dec;
+        }
+
+        Ok(hdr)
+    }
+
+    /// Estimates the shift for the next 28 samples when predicted with the given `filter`.
+    ///
+    /// Fails when the result falls outside of `0..=shift_range`.
+    fn find_min_shift(channel_state: &ChannelState, samples: &[i16], sample_limit: i32, pitch: i32, filter: usize, shift_range: i32) -> Result<i32, Error> {
+        /*
+            Assumption made:
+            There is value in shifting right one step further to allow the nibbles to clip.
+            However, given a possible shift value, there is no value in shifting one step less.
+
+            Having said that, this is not a completely accurate model of the encoder,
+            so maybe we will need to shift one step less.
+        */
+
+        let mut prev1 = channel_state.prev1;
+        let mut prev2 = channel_state.prev2;
+        let k1 = Self::FILTER_K1[filter] as i32;
+        let k2 = Self::FILTER_K2[filter] as i32;
+        let mut right_shift = 0;
+        let mut s_min = 0;
+        let mut s_max = 0;
+
+        for i in 0..28 {
+            let raw_sample = if i >= sample_limit {0} else {samples[(i*pitch) as usize]} as i32;
+            let prev_values = (k1*prev1 + k2*prev2 + (1 << 5)) >> 6;
+            let sample = raw_sample - prev_values;
+
+            if sample < s_min {
+                s_min = sample;
+            }
+
+            if sample > s_max {
+                s_max = sample;
+            }
+
+            prev2 = prev1;
+            prev1 = raw_sample;
+        }
+
+        while right_shift < shift_range && (s_max >> right_shift) > (0x7FFF >> shift_range) {
+            right_shift += 1;
+        }
+
+        while right_shift < shift_range && (s_min >> right_shift) < (-0x8000 >> shift_range) {
+            right_shift += 1;
+        }
+
+        let min_shift = shift_range - right_shift;
+        if 0 <= min_shift && min_shift <= shift_range {
+            Ok(min_shift)
+        }
+
+        else {
+            Err(Error::from_text(format!("0 <= {} && {} <= {} was not satisfied with min_shift: {}", min_shift, min_shift, shift_range, min_shift)))
+        }
+    }
+}
+
+impl std::default::Default for Encoder {
+    fn default() -> Self {
+        Encoder{left: ChannelState::default(), right: ChannelState::default()}
+    }
+}
+
+/// Predictor and error state the encoder keeps for one channel.
+#[derive(Clone)]
+struct ChannelState {
+    qerr: i32, // quantisation error
+    mse: u64, // mean square error
+    prev1: i32,
+    prev2: i32
+}
+
+impl std::default::Default for ChannelState {
+    fn default() -> Self {
+        ChannelState{qerr: 0, mse: 0, prev1: 0, prev2: 0}
+    }
+}
+
+/// Source and destination offsets for one `Encoder::encode` call.
+struct EncodingOffsets {
+    sample: usize,
+    sample_limit: i32,
+    pitch: i32,
+    data: usize,
+    data_shift: i32,
+    data_pitch: i32,
+}
+
+const STEREO_4BIT: ([EncodingOffsets; 4], [EncodingOffsets; 4]) = (
+    [
+        EncodingOffsets{sample: 0, sample_limit: 0, pitch: 2, data: 0x10, data_shift: 0, data_pitch: 4},
+        EncodingOffsets{sample: 1, sample_limit: 0, pitch: 2, data: 0x10, data_shift: 4, data_pitch: 4},
+        EncodingOffsets{sample: 56, sample_limit: -28, pitch: 2, data: 0x11, data_shift: 0, data_pitch: 4},
+        EncodingOffsets{sample: 56 + 1, sample_limit: -28, pitch: 2, data: 0x11, data_shift: 4, data_pitch: 4},
+    ],
+    [
+        EncodingOffsets{sample: 56*2, sample_limit: -28*2, pitch: 2, data: 0x12, data_shift: 0, data_pitch: 4},
+        EncodingOffsets{sample: 56*2 + 1, sample_limit: -28*2, pitch: 2, data: 0x12, data_shift: 4, data_pitch: 4},
+        EncodingOffsets{sample: 56*3, sample_limit: -28*3, pitch: 2, data: 0x13, data_shift: 0, data_pitch: 4},
+        EncodingOffsets{sample: 56*3 + 1, sample_limit: -28*3, pitch: 2, data: 0x13, data_shift: 4, data_pitch: 4}
+    ]
+);
+const MONO_4BIT: ([EncodingOffsets; 4], [EncodingOffsets; 4]) = (
+    [
+        EncodingOffsets{sample: 0, sample_limit: 0, pitch: 1, data: 0x10, data_shift: 0, data_pitch: 4},
+        EncodingOffsets{sample: 28, sample_limit: -28, pitch: 1, data: 0x10, data_shift: 4, data_pitch: 4},
+        EncodingOffsets{sample: 28*2, sample_limit: -28*2, pitch: 1, data: 0x11, data_shift: 0, data_pitch: 4},
+        EncodingOffsets{sample: 28*3, sample_limit: -28*3, pitch: 1, data: 0x11, data_shift: 4, data_pitch: 4},
+    ],
+    [
+        EncodingOffsets{sample: 28*4, sample_limit: -28*4, pitch: 1, data: 0x12, data_shift: 0, data_pitch: 4},
+        EncodingOffsets{sample: 28*5, sample_limit: -28*5, pitch: 1, data: 0x12, data_shift: 4, data_pitch: 4},
+        EncodingOffsets{sample: 28*6, sample_limit: -28*6, pitch: 1, data: 0x13, data_shift: 0, data_pitch: 4},
+        EncodingOffsets{sample: 28*7, sample_limit: -28*7, pitch: 1, data: 0x13, data_shift: 4, data_pitch: 4}
+    ]
+);
+
+const STEREO_8BIT: [EncodingOffsets;4] = [
+    EncodingOffsets{sample: 0, sample_limit: 0, pitch: 2, data: 0x10, data_shift: 0, data_pitch: 4},
+    EncodingOffsets{sample: 1, sample_limit: 0, pitch: 2, data: 0x11, data_shift: 0, data_pitch: 4},
+    EncodingOffsets{sample: 56, sample_limit: -28, pitch: 2, data: 0x12, data_shift: 0, data_pitch: 4},
+    EncodingOffsets{sample: 56 + 1, sample_limit: -28, pitch: 2, data: 0x13, data_shift: 0, data_pitch: 4},
+];
+const MONO_8BIT: [EncodingOffsets;4] = [
+    EncodingOffsets{sample: 0, sample_limit: 0, pitch: 1, data: 0x10, data_shift: 0, data_pitch: 4},
+    EncodingOffsets{sample: 28, sample_limit: -28, pitch: 1, data: 0x11, data_shift: 0, data_pitch: 4},
+    EncodingOffsets{sample: 28*2, sample_limit: -28*2, pitch: 1, data: 0x12, data_shift: 0, data_pitch: 4},
+    EncodingOffsets{sample: 28*3, sample_limit: -28*3, pitch: 1, data: 0x13, data_shift: 0, data_pitch: 4},
+];
\ No newline at end of file