diff --git a/src/Tools/psxfileconv/Cargo.toml b/src/Tools/psxfileconv/Cargo.toml index e21ebeef..d8799622 100644 --- a/src/Tools/psxfileconv/Cargo.toml +++ b/src/Tools/psxfileconv/Cargo.toml @@ -13,5 +13,6 @@ image = "0.24.7" hound = "3.5.1" paste = "1.0.14" png = "0.17.10" +rubato = "0.16.1" symphonia = "0.5.4" tool_helper = {path = "../tool_helper"} \ No newline at end of file diff --git a/src/Tools/psxfileconv/src/audio/my_xa/mod.rs b/src/Tools/psxfileconv/src/audio/my_xa/mod.rs index a20c974f..40fecb92 100644 --- a/src/Tools/psxfileconv/src/audio/my_xa/mod.rs +++ b/src/Tools/psxfileconv/src/audio/my_xa/mod.rs @@ -9,8 +9,6 @@ use tool_helper::{Error, Input}; pub struct Arguments { #[clap(value_enum, value_parser, default_value_t=Frequency::High)] frequency: Frequency, - #[clap(value_enum, value_parser, default_value_t=Orality::Stereo)] - orality: Orality, #[clap(value_enum, value_parser, default_value_t=SampleDepth::Normal)] sample_depth: SampleDepth, } @@ -21,12 +19,6 @@ pub enum Frequency { Low, } -#[derive(Copy, Clone, PartialEq, ValueEnum)] -pub enum Orality { - Stereo, - Mono, -} - #[derive(Copy, Clone, ValueEnum)] pub enum SampleDepth { Normal, @@ -34,6 +26,7 @@ pub enum SampleDepth { } pub fn convert(args: Arguments, input: Input, output: &mut dyn Write) -> Result<(), Error> { - let prepared_xa_audio = raw_audio::load_as_i16_audio(input, xa_audio::audio_conversion_settings(&args))?; + let prepared_xa_audio = raw_audio::load_as_i16_audio(input, args.frequency)?; + xa_audio::encode(prepared_xa_audio, output, &args) } \ No newline at end of file diff --git a/src/Tools/psxfileconv/src/audio/my_xa/raw_audio/error.rs b/src/Tools/psxfileconv/src/audio/my_xa/raw_audio/error.rs index ee190c34..c3045e78 100644 --- a/src/Tools/psxfileconv/src/audio/my_xa/raw_audio/error.rs +++ b/src/Tools/psxfileconv/src/audio/my_xa/raw_audio/error.rs @@ -1,24 +1,33 @@ use super::Error; use symphonia::core::errors::Error as SymError; +use rubato::{ResampleError, ResamplerConstructionError}; -fn generic_map_error(action: &str, error: SymError) -> Error { - Error::from_text(format!("symphonia error: {} during {}", error, action)) +fn generic_map_error(action: &str, error_str: String) -> Error { + Error::from_text(format!("symphonia error: {} during {}", error_str, action)) } pub fn probe(error: SymError) -> Error { - generic_map_error("probing of input", error) + generic_map_error("probing of input", error.to_string()) } pub fn decoder(error: SymError) -> Error { - generic_map_error("finding codec", error) + generic_map_error("finding codec", error.to_string()) } pub fn next_packet(error: SymError) -> Error { - generic_map_error("getting next raw packet", error) + generic_map_error("getting next raw packet", error.to_string()) } pub fn decode(error: SymError) -> Error { - generic_map_error("decoding of raw packet", error) + generic_map_error("decoding of raw packet", error.to_string()) +} + +pub fn resampler_construction(error: ResamplerConstructionError) -> Error { + generic_map_error("creating resampler", error.to_string()) +} + +pub fn resample(error: ResampleError) -> Error { + generic_map_error("resampling", error.to_string()) } pub fn find_track() -> Error { diff --git a/src/Tools/psxfileconv/src/audio/my_xa/raw_audio/mod.rs b/src/Tools/psxfileconv/src/audio/my_xa/raw_audio/mod.rs index f4803f25..d1572784 100644 --- a/src/Tools/psxfileconv/src/audio/my_xa/raw_audio/mod.rs +++ b/src/Tools/psxfileconv/src/audio/my_xa/raw_audio/mod.rs @@ -1,28 +1,152 @@ mod error; + +use super::Frequency as RequestedFrequencyType; +use rubato::{FftFixedIn, Resampler}; use symphonia::core::{ - audio::{Layout, SampleBuffer, SignalSpec}, + audio::{Layout, SampleBuffer}, codecs::{Decoder, DecoderOptions, CODEC_TYPE_NULL}, errors::Error as SymError, - formats::{FormatReader, FormatOptions}, + formats::{FormatOptions, FormatReader}, io::MediaSourceStream, meta::MetadataOptions, - probe::Hint + probe::Hint, sample }; use tool_helper::{Error, Input}; -pub type I16Samples = Vec::; -pub struct Settings { - frequency: u32, - layout: Layout +#[derive(Copy, Clone, PartialEq)] +pub enum Orality { + Stereo, + Mono, } -impl Settings { - pub fn new(frequency: u32, layout: Layout) -> Settings { - Settings{frequency, layout} +impl Orality { + fn as_channels(&self) -> usize { + match self { + Orality::Mono => 1, + Orality::Stereo => 2, + } } } -pub fn load_as_i16_audio(input: Input, settings: Settings) -> Result { +pub struct AudioSamples { + samples: Vec::, + orality: Orality, + frequency: u32, +} + +impl AudioSamples { + pub fn new(samples: Vec, channels: usize, frequency: u32) -> Result, Error> { + let orality = match channels { + 0 => return Err(Error::from_str("Input file has no audio channels")), + 1 => Orality::Mono, + 2 => Orality::Stereo, + _ => return Err(Error::from_str("Only Mono and Stereo input are supported")), + }; + Ok(AudioSamples{samples, orality, frequency}) + } + + // TODO: Reconsider passing zero_maker + pub fn to_planar(self, chunk_size: usize, zero_maker: fn()->T) -> Vec> { + let (_, chunk_remainder) = self.calculate_chunks(chunk_size); + let num_channel = self.orality.as_channels(); + let mut result = Vec::new(); + + for _ in 0..num_channel { + result.push(Vec::::new()); + } + + for (idx, sample) in self.samples.into_iter().enumerate() { + result[idx%num_channel].push(sample); + } + + for channel in &mut result { + println!("({} - {} = {})", chunk_size, chunk_remainder, chunk_size - chunk_remainder); + for _ in 0..(chunk_size - chunk_remainder) { + channel.push(zero_maker()); + } + } + + result + } + + pub fn calculate_chunks(&self, chunk_size: usize) -> (usize, usize) { + let sample_len = self.samples.len()/self.orality.as_channels(); + let div_value = chunk_size; + + (sample_len/div_value, sample_len%div_value) + } + + pub fn samples(&self) -> &Vec:: { + &self.samples + } + + pub fn orality(&self) -> Orality { + self.orality.clone() + } +} + +struct InternalAudioSamples { + planar_samples: Vec>, + frequency: u32, +} + +impl InternalAudioSamples { + pub fn new(planar_samples: Vec>, frequency: u32) -> InternalAudioSamples { + InternalAudioSamples{planar_samples, frequency} + } + + pub fn calculate_chunks(&self, chunk_size: usize) -> (usize, usize) { + if self.planar_samples.len() > 0 { + let sample_len = self.planar_samples[0].len(); + (sample_len/chunk_size, sample_len%chunk_size) + } + + else { + (0, 0) + } + } + + pub fn channels(&self) -> usize { + self.planar_samples.len() + } + + pub fn planar_slices(&self) -> Vec<&[f32]> { + let mut planar_slices = Vec::new(); + + for channel in &self.planar_samples { + planar_slices.push(channel.as_slice()); + } + planar_slices + } +} + +pub type PreparedAudioSamples = AudioSamples; + +pub fn load_as_i16_audio(input: Input, freq_type: RequestedFrequencyType) -> Result { + let raw_audio = load_raw_audio(input)?; + let raw_audio = resample(raw_audio, 37_800)?; + + test_write_wav(raw_audio)?; + Err(Error::not_implemented("Resampling not implemented")) +} + +fn test_write_wav(audio_samples: InternalAudioSamples) -> Result<(), Error> { + /*let spec = hound::WavSpec { + channels: 2, + sample_rate: 37_800, + bits_per_sample: 32, + sample_format: hound::SampleFormat::Float, + }; + + let mut file = hound::WavWriter::create("planschi.wav", spec).unwrap(); + for sample in audio_samples.samples { + file.write_sample(sample)?; + } + file.finalize()?;*/ + Ok(()) +} + +fn load_raw_audio(input: Input) -> Result { let media_stream = MediaSourceStream::new(Box::new(load_to_ram(input)?), Default::default()); let format = symphonia::default::get_probe().format(&Hint::new(), media_stream, &FormatOptions::default(), &MetadataOptions::default()).map_err(error::probe)?.format; let track = format.tracks().iter().find(|t| t.codec_params.codec != CODEC_TYPE_NULL).ok_or_else(error::find_track)?; @@ -31,12 +155,14 @@ pub fn load_as_i16_audio(input: Input, settings: Settings) -> Result, mut decoder: Box, settings: Settings, track_id: u32) -> Result { - let mut i16_audio = Vec::new(); - let mut cur_sample = None; +fn decode(mut format: Box, mut decoder: Box, track_id: u32) -> Result { + let mut samples = Vec::new(); + let mut channel_count = 0; + let mut frequency = 0; + let mut read_buffer = None; loop { // Get the next packet from the media format. @@ -45,7 +171,7 @@ fn decode(mut format: Box, mut decoder: Box, sett Err(err) => { if let SymError::IoError(io_err) = &err { if io_err.kind() == std::io::ErrorKind::UnexpectedEof { - return Ok(i16_audio); + return Ok(InternalAudioSamples::new(samples, frequency)); } } return Err(error::next_packet(err)); @@ -62,22 +188,88 @@ fn decode(mut format: Box, mut decoder: Box, sett // Decode the packet into audio samples. let packet = decoder.decode(&packet).map_err(error::decode)?; - if cur_sample.is_none() { + if read_buffer.is_none() { let duration = packet.capacity() as u64; + let specs = packet.spec(); + + channel_count = specs.channels.count(); + frequency = specs.rate; + read_buffer = Some(SampleBuffer::::new(duration, packet.spec().clone())); - cur_sample = Some(SampleBuffer::::new(duration, SignalSpec::new_with_layout(settings.frequency, settings.layout))); + for _ in 0..channel_count { + samples.push(Vec::new()); + } } - if let Some(cur_sample) = &mut cur_sample { - cur_sample.copy_interleaved_ref(packet); + if let Some(read_buffer) = &mut read_buffer { + read_buffer.copy_planar_ref(packet); + let cur_samples = read_buffer.samples(); + let mut cur_samples = cur_samples.chunks(cur_samples.len()/channel_count); - for sample in cur_sample.samples() { - i16_audio.push(*sample); + for dst_sample in &mut samples { + dst_sample.extend(cur_samples.next().ok_or_else(|| Error::from_str("Not enough channels in input as expected"))?); } } } } +fn resample(input: InternalAudioSamples, target_frequency: u32) -> Result { + const HIGH_QUALITY_CHUNKS:usize = (1024*10)*100; + const SUB_CHUNKS:usize = 20; + + let (chunk_size, sub_chunks) = (HIGH_QUALITY_CHUNKS, SUB_CHUNKS); + let (chunk_count, chunk_remainder) = input.calculate_chunks(chunk_size); + let mut planar_input = input.planar_slices(); + let mut resampler = FftFixedIn::::new(input.frequency as usize, target_frequency as usize, chunk_size, sub_chunks, input.channels()).map_err(error::resampler_construction)?; + let mut planar_output = { + let mut planar_output = Vec::new(); + + for _ in 0..planar_input.len() { + planar_output.push(Vec::::new()); + } + planar_output + }; + + for _chunk in 0..chunk_count { + let new_samples = resampler.process(&planar_input, None).map_err(error::resample)?; + for (channel, slice) in planar_input.iter_mut().enumerate() { + *slice = &slice[chunk_size..]; + planar_output[channel].extend(new_samples[channel].iter()); + } + } + + let mut process_partial = |input_option: Option<&[&[f32]]>| -> Result<(), Error> { + let new_samples = resampler.process_partial(input_option, None).map_err(error::resample)?; + for (channel, channel_samples) in new_samples.into_iter().enumerate() { + planar_output[channel].extend(channel_samples.iter()); + } + + Ok(()) + }; + + if chunk_remainder > 0 { + process_partial(Some(&planar_input))?; + } + process_partial(None)?; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + let spec = hound::WavSpec { + channels: 1, + sample_rate: 37_800, + bits_per_sample: 32, + sample_format: hound::SampleFormat::Float, + }; + + let mut file = hound::WavWriter::create("planschi.wav", spec).unwrap(); + for sample in &planar_output[0] { + file.write_sample(*sample)?; + } + file.finalize()?; + + + Err(Error::not_implemented("resample")) +} + fn load_to_ram(mut input: Input) -> Result>, Error> { let mut buffer = Vec::default(); diff --git a/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/mod.rs b/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/mod.rs index 8aa1ac76..d6d54a6f 100644 --- a/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/mod.rs +++ b/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/mod.rs @@ -1,29 +1,15 @@ mod xapcm; use super::Arguments; -use super::raw_audio::{I16Samples, Settings}; +use super::raw_audio::PreparedAudioSamples; use std::io::Write; -use symphonia::core::audio::Layout; use tool_helper::Error; -const HIGH_FREQUENCY:u32 = 37_800; -const LOW_FREQUENCY:u32 = 18_900; +const HIGH_FREQUENCY:u32 = 37_800; +const LOW_FREQUENCY:u32 = 18_900; -pub fn audio_conversion_settings(arguments: &Arguments) -> Settings { - let frequency = match arguments.frequency { - super::Frequency::High => HIGH_FREQUENCY, - super::Frequency::Low => LOW_FREQUENCY, - }; - let layout = match arguments.orality { - super::Orality::Stereo => Layout::Stereo, - super::Orality::Mono => Layout::Mono, - }; - - Settings::new(frequency, layout) -} - -pub fn encode(input: I16Samples, output: &mut dyn Write, arguments: &Arguments) -> Result<(), Error> { - let mut encoder = xapcm::Encoder::new(&input, arguments.clone()); +pub fn encode(input: PreparedAudioSamples, output: &mut dyn Write, arguments: &Arguments) -> Result<(), Error> { + let mut encoder = xapcm::Encoder::new(&input.samples(), arguments.clone(), input.orality()); let mut sector_count = 0; while let Some(xa_sector) = encoder.encode_next_xa_sector()? { diff --git a/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/xapcm.rs b/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/xapcm.rs index 8490e89c..fe4e0e34 100644 --- a/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/xapcm.rs +++ b/src/Tools/psxfileconv/src/audio/my_xa/xa_audio/xapcm.rs @@ -1,4 +1,4 @@ -use crate::audio::my_xa::{Arguments, Orality, SampleDepth}; +use crate::audio::my_xa::{Arguments, raw_audio::Orality, SampleDepth}; use tool_helper::Error; pub struct Encoder<'a> { @@ -6,6 +6,7 @@ pub struct Encoder<'a> { right: ChannelState, source: &'a[i16], arguments: Arguments, + orality: Orality, samples_per_block: i32, sample_limit: i32 } @@ -16,9 +17,10 @@ impl<'a> Encoder<'a> { const FILTER_K1: [i16; 5] = [0, 60, 115, 98, 122]; const FILTER_K2: [i16; 5] = [0, 0, -52, -55, -60]; - pub fn new(source: &[i16], arguments: Arguments) -> Encoder { - let (samples_per_block, sample_limit) = Self::samples_per_block_and_limit(&source, &arguments); - Encoder{left: ChannelState::default(), right: ChannelState::default(), source, arguments, samples_per_block, sample_limit} + // TODO: Arguments are not needed anymore, PreparedAudioSamples has all the information we need + pub fn new(source: &[i16], arguments: Arguments, orality: Orality) -> Encoder { + let (samples_per_block, sample_limit) = Self::samples_per_block_and_limit(&source, &arguments, &orality); + Encoder{left: ChannelState::default(), right: ChannelState::default(), source, arguments, orality, samples_per_block, sample_limit} } pub fn encode_next_xa_sector(&mut self) -> Result, Error> { @@ -28,7 +30,7 @@ impl<'a> Encoder<'a> { let mut sector = [0u8; 0x930]; sector[0x12] = 0x24 | 0x40; - sector[0x13] = 1 | 0 | 0; + sector[0x13] = 1 | 0 | 0; // < TODO: Actually consider settings let mut dst = &mut sector[0x18..]; for _ in 0..Self::BLOCKS_PER_SECTOR { @@ -53,7 +55,7 @@ impl<'a> Encoder<'a> { let channels = [&mut self.left, &mut self.right]; match self.arguments.sample_depth { SampleDepth::Normal => { - let (modulo, offset) = if self.arguments.orality == Orality::Stereo {(2, &STEREO_4BIT)} else {(1, &MONO_4BIT)}; + let (modulo, offset) = if self.orality == Orality::Stereo {(2, &STEREO_4BIT)} else {(1, &MONO_4BIT)}; let (first_offset, second_offset) = offset; for (offset_idx, offset_set) in [first_offset, second_offset].iter().enumerate() { @@ -65,7 +67,7 @@ impl<'a> Encoder<'a> { } }, SampleDepth::High => { - let (modulo, offset_set) = if self.arguments.orality == Orality::Stereo {(2, &STEREO_8BIT)} else {(1, &MONO_8BIT)}; + let (modulo, offset_set) = if self.orality == Orality::Stereo {(2, &STEREO_8BIT)} else {(1, &MONO_8BIT)}; for (idx, offset) in offset_set.iter().enumerate() { let byte = Self::encode(channels[idx%modulo], &samples[offset.sample..], sample_limit + offset.sample_limit, offset.pitch, &mut data[offset.data..], offset.data_shift, offset.data_pitch, Self::XA_ADPCM_FILTER_COUNT, SHIFT_RANGE_8BPS)?; data[idx] = byte; @@ -78,7 +80,7 @@ impl<'a> Encoder<'a> { } fn encode(channel_state: &mut ChannelState, samples: &[i16], sample_limit: i32, pitch: i32, data: &mut [u8], data_shift: i32, data_pitch: i32, filter_count: i32, shift_range: i32) -> Result { - let mut best_mse = 1u64 << 50u64; + let mut best_mse = 1i64 << 50i64; let mut best_filer = 0; let mut best_sample_shift = 0; @@ -91,7 +93,7 @@ impl<'a> Encoder<'a> { let min_shift = if true_min_shift - 1 < 0 {0} else {true_min_shift - 1}; let max_shift = if true_min_shift + 1 > shift_range {shift_range} else {true_min_shift + 1}; - for sample_shift in min_shift..max_shift { + for sample_shift in min_shift..=max_shift { let mut proposed = channel_state.clone(); Self::attempt_encode(&mut proposed, samples, sample_limit, pitch, data, data_shift, data_pitch, filter, sample_shift, shift_range)?; if best_mse > proposed.mse { @@ -130,9 +132,9 @@ impl<'a> Encoder<'a> { // TODO: And this? if sample_enc < (-0x8000 >> shift_range) {sample_enc = -0x8000 >> shift_range} if sample_enc > ( 0x7FFF >> shift_range) {sample_enc = 0x7FFF >> shift_range} - sample_enc &= sample_mask as i32; + sample_enc &= sample_mask as i32; //< v TODO: Redundant! - let mut sample_dec = ((sample_enc & sample_mask as i32) << shift_range) & 0xFFFF; + let mut sample_dec = (((sample_enc & sample_mask as i32) << shift_range) as i16) as i32; sample_dec >>= min_shift; sample_dec += previous_value; if sample_dec > 0x7FFF {sample_dec = 0x7FFF} @@ -145,7 +147,7 @@ impl<'a> Encoder<'a> { data[(i*data_pitch) as usize] = ((data[(i*pitch) as usize] & nondata_mask) as i32 | (sample_enc << data_shift)) as u8; - out_channel_state.mse += sample_error as u64*sample_error as u64; + out_channel_state.mse += sample_error as i64*sample_error as i64; out_channel_state.prev2 = out_channel_state.prev1; out_channel_state.prev1 = sample_dec; } @@ -165,8 +167,8 @@ impl<'a> Encoder<'a> { let mut prev1 = channel_state.prev1; let mut prev2 = channel_state.prev2; - let k1 = Self::FILTER_K1[filter] as i32; - let k2 = Self::FILTER_K2[filter] as i32; + let k1 = Self::FILTER_K1[filter] as i32; + let k2 = Self::FILTER_K2[filter] as i32; let mut right_shift = 0; let mut s_min = 0; let mut s_max = 0; @@ -206,13 +208,13 @@ impl<'a> Encoder<'a> { } } - fn samples_per_block_and_limit(input: &[i16], arguments: &Arguments) -> (i32, i32) { + fn samples_per_block_and_limit(input: &[i16], arguments: &Arguments, orality: &Orality) -> (i32, i32) { let samples_per_block = match arguments.sample_depth { SampleDepth::Normal => 224, SampleDepth::High => 112, }; - let sample_limit = match arguments.orality { + let sample_limit = match orality { Orality::Stereo => input.len()*2, Orality::Mono => input.len(), }; @@ -224,7 +226,7 @@ impl<'a> Encoder<'a> { #[derive(Clone)] struct ChannelState { qerr: i32, // quanitisation error - mse: u64, // mean square error + mse: i64, // mean square error prev1: i32, prev2: i32 }