Broken LZ4 algorithm! The decompression seems to work now (needs improvement?), but the conversion tools emit 64K block sizes, which is unhelpful for us

This commit is contained in:
Jaby 2022-12-29 23:18:37 +01:00
parent 87d7bf8efa
commit cdb3abd43f
7 changed files with 250 additions and 15 deletions

View File

@ -12,6 +12,25 @@ namespace JabyEngine {
// Builds a range over `size` elements beginning at `start`.
// Only the pointer and the count are stored; nothing is copied.
constexpr ArrayRange(T* start, size_t size) : start(start), size(size) {
}
// Drops the first `elements` entries from the range by moving `start` forward
// and shrinking `size` by the same amount.
// No bounds check is done here: callers must not skip more than `size`.
constexpr void skip(size_t elements) {
this->start += elements;
this->size -= elements;
}
// Removes the front element from the range and returns a reference to it.
// The range is not checked for emptiness; callers test it first.
T& pop() {
    T* const front = this->start;
    this->skip(1);
    return *front;
}
// A range converts to `true` while it still holds at least one element.
constexpr operator bool() const {
return this->size > 0;
}
// Tells whether at least `elements` entries are left in the range.
constexpr bool operator>=(size_t elements) const {
return this->size >= elements;
}
// Accesses the element `idx` positions after the current start of the range.
// The index is not compared against `size`.
constexpr T& operator[](size_t idx) {
return this->start[idx];
}

View File

@ -1,6 +1,7 @@
#ifndef __JABYENGINE_LZ4_DECOMPRESSOR_HPP__
#define __JABYENGINE_LZ4_DECOMPRESSOR_HPP__
#include "../../stddef.h"
#include "array_range.hpp"
#include "types.hpp"
namespace JabyEngine {
@ -24,12 +25,37 @@ namespace JabyEngine {
};
private:
struct State {};
// Progress of the decoder, kept as a member so the decoder can carry it
// from one step to the next.
struct State {
// The stage the decoder will run next; one handler function exists per stage.
enum struct Step {
ReadToken,
ObtainLiteralLength,
CopyLiterals,
ObtainMatchOffset,
ObtainMatchLength,
CopyMatch,
};
// A fresh state starts by reading a token.
Step step = Step::ReadToken;
// Literal bytes that still have to be copied for the current sequence.
size_t literal_length = 0;
// Length of the current match.
size_t match_length = 0;
// Offset of the current match; 0xFFFF is the initial placeholder value.
uint16_t match_offset = 0xFFFF;
State() = default;
};
private:
uint8_t* dst_adr = nullptr;
State state;
static bool obtain_any_length(ArrayRange<const uint8_t>& data, uint32_t &dst_length);
pair<bool, Result> read_token(ArrayRange<const uint8_t>& data);
pair<bool, Result> obtain_literal_length(ArrayRange<const uint8_t>& data);
pair<bool, Result> copy_literals(ArrayRange<const uint8_t>& data);
pair<bool, Result> obtain_match_offset(ArrayRange<const uint8_t>& data);
pair<bool, Result> obtain_match_length(ArrayRange<const uint8_t>& data);
pair<bool, Result> copy_match(ArrayRange<const uint8_t>& data);
public:
LZ4Decompressor() = default;
LZ4Decompressor(uint8_t* dst_adr) : LZ4Decompressor() {
@ -39,7 +65,7 @@ namespace JabyEngine {
void setup(uint8_t* dst_adr);
void reset();
Result process(const uint8_t* data, size_t size);
Result process(ArrayRange<const uint8_t> data);
};
}

View File

@ -2,6 +2,12 @@
#define __JABYENGINE_TYPES_HPP__
namespace JabyEngine {
// Minimal two-value aggregate with public members `first` and `second`.
template<typename T, typename S>
struct pair {
T first;
S second;
};
enum struct Progress {
InProgress = 0,
Done,

View File

@ -1,19 +1,202 @@
#include <PSX/Auxiliary/lz4_decompressor.hpp>
#include <stdio.h>
namespace JabyEngine {
// Moves `size` bytes from `src` to `dst`, advancing both while doing so.
// The copy is strictly byte-by-byte and front-to-back: copy_match passes a
// source range that lies inside the output that is being written, so bytes
// stored early in a call may be read again later in the same call.
static void memcpy(uint8_t* &dst, ArrayRange<const uint8_t> &src, size_t size) {
    size_t remaining = size;
    while(remaining > 0) {
        *dst = src.pop();
        ++dst;
        --remaining;
    }
}
// Adds extension bytes from `data` onto `dst_length`.
// Every byte consumed is added; a byte of 255 means another byte follows,
// any other value ends the length.
// Returns true once a terminating byte was consumed, false when `data` ran
// out first (the partial sum stays in `dst_length`).
bool LZ4Decompressor :: obtain_any_length(ArrayRange<const uint8_t>& data, uint32_t &dst_length) {
    static constexpr uint8_t more_follows = 255;

    for(;;) {
        if(!data) {
            return false;
        }

        const uint8_t length_byte = data.pop();
        dst_length += length_byte;
        if(length_byte != more_follows) {
            return true;
        }
    }
}
// Consumes one token byte and splits it into the two 4 bit length fields:
// the upper nibble seeds the literal length, the lower nibble the match length.
// The literal length then selects the next step (15 = extension bytes follow,
// 0 = no literals, anything else = copy that many literals).
// Returns {true, in-progress} after a token was read, {false, in-progress}
// when `data` is empty.
pair<bool, LZ4Decompressor::Result> LZ4Decompressor :: read_token(ArrayRange<const uint8_t>& data) {
    if(!data) {
        return {false, Result::new_in_progress(0)};
    }

    const uint8_t token = data.pop();
    this->state.literal_length = (token >> 4) & 0x0F;
    this->state.match_length   = token & 0x0F;
    printf("LiteralLength: %llu MatchLength: %llu\n", this->state.literal_length, this->state.match_length);

    switch(this->state.literal_length) {
        case 15:
            this->state.step = State::Step::ObtainLiteralLength;
            break;

        case 0:
            this->state.step = State::Step::ObtainMatchOffset;
            break;

        default:
            this->state.step = State::Step::CopyLiterals;
            break;
    }
    return {true, Result::new_in_progress(0)};
}
// Extends the literal length with the extension bytes found in `data`.
// Moves on to CopyLiterals once the length is complete; otherwise the step is
// left untouched so the next chunk continues the accumulation.
pair<bool, LZ4Decompressor::Result> LZ4Decompressor :: obtain_literal_length(ArrayRange<const uint8_t>& data) {
    const bool length_complete = LZ4Decompressor::obtain_any_length(data, this->state.literal_length);
    if(!length_complete) {
        return {false, Result::new_in_progress(0)};
    }

    printf("New LiteralLength: %llu\n", this->state.literal_length);
    this->state.step = State::Step::CopyLiterals;
    return {true, Result::new_in_progress(0)};
}
// Copies as many pending literal bytes as `data` can currently provide to the
// output and reduces the pending literal length accordingly.
// Returns {true, in-progress(n)} when the literal run is finished (next step:
// ObtainMatchOffset) and {false, in-progress(n)} when the input ran out first;
// `n` is always the number of bytes written by this call.
pair<bool, LZ4Decompressor::Result> LZ4Decompressor :: copy_literals(ArrayRange<const uint8_t>& data) {
    if(!data) {
        return {false, Result::new_in_progress(0)};
    }

    const auto bytes_copy = (this->state.literal_length > data.size) ? data.size : this->state.literal_length;
    printf("Copy %llu bytes of literal\n", bytes_copy);
    memcpy(this->dst_adr, data, bytes_copy);
    this->state.literal_length -= bytes_copy;

    if(this->state.literal_length == 0) {
        this->state.step = State::Step::ObtainMatchOffset;
        return {true, Result::new_in_progress(bytes_copy)};
    }

    // The chunk ended in the middle of the literal run. The bytes above were
    // still written, so they must be reported; returning 0 here made process()
    // under-count its bytes_ready total.
    return {false, Result::new_in_progress(bytes_copy)};
}
// Reads the 16 bit match offset, low byte first, from `data`.
// The offset may be split across two calls: while match_offset still holds the
// 0xFFFF placeholder nothing has been read yet; after a lone first byte it
// holds that byte (0x00XX) and the next call only adds the high byte.
// Returns {true, in-progress} once both bytes are known and the next step is
// selected, {false, in-progress} when more input is required.
pair<bool, LZ4Decompressor::Result> LZ4Decompressor :: obtain_match_offset(ArrayRange<const uint8_t>& data) {
    // Picks the follow-up step: a match length nibble of 15 has extension bytes.
    static const auto state_complete = [](State& state) -> pair<bool, Result> {
        if(state.match_length == 15) {
            state.step = State::Step::ObtainMatchLength;
        }

        else {
            state.step = State::Step::CopyMatch;
        }
        // match_offset is a uint16_t, so print it as a plain unsigned value.
        printf("New MatchOffset: %u\n", static_cast<unsigned int>(state.match_offset));
        return {true, Result::new_in_progress(0)};
    };

    if(!data) {
        return {false, Result::new_in_progress(0)};
    }

    if(this->state.match_offset == 0xFFFF) {
        // Nothing of the offset has been read so far
        if(data.size >= sizeof(uint16_t)) {
            // Both bytes are available. They are combined by hand instead of
            // casting the byte pointer to uint16_t*: the stream position has no
            // alignment guarantee, and this matches the split path below.
            const uint16_t low_byte  = data.pop();
            const uint16_t high_byte = data.pop();

            this->state.match_offset = static_cast<uint16_t>(low_byte | (high_byte << 8));
            return state_complete(this->state);
        }

        // Only the low byte is available; the high byte comes with the next chunk
        this->state.match_offset = static_cast<uint16_t>(data.pop());
        return {false, Result::new_in_progress(0)};
    }

    // The low byte was stored by an earlier call, complete it with the high byte
    this->state.match_offset |= (static_cast<uint16_t>(data.pop()) << 8);
    return state_complete(this->state);
}
// Extends the match length with the extension bytes found in `data`.
// Moves on to CopyMatch once the length is complete; otherwise the step is
// left untouched so the next chunk continues the accumulation.
pair<bool, LZ4Decompressor::Result> LZ4Decompressor :: obtain_match_length(ArrayRange<const uint8_t>& data) {
    const bool length_complete = LZ4Decompressor::obtain_any_length(data, this->state.match_length);
    if(!length_complete) {
        return {false, Result::new_in_progress(0)};
    }

    this->state.step = State::Step::CopyMatch;
    printf("New match length: %llu\n", this->state.match_length);
    return {true, Result::new_in_progress(0)};
}
// Repeats already written output: match_length + 4 bytes are copied from
// `match_offset` bytes behind the current write position to the write position.
// Afterwards the offset is set back to its 0xFFFF placeholder and the decoder
// returns to ReadToken. No input is consumed; `data` only decides the result:
// input left -> {true, in-progress(n)}, input empty -> {false, done(n)}.
pair<bool, LZ4Decompressor::Result> LZ4Decompressor :: copy_match(ArrayRange<const uint8_t>& data) {
    static constexpr size_t min_match_length = 4;

    // The stored length excludes the fixed minimum, so add it back first
    this->state.match_length += min_match_length;
    const size_t copy_length = this->state.match_length;

    ArrayRange<const uint8_t> history(this->dst_adr - this->state.match_offset, copy_length);
    memcpy(this->dst_adr, history, copy_length);

    this->state.match_offset = 0xFFFF;
    this->state.step         = State::Step::ReadToken;

    if(data) {
        return {true, Result::new_in_progress(copy_length)};
    }
    return {false, Result::new_done(copy_length)};
}
// Points the decompressor at a new output address and resets its state.
void LZ4Decompressor :: setup(uint8_t* dst_adr) {
this->dst_adr = dst_adr;
LZ4Decompressor::reset();
}
// Replaces the decoder state with a default-constructed one.
// The output address is left as it is.
void LZ4Decompressor :: reset() {
this->state = State();
}
LZ4Decompressor::Result LZ4Decompressor :: process(const uint8_t* data, size_t size) {
for(size_t n = 0; n < size; n++) {
this->dst_adr[n] = data[n];
// Feeds one chunk of compressed input through the state machine.
// The handler for the current step is run over and over until one of them
// asks to stop (input exhausted, stream finished or unknown step). The result
// of that last handler is returned, with bytes_ready replaced by the sum of
// the bytes all handlers produced during this call.
LZ4Decompressor::Result LZ4Decompressor :: process(ArrayRange<const uint8_t> data) {
    // Runs the handler that belongs to the current step
    const auto run_step = [this](ArrayRange<const uint8_t>& input, size_t produced) -> pair<bool, Result> {
        switch(this->state.step) {
            case State::Step::ReadToken:
                printf("Read Token! %llu bytes left (%llu bytes ready)\n", input.size, produced);
                return this->read_token(input);

            case State::Step::ObtainLiteralLength:
                printf("Obtain literal length! %llu bytes left (%llu bytes ready)\n", input.size, produced);
                return this->obtain_literal_length(input);

            case State::Step::CopyLiterals:
                printf("Copy Literals! %llu bytes left (%llu bytes ready)\n", input.size, produced);
                return this->copy_literals(input);

            case State::Step::ObtainMatchOffset:
                printf("Obtain match offset! %llu bytes left (%llu bytes ready)\n", input.size, produced);
                return this->obtain_match_offset(input);

            case State::Step::ObtainMatchLength:
                printf("Obtain match length! %llu bytes left (%llu bytes ready)\n", input.size, produced);
                return this->obtain_match_length(input);

            case State::Step::CopyMatch:
                printf("Copy match! %llu bytes left\n", input.size);
                return this->copy_match(input);

            default:
                return {false, Result::new_error()};
        }
    };

    size_t total_ready = 0;
    for(;;) {
        auto step_outcome = run_step(data, total_ready);

        total_ready += step_outcome.second.bytes_ready;
        if(!step_outcome.first) {
            step_outcome.second.bytes_ready = total_ready;
            return step_outcome.second;
        }
    }

    // Not reachable, the loop above only ends by returning
    return Result::new_error();
}
}

View File

@ -17,10 +17,10 @@ namespace JabyEngine {
static size_t decompress_logo() {
LZ4Decompressor lz4_decomp(reinterpret_cast<uint8_t*>(&__boot_loader_end));
const auto [progress, bytes_ready] = lz4_decomp.process(SplashScreen, sizeof(SplashScreen));
const auto [progress, bytes_ready] = lz4_decomp.process(ArrayRange(SplashScreen + 11, (sizeof(SplashScreen) - 11 - 4)));
switch(progress) {
case Progress::InProgress:
printf("Decompressing still in progress...\n");
printf("Decompressing still in progress... %llu\n", bytes_ready);
break;
case Progress::Error:
@ -28,7 +28,7 @@ namespace JabyEngine {
break;
case Progress::Done:
printf("Done decompressing\n");
printf("Done decompressing: %llu Bytes ready\n", bytes_ready);
break;
}

View File

@ -21,13 +21,14 @@ struct CommandLine {
// Conversion selected on the command line; `run_main` matches on it.
// Plain `//` comments are used on purpose: `///` doc comments on a clap
// subcommand are picked up as help text and would change the CLI output.
#[derive(Subcommand)]
enum SubCommands {
// Copies the input to the destination buffer without converting it.
Nothing,
// Passes the input and these arguments to `reduced_tim::convert`.
SimpleTIM(reduced_tim::Arguments)
}
fn run_main() -> Result<(), Error> {
match CommandLine::try_parse() {
Ok(cmd) => {
let input = tool_helper::open_input(cmd.input_file)?;
let mut input = tool_helper::open_input(cmd.input_file)?;
let mut buffer = Vec::<u8>::new();
let mut output_file = tool_helper::open_output(cmd.output_file)?;
let dst_buffer = {
@ -41,6 +42,9 @@ fn run_main() -> Result<(), Error> {
};
match cmd.sub_command {
SubCommands::Nothing => {
std::io::copy(&mut input, dst_buffer)?;
},
SubCommands::SimpleTIM(args) => {
reduced_tim::convert(args, input, dst_buffer)?;
}
@ -48,10 +52,7 @@ fn run_main() -> Result<(), Error> {
// We encoded the file to a temporary buffer and now need to write it
if cmd.compress_lz4 {
println!("Buffer-Size: {} ({} Sectors)", buffer.len(), (buffer.len() + 2047)/2048);
let buffer = tool_helper::compress::lz4(&buffer, 16)?;
println!("New buffer-Size: {} ({} Sectors)", buffer.len(), (buffer.len() + 2047)/2048);
output_file.write(&buffer)?;
}

View File

@ -2,7 +2,7 @@ use super::Error;
use lz4::EncoderBuilder;
pub fn lz4(data: &Vec<u8>, compression_level: u32) -> Result<Vec<u8>, Error> {
let mut lz4_encoder = EncoderBuilder::new().level(compression_level).build(Vec::<u8>::new())?;
let mut lz4_encoder = EncoderBuilder::new().level(compression_level).checksum(lz4::ContentChecksum::NoChecksum).build(Vec::<u8>::new())?;
std::io::copy(&mut&data[..], &mut lz4_encoder)?;
let (output, result) = lz4_encoder.finish();