use std::{ collections::HashMap, io::{Read, Seek, Write}, }; use crate::binio::BitIo; use crate::common::CzError; use byteorder_lite::{ReadBytesExt, WriteBytesExt, LE}; /// The size of compressed data in each chunk #[derive(Debug, Clone, Copy)] pub struct ChunkInfo { /// The size of the data when compressed pub size_compressed: usize, /// The size of the original uncompressed data pub size_raw: usize, } /// A CZ# file's information about compression chunks #[derive(Default, Debug, Clone)] pub struct CompressionInfo { /// Number of compression chunks pub chunk_count: usize, /// Total size of the data when compressed pub total_size_compressed: usize, /// Total size of the original uncompressed data pub _total_size_raw: usize, /// The compression chunk information pub chunks: Vec, /// Length of the compression chunk info pub length: usize, } impl CompressionInfo { pub fn write_into(&self, output: &mut T) -> Result<(), std::io::Error> { output.write_u32::(self.chunk_count as u32)?; for chunk in &self.chunks { output.write_u32::(chunk.size_compressed as u32)?; output.write_u32::(chunk.size_raw as u32)?; } Ok(()) } } /// Get info about the compression chunks /// /// These are defined by a length value, followed by the number of data chunks /// that length value says split into compressed and original size u32 values pub fn get_chunk_info(bytes: &mut T) -> Result { let parts_count = bytes.read_u32::()?; let mut part_sizes = vec![]; let mut total_size = 0; let mut total_size_raw = 0; // Loop over the compressed bytes for _ in 0..parts_count { let compressed_size = bytes.read_u32::()?; total_size = i32::wrapping_add(total_size, compressed_size as i32); let raw_size = bytes.read_u32::()?; total_size_raw = u32::wrapping_add(total_size_raw, raw_size); part_sizes.push(ChunkInfo { size_compressed: compressed_size as usize, size_raw: raw_size as usize, }); } Ok(CompressionInfo { chunk_count: parts_count as usize, total_size_compressed: total_size as usize, _total_size_raw: total_size_raw as usize, chunks: part_sizes, length: bytes.stream_position()? as usize, }) } /// Decompress an LZW compressed stream like CZ1 pub fn decompress( input: &mut T, chunk_info: &CompressionInfo, ) -> Result, CzError> { let mut output_buf: Vec = vec![]; for block in &chunk_info.chunks { let mut buffer = vec![0u16; block.size_compressed]; for word in buffer.iter_mut() { *word = input.read_u16::().unwrap(); } let raw_buf = decompress_lzw(&buffer, block.size_raw); output_buf.write_all(&raw_buf)?; } Ok(output_buf) } fn decompress_lzw(input_data: &[u16], size: usize) -> Vec { let mut dictionary: HashMap> = HashMap::new(); for i in 0..256 { dictionary.insert(i as u16, vec![i as u8]); } let mut dictionary_count = dictionary.len() as u16; let mut w = vec![0]; let mut result = Vec::with_capacity(size); input_data.iter().for_each(|element| { let mut entry; if let Some(x) = dictionary.get(element) { entry = x.clone(); } else if *element == dictionary_count { entry = w.clone(); entry.push(w[0]); } else { panic!("Bad compressed element: {}", element) } result.write_all(&entry).unwrap(); w.push(entry[0]); dictionary.insert(dictionary_count, w.clone()); dictionary_count += 1; w = entry; }); result } /// Decompress an LZW compressed stream like CZ2 pub fn decompress2( input: &mut T, chunk_info: &CompressionInfo, ) -> Result, CzError> { let mut output_buf: Vec = vec![]; for block in &chunk_info.chunks { let mut buffer = vec![0u8; block.size_compressed]; input.read_exact(&mut buffer).unwrap(); let raw_buf = decompress_lzw2(&buffer, block.size_raw); output_buf.write_all(&raw_buf).unwrap(); } Ok(output_buf) } fn decompress_lzw2(input_data: &[u8], size: usize) -> Vec { let data = input_data; let mut dictionary = HashMap::new(); for i in 0..256 { dictionary.insert(i as u64, vec![i as u8]); } let mut dictionary_count = dictionary.len() as u64; let mut result = Vec::with_capacity(size); let data_size = input_data.len(); let mut bit_io = BitIo::new(data.to_vec()); let mut w = dictionary.get(&0).unwrap().clone(); let mut element; loop { if bit_io.byte_offset() >= data_size - 1 { break; } let flag = bit_io.read_bit(1); if flag == 0 { element = bit_io.read_bit(15); } else { element = bit_io.read_bit(18); } let mut entry; if let Some(x) = dictionary.get(&element) { // If the element was already in the dict, get it entry = x.clone() } else if element == dictionary_count { entry = w.clone(); entry.push(w[0]) } else { panic!( "Bad compressed element {} at offset {}", element, bit_io.byte_offset() ) } //println!("{}", element); result.write_all(&entry).unwrap(); w.push(entry[0]); dictionary.insert(dictionary_count, w.clone()); dictionary_count += 1; w.clone_from(&entry); } result } pub fn compress(data: &[u8], size: usize) -> (Vec, CompressionInfo) { let mut size = size; if size == 0 { size = 0xFEFD } let mut part_data; let mut offset = 0; let mut count; let mut last = Vec::new(); let mut output_buf: Vec = vec![]; let mut output_info = CompressionInfo { _total_size_raw: data.len(), ..Default::default() }; loop { (count, part_data, last) = compress_lzw(&data[offset..], size, last); if count == 0 { break; } offset += count; for d in &part_data { output_buf.write_all(&d.to_le_bytes()).unwrap(); } output_info.chunks.push(ChunkInfo { size_compressed: part_data.len(), size_raw: count, }); output_info.chunk_count += 1; } if output_info.chunk_count == 0 { panic!("No chunks compressed!") } else if output_info.chunk_count != 1 { output_info.chunks[0].size_raw -= 1; output_info.chunks[output_info.chunk_count - 1].size_raw += 1; } output_info.total_size_compressed = output_buf.len() / 2; (output_buf, output_info) } fn compress_lzw(data: &[u8], size: usize, last: Vec) -> (usize, Vec, Vec) { let mut count = 0; let mut dictionary = HashMap::new(); for i in 0..=255 { dictionary.insert(vec![i], i as u16); } let mut dictionary_count = (dictionary.len() + 1) as u16; let mut element = Vec::new(); if !last.is_empty() { element = last } let mut compressed = Vec::with_capacity(size); for c in data { let mut entry = element.clone(); entry.push(*c); if dictionary.contains_key(&entry) { element = entry } else { compressed.push(*dictionary.get(&element).unwrap()); dictionary.insert(entry, dictionary_count); element = vec![*c]; dictionary_count += 1; } count += 1; if size > 0 && compressed.len() == size { break; } } let last_element = element; if compressed.is_empty() { if !last_element.is_empty() { for c in last_element { compressed.push(*dictionary.get(&vec![c]).unwrap()); } } return (count, compressed, Vec::new()); } else if compressed.len() < size { if !last_element.is_empty() { compressed.push(*dictionary.get(&last_element).unwrap()); } return (count, compressed, Vec::new()); } (count, compressed, last_element) } pub fn compress2(data: &[u8]) -> (Vec, CompressionInfo) { let mut part_data; let mut offset = 0; let mut count; let mut last: Vec = Vec::new(); let mut output_buf: Vec = Vec::new(); let mut output_info = CompressionInfo { _total_size_raw: data.len(), ..Default::default() }; loop { (count, part_data, last) = compress_lzw2(&data[offset..], last); if count == 0 { break; } offset += count; output_buf.write_all(&part_data).unwrap(); output_info.chunks.push(ChunkInfo { size_compressed: part_data.len(), size_raw: count, }); output_info.chunk_count += 1; } if output_info.chunk_count == 0 { panic!("No chunks compressed!") } output_info.total_size_compressed = output_buf.len(); (output_buf, output_info) } fn compress_lzw2(data: &[u8], last: Vec) -> (usize, Vec, Vec) { let mut count = 0; let mut dictionary = HashMap::new(); for i in 0..=255 { dictionary.insert(vec![i], i as u64); } let mut dictionary_count = (dictionary.len() + 1) as u64; let mut element = Vec::new(); if last.is_empty() { element = last } let mut bit_io = BitIo::new(vec![0u8; 0xF0000]); let write_bit = |bit_io: &mut BitIo, code: u64| { if code > 0x7FFF { bit_io.write_bit(1, 1); bit_io.write_bit(code, 18); } else { bit_io.write_bit(0, 1); bit_io.write_bit(code, 15); } }; for c in data.iter() { let mut entry = element.clone(); entry.push(*c); if dictionary.contains_key(&entry) { element = entry } else { write_bit(&mut bit_io, *dictionary.get(&element).unwrap()); dictionary.insert(entry, dictionary_count); element = vec![*c]; dictionary_count += 1; } count += 1; if dictionary_count >= 0x3FFFE { count -= 1; break; } } let last_element = element; if bit_io.byte_size() == 0 { if !last_element.is_empty() { for c in last_element { write_bit(&mut bit_io, *dictionary.get(&vec![c]).unwrap()); } } return (count, bit_io.bytes(), Vec::new()); } else if bit_io.byte_size() < 0x87BDF { if !last_element.is_empty() { write_bit(&mut bit_io, *dictionary.get(&last_element).unwrap()); } return (count, bit_io.bytes(), Vec::new()); } (count, bit_io.bytes(), last_element) }