From b3d4d4c52de8afffaa9b7894b799c31e6a7492cb Mon Sep 17 00:00:00 2001
From: G2-Games <ke0bhogsg@gmail.com>
Date: Sat, 6 Jul 2024 14:50:39 -0500
Subject: [PATCH] Preliminary PAK writing

---
 cz/src/common.rs         |   7 +-
 cz/src/formats/cz4.rs    |   4 +
 experimental/src/main.rs |   4 +-
 luca_pak/Cargo.toml      |   2 +
 luca_pak/src/entry.rs    |  16 ++--
 luca_pak/src/header.rs   |  41 +++++++-
 luca_pak/src/lib.rs      | 202 +++++++++++++++++++++++++++++----------
 luca_pak/src/main.rs     |  50 +++++++++-
 utils/src/main.rs        |   2 +-
 9 files changed, 257 insertions(+), 71 deletions(-)

diff --git a/cz/src/common.rs b/cz/src/common.rs
index 8de6a51..1d8a0bb 100644
--- a/cz/src/common.rs
+++ b/cz/src/common.rs
@@ -116,10 +116,7 @@ impl CommonHeader {
         self.length = length
     }
 
-    pub fn from_bytes<T: Seek + ReadBytesExt + Read>(bytes: &mut T) -> Result<Self, CzError>
-    where
-        Self: Sized,
-    {
+    pub fn from_bytes<T: Seek + Read>(bytes: &mut T) -> Result<Self, CzError> {
         let mut magic = [0u8; 4];
         bytes.read_exact(&mut magic)?;
 
@@ -203,7 +200,7 @@ impl CommonHeader {
         self.unknown
     }
 
-    pub fn write_into<T: WriteBytesExt + Write>(&self, output: &mut T) -> Result<(), io::Error> {
+    pub fn write_into<T: Write>(&self, output: &mut T) -> Result<(), io::Error> {
         let magic_bytes = [b'C', b'Z', b'0' + self.version as u8, b'\0'];
 
         output.write_all(&magic_bytes)?;
diff --git a/cz/src/formats/cz4.rs b/cz/src/formats/cz4.rs
index e3e6276..06eb75c 100644
--- a/cz/src/formats/cz4.rs
+++ b/cz/src/formats/cz4.rs
@@ -23,9 +23,13 @@ pub fn encode<T: WriteBytesExt + Write>(
     bitmap: &[u8],
     header: &CommonHeader,
 ) -> Result<(), CzError> {
+    let timer = std::time::Instant::now();
     let bitmap = diff_line(header, bitmap);
+    println!("diff_line took {:?}", timer.elapsed());
 
+    let timer = std::time::Instant::now();
     let (compressed_data, compressed_info) = compress(&bitmap, 0xFEFD);
+    println!("Compression took {:?}", timer.elapsed());
 
     compressed_info.write_into(output)?;
 
diff --git a/experimental/src/main.rs b/experimental/src/main.rs
index fdb9a0a..9dfd5d3 100644
--- a/experimental/src/main.rs
+++ b/experimental/src/main.rs
@@ -1,6 +1,8 @@
+use std::time::Instant;
+
 fn main() {
     let mut cz_file = cz::open("test_file.cz3").unwrap();
-    cz_file.save_as_png("test.png").unwrap();
+    //cz_file.save_as_png("test.png").unwrap();
 
     cz_file.header_mut().set_version(4).unwrap();
 
diff --git a/luca_pak/Cargo.toml b/luca_pak/Cargo.toml
index 357f375..f4e0183 100644
--- a/luca_pak/Cargo.toml
+++ b/luca_pak/Cargo.toml
@@ -10,6 +10,8 @@ authors.workspace = true
 
 [dependencies]
 byteorder = "1.5.0"
+colog = "1.3.0"
+log = "0.4.22"
 thiserror = "1.0.61"
 
 [lints]
diff --git a/luca_pak/src/entry.rs b/luca_pak/src/entry.rs
index 30dc09c..0c998d5 100644
--- a/luca_pak/src/entry.rs
+++ b/luca_pak/src/entry.rs
@@ -15,17 +15,17 @@ pub struct Entry {
     /// The size of the entry in bytes
     pub(super) length: u32,
 
-    /// The actual data which make up the entry
-    pub(super) data: Vec<u8>,
+    /// Unknown 12-byte blob stored per entry when the PAK flags indicate it
+    pub(super) unknown1: Option<[u8; 12]>,
 
     /// The name of the entry as stored in the PAK
     pub(super) name: Option<String>,
 
-    pub(super) unknown1: Option<u32>,
-
     /// The ID of the entry, effectively an index
     pub(super) id: u32,
-    pub(super) replace: bool, // TODO: Look into a better way to indicate this
+
+    /// The actual data which makes up the entry
+    pub(super) data: Vec<u8>,
 }
 
 impl Entry {
@@ -43,7 +43,7 @@ impl Entry {
 
         // Save the file to <folder> + <file name>
         if let Some(name) = &self.name {
-            path.push(&name);
+            path.push(name);
         } else {
             path.push(&self.id.to_string())
         }
@@ -56,6 +56,10 @@ impl Entry {
         Ok(())
     }
 
+    pub fn len(&self) -> usize {
+        self.length as usize
+    }
+
     /// Get the raw byte data of an [`Entry`]
     pub fn as_bytes(&self) -> &Vec<u8> {
         &self.data
diff --git a/luca_pak/src/header.rs b/luca_pak/src/header.rs
index 124619e..d019cba 100644
--- a/luca_pak/src/header.rs
+++ b/luca_pak/src/header.rs
@@ -1,3 +1,8 @@
+use std::io::{self, Write};
+use byteorder::WriteBytesExt;
+
+use crate::LE;
+
 /// The header of a PAK file
 #[derive(Debug, Clone)]
 pub struct Header {
@@ -14,10 +19,24 @@ pub struct Header {
     pub(super) unknown3: u32,
     pub(super) unknown4: u32,
 
-    pub(super) flags: u32,
+    pub(super) flags: PakFlags,
 }
 
 impl Header {
+    /// Write the header to `output` as nine little-endian `u32` fields.
+    pub fn write_into<T: Write>(&self, output: &mut T) -> Result<(), io::Error> {
+        let mut put = |dword: u32| output.write_u32::<LE>(dword);
+        put(self.data_offset)?;
+        put(self.entry_count)?;
+        put(self.id_start)?;
+        put(self.block_size)?;
+        put(self.unknown1)?;
+        put(self.unknown2)?;
+        put(self.unknown3)?;
+        put(self.unknown4)?;
+        put(self.flags.0)
+    }
+
     pub fn block_size(&self) -> u32 {
         self.block_size
     }
@@ -34,7 +53,23 @@ impl Header {
         self.data_offset
     }
 
-    pub fn flags(&self) -> u32 {
-        self.flags
+    pub fn flags(&self) -> &PakFlags {
+        &self.flags
+    }
+}
+
+/// Feature flags of a PAK file, stored as the last `u32` of its header
+#[derive(Clone, Debug)]
+pub struct PakFlags(pub u32);
+
+impl PakFlags {
+    /// Returns true if bit 8 (`0x100`) of the flags is set
+    pub fn has_unknown_data1(&self) -> bool {
+        self.0 & (1 << 8) != 0
+    }
+
+    /// Returns true if bit 9 (`0x200`) of the flags is set
+    pub fn has_names(&self) -> bool {
+        self.0 & (1 << 9) != 0
     }
 }
diff --git a/luca_pak/src/lib.rs b/luca_pak/src/lib.rs
index d397678..ce88517 100644
--- a/luca_pak/src/lib.rs
+++ b/luca_pak/src/lib.rs
@@ -3,14 +3,16 @@ mod header;
 
 use byteorder::{LittleEndian, ReadBytesExt};
 use header::Header;
+use log::{debug, info};
 use std::{
-    fs::File,
-    io::{self, BufRead, BufReader, Read, Seek, SeekFrom},
-    path::{Path, PathBuf},
+    ffi::CString, fs::File, io::{self, BufRead, BufReader, Read, Seek, SeekFrom, Write}, path::{Path, PathBuf}
 };
 use thiserror::Error;
+use byteorder::WriteBytesExt;
 
-use crate::entry::Entry;
+type LE = LittleEndian;
+
+use crate::{entry::Entry, header::PakFlags};
 
 /// An error associated with a PAK file
 #[derive(Error, Debug)]
@@ -28,34 +30,24 @@ pub enum PakError {
 /// A full PAK file with a header and its contents
 #[derive(Debug, Clone)]
 pub struct Pak {
+    /// The path of the PAK file, can serve as an identifier or name as the
+    /// header has no name for the file.
+    path: PathBuf,
     header: Header,
 
-    unknown_pre_data: Vec<u32>,
+    pub unknown_pre_data: Vec<u32>,
+    unknown_post_header: Vec<u8>,
+
+    rebuild: bool, // TODO: Look into a better way to indicate this, or if it's needed at all
 
     entries: Vec<Entry>,
-
-    unknown_flag_data: Vec<u8>,
-
-    path: PathBuf,
-    rebuild: bool, // TODO: Look into a better way to indicate this, or if it's needed at all
 }
 
-pub struct PakFlags(u32);
-
-impl PakFlags {
-    pub fn has_names(&self) -> bool {
-        // 0b01000000000
-        self.0 & 0x200 != 0
-    }
-
-    pub fn has_offsets(&self) -> bool {
-        // 0b10000000000
-        self.0 & 0x400 != 0
-    }
+struct FileLocation {
+    offset: u32,
+    length: u32,
 }
 
-type LE = LittleEndian;
-
 impl Pak {
     /// Convenience method to open a PAK file from a path and decode it
     pub fn open<P: ?Sized + AsRef<Path>>(path: &P) -> Result<Self, PakError> {
@@ -65,13 +57,15 @@ impl Pak {
     }
 
     /// Decode a PAK file from a byte stream
-    pub fn decode<T: Seek + ReadBytesExt + Read>(
+    pub fn decode<T: Seek + Read>(
         input: &mut T,
         path: PathBuf,
     ) -> Result<Self, PakError> {
+        info!("Reading pak from {:?}", path);
         let mut input = BufReader::new(input);
 
         // Read in all the header bytes
+        info!("READING: header");
         let header = Header {
             data_offset: input.read_u32::<LE>()?,
             entry_count: input.read_u32::<LE>()?,
@@ -81,8 +75,11 @@ impl Pak {
             unknown2: input.read_u32::<LE>()?,
             unknown3: input.read_u32::<LE>()?,
             unknown4: input.read_u32::<LE>()?,
-            flags: input.read_u32::<LE>()?,
+            flags: PakFlags(input.read_u32::<LE>()?),
         };
+        info!("{} entries detected", header.entry_count);
+        info!("Block size is {} bytes", header.block_size);
+        info!("Flag bits {:#032b}", header.flags().0);
 
         let first_offset = header.data_offset() / header.block_size();
 
@@ -97,76 +94,171 @@ impl Pak {
 
             unknown_pre_data.push(unknown);
         }
-        dbg!(unknown_pre_data.len());
+        info!("Pre-position bytes: {}", unknown_pre_data.len());
 
         if input.stream_position()? == header.data_offset() as u64 {
+            log::error!("Header length exceeded first data block");
             return Err(PakError::HeaderError);
         }
 
         // Read all the offsets and lengths
+        // TODO: I think a flag controls this
+        info!("READING: offsets");
         let mut offsets = Vec::new();
         for _ in 0..header.entry_count() {
             let offset = input.read_u32::<LE>().unwrap();
             let length = input.read_u32::<LE>().unwrap();
-            offsets.push((offset, length));
+            offsets.push(FileLocation {
+                offset,
+                length,
+            });
+        }
+
+        // Read all unknown_data1
+        let mut unknown_data1 = None;
+        if header.flags.has_unknown_data1() {
+            info!("READING: unknown_data1");
+            unknown_data1 = Some(Vec::new());
+            let mut buf = [0u8; 12];
+            for _ in 0..header.entry_count() {
+                input.read_exact(&mut buf)?;
+
+                unknown_data1.as_mut().unwrap().push(buf);
+            }
         }
 
         // Read all the file names
-        let mut file_names = Vec::new();
-        let mut string_buf = Vec::new();
-        for _ in 0..header.entry_count() {
-            string_buf.clear();
-            input.read_until(0x00, &mut string_buf)?;
-            string_buf.pop();
+        let mut file_names = None;
+        if header.flags.has_names() {
+            info!("READING: file_names");
+            let mut string_buf = Vec::new();
+            file_names = Some(Vec::new());
+            for _ in 0..header.entry_count() {
+                string_buf.clear();
+                input.read_until(0x00, &mut string_buf)?;
+                string_buf.pop();
 
-            let strbuf = String::from_utf8_lossy(&string_buf).to_string();
-            file_names.push(strbuf.clone());
+                let strbuf = String::from_utf8_lossy(&string_buf).to_string();
+                file_names.as_mut().unwrap().push(strbuf.clone());
+            }
         }
 
-        let unknown_flag_size = header.data_offset() as u64 - input.stream_position()?;
-        let mut unknown_flag_data = vec![0u8; unknown_flag_size as usize];
-        input.read_exact(&mut unknown_flag_data)?;
+        let unknown_post_header_size = header.data_offset() as u64 - input.stream_position()?;
+        let mut unknown_post_header = vec![0u8; unknown_post_header_size as usize];
+        input.read_exact(&mut unknown_post_header)?;
 
         // Read all entry data
+        info!("Creating entry list");
         let mut entries: Vec<Entry> = Vec::new();
-        for i in 0..header.entry_count() as usize {
+        for (i, offset_info) in offsets.iter().enumerate().take(header.entry_count() as usize) {
+            debug!("Seeking to block {}", offset_info.offset);
             // Seek to and read the entry data
             input
                 .seek(SeekFrom::Start(
-                    offsets[i].0 as u64 * header.block_size() as u64,
+                    offset_info.offset as u64 * header.block_size() as u64,
                 ))
                 .unwrap();
-            let mut data = vec![0u8; offsets[i].1 as usize];
+            let mut data = vec![0u8; offset_info.length as usize];
             input.read_exact(&mut data).unwrap();
 
+            let name = if let Some(file_names) = &file_names {
+                file_names.get(i).cloned()
+            } else {
+                None
+            };
+
+            let unknown1 = if let Some(unknown_data1) = &unknown_data1 {
+                unknown_data1.get(i).cloned()
+            } else {
+                None
+            };
+
             // Build the entry from the data we now know
             let entry = Entry {
-                offset: offsets[i].0,
-                length: offsets[i].1,
+                offset: offset_info.offset,
+                length: offset_info.length,
+                unknown1,
                 data,
-                name: Some(file_names[i].clone()),
-                unknown1: todo!(),
+                name,
                 id: header.id_start + i as u32,
-                replace: false,
             };
             entries.push(entry);
         }
+        info!("Entry list contains {} entries", entries.len());
 
         Ok(Pak {
             header,
             unknown_pre_data,
             entries,
-            unknown_flag_data,
+            unknown_post_header,
             path,
             rebuild: false,
         })
     }
 
+    /// Encode the PAK into a byte stream, padding each entry's data with
+    /// zeroes up to the next block boundary.
+    ///
+    /// Panics if the block size is zero, if a name contains a NUL byte, or if
+    /// the flags announce names or unknown data that an entry lacks.
+    pub fn encode<T: Write + Seek>(&self, mut output: &mut T) -> Result<(), PakError> {
+        let block_size = self.header.block_size as usize;
+        self.header.write_into(&mut output)?;
+
+        // Write unknown data
+        for dword in &self.unknown_pre_data {
+            output.write_u32::<LE>(*dword)?;
+        }
+
+        // Write offsets and lengths
+        for entry in self.entries() {
+            output.write_u32::<LE>(entry.offset)?;
+            output.write_u32::<LE>(entry.length)?;
+        }
+
+        // Write out unknown data if the flags indicate it should have some
+        if self.header.flags().has_unknown_data1() {
+            for entry in self.entries() {
+                output.write_all(entry.unknown1.as_ref().unwrap())?;
+            }
+        }
+
+        // Write names if the flags indicate it should have them
+        if self.header.flags().has_names() {
+            for entry in self.entries() {
+                let name = entry.name.as_ref().unwrap();
+                output.write_all(
+                    CString::new(name.as_bytes()).unwrap().to_bytes_with_nul()
+                )?;
+            }
+        }
+
+        output.write_all(&self.unknown_post_header)?;
+
+        let mut block_offset = self.header.data_offset / self.header.block_size;
+        for entry in self.entries() {
+            // Zeroes up to the next block boundary; none if already aligned
+            let remainder = (block_size - entry.data.len() % block_size) % block_size;
+            debug!("entry {:?} len {}", entry.name(), entry.data.len());
+            debug!("remainder {}", remainder);
+            debug!("block_offset {} - expected offset {}", block_offset, entry.offset);
+            output.write_all(&entry.data)?;
+            output.write_all(&vec![0u8; remainder])?;
+            block_offset += ((entry.data.len() + remainder) / block_size) as u32;
+        }
+
+        Ok(())
+    }
+
     /// Get the header information from the PAK
     pub fn header(&self) -> &Header {
         &self.header
     }
 
+    pub fn path(&self) -> &PathBuf {
+        &self.path
+    }
+
     /// Get an individual entry from the PAK by its index
     pub fn get_entry(&self, index: u32) -> Option<&Entry> {
         self.entries.get(index as usize)
@@ -177,15 +269,23 @@ impl Pak {
         self.entries.get((id - self.header.id_start) as usize)
     }
 
+    /// Get the first entry in the PAK whose stored name equals `name`
+    pub fn get_entry_by_name(&self, name: &str) -> Option<&Entry> {
+        self.entries
+            .iter()
+            .find(|entry| entry.name.as_deref() == Some(name))
+    }
+
     /// Get a list of all entries from the PAK
     pub fn entries(&self) -> &Vec<Entry> {
         &self.entries
     }
 
-    pub fn contains_name(&self, name: String) -> bool {
+    /// Check whether any entry in the PAK is stored under the given name
+    pub fn contains_name(&self, name: &str) -> bool {
         self.entries
             .iter()
-            .find(|e| e.name.as_ref().is_some_and(|n| n == &name))
-            .is_some()
+            .map(|entry| entry.name.as_deref())
+            .any(|entry_name| entry_name == Some(name))
     }
 }
diff --git a/luca_pak/src/main.rs b/luca_pak/src/main.rs
index c70aaf5..97184b9 100644
--- a/luca_pak/src/main.rs
+++ b/luca_pak/src/main.rs
@@ -1,10 +1,52 @@
+use std::{fs::File, io::BufWriter};
 use luca_pak::Pak;
 
 fn main() {
-    let pak = Pak::open("MANUAL.PAK").unwrap();
-    println!("{:#032b}", pak.header().flags());
+    let mut clog = colog::default_builder();
+    clog.filter(None, log::LevelFilter::Info);
+    clog.init();
 
-    for entry in pak.entries() {
-        println!("{}", entry.name().as_ref().unwrap());
+    /*
+    let paths = std::fs::read_dir(".")
+        .unwrap()
+        .filter_map(|res| res.ok())
+        .map(|dir_entry| dir_entry.path())
+        .filter_map(|path| {
+            if path.extension().map_or(false, |ext| ext.to_ascii_lowercase() == "pak") {
+                Some(path)
+            } else {
+                None
+            }
+        })
+        .collect::<Vec<_>>();
+
+    let mut pak_files = vec![];
+    for path in paths {
+        let pak = Pak::open(&path).unwrap();
+        pak_files.push(pak)
     }
+
+    pak_files.sort_by_key(|x| x.header().flags().0 & 0xF);
+
+    for pak in pak_files {
+        println!(
+            "{:#032b} - {} - {:?}",
+            pak.header().flags().0,
+            pak.unknown_pre_data.len(),
+            pak.path(),
+        );
+    }
+    */
+
+    let pak = Pak::open("MANUAL.PAK").unwrap();
+    println!("{:#?}", pak.header());
+    //println!("{:#032b}", pak.header().flags().0);
+
+    for (i, entry) in pak.entries().iter().enumerate() {
+        //println!("{i:03}: {:06.2} kB - {}", entry.len() as f32 / 1_000.0, entry.name().as_ref().unwrap());
+        entry.save("./output/").unwrap();
+    }
+
+    let mut output = BufWriter::new(File::create("MANUAL-modified.PAK").unwrap());
+    pak.encode(&mut output).unwrap();
 }
diff --git a/utils/src/main.rs b/utils/src/main.rs
index eb85f4a..9b700f6 100644
--- a/utils/src/main.rs
+++ b/utils/src/main.rs
@@ -262,7 +262,7 @@ fn replace_cz<P: ?Sized + AsRef<Path>>(
     cz.header_mut().set_width(repl_img.width() as u16);
     cz.header_mut().set_height(repl_img.height() as u16);
     cz.set_bitmap(repl_img.into_raw());
-    cz.remove_palette();
+    cz.clear_palette();
 
     if let Some(depth) = depth {
         cz.header_mut().set_depth(*depth)