More script parsing improvements

This commit is contained in:
G2-Games 2024-11-21 10:50:51 -06:00
parent 3437b6c7a9
commit 957f3e637d
2 changed files with 504 additions and 54 deletions

View file

@ -1,11 +1,11 @@
mod utils; mod utils;
use std::{cell::LazyCell, fs::{self, File}, io::Read, path::PathBuf}; use std::{ffi::OsString, fs::{self, File}, io::{Read, Write}, path::PathBuf, str::FromStr, sync::LazyLock};
use byteorder_lite::{ReadBytesExt, LE}; use byteorder_lite::{ReadBytesExt, LE};
use utils::Encoding; use utils::Encoding;
const OPCODES: LazyCell<Vec<String>> = LazyCell::new(|| fs::read_to_string("LBEE_opcodes") static OPCODES: LazyLock<Vec<String>> = LazyLock::new(|| fs::read_to_string("LBEE_opcodes")
.unwrap() .unwrap()
.split("\n") .split("\n")
.map(|s| s.to_owned()) .map(|s| s.to_owned())
@ -13,30 +13,72 @@ const OPCODES: LazyCell<Vec<String>> = LazyCell::new(|| fs::read_to_string("LBEE
); );
fn main() { fn main() {
let file_path = PathBuf::from("SEEN0513"); let scripts_path = PathBuf::from("LBEE_SCRIPT_steam");
let mut script = File::open(&file_path).unwrap(); for script in fs::read_dir(scripts_path).unwrap() {
let script = script.unwrap();
let filename = script.file_name();
let filename = filename.to_string_lossy();
if !script.file_type().unwrap().is_file() {
continue;
} else if filename.contains("8500") || filename.contains("8501") {
continue;
} else if filename.starts_with("_") {
continue;
}
println!("Start parsing script"); let mut script_file = File::open(&script.path()).unwrap();
let script = parse_script(
&mut script,
file_path.file_name().unwrap().to_str().unwrap()
);
println!("Parsing finished");
for c in script.opcodes { println!("Start parsing {:?}", script.file_name());
let ascii_string = String::from_utf8_lossy(&c.param_bytes); let script_len = script_file.metadata().unwrap().len();
//println!("{:>4}: '{:>11}' — {}", c.index, c.string, ascii_string); let script = parse_script(
SpecificOpcode::decode(&c.string, c.param_bytes); &mut script_file,
script.file_name().to_str().unwrap(),
script_len
);
let mut out_file = File::create(format!("LBEE_SCRIPT_listing/{}_OPCODES.txt", filename)).unwrap();
for c in script.opcodes {
out_file.write_all(format!("{:>5}", c.position).as_bytes()).unwrap();
out_file.write_all(format!("{:>12}: ", c.string).as_bytes()).unwrap();
if let Some(o) = c.opcode_specifics {
if o == SpecificOpcode::Unknown {
out_file.write_all(format!("{:02X?}", c.param_bytes).as_bytes()).unwrap();
} else {
out_file.write_all(format!("{:?}", o).as_bytes()).unwrap();
}
} else if let Some(r) = c.fixed_param {
out_file.write_all(format!("{:?}", r).as_bytes()).unwrap();
}
out_file.write_all(b"\n").unwrap();
}
} }
println!("Done!");
/*
for c in script.opcodes {
print!("{:>5}", c.position);
print!("{:>12}: ", c.string);
if let Some(o) = c.opcode_specifics {
if o == SpecificOpcode::Unknown {
print!("{:02X?}", c.param_bytes);
} else {
print!("{:?}", o);
}
} else if let Some(r) = c.fixed_param {
print!("{:?}", r);
}
println!();
}
*/
} }
fn parse_script<S: Read>(script_stream: &mut S, name: &str) -> Script { fn parse_script<S: Read>(script_stream: &mut S, name: &str, length: u64) -> Script {
let mut opcodes = Vec::new(); let mut opcodes = Vec::new();
let mut _offset = 0; let mut offset = 0;
let mut i = 0; let mut i = 0;
let mut pos = 0; let mut pos = 0;
loop { while offset < length as usize {
// Read all base info // Read all base info
let (length, number, flag) = ( let (length, number, flag) = (
script_stream.read_u16::<LE>().unwrap() as usize, script_stream.read_u16::<LE>().unwrap() as usize,
@ -45,16 +87,16 @@ fn parse_script<S: Read>(script_stream: &mut S, name: &str) -> Script {
); );
let string = OPCODES[number as usize].clone(); let string = OPCODES[number as usize].clone();
_offset += 4; offset += 4;
let raw_len = length - 4; let raw_len = length - 4;
let mut raw_bytes = vec![0u8; raw_len]; let mut raw_bytes = vec![0u8; raw_len];
script_stream.read_exact(&mut raw_bytes).unwrap(); script_stream.read_exact(&mut raw_bytes).unwrap();
_offset += raw_len; offset += raw_len;
// Read extra align byte if alignment needed // Read extra align byte if alignment needed
let align = if length % 2 != 0 { if length % 2 != 0 {
_offset += 1; offset += 1;
Some(script_stream.read_u8().unwrap()) Some(script_stream.read_u8().unwrap())
} else { } else {
None None
@ -82,13 +124,12 @@ fn parse_script<S: Read>(script_stream: &mut S, name: &str) -> Script {
index: i, index: i,
position: pos, position: pos,
length, length,
number, opcode_number: number,
string: string.clone(), string: string.clone(),
flag, flag,
raw_bytes,
align,
fixed_param, fixed_param,
param_bytes opcode_specifics: SpecificOpcode::decode(&string, &param_bytes),
param_bytes,
}); });
// Break if END opcode reached // Break if END opcode reached
@ -119,58 +160,146 @@ struct Opcode {
index: usize, index: usize,
position: usize, position: usize,
length: usize, length: usize,
number: u8, opcode_number: u8,
string: String, string: String,
flag: u8, flag: u8,
raw_bytes: Vec<u8>,
align: Option<u8>,
fixed_param: Option<Vec<u16>>, fixed_param: Option<Vec<u16>>,
param_bytes: Vec<u8>, param_bytes: Vec<u8>,
opcode_specifics: Option<SpecificOpcode>,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialEq, Eq)]
enum SpecificOpcode { enum SpecificOpcode {
Message { Message {
voice_id: u16, voice_id: u16,
messages: Vec<String>, messages: Vec<String>,
end: Vec<u8>, end: Vec<u8>,
}, },
Select, Add {
Battle, var1: u16,
Task, expr: String,
SayAVoiceText, },
VarStrSet, EquN {
GoTo, var1: u16,
GoSub, value: Option<u16>, //?
Jump, },
FarCall, Select {
IFN, var_id: u16,
IFY, var0: u16,
Random, var1: u16,
ImageLoad, var2: u16,
messages: Vec<String>,
var3: u16,
var4: u16,
var5: u16,
},
_Battle,
Task {
task_type: u16,
var1: Option<u16>,
var2: Option<u16>,
var3: Option<u16>,
var4: Option<u16>,
message_1: Option<Vec<String>>,
message_2: Option<Vec<String>>,
raw_args: Option<Vec<u8>>,
},
SayAVoiceText {
voice_id: u16,
messages: Vec<String>,
},
VarStrSet {
varstr_id: u16,
varstr_str: String,
},
GoTo {
jump_pos: u32,
},
GoSub {
arg1: u16,
jump_pos: u32,
end: Vec<u8>,
},
Jump {
filename: String,
jump_pos: Option<u32>,
},
FarCall {
index: u16,
filename: String,
jump_pos: u32,
end: Vec<u8>,
},
IfN {
condition: String,
jump_pos: u32,
},
IfY {
condition: String,
jump_pos: u32,
},
Random {
var1: u16,
rnd_from: String,
rnd_to: String,
},
ImageLoad {
mode: u16,
image_id: u16,
var1: Option<u16>,
pos_x: Option<u16>,
pos_y: Option<u16>,
end: Vec<u8>,
},
Bgm {
bgm_id: u32,
arg2: Option<u16>,
},
Unknown, Unknown,
} }
impl SpecificOpcode { impl SpecificOpcode {
pub fn decode(opcode_str: &str, param_bytes: Vec<u8>) -> Self { pub fn decode(opcode_str: &str, param_bytes: &[u8]) -> Option<Self> {
match opcode_str { if param_bytes.is_empty() {
"MESSAGE" => Self::message(param_bytes), return None
_ => Self::Unknown
} }
Some(match opcode_str {
"MESSAGE" => Self::parse_message(param_bytes),
"SAYAVOICETEXT" => Self::parse_sayavoicetext(param_bytes),
"SELECT" => Self::parse_select(param_bytes),
"TASK" => Self::parse_task(param_bytes),
"ADD" => Self::parse_add(param_bytes),
"EQUN" => Self::parse_equn(param_bytes),
"RANDOM" => Self::parse_random(param_bytes),
"IFY" => Self::parse_ifn_ify(param_bytes, false),
"IFN" => Self::parse_ifn_ify(param_bytes, true),
"JUMP" => Self::parse_jump(param_bytes),
"GOTO" => Self::parse_goto(param_bytes),
"GOSUB" => Self::parse_gosub(param_bytes),
"FARCALL" => Self::parse_farcall(param_bytes),
"VARSTR_SET" => Self::parse_varstr_set(param_bytes),
"IMAGELOAD" => Self::parse_imageload(param_bytes),
"BGM" => Self::parse_bgm(param_bytes),
_ => Self::Unknown
})
} }
fn message(param_bytes: Vec<u8>) -> Self { fn parse_message(param_bytes: &[u8]) -> Self {
let voice_id = u16::from_le_bytes(param_bytes[0..2].try_into().unwrap()); let (mut offset, voice_id) = utils::get_u16(param_bytes, 0).unwrap();
// TODO: This will need to change per-game based on the number of
// languages and their encodings
let mut messages = Vec::new(); let mut messages = Vec::new();
let mut offset = 2;
for _ in 0..2 { for _ in 0..2 {
let (o, string) = utils::get_string(&param_bytes, offset, Encoding::UTF16, None).unwrap(); let (o, string) = utils::get_string(param_bytes, offset, Encoding::UTF16, None).unwrap();
messages.push(string); messages.push(string);
offset = o; offset = o;
} }
dbg!(&messages);
Self::Message { Self::Message {
voice_id, voice_id,
@ -178,4 +307,303 @@ impl SpecificOpcode {
end: param_bytes[offset..].to_vec() end: param_bytes[offset..].to_vec()
} }
} }
/// Decodes the parameters of an `ADD` opcode.
///
/// Layout as read here: a little-endian `u16` followed by a Shift-JIS
/// string. Any bytes after the string are ignored. Panics if either
/// read fails.
fn parse_add(param_bytes: &[u8]) -> Self {
    let (expr_start, var1) = utils::get_u16(param_bytes, 0).unwrap();
    let expr = utils::get_string(param_bytes, expr_start, Encoding::ShiftJIS, None)
        .unwrap()
        .1;

    Self::Add { var1, expr }
}
/// Decodes the parameters of an `EQUN` opcode.
///
/// Always reads a leading `u16`; a second `u16` is read only when at
/// least one byte remains after the first. Panics if a read fails.
fn parse_equn(param_bytes: &[u8]) -> Self {
    let (next, var1) = utils::get_u16(param_bytes, 0).unwrap();

    // The trailing value is optional: only present when bytes remain.
    let value = if next < param_bytes.len() {
        Some(utils::get_u16(param_bytes, next).unwrap().1)
    } else {
        None
    };

    Self::EquN { var1, value }
}
/// Decodes the parameters of a `SELECT` opcode.
///
/// Layout as read here: four `u16` values, two UTF-16 strings, then
/// three more `u16` values. Panics if any read fails.
fn parse_select(param_bytes: &[u8]) -> Self {
    // Leading block of four 16-bit values.
    let (cursor, var_id) = utils::get_u16(param_bytes, 0).unwrap();
    let (cursor, var0) = utils::get_u16(param_bytes, cursor).unwrap();
    let (cursor, var1) = utils::get_u16(param_bytes, cursor).unwrap();
    let (cursor, var2) = utils::get_u16(param_bytes, cursor).unwrap();

    // TODO: This will need to change per-game based on the number of
    // languages and their encodings
    let (cursor, first) =
        utils::get_string(param_bytes, cursor, Encoding::UTF16, None).unwrap();
    let (cursor, second) =
        utils::get_string(param_bytes, cursor, Encoding::UTF16, None).unwrap();
    let messages = vec![first, second];

    // Trailing block of three 16-bit values.
    let (cursor, var3) = utils::get_u16(param_bytes, cursor).unwrap();
    let (cursor, var4) = utils::get_u16(param_bytes, cursor).unwrap();
    let var5 = utils::get_u16(param_bytes, cursor).unwrap().1;

    Self::Select {
        var_id,
        var0,
        var1,
        var2,
        messages,
        var3,
        var4,
        var5,
    }
}
/// Decodes the parameters of a `RANDOM` opcode.
///
/// Layout as read here: a `u16` followed by two Shift-JIS strings,
/// stored as `rnd_from` and `rnd_to`. Panics if any read fails.
fn parse_random(param_bytes: &[u8]) -> Self {
    let (from_start, var1) = utils::get_u16(param_bytes, 0).unwrap();
    let (to_start, rnd_from) =
        utils::get_string(param_bytes, from_start, Encoding::ShiftJIS, None).unwrap();
    let rnd_to = utils::get_string(param_bytes, to_start, Encoding::ShiftJIS, None)
        .unwrap()
        .1;

    Self::Random {
        var1,
        rnd_from,
        rnd_to,
    }
}
/// Decodes the parameters shared by the `IFN` and `IFY` opcodes.
///
/// Both carry a Shift-JIS condition string followed by a `u32` jump
/// position; `ifn` selects which variant is produced (`true` yields
/// `IfN`, `false` yields `IfY`). Panics if a read fails.
fn parse_ifn_ify(param_bytes: &[u8], ifn: bool) -> Self {
    let (after_condition, condition) =
        utils::get_string(param_bytes, 0, Encoding::ShiftJIS, None).unwrap();
    let jump_pos = utils::get_u32(param_bytes, after_condition).unwrap().1;

    if !ifn {
        return Self::IfY { condition, jump_pos };
    }

    Self::IfN { condition, jump_pos }
}
/// Decodes the parameters of a `JUMP` opcode.
///
/// Reads a Shift-JIS filename; a `u32` jump position is read only when
/// bytes remain after the filename. Panics if a read fails.
fn parse_jump(param_bytes: &[u8]) -> Self {
    let (after_name, filename) =
        utils::get_string(param_bytes, 0, Encoding::ShiftJIS, None).unwrap();

    // The jump position is optional and only present if data follows.
    let mut jump_pos = None;
    if after_name < param_bytes.len() {
        jump_pos = Some(utils::get_u32(param_bytes, after_name).unwrap().1);
    }

    Self::Jump { filename, jump_pos }
}
/// Decodes the parameters of an `IMAGELOAD` opcode.
///
/// Always reads `mode` and `image_id` as `u16`. Three further `u16`
/// values (`var1`, `pos_x`, `pos_y`) are read only when `mode` is
/// neither 0 nor 8 and bytes remain. Whatever follows the last value
/// read is kept verbatim in `end`. Panics if a read fails.
fn parse_imageload(param_bytes: &[u8]) -> Self {
    let (cursor, mode) = utils::get_u16(param_bytes, 0).unwrap();
    let (cursor, image_id) = utils::get_u16(param_bytes, cursor).unwrap();

    let has_extra_fields = !matches!(mode, 0 | 8) && param_bytes.len() > cursor;

    let (var1, pos_x, pos_y, cursor) = if has_extra_fields {
        let (next, extra) = utils::get_u16(param_bytes, cursor).unwrap();
        let (next, x) = utils::get_u16(param_bytes, next).unwrap();
        let (next, y) = utils::get_u16(param_bytes, next).unwrap();
        (Some(extra), Some(x), Some(y), next)
    } else {
        (None, None, None, cursor)
    };

    Self::ImageLoad {
        mode,
        image_id,
        var1,
        pos_x,
        pos_y,
        end: param_bytes[cursor..].to_vec(),
    }
}
/// Decodes the parameters of a `GOTO` opcode: a single `u32` jump
/// position at the start of the parameter bytes. Panics if the read
/// fails.
fn parse_goto(param_bytes: &[u8]) -> Self {
    let jump_pos = utils::get_u32(param_bytes, 0).unwrap().1;

    Self::GoTo { jump_pos }
}
/// Decodes the parameters of a `GOSUB` opcode.
///
/// Layout as read here: a `u16`, then a `u32` jump position; all
/// remaining bytes are kept verbatim in `end`. Panics if a read fails.
fn parse_gosub(param_bytes: &[u8]) -> Self {
    let (after_arg, arg1) = utils::get_u16(param_bytes, 0).unwrap();
    let (after_jump, jump_pos) = utils::get_u32(param_bytes, after_arg).unwrap();
    let end = param_bytes[after_jump..].to_vec();

    Self::GoSub {
        arg1,
        jump_pos,
        end,
    }
}
/// Decodes the parameters of a `VARSTR_SET` opcode: a `u16` id followed
/// by a Shift-JIS string. Panics if a read fails.
fn parse_varstr_set(param_bytes: &[u8]) -> Self {
    let (str_start, varstr_id) = utils::get_u16(param_bytes, 0).unwrap();
    let varstr_str = utils::get_string(param_bytes, str_start, Encoding::ShiftJIS, None)
        .unwrap()
        .1;

    Self::VarStrSet {
        varstr_id,
        varstr_str,
    }
}
/// Decodes the parameters of a `FARCALL` opcode.
///
/// Layout as read here: a `u16` index, a Shift-JIS filename, then a
/// `u32` jump position; all remaining bytes are kept verbatim in `end`.
/// Panics if a read fails.
fn parse_farcall(param_bytes: &[u8]) -> Self {
    let (name_start, index) = utils::get_u16(param_bytes, 0).unwrap();
    let (jump_start, filename) =
        utils::get_string(param_bytes, name_start, Encoding::ShiftJIS, None).unwrap();
    let (tail_start, jump_pos) = utils::get_u32(param_bytes, jump_start).unwrap();
    let end = param_bytes[tail_start..].to_vec();

    Self::FarCall {
        index,
        filename,
        jump_pos,
        end,
    }
}
/// Decodes the parameters of a `SAYAVOICETEXT` opcode.
///
/// Layout as read here: a `u16` voice id followed by two UTF-16
/// strings. Bytes after the second string are ignored. Panics if a
/// read fails.
fn parse_sayavoicetext(param_bytes: &[u8]) -> Self {
    let (cursor, voice_id) = utils::get_u16(param_bytes, 0).unwrap();

    // TODO: This will need to change per-game based on the number of
    // languages and their encodings
    let (cursor, first) =
        utils::get_string(param_bytes, cursor, Encoding::UTF16, None).unwrap();
    let (_, second) = utils::get_string(param_bytes, cursor, Encoding::UTF16, None).unwrap();

    Self::SayAVoiceText {
        voice_id,
        messages: vec![first, second],
    }
}
/// Decodes the parameters of a `BGM` opcode.
///
/// Reads a `u32` id; when that id is zero, a following `u16` is read as
/// `arg2`, otherwise `arg2` is `None`. Panics if a read fails.
fn parse_bgm(param_bytes: &[u8]) -> Self {
    // TODO: investigate the accuracy of this
    let (after_id, bgm_id) = utils::get_u32(param_bytes, 0).unwrap();

    let arg2 = match bgm_id {
        0 => Some(utils::get_u16(param_bytes, after_id).unwrap().1),
        _ => None,
    };

    Self::Bgm { bgm_id, arg2 }
}
fn parse_task(param_bytes: &[u8]) -> Self {
let (offset, task_type) = utils::get_u16(param_bytes, 0).unwrap();
let mut var1 = None;
let mut var2 = None;
let mut var3 = None;
let mut var4 = None;
let mut message_1 = None;
let mut message_2 = None;
let raw_args: Option<Vec<u8>> = None;
let abort_task = Self::Task {
task_type,
var1,
var2,
var3,
var4,
message_1: message_1.clone(),
message_2: message_2.clone(),
raw_args: Some(param_bytes.to_vec())
};
if param_bytes.len() <= offset {
return abort_task;
}
match task_type {
4 => {
let (offset, v1) = utils::get_u16(param_bytes, offset).unwrap();
var1 = Some(v1);
if param_bytes.len() <= offset {
return abort_task;
}
if [0, 4, 5].contains(&v1) {
let (mut offset, v2) = utils::get_u16(param_bytes, offset).unwrap();
var2 = Some(v2);
let mut messages = Vec::new();
for _ in 0..2 {
let (o, string) = utils::get_string(param_bytes, offset, Encoding::UTF16, None).unwrap();
messages.push(string);
offset = o;
}
message_1 = Some(messages);
} else if v1 == 1 {
let (offset, v2) = utils::get_u16(param_bytes, offset).unwrap();
var2 = Some(v2);
let (offset, v3) = utils::get_u16(param_bytes, offset).unwrap();
var3 = Some(v3);
let (mut offset, v4) = utils::get_u16(param_bytes, offset).unwrap();
var4 = Some(v4);
// Get first set of messages
let mut messages = Vec::new();
for _ in 0..2 {
let (o, string) = utils::get_string(param_bytes, offset, Encoding::UTF16, None).unwrap();
messages.push(string);
offset = o;
}
message_1 = Some(messages);
// Get second set of messages
let mut messages = Vec::new();
for _ in 0..2 {
let (o, string) = utils::get_string(param_bytes, offset, Encoding::UTF16, None).unwrap();
messages.push(string);
offset = o;
}
message_2 = Some(messages);
} else if v1 == 6 {
let (offset, v2) = utils::get_u16(param_bytes, offset).unwrap();
var2 = Some(v2);
let (mut offset, v3) = utils::get_u16(param_bytes, offset).unwrap();
var3 = Some(v3);
let mut messages = Vec::new();
for _ in 0..2 {
let (o, string) = utils::get_string(param_bytes, offset, Encoding::UTF16, None).unwrap();
messages.push(string);
offset = o;
}
message_1 = Some(messages);
} else {
return abort_task;
}
}
54 => {
let (_, string) = utils::get_string(param_bytes, offset, Encoding::UTF16, None).unwrap();
message_1 = Some(vec![string]);
}
69 => {
let (mut offset, v1) = utils::get_u16(param_bytes, offset).unwrap();
var1 = Some(v1);
// Get first set of messages
let mut messages = Vec::new();
for _ in 0..2 {
let (o, string) = utils::get_string(param_bytes, offset, Encoding::UTF16, None).unwrap();
messages.push(string);
offset = o;
}
message_1 = Some(messages);
// Get second set of messages
let mut messages = Vec::new();
for _ in 0..2 {
let (o, string) = utils::get_string(param_bytes, offset, Encoding::UTF16, None).unwrap();
messages.push(string);
offset = o;
}
message_2 = Some(messages);
}
_ => return abort_task
}
Self::Task {
task_type,
var1,
var2,
var3,
var4,
message_1,
message_2,
raw_args,
}
}
} }

View file

@ -8,6 +8,29 @@ pub enum Encoding {
ShiftJIS, ShiftJIS,
} }
impl Encoding {
    /// Returns the width in bytes associated with this encoding:
    /// 2 for UTF-16, 1 for UTF-8 and Shift-JIS.
    ///
    /// `get_string` adds this value to the end position of a string to
    /// compute the offset that follows it.
    pub fn width(&self) -> usize {
        match self {
            Self::UTF16 => 2,
            Self::UTF8 | Self::ShiftJIS => 1,
        }
    }
}
/// Reads a little-endian `u16` from `bytes` starting at `offset`.
///
/// Returns the offset immediately after the value together with the
/// value itself.
///
/// # Errors
///
/// Returns an error, rather than panicking, when fewer than two bytes
/// are available at `offset` or when `offset + 2` overflows.
pub fn get_u16(bytes: &[u8], offset: usize) -> Result<(usize, u16), Box<dyn Error>> {
    let end = offset
        .checked_add(2)
        .ok_or("offset overflow while reading u16")?;

    // Checked slicing: out-of-range reads become an `Err` for the caller.
    let raw = bytes
        .get(offset..end)
        .ok_or("not enough bytes to read u16")?;

    Ok((end, u16::from_le_bytes([raw[0], raw[1]])))
}
/// Reads a little-endian `u32` from `bytes` starting at `offset`.
///
/// Returns the offset immediately after the value together with the
/// value itself.
///
/// # Errors
///
/// Returns an error, rather than panicking, when fewer than four bytes
/// are available at `offset` or when `offset + 4` overflows.
pub fn get_u32(bytes: &[u8], offset: usize) -> Result<(usize, u32), Box<dyn Error>> {
    let end = offset
        .checked_add(4)
        .ok_or("offset overflow while reading u32")?;

    // Checked slicing: out-of-range reads become an `Err` for the caller.
    let raw = bytes
        .get(offset..end)
        .ok_or("not enough bytes to read u32")?;

    Ok((end, u32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]])))
}
pub fn get_string( pub fn get_string(
bytes: &[u8], bytes: &[u8],
offset: usize, offset: usize,
@ -18,7 +41,6 @@ pub fn get_string(
// Find the end of the string // Find the end of the string
let mut end = 0; let mut end = 0;
let mut char_width = 1;
if let Some(l) = len { if let Some(l) = len {
end = l; end = l;
} else { } else {
@ -29,7 +51,6 @@ pub fn get_string(
} }
}, },
Encoding::UTF16 => { Encoding::UTF16 => {
char_width = 2;
while (end + 1 < slice.len()) && !((slice[end] == 0) && (slice[end + 1] == 0)) { while (end + 1 < slice.len()) && !((slice[end] == 0) && (slice[end + 1] == 0)) {
end += 2 end += 2
} }
@ -37,6 +58,7 @@ pub fn get_string(
} }
}; };
// Get the actual string data using the proper decoder
let string = match format { let string = match format {
Encoding::UTF8 => String::from_utf8(slice[..end].to_vec())?, Encoding::UTF8 => String::from_utf8(slice[..end].to_vec())?,
Encoding::UTF16 => { Encoding::UTF16 => {
@ -49,5 +71,5 @@ pub fn get_string(
Encoding::ShiftJIS => SHIFT_JIS.decode(&slice[..end]).0.to_string(), Encoding::ShiftJIS => SHIFT_JIS.decode(&slice[..end]).0.to_string(),
}; };
Ok((offset + end + char_width, string)) Ok((offset + end + format.width(), string))
} }