use crate::netmd::mappings::{ALLOWED_HW_KANA, MAPPINGS_DE, MAPPINGS_HW, MAPPINGS_JP, MAPPINGS_RU}; use diacritics; use encoding_rs::SHIFT_JIS; use regex::Regex; use std::{collections::hash_map::HashMap, error::Error, vec::IntoIter}; use unicode_normalization::UnicodeNormalization; extern crate kana; use kana::*; pub fn bcd_to_int(mut bcd: i32) -> i32 { let mut value = 0; let mut nibble = 0; while bcd != 0 { let nibble_value = bcd & 0xf; bcd >>= 4; value += nibble_value * i32::pow(10, nibble); nibble += 1; } value } pub fn int_to_bcd(mut value: i32) -> i32 { let mut bcd = 0; let mut shift = 0; while value > 0 { let digit = value % 10; bcd |= digit << shift; shift += 4; value /= 10; } bcd } pub fn half_width_to_full_width_range(range: &str) -> String { let mappings: HashMap = HashMap::from([ ('0', '0'), ('1', '1'), ('2', '2'), ('3', '3'), ('4', '4'), ('5', '5'), ('6', '6'), ('7', '7'), ('8', '8'), ('9', '9'), ('-', '-'), ('/', '/'), (';', ';'), ]); range .chars() .map(|char| mappings.get(&char).unwrap()) .collect() } pub fn get_bytes(iterator: &mut IntoIter) -> Result<[u8; S], Box> { let byte_vec: Vec = iterator.take(S).collect(); let bytes: [u8; S] = byte_vec.try_into().unwrap(); Ok(bytes) } pub fn length_after_encoding_to_jis(string: &str) -> usize { let new_string = SHIFT_JIS.encode(string); new_string.0.len() } pub fn validate_shift_jis(sjis_string: Vec) -> bool { let (_, _, had_errors) = SHIFT_JIS.decode(&sjis_string); had_errors } fn check(string: String) -> Option { if MAPPINGS_HW.contains_key(&string) { return Some(MAPPINGS_HW.get(&string).unwrap().to_string()); } let mut ch = string.chars(); if (ch.next().unwrap() as u32) < 0x7f || ALLOWED_HW_KANA.contains(&string) { return Some(string); } None } pub fn sanitize_half_width_title(mut title: String) -> Vec { title = wide2ascii(&title); title = nowidespace(&title); title = hira2kata(&title); title = combine(&title); println!("{}", title); let new_title: String = title .chars() .map(|c| { check(c.to_string()).unwrap_or( check(diacritics::remove_diacritics(&c.to_string())).unwrap_or(" ".to_string()), ) }) .collect(); let sjis_string = SHIFT_JIS.encode(&new_title).0; if validate_shift_jis(sjis_string.clone().into()) { return agressive_sanitize_title(&title).into(); } sjis_string.into() } // TODO: This function is bad, probably should do the string sanitization in the frontend pub fn sanitize_full_width_title(title: &str, just_remap: bool) -> Vec { let new_title: String = title .chars() .map(|character| { match MAPPINGS_JP.get(&character.to_string()) { Some(string) => string.clone(), None => character.to_string().clone(), } .to_string() }) .map(|character| { match MAPPINGS_RU.get(&character.to_string()) { Some(string) => string.clone(), None => character.to_string().clone(), } .to_string() }) .map(|character| { match MAPPINGS_DE.get(&character.to_string()) { Some(string) => string.clone(), None => character.to_string().clone(), } .to_string() }) .collect::(); if just_remap { return new_title.into(); }; let sjis_string = SHIFT_JIS.encode(&new_title).0; if validate_shift_jis(sjis_string.clone().into()) { return agressive_sanitize_title(title).into(); } sjis_string.into() } pub fn agressive_sanitize_title(title: &str) -> String { let re = Regex::new(r"[^\x00-\x7F]").unwrap(); re.replace_all( &diacritics::remove_diacritics(title) .nfd() .collect::(), "", ) .into() } pub fn time_to_duration(time: &Vec) -> std::time::Duration { assert_eq!(time.len(), 4); std::time::Duration::from_micros( (time[0] * 3600000000) + (time[1] * 60000000) + (time[2] * 1000000) + (time[3] * 11600), ) }