diff --git a/src/header.rs b/src/header.rs
index 3508a5f..a38c420 100644
--- a/src/header.rs
+++ b/src/header.rs
@@ -98,7 +98,7 @@ pub enum ColorFormat {
 impl ColorFormat {
     /// Bits per color channel.
     ///
-    /// Ex. Rgba32 has `8bpc`
+    /// Ex. `Rgba8` has `8bpc`
     pub fn bpc(&self) -> u8 {
         match self {
             ColorFormat::Rgba8 => 8,
@@ -108,7 +108,7 @@ impl ColorFormat {
 
     /// Bits per pixel.
     ///
-    /// Ex. Rgba32 has `32bpp`
+    /// Ex. `Rgba8` has `32bpp`
     pub fn bpp(&self) -> u16 {
         match self {
             ColorFormat::Rgba8 => 32,
@@ -118,13 +118,27 @@ impl ColorFormat {
 
     /// Number of color channels.
     ///
-    /// Ex. Rgba32 has `4` channels
-    pub fn channels(self) -> u16 {
+    /// Ex. `Rgba8` has `4` channels, `Rgb8` has `3`
+    pub fn channels(&self) -> u16 {
         match self {
             ColorFormat::Rgba8 => 4,
             ColorFormat::Rgb8 => 3,
         }
     }
+
+    /// Zero-based index of the channel holding alpha, or [`None`] if there is none.
+    ///
+    /// Ex. `Rgba8` keeps alpha in channel `3` (its fourth and last channel)
+    pub fn alpha_channel(&self) -> Option<u8> {
+        match self {
+            ColorFormat::Rgba8 => Some(3),
+            ColorFormat::Rgb8 => None,
+        }
+    }
+
+    pub fn pixel_byte_count(&self) -> u16 {
+        self.bpp() / 8 // bits per pixel -> whole bytes per pixel (integer division)
+    }
 }
 
 impl TryFrom<u8> for ColorFormat {
diff --git a/src/operations.rs b/src/operations.rs
index 602ecfb..8be04dd 100644
--- a/src/operations.rs
+++ b/src/operations.rs
@@ -1,19 +1,72 @@
-pub fn line_diff(width: u32, height: u32, data: &[u8]) -> Vec<u8> {
-    let mut output_buf = Vec::with_capacity((width * height * 4) as usize);
+use crate::ColorFormat;
+use rayon::prelude::*;
+
+/// Delta-encodes an interleaved bitmap row by row; [`add_rows`] is the decoding counterpart.
+///
+/// Rows are grouped into bands of `ceil(height / 3)` rows. The first row of a band is
+/// copied verbatim; every other row is stored as its wrapping per-byte difference from
+/// the row above. If the format has alpha, the result is then made planar: all RGB
+/// bytes first, followed by all alpha bytes.
+///
+/// # Panics
+///
+/// Panics if `input` holds fewer than `height` rows of `width` pixels.
+pub fn sub_rows(width: u32, height: u32, color_format: ColorFormat, input: &[u8]) -> Vec<u8> {
+    // Multiply as `usize`: the `u32` product could wrap for very wide rows.
+    let line_byte_count = width as usize * color_format.pixel_byte_count() as usize;
+    // Same formula as `add_rows`; the two must agree for a round trip.
+    let block_height = f32::ceil(height as f32 / 3.0) as usize;
+    // The output never exceeds `input.len()`, so reserve once up front.
+    let mut data = Vec::with_capacity(input.len());
+    for y in 0..height as usize {
+        let row = &input[y * line_byte_count..(y + 1) * line_byte_count];
+        if y % block_height == 0 {
+            data.extend_from_slice(row);
+        } else {
+            // The unmodified row above is still in `input`; no scratch copies needed.
+            let above = &input[(y - 1) * line_byte_count..y * line_byte_count];
+            data.extend(row.iter().zip(above).map(|(curr, prev)| curr.wrapping_sub(*prev)));
+        }
+    }
+
+    if color_format.alpha_channel().is_none() {
+        return data;
+    }
+
+    // Alpha is the last of four bytes per pixel: move it into its own trailing plane.
+    let mut planar = Vec::with_capacity(data.len());
+    let mut alpha = Vec::with_capacity(data.len() / 4);
+    for pixel in data.chunks_exact(4) {
+        planar.extend_from_slice(&pixel[..3]);
+        alpha.push(pixel[3]);
+    }
+    planar.extend_from_slice(&alpha);
+    planar
+}
+
+pub fn add_rows(width: u32, height: u32, color_format: ColorFormat, data: &[u8]) -> Vec<u8> {
+    let mut output_buf = Vec::with_capacity((width * height * color_format.channels() as u32) as usize);
 
     let block_height = f32::ceil(height as f32 / 3.0) as u32;
 
-    let mut curr_line;
-    let mut prev_line = Vec::with_capacity(width as usize * 3);
-
-    let mut curr_alpha;
-    let mut prev_alpha = Vec::with_capacity(width as usize);
+    let mut curr_line: Vec<u8>;
+    let mut prev_line = Vec::new();
 
     let mut rgb_index = 0;
-    let mut alpha_index = (width * height * 3) as usize;
+    let mut alpha_index = (width * height * (color_format.channels() as u32 - 1)) as usize;
     for y in 0..height {
-        curr_line = data[rgb_index..rgb_index + width as usize * 3].to_vec();
-        curr_alpha = data[alpha_index..alpha_index + width as usize].to_vec();
+        curr_line = if color_format.alpha_channel().is_some() {
+            data[rgb_index..rgb_index + width as usize * 3]
+                .chunks(3)
+                .zip(data[alpha_index..alpha_index + width as usize].into_iter())
+                .flat_map(|(a, b)| {
+                    a.into_iter().chain(vec![b])
+                })
+                .copied()
+                .collect()
+        } else {
+            data[rgb_index..rgb_index + width as usize * 3].to_vec()
+        };
 
         if y % block_height != 0 {
             curr_line
@@ -22,81 +75,15 @@ pub fn line_diff(width: u32, height: u32, data: &[u8]) -> Vec<u8> {
                 .for_each(|(curr_p, prev_p)| {
                     *curr_p = curr_p.wrapping_add(*prev_p);
                 });
-            curr_alpha
-                .iter_mut()
-                .zip(&prev_alpha)
-                .for_each(|(curr_a, prev_a)| {
-                    *curr_a = curr_a.wrapping_add(*prev_a);
-                });
         }
 
         // Write the decoded RGBA data to the final buffer
-        curr_line
-            .windows(3)
-            .step_by(3)
-            .zip(&curr_alpha)
-            .for_each(|(curr_p, alpha_p)| {
-                output_buf.extend_from_slice(&[curr_p[0], curr_p[1], curr_p[2], *alpha_p]);
-            });
+        output_buf.extend_from_slice(&curr_line);
 
         prev_line.clone_from(&curr_line);
-        prev_alpha.clone_from(&curr_alpha);
-
         rgb_index += width as usize * 3;
         alpha_index += width as usize;
     }
 
     output_buf
 }
-
-pub fn diff_line(width: u32, height: u32, input: &[u8]) -> Vec<u8> {
-    let mut data = Vec::with_capacity(width as usize * 3);
-    let mut alpha_data = Vec::with_capacity(width as usize);
-
-    let block_height = f32::ceil(height as f32 / 3.0) as u32;
-    let pixel_byte_count = 4;
-    let line_byte_count = (width * pixel_byte_count as u32) as usize;
-
-    let mut curr_line: Vec<u8>;
-    let mut prev_line: Vec<u8> = Vec::with_capacity(width as usize * 3);
-
-    let mut curr_alpha: Vec<u8>;
-    let mut prev_alpha: Vec<u8> = Vec::with_capacity(width as usize);
-
-    let mut i = 0;
-    for y in 0..height {
-        curr_line = input[i..i + line_byte_count]
-            .windows(4)
-            .step_by(4)
-            .flat_map(|r| [r[0], r[1], r[2]])
-            .collect();
-        curr_alpha = input[i..i + line_byte_count]
-            .iter()
-            .skip(3)
-            .step_by(4)
-            .copied()
-            .collect();
-
-        if y % block_height != 0 {
-            for x in 0..width as usize * 3 {
-                curr_line[x] = curr_line[x].wrapping_sub(prev_line[x]);
-                prev_line[x] = prev_line[x].wrapping_add(curr_line[x]);
-            }
-            for x in 0..width as usize {
-                curr_alpha[x] = curr_alpha[x].wrapping_sub(prev_alpha[x]);
-                prev_alpha[x] = prev_alpha[x].wrapping_add(curr_alpha[x]);
-            }
-        } else {
-            prev_line.clone_from(&curr_line);
-            prev_alpha.clone_from(&curr_alpha);
-        }
-
-        data.extend_from_slice(&curr_line);
-        alpha_data.extend_from_slice(&curr_alpha);
-        i += line_byte_count;
-    }
-
-    data.extend_from_slice(&alpha_data);
-
-    data
-}
diff --git a/src/picture.rs b/src/picture.rs
index 4f952d9..2b0d357 100644
--- a/src/picture.rs
+++ b/src/picture.rs
@@ -10,7 +10,7 @@ use crate::{
     compression::{dct::{dct_compress, dct_decompress, DctParameters},
     lossless::{compress, decompress, CompressionError, CompressionInfo}},
     header::{ColorFormat, CompressionType, Header},
-    operations::{diff_line, line_diff},
+    operations::{add_rows, sub_rows},
 };
 
 #[derive(Error, Debug)]
@@ -132,7 +132,12 @@ impl SquishyPicture {
         let modified_data = match self.header.compression_type {
             CompressionType::None => &self.bitmap,
             CompressionType::Lossless => {
-                &diff_line(self.header.width, self.header.height, &self.bitmap)
+                &sub_rows(
+                    self.header.width,
+                    self.header.height,
+                    self.header.color_format,
+                    &self.bitmap
+                )
             },
             CompressionType::LossyDct => {
                 &dct_compress(
@@ -187,7 +192,7 @@ impl SquishyPicture {
         let bitmap = match header.compression_type {
             CompressionType::None => pre_bitmap,
             CompressionType::Lossless => {
-                line_diff(header.width, header.height, &pre_bitmap)
+                add_rows(header.width, header.height, header.color_format, &pre_bitmap)
             },
             CompressionType::LossyDct => {
                 dct_decompress(