Skip to content

Commit

Permalink
Fix glyph IDs in subset font
Browse files Browse the repository at this point in the history
  • Loading branch information
fschutt committed Nov 5, 2024
1 parent e5580c0 commit c605d35
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 54 deletions.
2 changes: 0 additions & 2 deletions src/color.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ pub enum ColorSpace {
GreyscaleAlpha,
}

#[cfg(feature = "images")]
impl From<image::ColorType> for ColorSpace {
fn from(color_type: image::ColorType) -> Self {
use image::ColorType::*;
Expand Down Expand Up @@ -46,7 +45,6 @@ pub enum ColorBits {
Bit16,
}

#[cfg(feature = "images")]
impl From<image::ColorType> for ColorBits {
fn from(color_type: image::ColorType) -> ColorBits {
use image::ColorType::*;
Expand Down
23 changes: 20 additions & 3 deletions src/font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ impl fmt::Debug for ParsedFont {
}
}

/// Result of subsetting a font: the bytes of the newly generated font file
/// plus the glyph ID remapping that goes with it.
///
/// Glyph IDs in the subset font differ from those in the original font, so
/// anything that writes glyph IDs must translate them through
/// `glyph_mapping` first.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SubsetFont {
    /// Raw bytes of the generated (subset) font file.
    pub bytes: Vec<u8>,
    /// Maps an original glyph ID to `(glyph ID in the subset font, character)`.
    pub glyph_mapping: BTreeMap<u16, (u16, char)>,
}

impl ParsedFont {

/// Returns the glyph IDs used in the PDF file
Expand Down Expand Up @@ -198,7 +203,14 @@ impl ParsedFont {
}

/// Generates a new font file from the used glyph IDs
pub(crate) fn subset(&self, glyph_ids: &BTreeMap<u16, char>) -> Result<Vec<u8>, String> {
pub(crate) fn subset(&self, glyph_ids: &BTreeMap<u16, char>) -> Result<SubsetFont, String> {

let glyph_mapping = glyph_ids
.iter()
.enumerate()
.map(|(new_glyph_id, (original_glyph_id, ch))| {
(*original_glyph_id, (new_glyph_id as u16, *ch))
}).collect();

let scope = ReadScope::new(&self.original_bytes);

Expand All @@ -208,10 +220,15 @@ impl ParsedFont {
let provider = font_file.table_provider(self.original_index)
.map_err(|e| e.to_string())?;

allsorts::subset::subset(
let font = allsorts::subset::subset(
&provider,
&glyph_ids.keys().copied().collect::<Vec<_>>()
).map_err(|e| e.to_string())
).map_err(|e| e.to_string())?;

Ok(SubsetFont {
bytes: font,
glyph_mapping,
})
}

pub(crate) fn generate_cid_to_unicode_map(&self, font_id: &FontId, glyph_ids: &BTreeMap<u16, char>) -> String {
Expand Down
66 changes: 41 additions & 25 deletions src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::ParsedFont;
use crate::PdfDocument;
use crate::PdfDocumentInfo;
use crate::color::IccProfile;
use crate::PdfFontMap;
use crate::font::SubsetFont;
use crate::PdfPage;
use crate::PdfResources;
use crate::Polygon;
Expand Down Expand Up @@ -388,30 +388,46 @@ fn translate_operations(ops: &[Op], fonts: &BTreeMap<FontId, PreparedFont>) -> V
content.push(LoOp::new("Tj", vec![LoString(bytes, Hexadecimal)]));
},
Op::WriteCodepoints { font, cp } => {

let bytes = cp
.into_iter()
.flat_map(|(x, _)| {
let [b0, b1] = x.to_be_bytes();
std::iter::once(b0).chain(std::iter::once(b1))
})
.collect::<Vec<u8>>();

content.push(LoOp::new("Tj", vec![LoString(bytes, Hexadecimal)]));
if let Some(font) = fonts.get(&font) {

let subset_codepoints = cp.iter()
.filter_map(|(gid, ch)| {
font.subset_font.glyph_mapping.get(gid).map(|c| (c.0, *ch))
}).collect::<Vec<_>>();

let bytes = subset_codepoints
.into_iter()
.flat_map(|(x, _)| {
let [b0, b1] = x.to_be_bytes();
std::iter::once(b0).chain(std::iter::once(b1))
})
.collect::<Vec<u8>>();

content.push(LoOp::new("Tj", vec![LoString(bytes, Hexadecimal)]));
}
},
Op::WriteCodepointsWithKerning { font, cpk } => {

let mut list = Vec::new();
if let Some(font) = fonts.get(&font) {

let subset_codepoints = cpk.iter()
.filter_map(|(kern, gid, ch)| {
font.subset_font.glyph_mapping.get(gid).map(|c| (*kern, c.0, *ch))
}).collect::<Vec<_>>();


let mut list = Vec::new();

for (pos, codepoint, _) in cpk.iter() {
if *pos != 0 {
list.push(Integer(*pos));
for (pos, codepoint, _) in subset_codepoints.iter() {
if *pos != 0 {
list.push(Integer(*pos));
}
let bytes = codepoint.to_be_bytes().to_vec();
list.push(LoString(bytes, Hexadecimal));
}
let bytes = codepoint.to_be_bytes().to_vec();
list.push(LoString(bytes, Hexadecimal));

content.push(LoOp::new("TJ", vec![Array(list)]));
}

content.push(LoOp::new("TJ", vec![Array(list)]));
},
Op::AddLineBreak => {
content.push(LoOp::new("T*", vec![]));
Expand Down Expand Up @@ -502,7 +518,7 @@ fn translate_operations(ops: &[Op], fonts: &BTreeMap<FontId, PreparedFont>) -> V

struct PreparedFont {
original: ParsedFont,
subset_font_bytes: Vec<u8>,
subset_font: SubsetFont,
cid_to_unicode_map: String,
vertical_writing: bool, // default: false
ascent: i64,
Expand Down Expand Up @@ -753,14 +769,14 @@ fn prepare_fonts(resources: &PdfResources, pages: &[PdfPage]) -> BTreeMap<FontId
if glyph_ids.is_empty() {
continue; // unused font
}
let font_bytes = match font.subset(&glyph_ids) {
let subset_font = match font.subset(&glyph_ids) {
Ok(o) => o,
Err(e) => {
println!("{e}");
continue;
}
};
let font = match ParsedFont::from_bytes(&font_bytes, 0) {
let font = match ParsedFont::from_bytes(&subset_font.bytes, 0) {
Some(s) => s,
None => continue,
};
Expand All @@ -769,7 +785,7 @@ fn prepare_fonts(resources: &PdfResources, pages: &[PdfPage]) -> BTreeMap<FontId
let widths = font.get_normalized_widths(&glyph_ids);
fonts_in_pdf.insert(font_id.clone(), PreparedFont {
original: font.clone(),
subset_font_bytes: font_bytes,
subset_font: subset_font,
cid_to_unicode_map: cid_to_unicode,
vertical_writing: false, // TODO
ascent: font.font_metrics.ascender as i64,
Expand All @@ -791,8 +807,8 @@ fn add_font_to_pdf(doc: &mut lopdf::Document, font_id: &FontId, prepared: &Prepa

// WARNING: the font stream must NOT be compressed (hence `with_compression(false)` below)
let font_stream = LoStream::new(
LoDictionary::from_iter(vec![("Length1", Integer(prepared.subset_font_bytes.len() as i64))]),
prepared.subset_font_bytes.clone(),
LoDictionary::from_iter(vec![("Length1", Integer(prepared.subset_font.bytes.len() as i64))]),
prepared.subset_font.bytes.clone(),
).with_compression(false);

let font_stream_ref = doc.add_object(font_stream);
Expand Down
27 changes: 3 additions & 24 deletions src/utils.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@

use std::sync::atomic::{AtomicUsize, Ordering};
use crate::date::OffsetDateTime;
#[cfg(feature = "images")]
use image::ColorType;
#[cfg(feature = "images")]
use crate::xobject::ImageXObject;
#[cfg(feature = "images")]
use crate::color::{ColorSpace, ColorBits};
#[cfg(feature = "images")]
use crate::units::Px;
use crate::{ColorBits, ColorSpace, ImageXObject, Px};
use crate::date::OffsetDateTime;

/// Since the random number generator doesn't have to be cryptographically secure
/// it doesn't make sense to import the entire rand library, so this is just a
Expand Down Expand Up @@ -101,21 +95,7 @@ fn u8_to_char(input: u8) -> char {
(b'A' + input) as char
}

/// No-op variant of `compress_stream`, compiled when `debug_assertions` is on
/// or the `less-optimization` feature is enabled.
///
/// Returns the given stream unchanged, i.e. no compression is attempted.
#[cfg(any(debug_assertions, feature = "less-optimization"))]
#[inline]
pub fn compress_stream(stream: lopdf::Stream) -> lopdf::Stream {
stream
}

/// Compressing variant of `compress_stream`, compiled only when
/// `debug_assertions` is off and the `less-optimization` feature is disabled.
///
/// Calls `compress()` on the stream and returns the stream afterwards.
#[cfg(all(not(debug_assertions), not(feature = "less-optimization")))]
#[inline]
pub fn compress_stream(mut stream: lopdf::Stream) -> lopdf::Stream {
// Best effort: whatever `compress()` returns is deliberately discarded,
// so the stream is handed back regardless of the outcome.
let _ = stream.compress();
stream
}

#[cfg(feature = "images")]
fn preprocess_image_with_alpha(
pub(crate) fn preprocess_image_with_alpha(
color_type: ColorType,
image_data: Vec<u8>,
dim: (u32, u32),
Expand Down Expand Up @@ -156,7 +136,6 @@ fn preprocess_image_with_alpha(
}

/// Takes a Vec<u8> of RGBA data and returns two Vec<u8> of RGB and alpha data
#[cfg(feature = "images")]
pub(crate) fn rgba_to_rgb(data: Vec<u8>) -> (Vec<u8>, Vec<u8>) {
let mut rgb = Vec::with_capacity(data.len() / 4 * 3);
let mut alpha = Vec::with_capacity(data.len() / 4);
Expand Down

0 comments on commit c605d35

Please sign in to comment.