Skip to content

Commit

Permalink
Extract inline/internal images from HTML content
Browse files Browse the repository at this point in the history
Previously inline images were embedded in HTML, which was inefficient.
This commit extracts them into individual image parts, which is not only
more efficient but also makes it easier to link to these images on
correspond/comment.

For new transactions with images linking to existing attachments (usually
via quoted transactions), we also extract these linked images like inline
images, so newly created transactions can be independent of the quoted
transactions.

For outgoing emails that contain inline images, this commit automatically
converts them to corresponding "multipart/related" entities that contain
both HTML and linked images, which is more consistent with email RFCs.
  • Loading branch information
sunnavy committed Jun 5, 2024
1 parent 04a898c commit 1eef9fc
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 8 deletions.
5 changes: 4 additions & 1 deletion lib/RT/Action/SendEmail.pm
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,10 @@ sub Prepare {
&& !$MIMEObj->head->get('To')
&& ( $MIMEObj->head->get('Cc') or $MIMEObj->head->get('Bcc') );

# For security reasons, we only send out textual mails.
# For security reasons, we only send out textual+image mails.
foreach my $part ( grep !$_->is_multipart, $MIMEObj->parts_DFS ) {
my $type = $part->mime_type || 'text/plain';
next if $type =~ m{^image/};
$type = 'text/plain' unless RT::I18N::IsTextualContentType($type);
$part->head->mime_attr( "Content-Type" => $type );
# utf-8 here is for _FindOrGuessCharset in I18N.pm
Expand Down Expand Up @@ -396,6 +397,8 @@ sub AddAttachments {
# attach any of this transaction's attachments
my $seen_attachment = 0;
while ( my $attach = $attachments->Next ) {
# Skip if it's already added(as inline) in template.
next if $self->TemplateObj->{_AddedAttachments} && $self->TemplateObj->{_AddedAttachments}{ $attach->Id };
if ( !$seen_attachment ) {
$MIMEObj->make_multipart( 'mixed', Force => 1 );
$seen_attachment = 1;
Expand Down
95 changes: 92 additions & 3 deletions lib/RT/Interface/Web.pm
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ use HTTP::Status qw();
use Regexp::Common;
use RT::Shortener;
use RT::Interface::Web::ReportsRegistry;
use MIME::Base64;
use Digest::SHA 'sha1_hex';

our @SHORTENER_SEARCH_FIELDS
= qw/Class ObjectType BaseQuery Query Format RowsPerPage Order OrderBy ExtraQueryParams ResultPage/;
Expand Down Expand Up @@ -2195,6 +2197,68 @@ sub ExpandShortenerCode {
}
}

=head2 ExtractImages Content => $Content, CurrentUser => $CurrentUser

Extract images from the HTML in C<$Content> and rewrite the C<src> attribute
of each extracted C<img> tag to C<cid:...>.

Currently it supports images embedded as base64 C<data:> URIs and ones
linking to existing ticket attachments (C</Ticket/Attachment/...> or
C</SelfService/Attachment/...>, optionally prefixed with the configured
C<WebPath>). A linked attachment is only extracted when C<$CurrentUser>
(by default the current session user) can see it; otherwise a warning is
logged and the link is left untouched.

Returns the modified HTML followed by the list of extracted images. Each
image is a hashref containing:

=over 4

=item cid

content id, built from the SHA-1 hex digest of the image data and C<rtname>

=item content_type

image type

=item content

image data

=back

Identical images (same content id) are returned only once.

=cut

sub ExtractImages {
    my %args = (
        Content     => undef,
        CurrentUser => $HTML::Mason::Commands::session{CurrentUser},
        @_,
    );

    # Links to attachments are generated with WebPath prepended (e.g. when
    # quoting a transaction), so accept, but do not require, that prefix.
    my $web_path = RT->Config->Get('WebPath') // '';

    my ( @images, %added );
    require HTML::RewriteAttributes::Resources;
    my $html = HTML::RewriteAttributes::Resources->rewrite(
        $args{Content},
        sub {
            my $uri  = shift;
            my %meta = @_;

            # Only <img src="..."> is of interest; leave everything else alone.
            return $uri unless lc $meta{tag} eq 'img' && lc $meta{attr} eq 'src';

            my ( $content_type, $data );
            if ( $uri =~ m{^data:(.+);base64,(.+)}s ) {
                $content_type = $1;
                $data         = decode_base64($2);
            }
            elsif ( $uri =~ m{^(?:\Q$web_path\E)?/(?:SelfService|Ticket)/Attachment/\d+/(\d+)} ) {

                # Copy the capture right away: $1 is a global that later
                # regex matches may overwrite, and passing it directly to a
                # method hands over an alias rather than a value.
                my $attachment_id = $1;

                my $attachment = RT::Attachment->new( $args{CurrentUser} );
                $attachment->Load($attachment_id);
                if ( $attachment->CurrentUserCanSee ) {
                    $content_type = $attachment->ContentType;
                    $data         = $attachment->Content;
                }
                else {
                    RT->Logger->warning(
                        "Attachment #$attachment_id is not visible to current user #" . $args{CurrentUser}->Id );
                }
            }

            # Nothing extracted: keep the original URI.
            return $uri unless defined $data && length $data;

            # The content id is derived from the data, so the same image used
            # several times maps to a single part.
            my $cid = sha1_hex($data) . '@' . RT->Config->Get('rtname');
            push @images, { cid => $cid, content => $data, content_type => $content_type }
                unless $added{$cid}++;
            return "cid:$cid";
        }
    );
    return ( $html, @images );
}

package HTML::Mason::Commands;

use vars qw/$r $m %session/;
Expand Down Expand Up @@ -2542,7 +2606,7 @@ sub CreateTicket {
push @attachments, grep $_, map $ARGS{Attachments}->{$_}, sort keys %{ $ARGS{'Attachments'} };
}
if ( @attachments ) {
$MIMEObj->make_multipart;
$MIMEObj->make_multipart( 'mixed', Force => 1 );
$MIMEObj->add_part( $_ ) foreach @attachments;
}

Expand Down Expand Up @@ -2733,7 +2797,7 @@ sub ProcessUpdateMessage {
}

if ( @attachments ) {
$Message->make_multipart;
$Message->make_multipart( 'mixed', Force => 1 );
$Message->add_part( $_ ) foreach @attachments;
}

Expand Down Expand Up @@ -2898,7 +2962,11 @@ sub ProcessAttachments {
Takes a paramhash Subject, Body and AttachmentFieldName.
Also takes Form, Cc and Type as optional paramhash keys.
Also takes Form, Cc, Type, and ExtractImages as optional paramhash keys.
If ExtractImages is true (the default), it will extract images from the HTML
body and generate a corresponding "multipart/related" entity that contains
the modified body and also the extracted images.
Returns a MIME::Entity.
Expand All @@ -2915,8 +2983,15 @@ sub MakeMIMEEntity {
AttachmentFieldName => undef,
Type => undef,
Interface => undef,
ExtractImages => 1,
@_,
);

my @images;
if ( $args{ExtractImages} && ( $args{Type} // '' ) eq 'text/html' ) {
( $args{Body}, @images ) = RT::Interface::Web::ExtractImages( Content => $args{Body} );
}

my $Message = MIME::Entity->build(
Type => 'multipart/mixed',
"Message-Id" => Encode::encode( "UTF-8", RT::Interface::Email::GenMessageId ),
Expand Down Expand Up @@ -2972,6 +3047,20 @@ sub MakeMIMEEntity {

RT::I18N::SetMIMEEntityToUTF8($Message); # convert text parts into utf-8

if (@images) {
$Message->make_multipart('related');
# RFC2387 3.1 says that "type" must be specified
$Message->head->mime_attr('Content-type.type' => 'text/html');
for my $image (@images) {
$Message->attach(
Type => $image->{content_type},
Data => $image->{content},
Disposition => 'inline',
Id => $image->{cid},
);
}
}

return ($Message);

}
Expand Down
49 changes: 45 additions & 4 deletions lib/RT/Template.pm
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ sub Parse {
my $self = shift;
my ($rv, $msg);

delete $self->{_AddedAttachments};

if (not $self->IsEmpty and $self->Content =~ m{^Content-Type:\s+text/html\b}im) {
local $RT::Transaction::PreferredContentType = 'text/html';
Expand All @@ -417,8 +418,47 @@ sub Parse {

return ($rv, $msg) unless $rv;

my %args = @_;
my $mime_type = $self->MIMEObj->mime_type;
if (defined $mime_type and $mime_type eq 'text/html') {
if ( defined $mime_type and $mime_type eq 'text/html' and $args{TransactionObj} ) {
if ( my $content_obj = $args{TransactionObj}->ContentObj( Type => 'text/html' ) ) {
if ( my $related_part = $content_obj->Closest("multipart/related") ) {
my $body = Encode::decode( "UTF-8", $self->MIMEObj->bodyhandle->as_string );
my ( @attachments, %added );
require HTML::RewriteAttributes::Resources;
HTML::RewriteAttributes::Resources->rewrite(
$body,
sub {
my $cid = shift;
my %meta = @_;
return $cid unless lc $meta{tag} eq 'img' && lc $meta{attr} eq 'src' && $cid =~ s/^cid://i;

for my $attach ( @{$related_part->Children->ItemsArrayRef } ) {
if ( ( $attach->GetHeader('Content-ID') || '' ) =~ /^(<)?\Q$cid\E(?(1)>)$/ ) {
push @attachments, $attach unless $added{$attach->Id}++;
}
}

return "cid:$cid";
}
);

if ( @attachments ) {
$self->MIMEObj->make_multipart('related');
# RFC2387 3.1 says that "type" must be specified
$self->MIMEObj->head->mime_attr('Content-type.type' => 'text/html');
for my $attach ( @attachments ) {
$self->MIMEObj->attach(
Type => $attach->ContentType,
Disposition => $attach->GetHeader('Content-Disposition'),
Id => $attach->GetHeader('Content-ID'),
Data => $attach->OriginalContent,
);
}
$self->{_AddedAttachments} = { map { $_->Id => 1 } @attachments };
}
}
}
$self->_DowngradeFromHTML(@_);
}

Expand Down Expand Up @@ -675,7 +715,8 @@ sub _DowngradeFromHTML {
my $self = shift;
my $orig_entity = $self->MIMEObj;

my $new_entity = $orig_entity->dup; # this will fail badly if we go away from InCore parsing
my $html_entity = $orig_entity->is_multipart ? $orig_entity->parts(0) : $orig_entity;
my $new_entity = $html_entity->dup; # this will fail badly if we go away from InCore parsing

# We're going to make this multipart/alternative below, so clear out the Subject
# header copied from the original when we dup'd above.
Expand All @@ -689,8 +730,8 @@ sub _DowngradeFromHTML {
$new_entity->head->mime_attr( "Content-Type" => 'text/plain' );
$new_entity->head->mime_attr( "Content-Type.charset" => 'utf-8' );

$orig_entity->head->mime_attr( "Content-Type" => 'text/html' );
$orig_entity->head->mime_attr( "Content-Type.charset" => 'utf-8' );
$html_entity->head->mime_attr( "Content-Type" => 'text/html' );
$html_entity->head->mime_attr( "Content-Type.charset" => 'utf-8' );

my $body = $new_entity->bodyhandle->as_string;
$body = Encode::decode( "UTF-8", $body );
Expand Down
8 changes: 8 additions & 0 deletions lib/RT/Transaction.pm
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,14 @@ sub _FindPreferredContentObj {
}

}

# Handle the case where multipart/related is a child of multipart/alternative.
my $related_parts = $Attachment->Children;
$related_parts->ContentType( VALUE => 'multipart/related' );
while ( my $child = $related_parts->Next ) {
my $ret = _FindPreferredContentObj( %args, Attachment => $child );
return $ret if $ret;
}
}

# If this is a message/rfc822 mail, we need to dig into it in order to find
Expand Down
10 changes: 10 additions & 0 deletions share/html/Elements/MessageBox
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,16 @@ if ( $QuoteTransaction ) {

if ( $transaction->Id && !$QuoteContent ) {
$message = $transaction->Content( Quote => 1, Type => $Type );
# Convert cid: images to links so they can be rendered.
if ( $Type eq 'text/html' && $message && $transaction->ObjectType eq 'RT::Ticket' ) {
RT::Interface::Web::RewriteInlineImages(
Content => \$message,
Attachment => $transaction->ContentObj( Type => $Type ) || undef,
AttachmentPath => join( '/',
RT->Config->Get('WebPath'), $session{CurrentUser}->Privileged ? 'Ticket' : 'SelfService',
'Attachment' ),
);
}
}
else {
$message = RT::Transaction->QuoteContent(
Expand Down

0 comments on commit 1eef9fc

Please sign in to comment.