Skip to content

Commit

Permalink
Implement shrinkh using highway ops
Browse files Browse the repository at this point in the history
  • Loading branch information
kleisauke committed Nov 18, 2024
1 parent 58f92e2 commit f534551
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 1 deletion.
1 change: 1 addition & 0 deletions libvips/resample/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ resample_sources = files(
'resize.c',
'shrink.c',
'shrinkh.c',
'shrinkh_hwy.cpp',
'shrinkv.c',
'shrinkv_hwy.cpp',
'reduce.c',
Expand Down
2 changes: 2 additions & 0 deletions libvips/resample/presample.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ void vips_reduceh_uchar_hwy(VipsPel *pout, VipsPel *pin,
void vips_reducev_uchar_hwy(VipsPel *pout, VipsPel *pin,
int n, int ne, int lskip, const short *restrict k);

void vips_shrinkh_uchar_hwy(VipsPel *pout, VipsPel *pin,
int width, int hshrink, int bands);
void vips_shrinkv_uchar_hwy(VipsPel *pout, VipsPel *pin,
int ne, int vshrink, int lskip);

Expand Down
82 changes: 81 additions & 1 deletion libvips/resample/shrinkh.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include <math.h>

#include <vips/vips.h>
#include <vips/vector.h>
#include <vips/debug.h>
#include <vips/internal.h>

Expand Down Expand Up @@ -245,6 +246,70 @@ vips_shrinkh_gen(VipsRegion *out_region,
return 0;
}

#ifdef HAVE_HWY
/* Generate function for the uchar vector path.
 *
 * @out_region: region to fill; its valid rect is the area to generate
 * @seq: per-thread input region (a VipsRegion on the input image)
 * @a: the input image
 * @b: the VipsShrinkh operation
 * @stop: unused
 *
 * Returns 0 on success, -1 if preparing the input region fails.
 */
static int
vips_shrinkh_uchar_vector_gen(VipsRegion *out_region,
	void *seq, void *a, void *b, gboolean *stop)
{
	VipsRegion *ir = (VipsRegion *) seq;
	VipsImage *in = (VipsImage *) a;
	VipsShrinkh *shrink = (VipsShrinkh *) b;
	VipsRect *r = &out_region->valid;

	/* Work through the request in horizontal strips rather than
	 * preparing all of the matching input at once, which could be huge.
	 *
	 * Single scanlines could trigger a lot of overcomputation upstream,
	 * and output scanlines can be quite small in SMALLTILE, so use the
	 * fatstrip height as a middle ground.
	 */
	const int strip_height = vips__fatstrip_height;
	const int bands = in->Bands;
	const int bottom = r->top + r->height;

	int strip_top;

#ifdef DEBUG
	printf("vips_shrinkh_uchar_vector_gen: generating %d x %d at %d x %d\n",
		r->width, r->height, r->left, r->top);
#endif /*DEBUG*/

	for (strip_top = r->top; strip_top < bottom; strip_top += strip_height) {
		/* The last strip may be shorter than the rest.
		 */
		const int lines = VIPS_MIN(strip_height, bottom - strip_top);

		VipsRect s;
		int line;

		/* The input area behind this strip: same rows, with the
		 * horizontal extent scaled up by the shrink factor.
		 */
		s.left = r->left * shrink->hshrink;
		s.top = strip_top;
		s.width = r->width * shrink->hshrink;
		s.height = lines;
#ifdef DEBUG
		printf("vips_shrinkh_uchar_vector_gen: requesting %d lines from %d\n",
			s.height, s.top);
#endif /*DEBUG*/
		if (vips_region_prepare(ir, &s))
			return -1;

		VIPS_GATE_START("vips_shrinkh_uchar_vector_gen: work");

		/* Shrink one scanline at a time.
		 */
		for (line = 0; line < lines; line++) {
			const int yy = strip_top + line;

			VipsPel *q = VIPS_REGION_ADDR(out_region, r->left, yy);
			VipsPel *p = VIPS_REGION_ADDR(ir, s.left, yy);

			vips_shrinkh_uchar_hwy(q, p,
				r->width, shrink->hshrink, bands);
		}

		VIPS_GATE_STOP("vips_shrinkh_uchar_vector_gen: work");
	}

	VIPS_COUNT_PIXELS(out_region, "vips_shrinkh_uchar_vector_gen");

	return 0;
}
#endif /*HAVE_HWY*/

static int
vips_shrinkh_build(VipsObject *object)
{
Expand All @@ -255,6 +320,7 @@ vips_shrinkh_build(VipsObject *object)
vips_object_local_array(object, 2);

VipsImage *in;
VipsGenerateFn generate;

if (VIPS_OBJECT_CLASS(vips_shrinkh_parent_class)->build(object))
return -1;
Expand All @@ -281,6 +347,20 @@ vips_shrinkh_build(VipsObject *object)
return -1;
in = t[1];

/* For uchar input, try to make a vector path.
*/
#ifdef HAVE_HWY
if (in->BandFmt == VIPS_FORMAT_UCHAR &&
vips_vector_isenabled()) {
generate = vips_shrinkh_uchar_vector_gen;
g_info("shrinkh: using vector path");
}
else
#endif /*HAVE_HWY*/
/* Default to the C path.
*/
generate = vips_shrinkh_gen;

if (vips_image_pipelinev(resample->out,
VIPS_DEMAND_STYLE_THINSTRIP, in, NULL))
return -1;
Expand All @@ -307,7 +387,7 @@ vips_shrinkh_build(VipsObject *object)
#endif /*DEBUG*/

if (vips_image_generate(resample->out,
vips_start_one, vips_shrinkh_gen, vips_stop_one,
vips_start_one, generate, vips_stop_one,
in, shrink))
return -1;

Expand Down
130 changes: 130 additions & 0 deletions libvips/resample/shrinkh_hwy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/* 15/11/24 kleisauke
* - from shrinkv_hwy.cpp
*/

/*
This file is part of VIPS.
VIPS is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/

/*
These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /*HAVE_CONFIG_H*/
#include <glib/gi18n-lib.h>

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <vips/vips.h>
#include <vips/vector.h>
#include <vips/debug.h>
#include <vips/internal.h>

#include "presample.h"

#ifdef HAVE_HWY

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "libvips/resample/shrinkh_hwy.cpp"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>

namespace HWY_NAMESPACE {

using namespace hwy::HWY_NAMESPACE;

/* Vector tags used by the kernel below: sums are accumulated in uint32
 * lanes, and du8x32 is the uint8 tag rebound from DU32, used for the
 * loads and stores that feed and drain those lanes.
 */
using DU32 = ScalableTag<uint32_t>;
constexpr Rebind<uint8_t, DU32> du8x32;
constexpr DU32 du32;

/* Fixed-point scaling constants. Despite its name, maxUint32 evaluates to
 * 2^32 (4294967296.0), i.e. one more than UINT32_MAX; maxBits is 2^8.
 * Their ratio divided by hshrink gives the kernel's multiplier.
 */
constexpr float maxUint32 = (1 << 30) * 4.0;
constexpr float maxBits = 1 << 8;

/* Horizontal box shrink of one scanline of uchar pixels. This definition
 * lives in HWY_NAMESPACE and is compiled once per Highway target.
 *
 * For each of the @width output pixels, @hshrink consecutive input pixels
 * (stepping @bands bytes each time) are summed in uint32 lanes, then the
 * sum is scaled back to 8 bits with a fixed-point multiply and shift.
 *
 * @pout: first output pixel of the line
 * @pin: first input pixel of the line
 * @width: number of output pixels to produce
 * @hshrink: number of input pixels combined into each output pixel
 * @bands: bytes per pixel
 *
 * The whole body is compiled out when HWY_TARGET == HWY_SCALAR, so for
 * that target nothing is written to @pout.
 *
 * NOTE(review): every LoadU/StoreU below uses the du8x32 tag, so it moves
 * one full du8x32 vector and not @bands bytes. That appears to mean (a)
 * loads and the final store can touch bytes past the last pixel of the
 * line, and (b) if @bands exceeds the lane count of the selected target,
 * the extra bands are never written. Confirm that callers guarantee
 * enough slack after the line and a small enough band count.
 */
HWY_ATTR void
vips_shrinkh_uchar_hwy(VipsPel *pout, VipsPel *pin,
int32_t width, int32_t hshrink, int32_t bands)
{
#if HWY_TARGET != HWY_SCALAR
/* 2^32 / (2^8 * hshrink) == 2^24 / hshrink, truncated to uint32.
 * Multiplying a sum by this and shifting right by 24 approximates a
 * division by hshrink.
 */
const auto multiplier =
Set(du32, static_cast<uint32_t>(maxUint32 / (maxBits * hshrink)));
/* Each sum starts at half the divisor, so that the scaled result is
 * rounded rather than truncated.
 */
const auto amend = Set(du32, hshrink / 2);

/* Index of the first input pixel for the current output pixel.
 */
int32_t ix = 0;

for (int32_t x = 0; x < width; ++x) {
auto *HWY_RESTRICT p = (uint8_t *) pin + ix * bands;
auto *HWY_RESTRICT q = (uint8_t *) pout + x * bands;

auto sum0 = amend;

/* Main loop: accumulate input pixels two at a time.
 */
int32_t xx = 0;
for (; xx + 2 <= hshrink; xx += 2) {
auto pix0 = PromoteTo(du32, LoadU(du8x32, p));
p += bands;
auto pix1 = PromoteTo(du32, LoadU(du8x32, p));
p += bands;

pix0 = Add(pix0, pix1);
sum0 = Add(sum0, pix0);
}
/* Remainder: at most one input pixel is left when hshrink is odd.
 */
for (; xx < hshrink; ++xx) {
auto pix0 = PromoteTo(du32, LoadU(du8x32, p));
p += bands;

sum0 = Add(sum0, pix0);
}

/* Scale the sum by the fixed-point reciprocal of hshrink.
 */
sum0 = Mul(sum0, multiplier);

/* The final 32->8 conversion: drop the 24 fractional bits, then narrow
 * the lanes back to uint8.
 */
sum0 = ShiftRight<24>(sum0);

auto demoted = DemoteTo(du8x32, sum0);
StoreU(demoted, du8x32, q);

/* Advance to the next group of hshrink input pixels.
 */
ix += hshrink;
}
#endif
}

} /*namespace HWY_NAMESPACE*/

#if HWY_ONCE
HWY_EXPORT(vips_shrinkh_uchar_hwy);

/* Entry point declared in presample.h. It forwards its arguments
 * unchanged, through Highway's dynamic dispatch, to one of the per-target
 * builds of vips_shrinkh_uchar_hwy() exported above.
 *
 * @pout: first output pixel of the line
 * @pin: first input pixel of the line
 * @width: number of output pixels to produce
 * @hshrink: number of input pixels combined into each output pixel
 * @bands: bytes per pixel
 */
void
vips_shrinkh_uchar_hwy(VipsPel *pout, VipsPel *pin,
int width, int hshrink, int bands)
{
/* clang-format off */
HWY_DYNAMIC_DISPATCH(vips_shrinkh_uchar_hwy)(pout, pin,
width, hshrink, bands);
/* clang-format on */
}
#endif /*HWY_ONCE*/

#endif /*HAVE_HWY*/

0 comments on commit f534551

Please sign in to comment.