From a607f257a6d4af90a534163c7dfa851007ad0e78 Mon Sep 17 00:00:00 2001 From: Kleis Auke Wolthuizen Date: Thu, 21 Nov 2024 18:31:20 +0100 Subject: [PATCH] shrink{h,v}: add fixed-point arithmetic path for uchar images About 9% faster. --- libvips/resample/shrinkh.c | 33 +++++++++++++++++++++++++++------ libvips/resample/shrinkv.c | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/libvips/resample/shrinkh.c b/libvips/resample/shrinkh.c index 95bb619d8..574a44cf5 100644 --- a/libvips/resample/shrinkh.c +++ b/libvips/resample/shrinkh.c @@ -72,6 +72,25 @@ typedef VipsResampleClass VipsShrinkhClass; G_DEFINE_TYPE(VipsShrinkh, vips_shrinkh, VIPS_TYPE_RESAMPLE); +/* Fixed-point arithmetic path for uchar images: for each band of each output pixel, sums every BANDS-th sample of a run of ne input samples (starting from amend) and stores (sum * multiplier) >> 24, where the caller sets multiplier to a truncated 2^24 / hshrink reciprocal. Relies on in, out, width, ne, amend, multiplier, x, b and x1 from the enclosing scope. NOTE(review): because the reciprocal is truncated, the result can be one lower than an exact integer division when hshrink is not a power of two -- confirm this is acceptable. + */ +#define UCHAR_SHRINK(BANDS) \ + { \ + unsigned char *restrict p = (unsigned char *) in; \ + unsigned char *restrict q = (unsigned char *) out; \ +\ + for (x = 0; x < width; x++) { \ + for (b = 0; b < BANDS; b++) { \ + int sum = amend; \ + for (x1 = b; x1 < ne; x1 += BANDS) \ + sum += p[x1]; \ + q[b] = (sum * multiplier) >> 24; \ + } \ + p += ne; \ + q += BANDS; \ + } \ + } + /* Integer shrink. */ #define ISHRINK(ACC_TYPE, TYPE, BANDS) \ @@ -129,7 +148,9 @@ vips_shrinkh_gen2(VipsShrinkh *shrink, VipsRegion *out_region, VipsRegion *ir, int x1, b; switch (resample->in->BandFmt) { - case VIPS_FORMAT_UCHAR: + case VIPS_FORMAT_UCHAR: { + unsigned int multiplier = (1LL << 32) / ((1 << 8) * shrink->hshrink); + /* Generate a special path for 1, 3 and 4 band uchar data. The * compiler will be able to vectorise these. 
* @@ -138,20 +159,20 @@ vips_shrinkh_gen2(VipsShrinkh *shrink, VipsRegion *out_region, VipsRegion *ir, */ switch (bands) { case 1: - ISHRINK(int, unsigned char, 1); + UCHAR_SHRINK(1); break; case 3: - ISHRINK(int, unsigned char, 3); + UCHAR_SHRINK(3); break; case 4: - ISHRINK(int, unsigned char, 4); + UCHAR_SHRINK(4); break; default: - ISHRINK(int, unsigned char, bands); + UCHAR_SHRINK(bands); break; } break; - + } case VIPS_FORMAT_CHAR: ISHRINK(int, char, bands); break; diff --git a/libvips/resample/shrinkv.c b/libvips/resample/shrinkv.c index e38f24eaf..c6c310964 100644 --- a/libvips/resample/shrinkv.c +++ b/libvips/resample/shrinkv.c @@ -111,6 +111,28 @@ typedef VipsResampleClass VipsShrinkvClass; G_DEFINE_TYPE(VipsShrinkv, vips_shrinkv, VIPS_TYPE_RESAMPLE); +/* Fixed-point arithmetic path for uchar images: for each band of each output pixel, sums shrink->vshrink input samples spaced sz elements apart (starting from amend) and stores (sum * multiplier) >> 24, where the caller sets multiplier to a truncated 2^24 / vshrink reciprocal. Relies on in, out, width, sz, amend, shrink, multiplier, x, b and yy from the enclosing scope. NOTE(review): because the reciprocal is truncated, the result can be one lower than an exact integer division when vshrink is not a power of two -- confirm this is acceptable. + */ +#define UCHAR_SHRINK(BANDS) \ + { \ + unsigned char *restrict p = (unsigned char *) in; \ + unsigned char *restrict q = (unsigned char *) out; \ +\ + for (x = 0; x < width; x++) { \ + for (b = 0; b < BANDS; b++) { \ + int sum = amend; \ + unsigned char *restrict row_ptr = p + b; \ + for (yy = 0; yy < shrink->vshrink; yy++) { \ + sum += *row_ptr; \ + row_ptr += sz; \ + } \ + q[b] = (sum * multiplier) >> 24; \ + } \ + p += BANDS; \ + q += BANDS; \ + } \ + } + /* Integer shrink. */ #define ISHRINK(ACC_TYPE, TYPE, BANDS) \ @@ -174,7 +196,9 @@ vips_shrinkv_gen2(VipsShrinkv *shrink, VipsRegion *out_region, VipsRegion *ir, int yy, b; switch (resample->in->BandFmt) { - case VIPS_FORMAT_UCHAR: + case VIPS_FORMAT_UCHAR: { + unsigned int multiplier = (1LL << 32) / ((1 << 8) * shrink->vshrink); + /* Generate a special path for 1, 3 and 4 band uchar data. The * compiler will be able to vectorise these. 
* @@ -183,20 +207,20 @@ vips_shrinkv_gen2(VipsShrinkv *shrink, VipsRegion *out_region, VipsRegion *ir, */ switch (bands) { case 1: - ISHRINK(int, unsigned char, 1); + UCHAR_SHRINK(1); break; case 3: - ISHRINK(int, unsigned char, 3); + UCHAR_SHRINK(3); break; case 4: - ISHRINK(int, unsigned char, 4); + UCHAR_SHRINK(4); break; default: - ISHRINK(int, unsigned char, bands); + UCHAR_SHRINK(bands); break; } break; - + } case VIPS_FORMAT_CHAR: ISHRINK(int, char, bands); break;