Skip to content

Commit

Permalink
reduce{h,v}: remove rounding from the double paths
Browse files Browse the repository at this point in the history
  • Loading branch information
kleisauke committed Sep 26, 2023
1 parent 2ebefa4 commit 74c7ae6
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 114 deletions.
65 changes: 11 additions & 54 deletions libvips/resample/reduceh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ vips_reduce_get_points(VipsKernel kernel, double shrink)
}
}

template <typename T, int max_value>
template <typename T, T max_value>
static void inline reduceh_unsigned_int_tab(VipsReduceh *reduceh,
VipsPel *pout, const VipsPel *pin,
const int bands, const short *restrict cx)
Expand All @@ -141,9 +141,9 @@ static void inline reduceh_unsigned_int_tab(VipsReduceh *reduceh,
const int n = reduceh->n_point;

for (int z = 0; z < bands; z++) {
int sum;
typename intermediate<T>::type sum;

sum = reduce_sum<T, int>(in + z, bands, cx, n);
sum = reduce_sum<T>(in + z, bands, cx, n);
sum = unsigned_fixed_round(sum);
out[z] = VIPS_CLIP(0, sum, max_value);
}
Expand All @@ -159,9 +159,9 @@ static void inline reduceh_signed_int_tab(VipsReduceh *reduceh,
const int n = reduceh->n_point;

for (int z = 0; z < bands; z++) {
int sum;
typename intermediate<T>::type sum;

sum = reduce_sum<T, int>(in + z, bands, cx, n);
sum = reduce_sum<T>(in + z, bands, cx, n);
sum = signed_fixed_round(sum);
out[z] = VIPS_CLIP(min_value, sum, max_value);
}
Expand All @@ -179,46 +179,7 @@ static void inline reduceh_float_tab(VipsReduceh *reduceh,
const int n = reduceh->n_point;

for (int z = 0; z < bands; z++)
out[z] = reduce_sum<T, double>(in + z, bands, cx, n);
}

/* 32-bit int output needs a 64-bits intermediate.
*/

template <typename T, unsigned int max_value>
static void inline reduceh_unsigned_int32_tab(VipsReduceh *reduceh,
VipsPel *pout, const VipsPel *pin,
const int bands, const short *restrict cx)
{
T *restrict out = (T *) pout;
const T *restrict in = (T *) pin;
const int n = reduceh->n_point;

for (int z = 0; z < bands; z++) {
uint64_t sum;

sum = reduce_sum<T, uint64_t>(in + z, bands, cx, n);
sum = unsigned_fixed_round(sum);
out[z] = VIPS_CLIP(0, sum, max_value);
}
}

template <typename T, int min_value, int max_value>
static void inline reduceh_signed_int32_tab(VipsReduceh *reduceh,
VipsPel *pout, const VipsPel *pin,
const int bands, const short *restrict cx)
{
T *restrict out = (T *) pout;
const T *restrict in = (T *) pin;
const int n = reduceh->n_point;

for (int z = 0; z < bands; z++) {
int64_t sum;

sum = reduce_sum<T, int64_t>(in + z, bands, cx, n);
sum = signed_fixed_round(sum);
out[z] = VIPS_CLIP(min_value, sum, max_value);
}
out[z] = reduce_sum<T>(in + z, bands, cx, n);
}

/* Ultra-high-quality version for double images.
Expand All @@ -232,17 +193,13 @@ static void inline reduceh_notab(VipsReduceh *reduceh,
const T *restrict in = (T *) pin;
const int n = reduceh->n_point;

double cx[MAX_POINT];
typename intermediate<T>::type cx[MAX_POINT];

vips_reduce_make_mask(cx, reduceh->kernel, reduceh->n_point,
reduceh->hshrink, x);

for (int z = 0; z < bands; z++) {
double sum;
sum = reduce_sum<T, double>(in + z, bands, cx, n);

out[z] = VIPS_ROUND_UINT(sum);
}
for (int z = 0; z < bands; z++)
out[z] = reduce_sum<T>(in + z, bands, cx, n);
}

static int
Expand Down Expand Up @@ -330,12 +287,12 @@ vips_reduceh_gen(VipsRegion *out_region, void *seq,
break;

case VIPS_FORMAT_UINT:
reduceh_unsigned_int32_tab<unsigned int,
reduceh_unsigned_int_tab<unsigned int,
UINT_MAX>(reduceh, q, p, bands, cxs);
break;

case VIPS_FORMAT_INT:
reduceh_signed_int32_tab<signed int,
reduceh_signed_int_tab<signed int,
INT_MIN, INT_MAX>(reduceh, q, p, bands, cxs);
break;

Expand Down
67 changes: 11 additions & 56 deletions libvips/resample/reducev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ vips_reducev_compile(VipsReducev *reducev)
/* You'd think this would vectorise, but gcc hates mixed types in nested loops
* :-(
*/
template <typename T, int max_value>
template <typename T, T max_value>
static void inline reducev_unsigned_int_tab(VipsReducev *reducev,
VipsPel *pout, const VipsPel *pin,
const int ne, const int lskip, const short *restrict cy)
Expand All @@ -413,9 +413,9 @@ static void inline reducev_unsigned_int_tab(VipsReducev *reducev,
const int l1 = lskip / sizeof(T);

for (int z = 0; z < ne; z++) {
int sum;
typename intermediate<T>::type sum;

sum = reduce_sum<T, int>(in + z, l1, cy, n);
sum = reduce_sum<T>(in + z, l1, cy, n);
sum = unsigned_fixed_round(sum);
out[z] = VIPS_CLIP(0, sum, max_value);
}
Expand All @@ -432,9 +432,9 @@ static void inline reducev_signed_int_tab(VipsReducev *reducev,
const int l1 = lskip / sizeof(T);

for (int z = 0; z < ne; z++) {
int sum;
typename intermediate<T>::type sum;

sum = reduce_sum<T, int>(in + z, l1, cy, n);
sum = reduce_sum<T>(in + z, l1, cy, n);
sum = signed_fixed_round(sum);
out[z] = VIPS_CLIP(min_value, sum, max_value);
}
Expand All @@ -453,48 +453,7 @@ static void inline reducev_float_tab(VipsReducev *reducev,
const int l1 = lskip / sizeof(T);

for (int z = 0; z < ne; z++)
out[z] = reduce_sum<T, double>(in + z, l1, cy, n);
}

/* 32-bit int output needs a 64-bits intermediate.
*/

template <typename T, unsigned int max_value>
static void inline reducev_unsigned_int32_tab(VipsReducev *reducev,
VipsPel *pout, const VipsPel *pin,
const int ne, const int lskip, const short *restrict cy)
{
T *restrict out = (T *) pout;
const T *restrict in = (T *) pin;
const int n = reducev->n_point;
const int l1 = lskip / sizeof(T);

for (int z = 0; z < ne; z++) {
uint64_t sum;

sum = reduce_sum<T, uint64_t>(in + z, l1, cy, n);
sum = unsigned_fixed_round(sum);
out[z] = VIPS_CLIP(0, sum, max_value);
}
}

template <typename T, int min_value, int max_value>
static void inline reducev_signed_int32_tab(VipsReducev *reducev,
VipsPel *pout, const VipsPel *pin,
const int ne, const int lskip, const short *restrict cy)
{
T *restrict out = (T *) pout;
const T *restrict in = (T *) pin;
const int n = reducev->n_point;
const int l1 = lskip / sizeof(T);

for (int z = 0; z < ne; z++) {
int64_t sum;

sum = reduce_sum<T, int64_t>(in + z, l1, cy, n);
sum = signed_fixed_round(sum);
out[z] = VIPS_CLIP(min_value, sum, max_value);
}
out[z] = reduce_sum<T>(in + z, l1, cy, n);
}

/* Ultra-high-quality version for double images.
Expand All @@ -509,17 +468,13 @@ static void inline reducev_notab(VipsReducev *reducev,
const int n = reducev->n_point;
const int l1 = lskip / sizeof(T);

double cy[MAX_POINT];
typename intermediate<T>::type cy[MAX_POINT];

vips_reduce_make_mask(cy, reducev->kernel, reducev->n_point,
reducev->vshrink, y);

for (int z = 0; z < ne; z++) {
double sum;
sum = reduce_sum<T, double>(in + z, l1, cy, n);

out[z] = VIPS_ROUND_UINT(sum);
}
for (int z = 0; z < ne; z++)
out[z] = reduce_sum<T>(in + z, l1, cy, n);
}

static int
Expand Down Expand Up @@ -591,12 +546,12 @@ vips_reducev_gen(VipsRegion *out_region, void *vseq,
break;

case VIPS_FORMAT_UINT:
reducev_unsigned_int32_tab<unsigned int,
reducev_unsigned_int_tab<unsigned int,
UINT_MAX>(reducev, q, p, ne, lskip, cys);
break;

case VIPS_FORMAT_INT:
reducev_signed_int32_tab<signed int,
reducev_signed_int_tab<signed int,
INT_MIN, INT_MAX>(reducev, q, p, ne, lskip, cys);
break;

Expand Down
42 changes: 38 additions & 4 deletions libvips/resample/templates.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
*/

#include <cstdint>
#include <type_traits>

/*
* Various casts which assume that the data is already in range. (That
* is, they are to be used with monotone samplers.)
Expand Down Expand Up @@ -149,7 +152,7 @@ static T inline bilinear_nosign(
template <typename T>
static T inline unsigned_fixed_round(T v)
{
const T round_by = VIPS_INTERPOLATE_SCALE >> 1;
const int round_by = VIPS_INTERPOLATE_SCALE >> 1;

return (v + round_by) >> VIPS_INTERPOLATE_SHIFT;
}
Expand Down Expand Up @@ -200,8 +203,8 @@ static int inline bicubic_unsigned_int(
template <typename T>
static T inline signed_fixed_round(T v)
{
const T sign_of_v = 2 * (v >= 0) - 1;
const T round_by = sign_of_v * (VIPS_INTERPOLATE_SCALE >> 1);
const int sign_of_v = 2 * (v >= 0) - 1;
const int round_by = sign_of_v * (VIPS_INTERPOLATE_SCALE >> 1);

return (v + round_by) >> VIPS_INTERPOLATE_SHIFT;
}
Expand Down Expand Up @@ -473,10 +476,41 @@ vips_reduce_make_mask(T *c, VipsKernel kernel, const int n_points,
}
}

/* Machinery to pick an intermediate for type T, prevents an overflow in
* reduce_sum(). Defaults to a 32-bit integral type.
*/
template <typename T,
bool IsIntegral = std::is_integral<T>::value,
size_t Size = sizeof(T)>
struct intermediate {
typedef int32_t type;
};

/* 32-bit integral types needs a 64-bits intermediate.
*/
template <typename T>
struct intermediate<T, true, 4> {
typedef int64_t type;
};

/* 32-bit floating-point types needs a 64-bits intermediate.
*/
template <typename T>
struct intermediate<T, false, 4> {
typedef double type;
};

/* 64-bit floating-point types needs a 128-bits intermediate.
*/
template <typename T>
struct intermediate<T, false, 8> {
typedef long double type;
};

/* Our inner loop for resampling with a convolution of type CT. Operate on
* elements of type T, gather results in an intermediate of type IT.
*/
template <typename T, typename IT, typename CT>
template <typename T, typename CT, typename IT = typename intermediate<T>::type>
static IT inline reduce_sum(const T *restrict in, int stride,
const CT *restrict c, int n)
{
Expand Down

0 comments on commit 74c7ae6

Please sign in to comment.