| /* |
| * Image Scaling Functions |
| * Copyright (c) 2011 David A. Schleef <ds@schleef.org> |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, |
| * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
| * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| */ |
| /* |
| * |
| * Modified Lanczos scaling algorithm |
| * ================================== |
| * |
| * This algorithm was developed by the author. The primary goals of |
| * the algorithm are high-quality video downscaling for medium scale |
| * factors (in the range of 1.3x to 5.0x) using methods that can be |
| * converted to SIMD code. Concerns with existing algorithms were |
| * mainly related to either over-soft filtering (Lanczos) or aliasing |
| * (bilinear or any other method with inadequate sampling). |
| * |
| * The problems with bilinear scaling are apparent when downscaling |
| * more than a factor of 2. For example, when downscaling by a factor |
| * of 3, only two-thirds of the input pixels contribute to the output |
| * pixels. This is only considering scaling in one direction; after |
| * scaling both vertically and horizontally in a 2-D image, fewer than |
| * half of the input pixels contribute to the output, so it should not |
| * be surprising that the output is suboptimal. |
| * |
| * The problems with Lanczos scaling are more subtle. From a theoretical |
| * perspective, Lanczos is an optimal algorithm for resampling equally- |
| * spaced values. This theoretical perspective is based on analysis |
| * done in frequency space, thus, Lanczos works very well for audio |
| * resampling, since the ear hears primarily in frequency space. The |
| * human visual system is sensitive primarily in the spatial domain, |
| * therefore any resampling algorithm should take this into account. |
| * This difference is immediately clear in the size of resampling |
| * window or envelope that is chosen for resampling: for audio, an |
| * envelope of a=64 is typical, in image scaling, the envelope is |
| * usually a=2 or a=3. |
| * |
| * One result of the HVS being sensitive in the spatial domain (and |
| * also probably due to oversampling capabilities of the retina and |
| * visual cortex) is that it is less sensitive to the exact magnitude |
| * of high-frequency visual signals than to the appropriate amount of |
| * energy in the nearby frequency band. A Lanczos kernel with a=2 |
| * or a=3 strongly decreases the amount of energy in the high frequency |
| * bands. The energy in this area can be increased by increasing a, |
| * which brings in energy from different areas of the image (bad for |
| * reasons mentioned above), or by oversampling the input data. We |
| * have chosen two methods for doing the latter. Firstly, there is |
| * a sharpness parameter, which increases the cutoff frequency of the |
| * filter, aliasing higher frequency noise into the passband. And |
| * secondly, there is the sharpen parameter, which increases the |
| * contribution of high-frequency (but in-band) components. |
| * |
| * An alternate explanation of the usefulness of a sharpening filter |
| * is that many natural images have a roughly 1/f spectrum. In order |
| * for a downsampled image to look more "natural" when high frequencies |
| * are removed, the frequencies in the pass band near the cutoff |
| * frequency are amplified, causing the spectrum to be more roughly |
| * 1/f. I said "roughly", not "literally". |
| * |
| * This alternate explanation is useful for understanding the author's |
| * secondary motivation for developing this algorithm, namely, as a |
| * method of video compression. Several recent techniques (such as |
| * HTTP Live Streaming and SVC) use image scaling as a method to get |
| * increased compression out of nominally non-scalable codecs such as |
| * H.264. For optimal quality, it is thusly important to consider |
| * the scaler and encoder as a combined unit. Tuning of the sharpness |
| * and sharpen parameters was performed using the Toro encoder tuner, |
| * where scaled and encoded video was compared to unscaled and encoded |
| * video. This tuning suggested values that were very close to the |
| * values chosen by manual inspection of scaled images and video. |
| * |
| * The optimal values of sharpen and sharpness were slightly different |
| * depending whether the comparison was still images or video. Video |
| * comparisons were more sensitive to aliasing, since the aliasing |
| * artifacts tended to move or "crawl" around the video. The default |
| * values are for video; image scaling may prefer higher values. |
| * |
| * A number of related techniques were rejected for various reasons. |
| * An early technique of selecting the sharpness factor locally based |
| * on edge detection (in order to use a higher sharpness values without |
| * the corresponding aliasing on edges) worked very well for still |
| * images, but caused too much "crawling" on textures in video. Also, |
| * this method is slow, as it does not parallelize well. |
| * |
| * Non-separable techniques were rejected because the fastest would |
| * have been at least 4x slower. |
| * |
| * It is infrequently appreciated that image scaling should ideally be |
| * done in linear light space. Converting to linear light space has |
| * a similar effect to a sharpening filter. This approach was not |
| * taken because the added benefit is minor compared to the additional |
| * computational cost. Morever, the benefit is decreased by increasing |
| * the strength of the sharpening filter. |
| * |
| */ |
| #ifdef HAVE_CONFIG_H |
| #include "config.h" |
| #endif |
| |
| #include <string.h> |
| |
| #include "vs_scanline.h" |
| #include "vs_image.h" |
| |
| #include "gstvideoscaleorc.h" |
| #include <gst/gst.h> |
| #include <math.h> |
| |
| #define NEED_CLAMP(x,a,b) ((x) < (a) || (x) > (b)) |
| |
| #define ROUND_UP_2(x) (((x)+1)&~1) |
| #define ROUND_UP_4(x) (((x)+3)&~3) |
| #define ROUND_UP_8(x) (((x)+7)&~7) |
| |
| #define SRC_LINE(i) (scale->src->pixels + scale->src->stride * (i)) |
| |
| #define TMP_LINE_S16(i) ((gint16 *)scale->tmpdata + (i)*(scale->dest->width)) |
| #define TMP_LINE_S32(i) ((gint32 *)scale->tmpdata + (i)*(scale->dest->width)) |
| #define TMP_LINE_FLOAT(i) ((float *)scale->tmpdata + (i)*(scale->dest->width)) |
| #define TMP_LINE_DOUBLE(i) ((double *)scale->tmpdata + (i)*(scale->dest->width)) |
| #define TMP_LINE_S16_AYUV(i) ((gint16 *)scale->tmpdata + (i)*4*(scale->dest->width)) |
| #define TMP_LINE_S32_AYUV(i) ((gint32 *)scale->tmpdata + (i)*4*(scale->dest->width)) |
| #define TMP_LINE_FLOAT_AYUV(i) ((float *)scale->tmpdata + (i)*4*(scale->dest->width)) |
| #define TMP_LINE_DOUBLE_AYUV(i) ((double *)scale->tmpdata + (i)*4*(scale->dest->width)) |
| |
| #define PTR_OFFSET(a,b) ((void *)((char *)(a) + (b))) |
| |
| typedef void (*HorizResampleFunc) (void *dest, const gint32 * offsets, |
| const void *taps, const void *src, int n_taps, int shift, int n); |
| |
| typedef struct _Scale1D Scale1D; |
| struct _Scale1D |
| { |
| int n; |
| double offset; |
| double scale; |
| |
| double fx; |
| double ex; |
| int dx; |
| |
| int n_taps; |
| gint32 *offsets; |
| void *taps; |
| }; |
| |
| typedef struct _Scale Scale; |
| struct _Scale |
| { |
| const VSImage *dest; |
| const VSImage *src; |
| |
| double sharpness; |
| gboolean dither; |
| |
| void *tmpdata; |
| |
| HorizResampleFunc horiz_resample_func; |
| |
| Scale1D x_scale1d; |
| Scale1D y_scale1d; |
| }; |
| |
| static void |
| vs_image_scale_lanczos_Y_int16 (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, double a, |
| double sharpen); |
| static void vs_image_scale_lanczos_Y_int32 (const VSImage * dest, |
| const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, |
| double a, double sharpen); |
| static void vs_image_scale_lanczos_Y_float (const VSImage * dest, |
| const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, |
| double a, double sharpen); |
| static void vs_image_scale_lanczos_Y_double (const VSImage * dest, |
| const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, |
| double a, double sharpen); |
| static void |
| vs_image_scale_lanczos_AYUV_int16 (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, double a, |
| double sharpen); |
| static void vs_image_scale_lanczos_AYUV_int32 (const VSImage * dest, |
| const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, |
| double a, double sharpen); |
| static void vs_image_scale_lanczos_AYUV_float (const VSImage * dest, |
| const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, |
| double a, double sharpen); |
| static void vs_image_scale_lanczos_AYUV_double (const VSImage * dest, |
| const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, |
| double a, double sharpen); |
| static void vs_image_scale_lanczos_AYUV64_double (const VSImage * dest, |
| const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, |
| double a, double sharpen); |
| |
| static double |
| sinc (double x) |
| { |
| if (x == 0) |
| return 1; |
| return sin (G_PI * x) / (G_PI * x); |
| } |
| |
| static double |
| envelope (double x) |
| { |
| if (x <= -1 || x >= 1) |
| return 0; |
| return sinc (x); |
| } |
| |
| static int |
| scale1d_get_n_taps (int src_size, int dest_size, double a, double sharpness) |
| { |
| double scale; |
| double fx; |
| int dx; |
| |
| scale = src_size / (double) dest_size; |
| if (scale > 1.0) { |
| fx = (1.0 / scale) * sharpness; |
| } else { |
| fx = (1.0) * sharpness; |
| } |
| dx = ceil (a / fx); |
| |
| return 2 * dx; |
| } |
| |
| static void |
| scale1d_cleanup (Scale1D * scale) |
| { |
| g_free (scale->taps); |
| g_free (scale->offsets); |
| } |
| |
| /* |
| * Calculates a set of taps for each destination element in double |
| * format. Each set of taps sums to 1.0. |
| * |
| */ |
| static void |
| scale1d_calculate_taps (Scale1D * scale, int src_size, int dest_size, |
| int n_taps, double a, double sharpness, double sharpen) |
| { |
| int j; |
| double *tap_array; |
| gint32 *offsets; |
| double scale_offset; |
| double scale_increment; |
| int dx; |
| double fx; |
| double ex; |
| |
| scale->scale = src_size / (double) dest_size; |
| scale->offset = scale->scale / 2 - 0.5; |
| |
| if (scale->scale > 1.0) { |
| scale->fx = (1.0 / scale->scale) * sharpness; |
| } else { |
| scale->fx = (1.0) * sharpness; |
| } |
| scale->ex = scale->fx / a; |
| scale->dx = ceil (a / scale->fx); |
| |
| g_assert (n_taps >= 2 * scale->dx); |
| scale->n_taps = n_taps; |
| |
| scale->taps = g_malloc (sizeof (double) * scale->n_taps * dest_size); |
| scale->offsets = g_malloc (sizeof (gint32) * dest_size); |
| tap_array = scale->taps; |
| offsets = scale->offsets; |
| |
| scale_offset = scale->offset; |
| scale_increment = scale->scale; |
| dx = scale->dx; |
| fx = scale->fx; |
| ex = scale->ex; |
| |
| for (j = 0; j < dest_size; j++) { |
| double x; |
| int xi; |
| int l; |
| double weight; |
| double *taps; |
| |
| x = scale_offset + scale_increment * j; |
| x = CLAMP (x, 0, src_size); |
| xi = ceil (x) - dx; |
| |
| offsets[j] = xi; |
| weight = 0; |
| taps = tap_array + j * n_taps; |
| |
| for (l = 0; l < n_taps; l++) { |
| int xl = xi + l; |
| taps[l] = sinc ((x - xl) * fx) * envelope ((x - xl) * ex); |
| taps[l] -= sharpen * envelope ((x - xl) * ex); |
| weight += taps[l]; |
| } |
| g_assert (envelope ((x - (xi - 1)) * ex) == 0); |
| g_assert (envelope ((x - (xi + n_taps)) * ex) == 0); |
| for (l = 0; l < n_taps; l++) { |
| taps[l] /= weight; |
| } |
| |
| if (xi < 0) { |
| int shift = -xi; |
| |
| for (l = 0; l < shift; l++) { |
| taps[shift] += taps[l]; |
| } |
| for (l = 0; l < n_taps - shift; l++) { |
| taps[l] = taps[shift + l]; |
| } |
| for (; l < n_taps; l++) { |
| taps[l] = 0; |
| } |
| offsets[j] += shift; |
| } |
| |
| if (xi > src_size - n_taps) { |
| int shift = xi - (src_size - n_taps); |
| |
| for (l = 0; l < shift; l++) { |
| taps[n_taps - shift - 1] += taps[n_taps - shift + l]; |
| } |
| for (l = 0; l < n_taps - shift; l++) { |
| taps[n_taps - 1 - l] = taps[n_taps - 1 - shift - l]; |
| } |
| for (l = 0; l < shift; l++) { |
| taps[l] = 0; |
| } |
| offsets[j] -= shift; |
| } |
| } |
| } |
| |
| /* |
| * Calculates a set of taps for each destination element in float |
| * format. Each set of taps sums to 1.0. |
| */ |
| static void |
| scale1d_calculate_taps_float (Scale1D * scale, int src_size, int dest_size, |
| int n_taps, double a, double sharpness, double sharpen) |
| { |
| double *taps_d; |
| float *taps_f; |
| int j; |
| |
| scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness, |
| sharpen); |
| |
| taps_d = scale->taps; |
| taps_f = g_malloc (sizeof (float) * scale->n_taps * dest_size); |
| |
| for (j = 0; j < dest_size * n_taps; j++) { |
| taps_f[j] = taps_d[j]; |
| } |
| |
| g_free (taps_d); |
| scale->taps = taps_f; |
| } |
| |
| /* |
| * Calculates a set of taps for each destination element in gint32 |
| * format. Each set of taps sums to (very nearly) (1<<shift). A |
| * typical value for shift is 10 to 15, so that applying the taps to |
| * uint8 values and summing will fit in a (signed) int32. |
| */ |
| static void |
| scale1d_calculate_taps_int32 (Scale1D * scale, int src_size, int dest_size, |
| int n_taps, double a, double sharpness, double sharpen, int shift) |
| { |
| double *taps_d; |
| gint32 *taps_i; |
| int i; |
| int j; |
| double multiplier; |
| |
| scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness, |
| sharpen); |
| |
| taps_d = scale->taps; |
| taps_i = g_malloc (sizeof (gint32) * scale->n_taps * dest_size); |
| |
| multiplier = (1 << shift); |
| |
| for (j = 0; j < dest_size; j++) { |
| for (i = 0; i < n_taps; i++) { |
| taps_i[j * n_taps + i] = |
| floor (0.5 + taps_d[j * n_taps + i] * multiplier); |
| } |
| } |
| |
| g_free (taps_d); |
| scale->taps = taps_i; |
| } |
| |
| /* |
| * Calculates a set of taps for each destination element in gint16 |
| * format. Each set of taps sums to (1<<shift). A typical value |
| * for shift is 7, so that applying the taps to uint8 values and |
| * summing will fit in a (signed) int16. |
| */ |
| static void |
| scale1d_calculate_taps_int16 (Scale1D * scale, int src_size, int dest_size, |
| int n_taps, double a, double sharpness, double sharpen, int shift) |
| { |
| double *taps_d; |
| gint16 *taps_i; |
| int i; |
| int j; |
| double multiplier; |
| |
| scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness, |
| sharpen); |
| |
| taps_d = scale->taps; |
| taps_i = g_malloc (sizeof (gint16) * scale->n_taps * dest_size); |
| |
| multiplier = (1 << shift); |
| |
| /* Various methods for converting floating point taps to integer. |
| * The dB values are the SSIM value between scaling an image via |
| * the floating point pathway vs. the integer pathway using the |
| * given code to generate the taps. Only one image was tested, |
| * scaling from 1920x1080 to 640x360. Several variations of the |
| * methods were also tested, with nothing appearing useful. */ |
| #if 0 |
| /* Standard round to integer. This causes bad DC errors. */ |
| /* 44.588 dB */ |
| for (j = 0; j < dest_size; j++) { |
| for (i = 0; i < n_taps; i++) { |
| taps_i[j * n_taps + i] = |
| floor (0.5 + taps_d[j * n_taps + i] * multiplier); |
| } |
| } |
| #endif |
| #if 0 |
| /* Dithering via error propogation. Works pretty well, but |
| * really we want to propogate errors across rows, which would |
| * mean having several sets of tap arrays. Possible, but more work, |
| * and it may not even be better. */ |
| /* 57.0961 dB */ |
| { |
| double err = 0; |
| for (j = 0; j < dest_size; j++) { |
| for (i = 0; i < n_taps; i++) { |
| err += taps_d[j * n_taps + i] * multiplier; |
| taps_i[j * n_taps + i] = floor (err); |
| err -= floor (err); |
| } |
| } |
| } |
| #endif |
| #if 1 |
| /* Round to integer, but with an adjustable bias that we use to |
| * eliminate the DC error. This search method is a bit crude, and |
| * could perhaps be improved somewhat. */ |
| /* 60.4851 dB */ |
| for (j = 0; j < dest_size; j++) { |
| int k; |
| for (k = 0; k < 100; k++) { |
| int sum = 0; |
| double offset; |
| |
| offset = k * 0.01; |
| for (i = 0; i < n_taps; i++) { |
| taps_i[j * n_taps + i] = |
| floor (offset + taps_d[j * n_taps + i] * multiplier); |
| sum += taps_i[j * n_taps + i]; |
| } |
| |
| if (sum >= (1 << shift)) |
| break; |
| } |
| } |
| #endif |
| #if 0 |
| /* Round to integer, but adjust the multiplier. The search method is |
| * wrong a lot, but was sufficient enough to calculate dB error. */ |
| /* 58.6517 dB */ |
| for (j = 0; j < dest_size; j++) { |
| int k; |
| int sum = 0; |
| for (k = 0; k < 200; k++) { |
| sum = 0; |
| |
| multiplier = (1 << shift) - 1.0 + k * 0.01; |
| for (i = 0; i < n_taps; i++) { |
| taps_i[j * n_taps + i] = |
| floor (0.5 + taps_d[j * n_taps + i] * multiplier); |
| sum += taps_i[j * n_taps + i]; |
| } |
| |
| if (sum >= (1 << shift)) |
| break; |
| } |
| if (sum != (1 << shift)) { |
| GST_ERROR ("%g %d", multiplier, sum); |
| } |
| } |
| #endif |
| #if 0 |
| /* Round to integer, but subtract the error from the largest tap */ |
| /* 58.3677 dB */ |
| for (j = 0; j < dest_size; j++) { |
| int err = -multiplier; |
| for (i = 0; i < n_taps; i++) { |
| taps_i[j * n_taps + i] = |
| floor (0.5 + taps_d[j * n_taps + i] * multiplier); |
| err += taps_i[j * n_taps + i]; |
| } |
| if (taps_i[j * n_taps + (n_taps / 2 - 1)] > |
| taps_i[j * n_taps + (n_taps / 2)]) { |
| taps_i[j * n_taps + (n_taps / 2 - 1)] -= err; |
| } else { |
| taps_i[j * n_taps + (n_taps / 2)] -= err; |
| } |
| } |
| #endif |
| |
| g_free (taps_d); |
| scale->taps = taps_i; |
| } |
| |
| |
| void |
| vs_image_scale_lanczos_Y (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod, |
| double a, double sharpen) |
| { |
| switch (submethod) { |
| case 0: |
| default: |
| vs_image_scale_lanczos_Y_int16 (dest, src, tmpbuf, sharpness, dither, a, |
| sharpen); |
| break; |
| case 1: |
| vs_image_scale_lanczos_Y_int32 (dest, src, tmpbuf, sharpness, dither, a, |
| sharpen); |
| break; |
| case 2: |
| vs_image_scale_lanczos_Y_float (dest, src, tmpbuf, sharpness, dither, a, |
| sharpen); |
| break; |
| case 3: |
| vs_image_scale_lanczos_Y_double (dest, src, tmpbuf, sharpness, dither, a, |
| sharpen); |
| break; |
| } |
| } |
| |
| void |
| vs_image_scale_lanczos_AYUV (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod, |
| double a, double sharpen) |
| { |
| switch (submethod) { |
| case 0: |
| default: |
| vs_image_scale_lanczos_AYUV_int16 (dest, src, tmpbuf, sharpness, dither, |
| a, sharpen); |
| break; |
| case 1: |
| vs_image_scale_lanczos_AYUV_int32 (dest, src, tmpbuf, sharpness, dither, |
| a, sharpen); |
| break; |
| case 2: |
| vs_image_scale_lanczos_AYUV_float (dest, src, tmpbuf, sharpness, dither, |
| a, sharpen); |
| break; |
| case 3: |
| vs_image_scale_lanczos_AYUV_double (dest, src, tmpbuf, sharpness, dither, |
| a, sharpen); |
| break; |
| } |
| } |
| |
| void |
| vs_image_scale_lanczos_AYUV64 (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod, |
| double a, double sharpen) |
| { |
| vs_image_scale_lanczos_AYUV64_double (dest, src, tmpbuf, sharpness, dither, |
| a, sharpen); |
| } |
| |
| |
| |
| #define RESAMPLE_HORIZ_FLOAT(function, dest_type, tap_type, src_type, _n_taps) \ |
| static void \ |
| function (dest_type *dest, const gint32 *offsets, \ |
| const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \ |
| { \ |
| int i; \ |
| int k; \ |
| dest_type sum; \ |
| const src_type *srcline; \ |
| const tap_type *tapsline; \ |
| for (i = 0; i < n; i++) { \ |
| srcline = src + offsets[i]; \ |
| tapsline = taps + i * _n_taps; \ |
| sum = 0; \ |
| for (k = 0; k < _n_taps; k++) { \ |
| sum += srcline[k] * tapsline[k]; \ |
| } \ |
| dest[i] = sum; \ |
| } \ |
| } |
| |
| #define RESAMPLE_HORIZ(function, dest_type, tap_type, src_type, _n_taps, _shift) \ |
| static void \ |
| function (dest_type *dest, const gint32 *offsets, \ |
| const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \ |
| { \ |
| int i; \ |
| int k; \ |
| dest_type sum; \ |
| const src_type *srcline; \ |
| const tap_type *tapsline; \ |
| int offset; \ |
| if (_shift > 0) offset = (1<<_shift)>>1; \ |
| else offset = 0; \ |
| for (i = 0; i < n; i++) { \ |
| srcline = src + offsets[i]; \ |
| tapsline = taps + i * _n_taps; \ |
| sum = 0; \ |
| for (k = 0; k < _n_taps; k++) { \ |
| sum += srcline[k] * tapsline[k]; \ |
| } \ |
| dest[i] = (sum + offset) >> _shift; \ |
| } \ |
| } |
| |
| #define RESAMPLE_HORIZ_AYUV_FLOAT(function, dest_type, tap_type, src_type, _n_taps) \ |
| static void \ |
| function (dest_type *dest, const gint32 *offsets, \ |
| const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \ |
| { \ |
| int i; \ |
| int k; \ |
| dest_type sum1; \ |
| dest_type sum2; \ |
| dest_type sum3; \ |
| dest_type sum4; \ |
| const src_type *srcline; \ |
| const tap_type *tapsline; \ |
| for (i = 0; i < n; i++) { \ |
| srcline = src + 4*offsets[i]; \ |
| tapsline = taps + i * _n_taps; \ |
| sum1 = 0; \ |
| sum2 = 0; \ |
| sum3 = 0; \ |
| sum4 = 0; \ |
| for (k = 0; k < _n_taps; k++) { \ |
| sum1 += srcline[k*4+0] * tapsline[k]; \ |
| sum2 += srcline[k*4+1] * tapsline[k]; \ |
| sum3 += srcline[k*4+2] * tapsline[k]; \ |
| sum4 += srcline[k*4+3] * tapsline[k]; \ |
| } \ |
| dest[i*4+0] = sum1; \ |
| dest[i*4+1] = sum2; \ |
| dest[i*4+2] = sum3; \ |
| dest[i*4+3] = sum4; \ |
| } \ |
| } |
| |
| #define RESAMPLE_HORIZ_AYUV(function, dest_type, tap_type, src_type, _n_taps, _shift) \ |
| static void \ |
| function (dest_type *dest, const gint32 *offsets, \ |
| const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \ |
| { \ |
| int i; \ |
| int k; \ |
| dest_type sum1; \ |
| dest_type sum2; \ |
| dest_type sum3; \ |
| dest_type sum4; \ |
| const src_type *srcline; \ |
| const tap_type *tapsline; \ |
| int offset; \ |
| if (_shift > 0) offset = (1<<_shift)>>1; \ |
| else offset = 0; \ |
| for (i = 0; i < n; i++) { \ |
| srcline = src + 4*offsets[i]; \ |
| tapsline = taps + i * _n_taps; \ |
| sum1 = 0; \ |
| sum2 = 0; \ |
| sum3 = 0; \ |
| sum4 = 0; \ |
| for (k = 0; k < _n_taps; k++) { \ |
| sum1 += srcline[k*4+0] * tapsline[k]; \ |
| sum2 += srcline[k*4+1] * tapsline[k]; \ |
| sum3 += srcline[k*4+2] * tapsline[k]; \ |
| sum4 += srcline[k*4+3] * tapsline[k]; \ |
| } \ |
| dest[i*4+0] = (sum1 + offset) >> _shift; \ |
| dest[i*4+1] = (sum2 + offset) >> _shift; \ |
| dest[i*4+2] = (sum3 + offset) >> _shift; \ |
| dest[i*4+3] = (sum4 + offset) >> _shift; \ |
| } \ |
| } |
| |
| /* *INDENT-OFF* */ |
| RESAMPLE_HORIZ_FLOAT (resample_horiz_double_u8_generic, double, double, |
| guint8, n_taps) |
| RESAMPLE_HORIZ_FLOAT (resample_horiz_float_u8_generic, float, float, |
| guint8, n_taps) |
| RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_double_ayuv_generic, double, double, |
| guint8, n_taps) |
| RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_float_ayuv_generic, float, float, |
| guint8, n_taps) |
| |
| RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_double_ayuv_generic_s16, double, double, |
| guint16, n_taps) |
| |
| RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_generic, gint32, gint32, |
| guint8, n_taps, shift) |
| RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_generic, gint16, gint16, |
| guint8, n_taps, shift) |
| RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_generic, gint32, gint32, |
| guint8, n_taps, shift) |
| RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_generic, gint16, gint16, |
| guint8, n_taps, shift) |
| |
| /* Candidates for orcification */ |
| RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps16_shift0, gint32, gint32, |
| guint8, 16, 0) |
| RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps12_shift0, gint32, gint32, |
| guint8, 12, 0) |
| RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps8_shift0, gint32, gint32, |
| guint8, 8, 0) |
| RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps4_shift0, gint32, gint32, |
| guint8, 4, 0) |
| RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps16_shift0, gint16, gint16, |
| guint8, 16, 0) |
| RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps12_shift0, gint16, gint16, |
| guint8, 12, 0) |
| RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps8_shift0, gint16, gint16, |
| guint8, 8, 0) |
| RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps4_shift0, gint16, gint16, |
| guint8, 4, 0) |
| |
| RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps16_shift0, gint32, gint32, |
| guint8, 16, 0) |
| RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps12_shift0, gint32, gint32, |
| guint8, 12, 0) |
| RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps8_shift0, gint32, gint32, |
| guint8, 8, 0) |
| RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps4_shift0, gint32, gint32, |
| guint8, 4, 0) |
| RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps16_shift0, gint16, gint16, |
| guint8, 16, 0) |
| RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps12_shift0, gint16, gint16, |
| guint8, 12, 0) |
| RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps8_shift0, gint16, gint16, |
| guint8, 8, 0) |
| RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps4_shift0, gint16, gint16, |
| guint8, 4, 0) |
| /* *INDENT-ON* */ |
| |
| #define RESAMPLE_VERT(function, tap_type, src_type, _n_taps, _shift) \ |
| static void \ |
| function (guint8 *dest, \ |
| const tap_type *taps, const src_type *src, int stride, int n_taps, \ |
| int shift, int n) \ |
| { \ |
| int i; \ |
| int l; \ |
| gint32 sum_y; \ |
| gint32 offset = (1<<_shift) >> 1; \ |
| for (i = 0; i < n; i++) { \ |
| sum_y = 0; \ |
| for (l = 0; l < n_taps; l++) { \ |
| const src_type *line = PTR_OFFSET(src, stride * l); \ |
| sum_y += line[i] * taps[l]; \ |
| } \ |
| dest[i] = CLAMP ((sum_y + offset) >> _shift, 0, 255); \ |
| } \ |
| } |
| |
| #define RESAMPLE_VERT_DITHER(function, tap_type, src_type, _n_taps, _shift) \ |
| static void \ |
| function (guint8 *dest, \ |
| const tap_type *taps, const src_type *src, int stride, int n_taps, \ |
| int shift, int n) \ |
| { \ |
| int i; \ |
| int l; \ |
| gint32 sum_y; \ |
| gint32 err_y = 0; \ |
| gint32 mask = (1<<_shift) - 1; \ |
| for (i = 0; i < n; i++) { \ |
| sum_y = 0; \ |
| for (l = 0; l < n_taps; l++) { \ |
| const src_type *line = PTR_OFFSET(src, stride * l); \ |
| sum_y += line[i] * taps[l]; \ |
| } \ |
| err_y += sum_y; \ |
| dest[i] = CLAMP (err_y >> _shift, 0, 255); \ |
| err_y &= mask; \ |
| } \ |
| } |
| |
| /* *INDENT-OFF* */ |
| RESAMPLE_VERT (resample_vert_int32_generic, gint32, gint32, n_taps, shift) |
| RESAMPLE_VERT_DITHER (resample_vert_dither_int32_generic, gint32, gint32, |
| n_taps, shift) |
| RESAMPLE_VERT (resample_vert_int16_generic, gint16, gint16, n_taps, shift); |
| RESAMPLE_VERT_DITHER (resample_vert_dither_int16_generic, gint16, gint16, |
| n_taps, shift) |
| /* *INDENT-ON* */ |
| |
| #define RESAMPLE_VERT_FLOAT(function, dest_type, clamp, tap_type, src_type, _n_taps, _shift) \ |
| static void \ |
| function (dest_type *dest, \ |
| const tap_type *taps, const src_type *src, int stride, int n_taps, \ |
| int shift, int n) \ |
| { \ |
| int i; \ |
| int l; \ |
| src_type sum_y; \ |
| for (i = 0; i < n; i++) { \ |
| sum_y = 0; \ |
| for (l = 0; l < n_taps; l++) { \ |
| const src_type *line = PTR_OFFSET(src, stride * l); \ |
| sum_y += line[i] * taps[l]; \ |
| } \ |
| dest[i] = CLAMP (floor(0.5 + sum_y), 0, clamp); \ |
| } \ |
| } |
| |
| #define RESAMPLE_VERT_FLOAT_DITHER(function, dest_type, clamp, tap_type, src_type, _n_taps, _shift) \ |
| static void \ |
| function (dest_type *dest, \ |
| const tap_type *taps, const src_type *src, int stride, int n_taps, \ |
| int shift, int n) \ |
| { \ |
| int i; \ |
| int l; \ |
| src_type sum_y; \ |
| src_type err_y = 0; \ |
| for (i = 0; i < n; i++) { \ |
| sum_y = 0; \ |
| for (l = 0; l < n_taps; l++) { \ |
| const src_type *line = PTR_OFFSET(src, stride * l); \ |
| sum_y += line[i] * taps[l]; \ |
| } \ |
| err_y += sum_y; \ |
| dest[i] = CLAMP (floor (err_y), 0, clamp); \ |
| err_y -= floor (err_y); \ |
| } \ |
| } |
| |
| /* *INDENT-OFF* */ |
| RESAMPLE_VERT_FLOAT (resample_vert_double_generic, guint8, 255, double, double, n_taps, |
| shift) |
| RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_double_generic, guint8, 255, double, double, |
| n_taps, shift) |
| |
| RESAMPLE_VERT_FLOAT (resample_vert_double_generic_u16, guint16, 65535, double, double, n_taps, |
| shift) |
| RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_double_generic_u16, guint16, 65535, double, double, |
| n_taps, shift) |
| |
| RESAMPLE_VERT_FLOAT (resample_vert_float_generic, guint8, 255, float, float, n_taps, shift) |
| RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_float_generic, guint8, 255, float, float, |
| n_taps, shift) |
| /* *INDENT-ON* */ |
| |
| #define S16_SHIFT1 7 |
| #define S16_SHIFT2 7 |
| #define S16_MIDSHIFT 0 |
| #define S16_POSTSHIFT (S16_SHIFT1+S16_SHIFT2-S16_MIDSHIFT) |
| |
| static void |
| vs_scale_lanczos_Y_int16 (Scale * scale) |
| { |
| int j; |
| int yi; |
| int tmp_yi; |
| |
| tmp_yi = 0; |
| |
| for (j = 0; j < scale->dest->height; j++) { |
| guint8 *destline; |
| gint16 *taps; |
| |
| destline = scale->dest->pixels + scale->dest->stride * j; |
| |
| yi = scale->y_scale1d.offsets[j]; |
| |
| while (tmp_yi < yi + scale->y_scale1d.n_taps) { |
| scale->horiz_resample_func (TMP_LINE_S16 (tmp_yi), |
| scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), |
| scale->x_scale1d.n_taps, S16_MIDSHIFT, scale->dest->width); |
| tmp_yi++; |
| } |
| |
| taps = (gint16 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; |
| if (scale->dither) { |
| resample_vert_dither_int16_generic (destline, |
| taps, TMP_LINE_S16 (scale->y_scale1d.offsets[j]), |
| sizeof (gint16) * scale->dest->width, scale->y_scale1d.n_taps, |
| S16_POSTSHIFT, scale->dest->width); |
| } else { |
| resample_vert_int16_generic (destline, |
| taps, TMP_LINE_S16 (scale->y_scale1d.offsets[j]), |
| sizeof (gint16) * scale->dest->width, scale->y_scale1d.n_taps, |
| S16_POSTSHIFT, scale->dest->width); |
| } |
| } |
| } |
| |
| void |
| vs_image_scale_lanczos_Y_int16 (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, double a, |
| double sharpen) |
| { |
| Scale s = { 0 }; |
| Scale *scale = &s; |
| int n_taps; |
| |
| scale->dest = dest; |
| scale->src = src; |
| |
| n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); |
| n_taps = ROUND_UP_4 (n_taps); |
| scale1d_calculate_taps_int16 (&scale->x_scale1d, |
| src->width, dest->width, n_taps, a, sharpness, sharpen, S16_SHIFT1); |
| |
| n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); |
| scale1d_calculate_taps_int16 (&scale->y_scale1d, |
| src->height, dest->height, n_taps, a, sharpness, sharpen, S16_SHIFT2); |
| |
| scale->dither = dither; |
| |
| switch (scale->x_scale1d.n_taps) { |
| case 4: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int16_int16_u8_taps4_shift0; |
| break; |
| case 8: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int16_int16_u8_taps8_shift0; |
| break; |
| case 12: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int16_int16_u8_taps12_shift0; |
| break; |
| case 16: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int16_int16_u8_taps16_shift0; |
| break; |
| default: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int16_int16_u8_generic; |
| break; |
| } |
| |
| scale->tmpdata = |
| g_malloc (sizeof (gint16) * scale->dest->width * scale->src->height); |
| |
| vs_scale_lanczos_Y_int16 (scale); |
| |
| scale1d_cleanup (&scale->x_scale1d); |
| scale1d_cleanup (&scale->y_scale1d); |
| g_free (scale->tmpdata); |
| } |
| |
| |
| #define S32_SHIFT1 11 |
| #define S32_SHIFT2 11 |
| #define S32_MIDSHIFT 0 |
| #define S32_POSTSHIFT (S32_SHIFT1+S32_SHIFT2-S32_MIDSHIFT) |
| |
| static void |
| vs_scale_lanczos_Y_int32 (Scale * scale) |
| { |
| int j; |
| int yi; |
| int tmp_yi; |
| |
| tmp_yi = 0; |
| |
| for (j = 0; j < scale->dest->height; j++) { |
| guint8 *destline; |
| gint32 *taps; |
| |
| destline = scale->dest->pixels + scale->dest->stride * j; |
| |
| yi = scale->y_scale1d.offsets[j]; |
| |
| while (tmp_yi < yi + scale->y_scale1d.n_taps) { |
| scale->horiz_resample_func (TMP_LINE_S32 (tmp_yi), |
| scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), |
| scale->x_scale1d.n_taps, S32_MIDSHIFT, scale->dest->width); |
| tmp_yi++; |
| } |
| |
| taps = (gint32 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; |
| if (scale->dither) { |
| resample_vert_dither_int32_generic (destline, |
| taps, TMP_LINE_S32 (scale->y_scale1d.offsets[j]), |
| sizeof (gint32) * scale->dest->width, |
| scale->y_scale1d.n_taps, S32_POSTSHIFT, scale->dest->width); |
| } else { |
| resample_vert_int32_generic (destline, |
| taps, TMP_LINE_S32 (scale->y_scale1d.offsets[j]), |
| sizeof (gint32) * scale->dest->width, |
| scale->y_scale1d.n_taps, S32_POSTSHIFT, scale->dest->width); |
| } |
| } |
| } |
| |
| void |
| vs_image_scale_lanczos_Y_int32 (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, double a, |
| double sharpen) |
| { |
| Scale s = { 0 }; |
| Scale *scale = &s; |
| int n_taps; |
| |
| scale->dest = dest; |
| scale->src = src; |
| |
| n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); |
| n_taps = ROUND_UP_4 (n_taps); |
| scale1d_calculate_taps_int32 (&scale->x_scale1d, |
| src->width, dest->width, n_taps, a, sharpness, sharpen, S32_SHIFT1); |
| |
| n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); |
| scale1d_calculate_taps_int32 (&scale->y_scale1d, |
| src->height, dest->height, n_taps, a, sharpness, sharpen, S32_SHIFT2); |
| |
| scale->dither = dither; |
| |
| switch (scale->x_scale1d.n_taps) { |
| case 4: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int32_int32_u8_taps4_shift0; |
| break; |
| case 8: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int32_int32_u8_taps8_shift0; |
| break; |
| case 12: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int32_int32_u8_taps12_shift0; |
| break; |
| case 16: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int32_int32_u8_taps16_shift0; |
| break; |
| default: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int32_int32_u8_generic; |
| break; |
| } |
| |
| scale->tmpdata = |
| g_malloc (sizeof (int32_t) * scale->dest->width * scale->src->height); |
| |
| vs_scale_lanczos_Y_int32 (scale); |
| |
| scale1d_cleanup (&scale->x_scale1d); |
| scale1d_cleanup (&scale->y_scale1d); |
| g_free (scale->tmpdata); |
| } |
| |
| static void |
| vs_scale_lanczos_Y_double (Scale * scale) |
| { |
| int j; |
| int yi; |
| int tmp_yi; |
| |
| tmp_yi = 0; |
| |
| for (j = 0; j < scale->dest->height; j++) { |
| guint8 *destline; |
| double *taps; |
| |
| destline = scale->dest->pixels + scale->dest->stride * j; |
| |
| yi = scale->y_scale1d.offsets[j]; |
| |
| while (tmp_yi < yi + scale->y_scale1d.n_taps) { |
| scale->horiz_resample_func (TMP_LINE_DOUBLE (tmp_yi), |
| scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), |
| scale->x_scale1d.n_taps, 0, scale->dest->width); |
| tmp_yi++; |
| } |
| |
| taps = (double *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; |
| if (scale->dither) { |
| resample_vert_dither_double_generic (destline, |
| taps, TMP_LINE_DOUBLE (scale->y_scale1d.offsets[j]), |
| sizeof (double) * scale->dest->width, |
| scale->y_scale1d.n_taps, 0, scale->dest->width); |
| } else { |
| resample_vert_double_generic (destline, |
| taps, TMP_LINE_DOUBLE (scale->y_scale1d.offsets[j]), |
| sizeof (double) * scale->dest->width, |
| scale->y_scale1d.n_taps, 0, scale->dest->width); |
| } |
| } |
| } |
| |
| void |
| vs_image_scale_lanczos_Y_double (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, double a, |
| double sharpen) |
| { |
| Scale s = { 0 }; |
| Scale *scale = &s; |
| int n_taps; |
| |
| scale->dest = dest; |
| scale->src = src; |
| |
| n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); |
| scale1d_calculate_taps (&scale->x_scale1d, |
| src->width, dest->width, n_taps, a, sharpness, sharpen); |
| |
| n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); |
| scale1d_calculate_taps (&scale->y_scale1d, |
| src->height, dest->height, n_taps, a, sharpness, sharpen); |
| |
| scale->dither = dither; |
| |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_double_u8_generic; |
| |
| scale->tmpdata = |
| g_malloc (sizeof (double) * scale->dest->width * scale->src->height); |
| |
| vs_scale_lanczos_Y_double (scale); |
| |
| scale1d_cleanup (&scale->x_scale1d); |
| scale1d_cleanup (&scale->y_scale1d); |
| g_free (scale->tmpdata); |
| } |
| |
| static void |
| vs_scale_lanczos_Y_float (Scale * scale) |
| { |
| int j; |
| int yi; |
| int tmp_yi; |
| |
| tmp_yi = 0; |
| |
| for (j = 0; j < scale->dest->height; j++) { |
| guint8 *destline; |
| float *taps; |
| |
| destline = scale->dest->pixels + scale->dest->stride * j; |
| |
| yi = scale->y_scale1d.offsets[j]; |
| |
| while (tmp_yi < yi + scale->y_scale1d.n_taps) { |
| scale->horiz_resample_func (TMP_LINE_FLOAT (tmp_yi), |
| scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), |
| scale->x_scale1d.n_taps, 0, scale->dest->width); |
| tmp_yi++; |
| } |
| |
| taps = (float *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; |
| if (scale->dither) { |
| resample_vert_dither_float_generic (destline, |
| taps, TMP_LINE_FLOAT (scale->y_scale1d.offsets[j]), |
| sizeof (float) * scale->dest->width, |
| scale->y_scale1d.n_taps, 0, scale->dest->width); |
| } else { |
| resample_vert_float_generic (destline, |
| taps, TMP_LINE_FLOAT (scale->y_scale1d.offsets[j]), |
| sizeof (float) * scale->dest->width, |
| scale->y_scale1d.n_taps, 0, scale->dest->width); |
| } |
| } |
| } |
| |
| void |
| vs_image_scale_lanczos_Y_float (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, double a, |
| double sharpen) |
| { |
| Scale s = { 0 }; |
| Scale *scale = &s; |
| int n_taps; |
| |
| scale->dest = dest; |
| scale->src = src; |
| |
| n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); |
| scale1d_calculate_taps_float (&scale->x_scale1d, |
| src->width, dest->width, n_taps, a, sharpness, sharpen); |
| |
| n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); |
| scale1d_calculate_taps_float (&scale->y_scale1d, |
| src->height, dest->height, n_taps, a, sharpness, sharpen); |
| |
| scale->dither = dither; |
| |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_float_u8_generic; |
| |
| scale->tmpdata = |
| g_malloc (sizeof (float) * scale->dest->width * scale->src->height); |
| |
| vs_scale_lanczos_Y_float (scale); |
| |
| scale1d_cleanup (&scale->x_scale1d); |
| scale1d_cleanup (&scale->y_scale1d); |
| g_free (scale->tmpdata); |
| } |
| |
| |
| |
| |
| |
| static void |
| vs_scale_lanczos_AYUV_int16 (Scale * scale) |
| { |
| int j; |
| int yi; |
| int tmp_yi; |
| |
| tmp_yi = 0; |
| |
| for (j = 0; j < scale->dest->height; j++) { |
| guint8 *destline; |
| gint16 *taps; |
| |
| destline = scale->dest->pixels + scale->dest->stride * j; |
| |
| yi = scale->y_scale1d.offsets[j]; |
| |
| while (tmp_yi < yi + scale->y_scale1d.n_taps) { |
| scale->horiz_resample_func (TMP_LINE_S16_AYUV (tmp_yi), |
| scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), |
| scale->x_scale1d.n_taps, S16_MIDSHIFT, scale->dest->width); |
| tmp_yi++; |
| } |
| |
| taps = (gint16 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; |
| if (scale->dither) { |
| resample_vert_dither_int16_generic (destline, |
| taps, TMP_LINE_S16_AYUV (scale->y_scale1d.offsets[j]), |
| sizeof (gint16) * 4 * scale->dest->width, |
| scale->y_scale1d.n_taps, S16_POSTSHIFT, scale->dest->width * 4); |
| } else { |
| resample_vert_int16_generic (destline, |
| taps, TMP_LINE_S16_AYUV (scale->y_scale1d.offsets[j]), |
| sizeof (gint16) * 4 * scale->dest->width, |
| scale->y_scale1d.n_taps, S16_POSTSHIFT, scale->dest->width * 4); |
| } |
| } |
| } |
| |
| void |
| vs_image_scale_lanczos_AYUV_int16 (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, double a, |
| double sharpen) |
| { |
| Scale s = { 0 }; |
| Scale *scale = &s; |
| int n_taps; |
| |
| scale->dest = dest; |
| scale->src = src; |
| |
| n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); |
| n_taps = ROUND_UP_4 (n_taps); |
| scale1d_calculate_taps_int16 (&scale->x_scale1d, |
| src->width, dest->width, n_taps, a, sharpness, sharpen, S16_SHIFT1); |
| |
| n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); |
| scale1d_calculate_taps_int16 (&scale->y_scale1d, |
| src->height, dest->height, n_taps, a, sharpness, sharpen, S16_SHIFT2); |
| |
| scale->dither = dither; |
| |
| switch (scale->x_scale1d.n_taps) { |
| case 4: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps4_shift0; |
| break; |
| case 8: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps8_shift0; |
| break; |
| case 12: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps12_shift0; |
| break; |
| case 16: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps16_shift0; |
| break; |
| default: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int16_int16_ayuv_generic; |
| break; |
| } |
| |
| scale->tmpdata = |
| g_malloc (sizeof (gint16) * scale->dest->width * scale->src->height * 4); |
| |
| vs_scale_lanczos_AYUV_int16 (scale); |
| |
| scale1d_cleanup (&scale->x_scale1d); |
| scale1d_cleanup (&scale->y_scale1d); |
| g_free (scale->tmpdata); |
| } |
| |
| |
| static void |
| vs_scale_lanczos_AYUV_int32 (Scale * scale) |
| { |
| int j; |
| int yi; |
| int tmp_yi; |
| |
| tmp_yi = 0; |
| |
| for (j = 0; j < scale->dest->height; j++) { |
| guint8 *destline; |
| gint32 *taps; |
| |
| destline = scale->dest->pixels + scale->dest->stride * j; |
| |
| yi = scale->y_scale1d.offsets[j]; |
| |
| while (tmp_yi < yi + scale->y_scale1d.n_taps) { |
| scale->horiz_resample_func (TMP_LINE_S32_AYUV (tmp_yi), |
| scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), |
| scale->x_scale1d.n_taps, S32_MIDSHIFT, scale->dest->width); |
| tmp_yi++; |
| } |
| |
| taps = (gint32 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; |
| if (scale->dither) { |
| resample_vert_dither_int32_generic (destline, |
| taps, TMP_LINE_S32_AYUV (scale->y_scale1d.offsets[j]), |
| sizeof (gint32) * 4 * scale->dest->width, scale->y_scale1d.n_taps, |
| S32_POSTSHIFT, scale->dest->width * 4); |
| } else { |
| resample_vert_int32_generic (destline, |
| taps, TMP_LINE_S32_AYUV (scale->y_scale1d.offsets[j]), |
| sizeof (gint32) * 4 * scale->dest->width, scale->y_scale1d.n_taps, |
| S32_POSTSHIFT, scale->dest->width * 4); |
| } |
| } |
| } |
| |
| void |
| vs_image_scale_lanczos_AYUV_int32 (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, double a, |
| double sharpen) |
| { |
| Scale s = { 0 }; |
| Scale *scale = &s; |
| int n_taps; |
| |
| scale->dest = dest; |
| scale->src = src; |
| |
| n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); |
| n_taps = ROUND_UP_4 (n_taps); |
| scale1d_calculate_taps_int32 (&scale->x_scale1d, |
| src->width, dest->width, n_taps, a, sharpness, sharpen, S32_SHIFT1); |
| |
| n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); |
| scale1d_calculate_taps_int32 (&scale->y_scale1d, |
| src->height, dest->height, n_taps, a, sharpness, sharpen, S32_SHIFT2); |
| |
| scale->dither = dither; |
| |
| switch (scale->x_scale1d.n_taps) { |
| case 4: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps4_shift0; |
| break; |
| case 8: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps8_shift0; |
| break; |
| case 12: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps12_shift0; |
| break; |
| case 16: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps16_shift0; |
| break; |
| default: |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_int32_int32_ayuv_generic; |
| break; |
| } |
| |
| scale->tmpdata = |
| g_malloc (sizeof (int32_t) * scale->dest->width * scale->src->height * 4); |
| |
| vs_scale_lanczos_AYUV_int32 (scale); |
| |
| scale1d_cleanup (&scale->x_scale1d); |
| scale1d_cleanup (&scale->y_scale1d); |
| g_free (scale->tmpdata); |
| } |
| |
| static void |
| vs_scale_lanczos_AYUV_double (Scale * scale) |
| { |
| int j; |
| int yi; |
| int tmp_yi; |
| |
| tmp_yi = 0; |
| |
| for (j = 0; j < scale->dest->height; j++) { |
| guint8 *destline; |
| double *taps; |
| |
| destline = scale->dest->pixels + scale->dest->stride * j; |
| |
| yi = scale->y_scale1d.offsets[j]; |
| |
| while (tmp_yi < yi + scale->y_scale1d.n_taps) { |
| scale->horiz_resample_func (TMP_LINE_DOUBLE_AYUV (tmp_yi), |
| scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), |
| scale->x_scale1d.n_taps, 0, scale->dest->width); |
| tmp_yi++; |
| } |
| |
| taps = (double *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; |
| if (scale->dither) { |
| resample_vert_dither_double_generic (destline, |
| taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]), |
| sizeof (double) * 4 * scale->dest->width, |
| scale->y_scale1d.n_taps, 0, scale->dest->width * 4); |
| } else { |
| resample_vert_double_generic (destline, |
| taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]), |
| sizeof (double) * 4 * scale->dest->width, |
| scale->y_scale1d.n_taps, 0, scale->dest->width * 4); |
| } |
| } |
| } |
| |
| void |
| vs_image_scale_lanczos_AYUV_double (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, double a, |
| double sharpen) |
| { |
| Scale s = { 0 }; |
| Scale *scale = &s; |
| int n_taps; |
| |
| scale->dest = dest; |
| scale->src = src; |
| |
| n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); |
| scale1d_calculate_taps (&scale->x_scale1d, |
| src->width, dest->width, n_taps, a, sharpness, sharpen); |
| |
| n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); |
| scale1d_calculate_taps (&scale->y_scale1d, |
| src->height, dest->height, n_taps, a, sharpness, sharpen); |
| |
| scale->dither = dither; |
| |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_double_ayuv_generic; |
| |
| scale->tmpdata = |
| g_malloc (sizeof (double) * scale->dest->width * scale->src->height * 4); |
| |
| vs_scale_lanczos_AYUV_double (scale); |
| |
| scale1d_cleanup (&scale->x_scale1d); |
| scale1d_cleanup (&scale->y_scale1d); |
| g_free (scale->tmpdata); |
| } |
| |
| static void |
| vs_scale_lanczos_AYUV_float (Scale * scale) |
| { |
| int j; |
| int yi; |
| int tmp_yi; |
| |
| tmp_yi = 0; |
| |
| for (j = 0; j < scale->dest->height; j++) { |
| guint8 *destline; |
| float *taps; |
| |
| destline = scale->dest->pixels + scale->dest->stride * j; |
| |
| yi = scale->y_scale1d.offsets[j]; |
| |
| while (tmp_yi < yi + scale->y_scale1d.n_taps) { |
| scale->horiz_resample_func (TMP_LINE_FLOAT_AYUV (tmp_yi), |
| scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), |
| scale->x_scale1d.n_taps, 0, scale->dest->width); |
| tmp_yi++; |
| } |
| |
| taps = (float *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; |
| if (scale->dither) { |
| resample_vert_dither_float_generic (destline, |
| taps, TMP_LINE_FLOAT_AYUV (scale->y_scale1d.offsets[j]), |
| sizeof (float) * 4 * scale->dest->width, scale->y_scale1d.n_taps, 0, |
| scale->dest->width * 4); |
| } else { |
| resample_vert_float_generic (destline, |
| taps, TMP_LINE_FLOAT_AYUV (scale->y_scale1d.offsets[j]), |
| sizeof (float) * 4 * scale->dest->width, scale->y_scale1d.n_taps, 0, |
| scale->dest->width * 4); |
| } |
| } |
| } |
| |
| void |
| vs_image_scale_lanczos_AYUV_float (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, double a, |
| double sharpen) |
| { |
| Scale s = { 0 }; |
| Scale *scale = &s; |
| int n_taps; |
| |
| scale->dest = dest; |
| scale->src = src; |
| |
| n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); |
| scale1d_calculate_taps_float (&scale->x_scale1d, |
| src->width, dest->width, n_taps, a, sharpness, sharpen); |
| |
| n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); |
| scale1d_calculate_taps_float (&scale->y_scale1d, |
| src->height, dest->height, n_taps, a, sharpness, sharpen); |
| |
| scale->dither = dither; |
| |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_float_ayuv_generic; |
| |
| scale->tmpdata = |
| g_malloc (sizeof (float) * scale->dest->width * scale->src->height * 4); |
| |
| vs_scale_lanczos_AYUV_float (scale); |
| |
| scale1d_cleanup (&scale->x_scale1d); |
| scale1d_cleanup (&scale->y_scale1d); |
| g_free (scale->tmpdata); |
| } |
| |
| static void |
| vs_scale_lanczos_AYUV64_double (Scale * scale) |
| { |
| int j; |
| int yi; |
| int tmp_yi; |
| |
| tmp_yi = 0; |
| |
| for (j = 0; j < scale->dest->height; j++) { |
| guint16 *destline; |
| double *taps; |
| |
| destline = (guint16 *) (scale->dest->pixels + scale->dest->stride * j); |
| |
| yi = scale->y_scale1d.offsets[j]; |
| |
| while (tmp_yi < yi + scale->y_scale1d.n_taps) { |
| scale->horiz_resample_func (TMP_LINE_DOUBLE_AYUV (tmp_yi), |
| scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), |
| scale->x_scale1d.n_taps, 0, scale->dest->width); |
| tmp_yi++; |
| } |
| |
| taps = (double *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; |
| if (scale->dither) { |
| resample_vert_dither_double_generic_u16 (destline, |
| taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]), |
| sizeof (double) * 4 * scale->dest->width, |
| scale->y_scale1d.n_taps, 0, scale->dest->width * 4); |
| } else { |
| resample_vert_double_generic_u16 (destline, |
| taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]), |
| sizeof (double) * 4 * scale->dest->width, |
| scale->y_scale1d.n_taps, 0, scale->dest->width * 4); |
| } |
| } |
| } |
| |
| void |
| vs_image_scale_lanczos_AYUV64_double (const VSImage * dest, const VSImage * src, |
| uint8_t * tmpbuf, double sharpness, gboolean dither, double a, |
| double sharpen) |
| { |
| Scale s = { 0 }; |
| Scale *scale = &s; |
| int n_taps; |
| |
| scale->dest = dest; |
| scale->src = src; |
| |
| n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); |
| scale1d_calculate_taps (&scale->x_scale1d, |
| src->width, dest->width, n_taps, a, sharpness, sharpen); |
| |
| n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); |
| scale1d_calculate_taps (&scale->y_scale1d, |
| src->height, dest->height, n_taps, a, sharpness, sharpen); |
| |
| scale->dither = dither; |
| |
| scale->horiz_resample_func = |
| (HorizResampleFunc) resample_horiz_double_ayuv_generic_s16; |
| |
| scale->tmpdata = |
| g_malloc (sizeof (double) * scale->dest->width * scale->src->height * 4); |
| |
| vs_scale_lanczos_AYUV64_double (scale); |
| |
| scale1d_cleanup (&scale->x_scale1d); |
| scale1d_cleanup (&scale->y_scale1d); |
| g_free (scale->tmpdata); |
| } |