You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
278 lines
7.0 KiB
278 lines
7.0 KiB
/*
 *	Image Library -- Image scaling algorithms
 *
 *	(c) 2006 Pavel Charvat <pchar@ucw.cz>
 *
 *	This software may be freely distributed and used according to the terms
 *	of the GNU Lesser General Public License.
 */
|
|
|
|
#undef LOCAL_DEBUG
|
|
|
|
#include <ucw/lib.h>
|
|
#include <images/images.h>
|
|
#include <images/error.h>
|
|
#include <images/math.h>
|
|
|
|
#include <string.h>
|
|
|
|
#ifdef __SSE2__
|
|
#include <emmintrin.h>
|
|
#endif
|
|
|
|
/*
 * Fixed-point linear interpolation between `a' and `b'.
 * `t' is a 16.16-style fraction: t == 0 yields `a', and the result moves
 * towards `b' as t grows towards 0x10000.  The 0x8000 term rounds to the
 * nearest integer before the final shift drops the fractional bits.
 * Note that `a' is evaluated twice, so arguments must be free of side effects.
 */
#define LINEAR_INTERPOLATE(a, b, t) (((int)((a) << 16) + (int)(t) * ((int)(b) - (int)(a)) + 0x8000) >> 16)
|
|
|
|
/* Generate optimized code for various pixel formats */
|
|
|
|
/*
 * Instantiate the generic scaling template once per supported pixel size.
 * Each inclusion is parameterized by IMAGE_SCALE_PREFIX (the name prefix of
 * the generated routines) and IMAGE_SCALE_PIXEL_SIZE (bytes per pixel).
 * NOTE(review): the template presumably #undefs both macros at its end,
 * otherwise the redefinitions below would clash -- confirm in scale-gen.h.
 */

/* 1 byte per pixel -> image_scale_1_*() */
#define IMAGE_SCALE_PREFIX(x) image_scale_1_##x
#define IMAGE_SCALE_PIXEL_SIZE 1
#include <images/scale-gen.h>

/* 2 bytes per pixel -> image_scale_2_*() */
#define IMAGE_SCALE_PREFIX(x) image_scale_2_##x
#define IMAGE_SCALE_PIXEL_SIZE 2
#include <images/scale-gen.h>

/* 3 bytes per pixel -> image_scale_3_*() */
#define IMAGE_SCALE_PREFIX(x) image_scale_3_##x
#define IMAGE_SCALE_PIXEL_SIZE 3
#include <images/scale-gen.h>

/* 4 bytes per pixel -> image_scale_4_*() */
#define IMAGE_SCALE_PREFIX(x) image_scale_4_##x
#define IMAGE_SCALE_PIXEL_SIZE 4
#include <images/scale-gen.h>
|
|
|
|
/* Simple "nearest neighbour" algorithm */
|
|
|
|
static void
|
|
image_scale_nearest_xy(struct image *dest, struct image *src)
|
|
{
|
|
switch (src->pixel_size)
|
|
{
|
|
case 1:
|
|
image_scale_1_nearest_xy(dest, src);
|
|
return;
|
|
case 2:
|
|
image_scale_2_nearest_xy(dest, src);
|
|
return;
|
|
case 3:
|
|
image_scale_3_nearest_xy(dest, src);
|
|
return;
|
|
case 4:
|
|
image_scale_4_nearest_xy(dest, src);
|
|
return;
|
|
default:
|
|
ASSERT(0);
|
|
}
|
|
}
|
|
|
|
/*
 * Nearest-neighbour scaling for the horizontal-only case.
 * There is no dedicated implementation; the request is simply handed to
 * the two-dimensional routine.
 */
static inline void
image_scale_nearest_x(struct image *dest, struct image *src)
{
  image_scale_nearest_xy(dest, src);
}
|
|
|
|
static void
|
|
image_scale_nearest_y(struct image *dest, struct image *src)
|
|
{
|
|
uint y_inc = (src->rows << 16) / dest->rows;
|
|
uint y_pos = y_inc >> 1;
|
|
byte *dest_pos = dest->pixels;
|
|
for (uint row_counter = dest->rows; row_counter--; )
|
|
{
|
|
byte *src_pos = src->pixels + (y_pos >> 16) * src->row_size;
|
|
y_pos += y_inc;
|
|
memcpy(dest_pos, src_pos, dest->row_pixels_size);
|
|
dest_pos += dest->row_size;
|
|
}
|
|
}
|
|
|
|
/* Bilinear filter */
|
|
|
|
UNUSED static void
|
|
image_scale_linear_y(struct image *dest, struct image *src)
|
|
{
|
|
byte *dest_row = dest->pixels;
|
|
/* Handle problematic special case */
|
|
if (src->rows == 1)
|
|
{
|
|
for (uint y_counter = dest->rows; y_counter--; dest_row += dest->row_size)
|
|
memcpy(dest_row, src->pixels, src->row_pixels_size);
|
|
return;
|
|
}
|
|
/* Initialize the main loop */
|
|
uint y_inc = ((src->rows - 1) << 16) / (dest->rows - 1), y_pos = 0;
|
|
#ifdef __SSE2__
|
|
__m128i zero = _mm_setzero_si128();
|
|
#endif
|
|
/* Main loop */
|
|
for (uint y_counter = dest->rows; --y_counter; )
|
|
{
|
|
uint coef = y_pos & 0xffff;
|
|
byte *src_row_1 = src->pixels + (y_pos >> 16) * src->row_size;
|
|
byte *src_row_2 = src_row_1 + src->row_size;
|
|
uint i = 0;
|
|
#ifdef __SSE2__
|
|
/* SSE2 */
|
|
__m128i sse_coef = _mm_set1_epi16(coef >> 9);
|
|
for (; (int)i < (int)dest->row_pixels_size - 15; i += 16)
|
|
{
|
|
__m128i a2 = _mm_loadu_si128((__m128i *)(src_row_1 + i));
|
|
__m128i a1 = _mm_unpacklo_epi8(a2, zero);
|
|
a2 = _mm_unpackhi_epi8(a2, zero);
|
|
__m128i b2 = _mm_loadu_si128((__m128i *)(src_row_2 + i));
|
|
__m128i b1 = _mm_unpacklo_epi8(b2, zero);
|
|
b2 = _mm_unpackhi_epi8(b2, zero);
|
|
b1 = _mm_sub_epi16(b1, a1);
|
|
b2 = _mm_sub_epi16(b2, a2);
|
|
a1 = _mm_slli_epi16(a1, 7);
|
|
a2 = _mm_slli_epi16(a2, 7);
|
|
b1 = _mm_mullo_epi16(b1, sse_coef);
|
|
b2 = _mm_mullo_epi16(b2, sse_coef);
|
|
a1 = _mm_add_epi16(a1, b1);
|
|
a2 = _mm_add_epi16(a2, b2);
|
|
a1 = _mm_srli_epi16(a1, 7);
|
|
a2 = _mm_srli_epi16(a2, 7);
|
|
a1 = _mm_packus_epi16(a1, a2);
|
|
_mm_storeu_si128((__m128i *)(dest_row + i), a1);
|
|
}
|
|
#endif
|
|
/* Unrolled loop using general-purpose registers */
|
|
for (; (int)i < (int)dest->row_pixels_size - 3; i += 4)
|
|
{
|
|
dest_row[i + 0] = LINEAR_INTERPOLATE(src_row_1[i + 0], src_row_2[i + 0], coef);
|
|
dest_row[i + 1] = LINEAR_INTERPOLATE(src_row_1[i + 1], src_row_2[i + 1], coef);
|
|
dest_row[i + 2] = LINEAR_INTERPOLATE(src_row_1[i + 2], src_row_2[i + 2], coef);
|
|
dest_row[i + 3] = LINEAR_INTERPOLATE(src_row_1[i + 3], src_row_2[i + 3], coef);
|
|
}
|
|
/* Remaining columns */
|
|
for (; i < dest->row_pixels_size; i++)
|
|
dest_row[i] = LINEAR_INTERPOLATE(src_row_1[i], src_row_2[i], coef);
|
|
dest_row += dest->row_size;
|
|
y_pos += y_inc;
|
|
}
|
|
/* Always copy the last row - faster and also handle "y_pos == dest->rows * 0x10000" overflow */
|
|
memcpy(dest_row, src->pixels + src->image_size - src->row_size, src->row_pixels_size);
|
|
}
|
|
|
|
/* Box filter */
|
|
|
|
static void
|
|
image_scale_downsample_xy(struct image *dest, struct image *src)
|
|
{
|
|
switch (src->pixel_size)
|
|
{
|
|
case 1:
|
|
image_scale_1_downsample_xy(dest, src);
|
|
return;
|
|
case 2:
|
|
image_scale_2_downsample_xy(dest, src);
|
|
return;
|
|
case 3:
|
|
image_scale_3_downsample_xy(dest, src);
|
|
return;
|
|
case 4:
|
|
image_scale_4_downsample_xy(dest, src);
|
|
return;
|
|
default:
|
|
ASSERT(0);
|
|
}
|
|
}
|
|
|
|
/* General routine
 * FIXME: customizable; implement at least bilinear and bicubic filters */
|
|
|
|
int
|
|
image_scale(struct image_context *ctx, struct image *dest, struct image *src)
|
|
{
|
|
if ((src->flags & IMAGE_PIXEL_FORMAT) != (dest->flags & IMAGE_PIXEL_FORMAT))
|
|
{
|
|
IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Different pixel formats not supported.");
|
|
return 0;
|
|
}
|
|
if (dest->cols == src->cols)
|
|
{
|
|
if (dest->rows == src->rows)
|
|
{
|
|
/* No scale, copy only */
|
|
image_scale_nearest_y(dest, src);
|
|
return 1;
|
|
}
|
|
else if (dest->rows < src->rows)
|
|
{
|
|
/* Downscale vertically */
|
|
image_scale_downsample_xy(dest, src);
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
/* Upscale vertically */
|
|
image_scale_nearest_y(dest, src);
|
|
return 1;
|
|
}
|
|
}
|
|
else if (dest->rows == src->rows)
|
|
{
|
|
if (dest->cols < src->cols)
|
|
{
|
|
/* Downscale horizontally */
|
|
image_scale_downsample_xy(dest, src);
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
/* Upscale horizontally */
|
|
image_scale_nearest_x(dest, src);
|
|
return 1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (dest->cols <= src->cols && dest->rows <= src->rows)
|
|
{
|
|
/* Downscale in both dimensions */
|
|
image_scale_downsample_xy(dest, src);
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
image_scale_nearest_xy(dest, src);
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
image_dimensions_fit_to_box(uint *cols, uint *rows, uint max_cols, uint max_rows, uint upsample)
|
|
{
|
|
ASSERT(image_dimensions_valid(*cols, *rows));
|
|
ASSERT(image_dimensions_valid(max_cols, max_rows));
|
|
if (*cols <= max_cols && *rows <= max_rows)
|
|
{
|
|
if (!upsample)
|
|
return;
|
|
if (max_cols * *rows > max_rows * *cols)
|
|
{
|
|
*cols = *cols * max_rows / *rows;
|
|
*cols = MIN(*cols, max_cols);
|
|
*rows = max_rows;
|
|
}
|
|
else
|
|
{
|
|
*rows = *rows * max_cols / *cols;
|
|
*rows = MIN(*rows, max_rows);
|
|
*cols = max_cols;
|
|
}
|
|
}
|
|
else if (*cols <= max_cols)
|
|
goto down_cols;
|
|
else if (*rows <= max_rows || max_rows * *cols > max_cols * *rows)
|
|
goto down_rows;
|
|
down_cols:
|
|
*cols = *cols * max_rows / *rows;
|
|
*cols = MAX(*cols, 1);
|
|
*rows = max_rows;
|
|
return;
|
|
down_rows:
|
|
*rows = *rows * max_cols / *cols;
|
|
*rows = MAX(*rows, 1);
|
|
*cols = max_cols;
|
|
}
|
|
|