/* * Image Library -- Image scaling algorithms * * (c) 2006 Pavel Charvat * * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. */ #undef LOCAL_DEBUG #include #include #include #include #include #ifdef __SSE2__ #include #endif #define LINEAR_INTERPOLATE(a, b, t) (((int)((a) << 16) + (int)(t) * ((int)(b) - (int)(a)) + 0x8000) >> 16) /* Generate optimized code for various pixel formats */ #define IMAGE_SCALE_PREFIX(x) image_scale_1_##x #define IMAGE_SCALE_PIXEL_SIZE 1 #include #define IMAGE_SCALE_PREFIX(x) image_scale_2_##x #define IMAGE_SCALE_PIXEL_SIZE 2 #include #define IMAGE_SCALE_PREFIX(x) image_scale_3_##x #define IMAGE_SCALE_PIXEL_SIZE 3 #include #define IMAGE_SCALE_PREFIX(x) image_scale_4_##x #define IMAGE_SCALE_PIXEL_SIZE 4 #include /* Simple "nearest neighbour" algorithm */ static void image_scale_nearest_xy(struct image *dest, struct image *src) { switch (src->pixel_size) { case 1: image_scale_1_nearest_xy(dest, src); return; case 2: image_scale_2_nearest_xy(dest, src); return; case 3: image_scale_3_nearest_xy(dest, src); return; case 4: image_scale_4_nearest_xy(dest, src); return; default: ASSERT(0); } } static inline void image_scale_nearest_x(struct image *dest, struct image *src) { image_scale_nearest_xy(dest, src); } static void image_scale_nearest_y(struct image *dest, struct image *src) { uint y_inc = (src->rows << 16) / dest->rows; uint y_pos = y_inc >> 1; byte *dest_pos = dest->pixels; for (uint row_counter = dest->rows; row_counter--; ) { byte *src_pos = src->pixels + (y_pos >> 16) * src->row_size; y_pos += y_inc; memcpy(dest_pos, src_pos, dest->row_pixels_size); dest_pos += dest->row_size; } } /* Bilinear filter */ UNUSED static void image_scale_linear_y(struct image *dest, struct image *src) { byte *dest_row = dest->pixels; /* Handle problematic special case */ if (src->rows == 1) { for (uint y_counter = dest->rows; y_counter--; dest_row += dest->row_size) memcpy(dest_row, src->pixels, src->row_pixels_size); return; } /* Initialize the main loop */ uint y_inc = ((src->rows - 1) << 16) / (dest->rows - 1), y_pos = 0; #ifdef __SSE2__ __m128i zero = _mm_setzero_si128(); #endif /* Main loop */ for (uint y_counter = dest->rows; --y_counter; ) { uint coef = y_pos & 0xffff; byte *src_row_1 = src->pixels + (y_pos >> 16) * src->row_size; byte *src_row_2 = src_row_1 + src->row_size; uint i = 0; #ifdef __SSE2__ /* SSE2 */ __m128i sse_coef = _mm_set1_epi16(coef >> 9); for (; (int)i < (int)dest->row_pixels_size - 15; i += 16) { __m128i a2 = _mm_loadu_si128((__m128i *)(src_row_1 + i)); __m128i a1 = _mm_unpacklo_epi8(a2, zero); a2 = _mm_unpackhi_epi8(a2, zero); __m128i b2 = _mm_loadu_si128((__m128i *)(src_row_2 + i)); __m128i b1 = _mm_unpacklo_epi8(b2, zero); b2 = _mm_unpackhi_epi8(b2, zero); b1 = _mm_sub_epi16(b1, a1); b2 = _mm_sub_epi16(b2, a2); a1 = _mm_slli_epi16(a1, 7); a2 = _mm_slli_epi16(a2, 7); b1 = _mm_mullo_epi16(b1, sse_coef); b2 = _mm_mullo_epi16(b2, sse_coef); a1 = _mm_add_epi16(a1, b1); a2 = _mm_add_epi16(a2, b2); a1 = _mm_srli_epi16(a1, 7); a2 = _mm_srli_epi16(a2, 7); a1 = _mm_packus_epi16(a1, a2); _mm_storeu_si128((__m128i *)(dest_row + i), a1); } #endif /* Unrolled loop using general-purpose registers */ for (; (int)i < (int)dest->row_pixels_size - 3; i += 4) { dest_row[i + 0] = LINEAR_INTERPOLATE(src_row_1[i + 0], src_row_2[i + 0], coef); dest_row[i + 1] = LINEAR_INTERPOLATE(src_row_1[i + 1], src_row_2[i + 1], coef); dest_row[i + 2] = LINEAR_INTERPOLATE(src_row_1[i + 2], src_row_2[i + 2], coef); dest_row[i + 3] = LINEAR_INTERPOLATE(src_row_1[i + 3], src_row_2[i + 3], coef); } /* Remaining columns */ for (; i < dest->row_pixels_size; i++) dest_row[i] = LINEAR_INTERPOLATE(src_row_1[i], src_row_2[i], coef); dest_row += dest->row_size; y_pos += y_inc; } /* Always copy the last row - faster and also handle "y_pos == dest->rows * 0x10000" overflow */ memcpy(dest_row, src->pixels + src->image_size - src->row_size, src->row_pixels_size); } /* Box filter */ static void image_scale_downsample_xy(struct image *dest, struct image *src) { switch (src->pixel_size) { case 1: image_scale_1_downsample_xy(dest, src); return; case 2: image_scale_2_downsample_xy(dest, src); return; case 3: image_scale_3_downsample_xy(dest, src); return; case 4: image_scale_4_downsample_xy(dest, src); return; default: ASSERT(0); } } /* General routine * FIXME: customizable; implement at least bilinear and bicubic filters */ int image_scale(struct image_context *ctx, struct image *dest, struct image *src) { if ((src->flags & IMAGE_PIXEL_FORMAT) != (dest->flags & IMAGE_PIXEL_FORMAT)) { IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Different pixel formats not supported."); return 0; } if (dest->cols == src->cols) { if (dest->rows == src->rows) { /* No scale, copy only */ image_scale_nearest_y(dest, src); return 1; } else if (dest->rows < src->rows) { /* Downscale vertically */ image_scale_downsample_xy(dest, src); return 1; } else { /* Upscale vertically */ image_scale_nearest_y(dest, src); return 1; } } else if (dest->rows == src->rows) { if (dest->cols < src->cols) { /* Downscale horizontally */ image_scale_downsample_xy(dest, src); return 1; } else { /* Upscale horizontally */ image_scale_nearest_x(dest, src); return 1; } } else { if (dest->cols <= src->cols && dest->rows <= src->rows) { /* Downscale in both dimensions */ image_scale_downsample_xy(dest, src); return 1; } else { image_scale_nearest_xy(dest, src); return 1; } } } void image_dimensions_fit_to_box(uint *cols, uint *rows, uint max_cols, uint max_rows, uint upsample) { ASSERT(image_dimensions_valid(*cols, *rows)); ASSERT(image_dimensions_valid(max_cols, max_rows)); if (*cols <= max_cols && *rows <= max_rows) { if (!upsample) return; if (max_cols * *rows > max_rows * *cols) { *cols = *cols * max_rows / *rows; *cols = MIN(*cols, max_cols); *rows = max_rows; } else { *rows = *rows * max_cols / *cols; *rows = MIN(*rows, max_rows); *cols = max_cols; } } else if (*cols <= max_cols) goto down_cols; else if (*rows <= max_rows || max_rows * *cols > max_cols * *rows) goto down_rows; down_cols: *cols = *cols * max_rows / *rows; *cols = MAX(*cols, 1); *rows = max_rows; return; down_rows: *rows = *rows * max_cols / *cols; *rows = MAX(*rows, 1); *cols = max_cols; }