Merge pull request #113963 from akien-mga/libwebp-1.6.0

libwebp: Update to 1.6.0
This commit is contained in:
Thaddeus Crews
2025-12-15 08:00:58 -06:00
139 changed files with 6462 additions and 4929 deletions

View File

@@ -635,7 +635,7 @@ Files extracted from upstream source:
## libwebp
- Upstream: https://chromium.googlesource.com/webm/libwebp/
- Version: 1.5.0 (a4d7a715337ded4451fec90ff8ce79728e04126c, 2024)
- Version: 1.6.0 (4fa21912338357f89e4fd51cf2368325b59e9bd9, 2025)
- License: BSD-3-Clause
Files extracted from upstream source:
@@ -645,9 +645,9 @@ Files extracted from upstream source:
Patches:
- `0001-msvc-node-debug-rename.patch`
- `0002-msvc-arm64-fpstrict.patch`
- `0003-clang-cl-sse2-sse41.patch`
- `0001-msvc-node-debug-rename.patch` ([GH-75769](https://github.com/godotengine/godot/pull/75769))
- `0002-msvc-arm64-fpstrict.patch` ([GH-94655](https://github.com/godotengine/godot/pull/94655))
- `0003-clang-cl-sse2-sse41-avx2.patch` ([GH-92316](https://github.com/godotengine/godot/pull/92316))
## linuxbsd_headers

View File

@@ -10,9 +10,11 @@ Contributors:
- Christian Duvivier (cduvivier at google dot com)
- Christopher Degawa (ccom at randomderp dot com)
- Clement Courbet (courbet at google dot com)
- devtools-clrobot at google dot com (devtools-clrobot@google dot com)
- Djordje Pesut (djordje dot pesut at imgtec dot com)
- Frank (1433351828 at qq dot com)
- Frank Barchard (fbarchard at google dot com)
- Henner Zeller (hzeller at google dot com)
- Hui Su (huisu at google dot com)
- H. Vetinari (h dot vetinari at gmx dot com)
- Ilya Kurdyukov (jpegqs at gmail dot com)

View File

@@ -1,8 +1,8 @@
diff --git a/thirdparty/libwebp/src/enc/quant_enc.c b/thirdparty/libwebp/src/enc/quant_enc.c
index 6d8202d277..302e8047f2 100644
index 4b8cb5e9be..440991f86d 100644
--- a/thirdparty/libwebp/src/enc/quant_enc.c
+++ b/thirdparty/libwebp/src/enc/quant_enc.c
@@ -556,6 +556,9 @@ static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst,
@@ -562,6 +562,9 @@ static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst,
//------------------------------------------------------------------------------
// Performs trellis-optimized quantization.

View File

@@ -1,8 +1,8 @@
diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c b/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c
index 09028428ac..6f1a88bf1a 100644
index f72be4b89e..17c4b1940b 100644
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c
@@ -26,7 +26,11 @@ static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
@@ -27,7 +27,11 @@ static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];

View File

@@ -1,8 +1,8 @@
diff --git a/thirdparty/libwebp/src/dsp/cpu.h b/thirdparty/libwebp/src/dsp/cpu.h
index c86540f280..4dbe607aec 100644
index 7f87d7daaa..ef63219043 100644
--- a/thirdparty/libwebp/src/dsp/cpu.h
+++ b/thirdparty/libwebp/src/dsp/cpu.h
@@ -47,12 +47,12 @@
@@ -47,17 +47,17 @@
// x86 defines.
#if !defined(HAVE_CONFIG_H)
@@ -17,3 +17,9 @@ index c86540f280..4dbe607aec 100644
(defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
#endif
-#if defined(_MSC_VER) && _MSC_VER >= 1700 && \
+#if defined(_MSC_VER) && !defined(__clang__) && _MSC_VER >= 1700 && \
(defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_AVX2 // Visual C++ AVX2 targets
#endif

View File

@@ -19,10 +19,10 @@
#include <stdlib.h>
#include <string.h>
#include "src/webp/types.h"
#include "sharpyuv/sharpyuv_cpu.h"
#include "sharpyuv/sharpyuv_dsp.h"
#include "sharpyuv/sharpyuv_gamma.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------

View File

@@ -52,7 +52,7 @@ extern "C" {
// SharpYUV API version following the convention from semver.org
#define SHARPYUV_VERSION_MAJOR 0
#define SHARPYUV_VERSION_MINOR 4
#define SHARPYUV_VERSION_PATCH 1
#define SHARPYUV_VERSION_PATCH 2
// Version as a uint32_t. The major number is the high 8 bits.
// The minor number is the middle 8 bits. The patch number is the low 16 bits.
#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \

View File

@@ -15,6 +15,8 @@
#include <math.h>
#include <stddef.h>
#include "sharpyuv/sharpyuv.h"
static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); }
void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,

View File

@@ -17,6 +17,7 @@
#include <stdlib.h>
#include "sharpyuv/sharpyuv_cpu.h"
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
//-----------------------------------------------------------------------------

View File

@@ -15,6 +15,7 @@
#include <float.h>
#include <math.h>
#include "sharpyuv/sharpyuv.h"
#include "src/webp/types.h"
// Gamma correction compensates loss of resolution during chroma subsampling.

View File

@@ -14,9 +14,13 @@
#include "sharpyuv/sharpyuv_dsp.h"
#if defined(WEBP_USE_SSE2)
#include <stdlib.h>
#include <emmintrin.h>
#include <stdlib.h>
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
static uint16_t clip_SSE2(int v, int max) {
return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
}

View File

@@ -11,14 +11,18 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>
#include "src/dec/alphai_dec.h"
#include "src/dec/vp8_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/dsp/dsp.h"
#include "src/utils/quant_levels_dec_utils.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
@@ -34,8 +38,8 @@ WEBP_NODISCARD static ALPHDecoder* ALPHNew(void) {
// Clears and deallocates an alpha decoder instance.
static void ALPHDelete(ALPHDecoder* const dec) {
if (dec != NULL) {
VP8LDelete(dec->vp8l_dec_);
dec->vp8l_dec_ = NULL;
VP8LDelete(dec->vp8l_dec);
dec->vp8l_dec = NULL;
WebPSafeFree(dec);
}
}
@@ -54,28 +58,28 @@ WEBP_NODISCARD static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;
const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;
int rsrv;
VP8Io* const io = &dec->io_;
VP8Io* const io = &dec->io;
assert(data != NULL && output != NULL && src_io != NULL);
VP8FiltersInit();
dec->output_ = output;
dec->width_ = src_io->width;
dec->height_ = src_io->height;
assert(dec->width_ > 0 && dec->height_ > 0);
dec->output = output;
dec->width = src_io->width;
dec->height = src_io->height;
assert(dec->width > 0 && dec->height > 0);
if (data_size <= ALPHA_HEADER_LEN) {
return 0;
}
dec->method_ = (data[0] >> 0) & 0x03;
dec->filter_ = (WEBP_FILTER_TYPE)((data[0] >> 2) & 0x03);
dec->pre_processing_ = (data[0] >> 4) & 0x03;
dec->method = (data[0] >> 0) & 0x03;
dec->filter = (WEBP_FILTER_TYPE)((data[0] >> 2) & 0x03);
dec->pre_processing = (data[0] >> 4) & 0x03;
rsrv = (data[0] >> 6) & 0x03;
if (dec->method_ < ALPHA_NO_COMPRESSION ||
dec->method_ > ALPHA_LOSSLESS_COMPRESSION ||
dec->filter_ >= WEBP_FILTER_LAST ||
dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS ||
if (dec->method < ALPHA_NO_COMPRESSION ||
dec->method > ALPHA_LOSSLESS_COMPRESSION ||
dec->filter >= WEBP_FILTER_LAST ||
dec->pre_processing > ALPHA_PREPROCESSED_LEVELS ||
rsrv != 0) {
return 0;
}
@@ -96,11 +100,11 @@ WEBP_NODISCARD static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
io->crop_bottom = src_io->crop_bottom;
// No need to copy the scaling parameters.
if (dec->method_ == ALPHA_NO_COMPRESSION) {
const size_t alpha_decoded_size = dec->width_ * dec->height_;
if (dec->method == ALPHA_NO_COMPRESSION) {
const size_t alpha_decoded_size = dec->width * dec->height;
ok = (alpha_data_size >= alpha_decoded_size);
} else {
assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION);
assert(dec->method == ALPHA_LOSSLESS_COMPRESSION);
ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size);
}
@@ -113,32 +117,32 @@ WEBP_NODISCARD static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
// Returns false in case of bitstream error.
WEBP_NODISCARD static int ALPHDecode(VP8Decoder* const dec, int row,
int num_rows) {
ALPHDecoder* const alph_dec = dec->alph_dec_;
const int width = alph_dec->width_;
const int height = alph_dec->io_.crop_bottom;
if (alph_dec->method_ == ALPHA_NO_COMPRESSION) {
ALPHDecoder* const alph_dec = dec->alph_dec;
const int width = alph_dec->width;
const int height = alph_dec->io.crop_bottom;
if (alph_dec->method == ALPHA_NO_COMPRESSION) {
int y;
const uint8_t* prev_line = dec->alpha_prev_line_;
const uint8_t* deltas = dec->alpha_data_ + ALPHA_HEADER_LEN + row * width;
uint8_t* dst = dec->alpha_plane_ + row * width;
assert(deltas <= &dec->alpha_data_[dec->alpha_data_size_]);
assert(WebPUnfilters[alph_dec->filter_] != NULL);
const uint8_t* prev_line = dec->alpha_prev_line;
const uint8_t* deltas = dec->alpha_data + ALPHA_HEADER_LEN + row * width;
uint8_t* dst = dec->alpha_plane + row * width;
assert(deltas <= &dec->alpha_data[dec->alpha_data_size]);
assert(WebPUnfilters[alph_dec->filter] != NULL);
for (y = 0; y < num_rows; ++y) {
WebPUnfilters[alph_dec->filter_](prev_line, deltas, dst, width);
WebPUnfilters[alph_dec->filter](prev_line, deltas, dst, width);
prev_line = dst;
dst += width;
deltas += width;
}
dec->alpha_prev_line_ = prev_line;
} else { // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
assert(alph_dec->vp8l_dec_ != NULL);
dec->alpha_prev_line = prev_line;
} else { // alph_dec->method == ALPHA_LOSSLESS_COMPRESSION
assert(alph_dec->vp8l_dec != NULL);
if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) {
return 0;
}
}
if (row + num_rows >= height) {
dec->is_alpha_decoded_ = 1;
dec->is_alpha_decoded = 1;
}
return 1;
}
@@ -148,25 +152,25 @@ WEBP_NODISCARD static int AllocateAlphaPlane(VP8Decoder* const dec,
const int stride = io->width;
const int height = io->crop_bottom;
const uint64_t alpha_size = (uint64_t)stride * height;
assert(dec->alpha_plane_mem_ == NULL);
dec->alpha_plane_mem_ =
(uint8_t*)WebPSafeMalloc(alpha_size, sizeof(*dec->alpha_plane_));
if (dec->alpha_plane_mem_ == NULL) {
assert(dec->alpha_plane_mem == NULL);
dec->alpha_plane_mem =
(uint8_t*)WebPSafeMalloc(alpha_size, sizeof(*dec->alpha_plane));
if (dec->alpha_plane_mem == NULL) {
return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
"Alpha decoder initialization failed.");
}
dec->alpha_plane_ = dec->alpha_plane_mem_;
dec->alpha_prev_line_ = NULL;
dec->alpha_plane = dec->alpha_plane_mem;
dec->alpha_prev_line = NULL;
return 1;
}
void WebPDeallocateAlphaMemory(VP8Decoder* const dec) {
assert(dec != NULL);
WebPSafeFree(dec->alpha_plane_mem_);
dec->alpha_plane_mem_ = NULL;
dec->alpha_plane_ = NULL;
ALPHDelete(dec->alph_dec_);
dec->alph_dec_ = NULL;
WebPSafeFree(dec->alpha_plane_mem);
dec->alpha_plane_mem = NULL;
dec->alpha_plane = NULL;
ALPHDelete(dec->alph_dec);
dec->alph_dec = NULL;
}
//------------------------------------------------------------------------------
@@ -184,46 +188,46 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
return NULL;
}
if (!dec->is_alpha_decoded_) {
if (dec->alph_dec_ == NULL) { // Initialize decoder.
dec->alph_dec_ = ALPHNew();
if (dec->alph_dec_ == NULL) {
if (!dec->is_alpha_decoded) {
if (dec->alph_dec == NULL) { // Initialize decoder.
dec->alph_dec = ALPHNew();
if (dec->alph_dec == NULL) {
VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
"Alpha decoder initialization failed.");
return NULL;
}
if (!AllocateAlphaPlane(dec, io)) goto Error;
if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
io, dec->alpha_plane_)) {
VP8LDecoder* const vp8l_dec = dec->alph_dec_->vp8l_dec_;
if (!ALPHInit(dec->alph_dec, dec->alpha_data, dec->alpha_data_size,
io, dec->alpha_plane)) {
VP8LDecoder* const vp8l_dec = dec->alph_dec->vp8l_dec;
VP8SetError(dec,
(vp8l_dec == NULL) ? VP8_STATUS_OUT_OF_MEMORY
: vp8l_dec->status_,
: vp8l_dec->status,
"Alpha decoder initialization failed.");
goto Error;
}
// if we allowed use of alpha dithering, check whether it's needed at all
if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) {
dec->alpha_dithering_ = 0; // disable dithering
if (dec->alph_dec->pre_processing != ALPHA_PREPROCESSED_LEVELS) {
dec->alpha_dithering = 0; // disable dithering
} else {
num_rows = height - row; // decode everything in one pass
}
}
assert(dec->alph_dec_ != NULL);
assert(dec->alph_dec != NULL);
assert(row + num_rows <= height);
if (!ALPHDecode(dec, row, num_rows)) goto Error;
if (dec->is_alpha_decoded_) { // finished?
ALPHDelete(dec->alph_dec_);
dec->alph_dec_ = NULL;
if (dec->alpha_dithering_ > 0) {
uint8_t* const alpha = dec->alpha_plane_ + io->crop_top * width
if (dec->is_alpha_decoded) { // finished?
ALPHDelete(dec->alph_dec);
dec->alph_dec = NULL;
if (dec->alpha_dithering > 0) {
uint8_t* const alpha = dec->alpha_plane + io->crop_top * width
+ io->crop_left;
if (!WebPDequantizeLevels(alpha,
io->crop_right - io->crop_left,
io->crop_bottom - io->crop_top,
width, dec->alpha_dithering_)) {
width, dec->alpha_dithering)) {
goto Error;
}
}
@@ -231,7 +235,7 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
}
// Return a pointer to the current decoded row.
return dec->alpha_plane_ + row * width;
return dec->alpha_plane + row * width;
Error:
WebPDeallocateAlphaMemory(dec);

View File

@@ -14,7 +14,10 @@
#ifndef WEBP_DEC_ALPHAI_DEC_H_
#define WEBP_DEC_ALPHAI_DEC_H_
#include "src/dec/vp8_dec.h"
#include "src/webp/types.h"
#include "src/dec/webpi_dec.h"
#include "src/dsp/dsp.h"
#include "src/utils/filters_utils.h"
#ifdef __cplusplus
@@ -25,24 +28,24 @@ struct VP8LDecoder; // Defined in dec/vp8li.h.
typedef struct ALPHDecoder ALPHDecoder;
struct ALPHDecoder {
int width_;
int height_;
int method_;
WEBP_FILTER_TYPE filter_;
int pre_processing_;
struct VP8LDecoder* vp8l_dec_;
VP8Io io_;
int use_8b_decode_; // Although alpha channel requires only 1 byte per
int width;
int height;
int method;
WEBP_FILTER_TYPE filter;
int pre_processing;
struct VP8LDecoder* vp8l_dec;
VP8Io io;
int use_8b_decode; // Although alpha channel requires only 1 byte per
// pixel, sometimes VP8LDecoder may need to allocate
// 4 bytes per pixel internally during decode.
uint8_t* output_;
const uint8_t* prev_line_; // last output row (or NULL)
uint8_t* output;
const uint8_t* prev_line; // last output row (or NULL)
};
//------------------------------------------------------------------------------
// internal functions. Not public.
// Deallocate memory associated to dec->alpha_plane_ decoding
// Deallocate memory associated to dec->alpha_plane decoding
void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
//------------------------------------------------------------------------------

View File

@@ -11,11 +11,16 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "src/dec/vp8i_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/rescaler_utils.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// WebPDecBuffer
@@ -26,10 +31,9 @@ static const uint8_t kModeBpp[MODE_LAST] = {
4, 4, 4, 2, // pre-multiplied modes
1, 1 };
// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
// Convert to an integer to handle both the unsigned/signed enum cases
// without the need for casting to remove type limit warnings.
static int IsValidColorspace(int webp_csp_mode) {
int IsValidColorspace(int webp_csp_mode) {
return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST);
}

View File

@@ -51,4 +51,7 @@ enum { MB_FEATURE_TREE_PROBS = 3,
NUM_PROBAS = 11
};
// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
int IsValidColorspace(int webp_csp_mode);
#endif // WEBP_DEC_COMMON_DEC_H_

View File

@@ -11,9 +11,20 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "src/dec/common_dec.h"
#include "src/dec/vp8_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/dsp/dsp.h"
#include "src/utils/random_utils.h"
#include "src/utils/thread_utils.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Main reconstruction function.
@@ -72,11 +83,11 @@ static void ReconstructRow(const VP8Decoder* const dec,
const VP8ThreadContext* ctx) {
int j;
int mb_x;
const int mb_y = ctx->mb_y_;
const int cache_id = ctx->id_;
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
const int mb_y = ctx->mb_y;
const int cache_id = ctx->id;
uint8_t* const y_dst = dec->yuv_b + Y_OFF;
uint8_t* const u_dst = dec->yuv_b + U_OFF;
uint8_t* const v_dst = dec->yuv_b + V_OFF;
// Initialize left-most block.
for (j = 0; j < 16; ++j) {
@@ -99,8 +110,8 @@ static void ReconstructRow(const VP8Decoder* const dec,
}
// Reconstruct one row.
for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
const VP8MBData* const block = ctx->mb_data_ + mb_x;
for (mb_x = 0; mb_x < dec->mb_w; ++mb_x) {
const VP8MBData* const block = ctx->mb_data + mb_x;
// Rotate in the left samples from previously decoded block. We move four
// pixels at a time for alignment reason, and because of in-loop filter.
@@ -115,9 +126,9 @@ static void ReconstructRow(const VP8Decoder* const dec,
}
{
// bring top samples into the cache
VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x;
const int16_t* const coeffs = block->coeffs_;
uint32_t bits = block->non_zero_y_;
VP8TopSamples* const top_yuv = dec->yuv_t + mb_x;
const int16_t* const coeffs = block->coeffs;
uint32_t bits = block->non_zero_y;
int n;
if (mb_y > 0) {
@@ -127,11 +138,11 @@ static void ReconstructRow(const VP8Decoder* const dec,
}
// predict and add residuals
if (block->is_i4x4_) { // 4x4
if (block->is_i4x4) { // 4x4
uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
if (mb_y > 0) {
if (mb_x >= dec->mb_w_ - 1) { // on rightmost border
if (mb_x >= dec->mb_w - 1) { // on rightmost border
memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
} else {
memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
@@ -143,11 +154,11 @@ static void ReconstructRow(const VP8Decoder* const dec,
// predict and add residuals for all 4x4 blocks in turn.
for (n = 0; n < 16; ++n, bits <<= 2) {
uint8_t* const dst = y_dst + kScan[n];
VP8PredLuma4[block->imodes_[n]](dst);
VP8PredLuma4[block->imodes[n]](dst);
DoTransform(bits, coeffs + n * 16, dst);
}
} else { // 16x16
const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]);
const int pred_func = CheckMode(mb_x, mb_y, block->imodes[0]);
VP8PredLuma16[pred_func](y_dst);
if (bits != 0) {
for (n = 0; n < 16; ++n, bits <<= 2) {
@@ -157,8 +168,8 @@ static void ReconstructRow(const VP8Decoder* const dec,
}
{
// Chroma
const uint32_t bits_uv = block->non_zero_uv_;
const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_);
const uint32_t bits_uv = block->non_zero_uv;
const int pred_func = CheckMode(mb_x, mb_y, block->uvmode);
VP8PredChroma8[pred_func](u_dst);
VP8PredChroma8[pred_func](v_dst);
DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);
@@ -166,25 +177,25 @@ static void ReconstructRow(const VP8Decoder* const dec,
}
// stash away top samples for next block
if (mb_y < dec->mb_h_ - 1) {
if (mb_y < dec->mb_h - 1) {
memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);
memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);
memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);
}
}
// Transfer reconstructed samples from yuv_b_ cache to final destination.
// Transfer reconstructed samples from yuv_b cache to final destination.
{
const int y_offset = cache_id * 16 * dec->cache_y_stride_;
const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;
uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;
uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;
const int y_offset = cache_id * 16 * dec->cache_y_stride;
const int uv_offset = cache_id * 8 * dec->cache_uv_stride;
uint8_t* const y_out = dec->cache_y + mb_x * 16 + y_offset;
uint8_t* const u_out = dec->cache_u + mb_x * 8 + uv_offset;
uint8_t* const v_out = dec->cache_v + mb_x * 8 + uv_offset;
for (j = 0; j < 16; ++j) {
memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
memcpy(y_out + j * dec->cache_y_stride, y_dst + j * BPS, 16);
}
for (j = 0; j < 8; ++j) {
memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
memcpy(u_out + j * dec->cache_uv_stride, u_dst + j * BPS, 8);
memcpy(v_out + j * dec->cache_uv_stride, v_dst + j * BPS, 8);
}
}
}
@@ -201,40 +212,40 @@ static void ReconstructRow(const VP8Decoder* const dec,
static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const int cache_id = ctx->id_;
const int y_bps = dec->cache_y_stride_;
const VP8FInfo* const f_info = ctx->f_info_ + mb_x;
uint8_t* const y_dst = dec->cache_y_ + cache_id * 16 * y_bps + mb_x * 16;
const int ilevel = f_info->f_ilevel_;
const int limit = f_info->f_limit_;
const VP8ThreadContext* const ctx = &dec->thread_ctx;
const int cache_id = ctx->id;
const int y_bps = dec->cache_y_stride;
const VP8FInfo* const f_info = ctx->f_info + mb_x;
uint8_t* const y_dst = dec->cache_y + cache_id * 16 * y_bps + mb_x * 16;
const int ilevel = f_info->f_ilevel;
const int limit = f_info->f_limit;
if (limit == 0) {
return;
}
assert(limit >= 3);
if (dec->filter_type_ == 1) { // simple
if (dec->filter_type == 1) { // simple
if (mb_x > 0) {
VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
}
if (f_info->f_inner_) {
if (f_info->f_inner) {
VP8SimpleHFilter16i(y_dst, y_bps, limit);
}
if (mb_y > 0) {
VP8SimpleVFilter16(y_dst, y_bps, limit + 4);
}
if (f_info->f_inner_) {
if (f_info->f_inner) {
VP8SimpleVFilter16i(y_dst, y_bps, limit);
}
} else { // complex
const int uv_bps = dec->cache_uv_stride_;
uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
const int hev_thresh = f_info->hev_thresh_;
const int uv_bps = dec->cache_uv_stride;
uint8_t* const u_dst = dec->cache_u + cache_id * 8 * uv_bps + mb_x * 8;
uint8_t* const v_dst = dec->cache_v + cache_id * 8 * uv_bps + mb_x * 8;
const int hev_thresh = f_info->hev_thresh;
if (mb_x > 0) {
VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
}
if (f_info->f_inner_) {
if (f_info->f_inner) {
VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
}
@@ -242,7 +253,7 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
}
if (f_info->f_inner_) {
if (f_info->f_inner) {
VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
}
@@ -252,9 +263,9 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
// Filter the decoded macroblock row (if needed)
static void FilterRow(const VP8Decoder* const dec) {
int mb_x;
const int mb_y = dec->thread_ctx_.mb_y_;
assert(dec->thread_ctx_.filter_row_);
for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
const int mb_y = dec->thread_ctx.mb_y;
assert(dec->thread_ctx.filter_row);
for (mb_x = dec->tl_mb_x; mb_x < dec->br_mb_x; ++mb_x) {
DoFilter(dec, mb_x, mb_y);
}
}
@@ -263,51 +274,51 @@ static void FilterRow(const VP8Decoder* const dec) {
// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
if (dec->filter_type_ > 0) {
if (dec->filter_type > 0) {
int s;
const VP8FilterHeader* const hdr = &dec->filter_hdr_;
const VP8FilterHeader* const hdr = &dec->filter_hdr;
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
int i4x4;
// First, compute the initial level
int base_level;
if (dec->segment_hdr_.use_segment_) {
base_level = dec->segment_hdr_.filter_strength_[s];
if (!dec->segment_hdr_.absolute_delta_) {
base_level += hdr->level_;
if (dec->segment_hdr.use_segment) {
base_level = dec->segment_hdr.filter_strength[s];
if (!dec->segment_hdr.absolute_delta) {
base_level += hdr->level;
}
} else {
base_level = hdr->level_;
base_level = hdr->level;
}
for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
VP8FInfo* const info = &dec->fstrengths[s][i4x4];
int level = base_level;
if (hdr->use_lf_delta_) {
level += hdr->ref_lf_delta_[0];
if (hdr->use_lf_delta) {
level += hdr->ref_lf_delta[0];
if (i4x4) {
level += hdr->mode_lf_delta_[0];
level += hdr->mode_lf_delta[0];
}
}
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
if (level > 0) {
int ilevel = level;
if (hdr->sharpness_ > 0) {
if (hdr->sharpness_ > 4) {
if (hdr->sharpness > 0) {
if (hdr->sharpness > 4) {
ilevel >>= 2;
} else {
ilevel >>= 1;
}
if (ilevel > 9 - hdr->sharpness_) {
ilevel = 9 - hdr->sharpness_;
if (ilevel > 9 - hdr->sharpness) {
ilevel = 9 - hdr->sharpness;
}
}
if (ilevel < 1) ilevel = 1;
info->f_ilevel_ = ilevel;
info->f_limit_ = 2 * level + ilevel;
info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
info->f_ilevel = ilevel;
info->f_limit = 2 * level + ilevel;
info->hev_thresh = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
} else {
info->f_limit_ = 0; // no filtering
info->f_limit = 0; // no filtering
}
info->f_inner_ = i4x4;
info->f_inner = i4x4;
}
}
}
@@ -321,7 +332,7 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
#define DITHER_AMP_TAB_SIZE 12
static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
// roughly, it's dqm->uv_mat_[1]
// roughly, it's dqm->uv_mat[1]
8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
};
@@ -336,24 +347,24 @@ void VP8InitDithering(const WebPDecoderOptions* const options,
int s;
int all_amp = 0;
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
VP8QuantMatrix* const dqm = &dec->dqm_[s];
if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {
const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;
VP8QuantMatrix* const dqm = &dec->dqm[s];
if (dqm->uv_quant < DITHER_AMP_TAB_SIZE) {
const int idx = (dqm->uv_quant < 0) ? 0 : dqm->uv_quant;
dqm->dither = (f * kQuantToDitherAmp[idx]) >> 3;
}
all_amp |= dqm->dither_;
all_amp |= dqm->dither;
}
if (all_amp != 0) {
VP8InitRandom(&dec->dithering_rg_, 1.0f);
dec->dither_ = 1;
VP8InitRandom(&dec->dithering_rg, 1.0f);
dec->dither = 1;
}
}
// potentially allow alpha dithering
dec->alpha_dithering_ = options->alpha_dithering_strength;
if (dec->alpha_dithering_ > 100) {
dec->alpha_dithering_ = 100;
} else if (dec->alpha_dithering_ < 0) {
dec->alpha_dithering_ = 0;
dec->alpha_dithering = options->alpha_dithering_strength;
if (dec->alpha_dithering > 100) {
dec->alpha_dithering = 100;
} else if (dec->alpha_dithering < 0) {
dec->alpha_dithering = 0;
}
}
}
@@ -370,17 +381,17 @@ static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {
static void DitherRow(VP8Decoder* const dec) {
int mb_x;
assert(dec->dither_);
for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const VP8MBData* const data = ctx->mb_data_ + mb_x;
const int cache_id = ctx->id_;
const int uv_bps = dec->cache_uv_stride_;
if (data->dither_ >= MIN_DITHER_AMP) {
uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_);
Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_);
assert(dec->dither);
for (mb_x = dec->tl_mb_x; mb_x < dec->br_mb_x; ++mb_x) {
const VP8ThreadContext* const ctx = &dec->thread_ctx;
const VP8MBData* const data = ctx->mb_data + mb_x;
const int cache_id = ctx->id;
const int uv_bps = dec->cache_uv_stride;
if (data->dither >= MIN_DITHER_AMP) {
uint8_t* const u_dst = dec->cache_u + cache_id * 8 * uv_bps + mb_x * 8;
uint8_t* const v_dst = dec->cache_v + cache_id * 8 * uv_bps + mb_x * 8;
Dither8x8(&dec->dithering_rg, u_dst, uv_bps, data->dither);
Dither8x8(&dec->dithering_rg, v_dst, uv_bps, data->dither);
}
}
}
@@ -403,29 +414,29 @@ static int FinishRow(void* arg1, void* arg2) {
VP8Decoder* const dec = (VP8Decoder*)arg1;
VP8Io* const io = (VP8Io*)arg2;
int ok = 1;
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const int cache_id = ctx->id_;
const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
const int ysize = extra_y_rows * dec->cache_y_stride_;
const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
const int y_offset = cache_id * 16 * dec->cache_y_stride_;
const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;
uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;
uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;
const int mb_y = ctx->mb_y_;
const VP8ThreadContext* const ctx = &dec->thread_ctx;
const int cache_id = ctx->id;
const int extra_y_rows = kFilterExtraRows[dec->filter_type];
const int ysize = extra_y_rows * dec->cache_y_stride;
const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride;
const int y_offset = cache_id * 16 * dec->cache_y_stride;
const int uv_offset = cache_id * 8 * dec->cache_uv_stride;
uint8_t* const ydst = dec->cache_y - ysize + y_offset;
uint8_t* const udst = dec->cache_u - uvsize + uv_offset;
uint8_t* const vdst = dec->cache_v - uvsize + uv_offset;
const int mb_y = ctx->mb_y;
const int is_first_row = (mb_y == 0);
const int is_last_row = (mb_y >= dec->br_mb_y_ - 1);
const int is_last_row = (mb_y >= dec->br_mb_y - 1);
if (dec->mt_method_ == 2) {
if (dec->mt_method == 2) {
ReconstructRow(dec, ctx);
}
if (ctx->filter_row_) {
if (ctx->filter_row) {
FilterRow(dec);
}
if (dec->dither_) {
if (dec->dither) {
DitherRow(dec);
}
@@ -438,9 +449,9 @@ static int FinishRow(void* arg1, void* arg2) {
io->u = udst;
io->v = vdst;
} else {
io->y = dec->cache_y_ + y_offset;
io->u = dec->cache_u_ + uv_offset;
io->v = dec->cache_v_ + uv_offset;
io->y = dec->cache_y + y_offset;
io->u = dec->cache_u + uv_offset;
io->v = dec->cache_v + uv_offset;
}
if (!is_last_row) {
@@ -449,9 +460,9 @@ static int FinishRow(void* arg1, void* arg2) {
if (y_end > io->crop_bottom) {
y_end = io->crop_bottom; // make sure we don't overflow on last row.
}
// If dec->alpha_data_ is not NULL, we have some alpha plane present.
// If dec->alpha_data is not NULL, we have some alpha plane present.
io->a = NULL;
if (dec->alpha_data_ != NULL && y_start < y_end) {
if (dec->alpha_data != NULL && y_start < y_end) {
io->a = VP8DecompressAlphaRows(dec, io, y_start, y_end - y_start);
if (io->a == NULL) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
@@ -462,9 +473,9 @@ static int FinishRow(void* arg1, void* arg2) {
const int delta_y = io->crop_top - y_start;
y_start = io->crop_top;
assert(!(delta_y & 1));
io->y += dec->cache_y_stride_ * delta_y;
io->u += dec->cache_uv_stride_ * (delta_y >> 1);
io->v += dec->cache_uv_stride_ * (delta_y >> 1);
io->y += dec->cache_y_stride * delta_y;
io->u += dec->cache_uv_stride * (delta_y >> 1);
io->v += dec->cache_uv_stride * (delta_y >> 1);
if (io->a != NULL) {
io->a += io->width * delta_y;
}
@@ -483,11 +494,11 @@ static int FinishRow(void* arg1, void* arg2) {
}
}
// rotate top samples if needed
if (cache_id + 1 == dec->num_caches_) {
if (cache_id + 1 == dec->num_caches) {
if (!is_last_row) {
memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);
memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);
memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);
memcpy(dec->cache_y - ysize, ydst + 16 * dec->cache_y_stride, ysize);
memcpy(dec->cache_u - uvsize, udst + 8 * dec->cache_uv_stride, uvsize);
memcpy(dec->cache_v - uvsize, vdst + 8 * dec->cache_uv_stride, uvsize);
}
}
@@ -500,43 +511,43 @@ static int FinishRow(void* arg1, void* arg2) {
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
int ok = 1;
VP8ThreadContext* const ctx = &dec->thread_ctx_;
VP8ThreadContext* const ctx = &dec->thread_ctx;
const int filter_row =
(dec->filter_type_ > 0) &&
(dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
if (dec->mt_method_ == 0) {
// ctx->id_ and ctx->f_info_ are already set
ctx->mb_y_ = dec->mb_y_;
ctx->filter_row_ = filter_row;
(dec->filter_type > 0) &&
(dec->mb_y >= dec->tl_mb_y) && (dec->mb_y <= dec->br_mb_y);
if (dec->mt_method == 0) {
// ctx->id and ctx->f_info are already set
ctx->mb_y = dec->mb_y;
ctx->filter_row = filter_row;
ReconstructRow(dec, ctx);
ok = FinishRow(dec, io);
} else {
WebPWorker* const worker = &dec->worker_;
WebPWorker* const worker = &dec->worker;
// Finish previous job *before* updating context
ok &= WebPGetWorkerInterface()->Sync(worker);
assert(worker->status_ == OK);
assert(worker->status == OK);
if (ok) { // spawn a new deblocking/output job
ctx->io_ = *io;
ctx->id_ = dec->cache_id_;
ctx->mb_y_ = dec->mb_y_;
ctx->filter_row_ = filter_row;
if (dec->mt_method_ == 2) { // swap macroblock data
VP8MBData* const tmp = ctx->mb_data_;
ctx->mb_data_ = dec->mb_data_;
dec->mb_data_ = tmp;
ctx->io = *io;
ctx->id = dec->cache_id;
ctx->mb_y = dec->mb_y;
ctx->filter_row = filter_row;
if (dec->mt_method == 2) { // swap macroblock data
VP8MBData* const tmp = ctx->mb_data;
ctx->mb_data = dec->mb_data;
dec->mb_data = tmp;
} else {
// perform reconstruction directly in main thread
ReconstructRow(dec, ctx);
}
if (filter_row) { // swap filter info
VP8FInfo* const tmp = ctx->f_info_;
ctx->f_info_ = dec->f_info_;
dec->f_info_ = tmp;
VP8FInfo* const tmp = ctx->f_info;
ctx->f_info = dec->f_info;
dec->f_info = tmp;
}
// (reconstruct)+filter in parallel
WebPGetWorkerInterface()->Launch(worker);
if (++dec->cache_id_ == dec->num_caches_) {
dec->cache_id_ = 0;
if (++dec->cache_id == dec->num_caches) {
dec->cache_id = 0;
}
}
}
@@ -551,12 +562,12 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
// Note: Afterward, we must call teardown() no matter what.
if (io->setup != NULL && !io->setup(io)) {
VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
return dec->status_;
return dec->status;
}
// Disable filtering per user request
if (io->bypass_filtering) {
dec->filter_type_ = 0;
dec->filter_type = 0;
}
// Define the area where we can skip in-loop filtering, in case of cropping.
@@ -569,29 +580,29 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
// top-left corner of the picture (MB #0). We must filter all the previous
// macroblocks.
{
const int extra_pixels = kFilterExtraRows[dec->filter_type_];
if (dec->filter_type_ == 2) {
const int extra_pixels = kFilterExtraRows[dec->filter_type];
if (dec->filter_type == 2) {
// For complex filter, we need to preserve the dependency chain.
dec->tl_mb_x_ = 0;
dec->tl_mb_y_ = 0;
dec->tl_mb_x = 0;
dec->tl_mb_y = 0;
} else {
// For simple filter, we can filter only the cropped region.
// We include 'extra_pixels' on the other side of the boundary, since
// vertical or horizontal filtering of the previous macroblock can
// modify some abutting pixels.
dec->tl_mb_x_ = (io->crop_left - extra_pixels) >> 4;
dec->tl_mb_y_ = (io->crop_top - extra_pixels) >> 4;
if (dec->tl_mb_x_ < 0) dec->tl_mb_x_ = 0;
if (dec->tl_mb_y_ < 0) dec->tl_mb_y_ = 0;
dec->tl_mb_x = (io->crop_left - extra_pixels) >> 4;
dec->tl_mb_y = (io->crop_top - extra_pixels) >> 4;
if (dec->tl_mb_x < 0) dec->tl_mb_x = 0;
if (dec->tl_mb_y < 0) dec->tl_mb_y = 0;
}
// We need some 'extra' pixels on the right/bottom.
dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;
dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4;
if (dec->br_mb_x_ > dec->mb_w_) {
dec->br_mb_x_ = dec->mb_w_;
dec->br_mb_y = (io->crop_bottom + 15 + extra_pixels) >> 4;
dec->br_mb_x = (io->crop_right + 15 + extra_pixels) >> 4;
if (dec->br_mb_x > dec->mb_w) {
dec->br_mb_x = dec->mb_w;
}
if (dec->br_mb_y_ > dec->mb_h_) {
dec->br_mb_y_ = dec->mb_h_;
if (dec->br_mb_y > dec->mb_h) {
dec->br_mb_y = dec->mb_h;
}
}
PrecomputeFilterStrengths(dec);
@@ -600,8 +611,8 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
int ok = 1;
if (dec->mt_method_ > 0) {
ok = WebPGetWorkerInterface()->Sync(&dec->worker_);
if (dec->mt_method > 0) {
ok = WebPGetWorkerInterface()->Sync(&dec->worker);
}
if (io->teardown != NULL) {
@@ -639,20 +650,20 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
// Initialize multi/single-thread worker
//
// Resets the cache index and, when multi-threading is enabled (mt_method > 0),
// resets the worker and registers FinishRow as its hook, with 'dec' and the
// thread context's io as the two hook arguments. Also selects the number of
// cache lines: MT_CACHE_LINES (one less when filtering is disabled) for the
// threaded path, ST_CACHE_LINES otherwise.
// Returns 1 on success. If the worker reset fails, returns the result of
// VP8SetError() (the visible caller, VP8InitFrame, treats zero as failure).
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
static int InitThreadContext(VP8Decoder* const dec) {
dec->cache_id_ = 0;
if (dec->mt_method_ > 0) {
WebPWorker* const worker = &dec->worker_;
dec->cache_id = 0;
if (dec->mt_method > 0) {
WebPWorker* const worker = &dec->worker;
if (!WebPGetWorkerInterface()->Reset(worker)) {
return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
"thread initialization failed.");
}
// FinishRow(arg1, arg2) receives data1/data2 when the worker runs.
worker->data1 = dec;
worker->data2 = (void*)&dec->thread_ctx_.io_;
worker->data2 = (void*)&dec->thread_ctx.io;
worker->hook = FinishRow;
// Without in-loop filtering one fewer cache line is used.
dec->num_caches_ =
(dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
dec->num_caches =
(dec->filter_type > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
} else {
dec->num_caches_ = ST_CACHE_LINES;
dec->num_caches = ST_CACHE_LINES;
}
return 1;
}
@@ -680,25 +691,25 @@ int VP8GetThreadMethod(const WebPDecoderOptions* const options,
// Memory setup
static int AllocateMemory(VP8Decoder* const dec) {
const int num_caches = dec->num_caches_;
const int mb_w = dec->mb_w_;
const int num_caches = dec->num_caches;
const int mb_w = dec->mb_w;
// Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
const size_t top_size = sizeof(VP8TopSamples) * mb_w;
const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
const size_t f_info_size =
(dec->filter_type_ > 0) ?
mb_w * (dec->mt_method_ > 0 ? 2 : 1) * sizeof(VP8FInfo)
(dec->filter_type > 0) ?
mb_w * (dec->mt_method > 0 ? 2 : 1) * sizeof(VP8FInfo)
: 0;
const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b);
const size_t mb_data_size =
(dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);
(dec->mt_method == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data);
const size_t cache_height = (16 * num_caches
+ kFilterExtraRows[dec->filter_type_]) * 3 / 2;
+ kFilterExtraRows[dec->filter_type]) * 3 / 2;
const size_t cache_size = top_size * cache_height;
// alpha_size is the only one that scales as width x height.
const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?
(uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
const uint64_t alpha_size = (dec->alpha_data != NULL) ?
(uint64_t)dec->pic_hdr.width * dec->pic_hdr.height : 0ULL;
const uint64_t needed = (uint64_t)intra_pred_mode_size
+ top_size + mb_info_size + f_info_size
+ yuv_size + mb_data_size
@@ -706,77 +717,77 @@ static int AllocateMemory(VP8Decoder* const dec) {
uint8_t* mem;
if (!CheckSizeOverflow(needed)) return 0; // check for overflow
if (needed > dec->mem_size_) {
WebPSafeFree(dec->mem_);
dec->mem_size_ = 0;
dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
if (dec->mem_ == NULL) {
if (needed > dec->mem_size) {
WebPSafeFree(dec->mem);
dec->mem_size = 0;
dec->mem = WebPSafeMalloc(needed, sizeof(uint8_t));
if (dec->mem == NULL) {
return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
"no memory during frame initialization.");
}
// down-cast is ok, thanks to WebPSafeMalloc() above.
dec->mem_size_ = (size_t)needed;
dec->mem_size = (size_t)needed;
}
mem = (uint8_t*)dec->mem_;
dec->intra_t_ = mem;
mem = (uint8_t*)dec->mem;
dec->intra_t = mem;
mem += intra_pred_mode_size;
dec->yuv_t_ = (VP8TopSamples*)mem;
dec->yuv_t = (VP8TopSamples*)mem;
mem += top_size;
dec->mb_info_ = ((VP8MB*)mem) + 1;
dec->mb_info = ((VP8MB*)mem) + 1;
mem += mb_info_size;
dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;
dec->f_info = f_info_size ? (VP8FInfo*)mem : NULL;
mem += f_info_size;
dec->thread_ctx_.id_ = 0;
dec->thread_ctx_.f_info_ = dec->f_info_;
if (dec->filter_type_ > 0 && dec->mt_method_ > 0) {
dec->thread_ctx.id = 0;
dec->thread_ctx.f_info = dec->f_info;
if (dec->filter_type > 0 && dec->mt_method > 0) {
// secondary cache line. The deblocking process need to make use of the
// filtering strength from previous macroblock row, while the new ones
// are being decoded in parallel. We'll just swap the pointers.
dec->thread_ctx_.f_info_ += mb_w;
dec->thread_ctx.f_info += mb_w;
}
mem = (uint8_t*)WEBP_ALIGN(mem);
assert((yuv_size & WEBP_ALIGN_CST) == 0);
dec->yuv_b_ = mem;
dec->yuv_b = mem;
mem += yuv_size;
dec->mb_data_ = (VP8MBData*)mem;
dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;
if (dec->mt_method_ == 2) {
dec->thread_ctx_.mb_data_ += mb_w;
dec->mb_data = (VP8MBData*)mem;
dec->thread_ctx.mb_data = (VP8MBData*)mem;
if (dec->mt_method == 2) {
dec->thread_ctx.mb_data += mb_w;
}
mem += mb_data_size;
dec->cache_y_stride_ = 16 * mb_w;
dec->cache_uv_stride_ = 8 * mb_w;
dec->cache_y_stride = 16 * mb_w;
dec->cache_uv_stride = 8 * mb_w;
{
const int extra_rows = kFilterExtraRows[dec->filter_type_];
const int extra_y = extra_rows * dec->cache_y_stride_;
const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
dec->cache_y_ = mem + extra_y;
dec->cache_u_ = dec->cache_y_
+ 16 * num_caches * dec->cache_y_stride_ + extra_uv;
dec->cache_v_ = dec->cache_u_
+ 8 * num_caches * dec->cache_uv_stride_ + extra_uv;
dec->cache_id_ = 0;
const int extra_rows = kFilterExtraRows[dec->filter_type];
const int extra_y = extra_rows * dec->cache_y_stride;
const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride;
dec->cache_y = mem + extra_y;
dec->cache_u = dec->cache_y
+ 16 * num_caches * dec->cache_y_stride + extra_uv;
dec->cache_v = dec->cache_u
+ 8 * num_caches * dec->cache_uv_stride + extra_uv;
dec->cache_id = 0;
}
mem += cache_size;
// alpha plane
dec->alpha_plane_ = alpha_size ? mem : NULL;
dec->alpha_plane = alpha_size ? mem : NULL;
mem += alpha_size;
assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
assert(mem <= (uint8_t*)dec->mem + dec->mem_size);
// note: left/top-info is initialized once for all.
memset(dec->mb_info_ - 1, 0, mb_info_size);
memset(dec->mb_info - 1, 0, mb_info_size);
VP8InitScanline(dec); // initialize left too.
// initialize top
memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
memset(dec->intra_t, B_DC_PRED, intra_pred_mode_size);
return 1;
}
@@ -784,16 +795,16 @@ static int AllocateMemory(VP8Decoder* const dec) {
// Points the VP8Io sample pointers and strides at the decoder's row cache:
// mb_y is reset to 0, y/u/v take the cache_y/cache_u/cache_v pointers,
// y_stride/uv_stride take the matching cache strides, and the alpha pointer
// is cleared.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
static void InitIo(VP8Decoder* const dec, VP8Io* io) {
// prepare 'io'
io->mb_y = 0;
io->y = dec->cache_y_;
io->u = dec->cache_u_;
io->v = dec->cache_v_;
io->y_stride = dec->cache_y_stride_;
io->uv_stride = dec->cache_uv_stride_;
io->y = dec->cache_y;
io->u = dec->cache_u;
io->v = dec->cache_v;
io->y_stride = dec->cache_y_stride;
io->uv_stride = dec->cache_uv_stride;
// No alpha rows are available yet.
io->a = NULL;
}
int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) {
if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_.
if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches.
if (!AllocateMemory(dec)) return 0;
InitIo(dec, io);
VP8DspInit(); // Init critical function pointers and look-up tables.

View File

@@ -12,15 +12,20 @@
// Author: somnath@google.com (Somnath Banerjee)
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <string.h>
#include "src/dec/alphai_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/dec/vp8_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/bit_reader_utils.h"
#include "src/utils/thread_utils.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
// In append mode, buffer allocations increase as multiples of this value.
// Needs to be a power of 2.
@@ -54,134 +59,140 @@ typedef enum {
// storage for partition #0 and partial data (in a rolling fashion)
//
// [start, end) delimits the not-yet-consumed bytes inside 'buf'
// (MemDataSize() returns end - start). 'buf' and 'part0_buf' are freed by
// ClearMemBuffer() only when the mode is MEM_MODE_APPEND.
// Diff-view note: the first group of members ('_'-suffixed) is the pre-change
// side of the diff; the un-suffixed group after each is its replacement.
typedef struct {
MemBufferMode mode_; // Operation mode
size_t start_; // start location of the data to be decoded
size_t end_; // end location
size_t buf_size_; // size of the allocated buffer
uint8_t* buf_; // We don't own this buffer in case WebPIUpdate()
MemBufferMode mode; // Operation mode
size_t start; // start location of the data to be decoded
size_t end; // end location
size_t buf_size; // size of the allocated buffer
uint8_t* buf; // We don't own this buffer in case WebPIUpdate()
size_t part0_size_; // size of partition #0
const uint8_t* part0_buf_; // buffer to store partition #0
size_t part0_size; // size of partition #0
const uint8_t* part0_buf; // buffer to store partition #0
} MemBuffer;
// State of one incremental decoder: the current decoding state, the
// underlying lossy (VP8Decoder) or lossless (VP8LDecoder) decoder, the io
// structure handed to it, the input memory buffer and the output buffers.
// Diff-view note: '_'-suffixed members are the pre-change side of the diff;
// the un-suffixed members after each group are their replacements.
struct WebPIDecoder {
DecState state_; // current decoding state
WebPDecParams params_; // Params to store output info
int is_lossless_; // for down-casting 'dec_'.
void* dec_; // either a VP8Decoder or a VP8LDecoder instance
VP8Io io_;
DecState state; // current decoding state
WebPDecParams params; // Params to store output info
int is_lossless; // for down-casting 'dec'.
void* dec; // either a VP8Decoder or a VP8LDecoder instance
// io structure passed to the VP8 decoder calls; its data/data_size fields
// are kept in sync with the unread part of 'mem' (see DoRemap/ChangeState).
VP8Io io;
MemBuffer mem_; // input memory buffer.
WebPDecBuffer output_; // output buffer (when no external one is supplied,
// or if the external one has slow-memory)
WebPDecBuffer* final_output_; // Slow-memory output to copy to eventually.
size_t chunk_size_; // Compressed VP8/VP8L size extracted from Header.
MemBuffer mem; // input memory buffer.
WebPDecBuffer output; // output buffer (when no external one is supplied,
// or if the external one has slow-memory)
WebPDecBuffer* final_output; // Slow-memory output to copy to eventually.
size_t chunk_size; // Compressed VP8/VP8L size extracted from Header.
int last_mb_y_; // last row reached for intra-mode decoding
int last_mb_y; // last row reached for intra-mode decoding
};
// MB context to restore in case VP8DecodeMB() fails
//
// Filled by SaveContext() and written back by RestoreContext():
// 'left' mirrors mb_info[-1], 'info' mirrors mb_info[mb_x], and 'token_br'
// is a by-value copy of the token bit reader.
// Diff-view note: '_'-suffixed members are the pre-change side of the diff;
// the un-suffixed members are their replacements.
typedef struct {
VP8MB left_;
VP8MB info_;
VP8BitReader token_br_;
VP8MB left;
VP8MB info;
VP8BitReader token_br;
} MBContext;
//------------------------------------------------------------------------------
// MemBuffer: incoming data handling
// Returns the number of bytes currently held between the buffer's start and
// end offsets (end - start).
// Diff-view note: the '_'-suffixed return is the pre-change side of the diff;
// the un-suffixed one is its replacement.
static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
return (mem->end_ - mem->start_);
return (mem->end - mem->start);
}
// Check if we need to preserve the compressed alpha data, as it may not have
// been decoded yet.
// Returns 0 while still in STATE_WEBP_HEADER and for lossless images;
// otherwise returns non-zero only when the VP8 decoder has alpha data that is
// not yet flagged as decoded.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
static int NeedCompressedAlpha(const WebPIDecoder* const idec) {
if (idec->state_ == STATE_WEBP_HEADER) {
if (idec->state == STATE_WEBP_HEADER) {
// We haven't parsed the headers yet, so we don't know whether the image is
// lossy or lossless. This also means that we haven't parsed the ALPH chunk.
return 0;
}
if (idec->is_lossless_) {
if (idec->is_lossless) {
return 0; // ALPH chunk is not present for lossless images.
} else {
const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
assert(dec != NULL); // Must be true as idec->state_ != STATE_WEBP_HEADER.
return (dec->alpha_data_ != NULL) && !dec->is_alpha_decoded_;
const VP8Decoder* const dec = (VP8Decoder*)idec->dec;
assert(dec != NULL); // Must be true as idec->state != STATE_WEBP_HEADER.
return (dec->alpha_data != NULL) && !dec->is_alpha_decoded;
}
}
// Re-synchronizes idec->io and the decoder's bit readers with the input
// buffer after it has been replaced or extended.
// 'offset' is the pointer delta between the new and the previous location of
// the unread data (the visible callers pass
// 'mem->buf + mem->start - old_start').
// For a lossy decoder: every token partition reader is shifted by 'offset',
// the partition #0 reader is shifted only in MEM_MODE_MAP, the last
// partition's reader is extended up to the end of the buffered data, and,
// while the compressed alpha is still needed, the alpha data pointer is
// shifted too.
// For a lossless decoder: the bit reader is simply reset on the unread span.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
MemBuffer* const mem = &idec->mem_;
const uint8_t* const new_base = mem->buf_ + mem->start_;
// note: for VP8, setting up idec->io_ is only really needed at the beginning
MemBuffer* const mem = &idec->mem;
const uint8_t* const new_base = mem->buf + mem->start;
// note: for VP8, setting up idec->io is only really needed at the beginning
// of the decoding, till partition #0 is complete.
idec->io_.data = new_base;
idec->io_.data_size = MemDataSize(mem);
idec->io.data = new_base;
idec->io.data_size = MemDataSize(mem);
if (idec->dec_ != NULL) {
if (!idec->is_lossless_) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
const uint32_t last_part = dec->num_parts_minus_one_;
if (idec->dec != NULL) {
if (!idec->is_lossless) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec;
const uint32_t last_part = dec->num_parts_minus_one;
if (offset != 0) {
uint32_t p;
for (p = 0; p <= last_part; ++p) {
VP8RemapBitReader(dec->parts_ + p, offset);
VP8RemapBitReader(dec->parts + p, offset);
}
// Remap partition #0 data pointer to new offset, but only in MAP
// mode (in APPEND mode, partition #0 is copied into a fixed memory).
if (mem->mode_ == MEM_MODE_MAP) {
VP8RemapBitReader(&dec->br_, offset);
if (mem->mode == MEM_MODE_MAP) {
VP8RemapBitReader(&dec->br, offset);
}
}
{
// The last partition runs to the end of the buffered data, so its reader
// is given the span [last_start, mem->buf + mem->end). The pre-change side
// did this unconditionally; the replacement skips it when last_start is
// NULL.
const uint8_t* const last_start = dec->parts_[last_part].buf_;
VP8BitReaderSetBuffer(&dec->parts_[last_part], last_start,
mem->buf_ + mem->end_ - last_start);
const uint8_t* const last_start = dec->parts[last_part].buf;
// 'last_start' will be NULL when 'idec->state' is < STATE_VP8_PARTS0
// and through a portion of that state (when there isn't enough data to
// parse the partitions). The bitreader is only used meaningfully when
// there is enough data to begin parsing partition 0.
if (last_start != NULL) {
VP8BitReaderSetBuffer(&dec->parts[last_part], last_start,
mem->buf + mem->end - last_start);
}
}
if (NeedCompressedAlpha(idec)) {
// The undecoded alpha payload moved along with the buffer: shift the
// pointer and, for lossless-compressed alpha, reset the alpha VP8L reader
// on the bytes following the ALPHA_HEADER_LEN-byte header.
ALPHDecoder* const alph_dec = dec->alph_dec_;
dec->alpha_data_ += offset;
if (alph_dec != NULL && alph_dec->vp8l_dec_ != NULL) {
if (alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION) {
VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec_;
assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN);
VP8LBitReaderSetBuffer(&alph_vp8l_dec->br_,
dec->alpha_data_ + ALPHA_HEADER_LEN,
dec->alpha_data_size_ - ALPHA_HEADER_LEN);
} else { // alph_dec->method_ == ALPHA_NO_COMPRESSION
ALPHDecoder* const alph_dec = dec->alph_dec;
dec->alpha_data += offset;
if (alph_dec != NULL && alph_dec->vp8l_dec != NULL) {
if (alph_dec->method == ALPHA_LOSSLESS_COMPRESSION) {
VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec;
assert(dec->alpha_data_size >= ALPHA_HEADER_LEN);
VP8LBitReaderSetBuffer(&alph_vp8l_dec->br,
dec->alpha_data + ALPHA_HEADER_LEN,
dec->alpha_data_size - ALPHA_HEADER_LEN);
} else { // alph_dec->method == ALPHA_NO_COMPRESSION
// Nothing special to do in this case.
}
}
}
} else { // Resize lossless bitreader
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec;
VP8LBitReaderSetBuffer(&dec->br, new_base, MemDataSize(mem));
}
}
}
// Appends data to the end of MemBuffer->buf_. It expands the allocated memory
// Appends data to the end of MemBuffer->buf. It expands the allocated memory
// size if required and also updates VP8BitReader's if new memory is allocated.
WEBP_NODISCARD static int AppendToMemBuffer(WebPIDecoder* const idec,
const uint8_t* const data,
size_t data_size) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
MemBuffer* const mem = &idec->mem_;
VP8Decoder* const dec = (VP8Decoder*)idec->dec;
MemBuffer* const mem = &idec->mem;
const int need_compressed_alpha = NeedCompressedAlpha(idec);
const uint8_t* const old_start =
(mem->buf_ == NULL) ? NULL : mem->buf_ + mem->start_;
(mem->buf == NULL) ? NULL : mem->buf + mem->start;
const uint8_t* const old_base =
need_compressed_alpha ? dec->alpha_data_ : old_start;
assert(mem->buf_ != NULL || mem->start_ == 0);
assert(mem->mode_ == MEM_MODE_APPEND);
need_compressed_alpha ? dec->alpha_data : old_start;
assert(mem->buf != NULL || mem->start == 0);
assert(mem->mode == MEM_MODE_APPEND);
if (data_size > MAX_CHUNK_PAYLOAD) {
// security safeguard: trying to allocate more than what the format
// allows for a chunk should be considered a smoke smell.
return 0;
}
if (mem->end_ + data_size > mem->buf_size_) { // Need some free memory
if (mem->end + data_size > mem->buf_size) { // Need some free memory
const size_t new_mem_start = old_start - old_base;
const size_t current_size = MemDataSize(mem) + new_mem_start;
const uint64_t new_size = (uint64_t)current_size + data_size;
@@ -190,85 +201,85 @@ WEBP_NODISCARD static int AppendToMemBuffer(WebPIDecoder* const idec,
(uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf));
if (new_buf == NULL) return 0;
if (old_base != NULL) memcpy(new_buf, old_base, current_size);
WebPSafeFree(mem->buf_);
mem->buf_ = new_buf;
mem->buf_size_ = (size_t)extra_size;
mem->start_ = new_mem_start;
mem->end_ = current_size;
WebPSafeFree(mem->buf);
mem->buf = new_buf;
mem->buf_size = (size_t)extra_size;
mem->start = new_mem_start;
mem->end = current_size;
}
assert(mem->buf_ != NULL);
memcpy(mem->buf_ + mem->end_, data, data_size);
mem->end_ += data_size;
assert(mem->end_ <= mem->buf_size_);
assert(mem->buf != NULL);
memcpy(mem->buf + mem->end, data, data_size);
mem->end += data_size;
assert(mem->end <= mem->buf_size);
DoRemap(idec, mem->buf_ + mem->start_ - old_start);
DoRemap(idec, mem->buf + mem->start - old_start);
return 1;
}
// Points the MemBuffer at a caller-provided buffer (MEM_MODE_MAP only; no
// copy is made) and remaps the decoder state onto it via DoRemap().
// 'data'/'data_size' describe the new buffer. It must be at least as large as
// the previously mapped one, otherwise 0 is returned and nothing is changed.
// Returns 1 on success.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
WEBP_NODISCARD static int RemapMemBuffer(WebPIDecoder* const idec,
const uint8_t* const data,
size_t data_size) {
MemBuffer* const mem = &idec->mem_;
const uint8_t* const old_buf = mem->buf_;
MemBuffer* const mem = &idec->mem;
const uint8_t* const old_buf = mem->buf;
// Remember where the unread data used to start so the delta can be handed
// to DoRemap() below.
const uint8_t* const old_start =
(old_buf == NULL) ? NULL : old_buf + mem->start_;
assert(old_buf != NULL || mem->start_ == 0);
assert(mem->mode_ == MEM_MODE_MAP);
(old_buf == NULL) ? NULL : old_buf + mem->start;
assert(old_buf != NULL || mem->start == 0);
assert(mem->mode == MEM_MODE_MAP);
if (data_size < mem->buf_size_) return 0; // can't remap to a shorter buffer!
if (data_size < mem->buf_size) return 0; // can't remap to a shorter buffer!
// The whole new buffer counts as valid data: end == buf_size == data_size.
mem->buf_ = (uint8_t*)data;
mem->end_ = mem->buf_size_ = data_size;
mem->buf = (uint8_t*)data;
mem->end = mem->buf_size = data_size;
DoRemap(idec, mem->buf_ + mem->start_ - old_start);
DoRemap(idec, mem->buf + mem->start - old_start);
return 1;
}
// Puts a MemBuffer in its initial state: mode MEM_MODE_NONE, no main buffer
// (NULL, size 0) and no partition #0 copy (NULL, size 0).
// NOTE(review): 'start' and 'end' are not assigned here; presumably the
// enclosing structure is zeroed by the caller -- confirm.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
static void InitMemBuffer(MemBuffer* const mem) {
mem->mode_ = MEM_MODE_NONE;
mem->buf_ = NULL;
mem->buf_size_ = 0;
mem->part0_buf_ = NULL;
mem->part0_size_ = 0;
mem->mode = MEM_MODE_NONE;
mem->buf = NULL;
mem->buf_size = 0;
mem->part0_buf = NULL;
mem->part0_size = 0;
}
// Releases the memory held by a MemBuffer. Only MEM_MODE_APPEND buffers are
// freed (main buffer and partition #0 copy); in the other modes nothing is
// released here. The pointers themselves are not reset.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
static void ClearMemBuffer(MemBuffer* const mem) {
assert(mem);
if (mem->mode_ == MEM_MODE_APPEND) {
WebPSafeFree(mem->buf_);
WebPSafeFree((void*)mem->part0_buf_);
if (mem->mode == MEM_MODE_APPEND) {
WebPSafeFree(mem->buf);
// part0_buf is declared 'const uint8_t*', hence the cast for freeing.
WebPSafeFree((void*)mem->part0_buf);
}
}
// Verifies that the MemBuffer is used in the 'expected' mode. A buffer still
// in MEM_MODE_NONE adopts 'expected'; a buffer already in a different mode is
// an error.
// Returns 1 when the mode is (now) 'expected', 0 on a mode mismatch.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
WEBP_NODISCARD static int CheckMemBufferMode(MemBuffer* const mem,
MemBufferMode expected) {
if (mem->mode_ == MEM_MODE_NONE) {
mem->mode_ = expected; // switch to the expected mode
} else if (mem->mode_ != expected) {
if (mem->mode == MEM_MODE_NONE) {
mem->mode = expected; // switch to the expected mode
} else if (mem->mode != expected) {
return 0; // we mixed the modes => error
}
assert(mem->mode_ == expected); // mode is ok
assert(mem->mode == expected); // mode is ok
return 1;
}
// To be called last.
// Marks the decoder as STATE_DONE, flips the output when options->flip is
// set, and, when a separate final output was registered, copies the pixels
// to it and makes params.output describe that final buffer.
// Returns VP8_STATUS_OK, or the failing status of the flip or pixel copy.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
WEBP_NODISCARD static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) {
const WebPDecoderOptions* const options = idec->params_.options;
WebPDecBuffer* const output = idec->params_.output;
const WebPDecoderOptions* const options = idec->params.options;
WebPDecBuffer* const output = idec->params.output;
// The state becomes DONE before any of the steps below can fail.
idec->state_ = STATE_DONE;
idec->state = STATE_DONE;
if (options != NULL && options->flip) {
const VP8StatusCode status = WebPFlipBuffer(output);
if (status != VP8_STATUS_OK) return status;
}
if (idec->final_output_ != NULL) {
if (idec->final_output != NULL) {
// The internal output buffer is freed whether or not the copy succeeded;
// the status is only checked afterwards.
const VP8StatusCode status = WebPCopyDecBufferPixels(
output, idec->final_output_); // do the slow-copy
WebPFreeDecBuffer(&idec->output_);
output, idec->final_output); // do the slow-copy
WebPFreeDecBuffer(&idec->output);
if (status != VP8_STATUS_OK) return status;
*output = *idec->final_output_;
idec->final_output_ = NULL;
*output = *idec->final_output;
idec->final_output = NULL;
}
return VP8_STATUS_OK;
}
@@ -278,43 +289,43 @@ WEBP_NODISCARD static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) {
// Snapshots the state that RestoreContext() puts back: the left macroblock
// info (mb_info[-1]), the info of the current macroblock (mb_info[mb_x]) and
// a by-value copy of the token bit reader.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
static void SaveContext(const VP8Decoder* dec, const VP8BitReader* token_br,
MBContext* const context) {
context->left_ = dec->mb_info_[-1];
context->info_ = dec->mb_info_[dec->mb_x_];
context->token_br_ = *token_br;
context->left = dec->mb_info[-1];
context->info = dec->mb_info[dec->mb_x];
context->token_br = *token_br;
}
// Inverse of SaveContext(): writes the saved left/current macroblock info
// back into dec->mb_info[-1] and dec->mb_info[mb_x], and overwrites
// '*token_br' with the saved bit reader copy.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
VP8BitReader* const token_br) {
dec->mb_info_[-1] = context->left_;
dec->mb_info_[dec->mb_x_] = context->info_;
*token_br = context->token_br_;
dec->mb_info[-1] = context->left;
dec->mb_info[dec->mb_x] = context->info;
*token_br = context->token_br;
}
//------------------------------------------------------------------------------
// Puts the incremental decoder in STATE_ERROR and returns 'error' unchanged,
// so callers can write 'return IDecError(idec, status);'.
// If the failure happens in STATE_VP8_DATA, VP8ExitCritical() is called first
// and its result is deliberately ignored.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
if (idec->state_ == STATE_VP8_DATA) {
if (idec->state == STATE_VP8_DATA) {
// Synchronize the thread, clean-up and check for errors.
(void)VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
(void)VP8ExitCritical((VP8Decoder*)idec->dec, &idec->io);
}
idec->state_ = STATE_ERROR;
idec->state = STATE_ERROR;
return error;
}
// Switches the decoder to 'new_state' and marks 'consumed_bytes' of input as
// used: the buffer's start offset is advanced, and idec->io data/data_size
// are refreshed to describe the remaining unread bytes.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
static void ChangeState(WebPIDecoder* const idec, DecState new_state,
size_t consumed_bytes) {
MemBuffer* const mem = &idec->mem_;
idec->state_ = new_state;
mem->start_ += consumed_bytes;
assert(mem->start_ <= mem->end_);
idec->io_.data = mem->buf_ + mem->start_;
idec->io_.data_size = MemDataSize(mem);
MemBuffer* const mem = &idec->mem;
idec->state = new_state;
mem->start += consumed_bytes;
// Consuming more than what is buffered would be a caller bug.
assert(mem->start <= mem->end);
idec->io.data = mem->buf + mem->start;
idec->io.data_size = MemDataSize(mem);
}
// Headers
static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
MemBuffer* const mem = &idec->mem_;
const uint8_t* data = mem->buf_ + mem->start_;
MemBuffer* const mem = &idec->mem;
const uint8_t* data = mem->buf + mem->start;
size_t curr_size = MemDataSize(mem);
VP8StatusCode status;
WebPHeaderStructure headers;
@@ -329,32 +340,32 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
return IDecError(idec, status);
}
idec->chunk_size_ = headers.compressed_size;
idec->is_lossless_ = headers.is_lossless;
if (!idec->is_lossless_) {
idec->chunk_size = headers.compressed_size;
idec->is_lossless = headers.is_lossless;
if (!idec->is_lossless) {
VP8Decoder* const dec = VP8New();
if (dec == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
dec->incremental_ = 1;
idec->dec_ = dec;
dec->alpha_data_ = headers.alpha_data;
dec->alpha_data_size_ = headers.alpha_data_size;
dec->incremental = 1;
idec->dec = dec;
dec->alpha_data = headers.alpha_data;
dec->alpha_data_size = headers.alpha_data_size;
ChangeState(idec, STATE_VP8_HEADER, headers.offset);
} else {
VP8LDecoder* const dec = VP8LNew();
if (dec == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
idec->dec_ = dec;
idec->dec = dec;
ChangeState(idec, STATE_VP8L_HEADER, headers.offset);
}
return VP8_STATUS_OK;
}
static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_;
const size_t curr_size = MemDataSize(&idec->mem_);
const uint8_t* data = idec->mem.buf + idec->mem.start;
const size_t curr_size = MemDataSize(&idec->mem);
int width, height;
uint32_t bits;
@@ -362,61 +373,61 @@ static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
// Not enough data bytes to extract VP8 Frame Header.
return VP8_STATUS_SUSPENDED;
}
if (!VP8GetInfo(data, curr_size, idec->chunk_size_, &width, &height)) {
if (!VP8GetInfo(data, curr_size, idec->chunk_size, &width, &height)) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
bits = data[0] | (data[1] << 8) | (data[2] << 16);
idec->mem_.part0_size_ = (bits >> 5) + VP8_FRAME_HEADER_SIZE;
idec->mem.part0_size = (bits >> 5) + VP8_FRAME_HEADER_SIZE;
idec->io_.data = data;
idec->io_.data_size = curr_size;
idec->state_ = STATE_VP8_PARTS0;
idec->io.data = data;
idec->io.data_size = curr_size;
idec->state = STATE_VP8_PARTS0;
return VP8_STATUS_OK;
}
// Partition #0
// Secures the partition #0 bytes currently referenced by the decoder's main
// bit reader and marks them as consumed in the input buffer.
// In MEM_MODE_APPEND the bytes are copied into a newly allocated buffer that
// is stored in mem->part0_buf, and the bit reader is re-pointed at the copy.
// In the other mode the reader keeps pointing into the input data.
// Returns VP8_STATUS_OK on success, VP8_STATUS_BITSTREAM_ERROR for an empty
// partition #0, or VP8_STATUS_OUT_OF_MEMORY if the copy cannot be allocated.
// Diff-view note: lines using '_'-suffixed members are the pre-change side of
// the diff; the un-suffixed lines that follow are their replacements.
static VP8StatusCode CopyParts0Data(WebPIDecoder* const idec) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
VP8BitReader* const br = &dec->br_;
const size_t part_size = br->buf_end_ - br->buf_;
MemBuffer* const mem = &idec->mem_;
assert(!idec->is_lossless_);
assert(mem->part0_buf_ == NULL);
VP8Decoder* const dec = (VP8Decoder*)idec->dec;
VP8BitReader* const br = &dec->br;
// Bytes remaining between the reader's current position and its end.
const size_t part_size = br->buf_end - br->buf;
MemBuffer* const mem = &idec->mem;
assert(!idec->is_lossless);
assert(mem->part0_buf == NULL);
// the following is a format limitation, no need for runtime check:
assert(part_size <= mem->part0_size_);
assert(part_size <= mem->part0_size);
if (part_size == 0) { // can't have zero-size partition #0
return VP8_STATUS_BITSTREAM_ERROR;
}
if (mem->mode_ == MEM_MODE_APPEND) {
if (mem->mode == MEM_MODE_APPEND) {
// We copy and grab ownership of the partition #0 data.
uint8_t* const part0_buf = (uint8_t*)WebPSafeMalloc(1ULL, part_size);
if (part0_buf == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
memcpy(part0_buf, br->buf_, part_size);
mem->part0_buf_ = part0_buf;
memcpy(part0_buf, br->buf, part_size);
mem->part0_buf = part0_buf;
VP8BitReaderSetBuffer(br, part0_buf, part_size);
} else {
// Else: just keep pointers to the partition #0's data in dec_->br_.
// Else: just keep pointers to the partition #0's data in dec->br.
}
// Partition #0 is now accounted for; skip past it in the input buffer.
mem->start_ += part_size;
mem->start += part_size;
return VP8_STATUS_OK;
}
static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
VP8Io* const io = &idec->io_;
const WebPDecParams* const params = &idec->params_;
VP8Decoder* const dec = (VP8Decoder*)idec->dec;
VP8Io* const io = &idec->io;
const WebPDecParams* const params = &idec->params;
WebPDecBuffer* const output = params->output;
// Wait till we have enough data for the whole partition #0
if (MemDataSize(&idec->mem_) < idec->mem_.part0_size_) {
if (MemDataSize(&idec->mem) < idec->mem.part0_size) {
return VP8_STATUS_SUSPENDED;
}
if (!VP8GetHeaders(dec, io)) {
const VP8StatusCode status = dec->status_;
const VP8StatusCode status = dec->status;
if (status == VP8_STATUS_SUSPENDED ||
status == VP8_STATUS_NOT_ENOUGH_DATA) {
// treating NOT_ENOUGH_DATA as SUSPENDED state
@@ -426,69 +437,69 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
}
// Allocate/Verify output buffer now
dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options,
output);
if (dec->status_ != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
dec->status = WebPAllocateDecBuffer(io->width, io->height, params->options,
output);
if (dec->status != VP8_STATUS_OK) {
return IDecError(idec, dec->status);
}
// This change must be done before calling VP8InitFrame()
dec->mt_method_ = VP8GetThreadMethod(params->options, NULL,
io->width, io->height);
dec->mt_method = VP8GetThreadMethod(params->options, NULL,
io->width, io->height);
VP8InitDithering(params->options, dec);
dec->status_ = CopyParts0Data(idec);
if (dec->status_ != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
dec->status = CopyParts0Data(idec);
if (dec->status != VP8_STATUS_OK) {
return IDecError(idec, dec->status);
}
// Finish setting up the decoding parameters. Will call io->setup().
if (VP8EnterCritical(dec, io) != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
return IDecError(idec, dec->status);
}
// Note: past this point, teardown() must always be called
// in case of error.
idec->state_ = STATE_VP8_DATA;
idec->state = STATE_VP8_DATA;
// Allocate memory and prepare everything.
if (!VP8InitFrame(dec, io)) {
return IDecError(idec, dec->status_);
return IDecError(idec, dec->status);
}
return VP8_STATUS_OK;
}
// Remaining partitions
static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
VP8Io* const io = &idec->io_;
VP8Decoder* const dec = (VP8Decoder*)idec->dec;
VP8Io* const io = &idec->io;
// Make sure partition #0 has been read before, to set dec to ready_.
if (!dec->ready_) {
// Make sure partition #0 has been read before, to set dec to ready.
if (!dec->ready) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
if (idec->last_mb_y_ != dec->mb_y_) {
if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
for (; dec->mb_y < dec->mb_h; ++dec->mb_y) {
if (idec->last_mb_y != dec->mb_y) {
if (!VP8ParseIntraModeRow(&dec->br, dec)) {
// note: normally, error shouldn't occur since we already have the whole
// partition0 available here in DecodeRemaining(). Reaching EOF while
// reading intra modes really means a BITSTREAM_ERROR.
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
idec->last_mb_y_ = dec->mb_y_;
idec->last_mb_y = dec->mb_y;
}
for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
for (; dec->mb_x < dec->mb_w; ++dec->mb_x) {
VP8BitReader* const token_br =
&dec->parts_[dec->mb_y_ & dec->num_parts_minus_one_];
&dec->parts[dec->mb_y & dec->num_parts_minus_one];
MBContext context;
SaveContext(dec, token_br, &context);
if (!VP8DecodeMB(dec, token_br)) {
// We shouldn't fail when MAX_MB data was available
if (dec->num_parts_minus_one_ == 0 &&
MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
if (dec->num_parts_minus_one == 0 &&
MemDataSize(&idec->mem) > MAX_MB_SIZE) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
// Synchronize the threads.
if (dec->mt_method_ > 0) {
if (!WebPGetWorkerInterface()->Sync(&dec->worker_)) {
if (dec->mt_method > 0) {
if (!WebPGetWorkerInterface()->Sync(&dec->worker)) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
}
@@ -496,9 +507,9 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
return VP8_STATUS_SUSPENDED;
}
// Release buffer only if there is only one partition
if (dec->num_parts_minus_one_ == 0) {
idec->mem_.start_ = token_br->buf_ - idec->mem_.buf_;
assert(idec->mem_.start_ <= idec->mem_.end_);
if (dec->num_parts_minus_one == 0) {
idec->mem.start = token_br->buf - idec->mem.buf;
assert(idec->mem.start <= idec->mem.end);
}
}
VP8InitScanline(dec); // Prepare for next scanline
@@ -510,10 +521,10 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
}
// Synchronize the thread and check for errors.
if (!VP8ExitCritical(dec, io)) {
idec->state_ = STATE_ERROR; // prevent re-entry in IDecError
idec->state = STATE_ERROR; // prevent re-entry in IDecError
return IDecError(idec, VP8_STATUS_USER_ABORT);
}
dec->ready_ = 0;
dec->ready = 0;
return FinishDecoding(idec);
}
@@ -526,81 +537,81 @@ static VP8StatusCode ErrorStatusLossless(WebPIDecoder* const idec,
}
static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
VP8Io* const io = &idec->io_;
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
const WebPDecParams* const params = &idec->params_;
VP8Io* const io = &idec->io;
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec;
const WebPDecParams* const params = &idec->params;
WebPDecBuffer* const output = params->output;
size_t curr_size = MemDataSize(&idec->mem_);
assert(idec->is_lossless_);
size_t curr_size = MemDataSize(&idec->mem);
assert(idec->is_lossless);
// Wait until there's enough data for decoding header.
if (curr_size < (idec->chunk_size_ >> 3)) {
dec->status_ = VP8_STATUS_SUSPENDED;
return ErrorStatusLossless(idec, dec->status_);
if (curr_size < (idec->chunk_size >> 3)) {
dec->status = VP8_STATUS_SUSPENDED;
return ErrorStatusLossless(idec, dec->status);
}
if (!VP8LDecodeHeader(dec, io)) {
if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR &&
curr_size < idec->chunk_size_) {
dec->status_ = VP8_STATUS_SUSPENDED;
if (dec->status == VP8_STATUS_BITSTREAM_ERROR &&
curr_size < idec->chunk_size) {
dec->status = VP8_STATUS_SUSPENDED;
}
return ErrorStatusLossless(idec, dec->status_);
return ErrorStatusLossless(idec, dec->status);
}
// Allocate/verify output buffer now.
dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options,
output);
if (dec->status_ != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
dec->status = WebPAllocateDecBuffer(io->width, io->height, params->options,
output);
if (dec->status != VP8_STATUS_OK) {
return IDecError(idec, dec->status);
}
idec->state_ = STATE_VP8L_DATA;
idec->state = STATE_VP8L_DATA;
return VP8_STATUS_OK;
}
static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
const size_t curr_size = MemDataSize(&idec->mem_);
assert(idec->is_lossless_);
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec;
const size_t curr_size = MemDataSize(&idec->mem);
assert(idec->is_lossless);
// Switch to incremental decoding if we don't have all the bytes available.
dec->incremental_ = (curr_size < idec->chunk_size_);
dec->incremental = (curr_size < idec->chunk_size);
if (!VP8LDecodeImage(dec)) {
return ErrorStatusLossless(idec, dec->status_);
return ErrorStatusLossless(idec, dec->status);
}
assert(dec->status_ == VP8_STATUS_OK || dec->status_ == VP8_STATUS_SUSPENDED);
return (dec->status_ == VP8_STATUS_SUSPENDED) ? dec->status_
: FinishDecoding(idec);
assert(dec->status == VP8_STATUS_OK || dec->status == VP8_STATUS_SUSPENDED);
return (dec->status == VP8_STATUS_SUSPENDED) ? dec->status
: FinishDecoding(idec);
}
// Main decoding loop
static VP8StatusCode IDecode(WebPIDecoder* idec) {
VP8StatusCode status = VP8_STATUS_SUSPENDED;
if (idec->state_ == STATE_WEBP_HEADER) {
if (idec->state == STATE_WEBP_HEADER) {
status = DecodeWebPHeaders(idec);
} else {
if (idec->dec_ == NULL) {
if (idec->dec == NULL) {
return VP8_STATUS_SUSPENDED; // can't continue if we have no decoder.
}
}
if (idec->state_ == STATE_VP8_HEADER) {
if (idec->state == STATE_VP8_HEADER) {
status = DecodeVP8FrameHeader(idec);
}
if (idec->state_ == STATE_VP8_PARTS0) {
if (idec->state == STATE_VP8_PARTS0) {
status = DecodePartition0(idec);
}
if (idec->state_ == STATE_VP8_DATA) {
const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
if (idec->state == STATE_VP8_DATA) {
const VP8Decoder* const dec = (VP8Decoder*)idec->dec;
if (dec == NULL) {
return VP8_STATUS_SUSPENDED; // can't continue if we have no decoder.
}
status = DecodeRemaining(idec);
}
if (idec->state_ == STATE_VP8L_HEADER) {
if (idec->state == STATE_VP8L_HEADER) {
status = DecodeVP8LHeader(idec);
}
if (idec->state_ == STATE_VP8L_DATA) {
if (idec->state == STATE_VP8L_DATA) {
status = DecodeVP8LData(idec);
}
return status;
@@ -617,29 +628,29 @@ WEBP_NODISCARD static WebPIDecoder* NewDecoder(
return NULL;
}
idec->state_ = STATE_WEBP_HEADER;
idec->chunk_size_ = 0;
idec->state = STATE_WEBP_HEADER;
idec->chunk_size = 0;
idec->last_mb_y_ = -1;
idec->last_mb_y = -1;
InitMemBuffer(&idec->mem_);
if (!WebPInitDecBuffer(&idec->output_) || !VP8InitIo(&idec->io_)) {
InitMemBuffer(&idec->mem);
if (!WebPInitDecBuffer(&idec->output) || !VP8InitIo(&idec->io)) {
WebPSafeFree(idec);
return NULL;
}
WebPResetDecParams(&idec->params_);
WebPResetDecParams(&idec->params);
if (output_buffer == NULL || WebPAvoidSlowMemory(output_buffer, features)) {
idec->params_.output = &idec->output_;
idec->final_output_ = output_buffer;
idec->params.output = &idec->output;
idec->final_output = output_buffer;
if (output_buffer != NULL) {
idec->params_.output->colorspace = output_buffer->colorspace;
idec->params.output->colorspace = output_buffer->colorspace;
}
} else {
idec->params_.output = output_buffer;
idec->final_output_ = NULL;
idec->params.output = output_buffer;
idec->final_output = NULL;
}
WebPInitCustomIo(&idec->params_, &idec->io_); // Plug the I/O functions.
WebPInitCustomIo(&idec->params, &idec->io); // Plug the I/O functions.
return idec;
}
@@ -674,27 +685,27 @@ WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size,
}
// Finish initialization
if (config != NULL) {
idec->params_.options = &config->options;
idec->params.options = &config->options;
}
return idec;
}
void WebPIDelete(WebPIDecoder* idec) {
if (idec == NULL) return;
if (idec->dec_ != NULL) {
if (!idec->is_lossless_) {
if (idec->state_ == STATE_VP8_DATA) {
if (idec->dec != NULL) {
if (!idec->is_lossless) {
if (idec->state == STATE_VP8_DATA) {
// Synchronize the thread, clean-up and check for errors.
// TODO(vrabaud) do we care about the return result?
(void)VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
(void)VP8ExitCritical((VP8Decoder*)idec->dec, &idec->io);
}
VP8Delete((VP8Decoder*)idec->dec_);
VP8Delete((VP8Decoder*)idec->dec);
} else {
VP8LDelete((VP8LDecoder*)idec->dec_);
VP8LDelete((VP8LDecoder*)idec->dec);
}
}
ClearMemBuffer(&idec->mem_);
WebPFreeDecBuffer(&idec->output_);
ClearMemBuffer(&idec->mem);
WebPFreeDecBuffer(&idec->output);
WebPSafeFree(idec);
}
@@ -717,11 +728,11 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
}
idec = WebPINewDecoder(NULL);
if (idec == NULL) return NULL;
idec->output_.colorspace = csp;
idec->output_.is_external_memory = is_external_memory;
idec->output_.u.RGBA.rgba = output_buffer;
idec->output_.u.RGBA.stride = output_stride;
idec->output_.u.RGBA.size = output_buffer_size;
idec->output.colorspace = csp;
idec->output.is_external_memory = is_external_memory;
idec->output.u.RGBA.rgba = output_buffer;
idec->output.u.RGBA.stride = output_stride;
idec->output.u.RGBA.size = output_buffer_size;
return idec;
}
@@ -751,20 +762,20 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
idec = WebPINewDecoder(NULL);
if (idec == NULL) return NULL;
idec->output_.colorspace = colorspace;
idec->output_.is_external_memory = is_external_memory;
idec->output_.u.YUVA.y = luma;
idec->output_.u.YUVA.y_stride = luma_stride;
idec->output_.u.YUVA.y_size = luma_size;
idec->output_.u.YUVA.u = u;
idec->output_.u.YUVA.u_stride = u_stride;
idec->output_.u.YUVA.u_size = u_size;
idec->output_.u.YUVA.v = v;
idec->output_.u.YUVA.v_stride = v_stride;
idec->output_.u.YUVA.v_size = v_size;
idec->output_.u.YUVA.a = a;
idec->output_.u.YUVA.a_stride = a_stride;
idec->output_.u.YUVA.a_size = a_size;
idec->output.colorspace = colorspace;
idec->output.is_external_memory = is_external_memory;
idec->output.u.YUVA.y = luma;
idec->output.u.YUVA.y_stride = luma_stride;
idec->output.u.YUVA.y_size = luma_size;
idec->output.u.YUVA.u = u;
idec->output.u.YUVA.u_stride = u_stride;
idec->output.u.YUVA.u_size = u_size;
idec->output.u.YUVA.v = v;
idec->output.u.YUVA.v_stride = v_stride;
idec->output.u.YUVA.v_size = v_size;
idec->output.u.YUVA.a = a;
idec->output.u.YUVA.a_stride = a_stride;
idec->output.u.YUVA.a_size = a_size;
return idec;
}
@@ -781,10 +792,10 @@ WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,
static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) {
assert(idec);
if (idec->state_ == STATE_ERROR) {
if (idec->state == STATE_ERROR) {
return VP8_STATUS_BITSTREAM_ERROR;
}
if (idec->state_ == STATE_DONE) {
if (idec->state == STATE_DONE) {
return VP8_STATUS_OK;
}
return VP8_STATUS_SUSPENDED;
@@ -801,7 +812,7 @@ VP8StatusCode WebPIAppend(WebPIDecoder* idec,
return status;
}
// Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_APPEND)) {
if (!CheckMemBufferMode(&idec->mem, MEM_MODE_APPEND)) {
return VP8_STATUS_INVALID_PARAM;
}
// Append data to memory buffer
@@ -822,7 +833,7 @@ VP8StatusCode WebPIUpdate(WebPIDecoder* idec,
return status;
}
// Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_MAP)) {
if (!CheckMemBufferMode(&idec->mem, MEM_MODE_MAP)) {
return VP8_STATUS_INVALID_PARAM;
}
// Make the memory buffer point to the new buffer
@@ -835,16 +846,16 @@ VP8StatusCode WebPIUpdate(WebPIDecoder* idec,
//------------------------------------------------------------------------------
static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
if (idec == NULL || idec->dec_ == NULL) {
if (idec == NULL || idec->dec == NULL) {
return NULL;
}
if (idec->state_ <= STATE_VP8_PARTS0) {
if (idec->state <= STATE_VP8_PARTS0) {
return NULL;
}
if (idec->final_output_ != NULL) {
if (idec->final_output != NULL) {
return NULL; // not yet slow-copied
}
return idec->params_.output;
return idec->params.output;
}
const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
@@ -855,7 +866,7 @@ const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
if (top != NULL) *top = 0;
if (src != NULL) {
if (width != NULL) *width = src->width;
if (height != NULL) *height = idec->params_.last_y;
if (height != NULL) *height = idec->params.last_y;
} else {
if (width != NULL) *width = 0;
if (height != NULL) *height = 0;
@@ -871,7 +882,7 @@ WEBP_NODISCARD uint8_t* WebPIDecGetRGB(const WebPIDecoder* idec, int* last_y,
return NULL;
}
if (last_y != NULL) *last_y = idec->params_.last_y;
if (last_y != NULL) *last_y = idec->params.last_y;
if (width != NULL) *width = src->width;
if (height != NULL) *height = src->height;
if (stride != NULL) *stride = src->u.RGBA.stride;
@@ -889,7 +900,7 @@ WEBP_NODISCARD uint8_t* WebPIDecGetYUVA(const WebPIDecoder* idec, int* last_y,
return NULL;
}
if (last_y != NULL) *last_y = idec->params_.last_y;
if (last_y != NULL) *last_y = idec->params.last_y;
if (u != NULL) *u = src->u.YUVA.u;
if (v != NULL) *v = src->u.YUVA.v;
if (a != NULL) *a = src->u.YUVA.a;
@@ -907,14 +918,14 @@ int WebPISetIOHooks(WebPIDecoder* const idec,
VP8IoSetupHook setup,
VP8IoTeardownHook teardown,
void* user_data) {
if (idec == NULL || idec->state_ > STATE_WEBP_HEADER) {
if (idec == NULL || idec->state > STATE_WEBP_HEADER) {
return 0;
}
idec->io_.put = put;
idec->io_.setup = setup;
idec->io_.teardown = teardown;
idec->io_.opaque = user_data;
idec->io.put = put;
idec->io.setup = setup;
idec->io.teardown = teardown;
idec->io.opaque = user_data;
return 1;
}

View File

@@ -12,12 +12,20 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "src/dec/vp8_dec.h"
#include "src/webp/types.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/dsp/yuv.h"
#include "src/utils/rescaler_utils.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h"
//------------------------------------------------------------------------------
// Main YUV<->RGB conversion functions
@@ -25,9 +33,9 @@
static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
WebPDecBuffer* output = p->output;
const WebPYUVABuffer* const buf = &output->u.YUVA;
uint8_t* const y_dst = buf->y + (size_t)io->mb_y * buf->y_stride;
uint8_t* const u_dst = buf->u + (size_t)(io->mb_y >> 1) * buf->u_stride;
uint8_t* const v_dst = buf->v + (size_t)(io->mb_y >> 1) * buf->v_stride;
uint8_t* const y_dst = buf->y + (ptrdiff_t)io->mb_y * buf->y_stride;
uint8_t* const u_dst = buf->u + (ptrdiff_t)(io->mb_y >> 1) * buf->u_stride;
uint8_t* const v_dst = buf->v + (ptrdiff_t)(io->mb_y >> 1) * buf->v_stride;
const int mb_w = io->mb_w;
const int mb_h = io->mb_h;
const int uv_w = (mb_w + 1) / 2;
@@ -42,7 +50,7 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
WebPDecBuffer* const output = p->output;
WebPRGBABuffer* const buf = &output->u.RGBA;
uint8_t* const dst = buf->rgba + (size_t)io->mb_y * buf->stride;
uint8_t* const dst = buf->rgba + (ptrdiff_t)io->mb_y * buf->stride;
WebPSamplerProcessPlane(io->y, io->y_stride,
io->u, io->v, io->uv_stride,
dst, buf->stride, io->mb_w, io->mb_h,
@@ -57,7 +65,7 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
int num_lines_out = io->mb_h; // a priori guess
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* dst = buf->rgba + (size_t)io->mb_y * buf->stride;
uint8_t* dst = buf->rgba + (ptrdiff_t)io->mb_y * buf->stride;
WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace];
const uint8_t* cur_y = io->y;
const uint8_t* cur_u = io->u;
@@ -128,7 +136,7 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
const int mb_w = io->mb_w;
const int mb_h = io->mb_h;
uint8_t* dst = buf->a + (size_t)io->mb_y * buf->a_stride;
uint8_t* dst = buf->a + (ptrdiff_t)io->mb_y * buf->a_stride;
int j;
(void)expected_num_lines_out;
assert(expected_num_lines_out == mb_h);
@@ -181,8 +189,8 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
(colorspace == MODE_ARGB || colorspace == MODE_Argb);
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
int num_rows;
const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)start_y * buf->stride;
uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3);
const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w,
num_rows, dst, buf->stride);
@@ -205,8 +213,8 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
const WEBP_CSP_MODE colorspace = p->output->colorspace;
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
int num_rows;
const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)start_y * buf->stride;
#if (WEBP_SWAP_16BIT_CSP == 1)
uint8_t* alpha_dst = base_rgba;
#else
@@ -257,7 +265,7 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
if (WebPIsAlphaMode(p->output->colorspace) && io->a != NULL) {
// Before rescaling, we premultiply the luma directly into the io->y
// internal buffer. This is OK since these samples are not used for
// intra-prediction (the top samples are saved in cache_y_/u_/v_).
// intra-prediction (the top samples are saved in cache_y/u/v).
// But we need to cast the const away, though.
WebPMultRows((uint8_t*)io->y, io->y_stride,
io->a, io->width, io->mb_w, mb_h, 0);
@@ -271,9 +279,9 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
int expected_num_lines_out) {
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
uint8_t* const dst_a = buf->a + (size_t)p->last_y * buf->a_stride;
uint8_t* const dst_a = buf->a + (ptrdiff_t)p->last_y * buf->a_stride;
if (io->a != NULL) {
uint8_t* const dst_y = buf->y + (size_t)p->last_y * buf->y_stride;
uint8_t* const dst_y = buf->y + (ptrdiff_t)p->last_y * buf->y_stride;
const int num_lines_out = Rescale(io->a, io->width, io->mb_h, p->scaler_a);
assert(expected_num_lines_out == num_lines_out);
if (num_lines_out > 0) { // unmultiply the Y
@@ -362,7 +370,7 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) {
const WebPYUV444Converter convert =
WebPYUV444Converters[p->output->colorspace];
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* dst = buf->rgba + (size_t)y_pos * buf->stride;
uint8_t* dst = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
int num_lines_out = 0;
// For RGB rescaling, because of the YUV420, current scan position
// U/V can be +1/-1 line from the Y one. Hence the double test.
@@ -389,14 +397,14 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
while (j < mb_h) {
const int y_lines_in =
WebPRescalerImport(p->scaler_y, mb_h - j,
io->y + (size_t)j * io->y_stride, io->y_stride);
io->y + (ptrdiff_t)j * io->y_stride, io->y_stride);
j += y_lines_in;
if (WebPRescaleNeededLines(p->scaler_u, uv_mb_h - uv_j)) {
const int u_lines_in = WebPRescalerImport(
p->scaler_u, uv_mb_h - uv_j, io->u + (size_t)uv_j * io->uv_stride,
p->scaler_u, uv_mb_h - uv_j, io->u + (ptrdiff_t)uv_j * io->uv_stride,
io->uv_stride);
const int v_lines_in = WebPRescalerImport(
p->scaler_v, uv_mb_h - uv_j, io->v + (size_t)uv_j * io->uv_stride,
p->scaler_v, uv_mb_h - uv_j, io->v + (ptrdiff_t)uv_j * io->uv_stride,
io->uv_stride);
(void)v_lines_in; // remove a gcc warning
assert(u_lines_in == v_lines_in);
@@ -409,7 +417,7 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * buf->stride;
uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
const WEBP_CSP_MODE colorspace = p->output->colorspace;
const int alpha_first =
(colorspace == MODE_ARGB || colorspace == MODE_Argb);
@@ -437,7 +445,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
int max_lines_out) {
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * buf->stride;
uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
#if (WEBP_SWAP_16BIT_CSP == 1)
uint8_t* alpha_dst = base_rgba;
#else
@@ -476,7 +484,7 @@ static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
int lines_left = expected_num_out_lines;
const int y_end = p->last_y + lines_left;
while (lines_left > 0) {
const int64_t row_offset = (int64_t)scaler->src_y - io->mb_y;
const int64_t row_offset = (ptrdiff_t)scaler->src_y - io->mb_y;
WebPRescalerImport(scaler, io->mb_h + io->mb_y - scaler->src_y,
io->a + row_offset * io->width, io->width);
lines_left -= p->emit_alpha_row(p, y_end - lines_left, lines_left);

View File

@@ -11,7 +11,11 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dec/common_dec.h"
#include "src/dec/vp8_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/utils/bit_reader_utils.h"
#include "src/webp/types.h"
static WEBP_INLINE int clip(int v, int M) {
return v < 0 ? 0 : v > M ? M : v;
@@ -60,7 +64,7 @@ static const uint16_t kAcTable[128] = {
// Paragraph 9.6
void VP8ParseQuant(VP8Decoder* const dec) {
VP8BitReader* const br = &dec->br_;
VP8BitReader* const br = &dec->br;
const int base_q0 = VP8GetValue(br, 7, "global-header");
const int dqy1_dc = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 4, "global-header") : 0;
@@ -73,43 +77,42 @@ void VP8ParseQuant(VP8Decoder* const dec) {
const int dquv_ac = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 4, "global-header") : 0;
const VP8SegmentHeader* const hdr = &dec->segment_hdr_;
const VP8SegmentHeader* const hdr = &dec->segment_hdr;
int i;
for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
int q;
if (hdr->use_segment_) {
q = hdr->quantizer_[i];
if (!hdr->absolute_delta_) {
if (hdr->use_segment) {
q = hdr->quantizer[i];
if (!hdr->absolute_delta) {
q += base_q0;
}
} else {
if (i > 0) {
dec->dqm_[i] = dec->dqm_[0];
dec->dqm[i] = dec->dqm[0];
continue;
} else {
q = base_q0;
}
}
{
VP8QuantMatrix* const m = &dec->dqm_[i];
m->y1_mat_[0] = kDcTable[clip(q + dqy1_dc, 127)];
m->y1_mat_[1] = kAcTable[clip(q + 0, 127)];
VP8QuantMatrix* const m = &dec->dqm[i];
m->y1_mat[0] = kDcTable[clip(q + dqy1_dc, 127)];
m->y1_mat[1] = kAcTable[clip(q + 0, 127)];
m->y2_mat_[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2;
m->y2_mat[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2;
// For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
// The smallest precision for that is '(x*6349) >> 12' but 16 is a good
// word size.
m->y2_mat_[1] = (kAcTable[clip(q + dqy2_ac, 127)] * 101581) >> 16;
if (m->y2_mat_[1] < 8) m->y2_mat_[1] = 8;
m->y2_mat[1] = (kAcTable[clip(q + dqy2_ac, 127)] * 101581) >> 16;
if (m->y2_mat[1] < 8) m->y2_mat[1] = 8;
m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)];
m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)];
m->uv_mat[0] = kDcTable[clip(q + dquv_dc, 117)];
m->uv_mat[1] = kAcTable[clip(q + dquv_ac, 127)];
m->uv_quant_ = q + dquv_ac; // for dithering strength evaluation
m->uv_quant = q + dquv_ac; // for dithering strength evaluation
}
}
}
//------------------------------------------------------------------------------

View File

@@ -11,9 +11,15 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include <string.h>
#include "src/dec/common_dec.h"
#include "src/webp/types.h"
#include "src/dec/vp8_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dsp/cpu.h"
#include "src/utils/bit_reader_inl_utils.h"
#include "src/utils/bit_reader_utils.h"
#if !defined(USE_GENERIC_TREE)
#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 && \
@@ -284,40 +290,40 @@ static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
};
void VP8ResetProba(VP8Proba* const proba) {
memset(proba->segments_, 255u, sizeof(proba->segments_));
// proba->bands_[][] is initialized later
memset(proba->segments, 255u, sizeof(proba->segments));
// proba->bands[][] is initialized later
}
static void ParseIntraMode(VP8BitReader* const br,
VP8Decoder* const dec, int mb_x) {
uint8_t* const top = dec->intra_t_ + 4 * mb_x;
uint8_t* const left = dec->intra_l_;
VP8MBData* const block = dec->mb_data_ + mb_x;
uint8_t* const top = dec->intra_t + 4 * mb_x;
uint8_t* const left = dec->intra_l;
VP8MBData* const block = dec->mb_data + mb_x;
// Note: we don't save segment map (yet), as we don't expect
// to decode more than 1 keyframe.
if (dec->segment_hdr_.update_map_) {
if (dec->segment_hdr.update_map) {
// Hardcoded tree parsing
block->segment_ = !VP8GetBit(br, dec->proba_.segments_[0], "segments")
? VP8GetBit(br, dec->proba_.segments_[1], "segments")
: VP8GetBit(br, dec->proba_.segments_[2], "segments") + 2;
block->segment = !VP8GetBit(br, dec->proba.segments[0], "segments")
? VP8GetBit(br, dec->proba.segments[1], "segments")
: VP8GetBit(br, dec->proba.segments[2], "segments") + 2;
} else {
block->segment_ = 0; // default for intra
block->segment = 0; // default for intra
}
if (dec->use_skip_proba_) block->skip_ = VP8GetBit(br, dec->skip_p_, "skip");
if (dec->use_skip_proba) block->skip = VP8GetBit(br, dec->skip_p, "skip");
block->is_i4x4_ = !VP8GetBit(br, 145, "block-size");
if (!block->is_i4x4_) {
block->is_i4x4 = !VP8GetBit(br, 145, "block-size");
if (!block->is_i4x4) {
// Hardcoded 16x16 intra-mode decision tree.
const int ymode =
VP8GetBit(br, 156, "pred-modes") ?
(VP8GetBit(br, 128, "pred-modes") ? TM_PRED : H_PRED) :
(VP8GetBit(br, 163, "pred-modes") ? V_PRED : DC_PRED);
block->imodes_[0] = ymode;
block->imodes[0] = ymode;
memset(top, ymode, 4 * sizeof(*top));
memset(left, ymode, 4 * sizeof(*left));
} else {
uint8_t* modes = block->imodes_;
uint8_t* modes = block->imodes;
int y;
for (y = 0; y < 4; ++y) {
int ymode = left[y];
@@ -354,17 +360,17 @@ static void ParseIntraMode(VP8BitReader* const br,
}
}
// Hardcoded UVMode decision tree
block->uvmode_ = !VP8GetBit(br, 142, "pred-modes-uv") ? DC_PRED
: !VP8GetBit(br, 114, "pred-modes-uv") ? V_PRED
: VP8GetBit(br, 183, "pred-modes-uv") ? TM_PRED : H_PRED;
block->uvmode = !VP8GetBit(br, 142, "pred-modes-uv") ? DC_PRED
: !VP8GetBit(br, 114, "pred-modes-uv") ? V_PRED
: VP8GetBit(br, 183, "pred-modes-uv") ? TM_PRED : H_PRED;
}
int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
int mb_x;
for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
for (mb_x = 0; mb_x < dec->mb_w; ++mb_x) {
ParseIntraMode(br, dec, mb_x);
}
return !dec->br_.eof_;
return !dec->br.eof;
}
//------------------------------------------------------------------------------
@@ -514,7 +520,7 @@ static const uint8_t kBands[16 + 1] = {
};
void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
VP8Proba* const proba = &dec->proba_;
VP8Proba* const proba = &dec->proba;
int t, b, c, p;
for (t = 0; t < NUM_TYPES; ++t) {
for (b = 0; b < NUM_BANDS; ++b) {
@@ -524,16 +530,16 @@ void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
VP8GetBit(br, CoeffsUpdateProba[t][b][c][p], "global-header") ?
VP8GetValue(br, 8, "global-header") :
CoeffsProba0[t][b][c][p];
proba->bands_[t][b].probas_[c][p] = v;
proba->bands[t][b].probas[c][p] = v;
}
}
}
for (b = 0; b < 16 + 1; ++b) {
proba->bands_ptr_[t][b] = &proba->bands_[t][kBands[b]];
proba->bands_ptr[t][b] = &proba->bands[t][kBands[b]];
}
}
dec->use_skip_proba_ = VP8Get(br, "global-header");
if (dec->use_skip_proba_) {
dec->skip_p_ = VP8GetValue(br, 8, "global-header");
dec->use_skip_proba = VP8Get(br, "global-header");
if (dec->use_skip_proba) {
dec->skip_p = VP8GetValue(br, 8, "global-header");
}
}

View File

@@ -11,14 +11,25 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "src/dec/alphai_dec.h"
#include "src/dec/common_dec.h"
#include "src/dec/vp8_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/utils/bit_reader_inl_utils.h"
#include "src/utils/bit_reader_utils.h"
#include "src/utils/thread_utils.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
@@ -40,8 +51,8 @@ static void InitGetCoeffs(void);
// VP8Decoder
static void SetOk(VP8Decoder* const dec) {
dec->status_ = VP8_STATUS_OK;
dec->error_msg_ = "OK";
dec->status = VP8_STATUS_OK;
dec->error_msg = "OK";
}
int VP8InitIoInternal(VP8Io* const io, int version) {
@@ -58,9 +69,9 @@ VP8Decoder* VP8New(void) {
VP8Decoder* const dec = (VP8Decoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
if (dec != NULL) {
SetOk(dec);
WebPGetWorkerInterface()->Init(&dec->worker_);
dec->ready_ = 0;
dec->num_parts_minus_one_ = 0;
WebPGetWorkerInterface()->Init(&dec->worker);
dec->ready = 0;
dec->num_parts_minus_one = 0;
InitGetCoeffs();
}
return dec;
@@ -68,13 +79,13 @@ VP8Decoder* VP8New(void) {
VP8StatusCode VP8Status(VP8Decoder* const dec) {
if (!dec) return VP8_STATUS_INVALID_PARAM;
return dec->status_;
return dec->status;
}
const char* VP8StatusMessage(VP8Decoder* const dec) {
if (dec == NULL) return "no object";
if (!dec->error_msg_) return "OK";
return dec->error_msg_;
if (!dec->error_msg) return "OK";
return dec->error_msg;
}
void VP8Delete(VP8Decoder* const dec) {
@@ -87,12 +98,12 @@ void VP8Delete(VP8Decoder* const dec) {
int VP8SetError(VP8Decoder* const dec,
VP8StatusCode error, const char* const msg) {
// VP8_STATUS_SUSPENDED is only meaningful in incremental decoding.
assert(dec->incremental_ || error != VP8_STATUS_SUSPENDED);
assert(dec->incremental || error != VP8_STATUS_SUSPENDED);
// The oldest error reported takes precedence over the new one.
if (dec->status_ == VP8_STATUS_OK) {
dec->status_ = error;
dec->error_msg_ = msg;
dec->ready_ = 0;
if (dec->status == VP8_STATUS_OK) {
dec->status = error;
dec->error_msg = msg;
dec->ready = 0;
}
return 0;
}
@@ -151,11 +162,11 @@ int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size,
static void ResetSegmentHeader(VP8SegmentHeader* const hdr) {
assert(hdr != NULL);
hdr->use_segment_ = 0;
hdr->update_map_ = 0;
hdr->absolute_delta_ = 1;
memset(hdr->quantizer_, 0, sizeof(hdr->quantizer_));
memset(hdr->filter_strength_, 0, sizeof(hdr->filter_strength_));
hdr->use_segment = 0;
hdr->update_map = 0;
hdr->absolute_delta = 1;
memset(hdr->quantizer, 0, sizeof(hdr->quantizer));
memset(hdr->filter_strength, 0, sizeof(hdr->filter_strength));
}
// Paragraph 9.3
@@ -163,32 +174,32 @@ static int ParseSegmentHeader(VP8BitReader* br,
VP8SegmentHeader* hdr, VP8Proba* proba) {
assert(br != NULL);
assert(hdr != NULL);
hdr->use_segment_ = VP8Get(br, "global-header");
if (hdr->use_segment_) {
hdr->update_map_ = VP8Get(br, "global-header");
hdr->use_segment = VP8Get(br, "global-header");
if (hdr->use_segment) {
hdr->update_map = VP8Get(br, "global-header");
if (VP8Get(br, "global-header")) { // update data
int s;
hdr->absolute_delta_ = VP8Get(br, "global-header");
hdr->absolute_delta = VP8Get(br, "global-header");
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
hdr->quantizer_[s] = VP8Get(br, "global-header") ?
hdr->quantizer[s] = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 7, "global-header") : 0;
}
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
hdr->filter_strength_[s] = VP8Get(br, "global-header") ?
hdr->filter_strength[s] = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 6, "global-header") : 0;
}
}
if (hdr->update_map_) {
if (hdr->update_map) {
int s;
for (s = 0; s < MB_FEATURE_TREE_PROBS; ++s) {
proba->segments_[s] = VP8Get(br, "global-header") ?
proba->segments[s] = VP8Get(br, "global-header") ?
VP8GetValue(br, 8, "global-header") : 255u;
}
}
} else {
hdr->update_map_ = 0;
hdr->update_map = 0;
}
return !br->eof_;
return !br->eof;
}
// Paragraph 9.5
@@ -202,7 +213,7 @@ static int ParseSegmentHeader(VP8BitReader* br,
// If the partitions were positioned ok, VP8_STATUS_OK is returned.
static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
const uint8_t* buf, size_t size) {
VP8BitReader* const br = &dec->br_;
VP8BitReader* const br = &dec->br;
const uint8_t* sz = buf;
const uint8_t* buf_end = buf + size;
const uint8_t* part_start;
@@ -210,8 +221,8 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
size_t last_part;
size_t p;
dec->num_parts_minus_one_ = (1 << VP8GetValue(br, 2, "global-header")) - 1;
last_part = dec->num_parts_minus_one_;
dec->num_parts_minus_one = (1 << VP8GetValue(br, 2, "global-header")) - 1;
last_part = dec->num_parts_minus_one;
if (size < 3 * last_part) {
// we can't even read the sizes with sz[]! That's a failure.
return VP8_STATUS_NOT_ENOUGH_DATA;
@@ -221,42 +232,42 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
for (p = 0; p < last_part; ++p) {
size_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16);
if (psize > size_left) psize = size_left;
VP8InitBitReader(dec->parts_ + p, part_start, psize);
VP8InitBitReader(dec->parts + p, part_start, psize);
part_start += psize;
size_left -= psize;
sz += 3;
}
VP8InitBitReader(dec->parts_ + last_part, part_start, size_left);
VP8InitBitReader(dec->parts + last_part, part_start, size_left);
if (part_start < buf_end) return VP8_STATUS_OK;
return dec->incremental_
return dec->incremental
? VP8_STATUS_SUSPENDED // Init is ok, but there's not enough data
: VP8_STATUS_NOT_ENOUGH_DATA;
}
// Paragraph 9.4
static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
VP8FilterHeader* const hdr = &dec->filter_hdr_;
hdr->simple_ = VP8Get(br, "global-header");
hdr->level_ = VP8GetValue(br, 6, "global-header");
hdr->sharpness_ = VP8GetValue(br, 3, "global-header");
hdr->use_lf_delta_ = VP8Get(br, "global-header");
if (hdr->use_lf_delta_) {
VP8FilterHeader* const hdr = &dec->filter_hdr;
hdr->simple = VP8Get(br, "global-header");
hdr->level = VP8GetValue(br, 6, "global-header");
hdr->sharpness = VP8GetValue(br, 3, "global-header");
hdr->use_lf_delta = VP8Get(br, "global-header");
if (hdr->use_lf_delta) {
if (VP8Get(br, "global-header")) { // update lf-delta?
int i;
for (i = 0; i < NUM_REF_LF_DELTAS; ++i) {
if (VP8Get(br, "global-header")) {
hdr->ref_lf_delta_[i] = VP8GetSignedValue(br, 6, "global-header");
hdr->ref_lf_delta[i] = VP8GetSignedValue(br, 6, "global-header");
}
}
for (i = 0; i < NUM_MODE_LF_DELTAS; ++i) {
if (VP8Get(br, "global-header")) {
hdr->mode_lf_delta_[i] = VP8GetSignedValue(br, 6, "global-header");
hdr->mode_lf_delta[i] = VP8GetSignedValue(br, 6, "global-header");
}
}
}
}
dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
return !br->eof_;
dec->filter_type = (hdr->level == 0) ? 0 : hdr->simple ? 1 : 2;
return !br->eof;
}
// Topmost call
@@ -286,16 +297,16 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
// Paragraph 9.1
{
const uint32_t bits = buf[0] | (buf[1] << 8) | (buf[2] << 16);
frm_hdr = &dec->frm_hdr_;
frm_hdr->key_frame_ = !(bits & 1);
frm_hdr->profile_ = (bits >> 1) & 7;
frm_hdr->show_ = (bits >> 4) & 1;
frm_hdr->partition_length_ = (bits >> 5);
if (frm_hdr->profile_ > 3) {
frm_hdr = &dec->frm_hdr;
frm_hdr->key_frame = !(bits & 1);
frm_hdr->profile = (bits >> 1) & 7;
frm_hdr->show = (bits >> 4) & 1;
frm_hdr->partition_length = (bits >> 5);
if (frm_hdr->profile > 3) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"Incorrect keyframe parameters.");
}
if (!frm_hdr->show_) {
if (!frm_hdr->show) {
return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
"Frame not displayable.");
}
@@ -303,8 +314,8 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
buf_size -= 3;
}
pic_hdr = &dec->pic_hdr_;
if (frm_hdr->key_frame_) {
pic_hdr = &dec->pic_hdr;
if (frm_hdr->key_frame) {
// Paragraph 9.2
if (buf_size < 7) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
@@ -314,20 +325,20 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"Bad code word");
}
pic_hdr->width_ = ((buf[4] << 8) | buf[3]) & 0x3fff;
pic_hdr->xscale_ = buf[4] >> 6; // ratio: 1, 5/4 5/3 or 2
pic_hdr->height_ = ((buf[6] << 8) | buf[5]) & 0x3fff;
pic_hdr->yscale_ = buf[6] >> 6;
pic_hdr->width = ((buf[4] << 8) | buf[3]) & 0x3fff;
pic_hdr->xscale = buf[4] >> 6; // ratio: 1, 5/4 5/3 or 2
pic_hdr->height = ((buf[6] << 8) | buf[5]) & 0x3fff;
pic_hdr->yscale = buf[6] >> 6;
buf += 7;
buf_size -= 7;
dec->mb_w_ = (pic_hdr->width_ + 15) >> 4;
dec->mb_h_ = (pic_hdr->height_ + 15) >> 4;
dec->mb_w = (pic_hdr->width + 15) >> 4;
dec->mb_h = (pic_hdr->height + 15) >> 4;
// Setup default output area (can be later modified during io->setup())
io->width = pic_hdr->width_;
io->height = pic_hdr->height_;
// IMPORTANT! use some sane dimensions in crop_* and scaled_* fields.
io->width = pic_hdr->width;
io->height = pic_hdr->height;
// IMPORTANT! use some sane dimensions in crop* and scaled* fields.
// So they can be used interchangeably without always testing for
// 'use_cropping'.
io->use_cropping = 0;
@@ -342,27 +353,27 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
io->mb_w = io->width; // for soundness
io->mb_h = io->height; // ditto
VP8ResetProba(&dec->proba_);
ResetSegmentHeader(&dec->segment_hdr_);
VP8ResetProba(&dec->proba);
ResetSegmentHeader(&dec->segment_hdr);
}
// Check if we have all the partition #0 available, and initialize dec->br_
// Check if we have all the partition #0 available, and initialize dec->br
// to read this partition (and this partition only).
if (frm_hdr->partition_length_ > buf_size) {
if (frm_hdr->partition_length > buf_size) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"bad partition length");
}
br = &dec->br_;
VP8InitBitReader(br, buf, frm_hdr->partition_length_);
buf += frm_hdr->partition_length_;
buf_size -= frm_hdr->partition_length_;
br = &dec->br;
VP8InitBitReader(br, buf, frm_hdr->partition_length);
buf += frm_hdr->partition_length;
buf_size -= frm_hdr->partition_length;
if (frm_hdr->key_frame_) {
pic_hdr->colorspace_ = VP8Get(br, "global-header");
pic_hdr->clamp_type_ = VP8Get(br, "global-header");
if (frm_hdr->key_frame) {
pic_hdr->colorspace = VP8Get(br, "global-header");
pic_hdr->clamp_type = VP8Get(br, "global-header");
}
if (!ParseSegmentHeader(br, &dec->segment_hdr_, &dec->proba_)) {
if (!ParseSegmentHeader(br, &dec->segment_hdr, &dec->proba)) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"cannot parse segment header");
}
@@ -380,17 +391,17 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
VP8ParseQuant(dec);
// Frame buffer marking
if (!frm_hdr->key_frame_) {
if (!frm_hdr->key_frame) {
return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
"Not a key frame.");
}
VP8Get(br, "global-header"); // ignore the value of update_proba_
VP8Get(br, "global-header"); // ignore the value of 'update_proba'
VP8ParseProba(br, dec);
// sanitized state
dec->ready_ = 1;
dec->ready = 1;
return 1;
}
@@ -443,17 +454,17 @@ static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
static int GetCoeffsFast(VP8BitReader* const br,
const VP8BandProbas* const prob[],
int ctx, const quant_t dq, int n, int16_t* out) {
const uint8_t* p = prob[n]->probas_[ctx];
const uint8_t* p = prob[n]->probas[ctx];
for (; n < 16; ++n) {
if (!VP8GetBit(br, p[0], "coeffs")) {
return n; // previous coeff was last non-zero coeff
}
while (!VP8GetBit(br, p[1], "coeffs")) { // sequence of zero coeffs
p = prob[++n]->probas_[0];
p = prob[++n]->probas[0];
if (n == 16) return 16;
}
{ // non zero coeff
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas[0];
int v;
if (!VP8GetBit(br, p[2], "coeffs")) {
v = 1;
@@ -473,17 +484,17 @@ static int GetCoeffsFast(VP8BitReader* const br,
static int GetCoeffsAlt(VP8BitReader* const br,
const VP8BandProbas* const prob[],
int ctx, const quant_t dq, int n, int16_t* out) {
const uint8_t* p = prob[n]->probas_[ctx];
const uint8_t* p = prob[n]->probas[ctx];
for (; n < 16; ++n) {
if (!VP8GetBitAlt(br, p[0], "coeffs")) {
return n; // previous coeff was last non-zero coeff
}
while (!VP8GetBitAlt(br, p[1], "coeffs")) { // sequence of zero coeffs
p = prob[++n]->probas_[0];
p = prob[++n]->probas[0];
if (n == 16) return 16;
}
{ // non zero coeff
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas[0];
int v;
if (!VP8GetBitAlt(br, p[2], "coeffs")) {
v = 1;
@@ -516,12 +527,12 @@ static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
static int ParseResiduals(VP8Decoder* const dec,
VP8MB* const mb, VP8BitReader* const token_br) {
const VP8BandProbas* (* const bands)[16 + 1] = dec->proba_.bands_ptr_;
const VP8BandProbas* (* const bands)[16 + 1] = dec->proba.bands_ptr;
const VP8BandProbas* const * ac_proba;
VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
const VP8QuantMatrix* const q = &dec->dqm_[block->segment_];
int16_t* dst = block->coeffs_;
VP8MB* const left_mb = dec->mb_info_ - 1;
VP8MBData* const block = dec->mb_data + dec->mb_x;
const VP8QuantMatrix* const q = &dec->dqm[block->segment];
int16_t* dst = block->coeffs;
VP8MB* const left_mb = dec->mb_info - 1;
uint8_t tnz, lnz;
uint32_t non_zero_y = 0;
uint32_t non_zero_uv = 0;
@@ -530,11 +541,11 @@ static int ParseResiduals(VP8Decoder* const dec,
int first;
memset(dst, 0, 384 * sizeof(*dst));
if (!block->is_i4x4_) { // parse DC
if (!block->is_i4x4) { // parse DC
int16_t dc[16] = { 0 };
const int ctx = mb->nz_dc_ + left_mb->nz_dc_;
const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat_, 0, dc);
mb->nz_dc_ = left_mb->nz_dc_ = (nz > 0);
const int ctx = mb->nz_dc + left_mb->nz_dc;
const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat, 0, dc);
mb->nz_dc = left_mb->nz_dc = (nz > 0);
if (nz > 1) { // more than just the DC -> perform the full transform
VP8TransformWHT(dc, dst);
} else { // only DC is non-zero -> inlined simplified transform
@@ -549,14 +560,14 @@ static int ParseResiduals(VP8Decoder* const dec,
ac_proba = bands[3];
}
tnz = mb->nz_ & 0x0f;
lnz = left_mb->nz_ & 0x0f;
tnz = mb->nz & 0x0f;
lnz = left_mb->nz & 0x0f;
for (y = 0; y < 4; ++y) {
int l = lnz & 1;
uint32_t nz_coeffs = 0;
for (x = 0; x < 4; ++x) {
const int ctx = l + (tnz & 1);
const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat_, first, dst);
const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat, first, dst);
l = (nz > first);
tnz = (tnz >> 1) | (l << 7);
nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
@@ -571,13 +582,13 @@ static int ParseResiduals(VP8Decoder* const dec,
for (ch = 0; ch < 4; ch += 2) {
uint32_t nz_coeffs = 0;
tnz = mb->nz_ >> (4 + ch);
lnz = left_mb->nz_ >> (4 + ch);
tnz = mb->nz >> (4 + ch);
lnz = left_mb->nz >> (4 + ch);
for (y = 0; y < 2; ++y) {
int l = lnz & 1;
for (x = 0; x < 2; ++x) {
const int ctx = l + (tnz & 1);
const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat_, 0, dst);
const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat, 0, dst);
l = (nz > 0);
tnz = (tnz >> 1) | (l << 3);
nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
@@ -591,16 +602,16 @@ static int ParseResiduals(VP8Decoder* const dec,
out_t_nz |= (tnz << 4) << ch;
out_l_nz |= (lnz & 0xf0) << ch;
}
mb->nz_ = out_t_nz;
left_mb->nz_ = out_l_nz;
mb->nz = out_t_nz;
left_mb->nz = out_l_nz;
block->non_zero_y_ = non_zero_y;
block->non_zero_uv_ = non_zero_uv;
block->non_zero_y = non_zero_y;
block->non_zero_uv = non_zero_uv;
// We look at the mode-code of each block and check if some blocks have less
// than three non-zero coeffs (code < 2). This is to avoid dithering flat and
// empty blocks.
block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_;
block->dither = (non_zero_uv & 0xaaaa) ? 0 : q->dither;
return !(non_zero_y | non_zero_uv); // will be used for further optimization
}
@@ -609,50 +620,50 @@ static int ParseResiduals(VP8Decoder* const dec,
// Main loop
int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
VP8MB* const left = dec->mb_info_ - 1;
VP8MB* const mb = dec->mb_info_ + dec->mb_x_;
VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
int skip = dec->use_skip_proba_ ? block->skip_ : 0;
VP8MB* const left = dec->mb_info - 1;
VP8MB* const mb = dec->mb_info + dec->mb_x;
VP8MBData* const block = dec->mb_data + dec->mb_x;
int skip = dec->use_skip_proba ? block->skip : 0;
if (!skip) {
skip = ParseResiduals(dec, mb, token_br);
} else {
left->nz_ = mb->nz_ = 0;
if (!block->is_i4x4_) {
left->nz_dc_ = mb->nz_dc_ = 0;
left->nz = mb->nz = 0;
if (!block->is_i4x4) {
left->nz_dc = mb->nz_dc = 0;
}
block->non_zero_y_ = 0;
block->non_zero_uv_ = 0;
block->dither_ = 0;
block->non_zero_y = 0;
block->non_zero_uv = 0;
block->dither = 0;
}
if (dec->filter_type_ > 0) { // store filter info
VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
*finfo = dec->fstrengths_[block->segment_][block->is_i4x4_];
finfo->f_inner_ |= !skip;
if (dec->filter_type > 0) { // store filter info
VP8FInfo* const finfo = dec->f_info + dec->mb_x;
*finfo = dec->fstrengths[block->segment][block->is_i4x4];
finfo->f_inner |= !skip;
}
return !token_br->eof_;
return !token_br->eof;
}
void VP8InitScanline(VP8Decoder* const dec) {
VP8MB* const left = dec->mb_info_ - 1;
left->nz_ = 0;
left->nz_dc_ = 0;
memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_));
dec->mb_x_ = 0;
VP8MB* const left = dec->mb_info - 1;
left->nz = 0;
left->nz_dc = 0;
memset(dec->intra_l, B_DC_PRED, sizeof(dec->intra_l));
dec->mb_x = 0;
}
static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) {
for (dec->mb_y = 0; dec->mb_y < dec->br_mb_y; ++dec->mb_y) {
// Parse bitstream for this row.
VP8BitReader* const token_br =
&dec->parts_[dec->mb_y_ & dec->num_parts_minus_one_];
if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
&dec->parts[dec->mb_y & dec->num_parts_minus_one];
if (!VP8ParseIntraModeRow(&dec->br, dec)) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Premature end-of-partition0 encountered.");
}
for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
for (; dec->mb_x < dec->mb_w; ++dec->mb_x) {
if (!VP8DecodeMB(dec, token_br)) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Premature end-of-file encountered.");
@@ -665,8 +676,8 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
}
}
if (dec->mt_method_ > 0) {
if (!WebPGetWorkerInterface()->Sync(&dec->worker_)) return 0;
if (dec->mt_method > 0) {
if (!WebPGetWorkerInterface()->Sync(&dec->worker)) return 0;
}
return 1;
@@ -683,12 +694,12 @@ int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
"NULL VP8Io parameter in VP8Decode().");
}
if (!dec->ready_) {
if (!dec->ready) {
if (!VP8GetHeaders(dec, io)) {
return 0;
}
}
assert(dec->ready_);
assert(dec->ready);
// Finish setting up the decoding parameter. Will call io->setup().
ok = (VP8EnterCritical(dec, io) == VP8_STATUS_OK);
@@ -708,7 +719,7 @@ int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
return 0;
}
dec->ready_ = 0;
dec->ready = 0;
return ok;
}
@@ -716,13 +727,13 @@ void VP8Clear(VP8Decoder* const dec) {
if (dec == NULL) {
return;
}
WebPGetWorkerInterface()->End(&dec->worker_);
WebPGetWorkerInterface()->End(&dec->worker);
WebPDeallocateAlphaMemory(dec);
WebPSafeFree(dec->mem_);
dec->mem_ = NULL;
dec->mem_size_ = 0;
memset(&dec->br_, 0, sizeof(dec->br_));
dec->ready_ = 0;
WebPSafeFree(dec->mem);
dec->mem = NULL;
dec->mem_size = 0;
memset(&dec->br, 0, sizeof(dec->br));
dec->ready = 0;
}
//------------------------------------------------------------------------------

View File

@@ -14,6 +14,8 @@
#ifndef WEBP_DEC_VP8_DEC_H_
#define WEBP_DEC_VP8_DEC_H_
#include <stddef.h>
#include "src/webp/decode.h"
#include "src/webp/types.h"

View File

@@ -15,12 +15,16 @@
#define WEBP_DEC_VP8I_DEC_H_
#include <string.h> // for memcpy()
#include "src/dec/common_dec.h"
#include "src/dec/vp8_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/dsp/dsp.h"
#include "src/utils/bit_reader_utils.h"
#include "src/utils/random_utils.h"
#include "src/utils/thread_utils.h"
#include "src/dsp/dsp.h"
#include "src/webp/decode.h"
#include "src/webp/types.h"
#ifdef __cplusplus
@@ -32,7 +36,7 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 1
#define DEC_MIN_VERSION 5
#define DEC_MIN_VERSION 6
#define DEC_REV_VERSION 0
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
@@ -69,85 +73,85 @@ extern "C" {
// Headers
typedef struct {
uint8_t key_frame_;
uint8_t profile_;
uint8_t show_;
uint32_t partition_length_;
uint8_t key_frame;
uint8_t profile;
uint8_t show;
uint32_t partition_length;
} VP8FrameHeader;
typedef struct {
uint16_t width_;
uint16_t height_;
uint8_t xscale_;
uint8_t yscale_;
uint8_t colorspace_; // 0 = YCbCr
uint8_t clamp_type_;
uint16_t width;
uint16_t height;
uint8_t xscale;
uint8_t yscale;
uint8_t colorspace; // 0 = YCbCr
uint8_t clamp_type;
} VP8PictureHeader;
// segment features
typedef struct {
int use_segment_;
int update_map_; // whether to update the segment map or not
int absolute_delta_; // absolute or delta values for quantizer and filter
int8_t quantizer_[NUM_MB_SEGMENTS]; // quantization changes
int8_t filter_strength_[NUM_MB_SEGMENTS]; // filter strength for segments
int use_segment;
int update_map; // whether to update the segment map or not
int absolute_delta; // absolute or delta values for quantizer and filter
int8_t quantizer[NUM_MB_SEGMENTS]; // quantization changes
int8_t filter_strength[NUM_MB_SEGMENTS]; // filter strength for segments
} VP8SegmentHeader;
// probas associated to one of the contexts
typedef uint8_t VP8ProbaArray[NUM_PROBAS];
typedef struct { // all the probas associated to one band
VP8ProbaArray probas_[NUM_CTX];
VP8ProbaArray probas[NUM_CTX];
} VP8BandProbas;
// Struct collecting all frame-persistent probabilities.
typedef struct {
uint8_t segments_[MB_FEATURE_TREE_PROBS];
uint8_t segments[MB_FEATURE_TREE_PROBS];
// Type: 0:Intra16-AC 1:Intra16-DC 2:Chroma 3:Intra4
VP8BandProbas bands_[NUM_TYPES][NUM_BANDS];
const VP8BandProbas* bands_ptr_[NUM_TYPES][16 + 1];
VP8BandProbas bands[NUM_TYPES][NUM_BANDS];
const VP8BandProbas* bands_ptr[NUM_TYPES][16 + 1];
} VP8Proba;
// Filter parameters
typedef struct {
int simple_; // 0=complex, 1=simple
int level_; // [0..63]
int sharpness_; // [0..7]
int use_lf_delta_;
int ref_lf_delta_[NUM_REF_LF_DELTAS];
int mode_lf_delta_[NUM_MODE_LF_DELTAS];
int simple; // 0=complex, 1=simple
int level; // [0..63]
int sharpness; // [0..7]
int use_lf_delta;
int ref_lf_delta[NUM_REF_LF_DELTAS];
int mode_lf_delta[NUM_MODE_LF_DELTAS];
} VP8FilterHeader;
//------------------------------------------------------------------------------
// Informations about the macroblocks.
typedef struct { // filter specs
uint8_t f_limit_; // filter limit in [3..189], or 0 if no filtering
uint8_t f_ilevel_; // inner limit in [1..63]
uint8_t f_inner_; // do inner filtering?
uint8_t hev_thresh_; // high edge variance threshold in [0..2]
uint8_t f_limit; // filter limit in [3..189], or 0 if no filtering
uint8_t f_ilevel; // inner limit in [1..63]
uint8_t f_inner; // do inner filtering?
uint8_t hev_thresh; // high edge variance threshold in [0..2]
} VP8FInfo;
typedef struct { // Top/Left Contexts used for syntax-parsing
uint8_t nz_; // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
uint8_t nz_dc_; // non-zero DC coeff (1bit)
uint8_t nz; // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
uint8_t nz_dc; // non-zero DC coeff (1bit)
} VP8MB;
// Dequantization matrices
typedef int quant_t[2]; // [DC / AC]. Can be 'uint16_t[2]' too (~slower).
typedef struct {
quant_t y1_mat_, y2_mat_, uv_mat_;
quant_t y1_mat, y2_mat, uv_mat;
int uv_quant_; // U/V quantizer value
int dither_; // dithering amplitude (0 = off, max=255)
int uv_quant; // U/V quantizer value
int dither; // dithering amplitude (0 = off, max=255)
} VP8QuantMatrix;
// Data needed to reconstruct a macroblock
typedef struct {
int16_t coeffs_[384]; // 384 coeffs = (16+4+4) * 4*4
uint8_t is_i4x4_; // true if intra4x4
uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
uint8_t uvmode_; // chroma prediction mode
int16_t coeffs[384]; // 384 coeffs = (16+4+4) * 4*4
uint8_t is_i4x4; // true if intra4x4
uint8_t imodes[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
uint8_t uvmode; // chroma prediction mode
// bit-wise info about the content of each sub-4x4 blocks (in decoding order).
// Each of the 4x4 blocks for y/u/v is associated with a 2b code according to:
// code=0 -> no coefficient
@@ -155,21 +159,21 @@ typedef struct {
// code=2 -> first three coefficients are non-zero
// code=3 -> more than three coefficients are non-zero
// This allows to call specialized transform functions.
uint32_t non_zero_y_;
uint32_t non_zero_uv_;
uint8_t dither_; // local dithering strength (deduced from non_zero_*)
uint8_t skip_;
uint8_t segment_;
uint32_t non_zero_y;
uint32_t non_zero_uv;
uint8_t dither; // local dithering strength (deduced from non_zero*)
uint8_t skip;
uint8_t segment;
} VP8MBData;
// Persistent information needed by the parallel processing
typedef struct {
int id_; // cache row to process (in [0..2])
int mb_y_; // macroblock position of the row
int filter_row_; // true if row-filtering is needed
VP8FInfo* f_info_; // filter strengths (swapped with dec->f_info_)
VP8MBData* mb_data_; // reconstruction data (swapped with dec->mb_data_)
VP8Io io_; // copy of the VP8Io to pass to put()
int id; // cache row to process (in [0..2])
int mb_y; // macroblock position of the row
int filter_row; // true if row-filtering is needed
VP8FInfo* f_info; // filter strengths (swapped with dec->f_info)
VP8MBData* mb_data; // reconstruction data (swapped with dec->mb_data)
VP8Io io; // copy of the VP8Io to pass to put()
} VP8ThreadContext;
// Saved top samples, per macroblock. Fits into a cache-line.
@@ -181,89 +185,89 @@ typedef struct {
// VP8Decoder: the main opaque structure handed over to user
struct VP8Decoder {
VP8StatusCode status_;
int ready_; // true if ready to decode a picture with VP8Decode()
const char* error_msg_; // set when status_ is not OK.
VP8StatusCode status;
int ready; // true if ready to decode a picture with VP8Decode()
const char* error_msg; // set when status is not OK.
// Main data source
VP8BitReader br_;
int incremental_; // if true, incremental decoding is expected
VP8BitReader br;
int incremental; // if true, incremental decoding is expected
// headers
VP8FrameHeader frm_hdr_;
VP8PictureHeader pic_hdr_;
VP8FilterHeader filter_hdr_;
VP8SegmentHeader segment_hdr_;
VP8FrameHeader frm_hdr;
VP8PictureHeader pic_hdr;
VP8FilterHeader filter_hdr;
VP8SegmentHeader segment_hdr;
// Worker
WebPWorker worker_;
int mt_method_; // multi-thread method: 0=off, 1=[parse+recon][filter]
// 2=[parse][recon+filter]
int cache_id_; // current cache row
int num_caches_; // number of cached rows of 16 pixels (1, 2 or 3)
VP8ThreadContext thread_ctx_; // Thread context
WebPWorker worker;
int mt_method; // multi-thread method: 0=off, 1=[parse+recon][filter]
// 2=[parse][recon+filter]
int cache_id; // current cache row
int num_caches; // number of cached rows of 16 pixels (1, 2 or 3)
VP8ThreadContext thread_ctx; // Thread context
// dimension, in macroblock units.
int mb_w_, mb_h_;
int mb_w, mb_h;
// Macroblock to process/filter, depending on cropping and filter_type.
int tl_mb_x_, tl_mb_y_; // top-left MB that must be in-loop filtered
int br_mb_x_, br_mb_y_; // last bottom-right MB that must be decoded
int tl_mb_x, tl_mb_y; // top-left MB that must be in-loop filtered
int br_mb_x, br_mb_y; // last bottom-right MB that must be decoded
// number of partitions minus one.
uint32_t num_parts_minus_one_;
uint32_t num_parts_minus_one;
// per-partition boolean decoders.
VP8BitReader parts_[MAX_NUM_PARTITIONS];
VP8BitReader parts[MAX_NUM_PARTITIONS];
// Dithering strength, deduced from decoding options
int dither_; // whether to use dithering or not
VP8Random dithering_rg_; // random generator for dithering
int dither; // whether to use dithering or not
VP8Random dithering_rg; // random generator for dithering
// dequantization (one set of DC/AC dequant factor per segment)
VP8QuantMatrix dqm_[NUM_MB_SEGMENTS];
VP8QuantMatrix dqm[NUM_MB_SEGMENTS];
// probabilities
VP8Proba proba_;
int use_skip_proba_;
uint8_t skip_p_;
VP8Proba proba;
int use_skip_proba;
uint8_t skip_p;
// Boundary data cache and persistent buffers.
uint8_t* intra_t_; // top intra modes values: 4 * mb_w_
uint8_t intra_l_[4]; // left intra modes values
uint8_t* intra_t; // top intra modes values: 4 * mb_w
uint8_t intra_l[4]; // left intra modes values
VP8TopSamples* yuv_t_; // top y/u/v samples
VP8TopSamples* yuv_t; // top y/u/v samples
VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1)
VP8FInfo* f_info_; // filter strength info
uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE)
VP8MB* mb_info; // contextual macroblock info (mb_w + 1)
VP8FInfo* f_info; // filter strength info
uint8_t* yuv_b; // main block for Y/U/V (size = YUV_SIZE)
uint8_t* cache_y_; // macroblock row for storing unfiltered samples
uint8_t* cache_u_;
uint8_t* cache_v_;
int cache_y_stride_;
int cache_uv_stride_;
uint8_t* cache_y; // macroblock row for storing unfiltered samples
uint8_t* cache_u;
uint8_t* cache_v;
int cache_y_stride;
int cache_uv_stride;
// main memory chunk for the above data. Persistent.
void* mem_;
size_t mem_size_;
void* mem;
size_t mem_size;
// Per macroblock non-persistent infos.
int mb_x_, mb_y_; // current position, in macroblock units
VP8MBData* mb_data_; // parsed reconstruction data
int mb_x, mb_y; // current position, in macroblock units
VP8MBData* mb_data; // parsed reconstruction data
// Filtering side-info
int filter_type_; // 0=off, 1=simple, 2=complex
VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type
int filter_type; // 0=off, 1=simple, 2=complex
VP8FInfo fstrengths[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type
// Alpha
struct ALPHDecoder* alph_dec_; // alpha-plane decoder object
const uint8_t* alpha_data_; // compressed alpha data (if present)
size_t alpha_data_size_;
int is_alpha_decoded_; // true if alpha_data_ is decoded in alpha_plane_
uint8_t* alpha_plane_mem_; // memory allocated for alpha_plane_
uint8_t* alpha_plane_; // output. Persistent, contains the whole data.
const uint8_t* alpha_prev_line_; // last decoded alpha row (or NULL)
int alpha_dithering_; // derived from decoding options (0=off, 100=full)
struct ALPHDecoder* alph_dec; // alpha-plane decoder object
const uint8_t* alpha_data; // compressed alpha data (if present)
size_t alpha_data_size;
int is_alpha_decoded; // true if alpha_data is decoded in alpha_plane
uint8_t* alpha_plane_mem; // memory allocated for alpha_plane
uint8_t* alpha_plane; // output. Persistent, contains the whole data.
const uint8_t* alpha_prev_line; // last decoded alpha row (or NULL)
int alpha_dithering; // derived from decoding options (0=off, 100=full)
};
//------------------------------------------------------------------------------

File diff suppressed because it is too large Load Diff

View File

@@ -16,10 +16,15 @@
#define WEBP_DEC_VP8LI_DEC_H_
#include <string.h> // for memcpy()
#include "src/dec/vp8_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/bit_reader_utils.h"
#include "src/utils/color_cache_utils.h"
#include "src/utils/huffman_utils.h"
#include "src/utils/rescaler_utils.h"
#include "src/webp/decode.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
#ifdef __cplusplus
@@ -34,58 +39,58 @@ typedef enum {
typedef struct VP8LTransform VP8LTransform;
struct VP8LTransform {
VP8LImageTransformType type_; // transform type.
int bits_; // subsampling bits defining transform window.
int xsize_; // transform window X index.
int ysize_; // transform window Y index.
uint32_t* data_; // transform data.
VP8LImageTransformType type; // transform type.
int bits; // subsampling bits defining transform window.
int xsize; // transform window X index.
int ysize; // transform window Y index.
uint32_t* data; // transform data.
};
typedef struct {
int color_cache_size_;
VP8LColorCache color_cache_;
VP8LColorCache saved_color_cache_; // for incremental
int color_cache_size;
VP8LColorCache color_cache;
VP8LColorCache saved_color_cache; // for incremental
int huffman_mask_;
int huffman_subsample_bits_;
int huffman_xsize_;
uint32_t* huffman_image_;
int num_htree_groups_;
HTreeGroup* htree_groups_;
HuffmanTables huffman_tables_;
int huffman_mask;
int huffman_subsample_bits;
int huffman_xsize;
uint32_t* huffman_image;
int num_htree_groups;
HTreeGroup* htree_groups;
HuffmanTables huffman_tables;
} VP8LMetadata;
typedef struct VP8LDecoder VP8LDecoder;
struct VP8LDecoder {
VP8StatusCode status_;
VP8LDecodeState state_;
VP8Io* io_;
VP8StatusCode status;
VP8LDecodeState state;
VP8Io* io;
const WebPDecBuffer* output_; // shortcut to io->opaque->output
const WebPDecBuffer* output; // shortcut to io->opaque->output
uint32_t* pixels_; // Internal data: either uint8_t* for alpha
// or uint32_t* for BGRA.
uint32_t* argb_cache_; // Scratch buffer for temporary BGRA storage.
uint32_t* pixels; // Internal data: either uint8_t* for alpha
// or uint32_t* for BGRA.
uint32_t* argb_cache; // Scratch buffer for temporary BGRA storage.
VP8LBitReader br_;
int incremental_; // if true, incremental decoding is expected
VP8LBitReader saved_br_; // note: could be local variables too
int saved_last_pixel_;
VP8LBitReader br;
int incremental; // if true, incremental decoding is expected
VP8LBitReader saved_br; // note: could be local variables too
int saved_last_pixel;
int width_;
int height_;
int last_row_; // last input row decoded so far.
int last_pixel_; // last pixel decoded so far. However, it may
// not be transformed, scaled and
// color-converted yet.
int last_out_row_; // last row output so far.
int width;
int height;
int last_row; // last input row decoded so far.
int last_pixel; // last pixel decoded so far. However, it may
// not be transformed, scaled and
// color-converted yet.
int last_out_row; // last row output so far.
VP8LMetadata hdr_;
VP8LMetadata hdr;
int next_transform_;
VP8LTransform transforms_[NUM_TRANSFORMS];
int next_transform;
VP8LTransform transforms[NUM_TRANSFORMS];
// or'd bitset storing the transforms types.
uint32_t transforms_seen_;
uint32_t transforms_seen;
uint8_t* rescaler_memory; // Working memory for rescaling work.
WebPRescaler* rescaler; // Common rescaler for all channels.
@@ -118,7 +123,7 @@ WEBP_NODISCARD VP8LDecoder* VP8LNew(void);
WEBP_NODISCARD int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io);
// Decodes an image. It's required to decode the lossless header before calling
// this function. Returns false in case of error, with updated dec->status_.
// this function. Returns false in case of error, with updated dec->status.
WEBP_NODISCARD int VP8LDecodeImage(VP8LDecoder* const dec);
// Clears and deallocate a lossless decoder instance.

View File

@@ -11,15 +11,20 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "src/dec/common_dec.h"
#include "src/dec/vp8_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/rescaler_utils.h"
#include "src/utils/utils.h"
#include "src/webp/mux_types.h" // ALPHA_FLAG
#include "src/webp/decode.h"
#include "src/webp/format_constants.h"
#include "src/webp/mux_types.h" // ALPHA_FLAG
#include "src/webp/types.h"
//------------------------------------------------------------------------------
@@ -475,23 +480,23 @@ WEBP_NODISCARD static VP8StatusCode DecodeInto(const uint8_t* const data,
if (dec == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
dec->alpha_data_ = headers.alpha_data;
dec->alpha_data_size_ = headers.alpha_data_size;
dec->alpha_data = headers.alpha_data;
dec->alpha_data_size = headers.alpha_data_size;
// Decode bitstream header, update io->width/io->height.
if (!VP8GetHeaders(dec, &io)) {
status = dec->status_; // An error occurred. Grab error status.
status = dec->status; // An error occurred. Grab error status.
} else {
// Allocate/check output buffers.
status = WebPAllocateDecBuffer(io.width, io.height, params->options,
params->output);
if (status == VP8_STATUS_OK) { // Decode
// This change must be done before calling VP8Decode()
dec->mt_method_ = VP8GetThreadMethod(params->options, &headers,
io.width, io.height);
dec->mt_method = VP8GetThreadMethod(params->options, &headers,
io.width, io.height);
VP8InitDithering(params->options, dec);
if (!VP8Decode(dec, &io)) {
status = dec->status_;
status = dec->status;
}
}
}
@@ -502,14 +507,14 @@ WEBP_NODISCARD static VP8StatusCode DecodeInto(const uint8_t* const data,
return VP8_STATUS_OUT_OF_MEMORY;
}
if (!VP8LDecodeHeader(dec, &io)) {
status = dec->status_; // An error occurred. Grab error status.
status = dec->status; // An error occurred. Grab error status.
} else {
// Allocate/check output buffers.
status = WebPAllocateDecBuffer(io.width, io.height, params->options,
params->output);
if (status == VP8_STATUS_OK) { // Decode
if (!VP8LDecodeImage(dec)) {
status = dec->status_;
status = dec->status;
}
}
}
@@ -747,6 +752,61 @@ int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
return 1;
}
// Image-independent sanity check of a crop rectangle: the origin (x, y) must
// be non-negative and the size (w, h) strictly positive.
// Returns 1 when the rectangle passes, 0 otherwise.
static int WebPCheckCropDimensionsBasic(int x, int y, int w, int h) {
  const int origin_ok = (x >= 0 && y >= 0);
  const int size_ok = (w > 0 && h > 0);
  return origin_ok && size_ok;
}
// Validates a decoder configuration.
// Returns 1 if 'config' is acceptable, 0 otherwise (a NULL 'config' is
// rejected). The checks cover the output colorspace, the crop rectangle, the
// scaled dimensions and the dithering strengths; when the input dimensions are
// already known, cropping and scaling are also checked against them.
int WebPValidateDecoderConfig(const WebPDecoderConfig* config) {
  const WebPDecoderOptions* opts;

  if (config == NULL) return 0;
  if (!IsValidColorspace(config->output.colorspace)) return 0;
  opts = &config->options;

  // bypass_filtering, no_fancy_upsampling, use_cropping, use_scaling,
  // use_threads and flip are interpreted as booleans, so any integer value is
  // accepted for them.

  // Cropping: origin must be non-negative and size strictly positive.
  if (opts->use_cropping) {
    if (!WebPCheckCropDimensionsBasic(opts->crop_left, opts->crop_top,
                                      opts->crop_width, opts->crop_height)) {
      return 0;
    }
  }

  // Scaling: no negative dimension, and the two must not both be zero.
  if (opts->use_scaling) {
    if (opts->scaled_width < 0 || opts->scaled_height < 0) return 0;
    if (opts->scaled_width == 0 && opts->scaled_height == 0) return 0;
  }

  // When the WebPBitstreamFeatures have been filled in, the input size is
  // available and the request can be checked against it.
  if (config->input.width > 0 || config->input.height > 0) {
    if (opts->use_cropping &&
        !WebPCheckCropDimensions(config->input.width, config->input.height,
                                 opts->crop_left, opts->crop_top,
                                 opts->crop_width, opts->crop_height)) {
      return 0;
    }
    if (opts->use_scaling) {
      // The helper receives the sizes by pointer; local copies are passed
      // because 'config' is const here.
      int out_width = opts->scaled_width;
      int out_height = opts->scaled_height;
      if (!WebPRescalerGetScaledDimensions(config->input.width,
                                           config->input.height,
                                           &out_width, &out_height)) {
        return 0;
      }
    }
  }

  // Dithering: both strengths must lie within [0, 100].
  return opts->dithering_strength >= 0 &&
         opts->dithering_strength <= 100 &&
         opts->alpha_dithering_strength >= 0 &&
         opts->alpha_dithering_strength <= 100;
}
VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
WebPBitstreamFeatures* features,
int version) {
@@ -806,8 +866,8 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
// Returns 1 if the crop rectangle (x, y, w, h) passes the basic checks and
// lies entirely inside an image of image_width x image_height, 0 otherwise.
int WebPCheckCropDimensions(int image_width, int image_height,
                            int x, int y, int w, int h) {
  // The extent is compared as 'w > image_width - x' (resp. h / y), so the sum
  // x + w is never formed.
  return WebPCheckCropDimensionsBasic(x, y, w, h) &&
         !(x >= image_width || w > image_width || w > image_width - x ||
           y >= image_height || h > image_height || h > image_height - y);
}

View File

@@ -18,9 +18,12 @@
extern "C" {
#endif
#include "src/utils/rescaler_utils.h"
#include <stddef.h>
#include "src/dec/vp8_dec.h"
#include "src/utils/rescaler_utils.h"
#include "src/webp/decode.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// WebPDecParams: Decoding output parameters. Transient internal object.

View File

@@ -20,6 +20,8 @@
#include "src/utils/utils.h"
#include "src/webp/decode.h"
#include "src/webp/demux.h"
#include "src/webp/mux.h"
#include "src/webp/mux_types.h"
#include "src/webp/types.h"
#define NUM_CHANNELS 4
@@ -39,18 +41,18 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
int num_pixels);
struct WebPAnimDecoder {
WebPDemuxer* demux_; // Demuxer created from given WebP bitstream.
WebPDecoderConfig config_; // Decoder config.
WebPDemuxer* demux; // Demuxer created from given WebP bitstream.
WebPDecoderConfig config; // Decoder config.
// Note: we use a pointer to a function blending multiple pixels at a time to
// allow possible inlining of per-pixel blending function.
BlendRowFunc blend_func_; // Pointer to the chose blend row function.
WebPAnimInfo info_; // Global info about the animation.
uint8_t* curr_frame_; // Current canvas (not disposed).
uint8_t* prev_frame_disposed_; // Previous canvas (properly disposed).
int prev_frame_timestamp_; // Previous frame timestamp (milliseconds).
WebPIterator prev_iter_; // Iterator object for previous frame.
int prev_frame_was_keyframe_; // True if previous frame was a keyframe.
int next_frame_; // Index of the next frame to be decoded
BlendRowFunc blend_func; // Pointer to the chose blend row function.
WebPAnimInfo info; // Global info about the animation.
uint8_t* curr_frame; // Current canvas (not disposed).
uint8_t* prev_frame_disposed; // Previous canvas (properly disposed).
int prev_frame_timestamp; // Previous frame timestamp (milliseconds).
WebPIterator prev_iter; // Iterator object for previous frame.
int prev_frame_was_keyframe; // True if previous frame was a keyframe.
int next_frame; // Index of the next frame to be decoded
// (starting from 1).
};
@@ -73,7 +75,7 @@ WEBP_NODISCARD static int ApplyDecoderOptions(
const WebPAnimDecoderOptions* const dec_options,
WebPAnimDecoder* const dec) {
WEBP_CSP_MODE mode;
WebPDecoderConfig* config = &dec->config_;
WebPDecoderConfig* config = &dec->config;
assert(dec_options != NULL);
mode = dec_options->color_mode;
@@ -81,9 +83,9 @@ WEBP_NODISCARD static int ApplyDecoderOptions(
mode != MODE_rgbA && mode != MODE_bgrA) {
return 0;
}
dec->blend_func_ = (mode == MODE_RGBA || mode == MODE_BGRA)
? &BlendPixelRowNonPremult
: &BlendPixelRowPremult;
dec->blend_func = (mode == MODE_RGBA || mode == MODE_BGRA)
? &BlendPixelRowNonPremult
: &BlendPixelRowPremult;
if (!WebPInitDecoderConfig(config)) {
return 0;
}
@@ -123,22 +125,22 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal(
}
if (!ApplyDecoderOptions(&options, dec)) goto Error;
dec->demux_ = WebPDemux(webp_data);
if (dec->demux_ == NULL) goto Error;
dec->demux = WebPDemux(webp_data);
if (dec->demux == NULL) goto Error;
dec->info_.canvas_width = WebPDemuxGetI(dec->demux_, WEBP_FF_CANVAS_WIDTH);
dec->info_.canvas_height = WebPDemuxGetI(dec->demux_, WEBP_FF_CANVAS_HEIGHT);
dec->info_.loop_count = WebPDemuxGetI(dec->demux_, WEBP_FF_LOOP_COUNT);
dec->info_.bgcolor = WebPDemuxGetI(dec->demux_, WEBP_FF_BACKGROUND_COLOR);
dec->info_.frame_count = WebPDemuxGetI(dec->demux_, WEBP_FF_FRAME_COUNT);
dec->info.canvas_width = WebPDemuxGetI(dec->demux, WEBP_FF_CANVAS_WIDTH);
dec->info.canvas_height = WebPDemuxGetI(dec->demux, WEBP_FF_CANVAS_HEIGHT);
dec->info.loop_count = WebPDemuxGetI(dec->demux, WEBP_FF_LOOP_COUNT);
dec->info.bgcolor = WebPDemuxGetI(dec->demux, WEBP_FF_BACKGROUND_COLOR);
dec->info.frame_count = WebPDemuxGetI(dec->demux, WEBP_FF_FRAME_COUNT);
// Note: calloc() because we fill frame with zeroes as well.
dec->curr_frame_ = (uint8_t*)WebPSafeCalloc(
dec->info_.canvas_width * NUM_CHANNELS, dec->info_.canvas_height);
if (dec->curr_frame_ == NULL) goto Error;
dec->prev_frame_disposed_ = (uint8_t*)WebPSafeCalloc(
dec->info_.canvas_width * NUM_CHANNELS, dec->info_.canvas_height);
if (dec->prev_frame_disposed_ == NULL) goto Error;
dec->curr_frame = (uint8_t*)WebPSafeCalloc(
dec->info.canvas_width * NUM_CHANNELS, dec->info.canvas_height);
if (dec->curr_frame == NULL) goto Error;
dec->prev_frame_disposed = (uint8_t*)WebPSafeCalloc(
dec->info.canvas_width * NUM_CHANNELS, dec->info.canvas_height);
if (dec->prev_frame_disposed == NULL) goto Error;
WebPAnimDecoderReset(dec);
return dec;
@@ -150,7 +152,7 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal(
// Copies the decoder's global animation info into '*info'.
// Returns 1 on success, 0 if either pointer is NULL.
int WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec, WebPAnimInfo* info) {
  if (dec == NULL || info == NULL) return 0;
  *info = dec->info;
  return 1;
}
@@ -338,25 +340,25 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
if (dec == NULL || buf_ptr == NULL || timestamp_ptr == NULL) return 0;
if (!WebPAnimDecoderHasMoreFrames(dec)) return 0;
width = dec->info_.canvas_width;
height = dec->info_.canvas_height;
blend_row = dec->blend_func_;
width = dec->info.canvas_width;
height = dec->info.canvas_height;
blend_row = dec->blend_func;
// Get compressed frame.
if (!WebPDemuxGetFrame(dec->demux_, dec->next_frame_, &iter)) {
if (!WebPDemuxGetFrame(dec->demux, dec->next_frame, &iter)) {
return 0;
}
timestamp = dec->prev_frame_timestamp_ + iter.duration;
timestamp = dec->prev_frame_timestamp + iter.duration;
// Initialize.
is_key_frame = IsKeyFrame(&iter, &dec->prev_iter_,
dec->prev_frame_was_keyframe_, width, height);
is_key_frame = IsKeyFrame(&iter, &dec->prev_iter,
dec->prev_frame_was_keyframe, width, height);
if (is_key_frame) {
if (!ZeroFillCanvas(dec->curr_frame_, width, height)) {
if (!ZeroFillCanvas(dec->curr_frame, width, height)) {
goto Error;
}
} else {
if (!CopyCanvas(dec->prev_frame_disposed_, dec->curr_frame_,
if (!CopyCanvas(dec->prev_frame_disposed, dec->curr_frame,
width, height)) {
goto Error;
}
@@ -370,12 +372,12 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
const uint64_t out_offset = (uint64_t)iter.y_offset * stride +
(uint64_t)iter.x_offset * NUM_CHANNELS; // 53b
const uint64_t size = (uint64_t)iter.height * stride; // at most 25 + 27b
WebPDecoderConfig* const config = &dec->config_;
WebPDecoderConfig* const config = &dec->config;
WebPRGBABuffer* const buf = &config->output.u.RGBA;
if ((size_t)size != size) goto Error;
buf->stride = (int)stride;
buf->size = (size_t)size;
buf->rgba = dec->curr_frame_ + out_offset;
buf->rgba = dec->curr_frame + out_offset;
if (WebPDecode(in, in_size, config) != VP8_STATUS_OK) {
goto Error;
@@ -388,18 +390,18 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
// that pixel in the previous frame if blending method of is WEBP_MUX_BLEND.
if (iter.frame_num > 1 && iter.blend_method == WEBP_MUX_BLEND &&
!is_key_frame) {
if (dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_NONE) {
if (dec->prev_iter.dispose_method == WEBP_MUX_DISPOSE_NONE) {
int y;
// Blend transparent pixels with pixels in previous canvas.
for (y = 0; y < iter.height; ++y) {
const size_t offset =
(iter.y_offset + y) * width + iter.x_offset;
blend_row((uint32_t*)dec->curr_frame_ + offset,
(uint32_t*)dec->prev_frame_disposed_ + offset, iter.width);
blend_row((uint32_t*)dec->curr_frame + offset,
(uint32_t*)dec->prev_frame_disposed + offset, iter.width);
}
} else {
int y;
assert(dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND);
assert(dec->prev_iter.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND);
// We need to blend a transparent pixel with its value just after
// initialization. That is, blend it with:
// * Fully transparent pixel if it belongs to prevRect <-- No-op.
@@ -407,39 +409,39 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
for (y = 0; y < iter.height; ++y) {
const int canvas_y = iter.y_offset + y;
int left1, width1, left2, width2;
FindBlendRangeAtRow(&iter, &dec->prev_iter_, canvas_y, &left1, &width1,
FindBlendRangeAtRow(&iter, &dec->prev_iter, canvas_y, &left1, &width1,
&left2, &width2);
if (width1 > 0) {
const size_t offset1 = canvas_y * width + left1;
blend_row((uint32_t*)dec->curr_frame_ + offset1,
(uint32_t*)dec->prev_frame_disposed_ + offset1, width1);
blend_row((uint32_t*)dec->curr_frame + offset1,
(uint32_t*)dec->prev_frame_disposed + offset1, width1);
}
if (width2 > 0) {
const size_t offset2 = canvas_y * width + left2;
blend_row((uint32_t*)dec->curr_frame_ + offset2,
(uint32_t*)dec->prev_frame_disposed_ + offset2, width2);
blend_row((uint32_t*)dec->curr_frame + offset2,
(uint32_t*)dec->prev_frame_disposed + offset2, width2);
}
}
}
}
// Update info of the previous frame and dispose it for the next iteration.
dec->prev_frame_timestamp_ = timestamp;
WebPDemuxReleaseIterator(&dec->prev_iter_);
dec->prev_iter_ = iter;
dec->prev_frame_was_keyframe_ = is_key_frame;
if (!CopyCanvas(dec->curr_frame_, dec->prev_frame_disposed_, width, height)) {
dec->prev_frame_timestamp = timestamp;
WebPDemuxReleaseIterator(&dec->prev_iter);
dec->prev_iter = iter;
dec->prev_frame_was_keyframe = is_key_frame;
if (!CopyCanvas(dec->curr_frame, dec->prev_frame_disposed, width, height)) {
goto Error;
}
if (dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
ZeroFillFrameRect(dec->prev_frame_disposed_, width * NUM_CHANNELS,
dec->prev_iter_.x_offset, dec->prev_iter_.y_offset,
dec->prev_iter_.width, dec->prev_iter_.height);
if (dec->prev_iter.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
ZeroFillFrameRect(dec->prev_frame_disposed, width * NUM_CHANNELS,
dec->prev_iter.x_offset, dec->prev_iter.y_offset,
dec->prev_iter.width, dec->prev_iter.height);
}
++dec->next_frame_;
++dec->next_frame;
// All OK, fill in the values.
*buf_ptr = dec->curr_frame_;
*buf_ptr = dec->curr_frame;
*timestamp_ptr = timestamp;
return 1;
@@ -450,30 +452,30 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
// Returns non-zero while the index of the next frame to decode (1-based) does
// not exceed the frame count; returns 0 for a NULL decoder.
int WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec) {
  if (dec == NULL) return 0;
  return (dec->next_frame <= (int)dec->info.frame_count);
}
// Resets the per-frame decoding state so that the next frame to be decoded is
// frame #1. Does nothing when 'dec' is NULL.
void WebPAnimDecoderReset(WebPAnimDecoder* dec) {
  if (dec != NULL) {
    dec->prev_frame_timestamp = 0;
    // Release the previous-frame iterator before wiping it.
    WebPDemuxReleaseIterator(&dec->prev_iter);
    memset(&dec->prev_iter, 0, sizeof(dec->prev_iter));
    dec->prev_frame_was_keyframe = 0;
    dec->next_frame = 1;
  }
}
// Returns the demuxer held by 'dec', or NULL when 'dec' is NULL.
const WebPDemuxer* WebPAnimDecoderGetDemuxer(const WebPAnimDecoder* dec) {
  if (dec == NULL) return NULL;
  return dec->demux;
}
// Releases everything held by 'dec' (previous-frame iterator, demuxer, both
// canvases) and then the decoder object itself. Does nothing on NULL.
void WebPAnimDecoderDelete(WebPAnimDecoder* dec) {
  if (dec != NULL) {
    WebPDemuxReleaseIterator(&dec->prev_iter);
    WebPDemuxDelete(dec->demux);
    WebPSafeFree(dec->curr_frame);
    WebPSafeFree(dec->prev_frame_disposed);
    WebPSafeFree(dec);
  }
}

View File

@@ -22,55 +22,58 @@
#include "src/webp/decode.h" // WebPGetFeatures
#include "src/webp/demux.h"
#include "src/webp/format_constants.h"
#include "src/webp/mux.h"
#include "src/webp/mux_types.h"
#include "src/webp/types.h"
#define DMUX_MAJ_VERSION 1
#define DMUX_MIN_VERSION 5
#define DMUX_MIN_VERSION 6
#define DMUX_REV_VERSION 0
typedef struct {
size_t start_; // start location of the data
size_t end_; // end location
size_t riff_end_; // riff chunk end location, can be > end_.
size_t buf_size_; // size of the buffer
const uint8_t* buf_;
size_t start; // start location of the data
size_t end; // end location
size_t riff_end; // riff chunk end location, can be > end.
size_t buf_size; // size of the buffer
const uint8_t* buf;
} MemBuffer;
typedef struct {
size_t offset_;
size_t size_;
size_t offset;
size_t size;
} ChunkData;
typedef struct Frame {
int x_offset_, y_offset_;
int width_, height_;
int has_alpha_;
int duration_;
WebPMuxAnimDispose dispose_method_;
WebPMuxAnimBlend blend_method_;
int frame_num_;
int complete_; // img_components_ contains a full image.
ChunkData img_components_[2]; // 0=VP8{,L} 1=ALPH
struct Frame* next_;
int x_offset, y_offset;
int width, height;
int has_alpha;
int duration;
WebPMuxAnimDispose dispose_method;
WebPMuxAnimBlend blend_method;
int frame_num;
int complete; // img_components contains a full image.
ChunkData img_components[2]; // 0=VP8{,L} 1=ALPH
struct Frame* next;
} Frame;
typedef struct Chunk {
ChunkData data_;
struct Chunk* next_;
ChunkData data;
struct Chunk* next;
} Chunk;
struct WebPDemuxer {
MemBuffer mem_;
WebPDemuxState state_;
int is_ext_format_;
uint32_t feature_flags_;
int canvas_width_, canvas_height_;
int loop_count_;
uint32_t bgcolor_;
int num_frames_;
Frame* frames_;
Frame** frames_tail_;
Chunk* chunks_; // non-image chunks
Chunk** chunks_tail_;
MemBuffer mem;
WebPDemuxState state;
int is_ext_format;
uint32_t feature_flags;
int canvas_width, canvas_height;
int loop_count;
uint32_t bgcolor;
int num_frames;
Frame* frames;
Frame** frames_tail;
Chunk* chunks; // non-image chunks
Chunk** chunks_tail;
};
typedef enum {
@@ -108,10 +111,10 @@ int WebPGetDemuxVersion(void) {
// Points 'mem' at a new backing buffer 'data' of 'size' bytes.
// Returns 0 (leaving 'mem' untouched) if 'size' is smaller than the current
// buffer size, 1 otherwise.
static int RemapMemBuffer(MemBuffer* const mem,
                          const uint8_t* data, size_t size) {
  if (size < mem->buf_size) return 0;  // can't remap to a shorter buffer!

  mem->buf = data;
  mem->end = mem->buf_size = size;
  return 1;
}
@@ -123,49 +126,49 @@ static int InitMemBuffer(MemBuffer* const mem,
// Return the remaining data size available in 'mem', i.e. the distance
// between the current read position and the end of the data.
static WEBP_INLINE size_t MemDataSize(const MemBuffer* const mem) {
  return (mem->end - mem->start);
}
// Return true if 'size' exceeds the end of the RIFF chunk, measured from the
// current read position.
static WEBP_INLINE int SizeIsInvalid(const MemBuffer* const mem, size_t size) {
  return (size > mem->riff_end - mem->start);
}
// Advance the read position of 'mem' by 'size' bytes. No bounds check is done
// here.
static WEBP_INLINE void Skip(MemBuffer* const mem, size_t size) {
  mem->start += size;
}
// Move the read position of 'mem' back by 'size' bytes. No bounds check is
// done here.
static WEBP_INLINE void Rewind(MemBuffer* const mem, size_t size) {
  mem->start -= size;
}
// Return a pointer to the byte at the current read position of 'mem'.
static WEBP_INLINE const uint8_t* GetBuffer(MemBuffer* const mem) {
  return mem->buf + mem->start;
}
// Read one byte from 'mem' at the current position and skip past it.
static WEBP_INLINE uint8_t ReadByte(MemBuffer* const mem) {
  const uint8_t byte = mem->buf[mem->start];
  Skip(mem, 1);
  return byte;
}
// Read a value with GetLE16() at the current position of 'mem', then skip the
// 2 bytes consumed.
static WEBP_INLINE int ReadLE16s(MemBuffer* const mem) {
  const uint8_t* const data = mem->buf + mem->start;
  const int val = GetLE16(data);
  Skip(mem, 2);
  return val;
}
// Read a value with GetLE24() at the current position of 'mem', then skip the
// 3 bytes consumed.
static WEBP_INLINE int ReadLE24s(MemBuffer* const mem) {
  const uint8_t* const data = mem->buf + mem->start;
  const int val = GetLE24(data);
  Skip(mem, 3);
  return val;
}
static WEBP_INLINE uint32_t ReadLE32(MemBuffer* const mem) {
const uint8_t* const data = mem->buf_ + mem->start_;
const uint8_t* const data = mem->buf + mem->start;
const uint32_t val = GetLE32(data);
Skip(mem, 4);
return val;
@@ -175,20 +178,20 @@ static WEBP_INLINE uint32_t ReadLE32(MemBuffer* const mem) {
// Secondary chunk parsing
// Append 'chunk' to the demuxer's chunk list: store it in the slot the tail
// pointer designates, terminate it, and move the tail to its 'next' field.
static void AddChunk(WebPDemuxer* const dmux, Chunk* const chunk) {
  *dmux->chunks_tail = chunk;
  chunk->next = NULL;
  dmux->chunks_tail = &chunk->next;
}
// Add a frame to the end of the list, ensuring the last frame is complete.
// Returns true on success, false otherwise.
static int AddFrame(WebPDemuxer* const dmux, Frame* const frame) {
  // Refuse to append if the entry currently stored at the tail slot exists
  // and is not marked complete.
  const Frame* const last_frame = *dmux->frames_tail;
  if (last_frame != NULL && !last_frame->complete) return 0;

  *dmux->frames_tail = frame;
  frame->next = NULL;
  dmux->frames_tail = &frame->next;
  return 1;
}
@@ -196,13 +199,13 @@ static void SetFrameInfo(size_t start_offset, size_t size,
int frame_num, int complete,
const WebPBitstreamFeatures* const features,
Frame* const frame) {
frame->img_components_[0].offset_ = start_offset;
frame->img_components_[0].size_ = size;
frame->width_ = features->width;
frame->height_ = features->height;
frame->has_alpha_ |= features->has_alpha;
frame->frame_num_ = frame_num;
frame->complete_ = complete;
frame->img_components[0].offset = start_offset;
frame->img_components[0].size = size;
frame->width = features->width;
frame->height = features->height;
frame->has_alpha |= features->has_alpha;
frame->frame_num = frame_num;
frame->complete = complete;
}
// Store image bearing chunks to 'frame'. 'min_size' is an optional size
@@ -218,7 +221,7 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
if (done) return PARSE_NEED_MORE_DATA;
do {
const size_t chunk_start_offset = mem->start_;
const size_t chunk_start_offset = mem->start;
const uint32_t fourcc = ReadLE32(mem);
const uint32_t payload_size = ReadLE32(mem);
uint32_t payload_size_padded;
@@ -238,10 +241,10 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
case MKFOURCC('A', 'L', 'P', 'H'):
if (alpha_chunks == 0) {
++alpha_chunks;
frame->img_components_[1].offset_ = chunk_start_offset;
frame->img_components_[1].size_ = chunk_size;
frame->has_alpha_ = 1;
frame->frame_num_ = frame_num;
frame->img_components[1].offset = chunk_start_offset;
frame->img_components[1].size = chunk_size;
frame->has_alpha = 1;
frame->frame_num = frame_num;
Skip(mem, payload_available);
} else {
goto Done;
@@ -256,7 +259,7 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
// is incomplete.
WebPBitstreamFeatures features;
const VP8StatusCode vp8_status =
WebPGetFeatures(mem->buf_ + chunk_start_offset, chunk_size,
WebPGetFeatures(mem->buf + chunk_start_offset, chunk_size,
&features);
if (status == PARSE_NEED_MORE_DATA &&
vp8_status == VP8_STATUS_NOT_ENOUGH_DATA) {
@@ -281,7 +284,7 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
break;
}
if (mem->start_ == mem->riff_end_) {
if (mem->start == mem->riff_end) {
done = 1;
} else if (MemDataSize(mem) < CHUNK_HEADER_SIZE) {
status = PARSE_NEED_MORE_DATA;
@@ -310,42 +313,42 @@ static ParseStatus NewFrame(const MemBuffer* const mem,
// 'frame_chunk_size' is the previously validated, padded chunk size.
static ParseStatus ParseAnimationFrame(
WebPDemuxer* const dmux, uint32_t frame_chunk_size) {
const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const int is_animation = !!(dmux->feature_flags & ANIMATION_FLAG);
const uint32_t anmf_payload_size = frame_chunk_size - ANMF_CHUNK_SIZE;
int added_frame = 0;
int bits;
MemBuffer* const mem = &dmux->mem_;
MemBuffer* const mem = &dmux->mem;
Frame* frame;
size_t start_offset;
ParseStatus status =
NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
if (status != PARSE_OK) return status;
frame->x_offset_ = 2 * ReadLE24s(mem);
frame->y_offset_ = 2 * ReadLE24s(mem);
frame->width_ = 1 + ReadLE24s(mem);
frame->height_ = 1 + ReadLE24s(mem);
frame->duration_ = ReadLE24s(mem);
frame->x_offset = 2 * ReadLE24s(mem);
frame->y_offset = 2 * ReadLE24s(mem);
frame->width = 1 + ReadLE24s(mem);
frame->height = 1 + ReadLE24s(mem);
frame->duration = ReadLE24s(mem);
bits = ReadByte(mem);
frame->dispose_method_ =
frame->dispose_method =
(bits & 1) ? WEBP_MUX_DISPOSE_BACKGROUND : WEBP_MUX_DISPOSE_NONE;
frame->blend_method_ = (bits & 2) ? WEBP_MUX_NO_BLEND : WEBP_MUX_BLEND;
if (frame->width_ * (uint64_t)frame->height_ >= MAX_IMAGE_AREA) {
frame->blend_method = (bits & 2) ? WEBP_MUX_NO_BLEND : WEBP_MUX_BLEND;
if (frame->width * (uint64_t)frame->height >= MAX_IMAGE_AREA) {
WebPSafeFree(frame);
return PARSE_ERROR;
}
// Store a frame only if the animation flag is set there is some data for
// this frame is available.
start_offset = mem->start_;
status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame);
if (status != PARSE_ERROR && mem->start_ - start_offset > anmf_payload_size) {
start_offset = mem->start;
status = StoreFrame(dmux->num_frames + 1, anmf_payload_size, mem, frame);
if (status != PARSE_ERROR && mem->start - start_offset > anmf_payload_size) {
status = PARSE_ERROR;
}
if (status != PARSE_ERROR && is_animation && frame->frame_num_ > 0) {
if (status != PARSE_ERROR && is_animation && frame->frame_num > 0) {
added_frame = AddFrame(dmux, frame);
if (added_frame) {
++dmux->num_frames_;
++dmux->num_frames;
} else {
status = PARSE_ERROR;
}
@@ -364,8 +367,8 @@ static int StoreChunk(WebPDemuxer* const dmux,
Chunk* const chunk = (Chunk*)WebPSafeCalloc(1ULL, sizeof(*chunk));
if (chunk == NULL) return 0;
chunk->data_.offset_ = start_offset;
chunk->data_.size_ = size;
chunk->data.offset = start_offset;
chunk->data.size = size;
AddChunk(dmux, chunk);
return 1;
}
@@ -389,9 +392,9 @@ static ParseStatus ReadHeader(MemBuffer* const mem) {
if (riff_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
// There's no point in reading past the end of the RIFF chunk
mem->riff_end_ = riff_size + CHUNK_HEADER_SIZE;
if (mem->buf_size_ > mem->riff_end_) {
mem->buf_size_ = mem->end_ = mem->riff_end_;
mem->riff_end = riff_size + CHUNK_HEADER_SIZE;
if (mem->buf_size > mem->riff_end) {
mem->buf_size = mem->end = mem->riff_end;
}
Skip(mem, RIFF_HEADER_SIZE);
@@ -400,12 +403,12 @@ static ParseStatus ReadHeader(MemBuffer* const mem) {
static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
const size_t min_size = CHUNK_HEADER_SIZE;
MemBuffer* const mem = &dmux->mem_;
MemBuffer* const mem = &dmux->mem;
Frame* frame;
ParseStatus status;
int image_added = 0;
if (dmux->frames_ != NULL) return PARSE_ERROR;
if (dmux->frames != NULL) return PARSE_ERROR;
if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
@@ -414,29 +417,29 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
// For the single image case we allow parsing of a partial frame, so no
// minimum size is imposed here.
status = StoreFrame(1, 0, &dmux->mem_, frame);
status = StoreFrame(1, 0, &dmux->mem, frame);
if (status != PARSE_ERROR) {
const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
const int has_alpha = !!(dmux->feature_flags & ALPHA_FLAG);
// Clear any alpha when the alpha flag is missing.
if (!has_alpha && frame->img_components_[1].size_ > 0) {
frame->img_components_[1].offset_ = 0;
frame->img_components_[1].size_ = 0;
frame->has_alpha_ = 0;
if (!has_alpha && frame->img_components[1].size > 0) {
frame->img_components[1].offset = 0;
frame->img_components[1].size = 0;
frame->has_alpha = 0;
}
// Use the frame width/height as the canvas values for non-vp8x files.
// Also, set ALPHA_FLAG if this is a lossless image with alpha.
if (!dmux->is_ext_format_ && frame->width_ > 0 && frame->height_ > 0) {
dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
dmux->canvas_width_ = frame->width_;
dmux->canvas_height_ = frame->height_;
dmux->feature_flags_ |= frame->has_alpha_ ? ALPHA_FLAG : 0;
if (!dmux->is_ext_format && frame->width > 0 && frame->height > 0) {
dmux->state = WEBP_DEMUX_PARSED_HEADER;
dmux->canvas_width = frame->width;
dmux->canvas_height = frame->height;
dmux->feature_flags |= frame->has_alpha ? ALPHA_FLAG : 0;
}
if (!AddFrame(dmux, frame)) {
status = PARSE_ERROR; // last frame was left incomplete
} else {
image_added = 1;
dmux->num_frames_ = 1;
dmux->num_frames = 1;
}
}
@@ -445,14 +448,14 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
}
static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
MemBuffer* const mem = &dmux->mem_;
const int is_animation = !!(dmux->feature_flags & ANIMATION_FLAG);
MemBuffer* const mem = &dmux->mem;
int anim_chunks = 0;
ParseStatus status = PARSE_OK;
do {
int store_chunk = 1;
const size_t chunk_start_offset = mem->start_;
const size_t chunk_start_offset = mem->start;
const uint32_t fourcc = ReadLE32(mem);
const uint32_t chunk_size = ReadLE32(mem);
uint32_t chunk_size_padded;
@@ -483,8 +486,8 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
status = PARSE_NEED_MORE_DATA;
} else if (anim_chunks == 0) {
++anim_chunks;
dmux->bgcolor_ = ReadLE32(mem);
dmux->loop_count_ = ReadLE16s(mem);
dmux->bgcolor = ReadLE32(mem);
dmux->loop_count = ReadLE16s(mem);
Skip(mem, chunk_size_padded - ANIM_CHUNK_SIZE);
} else {
store_chunk = 0;
@@ -498,15 +501,15 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
break;
}
case MKFOURCC('I', 'C', 'C', 'P'): {
store_chunk = !!(dmux->feature_flags_ & ICCP_FLAG);
store_chunk = !!(dmux->feature_flags & ICCP_FLAG);
goto Skip;
}
case MKFOURCC('E', 'X', 'I', 'F'): {
store_chunk = !!(dmux->feature_flags_ & EXIF_FLAG);
store_chunk = !!(dmux->feature_flags & EXIF_FLAG);
goto Skip;
}
case MKFOURCC('X', 'M', 'P', ' '): {
store_chunk = !!(dmux->feature_flags_ & XMP_FLAG);
store_chunk = !!(dmux->feature_flags & XMP_FLAG);
goto Skip;
}
Skip:
@@ -527,7 +530,7 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
}
}
if (mem->start_ == mem->riff_end_) {
if (mem->start == mem->riff_end) {
break;
} else if (MemDataSize(mem) < CHUNK_HEADER_SIZE) {
status = PARSE_NEED_MORE_DATA;
@@ -538,12 +541,12 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
}
static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
MemBuffer* const mem = &dmux->mem_;
MemBuffer* const mem = &dmux->mem;
uint32_t vp8x_size;
if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
dmux->is_ext_format_ = 1;
dmux->is_ext_format = 1;
Skip(mem, TAG_SIZE); // VP8X
vp8x_size = ReadLE32(mem);
if (vp8x_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
@@ -552,15 +555,15 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
if (SizeIsInvalid(mem, vp8x_size)) return PARSE_ERROR;
if (MemDataSize(mem) < vp8x_size) return PARSE_NEED_MORE_DATA;
dmux->feature_flags_ = ReadByte(mem);
dmux->feature_flags = ReadByte(mem);
Skip(mem, 3); // Reserved.
dmux->canvas_width_ = 1 + ReadLE24s(mem);
dmux->canvas_height_ = 1 + ReadLE24s(mem);
if (dmux->canvas_width_ * (uint64_t)dmux->canvas_height_ >= MAX_IMAGE_AREA) {
dmux->canvas_width = 1 + ReadLE24s(mem);
dmux->canvas_height = 1 + ReadLE24s(mem);
if (dmux->canvas_width * (uint64_t)dmux->canvas_height >= MAX_IMAGE_AREA) {
return PARSE_ERROR; // image final dimension is too large
}
Skip(mem, vp8x_size - VP8X_CHUNK_SIZE); // skip any trailing data.
dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
dmux->state = WEBP_DEMUX_PARSED_HEADER;
if (SizeIsInvalid(mem, CHUNK_HEADER_SIZE)) return PARSE_ERROR;
if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
@@ -572,13 +575,13 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
// Format validation
// Validates the demuxer state for a simple (non-extended) file.
// Returns 1 if the state is acceptable, 0 otherwise.
static int IsValidSimpleFormat(const WebPDemuxer* const dmux) {
  const Frame* const frame = dmux->frames;
  // Nothing is checked while the header is still being parsed.
  if (dmux->state == WEBP_DEMUX_PARSING_HEADER) return 1;

  if (dmux->canvas_width <= 0 || dmux->canvas_height <= 0) return 0;
  // A fully parsed file must contain a frame.
  if (dmux->state == WEBP_DEMUX_DONE && frame == NULL) return 0;

  if (frame->width <= 0 || frame->height <= 0) return 0;
  return 1;
}
@@ -587,65 +590,65 @@ static int IsValidSimpleFormat(const WebPDemuxer* const dmux) {
static int CheckFrameBounds(const Frame* const frame, int exact,
int canvas_width, int canvas_height) {
if (exact) {
if (frame->x_offset_ != 0 || frame->y_offset_ != 0) {
if (frame->x_offset != 0 || frame->y_offset != 0) {
return 0;
}
if (frame->width_ != canvas_width || frame->height_ != canvas_height) {
if (frame->width != canvas_width || frame->height != canvas_height) {
return 0;
}
} else {
if (frame->x_offset_ < 0 || frame->y_offset_ < 0) return 0;
if (frame->width_ + frame->x_offset_ > canvas_width) return 0;
if (frame->height_ + frame->y_offset_ > canvas_height) return 0;
if (frame->x_offset < 0 || frame->y_offset < 0) return 0;
if (frame->width + frame->x_offset > canvas_width) return 0;
if (frame->height + frame->y_offset > canvas_height) return 0;
}
return 1;
}
static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const Frame* f = dmux->frames_;
const int is_animation = !!(dmux->feature_flags & ANIMATION_FLAG);
const Frame* f = dmux->frames;
if (dmux->state_ == WEBP_DEMUX_PARSING_HEADER) return 1;
if (dmux->state == WEBP_DEMUX_PARSING_HEADER) return 1;
if (dmux->canvas_width_ <= 0 || dmux->canvas_height_ <= 0) return 0;
if (dmux->loop_count_ < 0) return 0;
if (dmux->state_ == WEBP_DEMUX_DONE && dmux->frames_ == NULL) return 0;
if (dmux->feature_flags_ & ~ALL_VALID_FLAGS) return 0; // invalid bitstream
if (dmux->canvas_width <= 0 || dmux->canvas_height <= 0) return 0;
if (dmux->loop_count < 0) return 0;
if (dmux->state == WEBP_DEMUX_DONE && dmux->frames == NULL) return 0;
if (dmux->feature_flags & ~ALL_VALID_FLAGS) return 0; // invalid bitstream
while (f != NULL) {
const int cur_frame_set = f->frame_num_;
const int cur_frame_set = f->frame_num;
// Check frame properties.
for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
const ChunkData* const image = f->img_components_;
const ChunkData* const alpha = f->img_components_ + 1;
for (; f != NULL && f->frame_num == cur_frame_set; f = f->next) {
const ChunkData* const image = f->img_components;
const ChunkData* const alpha = f->img_components + 1;
if (!is_animation && f->frame_num_ > 1) return 0;
if (!is_animation && f->frame_num > 1) return 0;
if (f->complete_) {
if (alpha->size_ == 0 && image->size_ == 0) return 0;
if (f->complete) {
if (alpha->size == 0 && image->size == 0) return 0;
// Ensure alpha precedes image bitstream.
if (alpha->size_ > 0 && alpha->offset_ > image->offset_) {
if (alpha->size > 0 && alpha->offset > image->offset) {
return 0;
}
if (f->width_ <= 0 || f->height_ <= 0) return 0;
if (f->width <= 0 || f->height <= 0) return 0;
} else {
// There shouldn't be a partial frame in a complete file.
if (dmux->state_ == WEBP_DEMUX_DONE) return 0;
if (dmux->state == WEBP_DEMUX_DONE) return 0;
// Ensure alpha precedes image bitstream.
if (alpha->size_ > 0 && image->size_ > 0 &&
alpha->offset_ > image->offset_) {
if (alpha->size > 0 && image->size > 0 &&
alpha->offset > image->offset) {
return 0;
}
// There shouldn't be any frames after an incomplete one.
if (f->next_ != NULL) return 0;
if (f->next != NULL) return 0;
}
if (f->width_ > 0 && f->height_ > 0 &&
if (f->width > 0 && f->height > 0 &&
!CheckFrameBounds(f, !is_animation,
dmux->canvas_width_, dmux->canvas_height_)) {
dmux->canvas_width, dmux->canvas_height)) {
return 0;
}
}
@@ -657,21 +660,21 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
// WebPDemuxer object
static void InitDemux(WebPDemuxer* const dmux, const MemBuffer* const mem) {
dmux->state_ = WEBP_DEMUX_PARSING_HEADER;
dmux->loop_count_ = 1;
dmux->bgcolor_ = 0xFFFFFFFF; // White background by default.
dmux->canvas_width_ = -1;
dmux->canvas_height_ = -1;
dmux->frames_tail_ = &dmux->frames_;
dmux->chunks_tail_ = &dmux->chunks_;
dmux->mem_ = *mem;
dmux->state = WEBP_DEMUX_PARSING_HEADER;
dmux->loop_count = 1;
dmux->bgcolor = 0xFFFFFFFF; // White background by default.
dmux->canvas_width = -1;
dmux->canvas_height = -1;
dmux->frames_tail = &dmux->frames;
dmux->chunks_tail = &dmux->chunks;
dmux->mem = *mem;
}
static ParseStatus CreateRawImageDemuxer(MemBuffer* const mem,
WebPDemuxer** demuxer) {
WebPBitstreamFeatures features;
const VP8StatusCode status =
WebPGetFeatures(mem->buf_, mem->buf_size_, &features);
WebPGetFeatures(mem->buf, mem->buf_size, &features);
*demuxer = NULL;
if (status != VP8_STATUS_OK) {
return (status == VP8_STATUS_NOT_ENOUGH_DATA) ? PARSE_NEED_MORE_DATA
@@ -683,14 +686,14 @@ static ParseStatus CreateRawImageDemuxer(MemBuffer* const mem,
Frame* const frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
if (dmux == NULL || frame == NULL) goto Error;
InitDemux(dmux, mem);
SetFrameInfo(0, mem->buf_size_, 1 /*frame_num*/, 1 /*complete*/, &features,
SetFrameInfo(0, mem->buf_size, 1 /*frame_num*/, 1 /*complete*/, &features,
frame);
if (!AddFrame(dmux, frame)) goto Error;
dmux->state_ = WEBP_DEMUX_DONE;
dmux->canvas_width_ = frame->width_;
dmux->canvas_height_ = frame->height_;
dmux->feature_flags_ |= frame->has_alpha_ ? ALPHA_FLAG : 0;
dmux->num_frames_ = 1;
dmux->state = WEBP_DEMUX_DONE;
dmux->canvas_width = frame->width;
dmux->canvas_height = frame->height;
dmux->feature_flags |= frame->has_alpha ? ALPHA_FLAG : 0;
dmux->num_frames = 1;
assert(IsValidSimpleFormat(dmux));
*demuxer = dmux;
return PARSE_OK;
@@ -734,7 +737,7 @@ WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
return NULL;
}
partial = (mem.buf_size_ < mem.riff_end_);
partial = (mem.buf_size < mem.riff_end);
if (!allow_partial && partial) return NULL;
dmux = (WebPDemuxer*)WebPSafeCalloc(1ULL, sizeof(*dmux));
@@ -743,16 +746,16 @@ WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
status = PARSE_ERROR;
for (parser = kMasterChunks; parser->parse != NULL; ++parser) {
if (!memcmp(parser->id, GetBuffer(&dmux->mem_), TAG_SIZE)) {
if (!memcmp(parser->id, GetBuffer(&dmux->mem), TAG_SIZE)) {
status = parser->parse(dmux);
if (status == PARSE_OK) dmux->state_ = WEBP_DEMUX_DONE;
if (status == PARSE_OK) dmux->state = WEBP_DEMUX_DONE;
if (status == PARSE_NEED_MORE_DATA && !partial) status = PARSE_ERROR;
if (status != PARSE_ERROR && !parser->valid(dmux)) status = PARSE_ERROR;
if (status == PARSE_ERROR) dmux->state_ = WEBP_DEMUX_PARSE_ERROR;
if (status == PARSE_ERROR) dmux->state = WEBP_DEMUX_PARSE_ERROR;
break;
}
}
if (state != NULL) *state = dmux->state_;
if (state != NULL) *state = dmux->state;
if (status == PARSE_ERROR) {
WebPDemuxDelete(dmux);
@@ -766,14 +769,14 @@ void WebPDemuxDelete(WebPDemuxer* dmux) {
Frame* f;
if (dmux == NULL) return;
for (f = dmux->frames_; f != NULL;) {
for (f = dmux->frames; f != NULL;) {
Frame* const cur_frame = f;
f = f->next_;
f = f->next;
WebPSafeFree(cur_frame);
}
for (c = dmux->chunks_; c != NULL;) {
for (c = dmux->chunks; c != NULL;) {
Chunk* const cur_chunk = c;
c = c->next_;
c = c->next;
WebPSafeFree(cur_chunk);
}
WebPSafeFree(dmux);
@@ -785,12 +788,12 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
if (dmux == NULL) return 0;
switch (feature) {
case WEBP_FF_FORMAT_FLAGS: return dmux->feature_flags_;
case WEBP_FF_CANVAS_WIDTH: return (uint32_t)dmux->canvas_width_;
case WEBP_FF_CANVAS_HEIGHT: return (uint32_t)dmux->canvas_height_;
case WEBP_FF_LOOP_COUNT: return (uint32_t)dmux->loop_count_;
case WEBP_FF_BACKGROUND_COLOR: return dmux->bgcolor_;
case WEBP_FF_FRAME_COUNT: return (uint32_t)dmux->num_frames_;
case WEBP_FF_FORMAT_FLAGS: return dmux->feature_flags;
case WEBP_FF_CANVAS_WIDTH: return (uint32_t)dmux->canvas_width;
case WEBP_FF_CANVAS_HEIGHT: return (uint32_t)dmux->canvas_height;
case WEBP_FF_LOOP_COUNT: return (uint32_t)dmux->loop_count;
case WEBP_FF_BACKGROUND_COLOR: return dmux->bgcolor;
case WEBP_FF_FRAME_COUNT: return (uint32_t)dmux->num_frames;
}
return 0;
}
@@ -800,8 +803,8 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
static const Frame* GetFrame(const WebPDemuxer* const dmux, int frame_num) {
const Frame* f;
for (f = dmux->frames_; f != NULL; f = f->next_) {
if (frame_num == f->frame_num_) break;
for (f = dmux->frames; f != NULL; f = f->next) {
if (frame_num == f->frame_num) break;
}
return f;
}
@@ -811,19 +814,19 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
size_t* const data_size) {
*data_size = 0;
if (frame != NULL) {
const ChunkData* const image = frame->img_components_;
const ChunkData* const alpha = frame->img_components_ + 1;
size_t start_offset = image->offset_;
*data_size = image->size_;
const ChunkData* const image = frame->img_components;
const ChunkData* const alpha = frame->img_components + 1;
size_t start_offset = image->offset;
*data_size = image->size;
// if alpha exists it precedes image, update the size allowing for
// intervening chunks.
if (alpha->size_ > 0) {
const size_t inter_size = (image->offset_ > 0)
? image->offset_ - (alpha->offset_ + alpha->size_)
if (alpha->size > 0) {
const size_t inter_size = (image->offset > 0)
? image->offset - (alpha->offset + alpha->size)
: 0;
start_offset = alpha->offset_;
*data_size += alpha->size_ + inter_size;
start_offset = alpha->offset;
*data_size += alpha->size + inter_size;
}
return mem_buf + start_offset;
}
@@ -834,23 +837,23 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
static int SynthesizeFrame(const WebPDemuxer* const dmux,
const Frame* const frame,
WebPIterator* const iter) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
const uint8_t* const mem_buf = dmux->mem.buf;
size_t payload_size = 0;
const uint8_t* const payload = GetFramePayload(mem_buf, frame, &payload_size);
if (payload == NULL) return 0;
assert(frame != NULL);
iter->frame_num = frame->frame_num_;
iter->num_frames = dmux->num_frames_;
iter->x_offset = frame->x_offset_;
iter->y_offset = frame->y_offset_;
iter->width = frame->width_;
iter->height = frame->height_;
iter->has_alpha = frame->has_alpha_;
iter->duration = frame->duration_;
iter->dispose_method = frame->dispose_method_;
iter->blend_method = frame->blend_method_;
iter->complete = frame->complete_;
iter->frame_num = frame->frame_num;
iter->num_frames = dmux->num_frames;
iter->x_offset = frame->x_offset;
iter->y_offset = frame->y_offset;
iter->width = frame->width;
iter->height = frame->height;
iter->has_alpha = frame->has_alpha;
iter->duration = frame->duration;
iter->dispose_method = frame->dispose_method;
iter->blend_method = frame->blend_method;
iter->complete = frame->complete;
iter->fragment.bytes = payload;
iter->fragment.size = payload_size;
return 1;
@@ -860,8 +863,8 @@ static int SetFrame(int frame_num, WebPIterator* const iter) {
const Frame* frame;
const WebPDemuxer* const dmux = (WebPDemuxer*)iter->private_;
if (dmux == NULL || frame_num < 0) return 0;
if (frame_num > dmux->num_frames_) return 0;
if (frame_num == 0) frame_num = dmux->num_frames_;
if (frame_num > dmux->num_frames) return 0;
if (frame_num == 0) frame_num = dmux->num_frames;
frame = GetFrame(dmux, frame_num);
if (frame == NULL) return 0;
@@ -896,11 +899,11 @@ void WebPDemuxReleaseIterator(WebPIterator* iter) {
// Chunk iteration
static int ChunkCount(const WebPDemuxer* const dmux, const char fourcc[4]) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
const uint8_t* const mem_buf = dmux->mem.buf;
const Chunk* c;
int count = 0;
for (c = dmux->chunks_; c != NULL; c = c->next_) {
const uint8_t* const header = mem_buf + c->data_.offset_;
for (c = dmux->chunks; c != NULL; c = c->next) {
const uint8_t* const header = mem_buf + c->data.offset;
if (!memcmp(header, fourcc, TAG_SIZE)) ++count;
}
return count;
@@ -908,11 +911,11 @@ static int ChunkCount(const WebPDemuxer* const dmux, const char fourcc[4]) {
static const Chunk* GetChunk(const WebPDemuxer* const dmux,
const char fourcc[4], int chunk_num) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
const uint8_t* const mem_buf = dmux->mem.buf;
const Chunk* c;
int count = 0;
for (c = dmux->chunks_; c != NULL; c = c->next_) {
const uint8_t* const header = mem_buf + c->data_.offset_;
for (c = dmux->chunks; c != NULL; c = c->next) {
const uint8_t* const header = mem_buf + c->data.offset;
if (!memcmp(header, fourcc, TAG_SIZE)) ++count;
if (count == chunk_num) break;
}
@@ -930,10 +933,10 @@ static int SetChunk(const char fourcc[4], int chunk_num,
if (chunk_num == 0) chunk_num = count;
if (chunk_num <= count) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
const uint8_t* const mem_buf = dmux->mem.buf;
const Chunk* const chunk = GetChunk(dmux, fourcc, chunk_num);
iter->chunk.bytes = mem_buf + chunk->data_.offset_ + CHUNK_HEADER_SIZE;
iter->chunk.size = chunk->data_.size_ - CHUNK_HEADER_SIZE;
iter->chunk.bytes = mem_buf + chunk->data.offset + CHUNK_HEADER_SIZE;
iter->chunk.size = chunk->data.size - CHUNK_HEADER_SIZE;
iter->num_chunks = count;
iter->chunk_num = chunk_num;
return 1;
@@ -972,4 +975,3 @@ int WebPDemuxPrevChunk(WebPChunkIterator* iter) {
void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) {
(void)iter;
}

View File

@@ -12,7 +12,11 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stddef.h>
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/webp/types.h"
// Tables can be faster on some platform but incur some extra binary size (~2k).
#if !defined(USE_TABLES_FOR_ALPHA_MULT)

View File

@@ -16,6 +16,9 @@
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
#include "src/webp/types.h"
#include "src/dsp/cpu.h"
//------------------------------------------------------------------------------
static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
@@ -26,38 +29,44 @@ static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
uint32_t alpha_and = 0xff;
int i, j;
const __m128i zero = _mm_setzero_si128();
const __m128i rgb_mask = _mm_set1_epi32((int)0xffffff00); // to preserve RGB
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0);
__m128i all_alphas = all_0xff;
const __m128i alpha_mask = _mm_set1_epi32((int)0xff); // to preserve A
const __m128i all_0xff = _mm_set1_epi8((char)0xff);
__m128i all_alphas16 = all_0xff;
__m128i all_alphas8 = all_0xff;
// We must be able to access 3 extra bytes after the last written byte
// 'dst[4 * width - 4]', because we don't know if alpha is the first or the
// last byte of the quadruplet.
const int limit = (width - 1) & ~7;
for (j = 0; j < height; ++j) {
__m128i* out = (__m128i*)dst;
for (i = 0; i < limit; i += 8) {
char* ptr = (char*)dst;
for (i = 0; i + 16 <= width - 1; i += 16) {
// load 16 alpha bytes
const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
const __m128i a1_lo = _mm_unpacklo_epi8(a0, zero);
const __m128i a1_hi = _mm_unpackhi_epi8(a0, zero);
const __m128i a2_lo_lo = _mm_unpacklo_epi16(a1_lo, zero);
const __m128i a2_lo_hi = _mm_unpackhi_epi16(a1_lo, zero);
const __m128i a2_hi_lo = _mm_unpacklo_epi16(a1_hi, zero);
const __m128i a2_hi_hi = _mm_unpackhi_epi16(a1_hi, zero);
_mm_maskmoveu_si128(a2_lo_lo, alpha_mask, ptr + 0);
_mm_maskmoveu_si128(a2_lo_hi, alpha_mask, ptr + 16);
_mm_maskmoveu_si128(a2_hi_lo, alpha_mask, ptr + 32);
_mm_maskmoveu_si128(a2_hi_hi, alpha_mask, ptr + 48);
// accumulate 16 alpha 'and' in parallel
all_alphas16 = _mm_and_si128(all_alphas16, a0);
ptr += 64;
}
if (i + 8 <= width - 1) {
// load 8 alpha bytes
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&alpha[i]);
const __m128i a1 = _mm_unpacklo_epi8(a0, zero);
const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);
const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
// load 8 dst pixels (32 bytes)
const __m128i b0_lo = _mm_loadu_si128(out + 0);
const __m128i b0_hi = _mm_loadu_si128(out + 1);
// mask dst alpha values
const __m128i b1_lo = _mm_and_si128(b0_lo, rgb_mask);
const __m128i b1_hi = _mm_and_si128(b0_hi, rgb_mask);
// combine
const __m128i b2_lo = _mm_or_si128(b1_lo, a2_lo);
const __m128i b2_hi = _mm_or_si128(b1_hi, a2_hi);
// store
_mm_storeu_si128(out + 0, b2_lo);
_mm_storeu_si128(out + 1, b2_hi);
// accumulate eight alpha 'and' in parallel
all_alphas = _mm_and_si128(all_alphas, a0);
out += 2;
_mm_maskmoveu_si128(a2_lo, alpha_mask, ptr);
_mm_maskmoveu_si128(a2_hi, alpha_mask, ptr + 16);
// accumulate 8 alpha 'and' in parallel
all_alphas8 = _mm_and_si128(all_alphas8, a0);
i += 8;
}
for (; i < width; ++i) {
const uint32_t alpha_value = alpha[i];
@@ -68,8 +77,9 @@ static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
dst += dst_stride;
}
// Combine the eight alpha 'and' into a 8-bit mask.
alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
return (alpha_and != 0xff);
alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas8, all_0xff)) & 0xff;
return (alpha_and != 0xff ||
_mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas16, all_0xff)) != 0xffff);
}
static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha,

View File

@@ -11,10 +11,12 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE41)
#include <emmintrin.h>
#include <smmintrin.h>
//------------------------------------------------------------------------------

View File

@@ -9,8 +9,15 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
//------------------------------------------------------------------------------
// Boolean-cost cost table

View File

@@ -16,6 +16,10 @@
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
#include <assert.h>
#include "src/webp/types.h"
#include "src/dsp/cpu.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/utils.h"

View File

@@ -22,6 +22,10 @@
#include <cpu-features.h>
#endif
#include <stddef.h>
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// SSE2 detection.
//

View File

@@ -56,6 +56,11 @@
(defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
#endif
#if defined(_MSC_VER) && !defined(__clang__) && _MSC_VER >= 1700 && \
(defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_AVX2 // Visual C++ AVX2 targets
#endif
#endif
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
@@ -80,6 +85,16 @@
#define WEBP_HAVE_SSE41
#endif
#if (defined(__AVX2__) || defined(WEBP_MSC_AVX2)) && \
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_AVX2))
#define WEBP_USE_AVX2
#endif
#if defined(WEBP_USE_AVX2) && !defined(WEBP_HAVE_AVX2)
#define WEBP_HAVE_AVX2
#endif
#undef WEBP_MSC_AVX2
#undef WEBP_MSC_SSE41
#undef WEBP_MSC_SSE2

View File

@@ -12,10 +12,15 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include "src/dsp/dsp.h"
#include "src/dec/common_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/utils/utils.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------

View File

@@ -11,6 +11,8 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
// define to 0 to have run-time table initialization

View File

@@ -23,9 +23,12 @@
#endif
#include <emmintrin.h>
#include "src/dsp/common_sse2.h"
#include "src/dec/vp8i_dec.h"
#include "src/dsp/common_sse2.h"
#include "src/dsp/cpu.h"
#include "src/utils/utils.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)

View File

@@ -14,9 +14,12 @@
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE41)
#include <emmintrin.h>
#include <smmintrin.h>
#include "src/webp/types.h"
#include "src/dec/vp8i_dec.h"
#include "src/dsp/cpu.h"
#include "src/utils/utils.h"
static void HE16_SSE41(uint8_t* dst) { // horizontal

View File

@@ -13,9 +13,13 @@
#include <assert.h>
#include <stdlib.h> // for abs()
#include <string.h>
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/utils.h"
#include "src/webp/types.h"
static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
@@ -688,11 +692,11 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
for (n = 0; n < 16; ++n) {
const int j = kZigzag[n];
const int sign = (in[j] < 0);
const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
if (coeff > mtx->zthresh_[j]) {
const uint32_t Q = mtx->q_[j];
const uint32_t iQ = mtx->iq_[j];
const uint32_t B = mtx->bias_[j];
const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen[j];
if (coeff > mtx->zthresh[j]) {
const uint32_t Q = mtx->q[j];
const uint32_t iQ = mtx->iq[j];
const uint32_t B = mtx->bias[j];
int level = QUANTDIV(coeff, iQ, B);
if (level > MAX_LEVEL) level = MAX_LEVEL;
if (sign) level = -level;

View File

@@ -193,11 +193,11 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
int16_t* ppin = &in[0];
int16_t* pout = &out[0];
const uint16_t* ppsharpen = &mtx->sharpen_[0];
const uint32_t* ppzthresh = &mtx->zthresh_[0];
const uint16_t* ppq = &mtx->q_[0];
const uint16_t* ppiq = &mtx->iq_[0];
const uint32_t* ppbias = &mtx->bias_[0];
const uint16_t* ppsharpen = &mtx->sharpen[0];
const uint32_t* ppzthresh = &mtx->zthresh[0];
const uint16_t* ppq = &mtx->q[0];
const uint16_t* ppiq = &mtx->iq[0];
const uint32_t* ppbias = &mtx->bias[0];
__asm__ volatile(
QUANTIZE_ONE( 0, 0, 0)

View File

@@ -1296,11 +1296,11 @@ static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
int16_t* ppin = &in[0];
int16_t* pout = &out[0];
const uint16_t* ppsharpen = &mtx->sharpen_[0];
const uint32_t* ppzthresh = &mtx->zthresh_[0];
const uint16_t* ppq = &mtx->q_[0];
const uint16_t* ppiq = &mtx->iq_[0];
const uint32_t* ppbias = &mtx->bias_[0];
const uint16_t* ppsharpen = &mtx->sharpen[0];
const uint32_t* ppzthresh = &mtx->zthresh[0];
const uint16_t* ppq = &mtx->q[0];
const uint16_t* ppiq = &mtx->iq[0];
const uint32_t* ppbias = &mtx->bias[0];
__asm__ volatile (
QUANTIZE_ONE( 0, 0, 0, 2)

View File

@@ -845,7 +845,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
const v8i16 maxlevel = __msa_fill_h(MAX_LEVEL);
LD_SH2(&in[0], 8, in0, in1);
LD_SH2(&mtx->sharpen_[0], 8, sh0, sh1);
LD_SH2(&mtx->sharpen[0], 8, sh0, sh1);
tmp4 = __msa_add_a_h(in0, zero);
tmp5 = __msa_add_a_h(in1, zero);
ILVRL_H2_SH(sh0, tmp4, tmp0, tmp1);
@@ -853,10 +853,10 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
HADD_SH4_SW(tmp0, tmp1, tmp2, tmp3, s0, s1, s2, s3);
sign0 = (in0 < zero);
sign1 = (in1 < zero); // sign
LD_SH2(&mtx->iq_[0], 8, tmp0, tmp1); // iq
LD_SH2(&mtx->iq[0], 8, tmp0, tmp1); // iq
ILVRL_H2_SW(zero, tmp0, t0, t1);
ILVRL_H2_SW(zero, tmp1, t2, t3);
LD_SW4(&mtx->bias_[0], 4, b0, b1, b2, b3); // bias
LD_SW4(&mtx->bias[0], 4, b0, b1, b2, b3); // bias
MUL4(t0, s0, t1, s1, t2, s2, t3, s3, t0, t1, t2, t3);
ADD4(b0, t0, b1, t1, b2, t2, b3, t3, b0, b1, b2, b3);
SRAI_W4_SW(b0, b1, b2, b3, 17);
@@ -868,7 +868,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
SUB2(zero, tmp2, zero, tmp3, tmp0, tmp1);
tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)tmp0, (v16u8)sign0);
tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)tmp1, (v16u8)sign1);
LD_SW4(&mtx->zthresh_[0], 4, t0, t1, t2, t3); // zthresh
LD_SW4(&mtx->zthresh[0], 4, t0, t1, t2, t3); // zthresh
t0 = (s0 > t0);
t1 = (s1 > t1);
t2 = (s2 > t2);
@@ -876,7 +876,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
PCKEV_H2_SH(t1, t0, t3, t2, tmp0, tmp1);
tmp4 = (v8i16)__msa_bmnz_v((v16u8)zero, (v16u8)tmp2, (v16u8)tmp0);
tmp5 = (v8i16)__msa_bmnz_v((v16u8)zero, (v16u8)tmp3, (v16u8)tmp1);
LD_SH2(&mtx->q_[0], 8, tmp0, tmp1);
LD_SH2(&mtx->q[0], 8, tmp0, tmp1);
MUL2(tmp4, tmp0, tmp5, tmp1, in0, in1);
VSHF_H2_SH(tmp4, tmp5, tmp4, tmp5, zigzag0, zigzag1, out0, out1);
ST_SH2(in0, in1, &in[0], 8);

View File

@@ -841,11 +841,11 @@ static int SSE4x4_NEON(const uint8_t* WEBP_RESTRICT a,
static int16x8_t Quantize_NEON(int16_t* WEBP_RESTRICT const in,
const VP8Matrix* WEBP_RESTRICT const mtx,
int offset) {
const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
const uint32x4_t bias0 = vld1q_u32(&mtx->bias_[offset + 0]);
const uint32x4_t bias1 = vld1q_u32(&mtx->bias_[offset + 4]);
const uint16x8_t sharp = vld1q_u16(&mtx->sharpen[offset]);
const uint16x8_t q = vld1q_u16(&mtx->q[offset]);
const uint16x8_t iq = vld1q_u16(&mtx->iq[offset]);
const uint32x4_t bias0 = vld1q_u32(&mtx->bias[offset + 0]);
const uint32x4_t bias1 = vld1q_u32(&mtx->bias[offset + 4]);
const int16x8_t a = vld1q_s16(in + offset); // in
const uint16x8_t b = vreinterpretq_u16_s16(vabsq_s16(a)); // coeff = abs(in)
@@ -945,6 +945,28 @@ static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
vst1q_u8(dst, r); \
} while (0)
static WEBP_INLINE uint8x8x2_t Vld1U8x2(const uint8_t* ptr) {
#if LOCAL_CLANG_PREREQ(3, 4) || LOCAL_GCC_PREREQ(8, 5) || defined(_MSC_VER)
return vld1_u8_x2(ptr);
#else
uint8x8x2_t res;
INIT_VECTOR2(res, vld1_u8(ptr + 0 * 8), vld1_u8(ptr + 1 * 8));
return res;
#endif
}
static WEBP_INLINE uint8x16x4_t Vld1qU8x4(const uint8_t* ptr) {
#if LOCAL_CLANG_PREREQ(3, 4) || LOCAL_GCC_PREREQ(9, 4) || defined(_MSC_VER)
return vld1q_u8_x4(ptr);
#else
uint8x16x4_t res;
INIT_VECTOR4(res,
vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),
vld1q_u8(ptr + 2 * 16), vld1q_u8(ptr + 3 * 16));
return res;
#endif
}
static void Intra4Preds_NEON(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT top) {
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13
@@ -971,9 +993,9 @@ static void Intra4Preds_NEON(uint8_t* WEBP_RESTRICT dst,
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 16, 16, 16, 16
};
const uint8x16x4_t lookup_avgs1 = vld1q_u8_x4(kLookupTbl1);
const uint8x16x4_t lookup_avgs2 = vld1q_u8_x4(kLookupTbl2);
const uint8x16x4_t lookup_avgs3 = vld1q_u8_x4(kLookupTbl3);
const uint8x16x4_t lookup_avgs1 = Vld1qU8x4(kLookupTbl1);
const uint8x16x4_t lookup_avgs2 = Vld1qU8x4(kLookupTbl2);
const uint8x16x4_t lookup_avgs3 = Vld1qU8x4(kLookupTbl3);
const uint8x16_t preload = vld1q_u8(top - 5);
uint8x16x2_t qcombined;
@@ -1167,7 +1189,7 @@ static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, const uint8_t* left,
// Neither left nor top are NULL.
a = vdupq_n_u16(left[-1]);
inner = vld1_u8_x2(top);
inner = Vld1U8x2(top);
for (i = 0; i < 4; i++) {
const uint8x8x4_t outer = vld4_dup_u8(&left[i * 4]);

View File

@@ -14,13 +14,18 @@
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#include <assert.h>
#include <stdlib.h> // for abs()
#include <emmintrin.h>
#include <assert.h>
#include <stdlib.h> // for abs()
#include <string.h>
#include "src/dsp/common_sse2.h"
#include "src/dsp/cpu.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/utils.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
@@ -1410,10 +1415,10 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(
// Load all inputs.
__m128i in0 = _mm_loadu_si128((__m128i*)&in[0]);
__m128i in8 = _mm_loadu_si128((__m128i*)&in[8]);
const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq_[0]);
const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq_[8]);
const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q_[0]);
const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q_[8]);
const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq[0]);
const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq[8]);
const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q[0]);
const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q[8]);
// extract sign(in) (0x0000 if positive, 0xffff if negative)
const __m128i sign0 = _mm_cmpgt_epi16(zero, in0);
@@ -1446,10 +1451,10 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(
__m128i out_08 = _mm_unpacklo_epi16(coeff_iQ8L, coeff_iQ8H);
__m128i out_12 = _mm_unpackhi_epi16(coeff_iQ8L, coeff_iQ8H);
// out = (coeff * iQ + B)
const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias_[0]);
const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias_[4]);
const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias_[8]);
const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias_[12]);
const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias[0]);
const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias[4]);
const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias[8]);
const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias[12]);
out_00 = _mm_add_epi32(out_00, bias_00);
out_04 = _mm_add_epi32(out_04, bias_04);
out_08 = _mm_add_epi32(out_08, bias_08);
@@ -1512,7 +1517,7 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(
static int QuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
const VP8Matrix* WEBP_RESTRICT const mtx) {
return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen_[0], mtx);
return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen[0], mtx);
}
static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
@@ -1523,7 +1528,7 @@ static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
const VP8Matrix* WEBP_RESTRICT const mtx) {
int nz;
const uint16_t* const sharpen = &mtx->sharpen_[0];
const uint16_t* const sharpen = &mtx->sharpen[0];
nz = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
nz |= DoQuantizeBlock_SSE2(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
return nz;

View File

@@ -14,11 +14,15 @@
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE41)
#include <emmintrin.h>
#include <smmintrin.h>
#include <stdlib.h> // for abs()
#include "src/dsp/common_sse2.h"
#include "src/dsp/cpu.h"
#include "src/enc/vp8i_enc.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms.
@@ -211,10 +215,10 @@ static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
// Load all inputs.
__m128i in0 = _mm_loadu_si128((__m128i*)&in[0]);
__m128i in8 = _mm_loadu_si128((__m128i*)&in[8]);
const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq_[0]);
const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq_[8]);
const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q_[0]);
const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q_[8]);
const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq[0]);
const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq[8]);
const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q[0]);
const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q[8]);
// coeff = abs(in)
__m128i coeff0 = _mm_abs_epi16(in0);
@@ -241,10 +245,10 @@ static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
__m128i out_08 = _mm_unpacklo_epi16(coeff_iQ8L, coeff_iQ8H);
__m128i out_12 = _mm_unpackhi_epi16(coeff_iQ8L, coeff_iQ8H);
// out = (coeff * iQ + B)
const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias_[0]);
const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias_[4]);
const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias_[8]);
const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias_[12]);
const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias[0]);
const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias[4]);
const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias[8]);
const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias[12]);
out_00 = _mm_add_epi32(out_00, bias_00);
out_04 = _mm_add_epi32(out_04, bias_04);
out_08 = _mm_add_epi32(out_08, bias_08);
@@ -305,7 +309,7 @@ static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
static int QuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
const VP8Matrix* WEBP_RESTRICT const mtx) {
return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen_[0], mtx);
return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen[0], mtx);
}
static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
@@ -316,7 +320,7 @@ static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
const VP8Matrix* WEBP_RESTRICT const mtx) {
int nz;
const uint16_t* const sharpen = &mtx->sharpen_[0];
const uint16_t* const sharpen = &mtx->sharpen[0];
nz = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
nz |= DoQuantizeBlock_SSE41(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
return nz;

View File

@@ -11,11 +11,14 @@
//
// Author: Urvang (urvang@google.com)
#include "src/dsp/dsp.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Helpful macro.

View File

@@ -20,6 +20,9 @@
#include <stdlib.h>
#include <string.h>
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Helpful macro.

View File

@@ -13,15 +13,21 @@
// Jyrki Alakuijala (jyrki@google.com)
// Urvang Joshi (urvang@google.com)
#include "src/dsp/dsp.h"
#include "src/dsp/lossless.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "src/dec/vp8li_dec.h"
#include "src/utils/endian_inl_utils.h"
#include "src/dsp/lossless.h"
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/dsp/lossless_common.h"
#include "src/utils/endian_inl_utils.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Image transforms.
@@ -215,7 +221,7 @@ GENERATE_PREDICTOR_ADD(VP8LPredictor13_C, PredictorAdd13_C)
static void PredictorInverseTransform_C(const VP8LTransform* const transform,
int y_start, int y_end,
const uint32_t* in, uint32_t* out) {
const int width = transform->xsize_;
const int width = transform->xsize;
if (y_start == 0) { // First Row follows the L (mode=1) mode.
PredictorAdd0_C(in, NULL, 1, out);
PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
@@ -226,11 +232,11 @@ static void PredictorInverseTransform_C(const VP8LTransform* const transform,
{
int y = y_start;
const int tile_width = 1 << transform->bits_;
const int tile_width = 1 << transform->bits;
const int mask = tile_width - 1;
const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
const int tiles_per_row = VP8LSubSampleSize(width, transform->bits);
const uint32_t* pred_mode_base =
transform->data_ + (y >> transform->bits_) * tiles_per_row;
transform->data + (y >> transform->bits) * tiles_per_row;
while (y < y_end) {
const uint32_t* pred_mode_src = pred_mode_base;
@@ -278,9 +284,9 @@ static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,
static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
VP8LMultipliers* const m) {
m->green_to_red_ = (color_code >> 0) & 0xff;
m->green_to_blue_ = (color_code >> 8) & 0xff;
m->red_to_blue_ = (color_code >> 16) & 0xff;
m->green_to_red = (color_code >> 0) & 0xff;
m->green_to_blue = (color_code >> 8) & 0xff;
m->red_to_blue = (color_code >> 16) & 0xff;
}
void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
@@ -293,10 +299,10 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
const uint32_t red = argb >> 16;
int new_red = red & 0xff;
int new_blue = argb & 0xff;
new_red += ColorTransformDelta((int8_t)m->green_to_red_, green);
new_red += ColorTransformDelta((int8_t)m->green_to_red, green);
new_red &= 0xff;
new_blue += ColorTransformDelta((int8_t)m->green_to_blue_, green);
new_blue += ColorTransformDelta((int8_t)m->red_to_blue_, (int8_t)new_red);
new_blue += ColorTransformDelta((int8_t)m->green_to_blue, green);
new_blue += ColorTransformDelta((int8_t)m->red_to_blue, (int8_t)new_red);
new_blue &= 0xff;
dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
}
@@ -306,15 +312,15 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
int y_start, int y_end,
const uint32_t* src, uint32_t* dst) {
const int width = transform->xsize_;
const int tile_width = 1 << transform->bits_;
const int width = transform->xsize;
const int tile_width = 1 << transform->bits;
const int mask = tile_width - 1;
const int safe_width = width & ~mask;
const int remaining_width = width - safe_width;
const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
const int tiles_per_row = VP8LSubSampleSize(width, transform->bits);
int y = y_start;
const uint32_t* pred_row =
transform->data_ + (y >> transform->bits_) * tiles_per_row;
transform->data + (y >> transform->bits) * tiles_per_row;
while (y < y_end) {
const uint32_t* pred = pred_row;
@@ -356,11 +362,11 @@ STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform, \
int y_start, int y_end, const TYPE* src, \
TYPE* dst) { \
int y; \
const int bits_per_pixel = 8 >> transform->bits_; \
const int width = transform->xsize_; \
const uint32_t* const color_map = transform->data_; \
const int bits_per_pixel = 8 >> transform->bits; \
const int width = transform->xsize; \
const uint32_t* const color_map = transform->data; \
if (bits_per_pixel < 8) { \
const int pixels_per_byte = 1 << transform->bits_; \
const int pixels_per_byte = 1 << transform->bits; \
const int count_mask = pixels_per_byte - 1; \
const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \
for (y = y_start; y < y_end; ++y) { \
@@ -391,16 +397,16 @@ COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
void VP8LInverseTransform(const VP8LTransform* const transform,
int row_start, int row_end,
const uint32_t* const in, uint32_t* const out) {
const int width = transform->xsize_;
const int width = transform->xsize;
assert(row_start < row_end);
assert(row_end <= transform->ysize_);
switch (transform->type_) {
assert(row_end <= transform->ysize);
switch (transform->type) {
case SUBTRACT_GREEN_TRANSFORM:
VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
break;
case PREDICTOR_TRANSFORM:
PredictorInverseTransform_C(transform, row_start, row_end, in, out);
if (row_end != transform->ysize_) {
if (row_end != transform->ysize) {
// The last predicted row in this iteration will be the top-pred row
// for the first row in next iteration.
memcpy(out - width, out + (row_end - row_start - 1) * width,
@@ -411,15 +417,15 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out);
break;
case COLOR_INDEXING_TRANSFORM:
if (in == out && transform->bits_ > 0) {
if (in == out && transform->bits > 0) {
// Move packed pixels to the end of unpacked region, so that unpacking
// can occur seamlessly.
// Also, note that this is the only transform that applies on
// the effective width of VP8LSubSampleSize(xsize_, bits_). All other
// transforms work on effective width of xsize_.
// the effective width of VP8LSubSampleSize(xsize, bits). All other
// transforms work on effective width of 'xsize'.
const int out_stride = (row_end - row_start) * width;
const int in_stride = (row_end - row_start) *
VP8LSubSampleSize(transform->xsize_, transform->bits_);
VP8LSubSampleSize(transform->xsize, transform->bits);
uint32_t* const src = out + out_stride - in_stride;
memmove(src, out, in_stride * sizeof(*src));
ColorIndexInverseTransform_C(transform, row_start, row_end, src, out);
@@ -571,16 +577,21 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
//------------------------------------------------------------------------------
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed_SSE;
VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
VP8LPredictorAddSubFunc VP8LPredictorsAdd_SSE[16];
VP8LPredictorFunc VP8LPredictors[16];
// exposed plain-C implementations
VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
VP8LTransformColorInverseFunc VP8LTransformColorInverse;
VP8LTransformColorInverseFunc VP8LTransformColorInverse_SSE;
VP8LConvertFunc VP8LConvertBGRAToRGB;
VP8LConvertFunc VP8LConvertBGRAToRGB_SSE;
VP8LConvertFunc VP8LConvertBGRAToRGBA;
VP8LConvertFunc VP8LConvertBGRAToRGBA_SSE;
VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
VP8LConvertFunc VP8LConvertBGRAToRGB565;
VP8LConvertFunc VP8LConvertBGRAToBGR;
@@ -591,6 +602,7 @@ VP8LMapAlphaFunc VP8LMapColor8b;
extern VP8CPUInfo VP8GetCPUInfo;
extern void VP8LDspInitSSE2(void);
extern void VP8LDspInitSSE41(void);
extern void VP8LDspInitAVX2(void);
extern void VP8LDspInitNEON(void);
extern void VP8LDspInitMIPSdspR2(void);
extern void VP8LDspInitMSA(void);
@@ -643,6 +655,11 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) {
#if defined(WEBP_HAVE_SSE41)
if (VP8GetCPUInfo(kSSE4_1)) {
VP8LDspInitSSE41();
#if defined(WEBP_HAVE_AVX2)
if (VP8GetCPUInfo(kAVX2)) {
VP8LDspInitAVX2();
}
#endif
}
#endif
}

View File

@@ -15,13 +15,10 @@
#ifndef WEBP_DSP_LOSSLESS_H_
#define WEBP_DSP_LOSSLESS_H_
#include "src/dsp/dsp.h"
#include "src/webp/types.h"
#include "src/webp/decode.h"
#include "src/dsp/dsp.h"
#include "src/enc/histogram_enc.h"
#include "src/utils/utils.h"
#ifdef __cplusplus
extern "C" {
#endif
@@ -64,22 +61,25 @@ typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
uint32_t* WEBP_RESTRICT out);
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_SSE[16];
typedef void (*VP8LProcessDecBlueAndRedFunc)(const uint32_t* src,
int num_pixels, uint32_t* dst);
extern VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
extern VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed_SSE;
typedef struct {
// Note: the members are uint8_t, so that any negative values are
// automatically converted to "mod 256" values.
uint8_t green_to_red_;
uint8_t green_to_blue_;
uint8_t red_to_blue_;
uint8_t green_to_red;
uint8_t green_to_blue;
uint8_t red_to_blue;
} VP8LMultipliers;
typedef void (*VP8LTransformColorInverseFunc)(const VP8LMultipliers* const m,
const uint32_t* src,
int num_pixels, uint32_t* dst);
extern VP8LTransformColorInverseFunc VP8LTransformColorInverse;
extern VP8LTransformColorInverseFunc VP8LTransformColorInverse_SSE;
struct VP8LTransform; // Defined in dec/vp8li.h.
@@ -99,6 +99,8 @@ extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
extern VP8LConvertFunc VP8LConvertBGRAToRGB565;
extern VP8LConvertFunc VP8LConvertBGRAToBGR;
extern VP8LConvertFunc VP8LConvertBGRAToRGB_SSE;
extern VP8LConvertFunc VP8LConvertBGRAToRGBA_SSE;
// Converts from BGRA to other color spaces.
void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
@@ -149,21 +151,25 @@ void VP8LDspInit(void);
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed_SSE;
typedef void (*VP8LTransformColorFunc)(
const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT dst,
int num_pixels);
extern VP8LTransformColorFunc VP8LTransformColor;
extern VP8LTransformColorFunc VP8LTransformColor_SSE;
typedef void (*VP8LCollectColorBlueTransformsFunc)(
const uint32_t* WEBP_RESTRICT argb, int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue, uint32_t histo[]);
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms_SSE;
typedef void (*VP8LCollectColorRedTransformsFunc)(
const uint32_t* WEBP_RESTRICT argb, int stride,
int tile_width, int tile_height,
int green_to_red, uint32_t histo[]);
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms_SSE;
// Expose some C-only fallback functions
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
@@ -181,20 +187,17 @@ void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
extern VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
extern VP8LPredictorAddSubFunc VP8LPredictorsSub_SSE[16];
// -----------------------------------------------------------------------------
// Huffman-cost related functions.
typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* WEBP_RESTRICT X,
const uint32_t* WEBP_RESTRICT Y,
int length);
typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
const uint32_t Y[256]);
typedef uint64_t (*VP8LShannonEntropyFunc)(const uint32_t* X, int length);
extern VP8LCostFunc VP8LExtraCost;
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
extern VP8LShannonEntropyFunc VP8LShannonEntropy;
@@ -239,9 +242,6 @@ extern VP8LAddVectorFunc VP8LAddVector;
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* WEBP_RESTRICT a,
uint32_t* WEBP_RESTRICT out, int size);
extern VP8LAddVectorEqFunc VP8LAddVectorEq;
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
const VP8LHistogram* WEBP_RESTRICT const b,
VP8LHistogram* WEBP_RESTRICT const out);
// -----------------------------------------------------------------------------
// PrefixEncode()
@@ -255,6 +255,7 @@ typedef void (*VP8LBundleColorMapFunc)(const uint8_t* WEBP_RESTRICT const row,
int width, int xbits,
uint32_t* WEBP_RESTRICT dst);
extern VP8LBundleColorMapFunc VP8LBundleColorMap;
extern VP8LBundleColorMapFunc VP8LBundleColorMap_SSE;
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
int width, int xbits, uint32_t* WEBP_RESTRICT dst);

View File

@@ -0,0 +1,443 @@
// Copyright 2025 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// AVX2 variant of methods for lossless decoder
//
// Author: Vincent Rabaud (vrabaud@google.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_AVX2)
#include <stddef.h>
#include <immintrin.h>
#include "src/dsp/cpu.h"
#include "src/dsp/lossless.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Predictor Transform
// Stores in '*avg' the per-byte truncating average of '*a0' and '*a1',
// i.e. (a + b) >> 1 for each of the 32 unsigned bytes.
static WEBP_INLINE void Average2_m256i(const __m256i* const a0,
                                       const __m256i* const a1,
                                       __m256i* const avg) {
  // _mm256_avg_epu8 rounds up: it yields (a + b + 1) >> 1. The two averages
  // differ by one exactly when (a + b) is odd, which is when the lowest bit
  // of (a ^ b) is set, so that bit is subtracted from the rounded-up value.
  const __m256i lsb_mask = _mm256_set1_epi8(1);
  const __m256i differing_bits = _mm256_xor_si256(*a0, *a1);
  const __m256i round_bit = _mm256_and_si256(differing_bits, lsb_mask);
  const __m256i rounded_up = _mm256_avg_epu8(*a0, *a1);
  *avg = _mm256_sub_epi8(rounded_up, round_bit);
}
// Batch versions of those functions.
// Predictor0: ARGB_BLACK.
// Adds the constant ARGB_BLACK to each of the 'num_pixels' pixels of 'in'
// (mod 256 per byte) and writes the result to 'out'. 'upper' is not used.
static void PredictorAdd0_AVX2(const uint32_t* in, const uint32_t* upper,
                               int num_pixels, uint32_t* WEBP_RESTRICT out) {
  const __m256i kBlack = _mm256_set1_epi32((int)ARGB_BLACK);
  int done = 0;
  (void)upper;
  // Full groups of 8 pixels.
  while (done + 8 <= num_pixels) {
    const __m256i pixels = _mm256_loadu_si256((const __m256i*)&in[done]);
    _mm256_storeu_si256((__m256i*)&out[done],
                        _mm256_add_epi8(pixels, kBlack));
    done += 8;
  }
  // Whatever is left goes through the VP8LPredictorsAdd_SSE table entry.
  if (done != num_pixels) {
    VP8LPredictorsAdd_SSE[0](in + done, NULL, num_pixels - done, out + done);
  }
}
// Predictor1: left.
// Computes out[i] = in[i] + out[i - 1] (mod 256 per byte) for 'num_pixels'
// pixels, seeded with out[-1], so out[-1] must be readable.
// Each group of 8 pixels is a running sum: two shift-and-add steps build the
// prefix sums inside each 128-bit half, then the total of the lower half is
// added to the upper half, and finally the carry from the previous group
// ('prev') is added to all 8 results.
// 'upper' is only forwarded to the fallback used for the trailing pixels.
static void PredictorAdd1_AVX2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
// Carry-in for the first group: the pixel located just before 'out'.
__m256i prev = _mm256_set1_epi32((int)out[-1]);
for (i = 0; i + 8 <= num_pixels; i += 8) {
// h | g | f | e | d | c | b | a
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
// g | f | e | 0 | c | b | a | 0
const __m256i shift0 = _mm256_slli_si256(src, 4);
// g + h | f + g | e + f | e | c + d | b + c | a + b | a
const __m256i sum0 = _mm256_add_epi8(src, shift0);
// e + f | e | 0 | 0 | a + b | a | 0 | 0
const __m256i shift1 = _mm256_slli_si256(sum0, 8);
// e + f + g + h | e + f + g | e + f | e | a + b + c + d | a + b + c | a + b
// | a
const __m256i sum1 = _mm256_add_epi8(sum0, shift1);
// Add a + b + c + d to the upper lane.
const int32_t sum_abcd = _mm256_extract_epi32(sum1, 3);
const __m256i sum2 = _mm256_add_epi8(
sum1,
_mm256_set_epi32(sum_abcd, sum_abcd, sum_abcd, sum_abcd, 0, 0, 0, 0));
const __m256i res = _mm256_add_epi8(sum2, prev);
_mm256_storeu_si256((__m256i*)&out[i], res);
// replicate last res output in prev.
prev = _mm256_permutevar8x32_epi32(
res, _mm256_set_epi32(7, 7, 7, 7, 7, 7, 7, 7));
}
// Trailing pixels that do not fill a group of 8 are delegated to the
// VP8LPredictorsAdd_SSE table entry.
if (i != num_pixels) {
VP8LPredictorsAdd_SSE[1](in + i, upper + i, num_pixels - i, out + i);
}
}
// Macro that adds 32-bit integers from IN using mod 256 arithmetic
// per 8 bit channel.
// It defines PredictorAdd<X>_AVX2(in, upper, num_pixels, out), where:
//   X  is the predictor index, used both in the generated function name and
//      as the index into VP8LPredictorsAdd_SSE[] for the trailing pixels.
//   IN is an lvalue expression written in terms of 'upper' and the loop
//      index 'i'. It is evaluated inside the loop and 8 consecutive pixels
//      are loaded starting at its address.
// With IN = upper[i + 1] the load reads up to upper[i + 8], and with
// IN = upper[i - 1] it starts at upper[-1] on the first iteration.
// NOTE(review): this presumably relies on the caller's row buffer making
// those neighbors readable -- confirm against the decoder.
#define GENERATE_PREDICTOR_1(X, IN) \
static void PredictorAdd##X##_AVX2(const uint32_t* in, \
const uint32_t* upper, int num_pixels, \
uint32_t* WEBP_RESTRICT out) { \
int i; \
for (i = 0; i + 8 <= num_pixels; i += 8) { \
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]); \
const __m256i other = _mm256_loadu_si256((const __m256i*)&(IN)); \
const __m256i res = _mm256_add_epi8(src, other); \
_mm256_storeu_si256((__m256i*)&out[i], res); \
} \
if (i != num_pixels) { \
VP8LPredictorsAdd_SSE[(X)](in + i, upper + i, num_pixels - i, out + i); \
} \
}
// Predictor2: Top.
GENERATE_PREDICTOR_1(2, upper[i])
// Predictor3: Top-right.
GENERATE_PREDICTOR_1(3, upper[i + 1])
// Predictor4: Top-left.
GENERATE_PREDICTOR_1(4, upper[i - 1])
#undef GENERATE_PREDICTOR_1
// Due to averages with integers, values cannot be accumulated in parallel for
// predictors 5 to 7.
//
// Macro for the predictors that average two pixels of the row above.
// It defines PredictorAdd<X>_AVX2(in, upper, num_pixels, out), which computes
// out[i] = in[i] + Average2(upper[i], IN) (mod 256 per byte), 8 pixels at a
// time, where Average2 is the truncating per-byte average implemented by
// Average2_m256i().
//   X  is the predictor index, used both in the generated function name and
//      as the index into VP8LPredictorsAdd_SSE[] for the trailing pixels.
//   IN is an lvalue expression in terms of 'upper' and the loop index 'i'
//      naming the second pixel of the average. 8 consecutive pixels are
//      loaded starting at its address, so upper[i - 1] or upper[i + 8] may be
//      read depending on IN.
#define GENERATE_PREDICTOR_2(X, IN) \
static void PredictorAdd##X##_AVX2(const uint32_t* in, \
const uint32_t* upper, int num_pixels, \
uint32_t* WEBP_RESTRICT out) { \
int i; \
for (i = 0; i + 8 <= num_pixels; i += 8) { \
const __m256i Tother = _mm256_loadu_si256((const __m256i*)&(IN)); \
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]); \
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]); \
__m256i avg, res; \
Average2_m256i(&T, &Tother, &avg); \
res = _mm256_add_epi8(avg, src); \
_mm256_storeu_si256((__m256i*)&out[i], res); \
} \
if (i != num_pixels) { \
VP8LPredictorsAdd_SSE[(X)](in + i, upper + i, num_pixels - i, out + i); \
} \
}
// Predictor8: average TL T.
GENERATE_PREDICTOR_2(8, upper[i - 1])
// Predictor9: average T TR.
GENERATE_PREDICTOR_2(9, upper[i + 1])
#undef GENERATE_PREDICTOR_2
// Predictor10: average of (average of (L,TL), average of (T, TR)).
// The left pixel L is the previous output, so pixels are produced one at a
// time; only the average of T and TR is computed for 8 pixels at once.
//
// DO_PRED10(OUT) produces one pixel from the lowest 32 bits of 'L', 'TL',
// 'avgTTR' and 'src', writes it to out[i + OUT] and leaves it in 'L' as the
// left neighbor of the next pixel.
#define DO_PRED10(OUT) \
do { \
__m256i avgLTL, avg; \
Average2_m256i(&L, &TL, &avgLTL); \
Average2_m256i(&avgTTR, &avgLTL, &avg); \
L = _mm256_add_epi8(avg, src); \
out[i + (OUT)] = (uint32_t)_mm256_cvtsi256_si32(L); \
} while (0)
// DO_PRED10_SHIFT moves the next pixel's inputs into the lowest 32 bits by
// shifting 'avgTTR', 'TL' and 'src' right by 4 bytes.
#define DO_PRED10_SHIFT \
do { \
/* Rotate the pre-computed values for the next iteration.*/ \
avgTTR = _mm256_srli_si256(avgTTR, 4); \
TL = _mm256_srli_si256(TL, 4); \
src = _mm256_srli_si256(src, 4); \
} while (0)
// Computes out[i] = in[i] + Average2(Average2(L, TL), Average2(T, TR))
// (mod 256 per byte) for 'num_pixels' pixels, where L is out[i - 1] and TL,
// T, TR are upper[i - 1], upper[i], upper[i + 1]. Reads out[-1], upper[-1]
// and, for the last full group, upper[i + 8].
static void PredictorAdd10_AVX2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i, j;
// Only the lowest 32 bits of L are meaningful: the current left pixel.
__m256i L = _mm256_setr_epi32((int)out[-1], 0, 0, 0, 0, 0, 0, 0);
for (i = 0; i + 8 <= num_pixels; i += 8) {
__m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
__m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
const __m256i TR = _mm256_loadu_si256((const __m256i*)&upper[i + 1]);
__m256i avgTTR;
Average2_m256i(&T, &TR, &avgTTR);
{
// The byte shifts in DO_PRED10_SHIFT work within each 128-bit half, so
// the untouched values are kept to bring the upper half (pixels 4..7)
// down once the lower half (pixels 0..3) has been consumed.
const __m256i avgTTR_bak = avgTTR;
const __m256i TL_bak = TL;
const __m256i src_bak = src;
for (j = 0; j < 4; ++j) {
DO_PRED10(j);
DO_PRED10_SHIFT;
}
// Swap the two 128-bit halves of the saved values.
avgTTR = _mm256_permute2x128_si256(avgTTR_bak, avgTTR_bak, 1);
TL = _mm256_permute2x128_si256(TL_bak, TL_bak, 1);
src = _mm256_permute2x128_si256(src_bak, src_bak, 1);
for (; j < 8; ++j) {
DO_PRED10(j);
DO_PRED10_SHIFT;
}
}
}
// Trailing pixels are delegated to the VP8LPredictorsAdd_SSE table entry.
if (i != num_pixels) {
VP8LPredictorsAdd_SSE[10](in + i, upper + i, num_pixels - i, out + i);
}
}
#undef DO_PRED10
#undef DO_PRED10_SHIFT
// Predictor11: select.
// For each pixel the prediction is either L (previous output) or T (pixel
// above), chosen by comparing two sums of absolute byte differences:
//   pa = sum |T - TL|   (computed for 8 pixels at once)
//   pb = sum |L - TL|   (computed per pixel, since L is the previous output)
// L is selected when pb > pa, T otherwise.
//
// DO_PRED11(OUT) produces one pixel from the lowest 32 bits of 'L', 'T',
// 'TL', 'src' and 'pa', writes it to out[i + OUT] and leaves it in 'L'.
#define DO_PRED11(OUT) \
do { \
const __m256i L_lo = _mm256_unpacklo_epi32(L, T); \
const __m256i TL_lo = _mm256_unpacklo_epi32(TL, T); \
const __m256i pb = _mm256_sad_epu8(L_lo, TL_lo); /* pb = sum |L-TL|*/ \
const __m256i mask = _mm256_cmpgt_epi32(pb, pa); \
const __m256i A = _mm256_and_si256(mask, L); \
const __m256i B = _mm256_andnot_si256(mask, T); \
const __m256i pred = _mm256_or_si256(A, B); /* pred = (pb > pa)? L : T*/ \
L = _mm256_add_epi8(src, pred); \
out[i + (OUT)] = (uint32_t)_mm256_cvtsi256_si32(L); \
} while (0)
// DO_PRED11_SHIFT moves the next pixel's inputs into the lowest 32 bits by
// shifting 'T', 'TL', 'src' and 'pa' right by 4 bytes.
#define DO_PRED11_SHIFT \
do { \
/* Shift the pre-computed value for the next iteration.*/ \
T = _mm256_srli_si256(T, 4); \
TL = _mm256_srli_si256(TL, 4); \
src = _mm256_srli_si256(src, 4); \
pa = _mm256_srli_si256(pa, 4); \
} while (0)
// Computes out[i] = in[i] + Select(L, T, TL) (mod 256 per byte) for
// 'num_pixels' pixels, where L is out[i - 1], T is upper[i] and TL is
// upper[i - 1]. Reads out[-1] and upper[-1].
static void PredictorAdd11_AVX2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i, j;
__m256i pa;
// Only the lowest 32 bits of L are meaningful: the current left pixel.
__m256i L = _mm256_setr_epi32((int)out[-1], 0, 0, 0, 0, 0, 0, 0);
for (i = 0; i + 8 <= num_pixels; i += 8) {
__m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
__m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
__m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
{
// We can unpack with any value on the upper 32 bits, provided it's the
// same on both operands (so that their sum of abs diff is zero). Here we
// use T.
const __m256i T_lo = _mm256_unpacklo_epi32(T, T);
const __m256i TL_lo = _mm256_unpacklo_epi32(TL, T);
const __m256i T_hi = _mm256_unpackhi_epi32(T, T);
const __m256i TL_hi = _mm256_unpackhi_epi32(TL, T);
const __m256i s_lo = _mm256_sad_epu8(T_lo, TL_lo);
const __m256i s_hi = _mm256_sad_epu8(T_hi, TL_hi);
pa = _mm256_packs_epi32(s_lo, s_hi); // pa = sum |T-TL|
}
{
// The byte shifts in DO_PRED11_SHIFT work within each 128-bit half, so
// the untouched values are kept to bring the upper half (pixels 4..7)
// down once the lower half (pixels 0..3) has been consumed.
const __m256i T_bak = T;
const __m256i TL_bak = TL;
const __m256i src_bak = src;
const __m256i pa_bak = pa;
for (j = 0; j < 4; ++j) {
DO_PRED11(j);
DO_PRED11_SHIFT;
}
// Swap the two 128-bit halves of the saved values.
T = _mm256_permute2x128_si256(T_bak, T_bak, 1);
TL = _mm256_permute2x128_si256(TL_bak, TL_bak, 1);
src = _mm256_permute2x128_si256(src_bak, src_bak, 1);
pa = _mm256_permute2x128_si256(pa_bak, pa_bak, 1);
for (; j < 8; ++j) {
DO_PRED11(j);
DO_PRED11_SHIFT;
}
}
}
// Trailing pixels are delegated to the VP8LPredictorsAdd_SSE table entry.
if (i != num_pixels) {
VP8LPredictorsAdd_SSE[11](in + i, upper + i, num_pixels - i, out + i);
}
}
#undef DO_PRED11
#undef DO_PRED11_SHIFT
// Predictor12: ClampedAddSubtractFull.
// The prediction is L + T - TL per channel, clamped to [0, 255]. T - TL is
// computed up front as 16-bit values for 8 pixels; L is the previous output,
// so pixels are produced one at a time.
//
// DO_PRED12(DIFF, OUT) produces one pixel: it adds the 16-bit 'L' to the
// lowest pixel of DIFF, saturates the sum to unsigned bytes, adds the lowest
// pixel of 'src', writes the result to out[i + OUT] and widens it back to
// 16 bits in 'L' for the next pixel.
#define DO_PRED12(DIFF, OUT) \
do { \
const __m256i all = _mm256_add_epi16(L, (DIFF)); \
const __m256i alls = _mm256_packus_epi16(all, all); \
const __m256i res = _mm256_add_epi8(src, alls); \
out[i + (OUT)] = (uint32_t)_mm256_cvtsi256_si32(res); \
L = _mm256_unpacklo_epi8(res, zero); \
} while (0)
// DO_PRED12_SHIFT(DIFF, LANE) always advances 'src' by one pixel (4 bytes).
// DIFF is advanced by one 16-bit pixel (8 bytes) only when LANE is 0; a
// non-zero LANE leaves DIFF untouched.
#define DO_PRED12_SHIFT(DIFF, LANE) \
do { \
/* Shift the pre-computed value for the next iteration.*/ \
if ((LANE) == 0) (DIFF) = _mm256_srli_si256(DIFF, 8); \
src = _mm256_srli_si256(src, 4); \
} while (0)
// Computes out[i] = in[i] + clamp(L + T - TL) (mod 256 per byte) for
// 'num_pixels' pixels, where L is out[i - 1], T is upper[i] and TL is
// upper[i - 1]. Reads out[-1] and upper[-1].
static void PredictorAdd12_AVX2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m256i zero = _mm256_setzero_si256();
const __m256i L8 = _mm256_setr_epi32((int)out[-1], 0, 0, 0, 0, 0, 0, 0);
// Left pixel widened to four 16-bit channels in the lowest 64 bits.
__m256i L = _mm256_unpacklo_epi8(L8, zero);
for (i = 0; i + 8 <= num_pixels; i += 8) {
// Load 8 pixels at a time.
__m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
const __m256i T_lo = _mm256_unpacklo_epi8(T, zero);
const __m256i T_hi = _mm256_unpackhi_epi8(T, zero);
const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
const __m256i TL_lo = _mm256_unpacklo_epi8(TL, zero);
const __m256i TL_hi = _mm256_unpackhi_epi8(TL, zero);
// The unpacks work within each 128-bit half, so diff_lo holds pixels 0, 1
// (lower half) and 4, 5 (upper half), and diff_hi holds pixels 2, 3 and
// 6, 7.
__m256i diff_lo = _mm256_sub_epi16(T_lo, TL_lo);
__m256i diff_hi = _mm256_sub_epi16(T_hi, TL_hi);
// Saved so that the upper half can be brought down after the shifts below.
const __m256i diff_lo_bak = diff_lo;
const __m256i diff_hi_bak = diff_hi;
const __m256i src_bak = src;
DO_PRED12(diff_lo, 0);
DO_PRED12_SHIFT(diff_lo, 0);
DO_PRED12(diff_lo, 1);
DO_PRED12_SHIFT(diff_lo, 0);
DO_PRED12(diff_hi, 2);
DO_PRED12_SHIFT(diff_hi, 0);
DO_PRED12(diff_hi, 3);
DO_PRED12_SHIFT(diff_hi, 0);
// Process the upper lane.
diff_lo = _mm256_permute2x128_si256(diff_lo_bak, diff_lo_bak, 1);
diff_hi = _mm256_permute2x128_si256(diff_hi_bak, diff_hi_bak, 1);
src = _mm256_permute2x128_si256(src_bak, src_bak, 1);
DO_PRED12(diff_lo, 4);
DO_PRED12_SHIFT(diff_lo, 0);
DO_PRED12(diff_lo, 5);
// diff_lo is not needed anymore, hence only 'src' is advanced here.
DO_PRED12_SHIFT(diff_lo, 1);
DO_PRED12(diff_hi, 6);
DO_PRED12_SHIFT(diff_hi, 0);
DO_PRED12(diff_hi, 7);
}
// Trailing pixels are delegated to the VP8LPredictorsAdd_SSE table entry.
if (i != num_pixels) {
VP8LPredictorsAdd_SSE[12](in + i, upper + i, num_pixels - i, out + i);
}
}
#undef DO_PRED12
#undef DO_PRED12_SHIFT
// Due to averages with integers, values cannot be accumulated in parallel for
// predictor 13.
//------------------------------------------------------------------------------
// Subtract-Green Transform
// Inverse of the subtract-green transform: adds the green channel of each of
// the 'num_pixels' pixels of 'src' to its red and blue channels (mod 256) and
// writes the result to 'dst'.
static void AddGreenToBlueAndRed_AVX2(const uint32_t* const src, int num_pixels,
                                      uint32_t* dst) {
  // Shuffle control: within each pixel, bytes 0 and 2 receive byte 1 (green)
  // while bytes 1 and 3 are zeroed, i.e. argb -> 0g0g.
  const __m256i kGreenSpread = _mm256_set_epi8(
      -1, 29, -1, 29, -1, 25, -1, 25, -1, 21, -1, 21, -1, 17, -1, 17, -1, 13,
      -1, 13, -1, 9, -1, 9, -1, 5, -1, 5, -1, 1, -1, 1);
  int pos = 0;
  while (pos + 8 <= num_pixels) {
    const __m256i argb = _mm256_loadu_si256((const __m256i*)&src[pos]);
    const __m256i green_0g0g = _mm256_shuffle_epi8(argb, kGreenSpread);
    _mm256_storeu_si256((__m256i*)&dst[pos],
                        _mm256_add_epi8(argb, green_0g0g));
    pos += 8;
  }
  // Hand whatever is left to the SSE implementation.
  if (pos != num_pixels) {
    VP8LAddGreenToBlueAndRed_SSE(src + pos, num_pixels - pos, dst + pos);
  }
}
//------------------------------------------------------------------------------
// Color Transform
// Applies the inverse color transform described by 'm' to 'num_pixels' pixels
// of 'src' and writes the result to 'dst'. For each pixel:
//   red  += delta(green_to_red, green)
//   blue += delta(green_to_blue, green) + delta(red_to_blue, updated red)
// while alpha and green are copied unchanged. Each delta is obtained with a
// 16-bit high multiply of a channel placed in the upper byte of a 16-bit word
// by the multiplier pre-shifted with CST().
static void TransformColorInverse_AVX2(const VP8LMultipliers* const m,
const uint32_t* const src,
int num_pixels, uint32_t* dst) {
// sign-extended multiplying constants, pre-shifted by 5.
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
// Upper 16 bits: green_to_red multiplier; lower 16 bits: green_to_blue.
const __m256i mults_rb =
_mm256_set1_epi32((int)((uint32_t)CST(green_to_red) << 16 |
(CST(green_to_blue) & 0xffff)));
// Lower 16 bits: red_to_blue multiplier.
const __m256i mults_b2 = _mm256_set1_epi32(CST(red_to_blue));
#undef CST
// Selects the alpha and green bytes of each pixel.
const __m256i mask_ag = _mm256_set1_epi32((int)0xff00ff00);
// perm1: puts the green byte in the upper byte of both 16-bit halves of
// each pixel and zeroes the lower bytes.
const __m256i perm1 = _mm256_setr_epi8(
-1, 1, -1, 1, -1, 5, -1, 5, -1, 9, -1, 9, -1, 13, -1, 13, -1, 17, -1, 17,
-1, 21, -1, 21, -1, 25, -1, 25, -1, 29, -1, 29);
// perm2: puts the red byte in the upper byte of the lower 16-bit half of
// each pixel and zeroes everything else.
const __m256i perm2 = _mm256_setr_epi8(
-1, 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1, 18, -1,
-1, -1, 22, -1, -1, -1, 26, -1, -1, -1, 30, -1, -1);
int i;
for (i = 0; i + 8 <= num_pixels; i += 8) {
const __m256i A = _mm256_loadu_si256((const __m256i*)(src + i));
const __m256i B = _mm256_shuffle_epi8(A, perm1); // argb -> g0g0
// C: green-based deltas for red (upper half) and blue (lower half).
const __m256i C = _mm256_mulhi_epi16(B, mults_rb);
const __m256i D = _mm256_add_epi8(A, C);
// E: the updated red taken from D; F: red-based delta for blue.
const __m256i E = _mm256_shuffle_epi8(D, perm2);
const __m256i F = _mm256_mulhi_epi16(E, mults_b2);
const __m256i G = _mm256_add_epi8(D, F);
// Keep red/blue from G and restore the original alpha/green from A.
const __m256i out = _mm256_blendv_epi8(G, A, mask_ag);
_mm256_storeu_si256((__m256i*)&dst[i], out);
}
// Fall-back to SSE-version for left-overs.
if (i != num_pixels) {
VP8LTransformColorInverse_SSE(m, src + i, num_pixels - i, dst + i);
}
}
//------------------------------------------------------------------------------
// Color-space conversion functions
// Converts 'num_pixels' pixels from 'src' to 4 bytes per pixel in 'dst',
// swapping bytes 0 and 2 of every pixel (BGRA -> RGBA byte order).
static void ConvertBGRAToRGBA_AVX2(const uint32_t* WEBP_RESTRICT src,
                                   int num_pixels, uint8_t* WEBP_RESTRICT dst) {
  // Per-pixel byte permutation (2, 1, 0, 3), repeated for every pixel of each
  // 128-bit half.
  const __m256i kSwapRedBlue =
      _mm256_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2,
                      15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2);
  int remaining = num_pixels;
  for (; remaining >= 8; remaining -= 8) {
    const __m256i bgra = _mm256_loadu_si256((const __m256i*)src);
    const __m256i rgba = _mm256_shuffle_epi8(bgra, kSwapRedBlue);
    _mm256_storeu_si256((__m256i*)dst, rgba);
    src += 8;                     // 8 pixels consumed
    dst += 8 * sizeof(uint32_t);  // 32 bytes produced
  }
  // left-overs
  if (remaining > 0) {
    VP8LConvertBGRAToRGBA_SSE(src, remaining, dst);
  }
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8LDspInitAVX2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitAVX2(void) {
VP8LPredictorsAdd[0] = PredictorAdd0_AVX2;
VP8LPredictorsAdd[1] = PredictorAdd1_AVX2;
VP8LPredictorsAdd[2] = PredictorAdd2_AVX2;
VP8LPredictorsAdd[3] = PredictorAdd3_AVX2;
VP8LPredictorsAdd[4] = PredictorAdd4_AVX2;
VP8LPredictorsAdd[8] = PredictorAdd8_AVX2;
VP8LPredictorsAdd[9] = PredictorAdd9_AVX2;
VP8LPredictorsAdd[10] = PredictorAdd10_AVX2;
VP8LPredictorsAdd[11] = PredictorAdd11_AVX2;
VP8LPredictorsAdd[12] = PredictorAdd12_AVX2;
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_AVX2;
VP8LTransformColorInverse = TransformColorInverse_AVX2;
VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_AVX2;
}
#else // !WEBP_USE_AVX2
WEBP_DSP_INIT_STUB(VP8LDspInitAVX2)
#endif // WEBP_USE_AVX2

View File

@@ -16,6 +16,9 @@
#ifndef WEBP_DSP_LOSSLESS_COMMON_H_
#define WEBP_DSP_LOSSLESS_COMMON_H_
#include <assert.h>
#include <stddef.h>
#include "src/dsp/cpu.h"
#include "src/utils/utils.h"
#include "src/webp/types.h"
@@ -137,8 +140,8 @@ static WEBP_INLINE void VP8LPrefixEncodeNoLUT(int distance, int* const code,
#define PREFIX_LOOKUP_IDX_MAX 512
typedef struct {
int8_t code_;
int8_t extra_bits_;
int8_t code;
int8_t extra_bits;
} VP8LPrefixCode;
// These tables are derived using VP8LPrefixEncodeNoLUT.
@@ -148,8 +151,8 @@ static WEBP_INLINE void VP8LPrefixEncodeBits(int distance, int* const code,
int* const extra_bits) {
if (distance < PREFIX_LOOKUP_IDX_MAX) {
const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance];
*code = prefix_code.code_;
*extra_bits = prefix_code.extra_bits_;
*code = prefix_code.code;
*extra_bits = prefix_code.extra_bits;
} else {
VP8LPrefixEncodeBitsNoLUT(distance, code, extra_bits);
}
@@ -160,8 +163,8 @@ static WEBP_INLINE void VP8LPrefixEncode(int distance, int* const code,
int* const extra_bits_value) {
if (distance < PREFIX_LOOKUP_IDX_MAX) {
const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance];
*code = prefix_code.code_;
*extra_bits = prefix_code.extra_bits_;
*code = prefix_code.code;
*extra_bits = prefix_code.extra_bits;
*extra_bits_value = kPrefixEncodeExtraBitsValue[distance];
} else {
VP8LPrefixEncodeNoLUT(distance, code, extra_bits, extra_bits_value);

View File

@@ -13,16 +13,19 @@
// Jyrki Alakuijala (jyrki@google.com)
// Urvang Joshi (urvang@google.com)
#include "src/dsp/dsp.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include "src/dec/vp8li_dec.h"
#include "src/utils/endian_inl_utils.h"
#include <string.h>
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#include "src/dsp/yuv.h"
#include "src/enc/histogram_enc.h"
#include "src/utils/utils.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
// lookup table for small values of log2(int) * (1 << LOG_2_PRECISION_BITS).
// Obtained in Python with:
@@ -479,10 +482,10 @@ void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
const int8_t red = U32ToS8(argb >> 16);
int new_red = red & 0xff;
int new_blue = argb & 0xff;
new_red -= ColorTransformDelta((int8_t)m->green_to_red_, green);
new_red -= ColorTransformDelta((int8_t)m->green_to_red, green);
new_red &= 0xff;
new_blue -= ColorTransformDelta((int8_t)m->green_to_blue_, green);
new_blue -= ColorTransformDelta((int8_t)m->red_to_blue_, red);
new_blue -= ColorTransformDelta((int8_t)m->green_to_blue, green);
new_blue -= ColorTransformDelta((int8_t)m->red_to_blue, red);
new_blue &= 0xff;
data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
}
@@ -580,20 +583,6 @@ static uint32_t ExtraCost_C(const uint32_t* population, int length) {
return cost;
}
// Returns a weighted sum over the element-wise combination of X and Y:
// entries 4 and 5 of both arrays are counted once, and each following pair of
// entries (2*i+2, 2*i+3) is weighted by i, for i in [2, length/2 - 1).
// 'length' must be even (asserted).
static uint32_t ExtraCostCombined_C(const uint32_t* WEBP_RESTRICT X,
const uint32_t* WEBP_RESTRICT Y,
int length) {
int i;
// Weight-1 pair (entries 4 and 5).
uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
assert(length % 2 == 0);
for (i = 2; i < length / 2 - 1; ++i) {
const int xy0 = X[2 * i + 2] + Y[2 * i + 2];
const int xy1 = X[2 * i + 3] + Y[2 * i + 3];
cost += i * (xy0 + xy1);
}
return cost;
}
//------------------------------------------------------------------------------
static void AddVector_C(const uint32_t* WEBP_RESTRICT a,
@@ -609,58 +598,6 @@ static void AddVectorEq_C(const uint32_t* WEBP_RESTRICT a,
for (i = 0; i < size; ++i) out[i] += a[i];
}
// ADD(X, ARG, LEN): fills out->ARG (LEN entries) with the element-wise sum of
// a->ARG and b->ARG, using the is_used_[X] flags to avoid needless work:
//  - both used     -> VP8LAddVector(a, b, out)
//  - only one used -> plain copy of that one
//  - none used     -> out->ARG is zero-filled
// Expects 'a', 'b' and 'out' to be in scope at the expansion site.
#define ADD(X, ARG, LEN) do { \
if (a->is_used_[X]) { \
if (b->is_used_[X]) { \
VP8LAddVector(a->ARG, b->ARG, out->ARG, (LEN)); \
} else { \
memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0])); \
} \
} else if (b->is_used_[X]) { \
memcpy(&out->ARG[0], &b->ARG[0], (LEN) * sizeof(out->ARG[0])); \
} else { \
memset(&out->ARG[0], 0, (LEN) * sizeof(out->ARG[0])); \
} \
} while (0)
// ADD_EQ(X, ARG, LEN): in-place variant, out->ARG += a->ARG. Nothing is done
// when a->ARG is unused; when only a->ARG is used, it is copied over out->ARG.
// Expects 'a' and 'out' to be in scope at the expansion site.
#define ADD_EQ(X, ARG, LEN) do { \
if (a->is_used_[X]) { \
if (out->is_used_[X]) { \
VP8LAddVectorEq(a->ARG, out->ARG, (LEN)); \
} else { \
memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0])); \
} \
} \
} while (0)
// Adds histograms 'a' and 'b' into 'out'. 'out' may be the same object as 'b',
// in which case 'a' is accumulated into it in place. Both inputs must share
// the same palette_code_bits_ (asserted). The five is_used_ flags of 'out'
// become the OR of the corresponding input flags.
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
const VP8LHistogram* WEBP_RESTRICT const b,
VP8LHistogram* WEBP_RESTRICT const out) {
int i;
// Only the literal histogram has a size that depends on palette_code_bits_.
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
assert(a->palette_code_bits_ == b->palette_code_bits_);
if (b != out) {
// out = a + b
ADD(0, literal_, literal_size);
ADD(1, red_, NUM_LITERAL_CODES);
ADD(2, blue_, NUM_LITERAL_CODES);
ADD(3, alpha_, NUM_LITERAL_CODES);
ADD(4, distance_, NUM_DISTANCE_CODES);
for (i = 0; i < 5; ++i) {
out->is_used_[i] = (a->is_used_[i] | b->is_used_[i]);
}
} else {
// out (== b) += a
ADD_EQ(0, literal_, literal_size);
ADD_EQ(1, red_, NUM_LITERAL_CODES);
ADD_EQ(2, blue_, NUM_LITERAL_CODES);
ADD_EQ(3, alpha_, NUM_LITERAL_CODES);
ADD_EQ(4, distance_, NUM_DISTANCE_CODES);
for (i = 0; i < 5; ++i) out->is_used_[i] |= a->is_used_[i];
}
}
#undef ADD
#undef ADD_EQ
//------------------------------------------------------------------------------
// Image transforms.
@@ -710,17 +647,20 @@ GENERATE_PREDICTOR_SUB(13)
//------------------------------------------------------------------------------
VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed_SSE;
VP8LTransformColorFunc VP8LTransformColor;
VP8LTransformColorFunc VP8LTransformColor_SSE;
VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms_SSE;
VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms_SSE;
VP8LFastLog2SlowFunc VP8LFastLog2Slow;
VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;
VP8LCostFunc VP8LExtraCost;
VP8LCostCombinedFunc VP8LExtraCostCombined;
VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
VP8LShannonEntropyFunc VP8LShannonEntropy;
@@ -732,13 +672,16 @@ VP8LAddVectorEqFunc VP8LAddVectorEq;
VP8LVectorMismatchFunc VP8LVectorMismatch;
VP8LBundleColorMapFunc VP8LBundleColorMap;
VP8LBundleColorMapFunc VP8LBundleColorMap_SSE;
VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
VP8LPredictorAddSubFunc VP8LPredictorsSub_SSE[16];
extern VP8CPUInfo VP8GetCPUInfo;
extern void VP8LEncDspInitSSE2(void);
extern void VP8LEncDspInitSSE41(void);
extern void VP8LEncDspInitAVX2(void);
extern void VP8LEncDspInitNEON(void);
extern void VP8LEncDspInitMIPS32(void);
extern void VP8LEncDspInitMIPSdspR2(void);
@@ -760,7 +703,6 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
VP8LFastSLog2Slow = FastSLog2Slow_C;
VP8LExtraCost = ExtraCost_C;
VP8LExtraCostCombined = ExtraCostCombined_C;
VP8LCombinedShannonEntropy = CombinedShannonEntropy_C;
VP8LShannonEntropy = ShannonEntropy_C;
@@ -815,6 +757,11 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
#if defined(WEBP_HAVE_SSE41)
if (VP8GetCPUInfo(kSSE4_1)) {
VP8LEncDspInitSSE41();
#if defined(WEBP_HAVE_AVX2)
if (VP8GetCPUInfo(kAVX2)) {
VP8LEncDspInitAVX2();
}
#endif
}
#endif
}
@@ -850,7 +797,6 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
assert(VP8LFastLog2Slow != NULL);
assert(VP8LFastSLog2Slow != NULL);
assert(VP8LExtraCost != NULL);
assert(VP8LExtraCostCombined != NULL);
assert(VP8LCombinedShannonEntropy != NULL);
assert(VP8LShannonEntropy != NULL);
assert(VP8LGetEntropyUnrefined != NULL);

View File

@@ -0,0 +1,736 @@
// Copyright 2025 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// AVX2 variant of methods for lossless encoder
//
// Author: Vincent Rabaud (vrabaud@google.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_AVX2)
#include <emmintrin.h>
#include <immintrin.h>
#include <assert.h>
#include <stddef.h>
#include "src/dsp/cpu.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#include "src/utils/utils.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Subtract-Green Transform
// Subtracts, in place and modulo 256, the green byte of every pixel from its
// red and blue bytes. Eight pixels are handled per AVX2 iteration; whatever is
// left is forwarded to VP8LSubtractGreenFromBlueAndRed_SSE.
static void SubtractGreenFromBlueAndRed_AVX2(uint32_t* argb_data,
                                             int num_pixels) {
  // Per 32-bit pixel: copy byte 1 (green) into bytes 0 and 2, zero bytes 1
  // and 3 (a -1 index yields 0).
  const __m256i green_to_rb = _mm256_set_epi8(
      -1, 29, -1, 29, -1, 25, -1, 25, -1, 21, -1, 21, -1, 17, -1, 17, -1, 13,
      -1, 13, -1, 9, -1, 9, -1, 5, -1, 5, -1, 1, -1, 1);
  int done = 0;
  while (done + 8 <= num_pixels) {
    __m256i* const pixels = (__m256i*)&argb_data[done];
    const __m256i argb = _mm256_loadu_si256(pixels);
    const __m256i green = _mm256_shuffle_epi8(argb, green_to_rb);  // 0g0g
    _mm256_storeu_si256(pixels, _mm256_sub_epi8(argb, green));
    done += 8;
  }
  // Remaining pixels (fewer than 8) are left to the SSE implementation.
  if (done != num_pixels) {
    VP8LSubtractGreenFromBlueAndRed_SSE(argb_data + done, num_pixels - done);
  }
}
//------------------------------------------------------------------------------
// Color Transform
// For sign-extended multiplying constants, pre-shifted by 5:
// CST_5b(X) moves the low 8 bits of X to the top byte of a 16-bit word,
// reinterprets it as int16_t and shifts right by 5, i.e. it yields the signed
// 8-bit value of X scaled by 8 (relies on an arithmetic right shift of
// negative values).
#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)
// MK_CST_16(HI, LO) broadcasts to every 32-bit lane a word whose upper 16 bits
// are HI and whose lower 16 bits are LO.
#define MK_CST_16(HI, LO) \
_mm256_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
// Applies, in place, the color transform described by 'm' to 'num_pixels'
// pixels: a green-derived delta is subtracted from red, and a green- plus
// red-derived delta is subtracted from blue (all modulo 256). The alpha and
// green bytes are left untouched (the delta is masked with 0x00ff00ff).
// Eight pixels are processed per iteration; the remainder is forwarded to
// VP8LTransformColor_SSE.
static void TransformColor_AVX2(const VP8LMultipliers* WEBP_RESTRICT const m,
uint32_t* WEBP_RESTRICT argb_data,
int num_pixels) {
const __m256i mults_rb =
MK_CST_16(CST_5b(m->green_to_red), CST_5b(m->green_to_blue));
const __m256i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue), 0);
const __m256i mask_rb = _mm256_set1_epi32(0x00ff00ff); // red-blue masks
// Per pixel: places the green byte in the high byte of both 16-bit halves.
const __m256i kCstShuffle = _mm256_set_epi8(
29, -1, 29, -1, 25, -1, 25, -1, 21, -1, 21, -1, 17, -1, 17, -1, 13, -1,
13, -1, 9, -1, 9, -1, 5, -1, 5, -1, 1, -1, 1, -1);
int i;
for (i = 0; i + 8 <= num_pixels; i += 8) {
const __m256i in = _mm256_loadu_si256((__m256i*)&argb_data[i]); // argb
const __m256i A = _mm256_shuffle_epi8(in, kCstShuffle); // g0g0
const __m256i B = _mm256_mulhi_epi16(A, mults_rb); // x dr x db1
const __m256i C = _mm256_slli_epi16(in, 8); // r 0 b 0
const __m256i D = _mm256_mulhi_epi16(C, mults_b2); // x db2 0 0
const __m256i E = _mm256_srli_epi32(D, 16); // 0 0 x db2
const __m256i F = _mm256_add_epi8(E, B); // x dr x db
const __m256i G = _mm256_and_si256(F, mask_rb); // 0 dr 0 db
const __m256i out = _mm256_sub_epi8(in, G);
_mm256_storeu_si256((__m256i*)&argb_data[i], out);
}
// Finish off the remaining pixels (fewer than 8) with the SSE version.
if (i != num_pixels) {
VP8LTransformColor_SSE(m, argb_data + i, num_pixels - i);
}
}
//------------------------------------------------------------------------------
// NOTE(review): SPAN is not referenced by the functions between this #define
// and the matching #undef; the loops below hard-code a step of 8 pixels.
#define SPAN 16
// Accumulates into 'histo' the histogram of the transformed blue byte of
// every pixel of a tile_width x tile_height tile ('stride' is the distance
// between rows, in pixels). Rows are processed 8 pixels at a time when
// tile_width >= 8; the (tile_width & 7) right-most columns are forwarded to
// VP8LCollectColorBlueTransforms_SSE.
static void CollectColorBlueTransforms_AVX2(const uint32_t* WEBP_RESTRICT argb,
int stride, int tile_width,
int tile_height, int green_to_blue,
int red_to_blue, uint32_t histo[]) {
const __m256i mult =
MK_CST_16(CST_5b(red_to_blue) + 256, CST_5b(green_to_blue));
// Per pixel: moves bytes 1 and 2 to the high bytes of the two 16-bit halves
// (low bytes zeroed).
const __m256i perm = _mm256_setr_epi8(
-1, 1, -1, 2, -1, 5, -1, 6, -1, 9, -1, 10, -1, 13, -1, 14, -1, 17, -1, 18,
-1, 21, -1, 22, -1, 25, -1, 26, -1, 29, -1, 30);
// The first load of a row reads 8 pixels unconditionally, hence the guard.
if (tile_width >= 8) {
int y, i;
for (y = 0; y < tile_height; ++y) {
uint8_t values[32];
const uint32_t* const src = argb + y * stride;
const __m256i A1 = _mm256_loadu_si256((const __m256i*)src);
const __m256i B1 = _mm256_shuffle_epi8(A1, perm);
const __m256i C1 = _mm256_mulhi_epi16(B1, mult);
const __m256i D1 = _mm256_sub_epi16(A1, C1);
__m256i E = _mm256_add_epi16(_mm256_srli_epi32(D1, 16), D1);
int x;
// E always holds the result of the previous group of 8 pixels: the next
// load is issued before the histogram is updated from E.
for (x = 8; x + 8 <= tile_width; x += 8) {
const __m256i A2 = _mm256_loadu_si256((const __m256i*)(src + x));
__m256i B2, C2, D2;
_mm256_storeu_si256((__m256i*)values, E);
// Byte 0 of each 32-bit lane is the histogram index.
for (i = 0; i < 32; i += 4) ++histo[values[i]];
B2 = _mm256_shuffle_epi8(A2, perm);
C2 = _mm256_mulhi_epi16(B2, mult);
D2 = _mm256_sub_epi16(A2, C2);
E = _mm256_add_epi16(_mm256_srli_epi32(D2, 16), D2);
}
// Flush the last pending group of the row.
_mm256_storeu_si256((__m256i*)values, E);
for (i = 0; i < 32; i += 4) ++histo[values[i]];
}
}
{
// Columns not covered by the 8-pixel groups.
const int left_over = tile_width & 7;
if (left_over > 0) {
VP8LCollectColorBlueTransforms_SSE(argb + tile_width - left_over, stride,
left_over, tile_height, green_to_blue,
red_to_blue, histo);
}
}
}
// Accumulates into 'histo' the histogram of the transformed red byte of every
// pixel of a tile_width x tile_height tile ('stride' is the distance between
// rows, in pixels). Rows are processed 8 pixels at a time when
// tile_width >= 8; the (tile_width & 7) right-most columns are forwarded to
// VP8LCollectColorRedTransforms_SSE.
static void CollectColorRedTransforms_AVX2(const uint32_t* WEBP_RESTRICT argb,
int stride, int tile_width,
int tile_height, int green_to_red,
uint32_t histo[]) {
const __m256i mult = MK_CST_16(0, CST_5b(green_to_red));
const __m256i mask_g = _mm256_set1_epi32(0x0000ff00);
// The first load of a row reads 8 pixels unconditionally, hence the guard.
if (tile_width >= 8) {
int y, i;
for (y = 0; y < tile_height; ++y) {
uint8_t values[32];
const uint32_t* const src = argb + y * stride;
const __m256i A1 = _mm256_loadu_si256((const __m256i*)src);
const __m256i B1 = _mm256_and_si256(A1, mask_g);
const __m256i C1 = _mm256_madd_epi16(B1, mult);
__m256i D = _mm256_sub_epi16(A1, C1);
int x;
// D always holds the result of the previous group of 8 pixels: the next
// load is issued before the histogram is updated from D.
for (x = 8; x + 8 <= tile_width; x += 8) {
const __m256i A2 = _mm256_loadu_si256((const __m256i*)(src + x));
__m256i B2, C2;
_mm256_storeu_si256((__m256i*)values, D);
// Byte 2 of each 32-bit lane is the histogram index.
for (i = 2; i < 32; i += 4) ++histo[values[i]];
B2 = _mm256_and_si256(A2, mask_g);
C2 = _mm256_madd_epi16(B2, mult);
D = _mm256_sub_epi16(A2, C2);
}
// Flush the last pending group of the row.
_mm256_storeu_si256((__m256i*)values, D);
for (i = 2; i < 32; i += 4) ++histo[values[i]];
}
}
{
// Columns not covered by the 8-pixel groups.
const int left_over = tile_width & 7;
if (left_over > 0) {
VP8LCollectColorRedTransforms_SSE(argb + tile_width - left_over, stride,
left_over, tile_height, green_to_red,
histo);
}
}
}
#undef SPAN
#undef MK_CST_16
//------------------------------------------------------------------------------
// Note we are adding uint32_t's as *signed* int32's (using _mm256_add_epi32).
// But that's ok since the histogram values are less than 1<<28 (max picture
// size).
// out[k] = a[k] + b[k] for k in [0, size). 'size' must be even and at least
// 32 (asserted): the first pass below always runs once.
static void AddVector_AVX2(const uint32_t* WEBP_RESTRICT a,
                           const uint32_t* WEBP_RESTRICT b,
                           uint32_t* WEBP_RESTRICT out, int size) {
  const int bulk_end = size & ~31;  // end of the 32-entries-at-a-time pass
  const int tail = size & 15;       // entries left after the 32/16 passes
  int pos = 0;
  int k;
  // Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
  // NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
  // 2). See the usage in VP8LHistogramAdd().
  assert(size >= 32);
  assert(size % 2 == 0);
  // 32 entries (four vectors) per iteration; all sums are computed before
  // anything is stored.
  do {
    __m256i sum[4];
    for (k = 0; k < 4; ++k) {
      const __m256i va = _mm256_loadu_si256((const __m256i*)&a[pos + 8 * k]);
      const __m256i vb = _mm256_loadu_si256((const __m256i*)&b[pos + 8 * k]);
      sum[k] = _mm256_add_epi32(va, vb);
    }
    for (k = 0; k < 4; ++k) {
      _mm256_storeu_si256((__m256i*)&out[pos + 8 * k], sum[k]);
    }
    pos += 32;
  } while (pos != bulk_end);
  // Optional block of 16 entries (two vectors).
  if ((size & 16) != 0) {
    __m256i sum[2];
    for (k = 0; k < 2; ++k) {
      const __m256i va = _mm256_loadu_si256((const __m256i*)&a[pos + 8 * k]);
      const __m256i vb = _mm256_loadu_si256((const __m256i*)&b[pos + 8 * k]);
      sum[k] = _mm256_add_epi32(va, vb);
    }
    for (k = 0; k < 2; ++k) {
      _mm256_storeu_si256((__m256i*)&out[pos + 8 * k], sum[k]);
    }
    pos += 16;
  }
  // Fewer than 16 entries remain: exactly 8 fit one vector, any other count
  // is handled entry by entry.
  if (tail == 8) {
    const __m256i va = _mm256_loadu_si256((const __m256i*)&a[pos]);
    const __m256i vb = _mm256_loadu_si256((const __m256i*)&b[pos]);
    _mm256_storeu_si256((__m256i*)&out[pos], _mm256_add_epi32(va, vb));
  } else {
    for (k = 0; k < tail; ++k) {
      out[pos + k] = a[pos + k] + b[pos + k];
    }
  }
}
// out[k] += a[k] for k in [0, size). 'size' must be even and at least 32
// (asserted): the first pass below always runs once.
static void AddVectorEq_AVX2(const uint32_t* WEBP_RESTRICT a,
                             uint32_t* WEBP_RESTRICT out, int size) {
  const int bulk_end = size & ~31;  // end of the 32-entries-at-a-time pass
  const int tail = size & 15;       // entries left after the 32/16 passes
  int pos = 0;
  int k;
  // Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
  // NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
  // 2). See the usage in VP8LHistogramAdd().
  assert(size >= 32);
  assert(size % 2 == 0);
  // 32 entries (four vectors) per iteration; all sums are computed before
  // anything is stored.
  do {
    __m256i sum[4];
    for (k = 0; k < 4; ++k) {
      const __m256i va = _mm256_loadu_si256((const __m256i*)&a[pos + 8 * k]);
      const __m256i vo = _mm256_loadu_si256((const __m256i*)&out[pos + 8 * k]);
      sum[k] = _mm256_add_epi32(va, vo);
    }
    for (k = 0; k < 4; ++k) {
      _mm256_storeu_si256((__m256i*)&out[pos + 8 * k], sum[k]);
    }
    pos += 32;
  } while (pos != bulk_end);
  // Optional block of 16 entries (two vectors).
  if ((size & 16) != 0) {
    __m256i sum[2];
    for (k = 0; k < 2; ++k) {
      const __m256i va = _mm256_loadu_si256((const __m256i*)&a[pos + 8 * k]);
      const __m256i vo = _mm256_loadu_si256((const __m256i*)&out[pos + 8 * k]);
      sum[k] = _mm256_add_epi32(va, vo);
    }
    for (k = 0; k < 2; ++k) {
      _mm256_storeu_si256((__m256i*)&out[pos + 8 * k], sum[k]);
    }
    pos += 16;
  }
  // Fewer than 16 entries remain: exactly 8 fit one vector, any other count
  // is handled entry by entry.
  if (tail == 8) {
    const __m256i va = _mm256_loadu_si256((const __m256i*)&a[pos]);
    const __m256i vo = _mm256_loadu_si256((const __m256i*)&out[pos]);
    _mm256_storeu_si256((__m256i*)&out[pos], _mm256_add_epi32(va, vo));
  } else {
    for (k = 0; k < tail; ++k) {
      out[pos + k] += a[pos + k];
    }
  }
}
//------------------------------------------------------------------------------
// Entropy
// Only compiled when a fast count-trailing-zeros is available, since the loop
// below iterates over the set bits of a mask with BitsCtz().
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
// Computes VP8LFastSLog2(sum(X + Y)) + VP8LFastSLog2(sum(X)) minus the sum of
// VP8LFastSLog2() over every non-zero X[k] and every non-zero X[k] + Y[k].
// SIMD is only used to find, 32 entries at a time, which entries are non-zero;
// the log evaluations themselves are scalar.
static uint64_t CombinedShannonEntropy_AVX2(const uint32_t X[256],
const uint32_t Y[256]) {
int i;
uint64_t retval = 0;
// NOTE: despite their names, sumX accumulates X[k] + Y[k] and sumXY
// accumulates X[k]; the final expression is symmetric in the two.
uint32_t sumX = 0, sumXY = 0;
const __m256i zero = _mm256_setzero_si256();
for (i = 0; i < 256; i += 32) {
const __m256i x0 = _mm256_loadu_si256((const __m256i*)(X + i + 0));
const __m256i y0 = _mm256_loadu_si256((const __m256i*)(Y + i + 0));
const __m256i x1 = _mm256_loadu_si256((const __m256i*)(X + i + 8));
const __m256i y1 = _mm256_loadu_si256((const __m256i*)(Y + i + 8));
const __m256i x2 = _mm256_loadu_si256((const __m256i*)(X + i + 16));
const __m256i y2 = _mm256_loadu_si256((const __m256i*)(Y + i + 16));
const __m256i x3 = _mm256_loadu_si256((const __m256i*)(X + i + 24));
const __m256i y3 = _mm256_loadu_si256((const __m256i*)(Y + i + 24));
// Narrow 32 counts to 32 bytes with signed saturation: a byte ends up
// strictly positive when its count is non-zero (assumes counts fit in a
// positive int32 -- TODO confirm against callers).
const __m256i x4 = _mm256_packs_epi16(_mm256_packs_epi32(x0, x1),
_mm256_packs_epi32(x2, x3));
const __m256i y4 = _mm256_packs_epi16(_mm256_packs_epi32(y0, y1),
_mm256_packs_epi32(y2, y3));
// Packed pixels are actually in order: ... 17 16 12 11 10 9 8 3 2 1 0
// The packs work per 128-bit lane, so the 32-bit groups are permuted to put
// the bytes back in index order.
const __m256i x5 = _mm256_permutevar8x32_epi32(
x4, _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0));
const __m256i y5 = _mm256_permutevar8x32_epi32(
y4, _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0));
// Bit j of mx: X[i + j] is non-zero. Bit j of my: X[i + j] or Y[i + j] is.
const uint32_t mx =
(uint32_t)_mm256_movemask_epi8(_mm256_cmpgt_epi8(x5, zero));
uint32_t my =
(uint32_t)_mm256_movemask_epi8(_mm256_cmpgt_epi8(y5, zero)) | mx;
// Visit only the entries where something is non-zero.
while (my) {
const int32_t j = BitsCtz(my);
uint32_t xy;
if ((mx >> j) & 1) {
const int x = X[i + j];
sumXY += x;
retval += VP8LFastSLog2(x);
}
xy = X[i + j] + Y[i + j];
sumX += xy;
retval += VP8LFastSLog2(xy);
my &= my - 1;  // clear the lowest set bit
}
}
retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval;
return retval;
}
#else
// NOTE(review): this macro carries the SSE2 name; nothing in the AVX2 code
// visible here tests it -- confirm whether it is still needed.
#define DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC // won't be faster
#endif
//------------------------------------------------------------------------------
// Returns the number of leading elements that are equal in array1 and array2,
// looking at no more than 'length' elements. The vector code only decides how
// many full groups of 8 match; the final scalar loop refines the answer (and
// re-checks the group where a mismatch was detected).
static int VectorMismatch_AVX2(const uint32_t* const array1,
const uint32_t* const array2, int length) {
int match_len;
if (length >= 24) {
__m256i A0 = _mm256_loadu_si256((const __m256i*)&array1[0]);
__m256i A1 = _mm256_loadu_si256((const __m256i*)&array2[0]);
match_len = 0;
do {
// Loop unrolling and early load both provide a speedup of 10% for the
// current function. Also, max_limit can be MAX_LENGTH=4096 at most.
const __m256i cmpA = _mm256_cmpeq_epi32(A0, A1);
// Look-ahead load of the next group, issued before testing the current one.
const __m256i B0 =
_mm256_loadu_si256((const __m256i*)&array1[match_len + 8]);
const __m256i B1 =
_mm256_loadu_si256((const __m256i*)&array2[match_len + 8]);
if ((uint32_t)_mm256_movemask_epi8(cmpA) != 0xffffffff) break;
match_len += 8;
{
const __m256i cmpB = _mm256_cmpeq_epi32(B0, B1);
A0 = _mm256_loadu_si256((const __m256i*)&array1[match_len + 8]);
A1 = _mm256_loadu_si256((const __m256i*)&array2[match_len + 8]);
if ((uint32_t)_mm256_movemask_epi8(cmpB) != 0xffffffff) break;
match_len += 8;
}
// An iteration reads up to element match_len + 23, hence the margin of 24.
} while (match_len + 24 < length);
} else {
match_len = 0;
// Unroll the potential first two loops.
if (length >= 8 &&
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi32(
_mm256_loadu_si256((const __m256i*)&array1[0]),
_mm256_loadu_si256((const __m256i*)&array2[0]))) == 0xffffffff) {
match_len = 8;
if (length >= 16 &&
(uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi32(
_mm256_loadu_si256((const __m256i*)&array1[8]),
_mm256_loadu_si256((const __m256i*)&array2[8]))) == 0xffffffff) {
match_len = 16;
}
}
}
// Scalar refinement from the last fully matching group onwards.
while (match_len < length && array1[match_len] == array2[match_len]) {
++match_len;
}
return match_len;
}
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
// Packs 1 << xbits consecutive bytes of 'row' into byte 1 of one output pixel
// (the first byte lands in the lowest bits) and sets the top byte of the
// pixel to 0xff. 'xbits' must be in [0, 3] (asserted). 32 input bytes are
// consumed per iteration; the remaining ones are forwarded to
// VP8LBundleColorMap_SSE.
static void BundleColorMap_AVX2(const uint8_t* WEBP_RESTRICT const row,
int width, int xbits,
uint32_t* WEBP_RESTRICT dst) {
int x = 0;
assert(xbits >= 0);
assert(xbits <= 3);
switch (xbits) {
case 0: {
const __m256i ff = _mm256_set1_epi16((short)0xff00);
const __m256i zero = _mm256_setzero_si256();
// Store 0xff000000 | (row[x] << 8).
for (x = 0; x + 32 <= width; x += 32, dst += 32) {
const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
const __m256i in_lo = _mm256_unpacklo_epi8(zero, in);
const __m256i dst0 = _mm256_unpacklo_epi16(in_lo, ff);
const __m256i dst1 = _mm256_unpackhi_epi16(in_lo, ff);
const __m256i in_hi = _mm256_unpackhi_epi8(zero, in);
const __m256i dst2 = _mm256_unpacklo_epi16(in_hi, ff);
const __m256i dst3 = _mm256_unpackhi_epi16(in_hi, ff);
// The unpacks work per 128-bit lane: the low half of each result holds
// pixels from input bytes 0..15, the high half from bytes 16..31, so the
// two halves are stored 16 pixels apart.
_mm256_storeu2_m128i((__m128i*)&dst[16], (__m128i*)&dst[0], dst0);
_mm256_storeu2_m128i((__m128i*)&dst[20], (__m128i*)&dst[4], dst1);
_mm256_storeu2_m128i((__m128i*)&dst[24], (__m128i*)&dst[8], dst2);
_mm256_storeu2_m128i((__m128i*)&dst[28], (__m128i*)&dst[12], dst3);
}
break;
}
case 1: {
const __m256i ff = _mm256_set1_epi16((short)0xff00);
const __m256i mul = _mm256_set1_epi16(0x110);
for (x = 0; x + 32 <= width; x += 32, dst += 16) {
// 0a0b | (where a/b are 4 bits).
const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
const __m256i tmp = _mm256_mullo_epi16(in, mul); // aba0
const __m256i pack = _mm256_and_si256(tmp, ff); // ab00
const __m256i dst0 = _mm256_unpacklo_epi16(pack, ff);
const __m256i dst1 = _mm256_unpackhi_epi16(pack, ff);
// Same per-lane layout as above: the halves are stored 8 pixels apart.
_mm256_storeu2_m128i((__m128i*)&dst[8], (__m128i*)&dst[0], dst0);
_mm256_storeu2_m128i((__m128i*)&dst[12], (__m128i*)&dst[4], dst1);
}
break;
}
case 2: {
const __m256i mask_or = _mm256_set1_epi32((int)0xff000000);
const __m256i mul_cst = _mm256_set1_epi16(0x0104);
const __m256i mask_mul = _mm256_set1_epi16(0x0f00);
for (x = 0; x + 32 <= width; x += 32, dst += 8) {
// 000a000b000c000d | (where a/b/c/d are 2 bits).
const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
const __m256i mul =
_mm256_mullo_epi16(in, mul_cst); // 00ab00b000cd00d0
const __m256i tmp =
_mm256_and_si256(mul, mask_mul); // 00ab000000cd0000
const __m256i shift = _mm256_srli_epi32(tmp, 12); // 00000000ab000000
const __m256i pack = _mm256_or_si256(shift, tmp); // 00000000abcd0000
// Convert to 0xff00**00.
const __m256i res = _mm256_or_si256(pack, mask_or);
// Everything stays within its own 32-bit element: a plain store is fine.
_mm256_storeu_si256((__m256i*)dst, res);
}
break;
}
default: {
assert(xbits == 3);
for (x = 0; x + 32 <= width; x += 32, dst += 4) {
// 0000000a00000000b... | (where a/b are 1 bit).
const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
// Move bit 0 of every byte to bit 7 so that movemask can gather it.
const __m256i shift = _mm256_slli_epi64(in, 7);
const uint32_t move = _mm256_movemask_epi8(shift);
// Each group of 8 mask bits becomes byte 1 of one output pixel.
dst[0] = 0xff000000 | ((move & 0xff) << 8);
dst[1] = 0xff000000 | (move & 0xff00);
dst[2] = 0xff000000 | ((move & 0xff0000) >> 8);
dst[3] = 0xff000000 | ((move & 0xff000000) >> 16);
}
break;
}
}
// Input bytes not covered by the 32-byte groups.
if (x != width) {
VP8LBundleColorMap_SSE(row + x, width - x, xbits, dst);
}
}
//------------------------------------------------------------------------------
// Batch version of Predictor Transform subtraction
// Stores in *avg the per-byte average of *a0 and *a1, rounded down.
static WEBP_INLINE void Average2_m256i(const __m256i* const a0,
                                       const __m256i* const a1,
                                       __m256i* const avg) {
  const __m256i lhs = *a0;
  const __m256i rhs = *a1;
  // _mm256_avg_epu8() rounds up; (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b)
  // & 1), so subtract 1 wherever the two inputs differ in their lowest bit.
  const __m256i rounded_up = _mm256_avg_epu8(lhs, rhs);
  const __m256i odd_sum =
      _mm256_and_si256(_mm256_xor_si256(lhs, rhs), _mm256_set1_epi8(1));
  *avg = _mm256_sub_epi8(rounded_up, odd_sum);
}
// Predictor0: ARGB_BLACK.
// Writes to 'out' the byte-wise difference between each input pixel and
// ARGB_BLACK. 'upper' is not used. Pixels left after the 8-wide loop are
// forwarded to VP8LPredictorsSub_SSE[0].
static void PredictorSub0_AVX2(const uint32_t* in, const uint32_t* upper,
                               int num_pixels, uint32_t* WEBP_RESTRICT out) {
  const __m256i prediction = _mm256_set1_epi32((int)ARGB_BLACK);
  int x = 0;
  (void)upper;
  while (x + 8 <= num_pixels) {
    const __m256i pixels = _mm256_loadu_si256((const __m256i*)(in + x));
    _mm256_storeu_si256((__m256i*)(out + x),
                        _mm256_sub_epi8(pixels, prediction));
    x += 8;
  }
  if (x != num_pixels) {
    VP8LPredictorsSub_SSE[0](in + x, NULL, num_pixels - x, out + x);
  }
}
// GENERATE_PREDICTOR_1(X, IN) defines PredictorSub<X>_AVX2(), which writes to
// 'out' the byte-wise difference between each input pixel and a single
// neighbouring pixel. IN is an lvalue expression written in terms of 'in',
// 'upper' and the loop index 'i' that designates the neighbour of pixel i;
// eight consecutive neighbours are loaded from its address. Pixels left after
// the 8-wide loop are forwarded to VP8LPredictorsSub_SSE[X]; 'upper' is
// advanced through WEBP_OFFSET_PTR (presumably so that a NULL 'upper' is
// tolerated -- TODO confirm against the macro definition).
#define GENERATE_PREDICTOR_1(X, IN) \
static void PredictorSub##X##_AVX2( \
const uint32_t* const in, const uint32_t* const upper, int num_pixels, \
uint32_t* WEBP_RESTRICT const out) { \
int i; \
for (i = 0; i + 8 <= num_pixels; i += 8) { \
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]); \
const __m256i pred = _mm256_loadu_si256((const __m256i*)&(IN)); \
const __m256i res = _mm256_sub_epi8(src, pred); \
_mm256_storeu_si256((__m256i*)&out[i], res); \
} \
if (i != num_pixels) { \
VP8LPredictorsSub_SSE[(X)](in + i, WEBP_OFFSET_PTR(upper, i), \
num_pixels - i, out + i); \
} \
}
GENERATE_PREDICTOR_1(1, in[i - 1]) // Predictor1: L
GENERATE_PREDICTOR_1(2, upper[i]) // Predictor2: T
GENERATE_PREDICTOR_1(3, upper[i + 1]) // Predictor3: TR
GENERATE_PREDICTOR_1(4, upper[i - 1]) // Predictor4: TL
#undef GENERATE_PREDICTOR_1
// Predictor5: avg2(avg2(L, TR), T)
// Writes to 'out' the byte-wise difference between each input pixel and its
// prediction. Pixels left after the 8-wide loop are forwarded to
// VP8LPredictorsSub_SSE[5].
static void PredictorSub5_AVX2(const uint32_t* in, const uint32_t* upper,
                               int num_pixels, uint32_t* WEBP_RESTRICT out) {
  int x = 0;
  while (x + 8 <= num_pixels) {
    const __m256i left = _mm256_loadu_si256((const __m256i*)(in + x - 1));
    const __m256i top = _mm256_loadu_si256((const __m256i*)(upper + x));
    const __m256i top_right =
        _mm256_loadu_si256((const __m256i*)(upper + x + 1));
    const __m256i current = _mm256_loadu_si256((const __m256i*)(in + x));
    __m256i left_top_right, prediction;
    Average2_m256i(&left, &top_right, &left_top_right);
    Average2_m256i(&left_top_right, &top, &prediction);
    _mm256_storeu_si256((__m256i*)(out + x),
                        _mm256_sub_epi8(current, prediction));
    x += 8;
  }
  if (x != num_pixels) {
    VP8LPredictorsSub_SSE[5](in + x, upper + x, num_pixels - x, out + x);
  }
}
// GENERATE_PREDICTOR_2(X, A, B) defines PredictorSub<X>_AVX2(), which writes
// to 'out' the byte-wise difference between each input pixel and the
// rounded-down average (Average2_m256i) of two neighbouring pixels. A and B
// are lvalue expressions written in terms of 'in', 'upper' and the loop index
// 'i'; eight consecutive neighbours are loaded from each address. Pixels left
// after the 8-wide loop are forwarded to VP8LPredictorsSub_SSE[X].
#define GENERATE_PREDICTOR_2(X, A, B) \
static void PredictorSub##X##_AVX2(const uint32_t* in, \
const uint32_t* upper, int num_pixels, \
uint32_t* WEBP_RESTRICT out) { \
int i; \
for (i = 0; i + 8 <= num_pixels; i += 8) { \
const __m256i tA = _mm256_loadu_si256((const __m256i*)&(A)); \
const __m256i tB = _mm256_loadu_si256((const __m256i*)&(B)); \
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]); \
__m256i pred, res; \
Average2_m256i(&tA, &tB, &pred); \
res = _mm256_sub_epi8(src, pred); \
_mm256_storeu_si256((__m256i*)&out[i], res); \
} \
if (i != num_pixels) { \
VP8LPredictorsSub_SSE[(X)](in + i, upper + i, num_pixels - i, out + i); \
} \
}
GENERATE_PREDICTOR_2(6, in[i - 1], upper[i - 1]) // Predictor6: avg(L, TL)
GENERATE_PREDICTOR_2(7, in[i - 1], upper[i]) // Predictor7: avg(L, T)
GENERATE_PREDICTOR_2(8, upper[i - 1], upper[i]) // Predictor8: avg(TL, T)
GENERATE_PREDICTOR_2(9, upper[i], upper[i + 1]) // Predictor9: average(T, TR)
#undef GENERATE_PREDICTOR_2
// Predictor10: avg(avg(L,TL), avg(T, TR)).
// Writes to 'out' the byte-wise difference between each input pixel and its
// prediction. Pixels left after the 8-wide loop are forwarded to
// VP8LPredictorsSub_SSE[10].
static void PredictorSub10_AVX2(const uint32_t* in, const uint32_t* upper,
                                int num_pixels, uint32_t* WEBP_RESTRICT out) {
  int x = 0;
  while (x + 8 <= num_pixels) {
    const __m256i left = _mm256_loadu_si256((const __m256i*)(in + x - 1));
    const __m256i current = _mm256_loadu_si256((const __m256i*)(in + x));
    const __m256i top_left =
        _mm256_loadu_si256((const __m256i*)(upper + x - 1));
    const __m256i top = _mm256_loadu_si256((const __m256i*)(upper + x));
    const __m256i top_right =
        _mm256_loadu_si256((const __m256i*)(upper + x + 1));
    __m256i avg_top, avg_left, prediction;
    Average2_m256i(&top, &top_right, &avg_top);
    Average2_m256i(&left, &top_left, &avg_left);
    Average2_m256i(&avg_top, &avg_left, &prediction);
    _mm256_storeu_si256((__m256i*)(out + x),
                        _mm256_sub_epi8(current, prediction));
    x += 8;
  }
  if (x != num_pixels) {
    VP8LPredictorsSub_SSE[10](in + x, upper + x, num_pixels - x, out + x);
  }
}
// Predictor11: select.
// For each of the eight 32-bit pixels of *A and *B, stores in the matching
// 32-bit element of *out the sum of the absolute differences of their four
// bytes.
static void GetSumAbsDiff32_AVX2(const __m256i* const A, const __m256i* const B,
__m256i* const out) {
// We can unpack with any value on the upper 32 bits, provided it's the same
// on both operands (so that their sum of abs diff is zero). Here we use *A.
const __m256i A_lo = _mm256_unpacklo_epi32(*A, *A);
const __m256i B_lo = _mm256_unpacklo_epi32(*B, *A);
const __m256i A_hi = _mm256_unpackhi_epi32(*A, *A);
const __m256i B_hi = _mm256_unpackhi_epi32(*B, *A);
// One sum per 64-bit element, i.e. per original pixel.
const __m256i s_lo = _mm256_sad_epu8(A_lo, B_lo);
const __m256i s_hi = _mm256_sad_epu8(A_hi, B_hi);
// Unpack and pack both work per 128-bit lane, so the pack restores the
// original pixel order.
*out = _mm256_packs_epi32(s_lo, s_hi);
}
// Select predictor: for each pixel, the prediction is L when
// sum|L - TL| > sum|T - TL|, and T otherwise. Writes to 'out' the byte-wise
// difference between each input pixel and its prediction. Pixels left after
// the 8-wide loop are forwarded to VP8LPredictorsSub_SSE[11].
static void PredictorSub11_AVX2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i + 8 <= num_pixels; i += 8) {
const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
__m256i pa, pb;
GetSumAbsDiff32_AVX2(&T, &TL, &pa); // pa = sum |T-TL|
GetSumAbsDiff32_AVX2(&L, &TL, &pb); // pb = sum |L-TL|
{
const __m256i mask = _mm256_cmpgt_epi32(pb, pa);
const __m256i A = _mm256_and_si256(mask, L);
const __m256i B = _mm256_andnot_si256(mask, T);
const __m256i pred = _mm256_or_si256(A, B); // pred = (pb > pa) ? L : T
const __m256i res = _mm256_sub_epi8(src, pred);
_mm256_storeu_si256((__m256i*)&out[i], res);
}
}
if (i != num_pixels) {
VP8LPredictorsSub_SSE[11](in + i, upper + i, num_pixels - i, out + i);
}
}
// Predictor12: ClampedAddSubtractFull.
// The prediction is L + T - TL computed per byte and clamped to [0, 255].
// Writes to 'out' the byte-wise difference between each input pixel and its
// prediction. Pixels left after the 8-wide loop are forwarded to
// VP8LPredictorsSub_SSE[12].
static void PredictorSub12_AVX2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m256i zero = _mm256_setzero_si256();
for (i = 0; i + 8 <= num_pixels; i += 8) {
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
// Bytes are widened to 16 bits so that L + T - TL cannot wrap.
const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);
const __m256i L_lo = _mm256_unpacklo_epi8(L, zero);
const __m256i L_hi = _mm256_unpackhi_epi8(L, zero);
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
const __m256i T_lo = _mm256_unpacklo_epi8(T, zero);
const __m256i T_hi = _mm256_unpackhi_epi8(T, zero);
const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
const __m256i TL_lo = _mm256_unpacklo_epi8(TL, zero);
const __m256i TL_hi = _mm256_unpackhi_epi8(TL, zero);
const __m256i diff_lo = _mm256_sub_epi16(T_lo, TL_lo);
const __m256i diff_hi = _mm256_sub_epi16(T_hi, TL_hi);
const __m256i pred_lo = _mm256_add_epi16(L_lo, diff_lo);
const __m256i pred_hi = _mm256_add_epi16(L_hi, diff_hi);
// The unsigned-saturating pack performs the clamp to [0, 255].
const __m256i pred = _mm256_packus_epi16(pred_lo, pred_hi);
const __m256i res = _mm256_sub_epi8(src, pred);
_mm256_storeu_si256((__m256i*)&out[i], res);
}
if (i != num_pixels) {
VP8LPredictorsSub_SSE[12](in + i, upper + i, num_pixels - i, out + i);
}
}
// Predictors13: ClampedAddSubtractHalf
// With avg = (L + T) >> 1 per byte, the prediction is
// avg + (avg - TL) / 2 (division truncating toward zero), clamped to
// [0, 255]. Writes to 'out' the byte-wise difference between each input pixel
// and its prediction. Pixels left after the 8-wide loop are forwarded to
// VP8LPredictorsSub_SSE[13].
static void PredictorSub13_AVX2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m256i zero = _mm256_setzero_si256();
for (i = 0; i + 8 <= num_pixels; i += 8) {
const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);
const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
// lo.
const __m256i L_lo = _mm256_unpacklo_epi8(L, zero);
const __m256i T_lo = _mm256_unpacklo_epi8(T, zero);
const __m256i TL_lo = _mm256_unpacklo_epi8(TL, zero);
const __m256i sum_lo = _mm256_add_epi16(T_lo, L_lo);
const __m256i avg_lo = _mm256_srli_epi16(sum_lo, 1);
const __m256i A1_lo = _mm256_sub_epi16(avg_lo, TL_lo);
// bit_fix is -1 where A1 is negative: subtracting it adds 1 so that the
// arithmetic shift below truncates toward zero instead of rounding down.
const __m256i bit_fix_lo = _mm256_cmpgt_epi16(TL_lo, avg_lo);
const __m256i A2_lo = _mm256_sub_epi16(A1_lo, bit_fix_lo);
const __m256i A3_lo = _mm256_srai_epi16(A2_lo, 1);
const __m256i A4_lo = _mm256_add_epi16(avg_lo, A3_lo);
// hi.
const __m256i L_hi = _mm256_unpackhi_epi8(L, zero);
const __m256i T_hi = _mm256_unpackhi_epi8(T, zero);
const __m256i TL_hi = _mm256_unpackhi_epi8(TL, zero);
const __m256i sum_hi = _mm256_add_epi16(T_hi, L_hi);
const __m256i avg_hi = _mm256_srli_epi16(sum_hi, 1);
const __m256i A1_hi = _mm256_sub_epi16(avg_hi, TL_hi);
const __m256i bit_fix_hi = _mm256_cmpgt_epi16(TL_hi, avg_hi);
const __m256i A2_hi = _mm256_sub_epi16(A1_hi, bit_fix_hi);
const __m256i A3_hi = _mm256_srai_epi16(A2_hi, 1);
const __m256i A4_hi = _mm256_add_epi16(avg_hi, A3_hi);
// The unsigned-saturating pack performs the clamp to [0, 255].
const __m256i pred = _mm256_packus_epi16(A4_lo, A4_hi);
const __m256i res = _mm256_sub_epi8(src, pred);
_mm256_storeu_si256((__m256i*)&out[i], res);
}
if (i != num_pixels) {
VP8LPredictorsSub_SSE[13](in + i, upper + i, num_pixels - i, out + i);
}
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8LEncDspInitAVX2(void);
// Points the lossless-encoder function pointers at the AVX2 implementations
// defined in this file. Pointers that are not assigned here keep whatever
// value they had before the call.
WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitAVX2(void) {
  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_AVX2;
  VP8LTransformColor = TransformColor_AVX2;
  VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_AVX2;
  VP8LCollectColorRedTransforms = CollectColorRedTransforms_AVX2;
  VP8LAddVector = AddVector_AVX2;
  VP8LAddVectorEq = AddVectorEq_AVX2;
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
  // CombinedShannonEntropy_AVX2() is only compiled when a fast
  // count-trailing-zeros is available; without it the function does not exist
  // and the previously installed implementation must be kept.
  VP8LCombinedShannonEntropy = CombinedShannonEntropy_AVX2;
#endif
  VP8LVectorMismatch = VectorMismatch_AVX2;
  VP8LBundleColorMap = BundleColorMap_AVX2;

  VP8LPredictorsSub[0] = PredictorSub0_AVX2;
  VP8LPredictorsSub[1] = PredictorSub1_AVX2;
  VP8LPredictorsSub[2] = PredictorSub2_AVX2;
  VP8LPredictorsSub[3] = PredictorSub3_AVX2;
  VP8LPredictorsSub[4] = PredictorSub4_AVX2;
  VP8LPredictorsSub[5] = PredictorSub5_AVX2;
  VP8LPredictorsSub[6] = PredictorSub6_AVX2;
  VP8LPredictorsSub[7] = PredictorSub7_AVX2;
  VP8LPredictorsSub[8] = PredictorSub8_AVX2;
  VP8LPredictorsSub[9] = PredictorSub9_AVX2;
  VP8LPredictorsSub[10] = PredictorSub10_AVX2;
  VP8LPredictorsSub[11] = PredictorSub11_AVX2;
  VP8LPredictorsSub[12] = PredictorSub12_AVX2;
  VP8LPredictorsSub[13] = PredictorSub13_AVX2;
  VP8LPredictorsSub[14] = PredictorSub0_AVX2;  // <- padding security sentinels
  VP8LPredictorsSub[15] = PredictorSub0_AVX2;
}
#else // !WEBP_USE_AVX2
WEBP_DSP_INIT_STUB(VP8LEncDspInitAVX2)
#endif // WEBP_USE_AVX2

View File

@@ -133,60 +133,6 @@ static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
return ((int64_t)temp0 << 32 | temp1);
}
// Weighted sum over the element-wise sum of the arrays X and Y: entries are
// consumed in pairs starting at index 4, and the k-th pair (k = 1, 2, ...) is
// multiplied by k before being accumulated. Entries 0..3 are never read.
// C version of this function:
// int i = 0;
// int64_t cost = 0;
// const uint32_t* pX = &X[4];
// const uint32_t* pY = &Y[4];
// const uint32_t* LoopEnd = &X[length];
// while (pX != LoopEnd) {
// const uint32_t xy0 = *pX + *pY;
// const uint32_t xy1 = *(pX + 1) + *(pY + 1);
// ++i;
// cost += i * xy0;
// cost += i * xy1;
// pX += 2;
// pY += 2;
// }
// return cost;
// NOTE(review): the loop only terminates when pX equals &X[length] exactly
// and pX advances two entries per iteration from &X[4], so 'length' presumably
// has to be even and >= 4 -- confirm against callers.
// NOTE(review): the return type is uint32_t while the return expression
// assembles a 64-bit hi:lo value, so only the low 32 bits reach the caller.
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* WEBP_RESTRICT const X,
const uint32_t* WEBP_RESTRICT const Y,
int length) {
int i, temp0, temp1, temp2, temp3;
const uint32_t* pX = &X[4];
const uint32_t* pY = &Y[4];
const uint32_t* const LoopEnd = &X[length];
// The HI/LO register pair serves as the accumulator: 'mult $zero, $zero'
// clears it, each 'madd' adds i * (X[k] + Y[k]) into it, and 'mfhi'/'mflo'
// read the two halves back into temp0/temp1 after the loop.
__asm__ volatile(
"mult $zero, $zero \n\t"
"xor %[i], %[i], %[i] \n\t"
"beq %[pX], %[LoopEnd], 2f \n\t"
"1: \n\t"
"lw %[temp0], 0(%[pX]) \n\t"
"lw %[temp1], 0(%[pY]) \n\t"
"lw %[temp2], 4(%[pX]) \n\t"
"lw %[temp3], 4(%[pY]) \n\t"
"addiu %[i], %[i], 1 \n\t"
"addu %[temp0], %[temp0], %[temp1] \n\t"
"addu %[temp2], %[temp2], %[temp3] \n\t"
"addiu %[pX], %[pX], 8 \n\t"
"addiu %[pY], %[pY], 8 \n\t"
"madd %[i], %[temp0] \n\t"
"madd %[i], %[temp2] \n\t"
"bne %[pX], %[LoopEnd], 1b \n\t"
"2: \n\t"
"mfhi %[temp0] \n\t"
"mflo %[temp1] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
[i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY)
: [LoopEnd]"r"(LoopEnd)
: "memory", "hi", "lo"
);
// temp0 holds HI and temp1 holds LO at this point.
return ((int64_t)temp0 << 32 | temp1);
}
#define HUFFMAN_COST_PASS \
__asm__ volatile( \
"sll %[temp1], %[temp0], 3 \n\t" \
@@ -299,7 +245,7 @@ static void GetCombinedEntropyUnrefined_MIPS32(
// A..D - offsets
// E - temp variable to tell macro
// if pointer should be incremented
// literal_ and successive histograms could be unaligned
// 'literal' and successive histograms could be unaligned
// so we must use ulw and usw
#define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2) \
"ulw %[temp0], " #A "(%[" #P0 "]) \n\t" \
@@ -388,7 +334,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {
VP8LFastSLog2Slow = FastSLog2Slow_MIPS32;
VP8LFastLog2Slow = FastLog2Slow_MIPS32;
VP8LExtraCost = ExtraCost_MIPS32;
VP8LExtraCostCombined = ExtraCostCombined_MIPS32;
VP8LGetEntropyUnrefined = GetEntropyUnrefined_MIPS32;
VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_MIPS32;
VP8LAddVector = AddVector_MIPS32;

View File

@@ -83,9 +83,9 @@ static void TransformColor_MIPSdspR2(
int num_pixels) {
int temp0, temp1, temp2, temp3, temp4, temp5;
uint32_t argb, argb1, new_red, new_red1;
const uint32_t G_to_R = m->green_to_red_;
const uint32_t G_to_B = m->green_to_blue_;
const uint32_t R_to_B = m->red_to_blue_;
const uint32_t G_to_R = m->green_to_red;
const uint32_t G_to_B = m->green_to_blue;
const uint32_t R_to_B = m->red_to_blue;
uint32_t* const p_loop_end = data + (num_pixels & ~1);
__asm__ volatile (
".set push \n\t"
@@ -152,10 +152,10 @@ static void TransformColor_MIPSdspR2(
const uint32_t red = argb_ >> 16;
uint32_t new_blue = argb_;
new_red = red;
new_red -= ColorTransformDelta(m->green_to_red_, green);
new_red -= ColorTransformDelta(m->green_to_red, green);
new_red &= 0xff;
new_blue -= ColorTransformDelta(m->green_to_blue_, green);
new_blue -= ColorTransformDelta(m->red_to_blue_, red);
new_blue -= ColorTransformDelta(m->green_to_blue, green);
new_blue -= ColorTransformDelta(m->red_to_blue, red);
new_blue &= 0xff;
data[0] = (argb_ & 0xff00ff00u) | (new_red << 16) | (new_blue);
}

View File

@@ -51,9 +51,9 @@
static void TransformColor_MSA(const VP8LMultipliers* WEBP_RESTRICT const m,
uint32_t* WEBP_RESTRICT data, int num_pixels) {
v16u8 src0, dst0;
const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
(m->green_to_red_ << 16));
const v16i8 r2b = (v16i8)__msa_fill_w(m->red_to_blue_);
const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue |
(m->green_to_red << 16));
const v16i8 r2b = (v16i8)__msa_fill_w(m->red_to_blue);
const v16u8 mask0 = { 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255,
13, 255, 13, 255 };
const v16u8 mask1 = { 16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11,

View File

@@ -78,15 +78,15 @@ static void TransformColor_NEON(const VP8LMultipliers* WEBP_RESTRICT const m,
// sign-extended multiplying constants, pre-shifted by 6.
#define CST(X) (((int16_t)(m->X << 8)) >> 6)
const int16_t rb[8] = {
CST(green_to_blue_), CST(green_to_red_),
CST(green_to_blue_), CST(green_to_red_),
CST(green_to_blue_), CST(green_to_red_),
CST(green_to_blue_), CST(green_to_red_)
CST(green_to_blue), CST(green_to_red),
CST(green_to_blue), CST(green_to_red),
CST(green_to_blue), CST(green_to_red),
CST(green_to_blue), CST(green_to_red)
};
const int16x8_t mults_rb = vld1q_s16(rb);
const int16_t b2[8] = {
0, CST(red_to_blue_), 0, CST(red_to_blue_),
0, CST(red_to_blue_), 0, CST(red_to_blue_),
0, CST(red_to_blue), 0, CST(red_to_blue),
0, CST(red_to_blue), 0, CST(red_to_blue),
};
const int16x8_t mults_b2 = vld1q_s16(b2);
#undef CST

View File

@@ -14,11 +14,17 @@
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#include <assert.h>
#include <emmintrin.h>
#include <assert.h>
#include <string.h>
#include "src/dsp/cpu.h"
#include "src/dsp/lossless.h"
#include "src/dsp/common_sse2.h"
#include "src/dsp/lossless_common.h"
#include "src/utils/utils.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
// For sign-extended multiplying constants, pre-shifted by 5:
#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)
@@ -52,9 +58,9 @@ static void SubtractGreenFromBlueAndRed_SSE2(uint32_t* argb_data,
static void TransformColor_SSE2(const VP8LMultipliers* WEBP_RESTRICT const m,
uint32_t* WEBP_RESTRICT argb_data,
int num_pixels) {
const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_),
CST_5b(m->green_to_blue_));
const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0);
const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red),
CST_5b(m->green_to_blue));
const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue), 0);
const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); // alpha-green masks
const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks
int i;
@@ -645,25 +651,43 @@ static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m128i zero = _mm_setzero_si128();
for (i = 0; i + 2 <= num_pixels; i += 2) {
// we can only process two pixels at a time
const __m128i L = _mm_loadl_epi64((const __m128i*)&in[i - 1]);
const __m128i src = _mm_loadl_epi64((const __m128i*)&in[i]);
const __m128i T = _mm_loadl_epi64((const __m128i*)&upper[i]);
const __m128i TL = _mm_loadl_epi64((const __m128i*)&upper[i - 1]);
const __m128i L_lo = _mm_unpacklo_epi8(L, zero);
const __m128i T_lo = _mm_unpacklo_epi8(T, zero);
const __m128i TL_lo = _mm_unpacklo_epi8(TL, zero);
const __m128i sum = _mm_add_epi16(T_lo, L_lo);
const __m128i avg = _mm_srli_epi16(sum, 1);
const __m128i A1 = _mm_sub_epi16(avg, TL_lo);
const __m128i bit_fix = _mm_cmpgt_epi16(TL_lo, avg);
const __m128i A2 = _mm_sub_epi16(A1, bit_fix);
const __m128i A3 = _mm_srai_epi16(A2, 1);
const __m128i A4 = _mm_add_epi16(avg, A3);
const __m128i pred = _mm_packus_epi16(A4, A4);
const __m128i res = _mm_sub_epi8(src, pred);
_mm_storel_epi64((__m128i*)&out[i], res);
for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
const __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
__m128i A4_lo, A4_hi;
// lo.
{
const __m128i L_lo = _mm_unpacklo_epi8(L, zero);
const __m128i T_lo = _mm_unpacklo_epi8(T, zero);
const __m128i TL_lo = _mm_unpacklo_epi8(TL, zero);
const __m128i sum_lo = _mm_add_epi16(T_lo, L_lo);
const __m128i avg_lo = _mm_srli_epi16(sum_lo, 1);
const __m128i A1_lo = _mm_sub_epi16(avg_lo, TL_lo);
const __m128i bit_fix_lo = _mm_cmpgt_epi16(TL_lo, avg_lo);
const __m128i A2_lo = _mm_sub_epi16(A1_lo, bit_fix_lo);
const __m128i A3_lo = _mm_srai_epi16(A2_lo, 1);
A4_lo = _mm_add_epi16(avg_lo, A3_lo);
}
// hi.
{
const __m128i L_hi = _mm_unpackhi_epi8(L, zero);
const __m128i T_hi = _mm_unpackhi_epi8(T, zero);
const __m128i TL_hi = _mm_unpackhi_epi8(TL, zero);
const __m128i sum_hi = _mm_add_epi16(T_hi, L_hi);
const __m128i avg_hi = _mm_srli_epi16(sum_hi, 1);
const __m128i A1_hi = _mm_sub_epi16(avg_hi, TL_hi);
const __m128i bit_fix_hi = _mm_cmpgt_epi16(TL_hi, avg_hi);
const __m128i A2_hi = _mm_sub_epi16(A1_hi, bit_fix_hi);
const __m128i A3_hi = _mm_srai_epi16(A2_hi, 1);
A4_hi = _mm_add_epi16(avg_hi, A3_hi);
}
{
const __m128i pred = _mm_packus_epi16(A4_lo, A4_hi);
const __m128i res = _mm_sub_epi8(src, pred);
_mm_storeu_si128((__m128i*)&out[i], res);
}
}
if (i != num_pixels) {
VP8LPredictorsSub_C[13](in + i, upper + i, num_pixels - i, out + i);
@@ -704,6 +728,15 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
VP8LPredictorsSub[13] = PredictorSub13_SSE2;
VP8LPredictorsSub[14] = PredictorSub0_SSE2; // <- padding security sentinels
VP8LPredictorsSub[15] = PredictorSub0_SSE2;
// SSE exports for AVX and above.
VP8LSubtractGreenFromBlueAndRed_SSE = SubtractGreenFromBlueAndRed_SSE2;
VP8LTransformColor_SSE = TransformColor_SSE2;
VP8LCollectColorBlueTransforms_SSE = CollectColorBlueTransforms_SSE2;
VP8LCollectColorRedTransforms_SSE = CollectColorRedTransforms_SSE2;
VP8LBundleColorMap_SSE = BundleColorMap_SSE2;
memcpy(VP8LPredictorsSub_SSE, VP8LPredictorsSub, sizeof(VP8LPredictorsSub));
}
#else // !WEBP_USE_SSE2

View File

@@ -14,9 +14,14 @@
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE41)
#include <assert.h>
#include <emmintrin.h>
#include <smmintrin.h>
#include <assert.h>
#include "src/dsp/cpu.h"
#include "src/dsp/lossless.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Cost operations.
@@ -44,29 +49,6 @@ static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) {
return HorizontalSum_SSE41(cost);
}
// Weighted sum over the element-wise sum of the arrays 'a' and 'b', computed
// in four 32-bit lanes. Each adjacent pair of entries (i, i + 1), for even
// i >= 4, contributes (a[i] + a[i+1] + b[i] + b[i+1]) * ((i - 2) / 2).
// Entries 0..3 are never read.
// 'length' must be a multiple of 8 (asserted). Entries 4..7 are read
// unconditionally, so presumably length >= 8 -- TODO confirm with callers.
// Returns HorizontalSum_SSE41() of the four accumulator lanes.
static uint32_t ExtraCostCombined_SSE41(const uint32_t* WEBP_RESTRICT const a,
const uint32_t* WEBP_RESTRICT const b,
int length) {
int i;
// Seed the lanes with entries 4..7: weight 1 for indices 4 and 5, weight 2
// for indices 6 and 7 (_mm_set_epi32 lists lanes from highest to lowest).
__m128i cost = _mm_add_epi32(_mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]),
_mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4]));
assert(length % 8 == 0);
for (i = 8; i + 8 <= length; i += 8) {
// j is the weight of the first pair (i, i + 1) in this group of eight.
const int j = (i - 2) >> 1;
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i]);
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
// Weights for the four pairs of this group, lowest lane first: j .. j + 3.
const __m128i w = _mm_set_epi32(j + 3, j + 2, j + 1, j);
// Horizontal add folds adjacent entries, giving one sum per pair.
const __m128i a2 = _mm_hadd_epi32(a0, a1);
const __m128i b2 = _mm_hadd_epi32(b0, b1);
const __m128i mul = _mm_mullo_epi32(_mm_add_epi32(a2, b2), w);
cost = _mm_add_epi32(mul, cost);
}
return HorizontalSum_SSE41(cost);
}
//------------------------------------------------------------------------------
// Subtract-Green Transform
@@ -195,10 +177,14 @@ extern void VP8LEncDspInitSSE41(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {
VP8LExtraCost = ExtraCost_SSE41;
VP8LExtraCostCombined = ExtraCostCombined_SSE41;
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41;
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE41;
VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE41;
// SSE exports for AVX and above.
VP8LSubtractGreenFromBlueAndRed_SSE = SubtractGreenFromBlueAndRed_SSE41;
VP8LCollectColorBlueTransforms_SSE = CollectColorBlueTransforms_SSE41;
VP8LCollectColorRedTransforms_SSE = CollectColorRedTransforms_SSE41;
}
#else // !WEBP_USE_SSE41

View File

@@ -299,9 +299,9 @@ static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m,
uint32_t* dst) {
int temp0, temp1, temp2, temp3, temp4, temp5;
uint32_t argb, argb1, new_red;
const uint32_t G_to_R = m->green_to_red_;
const uint32_t G_to_B = m->green_to_blue_;
const uint32_t R_to_B = m->red_to_blue_;
const uint32_t G_to_R = m->green_to_red;
const uint32_t G_to_B = m->green_to_blue;
const uint32_t R_to_B = m->red_to_blue;
const uint32_t* const p_loop_end = src + (num_pixels & ~1);
__asm__ volatile (
".set push \n\t"

View File

@@ -290,9 +290,9 @@ static void TransformColorInverse_MSA(const VP8LMultipliers* const m,
const uint32_t* src, int num_pixels,
uint32_t* dst) {
v16u8 src0, dst0;
const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
(m->green_to_red_ << 16));
const v16i8 r2b = (v16i8)__msa_fill_w(m->red_to_blue_);
const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue |
(m->green_to_red << 16));
const v16i8 r2b = (v16i8)__msa_fill_w(m->red_to_blue);
const v16u8 mask0 = { 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255,
13, 255, 13, 255 };
const v16u8 mask1 = { 16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11,

View File

@@ -19,6 +19,7 @@
#include "src/dsp/lossless.h"
#include "src/dsp/neon.h"
#include "src/webp/format_constants.h"
//------------------------------------------------------------------------------
// Colorspace conversion functions
@@ -551,15 +552,15 @@ static void TransformColorInverse_NEON(const VP8LMultipliers* const m,
// sign-extended multiplying constants, pre-shifted by 6.
#define CST(X) (((int16_t)(m->X << 8)) >> 6)
const int16_t rb[8] = {
CST(green_to_blue_), CST(green_to_red_),
CST(green_to_blue_), CST(green_to_red_),
CST(green_to_blue_), CST(green_to_red_),
CST(green_to_blue_), CST(green_to_red_)
CST(green_to_blue), CST(green_to_red),
CST(green_to_blue), CST(green_to_red),
CST(green_to_blue), CST(green_to_red),
CST(green_to_blue), CST(green_to_red)
};
const int16x8_t mults_rb = vld1q_s16(rb);
const int16_t b2[8] = {
0, CST(red_to_blue_), 0, CST(red_to_blue_),
0, CST(red_to_blue_), 0, CST(red_to_blue_),
0, CST(red_to_blue), 0, CST(red_to_blue),
0, CST(red_to_blue), 0, CST(red_to_blue),
};
const int16x8_t mults_b2 = vld1q_s16(b2);
#undef CST

View File

@@ -15,10 +15,15 @@
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
#include <string.h>
#include "src/dsp/common_sse2.h"
#include "src/dsp/cpu.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#include <emmintrin.h>
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Predictor Transform
@@ -462,8 +467,8 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
#define MK_CST_16(HI, LO) \
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
const __m128i mults_rb = MK_CST_16(CST(green_to_red_), CST(green_to_blue_));
const __m128i mults_b2 = MK_CST_16(CST(red_to_blue_), 0);
const __m128i mults_rb = MK_CST_16(CST(green_to_red), CST(green_to_blue));
const __m128i mults_b2 = MK_CST_16(CST(red_to_blue), 0);
#undef MK_CST_16
#undef CST
const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); // alpha-green masks
@@ -707,6 +712,15 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE2(void) {
VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_SSE2;
VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_SSE2;
VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE2;
// SSE exports for AVX and above.
memcpy(VP8LPredictorsAdd_SSE, VP8LPredictorsAdd, sizeof(VP8LPredictorsAdd));
VP8LAddGreenToBlueAndRed_SSE = AddGreenToBlueAndRed_SSE2;
VP8LTransformColorInverse_SSE = TransformColorInverse_SSE2;
VP8LConvertBGRAToRGB_SSE = ConvertBGRAToRGB_SSE2;
VP8LConvertBGRAToRGBA_SSE = ConvertBGRAToRGBA_SSE2;
}
#else // !WEBP_USE_SSE2

View File

@@ -12,10 +12,12 @@
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE41)
#include <emmintrin.h>
#include <smmintrin.h>
#include "src/dsp/common_sse41.h"
#include "src/webp/types.h"
#include "src/dsp/cpu.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
//------------------------------------------------------------------------------
// Color-space conversion functions
@@ -26,9 +28,9 @@ static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,
// sign-extended multiplying constants, pre-shifted by 5.
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
const __m128i mults_rb =
_mm_set1_epi32((int)((uint32_t)CST(green_to_red_) << 16 |
(CST(green_to_blue_) & 0xffff)));
const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue_));
_mm_set1_epi32((int)((uint32_t)CST(green_to_red) << 16 |
(CST(green_to_blue) & 0xffff)));
const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue));
#undef CST
const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00);
const __m128i perm1 = _mm_setr_epi8(-1, 1, -1, 1, -1, 5, -1, 5,
@@ -124,6 +126,10 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE41(void) {
VP8LTransformColorInverse = TransformColorInverse_SSE41;
VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE41;
VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE41;
// SSE exports for AVX and above.
VP8LTransformColorInverse_SSE = TransformColorInverse_SSE41;
VP8LConvertBGRAToRGB_SSE = ConvertBGRAToRGB_SSE41;
}
#else // !WEBP_USE_SSE41

View File

@@ -12,7 +12,10 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stddef.h>
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/utils/rescaler_utils.h"

View File

@@ -17,8 +17,12 @@
#include <emmintrin.h>
#include <assert.h>
#include <stddef.h>
#include "src/dsp/cpu.h"
#include "src/utils/rescaler_utils.h"
#include "src/utils/utils.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Implementations of critical functions ImportRow / ExportRow

View File

@@ -14,7 +14,9 @@
#include <assert.h>
#include <stdlib.h> // for abs()
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/webp/types.h"
#if !defined(WEBP_REDUCE_SIZE)

View File

@@ -14,11 +14,13 @@
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#include <assert.h>
#include <emmintrin.h>
#include <assert.h>
#include "src/dsp/common_sse2.h"
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
#if !defined(WEBP_DISABLE_STATS)

View File

@@ -11,10 +11,14 @@
//
// Author: somnath@google.com (Somnath Banerjee)
#include <assert.h>
#include <stddef.h>
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/dsp/yuv.h"
#include <assert.h>
#include "src/webp/decode.h"
//------------------------------------------------------------------------------
// Fancy upsampler

View File

@@ -14,11 +14,15 @@
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
#include <assert.h>
#include <emmintrin.h>
#include <string.h>
#include "src/webp/types.h"
#include "src/dsp/cpu.h"
#include "src/dsp/yuv.h"
#include "src/webp/decode.h"
#ifdef FANCY_UPSAMPLING

View File

@@ -14,11 +14,15 @@
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE41)
#include <smmintrin.h>
#include <assert.h>
#include <smmintrin.h>
#include <string.h>
#include "src/webp/types.h"
#include "src/dsp/cpu.h"
#include "src/dsp/yuv.h"
#include "src/webp/decode.h"
#ifdef FANCY_UPSAMPLING

View File

@@ -11,11 +11,15 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/yuv.h"
#include <assert.h>
#include <stdlib.h>
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/dsp/yuv.h"
#include "src/webp/decode.h"
//-----------------------------------------------------------------------------
// Plain-C version

View File

@@ -35,8 +35,10 @@
#ifndef WEBP_DSP_YUV_H_
#define WEBP_DSP_YUV_H_
#include "src/dsp/dsp.h"
#include "src/dec/vp8_dec.h"
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// YUV -> RGB conversion

View File

@@ -18,6 +18,7 @@
#include <assert.h>
#include <stdlib.h>
#include "src/dsp/dsp.h"
#include "src/dsp/neon.h"
//-----------------------------------------------------------------------------

View File

@@ -14,12 +14,16 @@
#include "src/dsp/yuv.h"
#if defined(WEBP_USE_SSE2)
#include <stdlib.h>
#include <emmintrin.h>
#include <stdlib.h>
#include "src/dsp/common_sse2.h"
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h"
#include "src/webp/types.h"
//-----------------------------------------------------------------------------
// Convert spans of 32 pixels to various RGB formats for the fancy upsampler.

View File

@@ -14,12 +14,17 @@
#include "src/dsp/yuv.h"
#if defined(WEBP_USE_SSE41)
#include <stdlib.h>
#include <emmintrin.h>
#include <smmintrin.h>
#include <stdlib.h>
#include "src/dsp/common_sse41.h"
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h"
#include "src/webp/types.h"
//-----------------------------------------------------------------------------
// Convert spans of 32 pixels to various RGB formats for the fancy upsampler.

View File

@@ -15,10 +15,13 @@
#include <stdlib.h>
#include <string.h>
#include "src/enc/vp8i_enc.h"
#include "src/dsp/dsp.h"
#include "src/webp/types.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/bit_writer_utils.h"
#include "src/utils/filters_utils.h"
#include "src/utils/quant_levels_utils.h"
#include "src/utils/thread_utils.h"
#include "src/utils/utils.h"
#include "src/webp/encode.h"
#include "src/webp/format_constants.h"
@@ -86,7 +89,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
ok = VP8LEncodeStream(&config, &picture, bw);
WebPPictureFree(&picture);
ok = ok && !bw->error_;
ok = ok && !bw->error;
if (!ok) {
VP8LBitWriterWipeOut(bw);
return 0;
@@ -138,7 +141,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
!reduce_levels, &tmp_bw, &result->stats);
if (ok) {
output = VP8LBitWriterFinish(&tmp_bw);
if (tmp_bw.error_) {
if (tmp_bw.error) {
VP8LBitWriterWipeOut(&tmp_bw);
memset(&result->bw, 0, sizeof(result->bw));
return 0;
@@ -173,7 +176,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
if (method != ALPHA_NO_COMPRESSION) {
VP8LBitWriterWipeOut(&tmp_bw);
}
ok = ok && !result->bw.error_;
ok = ok && !result->bw.error;
result->score = VP8BitWriterSize(&result->bw);
return ok;
}
@@ -298,7 +301,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
int quality, int method, int filter,
int effort_level,
uint8_t** const output, size_t* const output_size) {
const WebPPicture* const pic = enc->pic_;
const WebPPicture* const pic = enc->pic;
const int width = pic->width;
const int height = pic->height;
@@ -357,7 +360,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
#if !defined(WEBP_DISABLE_STATS)
if (pic->stats != NULL) { // need stats?
pic->stats->coded_size += (int)(*output_size);
enc->sse_[3] = sse;
enc->sse[3] = sse;
}
#endif
}
@@ -371,7 +374,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
static int CompressAlphaJob(void* arg1, void* unused) {
VP8Encoder* const enc = (VP8Encoder*)arg1;
const WebPConfig* config = enc->config_;
const WebPConfig* config = enc->config;
uint8_t* alpha_data = NULL;
size_t alpha_size = 0;
const int effort_level = config->method; // maps to [0..6]
@@ -387,19 +390,19 @@ static int CompressAlphaJob(void* arg1, void* unused) {
WebPSafeFree(alpha_data);
return 0;
}
enc->alpha_data_size_ = (uint32_t)alpha_size;
enc->alpha_data_ = alpha_data;
enc->alpha_data_size = (uint32_t)alpha_size;
enc->alpha_data = alpha_data;
(void)unused;
return 1;
}
void VP8EncInitAlpha(VP8Encoder* const enc) {
WebPInitAlphaProcessing();
enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
enc->alpha_data_ = NULL;
enc->alpha_data_size_ = 0;
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
enc->has_alpha = WebPPictureHasTransparency(enc->pic);
enc->alpha_data = NULL;
enc->alpha_data_size = 0;
if (enc->thread_level > 0) {
WebPWorker* const worker = &enc->alpha_worker;
WebPGetWorkerInterface()->Init(worker);
worker->data1 = enc;
worker->data2 = NULL;
@@ -408,12 +411,12 @@ void VP8EncInitAlpha(VP8Encoder* const enc) {
}
int VP8EncStartAlpha(VP8Encoder* const enc) {
if (enc->has_alpha_) {
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
if (enc->has_alpha) {
if (enc->thread_level > 0) {
WebPWorker* const worker = &enc->alpha_worker;
// Makes sure worker is good to go.
if (!WebPGetWorkerInterface()->Reset(worker)) {
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
return WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
WebPGetWorkerInterface()->Launch(worker);
return 1;
@@ -425,27 +428,27 @@ int VP8EncStartAlpha(VP8Encoder* const enc) {
}
int VP8EncFinishAlpha(VP8Encoder* const enc) {
if (enc->has_alpha_) {
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
if (enc->has_alpha) {
if (enc->thread_level > 0) {
WebPWorker* const worker = &enc->alpha_worker;
if (!WebPGetWorkerInterface()->Sync(worker)) return 0; // error
}
}
return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
return WebPReportProgress(enc->pic, enc->percent + 20, &enc->percent);
}
int VP8EncDeleteAlpha(VP8Encoder* const enc) {
int ok = 1;
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
if (enc->thread_level > 0) {
WebPWorker* const worker = &enc->alpha_worker;
// finish anything left in flight
ok = WebPGetWorkerInterface()->Sync(worker);
// still need to end the worker, even if !ok
WebPGetWorkerInterface()->End(worker);
}
WebPSafeFree(enc->alpha_data_);
enc->alpha_data_ = NULL;
enc->alpha_data_size_ = 0;
enc->has_alpha_ = 0;
WebPSafeFree(enc->alpha_data);
enc->alpha_data = NULL;
enc->alpha_data_size = 0;
enc->has_alpha = 0;
return ok;
}

View File

@@ -11,13 +11,17 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "src/dec/common_dec.h"
#include "src/dsp/dsp.h"
#include "src/enc/vp8i_enc.h"
#include "src/enc/cost_enc.h"
#include "src/utils/thread_utils.h"
#include "src/utils/utils.h"
#include "src/webp/encode.h"
#include "src/webp/types.h"
#define MAX_ITERS_K_MEANS 6
@@ -27,8 +31,8 @@
static void SmoothSegmentMap(VP8Encoder* const enc) {
int n, x, y;
const int w = enc->mb_w_;
const int h = enc->mb_h_;
const int w = enc->mb_w;
const int h = enc->mb_h;
const int majority_cnt_3_x_3_grid = 5;
uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp));
assert((uint64_t)(w * h) == (uint64_t)w * h); // no overflow, as per spec
@@ -37,17 +41,17 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
for (y = 1; y < h - 1; ++y) {
for (x = 1; x < w - 1; ++x) {
int cnt[NUM_MB_SEGMENTS] = { 0 };
const VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
int majority_seg = mb->segment_;
const VP8MBInfo* const mb = &enc->mb_info[x + w * y];
int majority_seg = mb->segment;
// Check the 8 neighbouring segment values.
cnt[mb[-w - 1].segment_]++; // top-left
cnt[mb[-w + 0].segment_]++; // top
cnt[mb[-w + 1].segment_]++; // top-right
cnt[mb[ - 1].segment_]++; // left
cnt[mb[ + 1].segment_]++; // right
cnt[mb[ w - 1].segment_]++; // bottom-left
cnt[mb[ w + 0].segment_]++; // bottom
cnt[mb[ w + 1].segment_]++; // bottom-right
cnt[mb[-w - 1].segment]++; // top-left
cnt[mb[-w + 0].segment]++; // top
cnt[mb[-w + 1].segment]++; // top-right
cnt[mb[ - 1].segment]++; // left
cnt[mb[ + 1].segment]++; // right
cnt[mb[ w - 1].segment]++; // bottom-left
cnt[mb[ w + 0].segment]++; // bottom
cnt[mb[ w + 1].segment]++; // bottom-right
for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
if (cnt[n] >= majority_cnt_3_x_3_grid) {
majority_seg = n;
@@ -59,15 +63,15 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
}
for (y = 1; y < h - 1; ++y) {
for (x = 1; x < w - 1; ++x) {
VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
mb->segment_ = tmp[x + y * w];
VP8MBInfo* const mb = &enc->mb_info[x + w * y];
mb->segment = tmp[x + y * w];
}
}
WebPSafeFree(tmp);
}
//------------------------------------------------------------------------------
// set segment susceptibility alpha_ / beta_
// set segment susceptibility 'alpha' / 'beta'
static WEBP_INLINE int clip(int v, int m, int M) {
return (v < m) ? m : (v > M) ? M : v;
@@ -76,7 +80,7 @@ static WEBP_INLINE int clip(int v, int m, int M) {
static void SetSegmentAlphas(VP8Encoder* const enc,
const int centers[NUM_MB_SEGMENTS],
int mid) {
const int nb = enc->segment_hdr_.num_segments_;
const int nb = enc->segment_hdr.num_segments;
int min = centers[0], max = centers[0];
int n;
@@ -91,8 +95,8 @@ static void SetSegmentAlphas(VP8Encoder* const enc,
for (n = 0; n < nb; ++n) {
const int alpha = 255 * (centers[n] - mid) / (max - min);
const int beta = 255 * (centers[n] - min) / (max - min);
enc->dqm_[n].alpha_ = clip(alpha, -127, 127);
enc->dqm_[n].beta_ = clip(beta, 0, 255);
enc->dqm[n].alpha = clip(alpha, -127, 127);
enc->dqm[n].beta = clip(beta, 0, 255);
}
}
@@ -131,11 +135,11 @@ static void InitHistogram(VP8Histogram* const histo) {
static void AssignSegments(VP8Encoder* const enc,
const int alphas[MAX_ALPHA + 1]) {
// 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
// 'num_segments' is previously validated and <= NUM_MB_SEGMENTS, but an
// explicit check is needed to avoid spurious warning about 'n + 1' exceeding
// array bounds of 'centers' with some compilers (noticed with gcc-4.9).
const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ?
enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS;
const int nb = (enc->segment_hdr.num_segments < NUM_MB_SEGMENTS) ?
enc->segment_hdr.num_segments : NUM_MB_SEGMENTS;
int centers[NUM_MB_SEGMENTS];
int weighted_average = 0;
int map[MAX_ALPHA + 1];
@@ -200,15 +204,15 @@ static void AssignSegments(VP8Encoder* const enc,
}
// Map each original value to the closest centroid
for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
VP8MBInfo* const mb = &enc->mb_info_[n];
const int alpha = mb->alpha_;
mb->segment_ = map[alpha];
mb->alpha_ = centers[map[alpha]]; // for the record.
for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
VP8MBInfo* const mb = &enc->mb_info[n];
const int alpha = mb->alpha;
mb->segment = map[alpha];
mb->alpha = centers[map[alpha]]; // for the record.
}
if (nb > 1) {
const int smooth = (enc->config_->preprocessing & 1);
const int smooth = (enc->config->preprocessing & 1);
if (smooth) SmoothSegmentMap(enc);
}
@@ -220,7 +224,7 @@ static void AssignSegments(VP8Encoder* const enc,
// susceptibility and set best modes for this macroblock.
// Segment assignment is done later.
// Number of modes to inspect for alpha_ evaluation. We don't need to test all
// Number of modes to inspect for 'alpha' evaluation. We don't need to test all
// the possible modes during the analysis phase: we risk falling into a local
// optimum, or be subject to boundary effect
#define MAX_INTRA16_MODE 2
@@ -239,8 +243,8 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
int alpha;
InitHistogram(&histo);
VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC,
it->yuv_p_ + VP8I16ModeOffsets[mode],
VP8CollectHistogram(it->yuv_in + Y_OFF_ENC,
it->yuv_p + VP8I16ModeOffsets[mode],
0, 16, &histo);
alpha = GetAlpha(&histo);
if (IS_BETTER_ALPHA(alpha, best_alpha)) {
@@ -255,12 +259,12 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
static int FastMBAnalyze(VP8EncIterator* const it) {
// Empirical cut-off value, should be around 16 (~=block size). We use the
// [8-17] range and favor intra4 at high quality, intra16 for low quality.
const int q = (int)it->enc_->config_->quality;
const int q = (int)it->enc->config->quality;
const uint32_t kThreshold = 8 + (17 - 8) * q / 100;
int k;
uint32_t dc[16], m, m2;
for (k = 0; k < 16; k += 4) {
VP8Mean16x4(it->yuv_in_ + Y_OFF_ENC + k * BPS, &dc[k]);
VP8Mean16x4(it->yuv_in + Y_OFF_ENC + k * BPS, &dc[k]);
}
for (m = 0, m2 = 0, k = 0; k < 16; ++k) {
m += dc[k];
@@ -287,8 +291,8 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
VP8Histogram histo;
int alpha;
InitHistogram(&histo);
VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC,
it->yuv_p_ + VP8UVModeOffsets[mode],
VP8CollectHistogram(it->yuv_in + U_OFF_ENC,
it->yuv_p + VP8UVModeOffsets[mode],
16, 16 + 4 + 4, &histo);
alpha = GetAlpha(&histo);
if (IS_BETTER_ALPHA(alpha, best_alpha)) {
@@ -307,14 +311,14 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
static void MBAnalyze(VP8EncIterator* const it,
int alphas[MAX_ALPHA + 1],
int* const alpha, int* const uv_alpha) {
const VP8Encoder* const enc = it->enc_;
const VP8Encoder* const enc = it->enc;
int best_alpha, best_uv_alpha;
VP8SetIntra16Mode(it, 0); // default: Intra16, DC_PRED
VP8SetSkip(it, 0); // not skipped
VP8SetSegment(it, 0); // default segment, spec-wise.
if (enc->method_ <= 1) {
if (enc->method <= 1) {
best_alpha = FastMBAnalyze(it);
} else {
best_alpha = MBAnalyzeBestIntra16Mode(it);
@@ -325,7 +329,7 @@ static void MBAnalyze(VP8EncIterator* const it,
best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;
best_alpha = FinalAlphaValue(best_alpha);
alphas[best_alpha]++;
it->mb_->alpha_ = best_alpha; // for later remapping.
it->mb->alpha = best_alpha; // for later remapping.
// Accumulate for later complexity analysis.
*alpha += best_alpha; // mixed susceptibility (not just luma)
@@ -333,11 +337,11 @@ static void MBAnalyze(VP8EncIterator* const it,
}
static void DefaultMBInfo(VP8MBInfo* const mb) {
mb->type_ = 1; // I16x16
mb->uv_mode_ = 0;
mb->skip_ = 0; // not skipped
mb->segment_ = 0; // default segment
mb->alpha_ = 0;
mb->type = 1; // I16x16
mb->uv_mode = 0;
mb->skip = 0; // not skipped
mb->segment = 0; // default segment
mb->alpha = 0;
}
//------------------------------------------------------------------------------
@@ -352,16 +356,16 @@ static void DefaultMBInfo(VP8MBInfo* const mb) {
static void ResetAllMBInfo(VP8Encoder* const enc) {
int n;
for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
DefaultMBInfo(&enc->mb_info_[n]);
for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
DefaultMBInfo(&enc->mb_info[n]);
}
// Default susceptibilities.
enc->dqm_[0].alpha_ = 0;
enc->dqm_[0].beta_ = 0;
// Note: we can't compute this alpha_ / uv_alpha_ -> set to default value.
enc->alpha_ = 0;
enc->uv_alpha_ = 0;
WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
enc->dqm[0].alpha = 0;
enc->dqm[0].beta = 0;
// Note: we can't compute this 'alpha' / 'uv_alpha' -> set to default value.
enc->alpha = 0;
enc->uv_alpha = 0;
WebPReportProgress(enc->pic, enc->percent + 20, &enc->percent);
}
// struct used to collect job result
@@ -409,7 +413,7 @@ static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
job->worker.hook = DoSegmentsJob;
VP8IteratorInit(enc, &job->it);
VP8IteratorSetRow(&job->it, start_row);
VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w_);
VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w);
memset(job->alphas, 0, sizeof(job->alphas));
job->alpha = 0;
job->uv_alpha = 0;
@@ -422,17 +426,17 @@ static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
int VP8EncAnalyze(VP8Encoder* const enc) {
int ok = 1;
const int do_segments =
enc->config_->emulate_jpeg_size || // We need the complexity evaluation.
(enc->segment_hdr_.num_segments_ > 1) ||
(enc->method_ <= 1); // for method 0 - 1, we need preds_[] to be filled.
enc->config->emulate_jpeg_size || // We need the complexity evaluation.
(enc->segment_hdr.num_segments > 1) ||
(enc->method <= 1); // for method 0 - 1, we need preds[] to be filled.
if (do_segments) {
const int last_row = enc->mb_h_;
const int total_mb = last_row * enc->mb_w_;
const int last_row = enc->mb_h;
const int total_mb = last_row * enc->mb_w;
#ifdef WEBP_USE_THREAD
// We give a little more than a half work to the main thread.
const int split_row = (9 * last_row + 15) >> 4;
const int kMinSplitRow = 2; // minimal rows needed for mt to be worth it
const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow);
const int do_mt = (enc->thread_level > 0) && (split_row >= kMinSplitRow);
#else
const int do_mt = 0;
#endif
@@ -467,17 +471,16 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
}
worker_interface->End(&main_job.worker);
if (ok) {
enc->alpha_ = main_job.alpha / total_mb;
enc->uv_alpha_ = main_job.uv_alpha / total_mb;
enc->alpha = main_job.alpha / total_mb;
enc->uv_alpha = main_job.uv_alpha / total_mb;
AssignSegments(enc, main_job.alphas);
}
} else { // Use only one default segment.
ResetAllMBInfo(enc);
}
if (!ok) {
return WebPEncodingSetError(enc->pic_,
return WebPEncodingSetError(enc->pic,
VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise
}
return ok;
}

View File

@@ -22,6 +22,8 @@
#include "src/enc/histogram_enc.h"
#include "src/utils/color_cache_utils.h"
#include "src/utils/utils.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
#define VALUES_IN_BYTE 256
@@ -31,11 +33,11 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
const PixOrCopy v);
typedef struct {
uint32_t alpha_[VALUES_IN_BYTE];
uint32_t red_[VALUES_IN_BYTE];
uint32_t blue_[VALUES_IN_BYTE];
uint32_t distance_[NUM_DISTANCE_CODES];
uint32_t* literal_;
uint32_t alpha[VALUES_IN_BYTE];
uint32_t red[VALUES_IN_BYTE];
uint32_t blue[VALUES_IN_BYTE];
uint32_t distance[NUM_DISTANCE_CODES];
uint32_t* literal;
} CostModel;
static void ConvertPopulationCountTableToBitEstimates(
@@ -62,30 +64,25 @@ static void ConvertPopulationCountTableToBitEstimates(
static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
const VP8LBackwardRefs* const refs) {
int ok = 0;
VP8LRefsCursor c = VP8LRefsCursorInit(refs);
VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
if (histo == NULL) goto Error;
// The following code is similar to VP8LHistogramCreate but converts the
// distance to plane code.
VP8LHistogramInit(histo, cache_bits, /*init_arrays=*/ 1);
while (VP8LRefsCursorOk(&c)) {
VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos, VP8LDistanceToPlaneCode,
xsize);
VP8LRefsCursorNext(&c);
}
VP8LHistogramStoreRefs(refs, VP8LDistanceToPlaneCode, xsize, histo);
ConvertPopulationCountTableToBitEstimates(
VP8LHistogramNumCodes(histo->palette_code_bits_), histo->literal_,
m->literal_);
VP8LHistogramNumCodes(histo->palette_code_bits), histo->literal,
m->literal);
ConvertPopulationCountTableToBitEstimates(
VALUES_IN_BYTE, histo->red_, m->red_);
VALUES_IN_BYTE, histo->red, m->red);
ConvertPopulationCountTableToBitEstimates(
VALUES_IN_BYTE, histo->blue_, m->blue_);
VALUES_IN_BYTE, histo->blue, m->blue);
ConvertPopulationCountTableToBitEstimates(
VALUES_IN_BYTE, histo->alpha_, m->alpha_);
VALUES_IN_BYTE, histo->alpha, m->alpha);
ConvertPopulationCountTableToBitEstimates(
NUM_DISTANCE_CODES, histo->distance_, m->distance_);
NUM_DISTANCE_CODES, histo->distance, m->distance);
ok = 1;
Error:
@@ -95,21 +92,21 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
static WEBP_INLINE int64_t GetLiteralCost(const CostModel* const m,
uint32_t v) {
return (int64_t)m->alpha_[v >> 24] + m->red_[(v >> 16) & 0xff] +
m->literal_[(v >> 8) & 0xff] + m->blue_[v & 0xff];
return (int64_t)m->alpha[v >> 24] + m->red[(v >> 16) & 0xff] +
m->literal[(v >> 8) & 0xff] + m->blue[v & 0xff];
}
static WEBP_INLINE int64_t GetCacheCost(const CostModel* const m,
uint32_t idx) {
const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
return (int64_t)m->literal_[literal_idx];
return (int64_t)m->literal[literal_idx];
}
static WEBP_INLINE int64_t GetLengthCost(const CostModel* const m,
uint32_t length) {
int code, extra_bits;
VP8LPrefixEncodeBits(length, &code, &extra_bits);
return (int64_t)m->literal_[VALUES_IN_BYTE + code] +
return (int64_t)m->literal[VALUES_IN_BYTE + code] +
((int64_t)extra_bits << LOG_2_PRECISION_BITS);
}
@@ -117,7 +114,7 @@ static WEBP_INLINE int64_t GetDistanceCost(const CostModel* const m,
uint32_t distance) {
int code, extra_bits;
VP8LPrefixEncodeBits(distance, &code, &extra_bits);
return (int64_t)m->distance_[code] +
return (int64_t)m->distance[code] +
((int64_t)extra_bits << LOG_2_PRECISION_BITS);
}
@@ -147,84 +144,84 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel(
// Empirical value to avoid high memory consumption but good for performance.
#define COST_CACHE_INTERVAL_SIZE_MAX 500
// To perform backward reference every pixel at index index_ is considered and
// To perform backward reference every pixel at index 'index' is considered and
// the cost for the MAX_LENGTH following pixels computed. Those following pixels
// at index index_ + k (k from 0 to MAX_LENGTH) have a cost of:
// cost_ = distance cost at index + GetLengthCost(cost_model, k)
// at index 'index' + k (k from 0 to MAX_LENGTH) have a cost of:
// cost = distance cost at index + GetLengthCost(cost_model, k)
// and the minimum value is kept. GetLengthCost(cost_model, k) is cached in an
// array of size MAX_LENGTH.
// Instead of performing MAX_LENGTH comparisons per pixel, we keep track of the
// minimal values using intervals of constant cost.
// An interval is defined by the index_ of the pixel that generated it and
// is only useful in a range of indices from start_ to end_ (exclusive), i.e.
// it contains the minimum value for pixels between start_ and end_.
// Intervals are stored in a linked list and ordered by start_. When a new
// An interval is defined by the 'index' of the pixel that generated it and
// is only useful in a range of indices from 'start' to 'end' (exclusive), i.e.
// it contains the minimum value for pixels between start and end.
// Intervals are stored in a linked list and ordered by 'start'. When a new
// interval has a better value, old intervals are split or removed. There are
// therefore no overlapping intervals.
typedef struct CostInterval CostInterval;
struct CostInterval {
int64_t cost_;
int start_;
int end_;
int index_;
CostInterval* previous_;
CostInterval* next_;
int64_t cost;
int start;
int end;
int index;
CostInterval* previous;
CostInterval* next;
};
// The GetLengthCost(cost_model, k) are cached in a CostCacheInterval.
typedef struct {
int64_t cost_;
int start_;
int end_; // Exclusive.
int64_t cost;
int start;
int end; // Exclusive.
} CostCacheInterval;
// This structure is in charge of managing intervals and costs.
// It caches the different CostCacheInterval, caches the different
// GetLengthCost(cost_model, k) in cost_cache_ and the CostInterval's (whose
// count_ is limited by COST_CACHE_INTERVAL_SIZE_MAX).
// GetLengthCost(cost_model, k) in cost_cache and the CostInterval's (whose
// 'count' is limited by COST_CACHE_INTERVAL_SIZE_MAX).
#define COST_MANAGER_MAX_FREE_LIST 10
typedef struct {
CostInterval* head_;
int count_; // The number of stored intervals.
CostCacheInterval* cache_intervals_;
size_t cache_intervals_size_;
CostInterval* head;
int count; // The number of stored intervals.
CostCacheInterval* cache_intervals;
size_t cache_intervals_size;
// Contains the GetLengthCost(cost_model, k).
int64_t cost_cache_[MAX_LENGTH];
int64_t* costs_;
uint16_t* dist_array_;
int64_t cost_cache[MAX_LENGTH];
int64_t* costs;
uint16_t* dist_array;
// Most of the time, we only need few intervals -> use a free-list, to avoid
// fragmentation with small allocs in most common cases.
CostInterval intervals_[COST_MANAGER_MAX_FREE_LIST];
CostInterval* free_intervals_;
CostInterval intervals[COST_MANAGER_MAX_FREE_LIST];
CostInterval* free_intervals;
// These are regularly malloc'd remains. This list can't grow larger than
// size COST_CACHE_INTERVAL_SIZE_MAX - COST_MANAGER_MAX_FREE_LIST, note.
CostInterval* recycled_intervals_;
CostInterval* recycled_intervals;
} CostManager;
static void CostIntervalAddToFreeList(CostManager* const manager,
CostInterval* const interval) {
interval->next_ = manager->free_intervals_;
manager->free_intervals_ = interval;
interval->next = manager->free_intervals;
manager->free_intervals = interval;
}
static int CostIntervalIsInFreeList(const CostManager* const manager,
const CostInterval* const interval) {
return (interval >= &manager->intervals_[0] &&
interval <= &manager->intervals_[COST_MANAGER_MAX_FREE_LIST - 1]);
return (interval >= &manager->intervals[0] &&
interval <= &manager->intervals[COST_MANAGER_MAX_FREE_LIST - 1]);
}
static void CostManagerInitFreeList(CostManager* const manager) {
int i;
manager->free_intervals_ = NULL;
manager->free_intervals = NULL;
for (i = 0; i < COST_MANAGER_MAX_FREE_LIST; ++i) {
CostIntervalAddToFreeList(manager, &manager->intervals_[i]);
CostIntervalAddToFreeList(manager, &manager->intervals[i]);
}
}
static void DeleteIntervalList(CostManager* const manager,
const CostInterval* interval) {
while (interval != NULL) {
const CostInterval* const next = interval->next_;
const CostInterval* const next = interval->next;
if (!CostIntervalIsInFreeList(manager, interval)) {
WebPSafeFree((void*)interval);
} // else: do nothing
@@ -235,16 +232,16 @@ static void DeleteIntervalList(CostManager* const manager,
static void CostManagerClear(CostManager* const manager) {
if (manager == NULL) return;
WebPSafeFree(manager->costs_);
WebPSafeFree(manager->cache_intervals_);
WebPSafeFree(manager->costs);
WebPSafeFree(manager->cache_intervals);
// Clear the interval lists.
DeleteIntervalList(manager, manager->head_);
manager->head_ = NULL;
DeleteIntervalList(manager, manager->recycled_intervals_);
manager->recycled_intervals_ = NULL;
DeleteIntervalList(manager, manager->head);
manager->head = NULL;
DeleteIntervalList(manager, manager->recycled_intervals);
manager->recycled_intervals = NULL;
// Reset pointers, count_ and cache_intervals_size_.
// Reset pointers, 'count' and 'cache_intervals_size'.
memset(manager, 0, sizeof(*manager));
CostManagerInitFreeList(manager);
}
@@ -255,25 +252,25 @@ static int CostManagerInit(CostManager* const manager,
int i;
const int cost_cache_size = (pix_count > MAX_LENGTH) ? MAX_LENGTH : pix_count;
manager->costs_ = NULL;
manager->cache_intervals_ = NULL;
manager->head_ = NULL;
manager->recycled_intervals_ = NULL;
manager->count_ = 0;
manager->dist_array_ = dist_array;
manager->costs = NULL;
manager->cache_intervals = NULL;
manager->head = NULL;
manager->recycled_intervals = NULL;
manager->count = 0;
manager->dist_array = dist_array;
CostManagerInitFreeList(manager);
// Fill in the cost_cache_.
// Fill in the 'cost_cache'.
// Has to be done in two passes due to a GCC bug on i686
// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
for (i = 0; i < cost_cache_size; ++i) {
manager->cost_cache_[i] = GetLengthCost(cost_model, i);
manager->cost_cache[i] = GetLengthCost(cost_model, i);
}
manager->cache_intervals_size_ = 1;
manager->cache_intervals_size = 1;
for (i = 1; i < cost_cache_size; ++i) {
// Get the number of bound intervals.
if (manager->cost_cache_[i] != manager->cost_cache_[i - 1]) {
++manager->cache_intervals_size_;
if (manager->cost_cache[i] != manager->cost_cache[i - 1]) {
++manager->cache_intervals_size;
}
}
@@ -281,46 +278,46 @@ static int CostManagerInit(CostManager* const manager,
// The worst case scenario with a cost model would be if every length has a
// different cost, hence MAX_LENGTH but that is impossible with the current
// implementation that spirals around a pixel.
assert(manager->cache_intervals_size_ <= MAX_LENGTH);
manager->cache_intervals_ = (CostCacheInterval*)WebPSafeMalloc(
manager->cache_intervals_size_, sizeof(*manager->cache_intervals_));
if (manager->cache_intervals_ == NULL) {
assert(manager->cache_intervals_size <= MAX_LENGTH);
manager->cache_intervals = (CostCacheInterval*)WebPSafeMalloc(
manager->cache_intervals_size, sizeof(*manager->cache_intervals));
if (manager->cache_intervals == NULL) {
CostManagerClear(manager);
return 0;
}
// Fill in the cache_intervals_.
// Fill in the 'cache_intervals'.
{
CostCacheInterval* cur = manager->cache_intervals_;
CostCacheInterval* cur = manager->cache_intervals;
// Consecutive values in cost_cache_ are compared and if a big enough
// Consecutive values in 'cost_cache' are compared and if a big enough
// difference is found, a new interval is created and bounded.
cur->start_ = 0;
cur->end_ = 1;
cur->cost_ = manager->cost_cache_[0];
cur->start = 0;
cur->end = 1;
cur->cost = manager->cost_cache[0];
for (i = 1; i < cost_cache_size; ++i) {
const int64_t cost_val = manager->cost_cache_[i];
if (cost_val != cur->cost_) {
const int64_t cost_val = manager->cost_cache[i];
if (cost_val != cur->cost) {
++cur;
// Initialize an interval.
cur->start_ = i;
cur->cost_ = cost_val;
cur->start = i;
cur->cost = cost_val;
}
cur->end_ = i + 1;
cur->end = i + 1;
}
assert((size_t)(cur - manager->cache_intervals_) + 1 ==
manager->cache_intervals_size_);
assert((size_t)(cur - manager->cache_intervals) + 1 ==
manager->cache_intervals_size);
}
manager->costs_ =
(int64_t*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));
if (manager->costs_ == NULL) {
manager->costs =
(int64_t*)WebPSafeMalloc(pix_count, sizeof(*manager->costs));
if (manager->costs == NULL) {
CostManagerClear(manager);
return 0;
}
// Set the initial costs_ to INT64_MAX for every pixel as we will keep the
// Set the initial 'costs' to INT64_MAX for every pixel as we will keep the
// minimum.
for (i = 0; i < pix_count; ++i) manager->costs_[i] = WEBP_INT64_MAX;
for (i = 0; i < pix_count; ++i) manager->costs[i] = WEBP_INT64_MAX;
return 1;
}
@@ -332,9 +329,9 @@ static WEBP_INLINE void UpdateCost(CostManager* const manager, int i,
const int k = i - position;
assert(k >= 0 && k < MAX_LENGTH);
if (manager->costs_[i] > cost) {
manager->costs_[i] = cost;
manager->dist_array_[i] = k + 1;
if (manager->costs[i] > cost) {
manager->costs[i] = cost;
manager->dist_array[i] = k + 1;
}
}
@@ -352,12 +349,12 @@ static WEBP_INLINE void ConnectIntervals(CostManager* const manager,
CostInterval* const prev,
CostInterval* const next) {
if (prev != NULL) {
prev->next_ = next;
prev->next = next;
} else {
manager->head_ = next;
manager->head = next;
}
if (next != NULL) next->previous_ = prev;
if (next != NULL) next->previous = prev;
}
// Pop an interval in the manager.
@@ -365,15 +362,15 @@ static WEBP_INLINE void PopInterval(CostManager* const manager,
CostInterval* const interval) {
if (interval == NULL) return;
ConnectIntervals(manager, interval->previous_, interval->next_);
ConnectIntervals(manager, interval->previous, interval->next);
if (CostIntervalIsInFreeList(manager, interval)) {
CostIntervalAddToFreeList(manager, interval);
} else { // recycle regularly malloc'd intervals too
interval->next_ = manager->recycled_intervals_;
manager->recycled_intervals_ = interval;
interval->next = manager->recycled_intervals;
manager->recycled_intervals = interval;
}
--manager->count_;
assert(manager->count_ >= 0);
--manager->count;
assert(manager->count >= 0);
}
// Update the cost at index i by going over all the stored intervals that
@@ -382,17 +379,17 @@ static WEBP_INLINE void PopInterval(CostManager* const manager,
// end before 'i' will be popped.
static WEBP_INLINE void UpdateCostAtIndex(CostManager* const manager, int i,
int do_clean_intervals) {
CostInterval* current = manager->head_;
CostInterval* current = manager->head;
while (current != NULL && current->start_ <= i) {
CostInterval* const next = current->next_;
if (current->end_ <= i) {
while (current != NULL && current->start <= i) {
CostInterval* const next = current->next;
if (current->end <= i) {
if (do_clean_intervals) {
// We have an outdated interval, remove it.
PopInterval(manager, current);
}
} else {
UpdateCost(manager, i, current->index_, current->cost_);
UpdateCost(manager, i, current->index, current->cost);
}
current = next;
}
@@ -400,31 +397,31 @@ static WEBP_INLINE void UpdateCostAtIndex(CostManager* const manager, int i,
// Given a current orphan interval and its previous interval, before
// it was orphaned (which can be NULL), set it at the right place in the list
// of intervals using the start_ ordering and the previous interval as a hint.
// of intervals using the 'start' ordering and the previous interval as a hint.
static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager,
CostInterval* const current,
CostInterval* previous) {
assert(current != NULL);
if (previous == NULL) previous = manager->head_;
while (previous != NULL && current->start_ < previous->start_) {
previous = previous->previous_;
if (previous == NULL) previous = manager->head;
while (previous != NULL && current->start < previous->start) {
previous = previous->previous;
}
while (previous != NULL && previous->next_ != NULL &&
previous->next_->start_ < current->start_) {
previous = previous->next_;
while (previous != NULL && previous->next != NULL &&
previous->next->start < current->start) {
previous = previous->next;
}
if (previous != NULL) {
ConnectIntervals(manager, current, previous->next_);
ConnectIntervals(manager, current, previous->next);
} else {
ConnectIntervals(manager, current, manager->head_);
ConnectIntervals(manager, current, manager->head);
}
ConnectIntervals(manager, previous, current);
}
// Insert an interval in the list contained in the manager by starting at
// interval_in as a hint. The intervals are sorted by start_ value.
// 'interval_in' as a hint. The intervals are sorted by 'start' value.
static WEBP_INLINE void InsertInterval(CostManager* const manager,
CostInterval* const interval_in,
int64_t cost, int position, int start,
@@ -432,17 +429,17 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager,
CostInterval* interval_new;
if (start >= end) return;
if (manager->count_ >= COST_CACHE_INTERVAL_SIZE_MAX) {
if (manager->count >= COST_CACHE_INTERVAL_SIZE_MAX) {
// Serialize the interval if we cannot store it.
UpdateCostPerInterval(manager, start, end, position, cost);
return;
}
if (manager->free_intervals_ != NULL) {
interval_new = manager->free_intervals_;
manager->free_intervals_ = interval_new->next_;
} else if (manager->recycled_intervals_ != NULL) {
interval_new = manager->recycled_intervals_;
manager->recycled_intervals_ = interval_new->next_;
if (manager->free_intervals != NULL) {
interval_new = manager->free_intervals;
manager->free_intervals = interval_new->next;
} else if (manager->recycled_intervals != NULL) {
interval_new = manager->recycled_intervals;
manager->recycled_intervals = interval_new->next;
} else { // malloc for good
interval_new = (CostInterval*)WebPSafeMalloc(1, sizeof(*interval_new));
if (interval_new == NULL) {
@@ -452,13 +449,13 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager,
}
}
interval_new->cost_ = cost;
interval_new->index_ = position;
interval_new->start_ = start;
interval_new->end_ = end;
interval_new->cost = cost;
interval_new->index = position;
interval_new->start = start;
interval_new->end = end;
PositionOrphanInterval(manager, interval_new, interval_in);
++manager->count_;
++manager->count;
}
// Given a new cost interval defined by its start at position, its length value
@@ -469,10 +466,10 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
int64_t distance_cost, int position,
int len) {
size_t i;
CostInterval* interval = manager->head_;
CostInterval* interval = manager->head;
CostInterval* interval_next;
const CostCacheInterval* const cost_cache_intervals =
manager->cache_intervals_;
manager->cache_intervals;
// If the interval is small enough, no need to deal with the heavy
// interval logic, just serialize it right away. This constant is empirical.
const int kSkipDistance = 10;
@@ -483,84 +480,84 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
const int k = j - position;
int64_t cost_tmp;
assert(k >= 0 && k < MAX_LENGTH);
cost_tmp = distance_cost + manager->cost_cache_[k];
cost_tmp = distance_cost + manager->cost_cache[k];
if (manager->costs_[j] > cost_tmp) {
manager->costs_[j] = cost_tmp;
manager->dist_array_[j] = k + 1;
if (manager->costs[j] > cost_tmp) {
manager->costs[j] = cost_tmp;
manager->dist_array[j] = k + 1;
}
}
return;
}
for (i = 0; i < manager->cache_intervals_size_ &&
cost_cache_intervals[i].start_ < len;
for (i = 0; i < manager->cache_intervals_size &&
cost_cache_intervals[i].start < len;
++i) {
// Define the intersection of the ith interval with the new one.
int start = position + cost_cache_intervals[i].start_;
const int end = position + (cost_cache_intervals[i].end_ > len
int start = position + cost_cache_intervals[i].start;
const int end = position + (cost_cache_intervals[i].end > len
? len
: cost_cache_intervals[i].end_);
const int64_t cost = distance_cost + cost_cache_intervals[i].cost_;
: cost_cache_intervals[i].end);
const int64_t cost = distance_cost + cost_cache_intervals[i].cost;
for (; interval != NULL && interval->start_ < end;
for (; interval != NULL && interval->start < end;
interval = interval_next) {
interval_next = interval->next_;
interval_next = interval->next;
// Make sure we have some overlap
if (start >= interval->end_) continue;
if (start >= interval->end) continue;
if (cost >= interval->cost_) {
if (cost >= interval->cost) {
// When intervals are represented, the lower, the better.
// [**********************************************************[
// start end
// [----------------------------------[
// interval->start_ interval->end_
// interval->start interval->end
// If we are worse than what we already have, add whatever we have so
// far up to interval.
const int start_new = interval->end_;
const int start_new = interval->end;
InsertInterval(manager, interval, cost, position, start,
interval->start_);
interval->start);
start = start_new;
if (start >= end) break;
continue;
}
if (start <= interval->start_) {
if (interval->end_ <= end) {
if (start <= interval->start) {
if (interval->end <= end) {
// [----------------------------------[
// interval->start_ interval->end_
// interval->start interval->end
// [**************************************************************[
// start end
// We can safely remove the old interval as it is fully included.
PopInterval(manager, interval);
} else {
// [------------------------------------[
// interval->start_ interval->end_
// interval->start interval->end
// [*****************************[
// start end
interval->start_ = end;
interval->start = end;
break;
}
} else {
if (end < interval->end_) {
if (end < interval->end) {
// [--------------------------------------------------------------[
// interval->start_ interval->end_
// interval->start interval->end
// [*****************************[
// start end
// We have to split the old interval as it fully contains the new one.
const int end_original = interval->end_;
interval->end_ = start;
InsertInterval(manager, interval, interval->cost_, interval->index_,
const int end_original = interval->end;
interval->end = start;
InsertInterval(manager, interval, interval->cost, interval->index,
end, end_original);
interval = interval->next_;
interval = interval->next;
break;
} else {
// [------------------------------------[
// interval->start_ interval->end_
// interval->start interval->end
// [*****************************[
// start end
interval->end_ = start;
interval->end = start;
}
}
}
@@ -579,7 +576,7 @@ static int BackwardReferencesHashChainDistanceOnly(
const int pix_count = xsize * ysize;
const int use_color_cache = (cache_bits > 0);
const size_t literal_array_size =
sizeof(*((CostModel*)NULL)->literal_) * VP8LHistogramNumCodes(cache_bits);
sizeof(*((CostModel*)NULL)->literal) * VP8LHistogramNumCodes(cache_bits);
const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
CostModel* const cost_model =
(CostModel*)WebPSafeCalloc(1ULL, cost_model_size);
@@ -593,7 +590,7 @@ static int BackwardReferencesHashChainDistanceOnly(
if (cost_model == NULL || cost_manager == NULL) goto Error;
cost_model->literal_ = (uint32_t*)(cost_model + 1);
cost_model->literal = (uint32_t*)(cost_model + 1);
if (use_color_cache) {
cc_init = VP8LColorCacheInit(&hashers, cache_bits);
if (!cc_init) goto Error;
@@ -613,17 +610,17 @@ static int BackwardReferencesHashChainDistanceOnly(
// Add first pixel as literal.
AddSingleLiteralWithCostModel(argb, &hashers, cost_model, /*idx=*/0,
use_color_cache, /*prev_cost=*/0,
cost_manager->costs_, dist_array);
cost_manager->costs, dist_array);
for (i = 1; i < pix_count; ++i) {
const int64_t prev_cost = cost_manager->costs_[i - 1];
const int64_t prev_cost = cost_manager->costs[i - 1];
int offset, len;
VP8LHashChainFindCopy(hash_chain, i, &offset, &len);
// Try adding the pixel as a literal.
AddSingleLiteralWithCostModel(argb, &hashers, cost_model, i,
use_color_cache, prev_cost,
cost_manager->costs_, dist_array);
cost_manager->costs, dist_array);
// If we are dealing with a non-literal.
if (len >= 2) {
@@ -671,7 +668,7 @@ static int BackwardReferencesHashChainDistanceOnly(
UpdateCostAtIndex(cost_manager, j - 1, 0);
UpdateCostAtIndex(cost_manager, j, 0);
PushInterval(cost_manager, cost_manager->costs_[j - 1] + offset_cost,
PushInterval(cost_manager, cost_manager->costs[j - 1] + offset_cost,
j, len_j);
reach = j + len_j - 1;
}
@@ -683,7 +680,7 @@ static int BackwardReferencesHashChainDistanceOnly(
len_prev = len;
}
ok = !refs->error_;
ok = !refs->error;
Error:
if (cc_init) VP8LColorCacheClear(&hashers);
CostManagerClear(cost_manager);
@@ -756,7 +753,7 @@ static int BackwardReferencesHashChainFollowChosenPath(
++i;
}
}
ok = !refs->error_;
ok = !refs->error;
Error:
if (cc_init) VP8LColorCacheClear(&hashers);
return ok;

View File

@@ -13,8 +13,9 @@
#include "src/enc/backward_references_enc.h"
#include <assert.h>
#include <string.h>
#include "src/dsp/dsp.h"
#include "src/dsp/cpu.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#include "src/enc/histogram_enc.h"
@@ -22,6 +23,8 @@
#include "src/utils/color_cache_utils.h"
#include "src/utils/utils.h"
#include "src/webp/encode.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
#define MIN_BLOCK_SIZE 256 // minimum block size for backward references
@@ -76,30 +79,30 @@ static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
// VP8LBackwardRefs
struct PixOrCopyBlock {
PixOrCopyBlock* next_; // next block (or NULL)
PixOrCopy* start_; // data start
int size_; // currently used size
PixOrCopyBlock* next; // next block (or NULL)
PixOrCopy* start; // data start
int size; // currently used size
};
extern void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs);
void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) {
assert(refs != NULL);
if (refs->tail_ != NULL) {
*refs->tail_ = refs->free_blocks_; // recycle all blocks at once
if (refs->tail != NULL) {
*refs->tail = refs->free_blocks; // recycle all blocks at once
}
refs->free_blocks_ = refs->refs_;
refs->tail_ = &refs->refs_;
refs->last_block_ = NULL;
refs->refs_ = NULL;
refs->free_blocks = refs->refs;
refs->tail = &refs->refs;
refs->last_block = NULL;
refs->refs = NULL;
}
void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
assert(refs != NULL);
VP8LClearBackwardRefs(refs);
while (refs->free_blocks_ != NULL) {
PixOrCopyBlock* const next = refs->free_blocks_->next_;
WebPSafeFree(refs->free_blocks_);
refs->free_blocks_ = next;
while (refs->free_blocks != NULL) {
PixOrCopyBlock* const next = refs->free_blocks->next;
WebPSafeFree(refs->free_blocks);
refs->free_blocks = next;
}
}
@@ -107,79 +110,79 @@ void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
static void BackwardRefsSwap(VP8LBackwardRefs* const refs1,
VP8LBackwardRefs* const refs2) {
const int point_to_refs1 =
(refs1->tail_ != NULL && refs1->tail_ == &refs1->refs_);
(refs1->tail != NULL && refs1->tail == &refs1->refs);
const int point_to_refs2 =
(refs2->tail_ != NULL && refs2->tail_ == &refs2->refs_);
(refs2->tail != NULL && refs2->tail == &refs2->refs);
const VP8LBackwardRefs tmp = *refs1;
*refs1 = *refs2;
*refs2 = tmp;
if (point_to_refs2) refs1->tail_ = &refs1->refs_;
if (point_to_refs1) refs2->tail_ = &refs2->refs_;
if (point_to_refs2) refs1->tail = &refs1->refs;
if (point_to_refs1) refs2->tail = &refs2->refs;
}
void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
assert(refs != NULL);
memset(refs, 0, sizeof(*refs));
refs->tail_ = &refs->refs_;
refs->block_size_ =
refs->tail = &refs->refs;
refs->block_size =
(block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size;
}
VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
VP8LRefsCursor c;
c.cur_block_ = refs->refs_;
if (refs->refs_ != NULL) {
c.cur_pos = c.cur_block_->start_;
c.last_pos_ = c.cur_pos + c.cur_block_->size_;
c.cur_block = refs->refs;
if (refs->refs != NULL) {
c.cur_pos = c.cur_block->start;
c.last_pos = c.cur_pos + c.cur_block->size;
} else {
c.cur_pos = NULL;
c.last_pos_ = NULL;
c.last_pos = NULL;
}
return c;
}
void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
PixOrCopyBlock* const b = c->cur_block_->next_;
c->cur_pos = (b == NULL) ? NULL : b->start_;
c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_;
c->cur_block_ = b;
PixOrCopyBlock* const b = c->cur_block->next;
c->cur_pos = (b == NULL) ? NULL : b->start;
c->last_pos = (b == NULL) ? NULL : b->start + b->size;
c->cur_block = b;
}
// Create a new block, either from the free list or allocated
static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
PixOrCopyBlock* b = refs->free_blocks_;
PixOrCopyBlock* b = refs->free_blocks;
if (b == NULL) { // allocate new memory chunk
const size_t total_size =
sizeof(*b) + refs->block_size_ * sizeof(*b->start_);
sizeof(*b) + refs->block_size * sizeof(*b->start);
b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
if (b == NULL) {
refs->error_ |= 1;
refs->error |= 1;
return NULL;
}
b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b)); // not always aligned
b->start = (PixOrCopy*)((uint8_t*)b + sizeof(*b)); // not always aligned
} else { // recycle from free-list
refs->free_blocks_ = b->next_;
refs->free_blocks = b->next;
}
*refs->tail_ = b;
refs->tail_ = &b->next_;
refs->last_block_ = b;
b->next_ = NULL;
b->size_ = 0;
*refs->tail = b;
refs->tail = &b->next;
refs->last_block = b;
b->next = NULL;
b->size = 0;
return b;
}
// Return 1 on success, 0 on error.
static int BackwardRefsClone(const VP8LBackwardRefs* const from,
VP8LBackwardRefs* const to) {
const PixOrCopyBlock* block_from = from->refs_;
const PixOrCopyBlock* block_from = from->refs;
VP8LClearBackwardRefs(to);
while (block_from != NULL) {
PixOrCopyBlock* const block_to = BackwardRefsNewBlock(to);
if (block_to == NULL) return 0;
memcpy(block_to->start_, block_from->start_,
block_from->size_ * sizeof(PixOrCopy));
block_to->size_ = block_from->size_;
block_from = block_from->next_;
memcpy(block_to->start, block_from->start,
block_from->size * sizeof(PixOrCopy));
block_to->size = block_from->size;
block_from = block_from->next;
}
return 1;
}
@@ -188,35 +191,35 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
const PixOrCopy v);
void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
const PixOrCopy v) {
PixOrCopyBlock* b = refs->last_block_;
if (b == NULL || b->size_ == refs->block_size_) {
PixOrCopyBlock* b = refs->last_block;
if (b == NULL || b->size == refs->block_size) {
b = BackwardRefsNewBlock(refs);
if (b == NULL) return; // refs->error_ is set
if (b == NULL) return; // refs->error is set
}
b->start_[b->size_++] = v;
b->start[b->size++] = v;
}
// -----------------------------------------------------------------------------
// Hash chains
int VP8LHashChainInit(VP8LHashChain* const p, int size) {
assert(p->size_ == 0);
assert(p->offset_length_ == NULL);
assert(p->size == 0);
assert(p->offset_length == NULL);
assert(size > 0);
p->offset_length_ =
(uint32_t*)WebPSafeMalloc(size, sizeof(*p->offset_length_));
if (p->offset_length_ == NULL) return 0;
p->size_ = size;
p->offset_length =
(uint32_t*)WebPSafeMalloc(size, sizeof(*p->offset_length));
if (p->offset_length == NULL) return 0;
p->size = size;
return 1;
}
void VP8LHashChainClear(VP8LHashChain* const p) {
assert(p != NULL);
WebPSafeFree(p->offset_length_);
WebPSafeFree(p->offset_length);
p->size_ = 0;
p->offset_length_ = NULL;
p->size = 0;
p->offset_length = NULL;
}
// -----------------------------------------------------------------------------
@@ -265,14 +268,14 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
int argb_comp;
uint32_t base_position;
int32_t* hash_to_first_index;
// Temporarily use the p->offset_length_ as a hash chain.
int32_t* chain = (int32_t*)p->offset_length_;
// Temporarily use the p->offset_length as a hash chain.
int32_t* chain = (int32_t*)p->offset_length;
assert(size > 0);
assert(p->size_ != 0);
assert(p->offset_length_ != NULL);
assert(p->size != 0);
assert(p->offset_length != NULL);
if (size <= 2) {
p->offset_length_[0] = p->offset_length_[size - 1] = 0;
p->offset_length[0] = p->offset_length[size - 1] = 0;
return 1;
}
@@ -351,7 +354,7 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
// (hence a best length of 0) and the left-most pixel nothing to the left
// (hence an offset of 0).
assert(size > 2);
p->offset_length_[0] = p->offset_length_[size - 1] = 0;
p->offset_length[0] = p->offset_length[size - 1] = 0;
for (base_position = size - 2; base_position > 0;) {
const int max_len = MaxFindCopyLength(size - 1 - base_position);
const uint32_t* const argb_start = argb + base_position;
@@ -411,7 +414,7 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
while (1) {
assert(best_length <= MAX_LENGTH);
assert(best_distance <= WINDOW_SIZE);
p->offset_length_[base_position] =
p->offset_length[base_position] =
(best_distance << MAX_LENGTH_BITS) | (uint32_t)best_length;
--base_position;
// Stop if we don't have a match or if we are out of bounds.
@@ -505,7 +508,7 @@ static int BackwardReferencesRle(int xsize, int ysize,
}
}
if (use_color_cache) VP8LColorCacheClear(&hashers);
return !refs->error_;
return !refs->error;
}
static int BackwardReferencesLz77(int xsize, int ysize,
@@ -570,7 +573,7 @@ static int BackwardReferencesLz77(int xsize, int ysize,
i += len;
}
ok = !refs->error_;
ok = !refs->error;
Error:
if (cc_init) VP8LColorCacheClear(&hashers);
return ok;
@@ -645,7 +648,7 @@ static int BackwardReferencesLz77Box(int xsize, int ysize,
}
}
hash_chain->offset_length_[0] = 0;
hash_chain->offset_length[0] = 0;
for (i = 1; i < pix_count; ++i) {
int ind;
int best_length = VP8LHashChainFindLength(hash_chain_best, i);
@@ -712,17 +715,17 @@ static int BackwardReferencesLz77Box(int xsize, int ysize,
assert(i + best_length <= pix_count);
assert(best_length <= MAX_LENGTH);
if (best_length <= MIN_LENGTH) {
hash_chain->offset_length_[i] = 0;
hash_chain->offset_length[i] = 0;
best_offset_prev = 0;
best_length_prev = 0;
} else {
hash_chain->offset_length_[i] =
hash_chain->offset_length[i] =
(best_offset << MAX_LENGTH_BITS) | (uint32_t)best_length;
best_offset_prev = best_offset;
best_length_prev = best_length;
}
}
hash_chain->offset_length_[0] = 0;
hash_chain->offset_length[0] = 0;
WebPSafeFree(counts_ini);
return BackwardReferencesLz77(xsize, ysize, argb, cache_bits, hash_chain,
@@ -793,20 +796,20 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
// The keys of the caches can be derived from the longest one.
int key = VP8LHashPix(pix, 32 - cache_bits_max);
// Do not use the color cache for cache_bits = 0.
++histos[0]->blue_[b];
++histos[0]->literal_[g];
++histos[0]->red_[r];
++histos[0]->alpha_[a];
++histos[0]->blue[b];
++histos[0]->literal[g];
++histos[0]->red[r];
++histos[0]->alpha[a];
// Deal with cache_bits > 0.
for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
if (VP8LColorCacheLookup(&hashers[i], key) == pix) {
++histos[i]->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
++histos[i]->literal[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
} else {
VP8LColorCacheSet(&hashers[i], key, pix);
++histos[i]->blue_[b];
++histos[i]->literal_[g];
++histos[i]->red_[r];
++histos[i]->alpha_[a];
++histos[i]->blue[b];
++histos[i]->literal[g];
++histos[i]->red[r];
++histos[i]->alpha[a];
}
}
} else {
@@ -815,12 +818,12 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
// histograms but those are the same independently from the cache size.
// As those constant contributions are in the end added to the other
// histogram contributions, we can ignore them, except for the length
// prefix that is part of the literal_ histogram.
// prefix that is part of the 'literal' histogram.
int len = PixOrCopyLength(v);
uint32_t argb_prev = *argb ^ 0xffffffffu;
VP8LPrefixEncode(len, &code, &extra_bits, &extra_bits_value);
for (i = 0; i <= cache_bits_max; ++i) {
++histos[i]->literal_[NUM_LITERAL_CODES + code];
++histos[i]->literal[NUM_LITERAL_CODES + code];
}
// Update the color caches.
do {
@@ -828,7 +831,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
// Efficiency: insert only if the color changes.
int key = VP8LHashPix(*argb, 32 - cache_bits_max);
for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
hashers[i].colors_[key] = *argb;
hashers[i].colors[key] = *argb;
}
argb_prev = *argb;
}

View File

@@ -15,9 +15,10 @@
#include <assert.h>
#include <stdlib.h>
#include "src/webp/types.h"
#include "src/webp/encode.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
#ifdef __cplusplus
extern "C" {
@@ -126,10 +127,10 @@ struct VP8LHashChain {
// (through WINDOW_SIZE = 1<<20).
// The lower 12 bits contain the length of the match. The 12 bit limit is
// defined in MaxFindCopyLength with MAX_LENGTH=4096.
uint32_t* offset_length_;
uint32_t* offset_length;
// This is the maximum size of the hash_chain that can be constructed.
// Typically this is the pixel count (width x height) for a given image.
int size_;
int size;
};
// Must be called first, to set size.
@@ -143,12 +144,12 @@ void VP8LHashChainClear(VP8LHashChain* const p); // release memory
static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p,
const int base_position) {
return p->offset_length_[base_position] >> MAX_LENGTH_BITS;
return p->offset_length[base_position] >> MAX_LENGTH_BITS;
}
static WEBP_INLINE int VP8LHashChainFindLength(const VP8LHashChain* const p,
const int base_position) {
return p->offset_length_[base_position] & ((1U << MAX_LENGTH_BITS) - 1);
return p->offset_length[base_position] & ((1U << MAX_LENGTH_BITS) - 1);
}
static WEBP_INLINE void VP8LHashChainFindCopy(const VP8LHashChain* const p,
@@ -170,12 +171,12 @@ typedef struct VP8LBackwardRefs VP8LBackwardRefs;
// Container for blocks chain
struct VP8LBackwardRefs {
int block_size_; // common block-size
int error_; // set to true if some memory error occurred
PixOrCopyBlock* refs_; // list of currently used blocks
PixOrCopyBlock** tail_; // for list recycling
PixOrCopyBlock* free_blocks_; // free-list
PixOrCopyBlock* last_block_; // used for adding new refs (internal)
int block_size; // common block-size
int error; // set to true if some memory error occurred
PixOrCopyBlock* refs; // list of currently used blocks
PixOrCopyBlock** tail; // for list recycling
PixOrCopyBlock* free_blocks; // free-list
PixOrCopyBlock* last_block; // used for adding new refs (internal)
};
// Initialize the object. 'block_size' is the common block size to store
@@ -189,8 +190,8 @@ typedef struct {
// public:
PixOrCopy* cur_pos; // current position
// private:
PixOrCopyBlock* cur_block_; // current block in the refs list
const PixOrCopy* last_pos_; // sentinel for switching to next block
PixOrCopyBlock* cur_block; // current block in the refs list
const PixOrCopy* last_pos; // sentinel for switching to next block
} VP8LRefsCursor;
// Returns a cursor positioned at the beginning of the references list.
@@ -205,7 +206,7 @@ void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c);
static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) {
assert(c != NULL);
assert(VP8LRefsCursorOk(c));
if (++c->cur_pos == c->last_pos_) VP8LRefsCursorNextBlock(c);
if (++c->cur_pos == c->last_pos) VP8LRefsCursorNextBlock(c);
}
// -----------------------------------------------------------------------------

View File

@@ -15,7 +15,10 @@
#include "src/webp/config.h"
#endif
#include <stddef.h>
#include "src/webp/encode.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// WebPConfig
@@ -135,8 +138,8 @@ int WebPValidateConfig(const WebPConfig* config) {
// Mapping between -z level and -m / -q parameter settings.
static const struct {
uint8_t method_;
uint8_t quality_;
uint8_t method;
uint8_t quality;
} kLosslessPresets[MAX_LEVEL + 1] = {
{ 0, 0 }, { 1, 20 }, { 2, 25 }, { 3, 30 }, { 3, 50 },
{ 4, 50 }, { 4, 75 }, { 4, 90 }, { 5, 90 }, { 6, 100 }
@@ -145,8 +148,8 @@ static const struct {
int WebPConfigLosslessPreset(WebPConfig* config, int level) {
if (config == NULL || level < 0 || level > MAX_LEVEL) return 0;
config->lossless = 1;
config->method = kLosslessPresets[level].method_;
config->quality = kLosslessPresets[level].quality_;
config->method = kLosslessPresets[level].method;
config->quality = kLosslessPresets[level].quality;
return 1;
}

View File

@@ -11,7 +11,13 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
#include "src/dec/common_dec.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
//------------------------------------------------------------------------------
// Level cost tables
@@ -60,14 +66,14 @@ static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) {
void VP8CalculateLevelCosts(VP8EncProba* const proba) {
int ctype, band, ctx;
if (!proba->dirty_) return; // nothing to do.
if (!proba->dirty) return; // nothing to do.
for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
int n;
for (band = 0; band < NUM_BANDS; ++band) {
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
uint16_t* const table = proba->level_cost_[ctype][band][ctx];
const uint8_t* const p = proba->coeffs[ctype][band][ctx];
uint16_t* const table = proba->level_cost[ctype][band][ctx];
const int cost0 = (ctx > 0) ? VP8BitCost(1, p[0]) : 0;
const int cost_base = VP8BitCost(1, p[1]) + cost0;
int v;
@@ -81,12 +87,12 @@ void VP8CalculateLevelCosts(VP8EncProba* const proba) {
}
for (n = 0; n < 16; ++n) { // replicate bands. We don't need to sentinel.
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
proba->remapped_costs_[ctype][n][ctx] =
proba->level_cost_[ctype][VP8EncBands[n]][ctx];
proba->remapped_costs[ctype][n][ctx] =
proba->level_cost[ctype][VP8EncBands[n]][ctx];
}
}
}
proba->dirty_ = 0;
proba->dirty = 0;
}
//------------------------------------------------------------------------------
@@ -206,9 +212,9 @@ const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = {
void VP8InitResidual(int first, int coeff_type,
VP8Encoder* const enc, VP8Residual* const res) {
res->coeff_type = coeff_type;
res->prob = enc->proba_.coeffs_[coeff_type];
res->stats = enc->proba_.stats_[coeff_type];
res->costs = enc->proba_.remapped_costs_[coeff_type];
res->prob = enc->proba.coeffs[coeff_type];
res->stats = enc->proba.stats[coeff_type];
res->costs = enc->proba.remapped_costs[coeff_type];
res->first = first;
}
@@ -216,14 +222,14 @@ void VP8InitResidual(int first, int coeff_type,
// Mode costs
int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {
const int x = (it->i4_ & 3), y = (it->i4_ >> 2);
const int x = (it->i4 & 3), y = (it->i4 >> 2);
VP8Residual res;
VP8Encoder* const enc = it->enc_;
VP8Encoder* const enc = it->enc;
int R = 0;
int ctx;
VP8InitResidual(0, 3, enc, &res);
ctx = it->top_nz_[x] + it->left_nz_[y];
ctx = it->top_nz[x] + it->left_nz[y];
VP8SetResidualCoeffs(levels, &res);
R += VP8GetResidualCost(ctx, &res);
return R;
@@ -231,7 +237,7 @@ int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {
int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
VP8Residual res;
VP8Encoder* const enc = it->enc_;
VP8Encoder* const enc = it->enc;
int x, y;
int R = 0;
@@ -240,16 +246,16 @@ int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
// DC
VP8InitResidual(0, 1, enc, &res);
VP8SetResidualCoeffs(rd->y_dc_levels, &res);
R += VP8GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res);
R += VP8GetResidualCost(it->top_nz[8] + it->left_nz[8], &res);
// AC
VP8InitResidual(1, 0, enc, &res);
for (y = 0; y < 4; ++y) {
for (x = 0; x < 4; ++x) {
const int ctx = it->top_nz_[x] + it->left_nz_[y];
const int ctx = it->top_nz[x] + it->left_nz[y];
VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
R += VP8GetResidualCost(ctx, &res);
it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0);
it->top_nz[x] = it->left_nz[y] = (res.last >= 0);
}
}
return R;
@@ -257,7 +263,7 @@ int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
VP8Residual res;
VP8Encoder* const enc = it->enc_;
VP8Encoder* const enc = it->enc;
int ch, x, y;
int R = 0;
@@ -267,10 +273,10 @@ int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
for (ch = 0; ch <= 2; ch += 2) {
for (y = 0; y < 2; ++y) {
for (x = 0; x < 2; ++x) {
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
const int ctx = it->top_nz[4 + ch + x] + it->left_nz[4 + ch + y];
VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
R += VP8GetResidualCost(ctx, &res);
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0);
it->top_nz[4 + ch + x] = it->left_nz[4 + ch + y] = (res.last >= 0);
}
}
}

View File

@@ -16,7 +16,11 @@
#include <assert.h>
#include <stdlib.h>
#include "src/dec/common_dec.h"
#include "src/dsp/dsp.h"
#include "src/enc/vp8i_enc.h"
#include "src/webp/types.h"
#ifdef __cplusplus
extern "C" {

View File

@@ -12,8 +12,13 @@
// Author: somnath@google.com (Somnath Banerjee)
#include <assert.h>
#include "src/enc/vp8i_enc.h"
#include <stddef.h>
#include <string.h>
#include "src/dec/common_dec.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/enc/vp8i_enc.h"
// This table gives, for a given sharpness, the filtering strength to be
// used (at least) in order to filter a given edge step delta.
@@ -83,18 +88,18 @@ static int GetILevel(int sharpness, int level) {
}
static void DoFilter(const VP8EncIterator* const it, int level) {
const VP8Encoder* const enc = it->enc_;
const int ilevel = GetILevel(enc->config_->filter_sharpness, level);
const VP8Encoder* const enc = it->enc;
const int ilevel = GetILevel(enc->config->filter_sharpness, level);
const int limit = 2 * level + ilevel;
uint8_t* const y_dst = it->yuv_out2_ + Y_OFF_ENC;
uint8_t* const u_dst = it->yuv_out2_ + U_OFF_ENC;
uint8_t* const v_dst = it->yuv_out2_ + V_OFF_ENC;
uint8_t* const y_dst = it->yuv_out2 + Y_OFF_ENC;
uint8_t* const u_dst = it->yuv_out2 + U_OFF_ENC;
uint8_t* const v_dst = it->yuv_out2 + V_OFF_ENC;
// copy current block to yuv_out2_
memcpy(y_dst, it->yuv_out_, YUV_SIZE_ENC * sizeof(uint8_t));
// copy current block to yuv_out2
memcpy(y_dst, it->yuv_out, YUV_SIZE_ENC * sizeof(uint8_t));
if (enc->filter_hdr_.simple_ == 1) { // simple
if (enc->filter_hdr.simple == 1) { // simple
VP8SimpleHFilter16i(y_dst, BPS, limit);
VP8SimpleVFilter16i(y_dst, BPS, limit);
} else { // complex
@@ -139,11 +144,11 @@ static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) {
void VP8InitFilter(VP8EncIterator* const it) {
#if !defined(WEBP_REDUCE_SIZE)
if (it->lf_stats_ != NULL) {
if (it->lf_stats != NULL) {
int s, i;
for (s = 0; s < NUM_MB_SEGMENTS; s++) {
for (i = 0; i < MAX_LF_LEVELS; i++) {
(*it->lf_stats_)[s][i] = 0;
(*it->lf_stats)[s][i] = 0;
}
}
VP8SSIMDspInit();
@@ -156,16 +161,16 @@ void VP8InitFilter(VP8EncIterator* const it) {
void VP8StoreFilterStats(VP8EncIterator* const it) {
#if !defined(WEBP_REDUCE_SIZE)
int d;
VP8Encoder* const enc = it->enc_;
const int s = it->mb_->segment_;
const int level0 = enc->dqm_[s].fstrength_;
VP8Encoder* const enc = it->enc;
const int s = it->mb->segment;
const int level0 = enc->dqm[s].fstrength;
// explore +/-quant range of values around level0
const int delta_min = -enc->dqm_[s].quant_;
const int delta_max = enc->dqm_[s].quant_;
const int delta_min = -enc->dqm[s].quant;
const int delta_max = enc->dqm[s].quant;
const int step_size = (delta_max - delta_min >= 4) ? 4 : 1;
if (it->lf_stats_ == NULL) return;
if (it->lf_stats == NULL) return;
// NOTE: Currently we are applying filter only across the sublock edges
// There are two reasons for that.
@@ -173,10 +178,10 @@ void VP8StoreFilterStats(VP8EncIterator* const it) {
// the left and top macro blocks. That will be hard to restore
// 2. Macro Blocks on the bottom and right are not yet compressed. So we
// cannot apply filter on the right and bottom macro block edges.
if (it->mb_->type_ == 1 && it->mb_->skip_) return;
if (it->mb->type == 1 && it->mb->skip) return;
// Always try filter level zero
(*it->lf_stats_)[s][0] += GetMBSSIM(it->yuv_in_, it->yuv_out_);
(*it->lf_stats)[s][0] += GetMBSSIM(it->yuv_in, it->yuv_out);
for (d = delta_min; d <= delta_max; d += step_size) {
const int level = level0 + d;
@@ -184,7 +189,7 @@ void VP8StoreFilterStats(VP8EncIterator* const it) {
continue;
}
DoFilter(it, level);
(*it->lf_stats_)[s][level] += GetMBSSIM(it->yuv_in_, it->yuv_out2_);
(*it->lf_stats)[s][level] += GetMBSSIM(it->yuv_in, it->yuv_out2);
}
#else // defined(WEBP_REDUCE_SIZE)
(void)it;
@@ -192,43 +197,43 @@ void VP8StoreFilterStats(VP8EncIterator* const it) {
}
void VP8AdjustFilterStrength(VP8EncIterator* const it) {
VP8Encoder* const enc = it->enc_;
VP8Encoder* const enc = it->enc;
#if !defined(WEBP_REDUCE_SIZE)
if (it->lf_stats_ != NULL) {
if (it->lf_stats != NULL) {
int s;
for (s = 0; s < NUM_MB_SEGMENTS; s++) {
int i, best_level = 0;
// Improvement over filter level 0 should be at least 1e-5 (relatively)
double best_v = 1.00001 * (*it->lf_stats_)[s][0];
double best_v = 1.00001 * (*it->lf_stats)[s][0];
for (i = 1; i < MAX_LF_LEVELS; i++) {
const double v = (*it->lf_stats_)[s][i];
const double v = (*it->lf_stats)[s][i];
if (v > best_v) {
best_v = v;
best_level = i;
}
}
enc->dqm_[s].fstrength_ = best_level;
enc->dqm[s].fstrength = best_level;
}
return;
}
#endif // !defined(WEBP_REDUCE_SIZE)
if (enc->config_->filter_strength > 0) {
if (enc->config->filter_strength > 0) {
int max_level = 0;
int s;
for (s = 0; s < NUM_MB_SEGMENTS; s++) {
VP8SegmentInfo* const dqm = &enc->dqm_[s];
VP8SegmentInfo* const dqm = &enc->dqm[s];
// this '>> 3' accounts for some inverse WHT scaling
const int delta = (dqm->max_edge_ * dqm->y2_.q_[1]) >> 3;
const int delta = (dqm->max_edge * dqm->y2.q[1]) >> 3;
const int level =
VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, delta);
if (level > dqm->fstrength_) {
dqm->fstrength_ = level;
VP8FilterStrengthFromDelta(enc->filter_hdr.sharpness, delta);
if (level > dqm->fstrength) {
dqm->fstrength = level;
}
if (max_level < dqm->fstrength_) {
max_level = dqm->fstrength_;
if (max_level < dqm->fstrength) {
max_level = dqm->fstrength;
}
}
enc->filter_hdr_.level_ = max_level;
enc->filter_hdr.level = max_level;
}
}

View File

@@ -11,12 +11,17 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include <string.h>
#include <assert.h>
#include <math.h>
#include <string.h>
#include "src/dec/common_dec.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
#include "src/dsp/dsp.h"
#include "src/utils/bit_writer_utils.h"
#include "src/webp/encode.h"
#include "src/webp/format_constants.h" // RIFF constants
#define SEGMENT_VISU 0
@@ -46,15 +51,15 @@ typedef struct { // struct for organizing convergence in either size or PSNR
} PassStats;
static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) {
const uint64_t target_size = (uint64_t)enc->config_->target_size;
const uint64_t target_size = (uint64_t)enc->config->target_size;
const int do_size_search = (target_size != 0);
const float target_PSNR = enc->config_->target_PSNR;
const float target_PSNR = enc->config->target_PSNR;
s->is_first = 1;
s->dq = 10.f;
s->qmin = 1.f * enc->config_->qmin;
s->qmax = 1.f * enc->config_->qmax;
s->q = s->last_q = Clamp(enc->config_->quality, s->qmin, s->qmax);
s->qmin = 1.f * enc->config->qmin;
s->qmax = 1.f * enc->config->qmax;
s->q = s->last_q = Clamp(enc->config->quality, s->qmin, s->qmax);
s->target = do_size_search ? (double)target_size
: (target_PSNR > 0.) ? target_PSNR
: 40.; // default, just in case
@@ -95,9 +100,9 @@ const uint8_t VP8Cat6[] =
// Reset the statistics about: number of skips, token proba, level cost,...
static void ResetStats(VP8Encoder* const enc) {
VP8EncProba* const proba = &enc->proba_;
VP8EncProba* const proba = &enc->proba;
VP8CalculateLevelCosts(proba);
proba->nb_skip_ = 0;
proba->nb_skip = 0;
}
//------------------------------------------------------------------------------
@@ -111,17 +116,17 @@ static int CalcSkipProba(uint64_t nb, uint64_t total) {
// Returns the bit-cost for coding the skip probability.
static int FinalizeSkipProba(VP8Encoder* const enc) {
VP8EncProba* const proba = &enc->proba_;
const int nb_mbs = enc->mb_w_ * enc->mb_h_;
const int nb_events = proba->nb_skip_;
VP8EncProba* const proba = &enc->proba;
const int nb_mbs = enc->mb_w * enc->mb_h;
const int nb_events = proba->nb_skip;
int size;
proba->skip_proba_ = CalcSkipProba(nb_events, nb_mbs);
proba->use_skip_proba_ = (proba->skip_proba_ < SKIP_PROBA_THRESHOLD);
proba->skip_proba = CalcSkipProba(nb_events, nb_mbs);
proba->use_skip_proba = (proba->skip_proba < SKIP_PROBA_THRESHOLD);
size = 256; // 'use_skip_proba' bit
if (proba->use_skip_proba_) {
size += nb_events * VP8BitCost(1, proba->skip_proba_)
+ (nb_mbs - nb_events) * VP8BitCost(0, proba->skip_proba_);
size += 8 * 256; // cost of signaling the skip_proba_ itself.
if (proba->use_skip_proba) {
size += nb_events * VP8BitCost(1, proba->skip_proba)
+ (nb_mbs - nb_events) * VP8BitCost(0, proba->skip_proba);
size += 8 * 256; // cost of signaling the 'skip_proba' itself.
}
return size;
}
@@ -139,8 +144,8 @@ static int BranchCost(int nb, int total, int proba) {
}
static void ResetTokenStats(VP8Encoder* const enc) {
VP8EncProba* const proba = &enc->proba_;
memset(proba->stats_, 0, sizeof(proba->stats_));
VP8EncProba* const proba = &enc->proba;
memset(proba->stats, 0, sizeof(proba->stats));
}
static int FinalizeTokenProbas(VP8EncProba* const proba) {
@@ -151,7 +156,7 @@ static int FinalizeTokenProbas(VP8EncProba* const proba) {
for (b = 0; b < NUM_BANDS; ++b) {
for (c = 0; c < NUM_CTX; ++c) {
for (p = 0; p < NUM_PROBAS; ++p) {
const proba_t stats = proba->stats_[t][b][c][p];
const proba_t stats = proba->stats[t][b][c][p];
const int nb = (stats >> 0) & 0xffff;
const int total = (stats >> 16) & 0xffff;
const int update_proba = VP8CoeffsUpdateProba[t][b][c][p];
@@ -165,17 +170,17 @@ static int FinalizeTokenProbas(VP8EncProba* const proba) {
const int use_new_p = (old_cost > new_cost);
size += VP8BitCost(use_new_p, update_proba);
if (use_new_p) { // only use proba that seem meaningful enough.
proba->coeffs_[t][b][c][p] = new_p;
proba->coeffs[t][b][c][p] = new_p;
has_changed |= (new_p != old_p);
size += 8 * 256;
} else {
proba->coeffs_[t][b][c][p] = old_p;
proba->coeffs[t][b][c][p] = old_p;
}
}
}
}
}
proba->dirty_ = has_changed;
proba->dirty = has_changed;
return size;
}
@@ -190,8 +195,8 @@ static int GetProba(int a, int b) {
static void ResetSegments(VP8Encoder* const enc) {
int n;
for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
enc->mb_info_[n].segment_ = 0;
for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
enc->mb_info[n].segment = 0;
}
}
@@ -199,34 +204,34 @@ static void SetSegmentProbas(VP8Encoder* const enc) {
int p[NUM_MB_SEGMENTS] = { 0 };
int n;
for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
const VP8MBInfo* const mb = &enc->mb_info_[n];
++p[mb->segment_];
for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
const VP8MBInfo* const mb = &enc->mb_info[n];
++p[mb->segment];
}
#if !defined(WEBP_DISABLE_STATS)
if (enc->pic_->stats != NULL) {
if (enc->pic->stats != NULL) {
for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
enc->pic_->stats->segment_size[n] = p[n];
enc->pic->stats->segment_size[n] = p[n];
}
}
#endif
if (enc->segment_hdr_.num_segments_ > 1) {
uint8_t* const probas = enc->proba_.segments_;
if (enc->segment_hdr.num_segments > 1) {
uint8_t* const probas = enc->proba.segments;
probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);
probas[1] = GetProba(p[0], p[1]);
probas[2] = GetProba(p[2], p[3]);
enc->segment_hdr_.update_map_ =
enc->segment_hdr.update_map =
(probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255);
if (!enc->segment_hdr_.update_map_) ResetSegments(enc);
enc->segment_hdr_.size_ =
if (!enc->segment_hdr.update_map) ResetSegments(enc);
enc->segment_hdr.size =
p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) +
p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2]));
} else {
enc->segment_hdr_.update_map_ = 0;
enc->segment_hdr_.size_ = 0;
enc->segment_hdr.update_map = 0;
enc->segment_hdr.size = 0;
}
}
@@ -311,9 +316,9 @@ static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
int x, y, ch;
VP8Residual res;
uint64_t pos1, pos2, pos3;
const int i16 = (it->mb_->type_ == 1);
const int segment = it->mb_->segment_;
VP8Encoder* const enc = it->enc_;
const int i16 = (it->mb->type == 1);
const int segment = it->mb->segment;
VP8Encoder* const enc = it->enc;
VP8IteratorNzToBytes(it);
@@ -321,8 +326,8 @@ static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
if (i16) {
VP8InitResidual(0, 1, enc, &res);
VP8SetResidualCoeffs(rd->y_dc_levels, &res);
it->top_nz_[8] = it->left_nz_[8] =
PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res);
it->top_nz[8] = it->left_nz[8] =
PutCoeffs(bw, it->top_nz[8] + it->left_nz[8], &res);
VP8InitResidual(1, 0, enc, &res);
} else {
VP8InitResidual(0, 3, enc, &res);
@@ -331,9 +336,9 @@ static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
// luma-AC
for (y = 0; y < 4; ++y) {
for (x = 0; x < 4; ++x) {
const int ctx = it->top_nz_[x] + it->left_nz_[y];
const int ctx = it->top_nz[x] + it->left_nz[y];
VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
it->top_nz_[x] = it->left_nz_[y] = PutCoeffs(bw, ctx, &res);
it->top_nz[x] = it->left_nz[y] = PutCoeffs(bw, ctx, &res);
}
}
pos2 = VP8BitWriterPos(bw);
@@ -343,18 +348,18 @@ static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
for (ch = 0; ch <= 2; ch += 2) {
for (y = 0; y < 2; ++y) {
for (x = 0; x < 2; ++x) {
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
const int ctx = it->top_nz[4 + ch + x] + it->left_nz[4 + ch + y];
VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
it->top_nz[4 + ch + x] = it->left_nz[4 + ch + y] =
PutCoeffs(bw, ctx, &res);
}
}
}
pos3 = VP8BitWriterPos(bw);
it->luma_bits_ = pos2 - pos1;
it->uv_bits_ = pos3 - pos2;
it->bit_count_[segment][i16] += it->luma_bits_;
it->bit_count_[segment][2] += it->uv_bits_;
it->luma_bits = pos2 - pos1;
it->uv_bits = pos3 - pos2;
it->bit_count[segment][i16] += it->luma_bits;
it->bit_count[segment][2] += it->uv_bits;
VP8IteratorBytesToNz(it);
}
@@ -364,15 +369,15 @@ static void RecordResiduals(VP8EncIterator* const it,
const VP8ModeScore* const rd) {
int x, y, ch;
VP8Residual res;
VP8Encoder* const enc = it->enc_;
VP8Encoder* const enc = it->enc;
VP8IteratorNzToBytes(it);
if (it->mb_->type_ == 1) { // i16x16
if (it->mb->type == 1) { // i16x16
VP8InitResidual(0, 1, enc, &res);
VP8SetResidualCoeffs(rd->y_dc_levels, &res);
it->top_nz_[8] = it->left_nz_[8] =
VP8RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res);
it->top_nz[8] = it->left_nz[8] =
VP8RecordCoeffs(it->top_nz[8] + it->left_nz[8], &res);
VP8InitResidual(1, 0, enc, &res);
} else {
VP8InitResidual(0, 3, enc, &res);
@@ -381,9 +386,9 @@ static void RecordResiduals(VP8EncIterator* const it,
// luma-AC
for (y = 0; y < 4; ++y) {
for (x = 0; x < 4; ++x) {
const int ctx = it->top_nz_[x] + it->left_nz_[y];
const int ctx = it->top_nz[x] + it->left_nz[y];
VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
it->top_nz_[x] = it->left_nz_[y] = VP8RecordCoeffs(ctx, &res);
it->top_nz[x] = it->left_nz[y] = VP8RecordCoeffs(ctx, &res);
}
}
@@ -392,9 +397,9 @@ static void RecordResiduals(VP8EncIterator* const it,
for (ch = 0; ch <= 2; ch += 2) {
for (y = 0; y < 2; ++y) {
for (x = 0; x < 2; ++x) {
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
const int ctx = it->top_nz[4 + ch + x] + it->left_nz[4 + ch + y];
VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
it->top_nz[4 + ch + x] = it->left_nz[4 + ch + y] =
VP8RecordCoeffs(ctx, &res);
}
}
@@ -412,14 +417,14 @@ static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
VP8TBuffer* const tokens) {
int x, y, ch;
VP8Residual res;
VP8Encoder* const enc = it->enc_;
VP8Encoder* const enc = it->enc;
VP8IteratorNzToBytes(it);
if (it->mb_->type_ == 1) { // i16x16
const int ctx = it->top_nz_[8] + it->left_nz_[8];
if (it->mb->type == 1) { // i16x16
const int ctx = it->top_nz[8] + it->left_nz[8];
VP8InitResidual(0, 1, enc, &res);
VP8SetResidualCoeffs(rd->y_dc_levels, &res);
it->top_nz_[8] = it->left_nz_[8] =
it->top_nz[8] = it->left_nz[8] =
VP8RecordCoeffTokens(ctx, &res, tokens);
VP8InitResidual(1, 0, enc, &res);
} else {
@@ -429,9 +434,9 @@ static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
// luma-AC
for (y = 0; y < 4; ++y) {
for (x = 0; x < 4; ++x) {
const int ctx = it->top_nz_[x] + it->left_nz_[y];
const int ctx = it->top_nz[x] + it->left_nz[y];
VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
it->top_nz_[x] = it->left_nz_[y] =
it->top_nz[x] = it->left_nz[y] =
VP8RecordCoeffTokens(ctx, &res, tokens);
}
}
@@ -441,15 +446,15 @@ static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
for (ch = 0; ch <= 2; ch += 2) {
for (y = 0; y < 2; ++y) {
for (x = 0; x < 2; ++x) {
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
const int ctx = it->top_nz[4 + ch + x] + it->left_nz[4 + ch + y];
VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
it->top_nz[4 + ch + x] = it->left_nz[4 + ch + y] =
VP8RecordCoeffTokens(ctx, &res, tokens);
}
}
}
VP8IteratorBytesToNz(it);
return !tokens->error_;
return !tokens->error;
}
#endif // !DISABLE_TOKEN_BUFFER
@@ -470,64 +475,64 @@ static void SetBlock(uint8_t* p, int value, int size) {
#endif
static void ResetSSE(VP8Encoder* const enc) {
enc->sse_[0] = 0;
enc->sse_[1] = 0;
enc->sse_[2] = 0;
// Note: enc->sse_[3] is managed by alpha.c
enc->sse_count_ = 0;
enc->sse[0] = 0;
enc->sse[1] = 0;
enc->sse[2] = 0;
// Note: enc->sse[3] is managed by alpha.c
enc->sse_count = 0;
}
static void StoreSSE(const VP8EncIterator* const it) {
VP8Encoder* const enc = it->enc_;
const uint8_t* const in = it->yuv_in_;
const uint8_t* const out = it->yuv_out_;
VP8Encoder* const enc = it->enc;
const uint8_t* const in = it->yuv_in;
const uint8_t* const out = it->yuv_out;
// Note: not totally accurate at boundary. And doesn't include in-loop filter.
enc->sse_[0] += VP8SSE16x16(in + Y_OFF_ENC, out + Y_OFF_ENC);
enc->sse_[1] += VP8SSE8x8(in + U_OFF_ENC, out + U_OFF_ENC);
enc->sse_[2] += VP8SSE8x8(in + V_OFF_ENC, out + V_OFF_ENC);
enc->sse_count_ += 16 * 16;
enc->sse[0] += VP8SSE16x16(in + Y_OFF_ENC, out + Y_OFF_ENC);
enc->sse[1] += VP8SSE8x8(in + U_OFF_ENC, out + U_OFF_ENC);
enc->sse[2] += VP8SSE8x8(in + V_OFF_ENC, out + V_OFF_ENC);
enc->sse_count += 16 * 16;
}
static void StoreSideInfo(const VP8EncIterator* const it) {
VP8Encoder* const enc = it->enc_;
const VP8MBInfo* const mb = it->mb_;
WebPPicture* const pic = enc->pic_;
VP8Encoder* const enc = it->enc;
const VP8MBInfo* const mb = it->mb;
WebPPicture* const pic = enc->pic;
if (pic->stats != NULL) {
StoreSSE(it);
enc->block_count_[0] += (mb->type_ == 0);
enc->block_count_[1] += (mb->type_ == 1);
enc->block_count_[2] += (mb->skip_ != 0);
enc->block_count[0] += (mb->type == 0);
enc->block_count[1] += (mb->type == 1);
enc->block_count[2] += (mb->skip != 0);
}
if (pic->extra_info != NULL) {
uint8_t* const info = &pic->extra_info[it->x_ + it->y_ * enc->mb_w_];
uint8_t* const info = &pic->extra_info[it->x + it->y * enc->mb_w];
switch (pic->extra_info_type) {
case 1: *info = mb->type_; break;
case 2: *info = mb->segment_; break;
case 3: *info = enc->dqm_[mb->segment_].quant_; break;
case 4: *info = (mb->type_ == 1) ? it->preds_[0] : 0xff; break;
case 5: *info = mb->uv_mode_; break;
case 1: *info = mb->type; break;
case 2: *info = mb->segment; break;
case 3: *info = enc->dqm[mb->segment].quant; break;
case 4: *info = (mb->type == 1) ? it->preds[0] : 0xff; break;
case 5: *info = mb->uv_mode; break;
case 6: {
const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3);
const int b = (int)((it->luma_bits + it->uv_bits + 7) >> 3);
*info = (b > 255) ? 255 : b; break;
}
case 7: *info = mb->alpha_; break;
case 7: *info = mb->alpha; break;
default: *info = 0; break;
}
}
#if SEGMENT_VISU // visualize segments and prediction modes
SetBlock(it->yuv_out_ + Y_OFF_ENC, mb->segment_ * 64, 16);
SetBlock(it->yuv_out_ + U_OFF_ENC, it->preds_[0] * 64, 8);
SetBlock(it->yuv_out_ + V_OFF_ENC, mb->uv_mode_ * 64, 8);
SetBlock(it->yuv_out + Y_OFF_ENC, mb->segment * 64, 16);
SetBlock(it->yuv_out + U_OFF_ENC, it->preds[0] * 64, 8);
SetBlock(it->yuv_out + V_OFF_ENC, mb->uv_mode * 64, 8);
#endif
}
static void ResetSideInfo(const VP8EncIterator* const it) {
VP8Encoder* const enc = it->enc_;
WebPPicture* const pic = enc->pic_;
VP8Encoder* const enc = it->enc;
WebPPicture* const pic = enc->pic;
if (pic->stats != NULL) {
memset(enc->block_count_, 0, sizeof(enc->block_count_));
memset(enc->block_count, 0, sizeof(enc->block_count));
}
ResetSSE(enc);
}
@@ -536,12 +541,12 @@ static void ResetSSE(VP8Encoder* const enc) {
(void)enc;
}
static void StoreSideInfo(const VP8EncIterator* const it) {
VP8Encoder* const enc = it->enc_;
WebPPicture* const pic = enc->pic_;
VP8Encoder* const enc = it->enc;
WebPPicture* const pic = enc->pic;
if (pic->extra_info != NULL) {
if (it->x_ == 0 && it->y_ == 0) { // only do it once, at start
if (it->x == 0 && it->y == 0) { // only do it once, at start
memset(pic->extra_info, 0,
enc->mb_w_ * enc->mb_h_ * sizeof(*pic->extra_info));
enc->mb_w * enc->mb_h * sizeof(*pic->extra_info));
}
}
}
@@ -587,7 +592,7 @@ static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
VP8IteratorImport(&it, NULL);
if (VP8Decimate(&it, &info, rd_opt)) {
// Just record the number of skips and act like skip_proba is not used.
++enc->proba_.nb_skip_;
++enc->proba.nb_skip;
}
RecordResiduals(&it, &info);
size += info.R + info.H;
@@ -599,10 +604,10 @@ static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
VP8IteratorSaveBoundary(&it);
} while (VP8IteratorNext(&it) && --nb_mbs > 0);
size_p0 += enc->segment_hdr_.size_;
size_p0 += enc->segment_hdr.size;
if (s->do_size_search) {
size += FinalizeSkipProba(enc);
size += FinalizeTokenProbas(&enc->proba_);
size += FinalizeTokenProbas(&enc->proba);
size = ((size + size_p0 + 1024) >> 11) + HEADER_SIZE_ESTIMATE;
s->value = (double)size;
} else {
@@ -612,17 +617,17 @@ static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
}
static int StatLoop(VP8Encoder* const enc) {
const int method = enc->method_;
const int do_search = enc->do_search_;
const int method = enc->method;
const int do_search = enc->do_search;
const int fast_probe = ((method == 0 || method == 3) && !do_search);
int num_pass_left = enc->config_->pass;
int num_pass_left = enc->config->pass;
const int task_percent = 20;
const int percent_per_pass =
(task_percent + num_pass_left / 2) / num_pass_left;
const int final_percent = enc->percent_ + task_percent;
const int final_percent = enc->percent + task_percent;
const VP8RDLevel rd_opt =
(method >= 3 || do_search) ? RD_OPT_BASIC : RD_OPT_NONE;
int nb_mbs = enc->mb_w_ * enc->mb_h_;
int nb_mbs = enc->mb_w * enc->mb_h;
PassStats stats;
InitPassStats(enc, &stats);
@@ -640,7 +645,7 @@ static int StatLoop(VP8Encoder* const enc) {
while (num_pass_left-- > 0) {
const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
(num_pass_left == 0) ||
(enc->max_i4_header_bits_ == 0);
(enc->max_i4_header_bits == 0);
const uint64_t size_p0 =
OneStatPass(enc, rd_opt, nb_mbs, percent_per_pass, &stats);
if (size_p0 == 0) return 0;
@@ -648,9 +653,9 @@ static int StatLoop(VP8Encoder* const enc) {
printf("#%d value:%.1lf -> %.1lf q:%.2f -> %.2f\n",
num_pass_left, stats.last_value, stats.value, stats.last_q, stats.q);
#endif
if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
if (enc->max_i4_header_bits > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
++num_pass_left;
enc->max_i4_header_bits_ >>= 1; // strengthen header bit limitation...
enc->max_i4_header_bits >>= 1; // strengthen header bit limitation...
continue; // ...and start over
}
if (is_last_pass) {
@@ -665,10 +670,10 @@ static int StatLoop(VP8Encoder* const enc) {
if (!do_search || !stats.do_size_search) {
// Need to finalize probas now, since it wasn't done during the search.
FinalizeSkipProba(enc);
FinalizeTokenProbas(&enc->proba_);
FinalizeTokenProbas(&enc->proba);
}
VP8CalculateLevelCosts(&enc->proba_); // finalize costs
return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
VP8CalculateLevelCosts(&enc->proba); // finalize costs
return WebPReportProgress(enc->pic, final_percent, &enc->percent);
}
//------------------------------------------------------------------------------
@@ -680,37 +685,37 @@ static const uint8_t kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };
static int PreLoopInitialize(VP8Encoder* const enc) {
int p;
int ok = 1;
const int average_bytes_per_MB = kAverageBytesPerMB[enc->base_quant_ >> 4];
const int average_bytes_per_MB = kAverageBytesPerMB[enc->base_quant >> 4];
const int bytes_per_parts =
enc->mb_w_ * enc->mb_h_ * average_bytes_per_MB / enc->num_parts_;
enc->mb_w * enc->mb_h * average_bytes_per_MB / enc->num_parts;
// Initialize the bit-writers
for (p = 0; ok && p < enc->num_parts_; ++p) {
ok = VP8BitWriterInit(enc->parts_ + p, bytes_per_parts);
for (p = 0; ok && p < enc->num_parts; ++p) {
ok = VP8BitWriterInit(enc->parts + p, bytes_per_parts);
}
if (!ok) {
VP8EncFreeBitWriters(enc); // malloc error occurred
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
return WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
return ok;
}
static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
VP8Encoder* const enc = it->enc_;
VP8Encoder* const enc = it->enc;
if (ok) { // Finalize the partitions, check for extra errors.
int p;
for (p = 0; p < enc->num_parts_; ++p) {
VP8BitWriterFinish(enc->parts_ + p);
ok &= !enc->parts_[p].error_;
for (p = 0; p < enc->num_parts; ++p) {
VP8BitWriterFinish(enc->parts + p);
ok &= !enc->parts[p].error;
}
}
if (ok) { // All good. Finish up.
#if !defined(WEBP_DISABLE_STATS)
if (enc->pic_->stats != NULL) { // finalize byte counters...
if (enc->pic->stats != NULL) { // finalize byte counters...
int i, s;
for (i = 0; i <= 2; ++i) {
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
enc->residual_bytes_[i][s] = (int)((it->bit_count_[s][i] + 7) >> 3);
enc->residual_bytes[i][s] = (int)((it->bit_count[s][i] + 7) >> 3);
}
}
}
@@ -719,7 +724,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
} else {
// Something bad happened -> need to do some memory cleanup.
VP8EncFreeBitWriters(enc);
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
return WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
return ok;
}
@@ -728,11 +733,11 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
// VP8EncLoop(): does the final bitstream coding.
static void ResetAfterSkip(VP8EncIterator* const it) {
if (it->mb_->type_ == 1) {
*it->nz_ = 0; // reset all predictors
it->left_nz_[8] = 0;
if (it->mb->type == 1) {
*it->nz = 0; // reset all predictors
it->left_nz[8] = 0;
} else {
*it->nz_ &= (1 << 24); // preserve the dc_nz bit
*it->nz &= (1 << 24); // preserve the dc_nz bit
}
}
@@ -747,16 +752,16 @@ int VP8EncLoop(VP8Encoder* const enc) {
VP8InitFilter(&it);
do {
VP8ModeScore info;
const int dont_use_skip = !enc->proba_.use_skip_proba_;
const VP8RDLevel rd_opt = enc->rd_opt_level_;
const int dont_use_skip = !enc->proba.use_skip_proba;
const VP8RDLevel rd_opt = enc->rd_opt_level;
VP8IteratorImport(&it, NULL);
// Warning! order is important: first call VP8Decimate() and
// *then* decide how to code the skip decision if there's one.
if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
CodeResiduals(it.bw_, &it, &info);
if (it.bw_->error_) {
// enc->pic_->error_code is set in PostLoopFinalize().
CodeResiduals(it.bw, &it, &info);
if (it.bw->error) {
// enc->pic->error_code is set in PostLoopFinalize().
ok = 0;
break;
}
@@ -782,14 +787,14 @@ int VP8EncLoop(VP8Encoder* const enc) {
int VP8EncTokenLoop(VP8Encoder* const enc) {
// Roughly refresh the proba eight times per pass
int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
int num_pass_left = enc->config_->pass;
int max_count = (enc->mb_w * enc->mb_h) >> 3;
int num_pass_left = enc->config->pass;
int remaining_progress = 40; // percents
const int do_search = enc->do_search_;
const int do_search = enc->do_search;
VP8EncIterator it;
VP8EncProba* const proba = &enc->proba_;
const VP8RDLevel rd_opt = enc->rd_opt_level_;
const uint64_t pixel_count = (uint64_t)enc->mb_w_ * enc->mb_h_ * 384;
VP8EncProba* const proba = &enc->proba;
const VP8RDLevel rd_opt = enc->rd_opt_level;
const uint64_t pixel_count = (uint64_t)enc->mb_w * enc->mb_h * 384;
PassStats stats;
int ok;
@@ -799,16 +804,16 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
if (max_count < MIN_COUNT) max_count = MIN_COUNT;
assert(enc->num_parts_ == 1);
assert(enc->use_tokens_);
assert(proba->use_skip_proba_ == 0);
assert(enc->num_parts == 1);
assert(enc->use_tokens);
assert(proba->use_skip_proba == 0);
assert(rd_opt >= RD_OPT_BASIC); // otherwise, token-buffer won't be useful
assert(num_pass_left > 0);
while (ok && num_pass_left-- > 0) {
const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
(num_pass_left == 0) ||
(enc->max_i4_header_bits_ == 0);
(enc->max_i4_header_bits == 0);
uint64_t size_p0 = 0;
uint64_t distortion = 0;
int cnt = max_count;
@@ -821,7 +826,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
ResetTokenStats(enc);
VP8InitFilter(&it); // don't collect stats until last pass (too costly)
}
VP8TBufferClear(&enc->tokens_);
VP8TBufferClear(&enc->tokens);
do {
VP8ModeScore info;
VP8IteratorImport(&it, NULL);
@@ -831,9 +836,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
cnt = max_count;
}
VP8Decimate(&it, &info, rd_opt);
ok = RecordTokens(&it, &info, &enc->tokens_);
ok = RecordTokens(&it, &info, &enc->tokens);
if (!ok) {
WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
break;
}
size_p0 += info.H;
@@ -848,11 +853,11 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
} while (ok && VP8IteratorNext(&it));
if (!ok) break;
size_p0 += enc->segment_hdr_.size_;
size_p0 += enc->segment_hdr.size;
if (stats.do_size_search) {
uint64_t size = FinalizeTokenProbas(&enc->proba_);
size += VP8EstimateTokenSize(&enc->tokens_,
(const uint8_t*)proba->coeffs_);
uint64_t size = FinalizeTokenProbas(&enc->proba);
size += VP8EstimateTokenSize(&enc->tokens,
(const uint8_t*)proba->coeffs);
size = (size + size_p0 + 1024) >> 11; // -> size in bytes
size += HEADER_SIZE_ESTIMATE;
stats.value = (double)size;
@@ -866,9 +871,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
num_pass_left, stats.last_value, stats.value,
stats.last_q, stats.q, stats.dq, stats.qmin, stats.qmax);
#endif
if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
if (enc->max_i4_header_bits > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
++num_pass_left;
enc->max_i4_header_bits_ >>= 1; // strengthen header bit limitation...
enc->max_i4_header_bits >>= 1; // strengthen header bit limitation...
if (is_last_pass) {
ResetSideInfo(&it);
}
@@ -883,13 +888,13 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
}
if (ok) {
if (!stats.do_size_search) {
FinalizeTokenProbas(&enc->proba_);
FinalizeTokenProbas(&enc->proba);
}
ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
(const uint8_t*)proba->coeffs_, 1);
ok = VP8EmitTokens(&enc->tokens, enc->parts + 0,
(const uint8_t*)proba->coeffs, 1);
}
ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + remaining_progress,
&enc->percent_);
ok = ok && WebPReportProgress(enc->pic, enc->percent + remaining_progress,
&enc->percent);
return PostLoopFinalize(&it, ok);
}

File diff suppressed because it is too large Load Diff

View File

@@ -14,9 +14,8 @@
#ifndef WEBP_ENC_HISTOGRAM_ENC_H_
#define WEBP_ENC_HISTOGRAM_ENC_H_
#include <string.h>
#include "src/enc/backward_references_enc.h"
#include "src/webp/encode.h"
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
@@ -25,26 +24,29 @@ extern "C" {
#endif
// Not a trivial literal symbol.
#define VP8L_NON_TRIVIAL_SYM (0xffffffff)
#define VP8L_NON_TRIVIAL_SYM ((uint16_t)(0xffff))
// A simple container for histograms of data.
typedef struct {
// literal_ contains green literal, palette-code and
// 'literal' contains green literal, palette-code and
// copy-length-prefix histogram
uint32_t* literal_; // Pointer to the allocated buffer for literal.
uint32_t red_[NUM_LITERAL_CODES];
uint32_t blue_[NUM_LITERAL_CODES];
uint32_t alpha_[NUM_LITERAL_CODES];
uint32_t* literal; // Pointer to the allocated buffer for literal.
uint32_t red[NUM_LITERAL_CODES];
uint32_t blue[NUM_LITERAL_CODES];
uint32_t alpha[NUM_LITERAL_CODES];
// Backward reference prefix-code histogram.
uint32_t distance_[NUM_DISTANCE_CODES];
int palette_code_bits_;
uint32_t trivial_symbol_; // True, if histograms for Red, Blue & Alpha
// literal symbols are single valued.
uint64_t bit_cost_; // cached value of bit cost.
uint64_t literal_cost_; // Cached values of dominant entropy costs:
uint64_t red_cost_; // literal, red & blue.
uint64_t blue_cost_;
uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance
uint32_t distance[NUM_DISTANCE_CODES];
int palette_code_bits;
// The following members are only used within VP8LGetHistoImageSymbols.
// Index of the unique value of a histogram if any, VP8L_NON_TRIVIAL_SYM
// otherwise.
uint16_t trivial_symbol[5];
uint64_t bit_cost; // Cached value of total bit cost.
// Cached values of entropy costs: literal, red, blue, alpha, distance
uint64_t costs[5];
uint8_t is_used[5]; // 5 for literal, red, blue, alpha, distance
uint16_t bin_id; // entropy bin index.
} VP8LHistogram;
// Collection of histograms with fixed capacity, allocated as one
@@ -60,17 +62,21 @@ typedef struct {
// The input data is the PixOrCopy data, which models the literals, stop
// codes and backward references (both distances and lengths). Also: if
// palette_code_bits is >= 0, initialize the histogram with this value.
void VP8LHistogramCreate(VP8LHistogram* const p,
void VP8LHistogramCreate(VP8LHistogram* const h,
const VP8LBackwardRefs* const refs,
int palette_code_bits);
// Set the palette_code_bits and reset the stats.
// If init_arrays is true, the arrays are also filled with 0's.
void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
void VP8LHistogramInit(VP8LHistogram* const h, int palette_code_bits,
int init_arrays);
// Collect all the references into a histogram (without reset)
// The distance modifier function is applied to the distance before
// the histogram is updated. It can be NULL.
void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
int (*const distance_modifier)(int, int),
int distance_modifier_arg0,
VP8LHistogram* const histo);
// Free the memory allocated for the histogram.
@@ -91,12 +97,6 @@ void VP8LHistogramSetClear(VP8LHistogramSet* const set);
// Special case of VP8LAllocateHistogramSet, with size equals 1.
VP8LHistogram* VP8LAllocateHistogram(int cache_bits);
// Accumulate a token 'v' into a histogram.
void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
const PixOrCopy* const v,
int (*const distance_modifier)(int, int),
int distance_modifier_arg0);
static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
return NUM_LITERAL_CODES + NUM_LENGTH_CODES +
((palette_code_bits > 0) ? (1 << palette_code_bits) : 0);
@@ -118,7 +118,7 @@ uint64_t VP8LBitsEntropy(const uint32_t* const array, int n);
// Estimate how many bits the combined entropy of literals and distance
// approximately maps to.
uint64_t VP8LHistogramEstimateBits(VP8LHistogram* const p);
uint64_t VP8LHistogramEstimateBits(const VP8LHistogram* const h);
#ifdef __cplusplus
}

View File

@@ -14,88 +14,91 @@
#include <string.h>
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/utils.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// VP8Iterator
//------------------------------------------------------------------------------
static void InitLeft(VP8EncIterator* const it) {
it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] =
(it->y_ > 0) ? 129 : 127;
memset(it->y_left_, 129, 16);
memset(it->u_left_, 129, 8);
memset(it->v_left_, 129, 8);
it->left_nz_[8] = 0;
if (it->top_derr_ != NULL) {
memset(&it->left_derr_, 0, sizeof(it->left_derr_));
it->y_left[-1] = it->u_left[-1] = it->v_left[-1] =
(it->y > 0) ? 129 : 127;
memset(it->y_left, 129, 16);
memset(it->u_left, 129, 8);
memset(it->v_left, 129, 8);
it->left_nz[8] = 0;
if (it->top_derr != NULL) {
memset(&it->left_derr, 0, sizeof(it->left_derr));
}
}
static void InitTop(VP8EncIterator* const it) {
const VP8Encoder* const enc = it->enc_;
const size_t top_size = enc->mb_w_ * 16;
memset(enc->y_top_, 127, 2 * top_size);
memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));
if (enc->top_derr_ != NULL) {
memset(enc->top_derr_, 0, enc->mb_w_ * sizeof(*enc->top_derr_));
const VP8Encoder* const enc = it->enc;
const size_t top_size = enc->mb_w * 16;
memset(enc->y_top, 127, 2 * top_size);
memset(enc->nz, 0, enc->mb_w * sizeof(*enc->nz));
if (enc->top_derr != NULL) {
memset(enc->top_derr, 0, enc->mb_w * sizeof(*enc->top_derr));
}
}
void VP8IteratorSetRow(VP8EncIterator* const it, int y) {
VP8Encoder* const enc = it->enc_;
it->x_ = 0;
it->y_ = y;
it->bw_ = &enc->parts_[y & (enc->num_parts_ - 1)];
it->preds_ = enc->preds_ + y * 4 * enc->preds_w_;
it->nz_ = enc->nz_;
it->mb_ = enc->mb_info_ + y * enc->mb_w_;
it->y_top_ = enc->y_top_;
it->uv_top_ = enc->uv_top_;
VP8Encoder* const enc = it->enc;
it->x = 0;
it->y = y;
it->bw = &enc->parts[y & (enc->num_parts - 1)];
it->preds = enc->preds + y * 4 * enc->preds_w;
it->nz = enc->nz;
it->mb = enc->mb_info + y * enc->mb_w;
it->y_top = enc->y_top;
it->uv_top = enc->uv_top;
InitLeft(it);
}
// restart a scan
static void VP8IteratorReset(VP8EncIterator* const it) {
VP8Encoder* const enc = it->enc_;
VP8Encoder* const enc = it->enc;
VP8IteratorSetRow(it, 0);
VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_); // default
VP8IteratorSetCountDown(it, enc->mb_w * enc->mb_h); // default
InitTop(it);
memset(it->bit_count_, 0, sizeof(it->bit_count_));
it->do_trellis_ = 0;
memset(it->bit_count, 0, sizeof(it->bit_count));
it->do_trellis = 0;
}
void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) {
it->count_down_ = it->count_down0_ = count_down;
it->count_down = it->count_down0 = count_down;
}
int VP8IteratorIsDone(const VP8EncIterator* const it) {
return (it->count_down_ <= 0);
return (it->count_down <= 0);
}
void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
it->enc_ = enc;
it->yuv_in_ = (uint8_t*)WEBP_ALIGN(it->yuv_mem_);
it->yuv_out_ = it->yuv_in_ + YUV_SIZE_ENC;
it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC;
it->yuv_p_ = it->yuv_out2_ + YUV_SIZE_ENC;
it->lf_stats_ = enc->lf_stats_;
it->percent0_ = enc->percent_;
it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1);
it->u_left_ = it->y_left_ + 16 + 16;
it->v_left_ = it->u_left_ + 16;
it->top_derr_ = enc->top_derr_;
it->enc = enc;
it->yuv_in = (uint8_t*)WEBP_ALIGN(it->yuv_mem);
it->yuv_out = it->yuv_in + YUV_SIZE_ENC;
it->yuv_out2 = it->yuv_out + YUV_SIZE_ENC;
it->yuv_p = it->yuv_out2 + YUV_SIZE_ENC;
it->lf_stats = enc->lf_stats;
it->percent0 = enc->percent;
it->y_left = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem + 1);
it->u_left = it->y_left + 16 + 16;
it->v_left = it->u_left + 16;
it->top_derr = enc->top_derr;
VP8IteratorReset(it);
}
int VP8IteratorProgress(const VP8EncIterator* const it, int delta) {
VP8Encoder* const enc = it->enc_;
if (delta && enc->pic_->progress_hook != NULL) {
const int done = it->count_down0_ - it->count_down_;
const int percent = (it->count_down0_ <= 0)
? it->percent0_
: it->percent0_ + delta * done / it->count_down0_;
return WebPReportProgress(enc->pic_, percent, &enc->percent_);
VP8Encoder* const enc = it->enc;
if (delta && enc->pic->progress_hook != NULL) {
const int done = it->count_down0 - it->count_down;
const int percent = (it->count_down0 <= 0)
? it->percent0
: it->percent0 + delta * done / it->count_down0;
return WebPReportProgress(enc->pic, percent, &enc->percent);
}
return 1;
}
@@ -131,9 +134,9 @@ static void ImportLine(const uint8_t* src, int src_stride,
}
void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) {
const VP8Encoder* const enc = it->enc_;
const int x = it->x_, y = it->y_;
const WebPPicture* const pic = enc->pic_;
const VP8Encoder* const enc = it->enc;
const int x = it->x, y = it->y;
const WebPPicture* const pic = enc->pic;
const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16;
const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8;
const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8;
@@ -142,9 +145,9 @@ void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) {
const int uv_w = (w + 1) >> 1;
const int uv_h = (h + 1) >> 1;
ImportBlock(ysrc, pic->y_stride, it->yuv_in_ + Y_OFF_ENC, w, h, 16);
ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8);
ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8);
ImportBlock(ysrc, pic->y_stride, it->yuv_in + Y_OFF_ENC, w, h, 16);
ImportBlock(usrc, pic->uv_stride, it->yuv_in + U_OFF_ENC, uv_w, uv_h, 8);
ImportBlock(vsrc, pic->uv_stride, it->yuv_in + V_OFF_ENC, uv_w, uv_h, 8);
if (tmp_32 == NULL) return;
@@ -153,19 +156,19 @@ void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) {
InitLeft(it);
} else {
if (y == 0) {
it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = 127;
it->y_left[-1] = it->u_left[-1] = it->v_left[-1] = 127;
} else {
it->y_left_[-1] = ysrc[- 1 - pic->y_stride];
it->u_left_[-1] = usrc[- 1 - pic->uv_stride];
it->v_left_[-1] = vsrc[- 1 - pic->uv_stride];
it->y_left[-1] = ysrc[- 1 - pic->y_stride];
it->u_left[-1] = usrc[- 1 - pic->uv_stride];
it->v_left[-1] = vsrc[- 1 - pic->uv_stride];
}
ImportLine(ysrc - 1, pic->y_stride, it->y_left_, h, 16);
ImportLine(usrc - 1, pic->uv_stride, it->u_left_, uv_h, 8);
ImportLine(vsrc - 1, pic->uv_stride, it->v_left_, uv_h, 8);
ImportLine(ysrc - 1, pic->y_stride, it->y_left, h, 16);
ImportLine(usrc - 1, pic->uv_stride, it->u_left, uv_h, 8);
ImportLine(vsrc - 1, pic->uv_stride, it->v_left, uv_h, 8);
}
it->y_top_ = tmp_32 + 0;
it->uv_top_ = tmp_32 + 16;
it->y_top = tmp_32 + 0;
it->uv_top = tmp_32 + 16;
if (y == 0) {
memset(tmp_32, 127, 32 * sizeof(*tmp_32));
} else {
@@ -188,13 +191,13 @@ static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride,
}
void VP8IteratorExport(const VP8EncIterator* const it) {
const VP8Encoder* const enc = it->enc_;
if (enc->config_->show_compressed) {
const int x = it->x_, y = it->y_;
const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC;
const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC;
const WebPPicture* const pic = enc->pic_;
const VP8Encoder* const enc = it->enc;
if (enc->config->show_compressed) {
const int x = it->x, y = it->y;
const uint8_t* const ysrc = it->yuv_out + Y_OFF_ENC;
const uint8_t* const usrc = it->yuv_out + U_OFF_ENC;
const uint8_t* const vsrc = it->yuv_out + V_OFF_ENC;
const WebPPicture* const pic = enc->pic;
uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16;
uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8;
uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8;
@@ -234,9 +237,9 @@ void VP8IteratorExport(const VP8EncIterator* const it) {
#define BIT(nz, n) (!!((nz) & (1 << (n))))
void VP8IteratorNzToBytes(VP8EncIterator* const it) {
const int tnz = it->nz_[0], lnz = it->nz_[-1];
int* const top_nz = it->top_nz_;
int* const left_nz = it->left_nz_;
const int tnz = it->nz[0], lnz = it->nz[-1];
int* const top_nz = it->top_nz;
int* const left_nz = it->left_nz;
// Top-Y
top_nz[0] = BIT(tnz, 12);
@@ -268,8 +271,8 @@ void VP8IteratorNzToBytes(VP8EncIterator* const it) {
void VP8IteratorBytesToNz(VP8EncIterator* const it) {
uint32_t nz = 0;
const int* const top_nz = it->top_nz_;
const int* const left_nz = it->left_nz_;
const int* const top_nz = it->top_nz;
const int* const left_nz = it->left_nz;
// top
nz |= (top_nz[0] << 12) | (top_nz[1] << 13);
nz |= (top_nz[2] << 14) | (top_nz[3] << 15);
@@ -281,7 +284,7 @@ void VP8IteratorBytesToNz(VP8EncIterator* const it) {
nz |= (left_nz[2] << 11);
nz |= (left_nz[4] << 17) | (left_nz[6] << 21);
*it->nz_ = nz;
*it->nz = nz;
}
#undef BIT
@@ -290,77 +293,77 @@ void VP8IteratorBytesToNz(VP8EncIterator* const it) {
// Advance to the next position, doing the bookkeeping.
void VP8IteratorSaveBoundary(VP8EncIterator* const it) {
VP8Encoder* const enc = it->enc_;
const int x = it->x_, y = it->y_;
const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC;
if (x < enc->mb_w_ - 1) { // left
VP8Encoder* const enc = it->enc;
const int x = it->x, y = it->y;
const uint8_t* const ysrc = it->yuv_out + Y_OFF_ENC;
const uint8_t* const uvsrc = it->yuv_out + U_OFF_ENC;
if (x < enc->mb_w - 1) { // left
int i;
for (i = 0; i < 16; ++i) {
it->y_left_[i] = ysrc[15 + i * BPS];
it->y_left[i] = ysrc[15 + i * BPS];
}
for (i = 0; i < 8; ++i) {
it->u_left_[i] = uvsrc[7 + i * BPS];
it->v_left_[i] = uvsrc[15 + i * BPS];
it->u_left[i] = uvsrc[7 + i * BPS];
it->v_left[i] = uvsrc[15 + i * BPS];
}
// top-left (before 'top'!)
it->y_left_[-1] = it->y_top_[15];
it->u_left_[-1] = it->uv_top_[0 + 7];
it->v_left_[-1] = it->uv_top_[8 + 7];
it->y_left[-1] = it->y_top[15];
it->u_left[-1] = it->uv_top[0 + 7];
it->v_left[-1] = it->uv_top[8 + 7];
}
if (y < enc->mb_h_ - 1) { // top
memcpy(it->y_top_, ysrc + 15 * BPS, 16);
memcpy(it->uv_top_, uvsrc + 7 * BPS, 8 + 8);
if (y < enc->mb_h - 1) { // top
memcpy(it->y_top, ysrc + 15 * BPS, 16);
memcpy(it->uv_top, uvsrc + 7 * BPS, 8 + 8);
}
}
int VP8IteratorNext(VP8EncIterator* const it) {
if (++it->x_ == it->enc_->mb_w_) {
VP8IteratorSetRow(it, ++it->y_);
if (++it->x == it->enc->mb_w) {
VP8IteratorSetRow(it, ++it->y);
} else {
it->preds_ += 4;
it->mb_ += 1;
it->nz_ += 1;
it->y_top_ += 16;
it->uv_top_ += 16;
it->preds += 4;
it->mb += 1;
it->nz += 1;
it->y_top += 16;
it->uv_top += 16;
}
return (0 < --it->count_down_);
return (0 < --it->count_down);
}
//------------------------------------------------------------------------------
// Helper function to set mode properties
void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) {
uint8_t* preds = it->preds_;
uint8_t* preds = it->preds;
int y;
for (y = 0; y < 4; ++y) {
memset(preds, mode, 4);
preds += it->enc_->preds_w_;
preds += it->enc->preds_w;
}
it->mb_->type_ = 1;
it->mb->type = 1;
}
void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) {
uint8_t* preds = it->preds_;
uint8_t* preds = it->preds;
int y;
for (y = 4; y > 0; --y) {
memcpy(preds, modes, 4 * sizeof(*modes));
preds += it->enc_->preds_w_;
preds += it->enc->preds_w;
modes += 4;
}
it->mb_->type_ = 0;
it->mb->type = 0;
}
void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) {
it->mb_->uv_mode_ = mode;
it->mb->uv_mode = mode;
}
void VP8SetSkip(const VP8EncIterator* const it, int skip) {
it->mb_->skip_ = skip;
it->mb->skip = skip;
}
void VP8SetSegment(const VP8EncIterator* const it, int segment) {
it->mb_->segment_ = segment;
it->mb->segment = segment;
}
//------------------------------------------------------------------------------
@@ -403,52 +406,52 @@ static const uint8_t VP8TopLeftI4[16] = {
};
void VP8IteratorStartI4(VP8EncIterator* const it) {
const VP8Encoder* const enc = it->enc_;
const VP8Encoder* const enc = it->enc;
int i;
it->i4_ = 0; // first 4x4 sub-block
it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[0];
it->i4 = 0; // first 4x4 sub-block
it->i4_top = it->i4_boundary + VP8TopLeftI4[0];
// Import the boundary samples
for (i = 0; i < 17; ++i) { // left
it->i4_boundary_[i] = it->y_left_[15 - i];
it->i4_boundary[i] = it->y_left[15 - i];
}
for (i = 0; i < 16; ++i) { // top
it->i4_boundary_[17 + i] = it->y_top_[i];
it->i4_boundary[17 + i] = it->y_top[i];
}
// top-right samples have a special case on the far right of the picture
if (it->x_ < enc->mb_w_ - 1) {
if (it->x < enc->mb_w - 1) {
for (i = 16; i < 16 + 4; ++i) {
it->i4_boundary_[17 + i] = it->y_top_[i];
it->i4_boundary[17 + i] = it->y_top[i];
}
} else { // else, replicate the last valid pixel four times
for (i = 16; i < 16 + 4; ++i) {
it->i4_boundary_[17 + i] = it->i4_boundary_[17 + 15];
it->i4_boundary[17 + i] = it->i4_boundary[17 + 15];
}
}
#if WEBP_AARCH64 && BPS == 32 && defined(WEBP_MSAN)
// Intra4Preds_NEON() reads 3 uninitialized bytes from i4_boundary_ when top
// Intra4Preds_NEON() reads 3 uninitialized bytes from 'i4_boundary' when top
// is positioned at offset 29 (VP8TopLeftI4[3]). The values are not used
// meaningfully, but due to limitations in MemorySanitizer related to
// modeling of tbl instructions, a warning will be issued. This can be
// removed if MSan is updated to support the instructions. See
// https://issues.webmproject.org/372109644.
memset(it->i4_boundary_ + sizeof(it->i4_boundary_) - 3, 0xaa, 3);
memset(it->i4_boundary + sizeof(it->i4_boundary) - 3, 0xaa, 3);
#endif
VP8IteratorNzToBytes(it); // import the non-zero context
}
int VP8IteratorRotateI4(VP8EncIterator* const it,
const uint8_t* const yuv_out) {
const uint8_t* const blk = yuv_out + VP8Scan[it->i4_];
uint8_t* const top = it->i4_top_;
const uint8_t* const blk = yuv_out + VP8Scan[it->i4];
uint8_t* const top = it->i4_top;
int i;
// Update the cache with 7 fresh samples
for (i = 0; i <= 3; ++i) {
top[-4 + i] = blk[i + 3 * BPS]; // store future top samples
}
if ((it->i4_ & 3) != 3) { // if not on the right sub-blocks #3, #7, #11, #15
if ((it->i4 & 3) != 3) { // if not on the right sub-blocks #3, #7, #11, #15
for (i = 0; i <= 2; ++i) { // store future left samples
top[i] = blk[3 + (2 - i) * BPS];
}
@@ -458,12 +461,12 @@ int VP8IteratorRotateI4(VP8EncIterator* const it,
}
}
// move pointers to next sub-block
++it->i4_;
if (it->i4_ == 16) { // we're done
++it->i4;
if (it->i4 == 16) { // we're done
return 0;
}
it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_];
it->i4_top = it->i4_boundary + VP8TopLeftI4[it->i4];
return 1;
}

View File

@@ -16,10 +16,13 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "src/dsp/lossless_common.h"
#include "src/utils/utils.h"
#include "src/webp/types.h"
#include "src/enc/vp8li_enc.h"
#include "src/utils/utils.h"
#include "src/webp/encode.h"
#if (WEBP_NEAR_LOSSLESS == 1)

View File

@@ -12,18 +12,21 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "sharpyuv/sharpyuv.h"
#include "sharpyuv/sharpyuv_csp.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/random_utils.h"
#include "src/utils/utils.h"
#include "src/dsp/cpu.h"
#include "src/dsp/dsp.h"
#include "src/dsp/lossless.h"
#include "src/dsp/yuv.h"
#include "src/dsp/cpu.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/random_utils.h"
#include "src/utils/utils.h"
#include "src/webp/encode.h"
#include "src/webp/types.h"
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h>

View File

@@ -14,9 +14,12 @@
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "src/enc/vp8i_enc.h"
#include "src/utils/utils.h"
#include "src/webp/encode.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// WebPPicture
@@ -226,9 +229,7 @@ int WebPMemoryWrite(const uint8_t* data, size_t data_size,
void WebPMemoryWriterClear(WebPMemoryWriter* writer) {
if (writer != NULL) {
WebPSafeFree(writer->mem);
writer->mem = NULL;
writer->size = 0;
writer->max_size = 0;
WebPMemoryWriterInit(writer);
}
}

View File

@@ -18,6 +18,7 @@
#include <math.h>
#include <stdlib.h>
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/utils.h"

View File

@@ -16,6 +16,8 @@
#include <assert.h>
#include <stdlib.h>
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/enc/vp8i_enc.h"
#if !defined(WEBP_REDUCE_SIZE)

View File

@@ -12,9 +12,14 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include "src/enc/vp8i_enc.h"
#include "src/dsp/dsp.h"
#include "src/dsp/yuv.h"
#include "src/enc/vp8i_enc.h"
#include "src/webp/encode.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Helper: clean up fully transparent area to help compressibility.

View File

@@ -831,24 +831,24 @@ int VP8LResidualImage(int width, int height, int min_bits, int max_bits,
// Color transform functions.
static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
m->green_to_red_ = 0;
m->green_to_blue_ = 0;
m->red_to_blue_ = 0;
m->green_to_red = 0;
m->green_to_blue = 0;
m->red_to_blue = 0;
}
static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
VP8LMultipliers* const m) {
m->green_to_red_ = (color_code >> 0) & 0xff;
m->green_to_blue_ = (color_code >> 8) & 0xff;
m->red_to_blue_ = (color_code >> 16) & 0xff;
m->green_to_red = (color_code >> 0) & 0xff;
m->green_to_blue = (color_code >> 8) & 0xff;
m->red_to_blue = (color_code >> 16) & 0xff;
}
static WEBP_INLINE uint32_t MultipliersToColorCode(
const VP8LMultipliers* const m) {
return 0xff000000u |
((uint32_t)(m->red_to_blue_) << 16) |
((uint32_t)(m->green_to_blue_) << 8) |
m->green_to_red_;
((uint32_t)(m->red_to_blue) << 16) |
((uint32_t)(m->green_to_blue) << 8) |
m->green_to_red;
}
static int64_t PredictionCostCrossColor(const uint32_t accumulated[256],
@@ -871,11 +871,11 @@ static int64_t GetPredictionCostCrossColorRed(
green_to_red, histo);
cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
if ((uint8_t)green_to_red == prev_x.green_to_red_) {
if ((uint8_t)green_to_red == prev_x.green_to_red) {
// favor keeping the areas locally similar
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
}
if ((uint8_t)green_to_red == prev_y.green_to_red_) {
if ((uint8_t)green_to_red == prev_y.green_to_red) {
// favor keeping the areas locally similar
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
}
@@ -913,7 +913,7 @@ static void GetBestGreenToRed(const uint32_t* argb, int stride, int tile_width,
}
}
}
best_tx->green_to_red_ = (green_to_red_best & 0xff);
best_tx->green_to_red = (green_to_red_best & 0xff);
}
static int64_t GetPredictionCostCrossColorBlue(
@@ -927,19 +927,19 @@ static int64_t GetPredictionCostCrossColorBlue(
green_to_blue, red_to_blue, histo);
cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
if ((uint8_t)green_to_blue == prev_x.green_to_blue) {
// favor keeping the areas locally similar
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
}
if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
if ((uint8_t)green_to_blue == prev_y.green_to_blue) {
// favor keeping the areas locally similar
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
}
if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
if ((uint8_t)red_to_blue == prev_x.red_to_blue) {
// favor keeping the areas locally similar
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
}
if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
if ((uint8_t)red_to_blue == prev_y.red_to_blue) {
// favor keeping the areas locally similar
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
}
@@ -997,8 +997,8 @@ static void GetBestGreenRedToBlue(const uint32_t* argb, int stride,
break; // out of iter-loop.
}
}
best_tx->green_to_blue_ = green_to_blue_best & 0xff;
best_tx->red_to_blue_ = red_to_blue_best & 0xff;
best_tx->green_to_blue = green_to_blue_best & 0xff;
best_tx->red_to_blue = red_to_blue_best & 0xff;
}
#undef kGreenRedToBlueMaxIters
#undef kGreenRedToBlueNumAxis

View File

@@ -14,10 +14,14 @@
#include <assert.h>
#include <math.h>
#include <stdlib.h> // for abs()
#include <string.h>
#include "src/dec/common_dec.h"
#include "src/dsp/dsp.h"
#include "src/dsp/quant.h"
#include "src/enc/vp8i_enc.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
#include "src/webp/types.h"
#define DO_TRELLIS_I4 1
#define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate.
@@ -54,11 +58,11 @@
static void PrintBlockInfo(const VP8EncIterator* const it,
const VP8ModeScore* const rd) {
int i, j;
const int is_i16 = (it->mb_->type_ == 1);
const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC;
const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC;
const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC;
const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC;
const int is_i16 = (it->mb->type == 1);
const uint8_t* const y_in = it->yuv_in + Y_OFF_ENC;
const uint8_t* const y_out = it->yuv_out + Y_OFF_ENC;
const uint8_t* const uv_in = it->yuv_in + U_OFF_ENC;
const uint8_t* const uv_out = it->yuv_out + U_OFF_ENC;
printf("SOURCE / OUTPUT / ABS DELTA\n");
for (j = 0; j < 16; ++j) {
for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]);
@@ -211,26 +215,26 @@ static int ExpandMatrix(VP8Matrix* const m, int type) {
for (i = 0; i < 2; ++i) {
const int is_ac_coeff = (i > 0);
const int bias = kBiasMatrices[type][is_ac_coeff];
m->iq_[i] = (1 << QFIX) / m->q_[i];
m->bias_[i] = BIAS(bias);
// zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
m->iq[i] = (1 << QFIX) / m->q[i];
m->bias[i] = BIAS(bias);
// zthresh is the exact value such that QUANTDIV(coeff, iQ, B) is:
// * zero if coeff <= zthresh
// * non-zero if coeff > zthresh
m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];
m->zthresh[i] = ((1 << QFIX) - 1 - m->bias[i]) / m->iq[i];
}
for (i = 2; i < 16; ++i) {
m->q_[i] = m->q_[1];
m->iq_[i] = m->iq_[1];
m->bias_[i] = m->bias_[1];
m->zthresh_[i] = m->zthresh_[1];
m->q[i] = m->q[1];
m->iq[i] = m->iq[1];
m->bias[i] = m->bias[1];
m->zthresh[i] = m->zthresh[1];
}
for (sum = 0, i = 0; i < 16; ++i) {
if (type == 0) { // we only use sharpening for AC luma coeffs
m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;
m->sharpen[i] = (kFreqSharpening[i] * m->q[i]) >> SHARPEN_BITS;
} else {
m->sharpen_[i] = 0;
m->sharpen[i] = 0;
}
sum += m->q_[i];
sum += m->q[i];
}
return (sum + 8) >> 4;
}
@@ -240,49 +244,49 @@ static void CheckLambdaValue(int* const v) { if (*v < 1) *v = 1; }
static void SetupMatrices(VP8Encoder* enc) {
int i;
const int tlambda_scale =
(enc->method_ >= 4) ? enc->config_->sns_strength
(enc->method >= 4) ? enc->config->sns_strength
: 0;
const int num_segments = enc->segment_hdr_.num_segments_;
const int num_segments = enc->segment_hdr.num_segments;
for (i = 0; i < num_segments; ++i) {
VP8SegmentInfo* const m = &enc->dqm_[i];
const int q = m->quant_;
VP8SegmentInfo* const m = &enc->dqm[i];
const int q = m->quant;
int q_i4, q_i16, q_uv;
m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
m->y1_.q_[1] = kAcTable[clip(q, 0, 127)];
m->y1.q[0] = kDcTable[clip(q + enc->dq_y1_dc, 0, 127)];
m->y1.q[1] = kAcTable[clip(q, 0, 127)];
m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];
m->y2.q[0] = kDcTable[ clip(q + enc->dq_y2_dc, 0, 127)] * 2;
m->y2.q[1] = kAcTable2[clip(q + enc->dq_y2_ac, 0, 127)];
m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
m->uv.q[0] = kDcTable[clip(q + enc->dq_uv_dc, 0, 117)];
m->uv.q[1] = kAcTable[clip(q + enc->dq_uv_ac, 0, 127)];
q_i4 = ExpandMatrix(&m->y1_, 0);
q_i16 = ExpandMatrix(&m->y2_, 1);
q_uv = ExpandMatrix(&m->uv_, 2);
q_i4 = ExpandMatrix(&m->y1, 0);
q_i16 = ExpandMatrix(&m->y2, 1);
q_uv = ExpandMatrix(&m->uv, 2);
m->lambda_i4_ = (3 * q_i4 * q_i4) >> 7;
m->lambda_i16_ = (3 * q_i16 * q_i16);
m->lambda_uv_ = (3 * q_uv * q_uv) >> 6;
m->lambda_mode_ = (1 * q_i4 * q_i4) >> 7;
m->lambda_trellis_i4_ = (7 * q_i4 * q_i4) >> 3;
m->lambda_trellis_i16_ = (q_i16 * q_i16) >> 2;
m->lambda_trellis_uv_ = (q_uv * q_uv) << 1;
m->tlambda_ = (tlambda_scale * q_i4) >> 5;
m->lambda_i4 = (3 * q_i4 * q_i4) >> 7;
m->lambda_i16 = (3 * q_i16 * q_i16);
m->lambda_uv = (3 * q_uv * q_uv) >> 6;
m->lambda_mode = (1 * q_i4 * q_i4) >> 7;
m->lambda_trellis_i4 = (7 * q_i4 * q_i4) >> 3;
m->lambda_trellis_i16 = (q_i16 * q_i16) >> 2;
m->lambda_trellis_uv = (q_uv * q_uv) << 1;
m->tlambda = (tlambda_scale * q_i4) >> 5;
// none of these constants should be < 1
CheckLambdaValue(&m->lambda_i4_);
CheckLambdaValue(&m->lambda_i16_);
CheckLambdaValue(&m->lambda_uv_);
CheckLambdaValue(&m->lambda_mode_);
CheckLambdaValue(&m->lambda_trellis_i4_);
CheckLambdaValue(&m->lambda_trellis_i16_);
CheckLambdaValue(&m->lambda_trellis_uv_);
CheckLambdaValue(&m->tlambda_);
CheckLambdaValue(&m->lambda_i4);
CheckLambdaValue(&m->lambda_i16);
CheckLambdaValue(&m->lambda_uv);
CheckLambdaValue(&m->lambda_mode);
CheckLambdaValue(&m->lambda_trellis_i4);
CheckLambdaValue(&m->lambda_trellis_i16);
CheckLambdaValue(&m->lambda_trellis_uv);
CheckLambdaValue(&m->tlambda);
m->min_disto_ = 20 * m->y1_.q_[0]; // quantization-aware min disto
m->max_edge_ = 0;
m->min_disto = 20 * m->y1.q[0]; // quantization-aware min disto
m->max_edge = 0;
m->i4_penalty_ = 1000 * q_i4 * q_i4;
m->i4_penalty = 1000 * q_i4 * q_i4;
}
}
@@ -296,21 +300,21 @@ static void SetupMatrices(VP8Encoder* enc) {
static void SetupFilterStrength(VP8Encoder* const enc) {
int i;
// level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
const int level0 = 5 * enc->config_->filter_strength;
const int level0 = 5 * enc->config->filter_strength;
for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
VP8SegmentInfo* const m = &enc->dqm_[i];
VP8SegmentInfo* const m = &enc->dqm[i];
// We focus on the quantization of AC coeffs.
const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;
const int qstep = kAcTable[clip(m->quant, 0, 127)] >> 2;
const int base_strength =
VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
VP8FilterStrengthFromDelta(enc->filter_hdr.sharpness, qstep);
// Segments with lower complexity ('beta') will be less filtered.
const int f = base_strength * level0 / (256 + m->beta_);
m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
const int f = base_strength * level0 / (256 + m->beta);
m->fstrength = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
}
// We record the initial strength (mainly for the case of 1-segment only).
enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
enc->filter_hdr.level = enc->dqm[0].fstrength;
enc->filter_hdr.simple = (enc->config->filter_type == 0);
enc->filter_hdr.sharpness = enc->config->filter_sharpness;
}
//------------------------------------------------------------------------------
@@ -356,25 +360,25 @@ static double QualityToJPEGCompression(double c, double alpha) {
static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
const VP8SegmentInfo* const S2) {
return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);
return (S1->quant == S2->quant) && (S1->fstrength == S2->fstrength);
}
static void SimplifySegments(VP8Encoder* const enc) {
int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
// 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
// 'num_segments' is previously validated and <= NUM_MB_SEGMENTS, but an
// explicit check is needed to avoid a spurious warning about 'i' exceeding
// array bounds of 'dqm_' with some compilers (noticed with gcc-4.9).
const int num_segments = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS)
? enc->segment_hdr_.num_segments_
// array bounds of 'dqm' with some compilers (noticed with gcc-4.9).
const int num_segments = (enc->segment_hdr.num_segments < NUM_MB_SEGMENTS)
? enc->segment_hdr.num_segments
: NUM_MB_SEGMENTS;
int num_final_segments = 1;
int s1, s2;
for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments
const VP8SegmentInfo* const S1 = &enc->dqm_[s1];
const VP8SegmentInfo* const S1 = &enc->dqm[s1];
int found = 0;
// check if we already have similar segment
for (s2 = 0; s2 < num_final_segments; ++s2) {
const VP8SegmentInfo* const S2 = &enc->dqm_[s2];
const VP8SegmentInfo* const S2 = &enc->dqm[s2];
if (SegmentsAreEquivalent(S1, S2)) {
found = 1;
break;
@@ -383,18 +387,18 @@ static void SimplifySegments(VP8Encoder* const enc) {
map[s1] = s2;
if (!found) {
if (num_final_segments != s1) {
enc->dqm_[num_final_segments] = enc->dqm_[s1];
enc->dqm[num_final_segments] = enc->dqm[s1];
}
++num_final_segments;
}
}
if (num_final_segments < num_segments) { // Remap
int i = enc->mb_w_ * enc->mb_h_;
while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
enc->segment_hdr_.num_segments_ = num_final_segments;
int i = enc->mb_w * enc->mb_h;
while (i-- > 0) enc->mb_info[i].segment = map[enc->mb_info[i].segment];
enc->segment_hdr.num_segments = num_final_segments;
// Replicate the trailing segment infos (it's mostly cosmetics)
for (i = num_final_segments; i < num_segments; ++i) {
enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
enc->dqm[i] = enc->dqm[num_final_segments - 1];
}
}
}
@@ -402,50 +406,50 @@ static void SimplifySegments(VP8Encoder* const enc) {
void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
int i;
int dq_uv_ac, dq_uv_dc;
const int num_segments = enc->segment_hdr_.num_segments_;
const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
const int num_segments = enc->segment_hdr.num_segments;
const double amp = SNS_TO_DQ * enc->config->sns_strength / 100. / 128.;
const double Q = quality / 100.;
const double c_base = enc->config_->emulate_jpeg_size ?
QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
const double c_base = enc->config->emulate_jpeg_size ?
QualityToJPEGCompression(Q, enc->alpha / 255.) :
QualityToCompression(Q);
for (i = 0; i < num_segments; ++i) {
// We modulate the base coefficient to accommodate for the quantization
// susceptibility and allow denser segments to be quantized more.
const double expn = 1. - amp * enc->dqm_[i].alpha_;
const double expn = 1. - amp * enc->dqm[i].alpha;
const double c = pow(c_base, expn);
const int q = (int)(127. * (1. - c));
assert(expn > 0.);
enc->dqm_[i].quant_ = clip(q, 0, 127);
enc->dqm[i].quant = clip(q, 0, 127);
}
// purely indicative in the bitstream (except for the 1-segment case)
enc->base_quant_ = enc->dqm_[0].quant_;
enc->base_quant = enc->dqm[0].quant;
// fill-in values for the unused segments (required by the syntax)
for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
enc->dqm_[i].quant_ = enc->base_quant_;
enc->dqm[i].quant = enc->base_quant;
}
// uv_alpha_ is normally spread around ~60. The useful range is
// uv_alpha is normally spread around ~60. The useful range is
// typically ~30 (quite bad) to ~100 (ok to decimate UV more).
// We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
/ (MAX_ALPHA - MIN_ALPHA);
dq_uv_ac = (enc->uv_alpha - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
/ (MAX_ALPHA - MIN_ALPHA);
// we rescale by the user-defined strength of adaptation
dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
dq_uv_ac = dq_uv_ac * enc->config->sns_strength / 100;
// and make it safe.
dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
// We also boost the dc-uv-quant a little, based on sns-strength, since
// U/V channels are quite more reactive to high quants (flat DC-blocks
// tend to appear, and are unpleasant).
dq_uv_dc = -4 * enc->config_->sns_strength / 100;
dq_uv_dc = -4 * enc->config->sns_strength / 100;
dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed
enc->dq_y1_dc_ = 0; // TODO(skal): dq-lum
enc->dq_y2_dc_ = 0;
enc->dq_y2_ac_ = 0;
enc->dq_uv_dc_ = dq_uv_dc;
enc->dq_uv_ac_ = dq_uv_ac;
enc->dq_y1_dc = 0; // TODO(skal): dq-lum
enc->dq_y2_dc = 0;
enc->dq_y2_ac = 0;
enc->dq_uv_dc = dq_uv_dc;
enc->dq_uv_ac = dq_uv_ac;
SetupFilterStrength(enc); // initialize segments' filtering, eventually
@@ -467,21 +471,21 @@ static const uint16_t VP8I4ModeOffsets[NUM_BMODES] = {
};
void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
VP8EncPredLuma16(it->yuv_p_, left, top);
const uint8_t* const left = it->x ? it->y_left : NULL;
const uint8_t* const top = it->y ? it->y_top : NULL;
VP8EncPredLuma16(it->yuv_p, left, top);
}
void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
VP8EncPredChroma8(it->yuv_p_, left, top);
const uint8_t* const left = it->x ? it->u_left : NULL;
const uint8_t* const top = it->y ? it->uv_top : NULL;
VP8EncPredChroma8(it->yuv_p, left, top);
}
// Form all the ten Intra4x4 predictions in the yuv_p_ cache
// for the 4x4 block it->i4_
// Form all the ten Intra4x4 predictions in the 'yuv_p' cache
// for the 4x4 block it->i4
static void MakeIntra4Preds(const VP8EncIterator* const it) {
VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
VP8EncPredLuma4(it->yuv_p, it->i4_top);
}
//------------------------------------------------------------------------------
@@ -600,9 +604,9 @@ static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
int ctx0, int coeff_type,
const VP8Matrix* WEBP_RESTRICT const mtx,
int lambda) {
const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
const ProbaArray* const probas = enc->proba.coeffs[coeff_type];
CostArrayPtr const costs =
(CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
(CostArrayPtr)enc->proba.remapped_costs[coeff_type];
const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0;
Node nodes[16][NUM_NODES];
ScoreState score_states[2][NUM_NODES];
@@ -614,7 +618,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
{
score_t cost;
const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
const int thresh = mtx->q[1] * mtx->q[1] / 4;
const int last_proba = probas[VP8EncBands[first]][ctx0][0];
// compute the position of the last interesting coefficient
@@ -646,13 +650,13 @@ static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
// traverse trellis.
for (n = first; n <= last; ++n) {
const int j = kZigzag[n];
const uint32_t Q = mtx->q_[j];
const uint32_t iQ = mtx->iq_[j];
const uint32_t Q = mtx->q[j];
const uint32_t iQ = mtx->iq[j];
const uint32_t B = BIAS(0x00); // neutral bias
// note: it's important to take sign of the _original_ coeff,
// so we don't have to consider level < 0 afterward.
const int sign = (in[j] < 0);
const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen[j];
int level0 = QUANTDIV(coeff0, iQ, B);
int thresh_level = QUANTDIV(coeff0, iQ, BIAS(0x80));
if (thresh_level > MAX_LEVEL) thresh_level = MAX_LEVEL;
@@ -760,7 +764,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
const int j = kZigzag[n];
out[n] = node->sign ? -node->level : node->level;
nz |= node->level;
in[j] = out[n] * mtx->q_[j];
in[j] = out[n] * mtx->q[j];
best_node = node->prev;
}
return (nz != 0);
@@ -778,10 +782,10 @@ static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it,
VP8ModeScore* WEBP_RESTRICT const rd,
uint8_t* WEBP_RESTRICT const yuv_out,
int mode) {
const VP8Encoder* const enc = it->enc_;
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
const VP8Encoder* const enc = it->enc;
const uint8_t* const ref = it->yuv_p + VP8I16ModeOffsets[mode];
const uint8_t* const src = it->yuv_in + Y_OFF_ENC;
const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
int nz = 0;
int n;
int16_t tmp[16][16], dc_tmp[16];
@@ -790,18 +794,18 @@ static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it,
VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
}
VP8FTransformWHT(tmp[0], dc_tmp);
nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2) << 24;
if (DO_TRELLIS_I16 && it->do_trellis_) {
if (DO_TRELLIS_I16 && it->do_trellis) {
int x, y;
VP8IteratorNzToBytes(it);
for (y = 0, n = 0; y < 4; ++y) {
for (x = 0; x < 4; ++x, ++n) {
const int ctx = it->top_nz_[x] + it->left_nz_[y];
const int ctx = it->top_nz[x] + it->left_nz[y];
const int non_zero = TrellisQuantizeBlock(
enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1_,
dqm->lambda_trellis_i16_);
it->top_nz_[x] = it->left_nz_[y] = non_zero;
enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1,
dqm->lambda_trellis_i16);
it->top_nz[x] = it->left_nz[y] = non_zero;
rd->y_ac_levels[n][0] = 0;
nz |= non_zero << n;
}
@@ -811,7 +815,7 @@ static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it,
// Zero-out the first coeff, so that: a) nz is correct below, and
// b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
tmp[n][0] = tmp[n + 1][0] = 0;
nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1) << n;
assert(rd->y_ac_levels[n + 0][0] == 0);
assert(rd->y_ac_levels[n + 1][0] == 0);
}
@@ -831,20 +835,20 @@ static int ReconstructIntra4(VP8EncIterator* WEBP_RESTRICT const it,
const uint8_t* WEBP_RESTRICT const src,
uint8_t* WEBP_RESTRICT const yuv_out,
int mode) {
const VP8Encoder* const enc = it->enc_;
const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
const VP8Encoder* const enc = it->enc;
const uint8_t* const ref = it->yuv_p + VP8I4ModeOffsets[mode];
const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
int nz = 0;
int16_t tmp[16];
VP8FTransform(src, ref, tmp);
if (DO_TRELLIS_I4 && it->do_trellis_) {
const int x = it->i4_ & 3, y = it->i4_ >> 2;
const int ctx = it->top_nz_[x] + it->left_nz_[y];
nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1_,
dqm->lambda_trellis_i4_);
if (DO_TRELLIS_I4 && it->do_trellis) {
const int x = it->i4 & 3, y = it->i4 >> 2;
const int ctx = it->top_nz[x] + it->left_nz[y];
nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1,
dqm->lambda_trellis_i4);
} else {
nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1);
}
VP8ITransform(ref, tmp, yuv_out, 0);
return nz;
@@ -867,8 +871,8 @@ static int QuantizeSingle(int16_t* WEBP_RESTRICT const v,
int V = *v;
const int sign = (V < 0);
if (sign) V = -V;
if (V > (int)mtx->zthresh_[0]) {
const int qV = QUANTDIV(V, mtx->iq_[0], mtx->bias_[0]) * mtx->q_[0];
if (V > (int)mtx->zthresh[0]) {
const int qV = QUANTDIV(V, mtx->iq[0], mtx->bias[0]) * mtx->q[0];
const int err = (V - qV);
*v = sign ? -qV : qV;
return (sign ? -err : err) >> DSCALE;
@@ -890,8 +894,8 @@ static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it,
// as top[]/left[] on the next block.
int ch;
for (ch = 0; ch <= 1; ++ch) {
const int8_t* const top = it->top_derr_[it->x_][ch];
const int8_t* const left = it->left_derr_[ch];
const int8_t* const top = it->top_derr[it->x][ch];
const int8_t* const left = it->left_derr[ch];
int16_t (* const c)[16] = &tmp[ch * 4];
int err0, err1, err2, err3;
c[0][0] += (C1 * top[0] + C2 * left[0]) >> (DSHIFT - DSCALE);
@@ -902,7 +906,7 @@ static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it,
err2 = QuantizeSingle(&c[2][0], mtx);
c[3][0] += (C1 * err1 + C2 * err2) >> (DSHIFT - DSCALE);
err3 = QuantizeSingle(&c[3][0], mtx);
// error 'err' is bounded by mtx->q_[0] which is 132 at max. Hence
// error 'err' is bounded by mtx->q[0] which is 132 at max. Hence
// err >> DSCALE will fit in an int8_t type if DSCALE>=1.
assert(abs(err1) <= 127 && abs(err2) <= 127 && abs(err3) <= 127);
rd->derr[ch][0] = (int8_t)err1;
@@ -915,8 +919,8 @@ static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it,
const VP8ModeScore* WEBP_RESTRICT const rd) {
int ch;
for (ch = 0; ch <= 1; ++ch) {
int8_t* const top = it->top_derr_[it->x_][ch];
int8_t* const left = it->left_derr_[ch];
int8_t* const top = it->top_derr[it->x][ch];
int8_t* const left = it->left_derr[ch];
left[0] = rd->derr[ch][0]; // restore err1
left[1] = 3 * rd->derr[ch][2] >> 2; // ... 3/4th of err3
top[0] = rd->derr[ch][1]; // ... err2
@@ -934,10 +938,10 @@ static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it,
static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it,
VP8ModeScore* WEBP_RESTRICT const rd,
uint8_t* WEBP_RESTRICT const yuv_out, int mode) {
const VP8Encoder* const enc = it->enc_;
const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
const VP8Encoder* const enc = it->enc;
const uint8_t* const ref = it->yuv_p + VP8UVModeOffsets[mode];
const uint8_t* const src = it->yuv_in + U_OFF_ENC;
const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
int nz = 0;
int n;
int16_t tmp[8][16];
@@ -945,25 +949,25 @@ static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it,
for (n = 0; n < 8; n += 2) {
VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
}
if (it->top_derr_ != NULL) CorrectDCValues(it, &dqm->uv_, tmp, rd);
if (it->top_derr != NULL) CorrectDCValues(it, &dqm->uv, tmp, rd);
if (DO_TRELLIS_UV && it->do_trellis_) {
if (DO_TRELLIS_UV && it->do_trellis) {
int ch, x, y;
for (ch = 0, n = 0; ch <= 2; ch += 2) {
for (y = 0; y < 2; ++y) {
for (x = 0; x < 2; ++x, ++n) {
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
const int ctx = it->top_nz[4 + ch + x] + it->left_nz[4 + ch + y];
const int non_zero = TrellisQuantizeBlock(
enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv_,
dqm->lambda_trellis_uv_);
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv,
dqm->lambda_trellis_uv);
it->top_nz[4 + ch + x] = it->left_nz[4 + ch + y] = non_zero;
nz |= non_zero << n;
}
}
}
} else {
for (n = 0; n < 8; n += 2) {
nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv) << n;
}
}
@@ -985,7 +989,7 @@ static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
const int v2 = abs(DCs[4]);
int max_v = (v1 > v0) ? v1 : v0;
max_v = (v2 > max_v) ? v2 : max_v;
if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
if (max_v > dqm->max_edge) dqm->max_edge = max_v;
}
static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) {
@@ -1001,25 +1005,25 @@ static void SwapPtr(uint8_t** a, uint8_t** b) {
}
static void SwapOut(VP8EncIterator* const it) {
SwapPtr(&it->yuv_out_, &it->yuv_out2_);
SwapPtr(&it->yuv_out, &it->yuv_out2);
}
static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it,
VP8ModeScore* WEBP_RESTRICT rd) {
const int kNumBlocks = 16;
VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_i16_;
const int tlambda = dqm->tlambda_;
const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
VP8SegmentInfo* const dqm = &it->enc->dqm[it->mb->segment];
const int lambda = dqm->lambda_i16;
const int tlambda = dqm->tlambda;
const uint8_t* const src = it->yuv_in + Y_OFF_ENC;
VP8ModeScore rd_tmp;
VP8ModeScore* rd_cur = &rd_tmp;
VP8ModeScore* rd_best = rd;
int mode;
int is_flat = IsFlatSource16(it->yuv_in_ + Y_OFF_ENC);
int is_flat = IsFlatSource16(it->yuv_in + Y_OFF_ENC);
rd->mode_i16 = -1;
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer
uint8_t* const tmp_dst = it->yuv_out2 + Y_OFF_ENC; // scratch buffer
rd_cur->mode_i16 = mode;
// Reconstruct
@@ -1051,13 +1055,13 @@ static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it,
if (rd_best != rd) {
memcpy(rd, rd_best, sizeof(*rd));
}
SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.
SetRDScore(dqm->lambda_mode, rd); // finalize score for mode decision.
VP8SetIntra16Mode(it, rd->mode_i16);
// we have a blocky macroblock (only DCs are non-zero) with fairly high
// distortion, record max delta so we can later adjust the minimal filtering
// strength needed to smooth these blocks out.
if ((rd->nz & 0x100ffff) == 0x1000000 && rd->D > dqm->min_disto_) {
if ((rd->nz & 0x100ffff) == 0x1000000 && rd->D > dqm->min_disto) {
StoreMaxDelta(dqm, rd->y_dc_levels);
}
}
@@ -1067,41 +1071,41 @@ static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it,
// return the cost array corresponding to the surrounding prediction modes.
static const uint16_t* GetCostModeI4(VP8EncIterator* WEBP_RESTRICT const it,
const uint8_t modes[16]) {
const int preds_w = it->enc_->preds_w_;
const int x = (it->i4_ & 3), y = it->i4_ >> 2;
const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
const int preds_w = it->enc->preds_w;
const int x = (it->i4 & 3), y = it->i4 >> 2;
const int left = (x == 0) ? it->preds[y * preds_w - 1] : modes[it->i4 - 1];
const int top = (y == 0) ? it->preds[-preds_w + x] : modes[it->i4 - 4];
return VP8FixedCostsI4[top][left];
}
static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
VP8ModeScore* WEBP_RESTRICT const rd) {
const VP8Encoder* const enc = it->enc_;
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_i4_;
const int tlambda = dqm->tlambda_;
const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC;
uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC;
const VP8Encoder* const enc = it->enc;
const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
const int lambda = dqm->lambda_i4;
const int tlambda = dqm->tlambda;
const uint8_t* const src0 = it->yuv_in + Y_OFF_ENC;
uint8_t* const best_blocks = it->yuv_out2 + Y_OFF_ENC;
int total_header_bits = 0;
VP8ModeScore rd_best;
if (enc->max_i4_header_bits_ == 0) {
if (enc->max_i4_header_bits == 0) {
return 0;
}
InitScore(&rd_best);
rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)
SetRDScore(dqm->lambda_mode_, &rd_best);
SetRDScore(dqm->lambda_mode, &rd_best);
VP8IteratorStartI4(it);
do {
const int kNumBlocks = 1;
VP8ModeScore rd_i4;
int mode;
int best_mode = -1;
const uint8_t* const src = src0 + VP8Scan[it->i4_];
const uint8_t* const src = src0 + VP8Scan[it->i4];
const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer.
uint8_t* best_block = best_blocks + VP8Scan[it->i4];
uint8_t* tmp_dst = it->yuv_p + I4TMP; // scratch buffer.
InitScore(&rd_i4);
MakeIntra4Preds(it);
@@ -1111,7 +1115,7 @@ static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
// Reconstruct
rd_tmp.nz =
ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4;
// Compute RD-score
rd_tmp.D = VP8SSE4x4(src, tmp_dst);
@@ -1140,25 +1144,25 @@ static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
CopyScore(&rd_i4, &rd_tmp);
best_mode = mode;
SwapPtr(&tmp_dst, &best_block);
memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels,
sizeof(rd_best.y_ac_levels[it->i4_]));
memcpy(rd_best.y_ac_levels[it->i4], tmp_levels,
sizeof(rd_best.y_ac_levels[it->i4]));
}
}
SetRDScore(dqm->lambda_mode_, &rd_i4);
SetRDScore(dqm->lambda_mode, &rd_i4);
AddScore(&rd_best, &rd_i4);
if (rd_best.score >= rd->score) {
return 0;
}
total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];
if (total_header_bits > enc->max_i4_header_bits_) {
if (total_header_bits > enc->max_i4_header_bits) {
return 0;
}
// Copy selected samples if not in the right place already.
if (best_block != best_blocks + VP8Scan[it->i4_]) {
VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
if (best_block != best_blocks + VP8Scan[it->i4]) {
VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4]);
}
rd->modes_i4[it->i4_] = best_mode;
it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
rd->modes_i4[it->i4] = best_mode;
it->top_nz[it->i4 & 3] = it->left_nz[it->i4 >> 2] = (rd_i4.nz ? 1 : 0);
} while (VP8IteratorRotateI4(it, best_blocks));
// finalize state
@@ -1174,11 +1178,11 @@ static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
VP8ModeScore* WEBP_RESTRICT const rd) {
const int kNumBlocks = 8;
const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_uv_;
const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer
uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC;
const VP8SegmentInfo* const dqm = &it->enc->dqm[it->mb->segment];
const int lambda = dqm->lambda_uv;
const uint8_t* const src = it->yuv_in + U_OFF_ENC;
uint8_t* tmp_dst = it->yuv_out2 + U_OFF_ENC; // scratch buffer
uint8_t* dst0 = it->yuv_out + U_OFF_ENC;
uint8_t* dst = dst0;
VP8ModeScore rd_best;
int mode;
@@ -1205,7 +1209,7 @@ static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
CopyScore(&rd_best, &rd_uv);
rd->mode_uv = mode;
memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
if (it->top_derr_ != NULL) {
if (it->top_derr != NULL) {
memcpy(rd->derr, rd_uv.derr, sizeof(rd_uv.derr));
}
SwapPtr(&dst, &tmp_dst);
@@ -1216,7 +1220,7 @@ static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
if (dst != dst0) { // copy 16x8 block if needed
VP8Copy16x8(dst, dst0);
}
if (it->top_derr_ != NULL) { // store diffusion errors for next block
if (it->top_derr != NULL) { // store diffusion errors for next block
StoreDiffusionErrors(it, rd);
}
}
@@ -1226,26 +1230,26 @@ static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it,
VP8ModeScore* WEBP_RESTRICT const rd) {
const VP8Encoder* const enc = it->enc_;
const int is_i16 = (it->mb_->type_ == 1);
const VP8Encoder* const enc = it->enc;
const int is_i16 = (it->mb->type == 1);
int nz = 0;
if (is_i16) {
nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
nz = ReconstructIntra16(it, rd, it->yuv_out + Y_OFF_ENC, it->preds[0]);
} else {
VP8IteratorStartI4(it);
do {
const int mode =
it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
it->preds[(it->i4 & 3) + (it->i4 >> 2) * enc->preds_w];
const uint8_t* const src = it->yuv_in + Y_OFF_ENC + VP8Scan[it->i4];
uint8_t* const dst = it->yuv_out + Y_OFF_ENC + VP8Scan[it->i4];
MakeIntra4Preds(it);
nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
src, dst, mode) << it->i4_;
} while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4],
src, dst, mode) << it->i4;
} while (VP8IteratorRotateI4(it, it->yuv_out + Y_OFF_ENC));
}
nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
nz |= ReconstructUV(it, rd, it->yuv_out + U_OFF_ENC, it->mb->uv_mode);
rd->nz = nz;
}
@@ -1256,23 +1260,23 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
score_t best_score = MAX_COST;
int nz = 0;
int mode;
int is_i16 = try_both_modes || (it->mb_->type_ == 1);
int is_i16 = try_both_modes || (it->mb->type == 1);
const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
const VP8SegmentInfo* const dqm = &it->enc->dqm[it->mb->segment];
// Some empiric constants, of approximate order of magnitude.
const int lambda_d_i16 = 106;
const int lambda_d_i4 = 11;
const int lambda_d_uv = 120;
score_t score_i4 = dqm->i4_penalty_;
score_t score_i4 = dqm->i4_penalty;
score_t i4_bit_sum = 0;
const score_t bit_limit = try_both_modes ? it->enc_->mb_header_limit_
const score_t bit_limit = try_both_modes ? it->enc->mb_header_limit
: MAX_COST; // no early-out allowed
if (is_i16) { // First, evaluate Intra16 distortion
int best_mode = -1;
const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
const uint8_t* const src = it->yuv_in + Y_OFF_ENC;
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
const uint8_t* const ref = it->yuv_p + VP8I16ModeOffsets[mode];
const score_t score = (score_t)VP8SSE16x16(src, ref) * RD_DISTO_MULT
+ VP8FixedCostsI16[mode] * lambda_d_i16;
if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) {
@@ -1284,10 +1288,10 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
best_score = score;
}
}
if (it->x_ == 0 || it->y_ == 0) {
if (it->x == 0 || it->y == 0) {
// avoid starting a checkerboard resonance from the border. See bug #432.
if (IsFlatSource16(src)) {
best_mode = (it->x_ == 0) ? 0 : 2;
best_mode = (it->x == 0) ? 0 : 2;
try_both_modes = 0; // stick to i16
}
}
@@ -1304,12 +1308,12 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
do {
int best_i4_mode = -1;
score_t best_i4_score = MAX_COST;
const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
const uint8_t* const src = it->yuv_in + Y_OFF_ENC + VP8Scan[it->i4];
const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
MakeIntra4Preds(it);
for (mode = 0; mode < NUM_BMODES; ++mode) {
const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
const uint8_t* const ref = it->yuv_p + VP8I4ModeOffsets[mode];
const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT
+ mode_costs[mode] * lambda_d_i4;
if (score < best_i4_score) {
@@ -1318,18 +1322,18 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
}
}
i4_bit_sum += mode_costs[best_i4_mode];
rd->modes_i4[it->i4_] = best_i4_mode;
rd->modes_i4[it->i4] = best_i4_mode;
score_i4 += best_i4_score;
if (score_i4 >= best_score || i4_bit_sum > bit_limit) {
// Intra4 won't be better than Intra16. Bail out and pick Intra16.
is_i16 = 1;
break;
} else { // reconstruct partial block inside yuv_out2_ buffer
uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];
nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
src, tmp_dst, best_i4_mode) << it->i4_;
} else { // reconstruct partial block inside yuv_out2 buffer
uint8_t* const tmp_dst = it->yuv_out2 + Y_OFF_ENC + VP8Scan[it->i4];
nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4],
src, tmp_dst, best_i4_mode) << it->i4;
}
} while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));
} while (VP8IteratorRotateI4(it, it->yuv_out2 + Y_OFF_ENC));
}
// Final reconstruction, depending on which mode is selected.
@@ -1338,16 +1342,16 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
SwapOut(it);
best_score = score_i4;
} else {
nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
nz = ReconstructIntra16(it, rd, it->yuv_out + Y_OFF_ENC, it->preds[0]);
}
// ... and UV!
if (refine_uv_mode) {
int best_mode = -1;
score_t best_uv_score = MAX_COST;
const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
const uint8_t* const src = it->yuv_in + U_OFF_ENC;
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
const uint8_t* const ref = it->yuv_p + VP8UVModeOffsets[mode];
const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT
+ VP8FixedCostsUV[mode] * lambda_d_uv;
if (score < best_uv_score) {
@@ -1357,7 +1361,7 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
}
VP8SetIntraUVMode(it, best_mode);
}
nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
nz |= ReconstructUV(it, rd, it->yuv_out + U_OFF_ENC, it->mb->uv_mode);
rd->nz = nz;
rd->score = best_score;
@@ -1370,7 +1374,7 @@ int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
VP8ModeScore* WEBP_RESTRICT const rd,
VP8RDLevel rd_opt) {
int is_skipped;
const int method = it->enc_->method_;
const int method = it->enc->method;
InitScore(rd);
@@ -1380,14 +1384,14 @@ int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
VP8MakeChroma8Preds(it);
if (rd_opt > RD_OPT_NONE) {
it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
it->do_trellis = (rd_opt >= RD_OPT_TRELLIS_ALL);
PickBestIntra16(it, rd);
if (method >= 2) {
PickBestIntra4(it, rd);
}
PickBestUV(it, rd);
if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now
it->do_trellis_ = 1;
it->do_trellis = 1;
SimpleQuantize(it, rd);
}
} else {

View File

@@ -12,18 +12,23 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stddef.h>
#include "src/dec/common_dec.h"
#include "src/webp/types.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/bit_writer_utils.h"
#include "src/utils/utils.h"
#include "src/webp/encode.h"
#include "src/webp/format_constants.h" // RIFF constants
#include "src/webp/mux_types.h" // ALPHA_FLAG
#include "src/enc/vp8i_enc.h"
//------------------------------------------------------------------------------
// Helper functions
static int IsVP8XNeeded(const VP8Encoder* const enc) {
return !!enc->has_alpha_; // Currently the only case when VP8X is needed.
// This could change in the future.
return !!enc->has_alpha; // Currently the only case when VP8X is needed.
// This could change in the future.
}
static int PutPaddingByte(const WebPPicture* const pic) {
@@ -36,7 +41,7 @@ static int PutPaddingByte(const WebPPicture* const pic) {
static WebPEncodingError PutRIFFHeader(const VP8Encoder* const enc,
size_t riff_size) {
const WebPPicture* const pic = enc->pic_;
const WebPPicture* const pic = enc->pic;
uint8_t riff[RIFF_HEADER_SIZE] = {
'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P'
};
@@ -49,7 +54,7 @@ static WebPEncodingError PutRIFFHeader(const VP8Encoder* const enc,
}
static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
const WebPPicture* const pic = enc->pic_;
const WebPPicture* const pic = enc->pic;
uint8_t vp8x[CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE] = {
'V', 'P', '8', 'X'
};
@@ -59,7 +64,7 @@ static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
assert(pic->width >= 1 && pic->height >= 1);
assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE);
if (enc->has_alpha_) {
if (enc->has_alpha) {
flags |= ALPHA_FLAG;
}
@@ -74,26 +79,26 @@ static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
}
static WebPEncodingError PutAlphaChunk(const VP8Encoder* const enc) {
const WebPPicture* const pic = enc->pic_;
const WebPPicture* const pic = enc->pic;
uint8_t alpha_chunk_hdr[CHUNK_HEADER_SIZE] = {
'A', 'L', 'P', 'H'
};
assert(enc->has_alpha_);
assert(enc->has_alpha);
// Alpha chunk header.
PutLE32(alpha_chunk_hdr + TAG_SIZE, enc->alpha_data_size_);
PutLE32(alpha_chunk_hdr + TAG_SIZE, enc->alpha_data_size);
if (!pic->writer(alpha_chunk_hdr, sizeof(alpha_chunk_hdr), pic)) {
return VP8_ENC_ERROR_BAD_WRITE;
}
// Alpha chunk data.
if (!pic->writer(enc->alpha_data_, enc->alpha_data_size_, pic)) {
if (!pic->writer(enc->alpha_data, enc->alpha_data_size, pic)) {
return VP8_ENC_ERROR_BAD_WRITE;
}
// Padding.
if ((enc->alpha_data_size_ & 1) && !PutPaddingByte(pic)) {
if ((enc->alpha_data_size & 1) && !PutPaddingByte(pic)) {
return VP8_ENC_ERROR_BAD_WRITE;
}
return VP8_ENC_OK;
@@ -148,7 +153,7 @@ static WebPEncodingError PutVP8FrameHeader(const WebPPicture* const pic,
// WebP Headers.
static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
size_t vp8_size, size_t riff_size) {
WebPPicture* const pic = enc->pic_;
WebPPicture* const pic = enc->pic;
WebPEncodingError err = VP8_ENC_OK;
// RIFF header.
@@ -162,7 +167,7 @@ static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
}
// Alpha.
if (enc->has_alpha_) {
if (enc->has_alpha) {
err = PutAlphaChunk(enc);
if (err != VP8_ENC_OK) goto Error;
}
@@ -172,7 +177,7 @@ static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
if (err != VP8_ENC_OK) goto Error;
// VP8 frame header.
err = PutVP8FrameHeader(pic, enc->profile_, size0);
err = PutVP8FrameHeader(pic, enc->profile, size0);
if (err != VP8_ENC_OK) goto Error;
// All OK.
@@ -186,27 +191,27 @@ static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
// Segmentation header
static void PutSegmentHeader(VP8BitWriter* const bw,
const VP8Encoder* const enc) {
const VP8EncSegmentHeader* const hdr = &enc->segment_hdr_;
const VP8EncProba* const proba = &enc->proba_;
if (VP8PutBitUniform(bw, (hdr->num_segments_ > 1))) {
const VP8EncSegmentHeader* const hdr = &enc->segment_hdr;
const VP8EncProba* const proba = &enc->proba;
if (VP8PutBitUniform(bw, (hdr->num_segments > 1))) {
// We always 'update' the quant and filter strength values
const int update_data = 1;
int s;
VP8PutBitUniform(bw, hdr->update_map_);
VP8PutBitUniform(bw, hdr->update_map);
if (VP8PutBitUniform(bw, update_data)) {
// we always use absolute values, not relative ones
VP8PutBitUniform(bw, 1); // (segment_feature_mode = 1. Paragraph 9.3.)
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
VP8PutSignedBits(bw, enc->dqm_[s].quant_, 7);
VP8PutSignedBits(bw, enc->dqm[s].quant, 7);
}
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
VP8PutSignedBits(bw, enc->dqm_[s].fstrength_, 6);
VP8PutSignedBits(bw, enc->dqm[s].fstrength, 6);
}
}
if (hdr->update_map_) {
if (hdr->update_map) {
for (s = 0; s < 3; ++s) {
if (VP8PutBitUniform(bw, (proba->segments_[s] != 255u))) {
VP8PutBits(bw, proba->segments_[s], 8);
if (VP8PutBitUniform(bw, (proba->segments[s] != 255u))) {
VP8PutBits(bw, proba->segments[s], 8);
}
}
}
@@ -216,18 +221,18 @@ static void PutSegmentHeader(VP8BitWriter* const bw,
// Filtering parameters header
static void PutFilterHeader(VP8BitWriter* const bw,
const VP8EncFilterHeader* const hdr) {
const int use_lf_delta = (hdr->i4x4_lf_delta_ != 0);
VP8PutBitUniform(bw, hdr->simple_);
VP8PutBits(bw, hdr->level_, 6);
VP8PutBits(bw, hdr->sharpness_, 3);
const int use_lf_delta = (hdr->i4x4_lf_delta != 0);
VP8PutBitUniform(bw, hdr->simple);
VP8PutBits(bw, hdr->level, 6);
VP8PutBits(bw, hdr->sharpness, 3);
if (VP8PutBitUniform(bw, use_lf_delta)) {
// '0' is the default value for i4x4_lf_delta_ at frame #0.
const int need_update = (hdr->i4x4_lf_delta_ != 0);
// '0' is the default value for i4x4_lf_delta at frame #0.
const int need_update = (hdr->i4x4_lf_delta != 0);
if (VP8PutBitUniform(bw, need_update)) {
// we don't use ref_lf_delta => emit four 0 bits
VP8PutBits(bw, 0, 4);
// we use mode_lf_delta for i4x4
VP8PutSignedBits(bw, hdr->i4x4_lf_delta_, 6);
VP8PutSignedBits(bw, hdr->i4x4_lf_delta, 6);
VP8PutBits(bw, 0, 3); // all others unused
}
}
@@ -236,12 +241,12 @@ static void PutFilterHeader(VP8BitWriter* const bw,
// Nominal quantization parameters
static void PutQuant(VP8BitWriter* const bw,
const VP8Encoder* const enc) {
VP8PutBits(bw, enc->base_quant_, 7);
VP8PutSignedBits(bw, enc->dq_y1_dc_, 4);
VP8PutSignedBits(bw, enc->dq_y2_dc_, 4);
VP8PutSignedBits(bw, enc->dq_y2_ac_, 4);
VP8PutSignedBits(bw, enc->dq_uv_dc_, 4);
VP8PutSignedBits(bw, enc->dq_uv_ac_, 4);
VP8PutBits(bw, enc->base_quant, 7);
VP8PutSignedBits(bw, enc->dq_y1_dc, 4);
VP8PutSignedBits(bw, enc->dq_y2_dc, 4);
VP8PutSignedBits(bw, enc->dq_y2_ac, 4);
VP8PutSignedBits(bw, enc->dq_uv_dc, 4);
VP8PutSignedBits(bw, enc->dq_uv_ac, 4);
}
// Partition sizes
@@ -249,8 +254,8 @@ static int EmitPartitionsSize(const VP8Encoder* const enc,
WebPPicture* const pic) {
uint8_t buf[3 * (MAX_NUM_PARTITIONS - 1)];
int p;
for (p = 0; p < enc->num_parts_ - 1; ++p) {
const size_t part_size = VP8BitWriterSize(enc->parts_ + p);
for (p = 0; p < enc->num_parts - 1; ++p) {
const size_t part_size = VP8BitWriterSize(enc->parts + p);
if (part_size >= VP8_MAX_PARTITION_SIZE) {
return WebPEncodingSetError(pic, VP8_ENC_ERROR_PARTITION_OVERFLOW);
}
@@ -267,25 +272,25 @@ static int EmitPartitionsSize(const VP8Encoder* const enc,
//------------------------------------------------------------------------------
static int GeneratePartition0(VP8Encoder* const enc) {
VP8BitWriter* const bw = &enc->bw_;
const int mb_size = enc->mb_w_ * enc->mb_h_;
VP8BitWriter* const bw = &enc->bw;
const int mb_size = enc->mb_w * enc->mb_h;
uint64_t pos1, pos2, pos3;
pos1 = VP8BitWriterPos(bw);
if (!VP8BitWriterInit(bw, mb_size * 7 / 8)) { // ~7 bits per macroblock
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
return WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
VP8PutBitUniform(bw, 0); // colorspace
VP8PutBitUniform(bw, 0); // clamp type
PutSegmentHeader(bw, enc);
PutFilterHeader(bw, &enc->filter_hdr_);
VP8PutBits(bw, enc->num_parts_ == 8 ? 3 :
enc->num_parts_ == 4 ? 2 :
enc->num_parts_ == 2 ? 1 : 0, 2);
PutFilterHeader(bw, &enc->filter_hdr);
VP8PutBits(bw, enc->num_parts == 8 ? 3 :
enc->num_parts == 4 ? 2 :
enc->num_parts == 2 ? 1 : 0, 2);
PutQuant(bw, enc);
VP8PutBitUniform(bw, 0); // no proba update
VP8WriteProbas(bw, &enc->proba_);
VP8WriteProbas(bw, &enc->proba);
pos2 = VP8BitWriterPos(bw);
VP8CodeIntraModes(enc);
VP8BitWriterFinish(bw);
@@ -293,36 +298,36 @@ static int GeneratePartition0(VP8Encoder* const enc) {
pos3 = VP8BitWriterPos(bw);
#if !defined(WEBP_DISABLE_STATS)
if (enc->pic_->stats) {
enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3);
enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3);
enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_;
if (enc->pic->stats) {
enc->pic->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3);
enc->pic->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3);
enc->pic->stats->alpha_data_size = (int)enc->alpha_data_size;
}
#else
(void)pos1;
(void)pos2;
(void)pos3;
#endif
if (bw->error_) {
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
if (bw->error) {
return WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
return 1;
}
void VP8EncFreeBitWriters(VP8Encoder* const enc) {
int p;
VP8BitWriterWipeOut(&enc->bw_);
for (p = 0; p < enc->num_parts_; ++p) {
VP8BitWriterWipeOut(enc->parts_ + p);
VP8BitWriterWipeOut(&enc->bw);
for (p = 0; p < enc->num_parts; ++p) {
VP8BitWriterWipeOut(enc->parts + p);
}
}
int VP8EncWrite(VP8Encoder* const enc) {
WebPPicture* const pic = enc->pic_;
VP8BitWriter* const bw = &enc->bw_;
WebPPicture* const pic = enc->pic;
VP8BitWriter* const bw = &enc->bw;
const int task_percent = 19;
const int percent_per_part = task_percent / enc->num_parts_;
const int final_percent = enc->percent_ + task_percent;
const int percent_per_part = task_percent / enc->num_parts;
const int final_percent = enc->percent + task_percent;
int ok = 0;
size_t vp8_size, pad, riff_size;
int p;
@@ -334,9 +339,9 @@ int VP8EncWrite(VP8Encoder* const enc) {
// Compute VP8 size
vp8_size = VP8_FRAME_HEADER_SIZE +
VP8BitWriterSize(bw) +
3 * (enc->num_parts_ - 1);
for (p = 0; p < enc->num_parts_; ++p) {
vp8_size += VP8BitWriterSize(enc->parts_ + p);
3 * (enc->num_parts - 1);
for (p = 0; p < enc->num_parts; ++p) {
vp8_size += VP8BitWriterSize(enc->parts + p);
}
pad = vp8_size & 1;
vp8_size += pad;
@@ -347,9 +352,9 @@ int VP8EncWrite(VP8Encoder* const enc) {
if (IsVP8XNeeded(enc)) { // Add size for: VP8X header + data.
riff_size += CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
}
if (enc->has_alpha_) { // Add size for: ALPH header + data.
const uint32_t padded_alpha_size = enc->alpha_data_size_ +
(enc->alpha_data_size_ & 1);
if (enc->has_alpha) { // Add size for: ALPH header + data.
const uint32_t padded_alpha_size = enc->alpha_data_size +
(enc->alpha_data_size & 1);
riff_size += CHUNK_HEADER_SIZE + padded_alpha_size;
}
// RIFF size should fit in 32-bits.
@@ -368,13 +373,13 @@ int VP8EncWrite(VP8Encoder* const enc) {
}
// Token partitions
for (p = 0; p < enc->num_parts_; ++p) {
const uint8_t* const buf = VP8BitWriterBuf(enc->parts_ + p);
const size_t size = VP8BitWriterSize(enc->parts_ + p);
for (p = 0; p < enc->num_parts; ++p) {
const uint8_t* const buf = VP8BitWriterBuf(enc->parts + p);
const size_t size = VP8BitWriterSize(enc->parts + p);
if (size) ok = ok && pic->writer(buf, size, pic);
VP8BitWriterWipeOut(enc->parts_ + p); // will free the internal buffer.
ok = ok && WebPReportProgress(pic, enc->percent_ + percent_per_part,
&enc->percent_);
VP8BitWriterWipeOut(enc->parts + p); // will free the internal buffer.
ok = ok && WebPReportProgress(pic, enc->percent + percent_per_part,
&enc->percent);
}
// Padding byte
@@ -382,11 +387,10 @@ int VP8EncWrite(VP8Encoder* const enc) {
ok = PutPaddingByte(pic);
}
enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size);
ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_);
enc->coded_size = (int)(CHUNK_HEADER_SIZE + riff_size);
ok = ok && WebPReportProgress(pic, final_percent, &enc->percent);
if (!ok) WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
return ok;
}
//------------------------------------------------------------------------------

Some files were not shown because too many files have changed in this diff Show More