Merge pull request #113963 from akien-mga/libwebp-1.6.0

libwebp: Update to 1.6.0
2025-12-31 01:49:10 +03:00 · 2025-12-15 08:00:58 -06:00
parent 23e956443b c5c85a7d36
commit afd4e0f67b
139 changed files with 6462 additions and 4929 deletions
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -635,7 +635,7 @@ Files extracted from upstream source:
 ## libwebp

 - Upstream: https://chromium.googlesource.com/webm/libwebp/
- Version: 1.5.0 (a4d7a715337ded4451fec90ff8ce79728e04126c, 2024)
+- Version: 1.6.0 (4fa21912338357f89e4fd51cf2368325b59e9bd9, 2025)
 - License: BSD-3-Clause

 Files extracted from upstream source:
@@ -645,9 +645,9 @@ Files extracted from upstream source:

 Patches:

- `0001-msvc-node-debug-rename.patch`
- `0002-msvc-arm64-fpstrict.patch`
- `0003-clang-cl-sse2-sse41.patch`
+- `0001-msvc-node-debug-rename.patch` ([GH-75769](https://github.com/godotengine/godot/pull/75769))
+- `0002-msvc-arm64-fpstrict.patch` ([GH-94655](https://github.com/godotengine/godot/pull/94655))
+- `0003-clang-cl-sse2-sse41-avx2.patch` ([GH-92316](https://github.com/godotengine/godot/pull/92316))


 ## linuxbsd_headers
--- a/thirdparty/libwebp/AUTHORS
+++ b/thirdparty/libwebp/AUTHORS
@@ -10,9 +10,11 @@ Contributors:
 - Christian Duvivier (cduvivier at google dot com)
 - Christopher Degawa (ccom at randomderp dot com)
 - Clement Courbet (courbet at google dot com)
+- devtools-clrobot at google dot com (devtools-clrobot@google dot com)
 - Djordje Pesut (djordje dot pesut at imgtec dot com)
 - Frank (1433351828 at qq dot com)
 - Frank Barchard (fbarchard at google dot com)
+- Henner Zeller (hzeller at google dot com)
 - Hui Su (huisu at google dot com)
 - H. Vetinari (h dot vetinari at gmx dot com)
 - Ilya Kurdyukov (jpegqs at gmail dot com)
--- a/thirdparty/libwebp/patches/0001-msvc-node-debug-rename.patch
+++ b/thirdparty/libwebp/patches/0001-msvc-node-debug-rename.patch
@@ -1,8 +1,8 @@
 diff --git a/thirdparty/libwebp/src/enc/quant_enc.c b/thirdparty/libwebp/src/enc/quant_enc.c
-index 6d8202d277..302e8047f2 100644
+index 4b8cb5e9be..440991f86d 100644
 --- a/thirdparty/libwebp/src/enc/quant_enc.c
 +++ b/thirdparty/libwebp/src/enc/quant_enc.c
-@@ -556,6 +556,9 @@ static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst,
+@@ -562,6 +562,9 @@ static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst,
 //------------------------------------------------------------------------------
 // Performs trellis-optimized quantization.
 
--- a/thirdparty/libwebp/patches/0002-msvc-arm64-fpstrict.patch
+++ b/thirdparty/libwebp/patches/0002-msvc-arm64-fpstrict.patch
@@ -1,8 +1,8 @@
 diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c b/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c
-index 09028428ac..6f1a88bf1a 100644
+index f72be4b89e..17c4b1940b 100644
 --- a/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c
 +++ b/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c
-@@ -26,7 +26,11 @@ static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
+@@ -27,7 +27,11 @@ static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
 #define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
 static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
 
--- a/thirdparty/libwebp/patches/0003-clang-cl-sse2-sse41-avx2.patch
+++ b/thirdparty/libwebp/patches/0003-clang-cl-sse2-sse41-avx2.patch
@@ -1,8 +1,8 @@
 diff --git a/thirdparty/libwebp/src/dsp/cpu.h b/thirdparty/libwebp/src/dsp/cpu.h
-index c86540f280..4dbe607aec 100644
+index 7f87d7daaa..ef63219043 100644
 --- a/thirdparty/libwebp/src/dsp/cpu.h
 +++ b/thirdparty/libwebp/src/dsp/cpu.h
-@@ -47,12 +47,12 @@
+@@ -47,17 +47,17 @@
 // x86 defines.
 
 #if !defined(HAVE_CONFIG_H)
@@ -17,3 +17,9 @@ index c86540f280..4dbe607aec 100644
     (defined(_M_X64) || defined(_M_IX86))
 #define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
 #endif
+ 
+-#if defined(_MSC_VER) && _MSC_VER >= 1700 && \
++#if defined(_MSC_VER) && !defined(__clang__) && _MSC_VER >= 1700 && \
+     (defined(_M_X64) || defined(_M_IX86))
+ #define WEBP_MSC_AVX2  // Visual C++ AVX2 targets
+ #endif
--- a/thirdparty/libwebp/sharpyuv/sharpyuv.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv.c
@@ -19,10 +19,10 @@
 #include <stdlib.h>
 #include <string.h>

-#include "src/webp/types.h"
 #include "sharpyuv/sharpyuv_cpu.h"
 #include "sharpyuv/sharpyuv_dsp.h"
 #include "sharpyuv/sharpyuv_gamma.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------

--- a/thirdparty/libwebp/sharpyuv/sharpyuv.h
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv.h
@@ -52,7 +52,7 @@ extern "C" {
 // SharpYUV API version following the convention from semver.org
 #define SHARPYUV_VERSION_MAJOR 0
 #define SHARPYUV_VERSION_MINOR 4
-#define SHARPYUV_VERSION_PATCH 1
+#define SHARPYUV_VERSION_PATCH 2
 // Version as a uint32_t. The major number is the high 8 bits.
 // The minor number is the middle 8 bits. The patch number is the low 16 bits.
 #define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_csp.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_csp.c
@@ -15,6 +15,8 @@
 #include <math.h>
 #include <stddef.h>

+#include "sharpyuv/sharpyuv.h"
+
 static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); }

 void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c
@@ -17,6 +17,7 @@
 #include <stdlib.h>

 #include "sharpyuv/sharpyuv_cpu.h"
+#include "src/dsp/cpu.h"
 #include "src/webp/types.h"

 //-----------------------------------------------------------------------------
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c
@@ -15,6 +15,7 @@
 #include <float.h>
 #include <math.h>

+#include "sharpyuv/sharpyuv.h"
 #include "src/webp/types.h"

 // Gamma correction compensates loss of resolution during chroma subsampling.
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_sse2.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_sse2.c
@@ -14,9 +14,13 @@
 #include "sharpyuv/sharpyuv_dsp.h"

 #if defined(WEBP_USE_SSE2)
-#include <stdlib.h>
 #include <emmintrin.h>

+#include <stdlib.h>
+
+#include "src/dsp/cpu.h"
+#include "src/webp/types.h"
+
 static uint16_t clip_SSE2(int v, int max) {
  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
 }
--- a/thirdparty/libwebp/src/dec/alpha_dec.c
+++ b/thirdparty/libwebp/src/dec/alpha_dec.c
@@ -11,14 +11,18 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include <assert.h>
 #include <stdlib.h>
+
 #include "src/dec/alphai_dec.h"
 #include "src/dec/vp8_dec.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dec/vp8li_dec.h"
+#include "src/dec/webpi_dec.h"
 #include "src/dsp/dsp.h"
 #include "src/utils/quant_levels_dec_utils.h"
 #include "src/utils/utils.h"
+#include "src/webp/decode.h"
 #include "src/webp/format_constants.h"
 #include "src/webp/types.h"

@@ -34,8 +38,8 @@ WEBP_NODISCARD static ALPHDecoder* ALPHNew(void) {
 // Clears and deallocates an alpha decoder instance.
 static void ALPHDelete(ALPHDecoder* const dec) {
  if (dec != NULL) {
-    VP8LDelete(dec->vp8l_dec_);
-    dec->vp8l_dec_ = NULL;
+    VP8LDelete(dec->vp8l_dec);
+    dec->vp8l_dec = NULL;
    WebPSafeFree(dec);
  }
 }
@@ -54,28 +58,28 @@ WEBP_NODISCARD static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
  const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;
  const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;
  int rsrv;
-  VP8Io* const io = &dec->io_;
+  VP8Io* const io = &dec->io;

  assert(data != NULL && output != NULL && src_io != NULL);

  VP8FiltersInit();
-  dec->output_ = output;
-  dec->width_ = src_io->width;
-  dec->height_ = src_io->height;
-  assert(dec->width_ > 0 && dec->height_ > 0);
+  dec->output = output;
+  dec->width = src_io->width;
+  dec->height = src_io->height;
+  assert(dec->width > 0 && dec->height > 0);

  if (data_size <= ALPHA_HEADER_LEN) {
    return 0;
  }

-  dec->method_ = (data[0] >> 0) & 0x03;
-  dec->filter_ = (WEBP_FILTER_TYPE)((data[0] >> 2) & 0x03);
-  dec->pre_processing_ = (data[0] >> 4) & 0x03;
+  dec->method = (data[0] >> 0) & 0x03;
+  dec->filter = (WEBP_FILTER_TYPE)((data[0] >> 2) & 0x03);
+  dec->pre_processing = (data[0] >> 4) & 0x03;
  rsrv = (data[0] >> 6) & 0x03;
-  if (dec->method_ < ALPHA_NO_COMPRESSION ||
-      dec->method_ > ALPHA_LOSSLESS_COMPRESSION ||
-      dec->filter_ >= WEBP_FILTER_LAST ||
-      dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS ||
+  if (dec->method < ALPHA_NO_COMPRESSION ||
+      dec->method > ALPHA_LOSSLESS_COMPRESSION ||
+      dec->filter >= WEBP_FILTER_LAST ||
+      dec->pre_processing > ALPHA_PREPROCESSED_LEVELS ||
      rsrv != 0) {
    return 0;
  }
@@ -96,11 +100,11 @@ WEBP_NODISCARD static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
  io->crop_bottom = src_io->crop_bottom;
  // No need to copy the scaling parameters.

-  if (dec->method_ == ALPHA_NO_COMPRESSION) {
-    const size_t alpha_decoded_size = dec->width_ * dec->height_;
+  if (dec->method == ALPHA_NO_COMPRESSION) {
+    const size_t alpha_decoded_size = dec->width * dec->height;
    ok = (alpha_data_size >= alpha_decoded_size);
  } else {
-    assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION);
+    assert(dec->method == ALPHA_LOSSLESS_COMPRESSION);
    ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size);
  }

@@ -113,32 +117,32 @@ WEBP_NODISCARD static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
 // Returns false in case of bitstream error.
 WEBP_NODISCARD static int ALPHDecode(VP8Decoder* const dec, int row,
                                     int num_rows) {
-  ALPHDecoder* const alph_dec = dec->alph_dec_;
-  const int width = alph_dec->width_;
-  const int height = alph_dec->io_.crop_bottom;
-  if (alph_dec->method_ == ALPHA_NO_COMPRESSION) {
+  ALPHDecoder* const alph_dec = dec->alph_dec;
+  const int width = alph_dec->width;
+  const int height = alph_dec->io.crop_bottom;
+  if (alph_dec->method == ALPHA_NO_COMPRESSION) {
    int y;
-    const uint8_t* prev_line = dec->alpha_prev_line_;
-    const uint8_t* deltas = dec->alpha_data_ + ALPHA_HEADER_LEN + row * width;
-    uint8_t* dst = dec->alpha_plane_ + row * width;
-    assert(deltas <= &dec->alpha_data_[dec->alpha_data_size_]);
-    assert(WebPUnfilters[alph_dec->filter_] != NULL);
+    const uint8_t* prev_line = dec->alpha_prev_line;
+    const uint8_t* deltas = dec->alpha_data + ALPHA_HEADER_LEN + row * width;
+    uint8_t* dst = dec->alpha_plane + row * width;
+    assert(deltas <= &dec->alpha_data[dec->alpha_data_size]);
+    assert(WebPUnfilters[alph_dec->filter] != NULL);
    for (y = 0; y < num_rows; ++y) {
-      WebPUnfilters[alph_dec->filter_](prev_line, deltas, dst, width);
+      WebPUnfilters[alph_dec->filter](prev_line, deltas, dst, width);
      prev_line = dst;
      dst += width;
      deltas += width;
    }
-    dec->alpha_prev_line_ = prev_line;
-  } else {  // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
-    assert(alph_dec->vp8l_dec_ != NULL);
+    dec->alpha_prev_line = prev_line;
+  } else {  // alph_dec->method == ALPHA_LOSSLESS_COMPRESSION
+    assert(alph_dec->vp8l_dec != NULL);
    if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) {
      return 0;
    }
  }

  if (row + num_rows >= height) {
-    dec->is_alpha_decoded_ = 1;
+    dec->is_alpha_decoded = 1;
  }
  return 1;
 }
@@ -148,25 +152,25 @@ WEBP_NODISCARD static int AllocateAlphaPlane(VP8Decoder* const dec,
  const int stride = io->width;
  const int height = io->crop_bottom;
  const uint64_t alpha_size = (uint64_t)stride * height;
-  assert(dec->alpha_plane_mem_ == NULL);
-  dec->alpha_plane_mem_ =
-      (uint8_t*)WebPSafeMalloc(alpha_size, sizeof(*dec->alpha_plane_));
-  if (dec->alpha_plane_mem_ == NULL) {
+  assert(dec->alpha_plane_mem == NULL);
+  dec->alpha_plane_mem =
+      (uint8_t*)WebPSafeMalloc(alpha_size, sizeof(*dec->alpha_plane));
+  if (dec->alpha_plane_mem == NULL) {
    return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
                       "Alpha decoder initialization failed.");
  }
-  dec->alpha_plane_ = dec->alpha_plane_mem_;
-  dec->alpha_prev_line_ = NULL;
+  dec->alpha_plane = dec->alpha_plane_mem;
+  dec->alpha_prev_line = NULL;
  return 1;
 }

 void WebPDeallocateAlphaMemory(VP8Decoder* const dec) {
  assert(dec != NULL);
-  WebPSafeFree(dec->alpha_plane_mem_);
-  dec->alpha_plane_mem_ = NULL;
-  dec->alpha_plane_ = NULL;
-  ALPHDelete(dec->alph_dec_);
-  dec->alph_dec_ = NULL;
+  WebPSafeFree(dec->alpha_plane_mem);
+  dec->alpha_plane_mem = NULL;
+  dec->alpha_plane = NULL;
+  ALPHDelete(dec->alph_dec);
+  dec->alph_dec = NULL;
 }

 //------------------------------------------------------------------------------
@@ -184,46 +188,46 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
    return NULL;
  }

-  if (!dec->is_alpha_decoded_) {
-    if (dec->alph_dec_ == NULL) {    // Initialize decoder.
-      dec->alph_dec_ = ALPHNew();
-      if (dec->alph_dec_ == NULL) {
+  if (!dec->is_alpha_decoded) {
+    if (dec->alph_dec == NULL) {    // Initialize decoder.
+      dec->alph_dec = ALPHNew();
+      if (dec->alph_dec == NULL) {
        VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
                    "Alpha decoder initialization failed.");
        return NULL;
      }
      if (!AllocateAlphaPlane(dec, io)) goto Error;
-      if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
-                    io, dec->alpha_plane_)) {
-        VP8LDecoder* const vp8l_dec = dec->alph_dec_->vp8l_dec_;
+      if (!ALPHInit(dec->alph_dec, dec->alpha_data, dec->alpha_data_size,
+                    io, dec->alpha_plane)) {
+        VP8LDecoder* const vp8l_dec = dec->alph_dec->vp8l_dec;
        VP8SetError(dec,
                    (vp8l_dec == NULL) ? VP8_STATUS_OUT_OF_MEMORY
-                                       : vp8l_dec->status_,
+                                       : vp8l_dec->status,
                    "Alpha decoder initialization failed.");
        goto Error;
      }
      // if we allowed use of alpha dithering, check whether it's needed at all
-      if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) {
-        dec->alpha_dithering_ = 0;   // disable dithering
+      if (dec->alph_dec->pre_processing != ALPHA_PREPROCESSED_LEVELS) {
+        dec->alpha_dithering = 0;    // disable dithering
      } else {
        num_rows = height - row;     // decode everything in one pass
      }
    }

-    assert(dec->alph_dec_ != NULL);
+    assert(dec->alph_dec != NULL);
    assert(row + num_rows <= height);
    if (!ALPHDecode(dec, row, num_rows)) goto Error;

-    if (dec->is_alpha_decoded_) {   // finished?
-      ALPHDelete(dec->alph_dec_);
-      dec->alph_dec_ = NULL;
-      if (dec->alpha_dithering_ > 0) {
-        uint8_t* const alpha = dec->alpha_plane_ + io->crop_top * width
+    if (dec->is_alpha_decoded) {   // finished?
+      ALPHDelete(dec->alph_dec);
+      dec->alph_dec = NULL;
+      if (dec->alpha_dithering > 0) {
+        uint8_t* const alpha = dec->alpha_plane + io->crop_top * width
                             + io->crop_left;
        if (!WebPDequantizeLevels(alpha,
                                  io->crop_right - io->crop_left,
                                  io->crop_bottom - io->crop_top,
-                                  width, dec->alpha_dithering_)) {
+                                  width, dec->alpha_dithering)) {
          goto Error;
        }
      }
@@ -231,7 +235,7 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
  }

  // Return a pointer to the current decoded row.
-  return dec->alpha_plane_ + row * width;
+  return dec->alpha_plane + row * width;

 Error:
  WebPDeallocateAlphaMemory(dec);
--- a/thirdparty/libwebp/src/dec/alphai_dec.h
+++ b/thirdparty/libwebp/src/dec/alphai_dec.h
@@ -14,7 +14,10 @@
 #ifndef WEBP_DEC_ALPHAI_DEC_H_
 #define WEBP_DEC_ALPHAI_DEC_H_

+#include "src/dec/vp8_dec.h"
+#include "src/webp/types.h"
 #include "src/dec/webpi_dec.h"
+#include "src/dsp/dsp.h"
 #include "src/utils/filters_utils.h"

 #ifdef __cplusplus
@@ -25,24 +28,24 @@ struct VP8LDecoder;  // Defined in dec/vp8li.h.

 typedef struct ALPHDecoder ALPHDecoder;
 struct ALPHDecoder {
-  int width_;
-  int height_;
-  int method_;
-  WEBP_FILTER_TYPE filter_;
-  int pre_processing_;
-  struct VP8LDecoder* vp8l_dec_;
-  VP8Io io_;
-  int use_8b_decode_;  // Although alpha channel requires only 1 byte per
+  int width;
+  int height;
+  int method;
+  WEBP_FILTER_TYPE filter;
+  int pre_processing;
+  struct VP8LDecoder* vp8l_dec;
+  VP8Io io;
+  int use_8b_decode;   // Although alpha channel requires only 1 byte per
                       // pixel, sometimes VP8LDecoder may need to allocate
                       // 4 bytes per pixel internally during decode.
-  uint8_t* output_;
-  const uint8_t* prev_line_;   // last output row (or NULL)
+  uint8_t* output;
+  const uint8_t* prev_line;   // last output row (or NULL)
 };

 //------------------------------------------------------------------------------
 // internal functions. Not public.

-// Deallocate memory associated to dec->alpha_plane_ decoding
+// Deallocate memory associated to dec->alpha_plane decoding
 void WebPDeallocateAlphaMemory(VP8Decoder* const dec);

 //------------------------------------------------------------------------------
--- a/thirdparty/libwebp/src/dec/buffer_dec.c
+++ b/thirdparty/libwebp/src/dec/buffer_dec.c
@@ -11,11 +11,16 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include <assert.h>
 #include <stdlib.h>
+#include <string.h>

 #include "src/dec/vp8i_dec.h"
 #include "src/dec/webpi_dec.h"
+#include "src/utils/rescaler_utils.h"
 #include "src/utils/utils.h"
+#include "src/webp/decode.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // WebPDecBuffer
@@ -26,10 +31,9 @@ static const uint8_t kModeBpp[MODE_LAST] = {
  4, 4, 4, 2,    // pre-multiplied modes
  1, 1 };

-// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
 // Convert to an integer to handle both the unsigned/signed enum cases
 // without the need for casting to remove type limit warnings.
-static int IsValidColorspace(int webp_csp_mode) {
+int IsValidColorspace(int webp_csp_mode) {
  return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST);
 }

--- a/thirdparty/libwebp/src/dec/common_dec.h
+++ b/thirdparty/libwebp/src/dec/common_dec.h
@@ -51,4 +51,7 @@ enum { MB_FEATURE_TREE_PROBS = 3,
       NUM_PROBAS = 11
     };

+// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
+int IsValidColorspace(int webp_csp_mode);
+
 #endif  // WEBP_DEC_COMMON_DEC_H_
--- a/thirdparty/libwebp/src/dec/frame_dec.c
+++ b/thirdparty/libwebp/src/dec/frame_dec.c
@@ -11,9 +11,20 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include <assert.h>
 #include <stdlib.h>
+#include <string.h>
+
+#include "src/dec/common_dec.h"
+#include "src/dec/vp8_dec.h"
 #include "src/dec/vp8i_dec.h"
+#include "src/dec/webpi_dec.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/random_utils.h"
+#include "src/utils/thread_utils.h"
 #include "src/utils/utils.h"
+#include "src/webp/decode.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // Main reconstruction function.
@@ -72,11 +83,11 @@ static void ReconstructRow(const VP8Decoder* const dec,
                           const VP8ThreadContext* ctx) {
  int j;
  int mb_x;
-  const int mb_y = ctx->mb_y_;
-  const int cache_id = ctx->id_;
-  uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
-  uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
-  uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
+  const int mb_y = ctx->mb_y;
+  const int cache_id = ctx->id;
+  uint8_t* const y_dst = dec->yuv_b + Y_OFF;
+  uint8_t* const u_dst = dec->yuv_b + U_OFF;
+  uint8_t* const v_dst = dec->yuv_b + V_OFF;

  // Initialize left-most block.
  for (j = 0; j < 16; ++j) {
@@ -99,8 +110,8 @@ static void ReconstructRow(const VP8Decoder* const dec,
  }

  // Reconstruct one row.
-  for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
-    const VP8MBData* const block = ctx->mb_data_ + mb_x;
+  for (mb_x = 0; mb_x < dec->mb_w; ++mb_x) {
+    const VP8MBData* const block = ctx->mb_data + mb_x;

    // Rotate in the left samples from previously decoded block. We move four
    // pixels at a time for alignment reason, and because of in-loop filter.
@@ -115,9 +126,9 @@ static void ReconstructRow(const VP8Decoder* const dec,
    }
    {
      // bring top samples into the cache
-      VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x;
-      const int16_t* const coeffs = block->coeffs_;
-      uint32_t bits = block->non_zero_y_;
+      VP8TopSamples* const top_yuv = dec->yuv_t + mb_x;
+      const int16_t* const coeffs = block->coeffs;
+      uint32_t bits = block->non_zero_y;
      int n;

      if (mb_y > 0) {
@@ -127,11 +138,11 @@ static void ReconstructRow(const VP8Decoder* const dec,
      }

      // predict and add residuals
-      if (block->is_i4x4_) {   // 4x4
+      if (block->is_i4x4) {   // 4x4
        uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);

        if (mb_y > 0) {
-          if (mb_x >= dec->mb_w_ - 1) {    // on rightmost border
+          if (mb_x >= dec->mb_w - 1) {    // on rightmost border
            memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
          } else {
            memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
@@ -143,11 +154,11 @@ static void ReconstructRow(const VP8Decoder* const dec,
        // predict and add residuals for all 4x4 blocks in turn.
        for (n = 0; n < 16; ++n, bits <<= 2) {
          uint8_t* const dst = y_dst + kScan[n];
-          VP8PredLuma4[block->imodes_[n]](dst);
+          VP8PredLuma4[block->imodes[n]](dst);
          DoTransform(bits, coeffs + n * 16, dst);
        }
      } else {    // 16x16
-        const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]);
+        const int pred_func = CheckMode(mb_x, mb_y, block->imodes[0]);
        VP8PredLuma16[pred_func](y_dst);
        if (bits != 0) {
          for (n = 0; n < 16; ++n, bits <<= 2) {
@@ -157,8 +168,8 @@ static void ReconstructRow(const VP8Decoder* const dec,
      }
      {
        // Chroma
-        const uint32_t bits_uv = block->non_zero_uv_;
-        const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_);
+        const uint32_t bits_uv = block->non_zero_uv;
+        const int pred_func = CheckMode(mb_x, mb_y, block->uvmode);
        VP8PredChroma8[pred_func](u_dst);
        VP8PredChroma8[pred_func](v_dst);
        DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);
@@ -166,25 +177,25 @@ static void ReconstructRow(const VP8Decoder* const dec,
      }

      // stash away top samples for next block
-      if (mb_y < dec->mb_h_ - 1) {
+      if (mb_y < dec->mb_h - 1) {
        memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);
        memcpy(top_yuv[0].u, u_dst +  7 * BPS,  8);
        memcpy(top_yuv[0].v, v_dst +  7 * BPS,  8);
      }
    }
-    // Transfer reconstructed samples from yuv_b_ cache to final destination.
+    // Transfer reconstructed samples from yuv_b cache to final destination.
    {
-      const int y_offset = cache_id * 16 * dec->cache_y_stride_;
-      const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
-      uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;
-      uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;
-      uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;
+      const int y_offset = cache_id * 16 * dec->cache_y_stride;
+      const int uv_offset = cache_id * 8 * dec->cache_uv_stride;
+      uint8_t* const y_out = dec->cache_y + mb_x * 16 + y_offset;
+      uint8_t* const u_out = dec->cache_u + mb_x * 8 + uv_offset;
+      uint8_t* const v_out = dec->cache_v + mb_x * 8 + uv_offset;
      for (j = 0; j < 16; ++j) {
-        memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
+        memcpy(y_out + j * dec->cache_y_stride, y_dst + j * BPS, 16);
      }
      for (j = 0; j < 8; ++j) {
-        memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
-        memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
+        memcpy(u_out + j * dec->cache_uv_stride, u_dst + j * BPS, 8);
+        memcpy(v_out + j * dec->cache_uv_stride, v_dst + j * BPS, 8);
      }
    }
  }
@@ -201,40 +212,40 @@ static void ReconstructRow(const VP8Decoder* const dec,
 static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };

 static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
-  const VP8ThreadContext* const ctx = &dec->thread_ctx_;
-  const int cache_id = ctx->id_;
-  const int y_bps = dec->cache_y_stride_;
-  const VP8FInfo* const f_info = ctx->f_info_ + mb_x;
-  uint8_t* const y_dst = dec->cache_y_ + cache_id * 16 * y_bps + mb_x * 16;
-  const int ilevel = f_info->f_ilevel_;
-  const int limit = f_info->f_limit_;
+  const VP8ThreadContext* const ctx = &dec->thread_ctx;
+  const int cache_id = ctx->id;
+  const int y_bps = dec->cache_y_stride;
+  const VP8FInfo* const f_info = ctx->f_info + mb_x;
+  uint8_t* const y_dst = dec->cache_y + cache_id * 16 * y_bps + mb_x * 16;
+  const int ilevel = f_info->f_ilevel;
+  const int limit = f_info->f_limit;
  if (limit == 0) {
    return;
  }
  assert(limit >= 3);
-  if (dec->filter_type_ == 1) {   // simple
+  if (dec->filter_type == 1) {   // simple
    if (mb_x > 0) {
      VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
    }
-    if (f_info->f_inner_) {
+    if (f_info->f_inner) {
      VP8SimpleHFilter16i(y_dst, y_bps, limit);
    }
    if (mb_y > 0) {
      VP8SimpleVFilter16(y_dst, y_bps, limit + 4);
    }
-    if (f_info->f_inner_) {
+    if (f_info->f_inner) {
      VP8SimpleVFilter16i(y_dst, y_bps, limit);
    }
  } else {    // complex
-    const int uv_bps = dec->cache_uv_stride_;
-    uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
-    uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
-    const int hev_thresh = f_info->hev_thresh_;
+    const int uv_bps = dec->cache_uv_stride;
+    uint8_t* const u_dst = dec->cache_u + cache_id * 8 * uv_bps + mb_x * 8;
+    uint8_t* const v_dst = dec->cache_v + cache_id * 8 * uv_bps + mb_x * 8;
+    const int hev_thresh = f_info->hev_thresh;
    if (mb_x > 0) {
      VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
      VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
    }
-    if (f_info->f_inner_) {
+    if (f_info->f_inner) {
      VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
      VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
    }
@@ -242,7 +253,7 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
      VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
      VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
    }
-    if (f_info->f_inner_) {
+    if (f_info->f_inner) {
      VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
      VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
    }
@@ -252,9 +263,9 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
 // Filter the decoded macroblock row (if needed)
 static void FilterRow(const VP8Decoder* const dec) {
  int mb_x;
-  const int mb_y = dec->thread_ctx_.mb_y_;
-  assert(dec->thread_ctx_.filter_row_);
-  for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
+  const int mb_y = dec->thread_ctx.mb_y;
+  assert(dec->thread_ctx.filter_row);
+  for (mb_x = dec->tl_mb_x; mb_x < dec->br_mb_x; ++mb_x) {
    DoFilter(dec, mb_x, mb_y);
  }
 }
@@ -263,51 +274,51 @@ static void FilterRow(const VP8Decoder* const dec) {
 // Precompute the filtering strength for each segment and each i4x4/i16x16 mode.

 static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
-  if (dec->filter_type_ > 0) {
+  if (dec->filter_type > 0) {
    int s;
-    const VP8FilterHeader* const hdr = &dec->filter_hdr_;
+    const VP8FilterHeader* const hdr = &dec->filter_hdr;
    for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
      int i4x4;
      // First, compute the initial level
      int base_level;
-      if (dec->segment_hdr_.use_segment_) {
-        base_level = dec->segment_hdr_.filter_strength_[s];
-        if (!dec->segment_hdr_.absolute_delta_) {
-          base_level += hdr->level_;
+      if (dec->segment_hdr.use_segment) {
+        base_level = dec->segment_hdr.filter_strength[s];
+        if (!dec->segment_hdr.absolute_delta) {
+          base_level += hdr->level;
        }
      } else {
-        base_level = hdr->level_;
+        base_level = hdr->level;
      }
      for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
-        VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
+        VP8FInfo* const info = &dec->fstrengths[s][i4x4];
        int level = base_level;
-        if (hdr->use_lf_delta_) {
-          level += hdr->ref_lf_delta_[0];
+        if (hdr->use_lf_delta) {
+          level += hdr->ref_lf_delta[0];
          if (i4x4) {
-            level += hdr->mode_lf_delta_[0];
+            level += hdr->mode_lf_delta[0];
          }
        }
        level = (level < 0) ? 0 : (level > 63) ? 63 : level;
        if (level > 0) {
          int ilevel = level;
-          if (hdr->sharpness_ > 0) {
-            if (hdr->sharpness_ > 4) {
+          if (hdr->sharpness > 0) {
+            if (hdr->sharpness > 4) {
              ilevel >>= 2;
            } else {
              ilevel >>= 1;
            }
-            if (ilevel > 9 - hdr->sharpness_) {
-              ilevel = 9 - hdr->sharpness_;
+            if (ilevel > 9 - hdr->sharpness) {
+              ilevel = 9 - hdr->sharpness;
            }
          }
          if (ilevel < 1) ilevel = 1;
-          info->f_ilevel_ = ilevel;
-          info->f_limit_ = 2 * level + ilevel;
-          info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
+          info->f_ilevel = ilevel;
+          info->f_limit = 2 * level + ilevel;
+          info->hev_thresh = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
        } else {
-          info->f_limit_ = 0;  // no filtering
+          info->f_limit = 0;  // no filtering
        }
-        info->f_inner_ = i4x4;
+        info->f_inner = i4x4;
      }
    }
  }
@@ -321,7 +332,7 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {

 #define DITHER_AMP_TAB_SIZE 12
 static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
-  // roughly, it's dqm->uv_mat_[1]
+  // roughly, it's dqm->uv_mat[1]
  8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
 };

@@ -336,24 +347,24 @@ void VP8InitDithering(const WebPDecoderOptions* const options,
      int s;
      int all_amp = 0;
      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        VP8QuantMatrix* const dqm = &dec->dqm_[s];
-        if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {
-          const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
-          dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;
+        VP8QuantMatrix* const dqm = &dec->dqm[s];
+        if (dqm->uv_quant < DITHER_AMP_TAB_SIZE) {
+          const int idx = (dqm->uv_quant < 0) ? 0 : dqm->uv_quant;
+          dqm->dither = (f * kQuantToDitherAmp[idx]) >> 3;
        }
-        all_amp |= dqm->dither_;
+        all_amp |= dqm->dither;
      }
      if (all_amp != 0) {
-        VP8InitRandom(&dec->dithering_rg_, 1.0f);
-        dec->dither_ = 1;
+        VP8InitRandom(&dec->dithering_rg, 1.0f);
+        dec->dither = 1;
      }
    }
    // potentially allow alpha dithering
-    dec->alpha_dithering_ = options->alpha_dithering_strength;
-    if (dec->alpha_dithering_ > 100) {
-      dec->alpha_dithering_ = 100;
-    } else if (dec->alpha_dithering_ < 0) {
-      dec->alpha_dithering_ = 0;
+    dec->alpha_dithering = options->alpha_dithering_strength;
+    if (dec->alpha_dithering > 100) {
+      dec->alpha_dithering = 100;
+    } else if (dec->alpha_dithering < 0) {
+      dec->alpha_dithering = 0;
    }
  }
 }
@@ -370,17 +381,17 @@ static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {

 static void DitherRow(VP8Decoder* const dec) {
  int mb_x;
-  assert(dec->dither_);
-  for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
-    const VP8ThreadContext* const ctx = &dec->thread_ctx_;
-    const VP8MBData* const data = ctx->mb_data_ + mb_x;
-    const int cache_id = ctx->id_;
-    const int uv_bps = dec->cache_uv_stride_;
-    if (data->dither_ >= MIN_DITHER_AMP) {
-      uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
-      uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
-      Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_);
-      Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_);
+  assert(dec->dither);
+  for (mb_x = dec->tl_mb_x; mb_x < dec->br_mb_x; ++mb_x) {
+    const VP8ThreadContext* const ctx = &dec->thread_ctx;
+    const VP8MBData* const data = ctx->mb_data + mb_x;
+    const int cache_id = ctx->id;
+    const int uv_bps = dec->cache_uv_stride;
+    if (data->dither >= MIN_DITHER_AMP) {
+      uint8_t* const u_dst = dec->cache_u + cache_id * 8 * uv_bps + mb_x * 8;
+      uint8_t* const v_dst = dec->cache_v + cache_id * 8 * uv_bps + mb_x * 8;
+      Dither8x8(&dec->dithering_rg, u_dst, uv_bps, data->dither);
+      Dither8x8(&dec->dithering_rg, v_dst, uv_bps, data->dither);
    }
  }
 }
@@ -403,29 +414,29 @@ static int FinishRow(void* arg1, void* arg2) {
  VP8Decoder* const dec = (VP8Decoder*)arg1;
  VP8Io* const io = (VP8Io*)arg2;
  int ok = 1;
-  const VP8ThreadContext* const ctx = &dec->thread_ctx_;
-  const int cache_id = ctx->id_;
-  const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
-  const int ysize = extra_y_rows * dec->cache_y_stride_;
-  const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
-  const int y_offset = cache_id * 16 * dec->cache_y_stride_;
-  const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
-  uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;
-  uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;
-  uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;
-  const int mb_y = ctx->mb_y_;
+  const VP8ThreadContext* const ctx = &dec->thread_ctx;
+  const int cache_id = ctx->id;
+  const int extra_y_rows = kFilterExtraRows[dec->filter_type];
+  const int ysize = extra_y_rows * dec->cache_y_stride;
+  const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride;
+  const int y_offset = cache_id * 16 * dec->cache_y_stride;
+  const int uv_offset = cache_id * 8 * dec->cache_uv_stride;
+  uint8_t* const ydst = dec->cache_y - ysize + y_offset;
+  uint8_t* const udst = dec->cache_u - uvsize + uv_offset;
+  uint8_t* const vdst = dec->cache_v - uvsize + uv_offset;
+  const int mb_y = ctx->mb_y;
  const int is_first_row = (mb_y == 0);
-  const int is_last_row = (mb_y >= dec->br_mb_y_ - 1);
+  const int is_last_row = (mb_y >= dec->br_mb_y - 1);

-  if (dec->mt_method_ == 2) {
+  if (dec->mt_method == 2) {
    ReconstructRow(dec, ctx);
  }

-  if (ctx->filter_row_) {
+  if (ctx->filter_row) {
    FilterRow(dec);
  }

-  if (dec->dither_) {
+  if (dec->dither) {
    DitherRow(dec);
  }

@@ -438,9 +449,9 @@ static int FinishRow(void* arg1, void* arg2) {
      io->u = udst;
      io->v = vdst;
    } else {
-      io->y = dec->cache_y_ + y_offset;
-      io->u = dec->cache_u_ + uv_offset;
-      io->v = dec->cache_v_ + uv_offset;
+      io->y = dec->cache_y + y_offset;
+      io->u = dec->cache_u + uv_offset;
+      io->v = dec->cache_v + uv_offset;
    }

    if (!is_last_row) {
@@ -449,9 +460,9 @@ static int FinishRow(void* arg1, void* arg2) {
    if (y_end > io->crop_bottom) {
      y_end = io->crop_bottom;    // make sure we don't overflow on last row.
    }
-    // If dec->alpha_data_ is not NULL, we have some alpha plane present.
+    // If dec->alpha_data is not NULL, we have some alpha plane present.
    io->a = NULL;
-    if (dec->alpha_data_ != NULL && y_start < y_end) {
+    if (dec->alpha_data != NULL && y_start < y_end) {
      io->a = VP8DecompressAlphaRows(dec, io, y_start, y_end - y_start);
      if (io->a == NULL) {
        return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
@@ -462,9 +473,9 @@ static int FinishRow(void* arg1, void* arg2) {
      const int delta_y = io->crop_top - y_start;
      y_start = io->crop_top;
      assert(!(delta_y & 1));
-      io->y += dec->cache_y_stride_ * delta_y;
-      io->u += dec->cache_uv_stride_ * (delta_y >> 1);
-      io->v += dec->cache_uv_stride_ * (delta_y >> 1);
+      io->y += dec->cache_y_stride * delta_y;
+      io->u += dec->cache_uv_stride * (delta_y >> 1);
+      io->v += dec->cache_uv_stride * (delta_y >> 1);
      if (io->a != NULL) {
        io->a += io->width * delta_y;
      }
@@ -483,11 +494,11 @@ static int FinishRow(void* arg1, void* arg2) {
    }
  }
  // rotate top samples if needed
-  if (cache_id + 1 == dec->num_caches_) {
+  if (cache_id + 1 == dec->num_caches) {
    if (!is_last_row) {
-      memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);
-      memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);
-      memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);
+      memcpy(dec->cache_y - ysize, ydst + 16 * dec->cache_y_stride, ysize);
+      memcpy(dec->cache_u - uvsize, udst + 8 * dec->cache_uv_stride, uvsize);
+      memcpy(dec->cache_v - uvsize, vdst + 8 * dec->cache_uv_stride, uvsize);
    }
  }

@@ -500,43 +511,43 @@ static int FinishRow(void* arg1, void* arg2) {

 int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
  int ok = 1;
-  VP8ThreadContext* const ctx = &dec->thread_ctx_;
+  VP8ThreadContext* const ctx = &dec->thread_ctx;
  const int filter_row =
-      (dec->filter_type_ > 0) &&
-      (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
-  if (dec->mt_method_ == 0) {
-    // ctx->id_ and ctx->f_info_ are already set
-    ctx->mb_y_ = dec->mb_y_;
-    ctx->filter_row_ = filter_row;
+      (dec->filter_type > 0) &&
+      (dec->mb_y >= dec->tl_mb_y) && (dec->mb_y <= dec->br_mb_y);
+  if (dec->mt_method == 0) {
+    // ctx->id and ctx->f_info are already set
+    ctx->mb_y = dec->mb_y;
+    ctx->filter_row = filter_row;
    ReconstructRow(dec, ctx);
    ok = FinishRow(dec, io);
  } else {
-    WebPWorker* const worker = &dec->worker_;
+    WebPWorker* const worker = &dec->worker;
    // Finish previous job *before* updating context
    ok &= WebPGetWorkerInterface()->Sync(worker);
-    assert(worker->status_ == OK);
+    assert(worker->status == OK);
    if (ok) {   // spawn a new deblocking/output job
-      ctx->io_ = *io;
-      ctx->id_ = dec->cache_id_;
-      ctx->mb_y_ = dec->mb_y_;
-      ctx->filter_row_ = filter_row;
-      if (dec->mt_method_ == 2) {  // swap macroblock data
-        VP8MBData* const tmp = ctx->mb_data_;
-        ctx->mb_data_ = dec->mb_data_;
-        dec->mb_data_ = tmp;
+      ctx->io = *io;
+      ctx->id = dec->cache_id;
+      ctx->mb_y = dec->mb_y;
+      ctx->filter_row = filter_row;
+      if (dec->mt_method == 2) {  // swap macroblock data
+        VP8MBData* const tmp = ctx->mb_data;
+        ctx->mb_data = dec->mb_data;
+        dec->mb_data = tmp;
      } else {
        // perform reconstruction directly in main thread
        ReconstructRow(dec, ctx);
      }
      if (filter_row) {            // swap filter info
-        VP8FInfo* const tmp = ctx->f_info_;
-        ctx->f_info_ = dec->f_info_;
-        dec->f_info_ = tmp;
+        VP8FInfo* const tmp = ctx->f_info;
+        ctx->f_info = dec->f_info;
+        dec->f_info = tmp;
      }
      // (reconstruct)+filter in parallel
      WebPGetWorkerInterface()->Launch(worker);
-      if (++dec->cache_id_ == dec->num_caches_) {
-        dec->cache_id_ = 0;
+      if (++dec->cache_id == dec->num_caches) {
+        dec->cache_id = 0;
      }
    }
  }
@@ -551,12 +562,12 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
  // Note: Afterward, we must call teardown() no matter what.
  if (io->setup != NULL && !io->setup(io)) {
    VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
-    return dec->status_;
+    return dec->status;
  }

  // Disable filtering per user request
  if (io->bypass_filtering) {
-    dec->filter_type_ = 0;
+    dec->filter_type = 0;
  }

  // Define the area where we can skip in-loop filtering, in case of cropping.
@@ -569,29 +580,29 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
  // top-left corner of the picture (MB #0). We must filter all the previous
  // macroblocks.
  {
-    const int extra_pixels = kFilterExtraRows[dec->filter_type_];
-    if (dec->filter_type_ == 2) {
+    const int extra_pixels = kFilterExtraRows[dec->filter_type];
+    if (dec->filter_type == 2) {
      // For complex filter, we need to preserve the dependency chain.
-      dec->tl_mb_x_ = 0;
-      dec->tl_mb_y_ = 0;
+      dec->tl_mb_x = 0;
+      dec->tl_mb_y = 0;
    } else {
      // For simple filter, we can filter only the cropped region.
      // We include 'extra_pixels' on the other side of the boundary, since
      // vertical or horizontal filtering of the previous macroblock can
      // modify some abutting pixels.
-      dec->tl_mb_x_ = (io->crop_left - extra_pixels) >> 4;
-      dec->tl_mb_y_ = (io->crop_top - extra_pixels) >> 4;
-      if (dec->tl_mb_x_ < 0) dec->tl_mb_x_ = 0;
-      if (dec->tl_mb_y_ < 0) dec->tl_mb_y_ = 0;
+      dec->tl_mb_x = (io->crop_left - extra_pixels) >> 4;
+      dec->tl_mb_y = (io->crop_top - extra_pixels) >> 4;
+      if (dec->tl_mb_x < 0) dec->tl_mb_x = 0;
+      if (dec->tl_mb_y < 0) dec->tl_mb_y = 0;
    }
    // We need some 'extra' pixels on the right/bottom.
-    dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;
-    dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4;
-    if (dec->br_mb_x_ > dec->mb_w_) {
-      dec->br_mb_x_ = dec->mb_w_;
+    dec->br_mb_y = (io->crop_bottom + 15 + extra_pixels) >> 4;
+    dec->br_mb_x = (io->crop_right + 15 + extra_pixels) >> 4;
+    if (dec->br_mb_x > dec->mb_w) {
+      dec->br_mb_x = dec->mb_w;
    }
-    if (dec->br_mb_y_ > dec->mb_h_) {
-      dec->br_mb_y_ = dec->mb_h_;
+    if (dec->br_mb_y > dec->mb_h) {
+      dec->br_mb_y = dec->mb_h;
    }
  }
  PrecomputeFilterStrengths(dec);
@@ -600,8 +611,8 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {

 int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
  int ok = 1;
-  if (dec->mt_method_ > 0) {
-    ok = WebPGetWorkerInterface()->Sync(&dec->worker_);
+  if (dec->mt_method > 0) {
+    ok = WebPGetWorkerInterface()->Sync(&dec->worker);
  }

  if (io->teardown != NULL) {
@@ -639,20 +650,20 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {

 // Initialize multi/single-thread worker
 static int InitThreadContext(VP8Decoder* const dec) {
-  dec->cache_id_ = 0;
-  if (dec->mt_method_ > 0) {
-    WebPWorker* const worker = &dec->worker_;
+  dec->cache_id = 0;
+  if (dec->mt_method > 0) {
+    WebPWorker* const worker = &dec->worker;
    if (!WebPGetWorkerInterface()->Reset(worker)) {
      return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
                         "thread initialization failed.");
    }
    worker->data1 = dec;
-    worker->data2 = (void*)&dec->thread_ctx_.io_;
+    worker->data2 = (void*)&dec->thread_ctx.io;
    worker->hook = FinishRow;
-    dec->num_caches_ =
-      (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
+    dec->num_caches =
+        (dec->filter_type > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
  } else {
-    dec->num_caches_ = ST_CACHE_LINES;
+    dec->num_caches = ST_CACHE_LINES;
  }
  return 1;
 }
@@ -680,25 +691,25 @@ int VP8GetThreadMethod(const WebPDecoderOptions* const options,
 // Memory setup

 static int AllocateMemory(VP8Decoder* const dec) {
-  const int num_caches = dec->num_caches_;
-  const int mb_w = dec->mb_w_;
+  const int num_caches = dec->num_caches;
+  const int mb_w = dec->mb_w;
  // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
  const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
  const size_t top_size = sizeof(VP8TopSamples) * mb_w;
  const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
  const size_t f_info_size =
-      (dec->filter_type_ > 0) ?
-          mb_w * (dec->mt_method_ > 0 ? 2 : 1) * sizeof(VP8FInfo)
+      (dec->filter_type > 0) ?
+          mb_w * (dec->mt_method > 0 ? 2 : 1) * sizeof(VP8FInfo)
        : 0;
-  const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
+  const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b);
  const size_t mb_data_size =
-      (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);
+      (dec->mt_method == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data);
  const size_t cache_height = (16 * num_caches
-                            + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
+                            + kFilterExtraRows[dec->filter_type]) * 3 / 2;
  const size_t cache_size = top_size * cache_height;
  // alpha_size is the only one that scales as width x height.
-  const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?
-      (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
+  const uint64_t alpha_size = (dec->alpha_data != NULL) ?
+      (uint64_t)dec->pic_hdr.width * dec->pic_hdr.height : 0ULL;
  const uint64_t needed = (uint64_t)intra_pred_mode_size
                        + top_size + mb_info_size + f_info_size
                        + yuv_size + mb_data_size
@@ -706,77 +717,77 @@ static int AllocateMemory(VP8Decoder* const dec) {
  uint8_t* mem;

  if (!CheckSizeOverflow(needed)) return 0;  // check for overflow
-  if (needed > dec->mem_size_) {
-    WebPSafeFree(dec->mem_);
-    dec->mem_size_ = 0;
-    dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
-    if (dec->mem_ == NULL) {
+  if (needed > dec->mem_size) {
+    WebPSafeFree(dec->mem);
+    dec->mem_size = 0;
+    dec->mem = WebPSafeMalloc(needed, sizeof(uint8_t));
+    if (dec->mem == NULL) {
      return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
                         "no memory during frame initialization.");
    }
    // down-cast is ok, thanks to WebPSafeMalloc() above.
-    dec->mem_size_ = (size_t)needed;
+    dec->mem_size = (size_t)needed;
  }

-  mem = (uint8_t*)dec->mem_;
-  dec->intra_t_ = mem;
+  mem = (uint8_t*)dec->mem;
+  dec->intra_t = mem;
  mem += intra_pred_mode_size;

-  dec->yuv_t_ = (VP8TopSamples*)mem;
+  dec->yuv_t = (VP8TopSamples*)mem;
  mem += top_size;

-  dec->mb_info_ = ((VP8MB*)mem) + 1;
+  dec->mb_info = ((VP8MB*)mem) + 1;
  mem += mb_info_size;

-  dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;
+  dec->f_info = f_info_size ? (VP8FInfo*)mem : NULL;
  mem += f_info_size;
-  dec->thread_ctx_.id_ = 0;
-  dec->thread_ctx_.f_info_ = dec->f_info_;
-  if (dec->filter_type_ > 0 && dec->mt_method_ > 0) {
+  dec->thread_ctx.id = 0;
+  dec->thread_ctx.f_info = dec->f_info;
+  if (dec->filter_type > 0 && dec->mt_method > 0) {
    // secondary cache line. The deblocking process need to make use of the
    // filtering strength from previous macroblock row, while the new ones
    // are being decoded in parallel. We'll just swap the pointers.
-    dec->thread_ctx_.f_info_ += mb_w;
+    dec->thread_ctx.f_info += mb_w;
  }

  mem = (uint8_t*)WEBP_ALIGN(mem);
  assert((yuv_size & WEBP_ALIGN_CST) == 0);
-  dec->yuv_b_ = mem;
+  dec->yuv_b = mem;
  mem += yuv_size;

-  dec->mb_data_ = (VP8MBData*)mem;
-  dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;
-  if (dec->mt_method_ == 2) {
-    dec->thread_ctx_.mb_data_ += mb_w;
+  dec->mb_data = (VP8MBData*)mem;
+  dec->thread_ctx.mb_data = (VP8MBData*)mem;
+  if (dec->mt_method == 2) {
+    dec->thread_ctx.mb_data += mb_w;
  }
  mem += mb_data_size;

-  dec->cache_y_stride_ = 16 * mb_w;
-  dec->cache_uv_stride_ = 8 * mb_w;
+  dec->cache_y_stride = 16 * mb_w;
+  dec->cache_uv_stride = 8 * mb_w;
  {
-    const int extra_rows = kFilterExtraRows[dec->filter_type_];
-    const int extra_y = extra_rows * dec->cache_y_stride_;
-    const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
-    dec->cache_y_ = mem + extra_y;
-    dec->cache_u_ = dec->cache_y_
-                  + 16 * num_caches * dec->cache_y_stride_ + extra_uv;
-    dec->cache_v_ = dec->cache_u_
-                  + 8 * num_caches * dec->cache_uv_stride_ + extra_uv;
-    dec->cache_id_ = 0;
+    const int extra_rows = kFilterExtraRows[dec->filter_type];
+    const int extra_y = extra_rows * dec->cache_y_stride;
+    const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride;
+    dec->cache_y = mem + extra_y;
+    dec->cache_u = dec->cache_y
+                  + 16 * num_caches * dec->cache_y_stride + extra_uv;
+    dec->cache_v = dec->cache_u
+                  + 8 * num_caches * dec->cache_uv_stride + extra_uv;
+    dec->cache_id = 0;
  }
  mem += cache_size;

  // alpha plane
-  dec->alpha_plane_ = alpha_size ? mem : NULL;
+  dec->alpha_plane = alpha_size ? mem : NULL;
  mem += alpha_size;
-  assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
+  assert(mem <= (uint8_t*)dec->mem + dec->mem_size);

  // note: left/top-info is initialized once for all.
-  memset(dec->mb_info_ - 1, 0, mb_info_size);
+  memset(dec->mb_info - 1, 0, mb_info_size);
  VP8InitScanline(dec);   // initialize left too.

  // initialize top
-  memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
+  memset(dec->intra_t, B_DC_PRED, intra_pred_mode_size);

  return 1;
 }
@@ -784,16 +795,16 @@ static int AllocateMemory(VP8Decoder* const dec) {
 static void InitIo(VP8Decoder* const dec, VP8Io* io) {
  // prepare 'io'
  io->mb_y = 0;
-  io->y = dec->cache_y_;
-  io->u = dec->cache_u_;
-  io->v = dec->cache_v_;
-  io->y_stride = dec->cache_y_stride_;
-  io->uv_stride = dec->cache_uv_stride_;
+  io->y = dec->cache_y;
+  io->u = dec->cache_u;
+  io->v = dec->cache_v;
+  io->y_stride = dec->cache_y_stride;
+  io->uv_stride = dec->cache_uv_stride;
  io->a = NULL;
 }

 int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) {
-  if (!InitThreadContext(dec)) return 0;  // call first. Sets dec->num_caches_.
+  if (!InitThreadContext(dec)) return 0;  // call first. Sets dec->num_caches.
  if (!AllocateMemory(dec)) return 0;
  InitIo(dec, io);
  VP8DspInit();  // Init critical function pointers and look-up tables.
--- a/thirdparty/libwebp/src/dec/idec_dec.c
+++ b/thirdparty/libwebp/src/dec/idec_dec.c
@@ -12,15 +12,20 @@
 // Author: somnath@google.com (Somnath Banerjee)

 #include <assert.h>
-#include <string.h>
 #include <stdlib.h>
+#include <string.h>

 #include "src/dec/alphai_dec.h"
-#include "src/dec/webpi_dec.h"
 #include "src/dec/vp8_dec.h"
 #include "src/dec/vp8i_dec.h"
+#include "src/dec/vp8li_dec.h"
+#include "src/dec/webpi_dec.h"
+#include "src/utils/bit_reader_utils.h"
+#include "src/utils/thread_utils.h"
 #include "src/utils/utils.h"
 #include "src/webp/decode.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"

 // In append mode, buffer allocations increase as multiples of this value.
 // Needs to be a power of 2.
@@ -54,134 +59,140 @@ typedef enum {

 // storage for partition #0 and partial data (in a rolling fashion)
 typedef struct {
-  MemBufferMode mode_;  // Operation mode
-  size_t start_;        // start location of the data to be decoded
-  size_t end_;          // end location
-  size_t buf_size_;     // size of the allocated buffer
-  uint8_t* buf_;        // We don't own this buffer in case WebPIUpdate()
+  MemBufferMode mode;  // Operation mode
+  size_t start;        // start location of the data to be decoded
+  size_t end;          // end location
+  size_t buf_size;     // size of the allocated buffer
+  uint8_t* buf;        // We don't own this buffer in case WebPIUpdate()

-  size_t part0_size_;         // size of partition #0
-  const uint8_t* part0_buf_;  // buffer to store partition #0
+  size_t part0_size;         // size of partition #0
+  const uint8_t* part0_buf;  // buffer to store partition #0
 } MemBuffer;

 struct WebPIDecoder {
-  DecState state_;         // current decoding state
-  WebPDecParams params_;   // Params to store output info
-  int is_lossless_;        // for down-casting 'dec_'.
-  void* dec_;              // either a VP8Decoder or a VP8LDecoder instance
-  VP8Io io_;
+  DecState state;         // current decoding state
+  WebPDecParams params;   // Params to store output info
+  int is_lossless;        // for down-casting 'dec'.
+  void* dec;              // either a VP8Decoder or a VP8LDecoder instance
+  VP8Io io;

-  MemBuffer mem_;          // input memory buffer.
-  WebPDecBuffer output_;   // output buffer (when no external one is supplied,
-                           // or if the external one has slow-memory)
-  WebPDecBuffer* final_output_;  // Slow-memory output to copy to eventually.
-  size_t chunk_size_;      // Compressed VP8/VP8L size extracted from Header.
+  MemBuffer mem;          // input memory buffer.
+  WebPDecBuffer output;   // output buffer (when no external one is supplied,
+                          // or if the external one has slow-memory)
+  WebPDecBuffer* final_output;  // Slow-memory output to copy to eventually.
+  size_t chunk_size;      // Compressed VP8/VP8L size extracted from Header.

-  int last_mb_y_;          // last row reached for intra-mode decoding
+  int last_mb_y;          // last row reached for intra-mode decoding
 };

 // MB context to restore in case VP8DecodeMB() fails
 typedef struct {
-  VP8MB left_;
-  VP8MB info_;
-  VP8BitReader token_br_;
+  VP8MB left;
+  VP8MB info;
+  VP8BitReader token_br;
 } MBContext;

 //------------------------------------------------------------------------------
 // MemBuffer: incoming data handling

 static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
-  return (mem->end_ - mem->start_);
+  return (mem->end - mem->start);
 }

 // Check if we need to preserve the compressed alpha data, as it may not have
 // been decoded yet.
 static int NeedCompressedAlpha(const WebPIDecoder* const idec) {
-  if (idec->state_ == STATE_WEBP_HEADER) {
+  if (idec->state == STATE_WEBP_HEADER) {
    // We haven't parsed the headers yet, so we don't know whether the image is
    // lossy or lossless. This also means that we haven't parsed the ALPH chunk.
    return 0;
  }
-  if (idec->is_lossless_) {
+  if (idec->is_lossless) {
    return 0;  // ALPH chunk is not present for lossless images.
  } else {
-    const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
-    assert(dec != NULL);  // Must be true as idec->state_ != STATE_WEBP_HEADER.
-    return (dec->alpha_data_ != NULL) && !dec->is_alpha_decoded_;
+    const VP8Decoder* const dec = (VP8Decoder*)idec->dec;
+    assert(dec != NULL);  // Must be true as idec->state != STATE_WEBP_HEADER.
+    return (dec->alpha_data != NULL) && !dec->is_alpha_decoded;
  }
 }

 static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
-  MemBuffer* const mem = &idec->mem_;
-  const uint8_t* const new_base = mem->buf_ + mem->start_;
-  // note: for VP8, setting up idec->io_ is only really needed at the beginning
+  MemBuffer* const mem = &idec->mem;
+  const uint8_t* const new_base = mem->buf + mem->start;
+  // note: for VP8, setting up idec->io is only really needed at the beginning
  // of the decoding, till partition #0 is complete.
-  idec->io_.data = new_base;
-  idec->io_.data_size = MemDataSize(mem);
+  idec->io.data = new_base;
+  idec->io.data_size = MemDataSize(mem);

-  if (idec->dec_ != NULL) {
-    if (!idec->is_lossless_) {
-      VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
-      const uint32_t last_part = dec->num_parts_minus_one_;
+  if (idec->dec != NULL) {
+    if (!idec->is_lossless) {
+      VP8Decoder* const dec = (VP8Decoder*)idec->dec;
+      const uint32_t last_part = dec->num_parts_minus_one;
      if (offset != 0) {
        uint32_t p;
        for (p = 0; p <= last_part; ++p) {
-          VP8RemapBitReader(dec->parts_ + p, offset);
+          VP8RemapBitReader(dec->parts + p, offset);
        }
        // Remap partition #0 data pointer to new offset, but only in MAP
        // mode (in APPEND mode, partition #0 is copied into a fixed memory).
-        if (mem->mode_ == MEM_MODE_MAP) {
-          VP8RemapBitReader(&dec->br_, offset);
+        if (mem->mode == MEM_MODE_MAP) {
+          VP8RemapBitReader(&dec->br, offset);
        }
      }
      {
-        const uint8_t* const last_start = dec->parts_[last_part].buf_;
-        VP8BitReaderSetBuffer(&dec->parts_[last_part], last_start,
-                              mem->buf_ + mem->end_ - last_start);
+        const uint8_t* const last_start = dec->parts[last_part].buf;
+        // 'last_start' will be NULL when 'idec->state' is < STATE_VP8_PARTS0
+        // and through a portion of that state (when there isn't enough data to
+        // parse the partitions). The bitreader is only used meaningfully when
+        // there is enough data to begin parsing partition 0.
+        if (last_start != NULL) {
+          VP8BitReaderSetBuffer(&dec->parts[last_part], last_start,
+                                mem->buf + mem->end - last_start);
+        }
      }
      if (NeedCompressedAlpha(idec)) {
-        ALPHDecoder* const alph_dec = dec->alph_dec_;
-        dec->alpha_data_ += offset;
-        if (alph_dec != NULL && alph_dec->vp8l_dec_ != NULL) {
-          if (alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION) {
-            VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec_;
-            assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN);
-            VP8LBitReaderSetBuffer(&alph_vp8l_dec->br_,
-                                   dec->alpha_data_ + ALPHA_HEADER_LEN,
-                                   dec->alpha_data_size_ - ALPHA_HEADER_LEN);
-          } else {  // alph_dec->method_ == ALPHA_NO_COMPRESSION
+        ALPHDecoder* const alph_dec = dec->alph_dec;
+        dec->alpha_data += offset;
+        if (alph_dec != NULL && alph_dec->vp8l_dec != NULL) {
+          if (alph_dec->method == ALPHA_LOSSLESS_COMPRESSION) {
+            VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec;
+            assert(dec->alpha_data_size >= ALPHA_HEADER_LEN);
+            VP8LBitReaderSetBuffer(&alph_vp8l_dec->br,
+                                   dec->alpha_data + ALPHA_HEADER_LEN,
+                                   dec->alpha_data_size - ALPHA_HEADER_LEN);
+          } else {  // alph_dec->method == ALPHA_NO_COMPRESSION
            // Nothing special to do in this case.
          }
        }
      }
    } else {    // Resize lossless bitreader
-      VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
-      VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
+      VP8LDecoder* const dec = (VP8LDecoder*)idec->dec;
+      VP8LBitReaderSetBuffer(&dec->br, new_base, MemDataSize(mem));
    }
  }
 }

-// Appends data to the end of MemBuffer->buf_. It expands the allocated memory
+// Appends data to the end of MemBuffer->buf. It expands the allocated memory
 // size if required and also updates VP8BitReader's if new memory is allocated.
 WEBP_NODISCARD static int AppendToMemBuffer(WebPIDecoder* const idec,
                                            const uint8_t* const data,
                                            size_t data_size) {
-  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
-  MemBuffer* const mem = &idec->mem_;
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec;
+  MemBuffer* const mem = &idec->mem;
  const int need_compressed_alpha = NeedCompressedAlpha(idec);
  const uint8_t* const old_start =
-      (mem->buf_ == NULL) ? NULL : mem->buf_ + mem->start_;
+      (mem->buf == NULL) ? NULL : mem->buf + mem->start;
  const uint8_t* const old_base =
-      need_compressed_alpha ? dec->alpha_data_ : old_start;
-  assert(mem->buf_ != NULL || mem->start_ == 0);
-  assert(mem->mode_ == MEM_MODE_APPEND);
+      need_compressed_alpha ? dec->alpha_data : old_start;
+  assert(mem->buf != NULL || mem->start == 0);
+  assert(mem->mode == MEM_MODE_APPEND);
  if (data_size > MAX_CHUNK_PAYLOAD) {
    // security safeguard: trying to allocate more than what the format
    // allows for a chunk should be considered a smoke smell.
    return 0;
  }

-  if (mem->end_ + data_size > mem->buf_size_) {  // Need some free memory
+  if (mem->end + data_size > mem->buf_size) {  // Need some free memory
    const size_t new_mem_start = old_start - old_base;
    const size_t current_size = MemDataSize(mem) + new_mem_start;
    const uint64_t new_size = (uint64_t)current_size + data_size;
@@ -190,85 +201,85 @@ WEBP_NODISCARD static int AppendToMemBuffer(WebPIDecoder* const idec,
        (uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf));
    if (new_buf == NULL) return 0;
    if (old_base != NULL) memcpy(new_buf, old_base, current_size);
-    WebPSafeFree(mem->buf_);
-    mem->buf_ = new_buf;
-    mem->buf_size_ = (size_t)extra_size;
-    mem->start_ = new_mem_start;
-    mem->end_ = current_size;
+    WebPSafeFree(mem->buf);
+    mem->buf = new_buf;
+    mem->buf_size = (size_t)extra_size;
+    mem->start = new_mem_start;
+    mem->end = current_size;
  }

-  assert(mem->buf_ != NULL);
-  memcpy(mem->buf_ + mem->end_, data, data_size);
-  mem->end_ += data_size;
-  assert(mem->end_ <= mem->buf_size_);
+  assert(mem->buf != NULL);
+  memcpy(mem->buf + mem->end, data, data_size);
+  mem->end += data_size;
+  assert(mem->end <= mem->buf_size);

-  DoRemap(idec, mem->buf_ + mem->start_ - old_start);
+  DoRemap(idec, mem->buf + mem->start - old_start);
  return 1;
 }

 WEBP_NODISCARD static int RemapMemBuffer(WebPIDecoder* const idec,
                                         const uint8_t* const data,
                                         size_t data_size) {
-  MemBuffer* const mem = &idec->mem_;
-  const uint8_t* const old_buf = mem->buf_;
+  MemBuffer* const mem = &idec->mem;
+  const uint8_t* const old_buf = mem->buf;
  const uint8_t* const old_start =
-      (old_buf == NULL) ? NULL : old_buf + mem->start_;
-  assert(old_buf != NULL || mem->start_ == 0);
-  assert(mem->mode_ == MEM_MODE_MAP);
+      (old_buf == NULL) ? NULL : old_buf + mem->start;
+  assert(old_buf != NULL || mem->start == 0);
+  assert(mem->mode == MEM_MODE_MAP);

-  if (data_size < mem->buf_size_) return 0;  // can't remap to a shorter buffer!
+  if (data_size < mem->buf_size) return 0;  // can't remap to a shorter buffer!

-  mem->buf_ = (uint8_t*)data;
-  mem->end_ = mem->buf_size_ = data_size;
+  mem->buf = (uint8_t*)data;
+  mem->end = mem->buf_size = data_size;

-  DoRemap(idec, mem->buf_ + mem->start_ - old_start);
+  DoRemap(idec, mem->buf + mem->start - old_start);
  return 1;
 }

 static void InitMemBuffer(MemBuffer* const mem) {
-  mem->mode_       = MEM_MODE_NONE;
-  mem->buf_        = NULL;
-  mem->buf_size_   = 0;
-  mem->part0_buf_  = NULL;
-  mem->part0_size_ = 0;
+  mem->mode       = MEM_MODE_NONE;
+  mem->buf        = NULL;
+  mem->buf_size   = 0;
+  mem->part0_buf  = NULL;
+  mem->part0_size = 0;
 }

 static void ClearMemBuffer(MemBuffer* const mem) {
  assert(mem);
-  if (mem->mode_ == MEM_MODE_APPEND) {
-    WebPSafeFree(mem->buf_);
-    WebPSafeFree((void*)mem->part0_buf_);
+  if (mem->mode == MEM_MODE_APPEND) {
+    WebPSafeFree(mem->buf);
+    WebPSafeFree((void*)mem->part0_buf);
  }
 }

 WEBP_NODISCARD static int CheckMemBufferMode(MemBuffer* const mem,
                                             MemBufferMode expected) {
-  if (mem->mode_ == MEM_MODE_NONE) {
-    mem->mode_ = expected;    // switch to the expected mode
-  } else if (mem->mode_ != expected) {
+  if (mem->mode == MEM_MODE_NONE) {
+    mem->mode = expected;    // switch to the expected mode
+  } else if (mem->mode != expected) {
    return 0;         // we mixed the modes => error
  }
-  assert(mem->mode_ == expected);   // mode is ok
+  assert(mem->mode == expected);   // mode is ok
  return 1;
 }

 // To be called last.
 WEBP_NODISCARD static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) {
-  const WebPDecoderOptions* const options = idec->params_.options;
-  WebPDecBuffer* const output = idec->params_.output;
+  const WebPDecoderOptions* const options = idec->params.options;
+  WebPDecBuffer* const output = idec->params.output;

-  idec->state_ = STATE_DONE;
+  idec->state = STATE_DONE;
  if (options != NULL && options->flip) {
    const VP8StatusCode status = WebPFlipBuffer(output);
    if (status != VP8_STATUS_OK) return status;
  }
-  if (idec->final_output_ != NULL) {
+  if (idec->final_output != NULL) {
    const VP8StatusCode status = WebPCopyDecBufferPixels(
-        output, idec->final_output_);  // do the slow-copy
-    WebPFreeDecBuffer(&idec->output_);
+        output, idec->final_output);  // do the slow-copy
+    WebPFreeDecBuffer(&idec->output);
    if (status != VP8_STATUS_OK) return status;
-    *output = *idec->final_output_;
-    idec->final_output_ = NULL;
+    *output = *idec->final_output;
+    idec->final_output = NULL;
  }
  return VP8_STATUS_OK;
 }
@@ -278,43 +289,43 @@ WEBP_NODISCARD static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) {

 static void SaveContext(const VP8Decoder* dec, const VP8BitReader* token_br,
                        MBContext* const context) {
-  context->left_ = dec->mb_info_[-1];
-  context->info_ = dec->mb_info_[dec->mb_x_];
-  context->token_br_ = *token_br;
+  context->left = dec->mb_info[-1];
+  context->info = dec->mb_info[dec->mb_x];
+  context->token_br = *token_br;
 }

 static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
                           VP8BitReader* const token_br) {
-  dec->mb_info_[-1] = context->left_;
-  dec->mb_info_[dec->mb_x_] = context->info_;
-  *token_br = context->token_br_;
+  dec->mb_info[-1] = context->left;
+  dec->mb_info[dec->mb_x] = context->info;
+  *token_br = context->token_br;
 }

 //------------------------------------------------------------------------------

 static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
-  if (idec->state_ == STATE_VP8_DATA) {
+  if (idec->state == STATE_VP8_DATA) {
    // Synchronize the thread, clean-up and check for errors.
-    (void)VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
+    (void)VP8ExitCritical((VP8Decoder*)idec->dec, &idec->io);
  }
-  idec->state_ = STATE_ERROR;
+  idec->state = STATE_ERROR;
  return error;
 }

 static void ChangeState(WebPIDecoder* const idec, DecState new_state,
                        size_t consumed_bytes) {
-  MemBuffer* const mem = &idec->mem_;
-  idec->state_ = new_state;
-  mem->start_ += consumed_bytes;
-  assert(mem->start_ <= mem->end_);
-  idec->io_.data = mem->buf_ + mem->start_;
-  idec->io_.data_size = MemDataSize(mem);
+  MemBuffer* const mem = &idec->mem;
+  idec->state = new_state;
+  mem->start += consumed_bytes;
+  assert(mem->start <= mem->end);
+  idec->io.data = mem->buf + mem->start;
+  idec->io.data_size = MemDataSize(mem);
 }

 // Headers
 static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
-  MemBuffer* const mem = &idec->mem_;
-  const uint8_t* data = mem->buf_ + mem->start_;
+  MemBuffer* const mem = &idec->mem;
+  const uint8_t* data = mem->buf + mem->start;
  size_t curr_size = MemDataSize(mem);
  VP8StatusCode status;
  WebPHeaderStructure headers;
@@ -329,32 +340,32 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
    return IDecError(idec, status);
  }

-  idec->chunk_size_ = headers.compressed_size;
-  idec->is_lossless_ = headers.is_lossless;
-  if (!idec->is_lossless_) {
+  idec->chunk_size = headers.compressed_size;
+  idec->is_lossless = headers.is_lossless;
+  if (!idec->is_lossless) {
    VP8Decoder* const dec = VP8New();
    if (dec == NULL) {
      return VP8_STATUS_OUT_OF_MEMORY;
    }
-    dec->incremental_ = 1;
-    idec->dec_ = dec;
-    dec->alpha_data_ = headers.alpha_data;
-    dec->alpha_data_size_ = headers.alpha_data_size;
+    dec->incremental = 1;
+    idec->dec = dec;
+    dec->alpha_data = headers.alpha_data;
+    dec->alpha_data_size = headers.alpha_data_size;
    ChangeState(idec, STATE_VP8_HEADER, headers.offset);
  } else {
    VP8LDecoder* const dec = VP8LNew();
    if (dec == NULL) {
      return VP8_STATUS_OUT_OF_MEMORY;
    }
-    idec->dec_ = dec;
+    idec->dec = dec;
    ChangeState(idec, STATE_VP8L_HEADER, headers.offset);
  }
  return VP8_STATUS_OK;
 }

 static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
-  const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_;
-  const size_t curr_size = MemDataSize(&idec->mem_);
+  const uint8_t* data = idec->mem.buf + idec->mem.start;
+  const size_t curr_size = MemDataSize(&idec->mem);
  int width, height;
  uint32_t bits;

@@ -362,61 +373,61 @@ static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
    // Not enough data bytes to extract VP8 Frame Header.
    return VP8_STATUS_SUSPENDED;
  }
-  if (!VP8GetInfo(data, curr_size, idec->chunk_size_, &width, &height)) {
+  if (!VP8GetInfo(data, curr_size, idec->chunk_size, &width, &height)) {
    return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
  }

  bits = data[0] | (data[1] << 8) | (data[2] << 16);
-  idec->mem_.part0_size_ = (bits >> 5) + VP8_FRAME_HEADER_SIZE;
+  idec->mem.part0_size = (bits >> 5) + VP8_FRAME_HEADER_SIZE;

-  idec->io_.data = data;
-  idec->io_.data_size = curr_size;
-  idec->state_ = STATE_VP8_PARTS0;
+  idec->io.data = data;
+  idec->io.data_size = curr_size;
+  idec->state = STATE_VP8_PARTS0;
  return VP8_STATUS_OK;
 }

 // Partition #0
 static VP8StatusCode CopyParts0Data(WebPIDecoder* const idec) {
-  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
-  VP8BitReader* const br = &dec->br_;
-  const size_t part_size = br->buf_end_ - br->buf_;
-  MemBuffer* const mem = &idec->mem_;
-  assert(!idec->is_lossless_);
-  assert(mem->part0_buf_ == NULL);
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec;
+  VP8BitReader* const br = &dec->br;
+  const size_t part_size = br->buf_end - br->buf;
+  MemBuffer* const mem = &idec->mem;
+  assert(!idec->is_lossless);
+  assert(mem->part0_buf == NULL);
  // the following is a format limitation, no need for runtime check:
-  assert(part_size <= mem->part0_size_);
+  assert(part_size <= mem->part0_size);
  if (part_size == 0) {   // can't have zero-size partition #0
    return VP8_STATUS_BITSTREAM_ERROR;
  }
-  if (mem->mode_ == MEM_MODE_APPEND) {
+  if (mem->mode == MEM_MODE_APPEND) {
    // We copy and grab ownership of the partition #0 data.
    uint8_t* const part0_buf = (uint8_t*)WebPSafeMalloc(1ULL, part_size);
    if (part0_buf == NULL) {
      return VP8_STATUS_OUT_OF_MEMORY;
    }
-    memcpy(part0_buf, br->buf_, part_size);
-    mem->part0_buf_ = part0_buf;
+    memcpy(part0_buf, br->buf, part_size);
+    mem->part0_buf = part0_buf;
    VP8BitReaderSetBuffer(br, part0_buf, part_size);
  } else {
-    // Else: just keep pointers to the partition #0's data in dec_->br_.
+    // Else: just keep pointers to the partition #0's data in dec->br.
  }
-  mem->start_ += part_size;
+  mem->start += part_size;
  return VP8_STATUS_OK;
 }

 static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
-  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
-  VP8Io* const io = &idec->io_;
-  const WebPDecParams* const params = &idec->params_;
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec;
+  VP8Io* const io = &idec->io;
+  const WebPDecParams* const params = &idec->params;
  WebPDecBuffer* const output = params->output;

  // Wait till we have enough data for the whole partition #0
-  if (MemDataSize(&idec->mem_) < idec->mem_.part0_size_) {
+  if (MemDataSize(&idec->mem) < idec->mem.part0_size) {
    return VP8_STATUS_SUSPENDED;
  }

  if (!VP8GetHeaders(dec, io)) {
-    const VP8StatusCode status = dec->status_;
+    const VP8StatusCode status = dec->status;
    if (status == VP8_STATUS_SUSPENDED ||
        status == VP8_STATUS_NOT_ENOUGH_DATA) {
      // treating NOT_ENOUGH_DATA as SUSPENDED state
@@ -426,69 +437,69 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
  }

  // Allocate/Verify output buffer now
-  dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options,
-                                       output);
-  if (dec->status_ != VP8_STATUS_OK) {
-    return IDecError(idec, dec->status_);
+  dec->status = WebPAllocateDecBuffer(io->width, io->height, params->options,
+                                      output);
+  if (dec->status != VP8_STATUS_OK) {
+    return IDecError(idec, dec->status);
  }
  // This change must be done before calling VP8InitFrame()
-  dec->mt_method_ = VP8GetThreadMethod(params->options, NULL,
-                                       io->width, io->height);
+  dec->mt_method = VP8GetThreadMethod(params->options, NULL,
+                                      io->width, io->height);
  VP8InitDithering(params->options, dec);

-  dec->status_ = CopyParts0Data(idec);
-  if (dec->status_ != VP8_STATUS_OK) {
-    return IDecError(idec, dec->status_);
+  dec->status = CopyParts0Data(idec);
+  if (dec->status != VP8_STATUS_OK) {
+    return IDecError(idec, dec->status);
  }

  // Finish setting up the decoding parameters. Will call io->setup().
  if (VP8EnterCritical(dec, io) != VP8_STATUS_OK) {
-    return IDecError(idec, dec->status_);
+    return IDecError(idec, dec->status);
  }

  // Note: past this point, teardown() must always be called
  // in case of error.
-  idec->state_ = STATE_VP8_DATA;
+  idec->state = STATE_VP8_DATA;
  // Allocate memory and prepare everything.
  if (!VP8InitFrame(dec, io)) {
-    return IDecError(idec, dec->status_);
+    return IDecError(idec, dec->status);
  }
  return VP8_STATUS_OK;
 }

 // Remaining partitions
 static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
-  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
-  VP8Io* const io = &idec->io_;
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec;
+  VP8Io* const io = &idec->io;

-  // Make sure partition #0 has been read before, to set dec to ready_.
-  if (!dec->ready_) {
+  // Make sure partition #0 has been read before, to set dec to ready.
+  if (!dec->ready) {
    return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
  }
-  for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
-    if (idec->last_mb_y_ != dec->mb_y_) {
-      if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
+  for (; dec->mb_y < dec->mb_h; ++dec->mb_y) {
+    if (idec->last_mb_y != dec->mb_y) {
+      if (!VP8ParseIntraModeRow(&dec->br, dec)) {
        // note: normally, error shouldn't occur since we already have the whole
        // partition0 available here in DecodeRemaining(). Reaching EOF while
        // reading intra modes really means a BITSTREAM_ERROR.
        return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
      }
-      idec->last_mb_y_ = dec->mb_y_;
+      idec->last_mb_y = dec->mb_y;
    }
-    for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
+    for (; dec->mb_x < dec->mb_w; ++dec->mb_x) {
      VP8BitReader* const token_br =
-          &dec->parts_[dec->mb_y_ & dec->num_parts_minus_one_];
+          &dec->parts[dec->mb_y & dec->num_parts_minus_one];
      MBContext context;
      SaveContext(dec, token_br, &context);
      if (!VP8DecodeMB(dec, token_br)) {
        // We shouldn't fail when MAX_MB data was available
-        if (dec->num_parts_minus_one_ == 0 &&
-            MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
+        if (dec->num_parts_minus_one == 0 &&
+            MemDataSize(&idec->mem) > MAX_MB_SIZE) {
          return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
        }
        // Synchronize the threads.
-        if (dec->mt_method_ > 0) {
-          if (!WebPGetWorkerInterface()->Sync(&dec->worker_)) {
+        if (dec->mt_method > 0) {
+          if (!WebPGetWorkerInterface()->Sync(&dec->worker)) {
            return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
          }
        }
@@ -496,9 +507,9 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
        return VP8_STATUS_SUSPENDED;
      }
      // Release buffer only if there is only one partition
-      if (dec->num_parts_minus_one_ == 0) {
-        idec->mem_.start_ = token_br->buf_ - idec->mem_.buf_;
-        assert(idec->mem_.start_ <= idec->mem_.end_);
+      if (dec->num_parts_minus_one == 0) {
+        idec->mem.start = token_br->buf - idec->mem.buf;
+        assert(idec->mem.start <= idec->mem.end);
      }
    }
    VP8InitScanline(dec);   // Prepare for next scanline
@@ -510,10 +521,10 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
  }
  // Synchronize the thread and check for errors.
  if (!VP8ExitCritical(dec, io)) {
-    idec->state_ = STATE_ERROR;  // prevent re-entry in IDecError
+    idec->state = STATE_ERROR;  // prevent re-entry in IDecError
    return IDecError(idec, VP8_STATUS_USER_ABORT);
  }
-  dec->ready_ = 0;
+  dec->ready = 0;
  return FinishDecoding(idec);
 }

@@ -526,81 +537,81 @@ static VP8StatusCode ErrorStatusLossless(WebPIDecoder* const idec,
 }

 static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
-  VP8Io* const io = &idec->io_;
-  VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
-  const WebPDecParams* const params = &idec->params_;
+  VP8Io* const io = &idec->io;
+  VP8LDecoder* const dec = (VP8LDecoder*)idec->dec;
+  const WebPDecParams* const params = &idec->params;
  WebPDecBuffer* const output = params->output;
-  size_t curr_size = MemDataSize(&idec->mem_);
-  assert(idec->is_lossless_);
+  size_t curr_size = MemDataSize(&idec->mem);
+  assert(idec->is_lossless);

  // Wait until there's enough data for decoding header.
-  if (curr_size < (idec->chunk_size_ >> 3)) {
-    dec->status_ = VP8_STATUS_SUSPENDED;
-    return ErrorStatusLossless(idec, dec->status_);
+  if (curr_size < (idec->chunk_size >> 3)) {
+    dec->status = VP8_STATUS_SUSPENDED;
+    return ErrorStatusLossless(idec, dec->status);
  }

  if (!VP8LDecodeHeader(dec, io)) {
-    if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR &&
-        curr_size < idec->chunk_size_) {
-      dec->status_ = VP8_STATUS_SUSPENDED;
+    if (dec->status == VP8_STATUS_BITSTREAM_ERROR &&
+        curr_size < idec->chunk_size) {
+      dec->status = VP8_STATUS_SUSPENDED;
    }
-    return ErrorStatusLossless(idec, dec->status_);
+    return ErrorStatusLossless(idec, dec->status);
  }
  // Allocate/verify output buffer now.
-  dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options,
-                                       output);
-  if (dec->status_ != VP8_STATUS_OK) {
-    return IDecError(idec, dec->status_);
+  dec->status = WebPAllocateDecBuffer(io->width, io->height, params->options,
+                                      output);
+  if (dec->status != VP8_STATUS_OK) {
+    return IDecError(idec, dec->status);
  }

-  idec->state_ = STATE_VP8L_DATA;
+  idec->state = STATE_VP8L_DATA;
  return VP8_STATUS_OK;
 }

 static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
-  VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
-  const size_t curr_size = MemDataSize(&idec->mem_);
-  assert(idec->is_lossless_);
+  VP8LDecoder* const dec = (VP8LDecoder*)idec->dec;
+  const size_t curr_size = MemDataSize(&idec->mem);
+  assert(idec->is_lossless);

  // Switch to incremental decoding if we don't have all the bytes available.
-  dec->incremental_ = (curr_size < idec->chunk_size_);
+  dec->incremental = (curr_size < idec->chunk_size);

  if (!VP8LDecodeImage(dec)) {
-    return ErrorStatusLossless(idec, dec->status_);
+    return ErrorStatusLossless(idec, dec->status);
  }
-  assert(dec->status_ == VP8_STATUS_OK || dec->status_ == VP8_STATUS_SUSPENDED);
-  return (dec->status_ == VP8_STATUS_SUSPENDED) ? dec->status_
-                                                : FinishDecoding(idec);
+  assert(dec->status == VP8_STATUS_OK || dec->status == VP8_STATUS_SUSPENDED);
+  return (dec->status == VP8_STATUS_SUSPENDED) ? dec->status
+                                               : FinishDecoding(idec);
 }

  // Main decoding loop
 static VP8StatusCode IDecode(WebPIDecoder* idec) {
  VP8StatusCode status = VP8_STATUS_SUSPENDED;

-  if (idec->state_ == STATE_WEBP_HEADER) {
+  if (idec->state == STATE_WEBP_HEADER) {
    status = DecodeWebPHeaders(idec);
  } else {
-    if (idec->dec_ == NULL) {
+    if (idec->dec == NULL) {
      return VP8_STATUS_SUSPENDED;    // can't continue if we have no decoder.
    }
  }
-  if (idec->state_ == STATE_VP8_HEADER) {
+  if (idec->state == STATE_VP8_HEADER) {
    status = DecodeVP8FrameHeader(idec);
  }
-  if (idec->state_ == STATE_VP8_PARTS0) {
+  if (idec->state == STATE_VP8_PARTS0) {
    status = DecodePartition0(idec);
  }
-  if (idec->state_ == STATE_VP8_DATA) {
-    const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+  if (idec->state == STATE_VP8_DATA) {
+    const VP8Decoder* const dec = (VP8Decoder*)idec->dec;
    if (dec == NULL) {
      return VP8_STATUS_SUSPENDED;  // can't continue if we have no decoder.
    }
    status = DecodeRemaining(idec);
  }
-  if (idec->state_ == STATE_VP8L_HEADER) {
+  if (idec->state == STATE_VP8L_HEADER) {
    status = DecodeVP8LHeader(idec);
  }
-  if (idec->state_ == STATE_VP8L_DATA) {
+  if (idec->state == STATE_VP8L_DATA) {
    status = DecodeVP8LData(idec);
  }
  return status;
@@ -617,29 +628,29 @@ WEBP_NODISCARD static WebPIDecoder* NewDecoder(
    return NULL;
  }

-  idec->state_ = STATE_WEBP_HEADER;
-  idec->chunk_size_ = 0;
+  idec->state = STATE_WEBP_HEADER;
+  idec->chunk_size = 0;

-  idec->last_mb_y_ = -1;
+  idec->last_mb_y = -1;

-  InitMemBuffer(&idec->mem_);
-  if (!WebPInitDecBuffer(&idec->output_) || !VP8InitIo(&idec->io_)) {
+  InitMemBuffer(&idec->mem);
+  if (!WebPInitDecBuffer(&idec->output) || !VP8InitIo(&idec->io)) {
    WebPSafeFree(idec);
    return NULL;
  }

-  WebPResetDecParams(&idec->params_);
+  WebPResetDecParams(&idec->params);
  if (output_buffer == NULL || WebPAvoidSlowMemory(output_buffer, features)) {
-    idec->params_.output = &idec->output_;
-    idec->final_output_ = output_buffer;
+    idec->params.output = &idec->output;
+    idec->final_output = output_buffer;
    if (output_buffer != NULL) {
-      idec->params_.output->colorspace = output_buffer->colorspace;
+      idec->params.output->colorspace = output_buffer->colorspace;
    }
  } else {
-    idec->params_.output = output_buffer;
-    idec->final_output_ = NULL;
+    idec->params.output = output_buffer;
+    idec->final_output = NULL;
  }
-  WebPInitCustomIo(&idec->params_, &idec->io_);  // Plug the I/O functions.
+  WebPInitCustomIo(&idec->params, &idec->io);  // Plug the I/O functions.

  return idec;
 }
@@ -674,27 +685,27 @@ WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size,
  }
  // Finish initialization
  if (config != NULL) {
-    idec->params_.options = &config->options;
+    idec->params.options = &config->options;
  }
  return idec;
 }

 void WebPIDelete(WebPIDecoder* idec) {
  if (idec == NULL) return;
-  if (idec->dec_ != NULL) {
-    if (!idec->is_lossless_) {
-      if (idec->state_ == STATE_VP8_DATA) {
+  if (idec->dec != NULL) {
+    if (!idec->is_lossless) {
+      if (idec->state == STATE_VP8_DATA) {
        // Synchronize the thread, clean-up and check for errors.
        // TODO(vrabaud) do we care about the return result?
-        (void)VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
+        (void)VP8ExitCritical((VP8Decoder*)idec->dec, &idec->io);
      }
-      VP8Delete((VP8Decoder*)idec->dec_);
+      VP8Delete((VP8Decoder*)idec->dec);
    } else {
-      VP8LDelete((VP8LDecoder*)idec->dec_);
+      VP8LDelete((VP8LDecoder*)idec->dec);
    }
  }
-  ClearMemBuffer(&idec->mem_);
-  WebPFreeDecBuffer(&idec->output_);
+  ClearMemBuffer(&idec->mem);
+  WebPFreeDecBuffer(&idec->output);
  WebPSafeFree(idec);
 }

@@ -717,11 +728,11 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
  }
  idec = WebPINewDecoder(NULL);
  if (idec == NULL) return NULL;
-  idec->output_.colorspace = csp;
-  idec->output_.is_external_memory = is_external_memory;
-  idec->output_.u.RGBA.rgba = output_buffer;
-  idec->output_.u.RGBA.stride = output_stride;
-  idec->output_.u.RGBA.size = output_buffer_size;
+  idec->output.colorspace = csp;
+  idec->output.is_external_memory = is_external_memory;
+  idec->output.u.RGBA.rgba = output_buffer;
+  idec->output.u.RGBA.stride = output_stride;
+  idec->output.u.RGBA.size = output_buffer_size;
  return idec;
 }

@@ -751,20 +762,20 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
  idec = WebPINewDecoder(NULL);
  if (idec == NULL) return NULL;

-  idec->output_.colorspace = colorspace;
-  idec->output_.is_external_memory = is_external_memory;
-  idec->output_.u.YUVA.y = luma;
-  idec->output_.u.YUVA.y_stride = luma_stride;
-  idec->output_.u.YUVA.y_size = luma_size;
-  idec->output_.u.YUVA.u = u;
-  idec->output_.u.YUVA.u_stride = u_stride;
-  idec->output_.u.YUVA.u_size = u_size;
-  idec->output_.u.YUVA.v = v;
-  idec->output_.u.YUVA.v_stride = v_stride;
-  idec->output_.u.YUVA.v_size = v_size;
-  idec->output_.u.YUVA.a = a;
-  idec->output_.u.YUVA.a_stride = a_stride;
-  idec->output_.u.YUVA.a_size = a_size;
+  idec->output.colorspace = colorspace;
+  idec->output.is_external_memory = is_external_memory;
+  idec->output.u.YUVA.y = luma;
+  idec->output.u.YUVA.y_stride = luma_stride;
+  idec->output.u.YUVA.y_size = luma_size;
+  idec->output.u.YUVA.u = u;
+  idec->output.u.YUVA.u_stride = u_stride;
+  idec->output.u.YUVA.u_size = u_size;
+  idec->output.u.YUVA.v = v;
+  idec->output.u.YUVA.v_stride = v_stride;
+  idec->output.u.YUVA.v_size = v_size;
+  idec->output.u.YUVA.a = a;
+  idec->output.u.YUVA.a_stride = a_stride;
+  idec->output.u.YUVA.a_size = a_size;
  return idec;
 }

@@ -781,10 +792,10 @@ WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,

 static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) {
  assert(idec);
-  if (idec->state_ == STATE_ERROR) {
+  if (idec->state == STATE_ERROR) {
    return VP8_STATUS_BITSTREAM_ERROR;
  }
-  if (idec->state_ == STATE_DONE) {
+  if (idec->state == STATE_DONE) {
    return VP8_STATUS_OK;
  }
  return VP8_STATUS_SUSPENDED;
@@ -801,7 +812,7 @@ VP8StatusCode WebPIAppend(WebPIDecoder* idec,
    return status;
  }
  // Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
-  if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_APPEND)) {
+  if (!CheckMemBufferMode(&idec->mem, MEM_MODE_APPEND)) {
    return VP8_STATUS_INVALID_PARAM;
  }
  // Append data to memory buffer
@@ -822,7 +833,7 @@ VP8StatusCode WebPIUpdate(WebPIDecoder* idec,
    return status;
  }
  // Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
-  if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_MAP)) {
+  if (!CheckMemBufferMode(&idec->mem, MEM_MODE_MAP)) {
    return VP8_STATUS_INVALID_PARAM;
  }
  // Make the memory buffer point to the new buffer
@@ -835,16 +846,16 @@ VP8StatusCode WebPIUpdate(WebPIDecoder* idec,
 //------------------------------------------------------------------------------

 static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
-  if (idec == NULL || idec->dec_ == NULL) {
+  if (idec == NULL || idec->dec == NULL) {
    return NULL;
  }
-  if (idec->state_ <= STATE_VP8_PARTS0) {
+  if (idec->state <= STATE_VP8_PARTS0) {
    return NULL;
  }
-  if (idec->final_output_ != NULL) {
+  if (idec->final_output != NULL) {
    return NULL;   // not yet slow-copied
  }
-  return idec->params_.output;
+  return idec->params.output;
 }

 const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
@@ -855,7 +866,7 @@ const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
  if (top != NULL) *top = 0;
  if (src != NULL) {
    if (width != NULL) *width = src->width;
-    if (height != NULL) *height = idec->params_.last_y;
+    if (height != NULL) *height = idec->params.last_y;
  } else {
    if (width != NULL) *width = 0;
    if (height != NULL) *height = 0;
@@ -871,7 +882,7 @@ WEBP_NODISCARD uint8_t* WebPIDecGetRGB(const WebPIDecoder* idec, int* last_y,
    return NULL;
  }

-  if (last_y != NULL) *last_y = idec->params_.last_y;
+  if (last_y != NULL) *last_y = idec->params.last_y;
  if (width != NULL) *width = src->width;
  if (height != NULL) *height = src->height;
  if (stride != NULL) *stride = src->u.RGBA.stride;
@@ -889,7 +900,7 @@ WEBP_NODISCARD uint8_t* WebPIDecGetYUVA(const WebPIDecoder* idec, int* last_y,
    return NULL;
  }

-  if (last_y != NULL) *last_y = idec->params_.last_y;
+  if (last_y != NULL) *last_y = idec->params.last_y;
  if (u != NULL) *u = src->u.YUVA.u;
  if (v != NULL) *v = src->u.YUVA.v;
  if (a != NULL) *a = src->u.YUVA.a;
@@ -907,14 +918,14 @@ int WebPISetIOHooks(WebPIDecoder* const idec,
                    VP8IoSetupHook setup,
                    VP8IoTeardownHook teardown,
                    void* user_data) {
-  if (idec == NULL || idec->state_ > STATE_WEBP_HEADER) {
+  if (idec == NULL || idec->state > STATE_WEBP_HEADER) {
    return 0;
  }

-  idec->io_.put = put;
-  idec->io_.setup = setup;
-  idec->io_.teardown = teardown;
-  idec->io_.opaque = user_data;
+  idec->io.put = put;
+  idec->io.setup = setup;
+  idec->io.teardown = teardown;
+  idec->io.opaque = user_data;

  return 1;
 }
--- a/thirdparty/libwebp/src/dec/io_dec.c
+++ b/thirdparty/libwebp/src/dec/io_dec.c
@@ -12,12 +12,20 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include <assert.h>
+#include <stddef.h>
 #include <stdlib.h>
+#include <string.h>
+
+#include "src/dec/vp8_dec.h"
+#include "src/webp/types.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dec/webpi_dec.h"
+#include "src/dsp/cpu.h"
 #include "src/dsp/dsp.h"
 #include "src/dsp/yuv.h"
+#include "src/utils/rescaler_utils.h"
 #include "src/utils/utils.h"
+#include "src/webp/decode.h"

 //------------------------------------------------------------------------------
 // Main YUV<->RGB conversion functions
@@ -25,9 +33,9 @@
 static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
  WebPDecBuffer* output = p->output;
  const WebPYUVABuffer* const buf = &output->u.YUVA;
-  uint8_t* const y_dst = buf->y + (size_t)io->mb_y * buf->y_stride;
-  uint8_t* const u_dst = buf->u + (size_t)(io->mb_y >> 1) * buf->u_stride;
-  uint8_t* const v_dst = buf->v + (size_t)(io->mb_y >> 1) * buf->v_stride;
+  uint8_t* const y_dst = buf->y + (ptrdiff_t)io->mb_y * buf->y_stride;
+  uint8_t* const u_dst = buf->u + (ptrdiff_t)(io->mb_y >> 1) * buf->u_stride;
+  uint8_t* const v_dst = buf->v + (ptrdiff_t)(io->mb_y >> 1) * buf->v_stride;
  const int mb_w = io->mb_w;
  const int mb_h = io->mb_h;
  const int uv_w = (mb_w + 1) / 2;
@@ -42,7 +50,7 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
 static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
  WebPDecBuffer* const output = p->output;
  WebPRGBABuffer* const buf = &output->u.RGBA;
-  uint8_t* const dst = buf->rgba + (size_t)io->mb_y * buf->stride;
+  uint8_t* const dst = buf->rgba + (ptrdiff_t)io->mb_y * buf->stride;
  WebPSamplerProcessPlane(io->y, io->y_stride,
                          io->u, io->v, io->uv_stride,
                          dst, buf->stride, io->mb_w, io->mb_h,
@@ -57,7 +65,7 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
 static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
  int num_lines_out = io->mb_h;   // a priori guess
  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
-  uint8_t* dst = buf->rgba + (size_t)io->mb_y * buf->stride;
+  uint8_t* dst = buf->rgba + (ptrdiff_t)io->mb_y * buf->stride;
  WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace];
  const uint8_t* cur_y = io->y;
  const uint8_t* cur_u = io->u;
@@ -128,7 +136,7 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
  const WebPYUVABuffer* const buf = &p->output->u.YUVA;
  const int mb_w = io->mb_w;
  const int mb_h = io->mb_h;
-  uint8_t* dst = buf->a + (size_t)io->mb_y * buf->a_stride;
+  uint8_t* dst = buf->a + (ptrdiff_t)io->mb_y * buf->a_stride;
  int j;
  (void)expected_num_lines_out;
  assert(expected_num_lines_out == mb_h);
@@ -181,8 +189,8 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
        (colorspace == MODE_ARGB || colorspace == MODE_Argb);
    const WebPRGBABuffer* const buf = &p->output->u.RGBA;
    int num_rows;
-    const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
-    uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
+    uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)start_y * buf->stride;
    uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3);
    const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w,
                                            num_rows, dst, buf->stride);
@@ -205,8 +213,8 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
    const WEBP_CSP_MODE colorspace = p->output->colorspace;
    const WebPRGBABuffer* const buf = &p->output->u.RGBA;
    int num_rows;
-    const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
-    uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
+    uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)start_y * buf->stride;
 #if (WEBP_SWAP_16BIT_CSP == 1)
    uint8_t* alpha_dst = base_rgba;
 #else
@@ -257,7 +265,7 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
  if (WebPIsAlphaMode(p->output->colorspace) && io->a != NULL) {
    // Before rescaling, we premultiply the luma directly into the io->y
    // internal buffer. This is OK since these samples are not used for
-    // intra-prediction (the top samples are saved in cache_y_/u_/v_).
+    // intra-prediction (the top samples are saved in cache_y/u/v).
    // But we need to cast the const away, though.
    WebPMultRows((uint8_t*)io->y, io->y_stride,
                 io->a, io->width, io->mb_w, mb_h, 0);
@@ -271,9 +279,9 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
 static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
                                int expected_num_lines_out) {
  const WebPYUVABuffer* const buf = &p->output->u.YUVA;
-  uint8_t* const dst_a = buf->a + (size_t)p->last_y * buf->a_stride;
+  uint8_t* const dst_a = buf->a + (ptrdiff_t)p->last_y * buf->a_stride;
  if (io->a != NULL) {
-    uint8_t* const dst_y = buf->y + (size_t)p->last_y * buf->y_stride;
+    uint8_t* const dst_y = buf->y + (ptrdiff_t)p->last_y * buf->y_stride;
    const int num_lines_out = Rescale(io->a, io->width, io->mb_h, p->scaler_a);
    assert(expected_num_lines_out == num_lines_out);
    if (num_lines_out > 0) {   // unmultiply the Y
@@ -362,7 +370,7 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) {
  const WebPYUV444Converter convert =
      WebPYUV444Converters[p->output->colorspace];
  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
-  uint8_t* dst = buf->rgba + (size_t)y_pos * buf->stride;
+  uint8_t* dst = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
  int num_lines_out = 0;
  // For RGB rescaling, because of the YUV420, current scan position
  // U/V can be +1/-1 line from the Y one.  Hence the double test.
@@ -389,14 +397,14 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
  while (j < mb_h) {
    const int y_lines_in =
        WebPRescalerImport(p->scaler_y, mb_h - j,
-                           io->y + (size_t)j * io->y_stride, io->y_stride);
+                           io->y + (ptrdiff_t)j * io->y_stride, io->y_stride);
    j += y_lines_in;
    if (WebPRescaleNeededLines(p->scaler_u, uv_mb_h - uv_j)) {
      const int u_lines_in = WebPRescalerImport(
-          p->scaler_u, uv_mb_h - uv_j, io->u + (size_t)uv_j * io->uv_stride,
+          p->scaler_u, uv_mb_h - uv_j, io->u + (ptrdiff_t)uv_j * io->uv_stride,
          io->uv_stride);
      const int v_lines_in = WebPRescalerImport(
-          p->scaler_v, uv_mb_h - uv_j, io->v + (size_t)uv_j * io->uv_stride,
+          p->scaler_v, uv_mb_h - uv_j, io->v + (ptrdiff_t)uv_j * io->uv_stride,
          io->uv_stride);
      (void)v_lines_in;   // remove a gcc warning
      assert(u_lines_in == v_lines_in);
@@ -409,7 +417,7 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {

 static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
-  uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * buf->stride;
+  uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
  const WEBP_CSP_MODE colorspace = p->output->colorspace;
  const int alpha_first =
      (colorspace == MODE_ARGB || colorspace == MODE_Argb);
@@ -437,7 +445,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
 static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
                               int max_lines_out) {
  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
-  uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * buf->stride;
+  uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
 #if (WEBP_SWAP_16BIT_CSP == 1)
  uint8_t* alpha_dst = base_rgba;
 #else
@@ -476,7 +484,7 @@ static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
    int lines_left = expected_num_out_lines;
    const int y_end = p->last_y + lines_left;
    while (lines_left > 0) {
-      const int64_t row_offset = (int64_t)scaler->src_y - io->mb_y;
+      const int64_t row_offset = (ptrdiff_t)scaler->src_y - io->mb_y;
      WebPRescalerImport(scaler, io->mb_h + io->mb_y - scaler->src_y,
                         io->a + row_offset * io->width, io->width);
      lines_left -= p->emit_alpha_row(p, y_end - lines_left, lines_left);
--- a/thirdparty/libwebp/src/dec/quant_dec.c
+++ b/thirdparty/libwebp/src/dec/quant_dec.c
@@ -11,7 +11,11 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include "src/dec/common_dec.h"
+#include "src/dec/vp8_dec.h"
 #include "src/dec/vp8i_dec.h"
+#include "src/utils/bit_reader_utils.h"
+#include "src/webp/types.h"

 static WEBP_INLINE int clip(int v, int M) {
  return v < 0 ? 0 : v > M ? M : v;
@@ -60,7 +64,7 @@ static const uint16_t kAcTable[128] = {
 // Paragraph 9.6

 void VP8ParseQuant(VP8Decoder* const dec) {
-  VP8BitReader* const br = &dec->br_;
+  VP8BitReader* const br = &dec->br;
  const int base_q0 = VP8GetValue(br, 7, "global-header");
  const int dqy1_dc = VP8Get(br, "global-header") ?
       VP8GetSignedValue(br, 4, "global-header") : 0;
@@ -73,43 +77,42 @@ void VP8ParseQuant(VP8Decoder* const dec) {
  const int dquv_ac = VP8Get(br, "global-header") ?
       VP8GetSignedValue(br, 4, "global-header") : 0;

-  const VP8SegmentHeader* const hdr = &dec->segment_hdr_;
+  const VP8SegmentHeader* const hdr = &dec->segment_hdr;
  int i;

  for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
    int q;
-    if (hdr->use_segment_) {
-      q = hdr->quantizer_[i];
-      if (!hdr->absolute_delta_) {
+    if (hdr->use_segment) {
+      q = hdr->quantizer[i];
+      if (!hdr->absolute_delta) {
        q += base_q0;
      }
    } else {
      if (i > 0) {
-        dec->dqm_[i] = dec->dqm_[0];
+        dec->dqm[i] = dec->dqm[0];
        continue;
      } else {
        q = base_q0;
      }
    }
    {
-      VP8QuantMatrix* const m = &dec->dqm_[i];
-      m->y1_mat_[0] = kDcTable[clip(q + dqy1_dc, 127)];
-      m->y1_mat_[1] = kAcTable[clip(q + 0,       127)];
+      VP8QuantMatrix* const m = &dec->dqm[i];
+      m->y1_mat[0] = kDcTable[clip(q + dqy1_dc, 127)];
+      m->y1_mat[1] = kAcTable[clip(q + 0,       127)];

-      m->y2_mat_[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2;
+      m->y2_mat[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2;
      // For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
      // The smallest precision for that is '(x*6349) >> 12' but 16 is a good
      // word size.
-      m->y2_mat_[1] = (kAcTable[clip(q + dqy2_ac, 127)] * 101581) >> 16;
-      if (m->y2_mat_[1] < 8) m->y2_mat_[1] = 8;
+      m->y2_mat[1] = (kAcTable[clip(q + dqy2_ac, 127)] * 101581) >> 16;
+      if (m->y2_mat[1] < 8) m->y2_mat[1] = 8;

-      m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)];
-      m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)];
+      m->uv_mat[0] = kDcTable[clip(q + dquv_dc, 117)];
+      m->uv_mat[1] = kAcTable[clip(q + dquv_ac, 127)];

-      m->uv_quant_ = q + dquv_ac;   // for dithering strength evaluation
+      m->uv_quant = q + dquv_ac;   // for dithering strength evaluation
    }
  }
 }

 //------------------------------------------------------------------------------
-
--- a/thirdparty/libwebp/src/dec/tree_dec.c
+++ b/thirdparty/libwebp/src/dec/tree_dec.c
@@ -11,9 +11,15 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include <string.h>
+
+#include "src/dec/common_dec.h"
+#include "src/webp/types.h"
+#include "src/dec/vp8_dec.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dsp/cpu.h"
 #include "src/utils/bit_reader_inl_utils.h"
+#include "src/utils/bit_reader_utils.h"

 #if !defined(USE_GENERIC_TREE)
 #if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 && \
@@ -284,40 +290,40 @@ static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
 };

 void VP8ResetProba(VP8Proba* const proba) {
-  memset(proba->segments_, 255u, sizeof(proba->segments_));
-  // proba->bands_[][] is initialized later
+  memset(proba->segments, 255u, sizeof(proba->segments));
+  // proba->bands[][] is initialized later
 }

 static void ParseIntraMode(VP8BitReader* const br,
                           VP8Decoder* const dec, int mb_x) {
-  uint8_t* const top = dec->intra_t_ + 4 * mb_x;
-  uint8_t* const left = dec->intra_l_;
-  VP8MBData* const block = dec->mb_data_ + mb_x;
+  uint8_t* const top = dec->intra_t + 4 * mb_x;
+  uint8_t* const left = dec->intra_l;
+  VP8MBData* const block = dec->mb_data + mb_x;

  // Note: we don't save segment map (yet), as we don't expect
  // to decode more than 1 keyframe.
-  if (dec->segment_hdr_.update_map_) {
+  if (dec->segment_hdr.update_map) {
    // Hardcoded tree parsing
-    block->segment_ = !VP8GetBit(br, dec->proba_.segments_[0], "segments")
-                    ?  VP8GetBit(br, dec->proba_.segments_[1], "segments")
-                    :  VP8GetBit(br, dec->proba_.segments_[2], "segments") + 2;
+    block->segment = !VP8GetBit(br, dec->proba.segments[0], "segments")
+                   ?  VP8GetBit(br, dec->proba.segments[1], "segments")
+                   :  VP8GetBit(br, dec->proba.segments[2], "segments") + 2;
  } else {
-    block->segment_ = 0;  // default for intra
+    block->segment = 0;  // default for intra
  }
-  if (dec->use_skip_proba_) block->skip_ = VP8GetBit(br, dec->skip_p_, "skip");
+  if (dec->use_skip_proba) block->skip = VP8GetBit(br, dec->skip_p, "skip");

-  block->is_i4x4_ = !VP8GetBit(br, 145, "block-size");
-  if (!block->is_i4x4_) {
+  block->is_i4x4 = !VP8GetBit(br, 145, "block-size");
+  if (!block->is_i4x4) {
    // Hardcoded 16x16 intra-mode decision tree.
    const int ymode =
        VP8GetBit(br, 156, "pred-modes") ?
            (VP8GetBit(br, 128, "pred-modes") ? TM_PRED : H_PRED) :
            (VP8GetBit(br, 163, "pred-modes") ? V_PRED : DC_PRED);
-    block->imodes_[0] = ymode;
+    block->imodes[0] = ymode;
    memset(top, ymode, 4 * sizeof(*top));
    memset(left, ymode, 4 * sizeof(*left));
  } else {
-    uint8_t* modes = block->imodes_;
+    uint8_t* modes = block->imodes;
    int y;
    for (y = 0; y < 4; ++y) {
      int ymode = left[y];
@@ -354,17 +360,17 @@ static void ParseIntraMode(VP8BitReader* const br,
    }
  }
  // Hardcoded UVMode decision tree
-  block->uvmode_ = !VP8GetBit(br, 142, "pred-modes-uv") ? DC_PRED
-                 : !VP8GetBit(br, 114, "pred-modes-uv") ? V_PRED
-                 : VP8GetBit(br, 183, "pred-modes-uv") ? TM_PRED : H_PRED;
+  block->uvmode = !VP8GetBit(br, 142, "pred-modes-uv") ? DC_PRED
+                : !VP8GetBit(br, 114, "pred-modes-uv") ? V_PRED
+                : VP8GetBit(br, 183, "pred-modes-uv") ? TM_PRED : H_PRED;
 }

 int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
  int mb_x;
-  for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
+  for (mb_x = 0; mb_x < dec->mb_w; ++mb_x) {
    ParseIntraMode(br, dec, mb_x);
  }
-  return !dec->br_.eof_;
+  return !dec->br.eof;
 }

 //------------------------------------------------------------------------------
@@ -514,7 +520,7 @@ static const uint8_t kBands[16 + 1] = {
 };

 void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
-  VP8Proba* const proba = &dec->proba_;
+  VP8Proba* const proba = &dec->proba;
  int t, b, c, p;
  for (t = 0; t < NUM_TYPES; ++t) {
    for (b = 0; b < NUM_BANDS; ++b) {
@@ -524,16 +530,16 @@ void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
              VP8GetBit(br, CoeffsUpdateProba[t][b][c][p], "global-header") ?
                        VP8GetValue(br, 8, "global-header") :
                        CoeffsProba0[t][b][c][p];
-          proba->bands_[t][b].probas_[c][p] = v;
+          proba->bands[t][b].probas[c][p] = v;
        }
      }
    }
    for (b = 0; b < 16 + 1; ++b) {
-      proba->bands_ptr_[t][b] = &proba->bands_[t][kBands[b]];
+      proba->bands_ptr[t][b] = &proba->bands[t][kBands[b]];
    }
  }
-  dec->use_skip_proba_ = VP8Get(br, "global-header");
-  if (dec->use_skip_proba_) {
-    dec->skip_p_ = VP8GetValue(br, 8, "global-header");
+  dec->use_skip_proba = VP8Get(br, "global-header");
+  if (dec->use_skip_proba) {
+    dec->skip_p = VP8GetValue(br, 8, "global-header");
  }
 }
--- a/thirdparty/libwebp/src/dec/vp8_dec.c
+++ b/thirdparty/libwebp/src/dec/vp8_dec.c
@@ -11,14 +11,25 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include <assert.h>
 #include <stdlib.h>
+#include <string.h>

 #include "src/dec/alphai_dec.h"
+#include "src/dec/common_dec.h"
+#include "src/dec/vp8_dec.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dec/vp8li_dec.h"
 #include "src/dec/webpi_dec.h"
+#include "src/dsp/cpu.h"
+#include "src/dsp/dsp.h"
 #include "src/utils/bit_reader_inl_utils.h"
+#include "src/utils/bit_reader_utils.h"
+#include "src/utils/thread_utils.h"
 #include "src/utils/utils.h"
+#include "src/webp/decode.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------

@@ -40,8 +51,8 @@ static void InitGetCoeffs(void);
 // VP8Decoder

 static void SetOk(VP8Decoder* const dec) {
-  dec->status_ = VP8_STATUS_OK;
-  dec->error_msg_ = "OK";
+  dec->status = VP8_STATUS_OK;
+  dec->error_msg = "OK";
 }

 int VP8InitIoInternal(VP8Io* const io, int version) {
@@ -58,9 +69,9 @@ VP8Decoder* VP8New(void) {
  VP8Decoder* const dec = (VP8Decoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
  if (dec != NULL) {
    SetOk(dec);
-    WebPGetWorkerInterface()->Init(&dec->worker_);
-    dec->ready_ = 0;
-    dec->num_parts_minus_one_ = 0;
+    WebPGetWorkerInterface()->Init(&dec->worker);
+    dec->ready = 0;
+    dec->num_parts_minus_one = 0;
    InitGetCoeffs();
  }
  return dec;
@@ -68,13 +79,13 @@ VP8Decoder* VP8New(void) {

 VP8StatusCode VP8Status(VP8Decoder* const dec) {
  if (!dec) return VP8_STATUS_INVALID_PARAM;
-  return dec->status_;
+  return dec->status;
 }

 const char* VP8StatusMessage(VP8Decoder* const dec) {
  if (dec == NULL) return "no object";
-  if (!dec->error_msg_) return "OK";
-  return dec->error_msg_;
+  if (!dec->error_msg) return "OK";
+  return dec->error_msg;
 }

 void VP8Delete(VP8Decoder* const dec) {
@@ -87,12 +98,12 @@ void VP8Delete(VP8Decoder* const dec) {
 int VP8SetError(VP8Decoder* const dec,
                VP8StatusCode error, const char* const msg) {
  // VP8_STATUS_SUSPENDED is only meaningful in incremental decoding.
-  assert(dec->incremental_ || error != VP8_STATUS_SUSPENDED);
+  assert(dec->incremental || error != VP8_STATUS_SUSPENDED);
  // The oldest error reported takes precedence over the new one.
-  if (dec->status_ == VP8_STATUS_OK) {
-    dec->status_ = error;
-    dec->error_msg_ = msg;
-    dec->ready_ = 0;
+  if (dec->status == VP8_STATUS_OK) {
+    dec->status = error;
+    dec->error_msg = msg;
+    dec->ready = 0;
  }
  return 0;
 }
@@ -151,11 +162,11 @@ int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size,

 static void ResetSegmentHeader(VP8SegmentHeader* const hdr) {
  assert(hdr != NULL);
-  hdr->use_segment_ = 0;
-  hdr->update_map_ = 0;
-  hdr->absolute_delta_ = 1;
-  memset(hdr->quantizer_, 0, sizeof(hdr->quantizer_));
-  memset(hdr->filter_strength_, 0, sizeof(hdr->filter_strength_));
+  hdr->use_segment = 0;
+  hdr->update_map = 0;
+  hdr->absolute_delta = 1;
+  memset(hdr->quantizer, 0, sizeof(hdr->quantizer));
+  memset(hdr->filter_strength, 0, sizeof(hdr->filter_strength));
 }

 // Paragraph 9.3
@@ -163,32 +174,32 @@ static int ParseSegmentHeader(VP8BitReader* br,
                              VP8SegmentHeader* hdr, VP8Proba* proba) {
  assert(br != NULL);
  assert(hdr != NULL);
-  hdr->use_segment_ = VP8Get(br, "global-header");
-  if (hdr->use_segment_) {
-    hdr->update_map_ = VP8Get(br, "global-header");
+  hdr->use_segment = VP8Get(br, "global-header");
+  if (hdr->use_segment) {
+    hdr->update_map = VP8Get(br, "global-header");
    if (VP8Get(br, "global-header")) {   // update data
      int s;
-      hdr->absolute_delta_ = VP8Get(br, "global-header");
+      hdr->absolute_delta = VP8Get(br, "global-header");
      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        hdr->quantizer_[s] = VP8Get(br, "global-header") ?
+        hdr->quantizer[s] = VP8Get(br, "global-header") ?
            VP8GetSignedValue(br, 7, "global-header") : 0;
      }
      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        hdr->filter_strength_[s] = VP8Get(br, "global-header") ?
+        hdr->filter_strength[s] = VP8Get(br, "global-header") ?
            VP8GetSignedValue(br, 6, "global-header") : 0;
      }
    }
-    if (hdr->update_map_) {
+    if (hdr->update_map) {
      int s;
      for (s = 0; s < MB_FEATURE_TREE_PROBS; ++s) {
-        proba->segments_[s] = VP8Get(br, "global-header") ?
+        proba->segments[s] = VP8Get(br, "global-header") ?
            VP8GetValue(br, 8, "global-header") : 255u;
      }
    }
  } else {
-    hdr->update_map_ = 0;
+    hdr->update_map = 0;
  }
-  return !br->eof_;
+  return !br->eof;
 }

 // Paragraph 9.5
@@ -202,7 +213,7 @@ static int ParseSegmentHeader(VP8BitReader* br,
 // If the partitions were positioned ok, VP8_STATUS_OK is returned.
 static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
                                     const uint8_t* buf, size_t size) {
-  VP8BitReader* const br = &dec->br_;
+  VP8BitReader* const br = &dec->br;
  const uint8_t* sz = buf;
  const uint8_t* buf_end = buf + size;
  const uint8_t* part_start;
@@ -210,8 +221,8 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
  size_t last_part;
  size_t p;

-  dec->num_parts_minus_one_ = (1 << VP8GetValue(br, 2, "global-header")) - 1;
-  last_part = dec->num_parts_minus_one_;
+  dec->num_parts_minus_one = (1 << VP8GetValue(br, 2, "global-header")) - 1;
+  last_part = dec->num_parts_minus_one;
  if (size < 3 * last_part) {
    // we can't even read the sizes with sz[]! That's a failure.
    return VP8_STATUS_NOT_ENOUGH_DATA;
@@ -221,42 +232,42 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
  for (p = 0; p < last_part; ++p) {
    size_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16);
    if (psize > size_left) psize = size_left;
-    VP8InitBitReader(dec->parts_ + p, part_start, psize);
+    VP8InitBitReader(dec->parts + p, part_start, psize);
    part_start += psize;
    size_left -= psize;
    sz += 3;
  }
-  VP8InitBitReader(dec->parts_ + last_part, part_start, size_left);
+  VP8InitBitReader(dec->parts + last_part, part_start, size_left);
  if (part_start < buf_end) return VP8_STATUS_OK;
-  return dec->incremental_
+  return dec->incremental
             ? VP8_STATUS_SUSPENDED  // Init is ok, but there's not enough data
             : VP8_STATUS_NOT_ENOUGH_DATA;
 }

 // Paragraph 9.4
 static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
-  VP8FilterHeader* const hdr = &dec->filter_hdr_;
-  hdr->simple_    = VP8Get(br, "global-header");
-  hdr->level_     = VP8GetValue(br, 6, "global-header");
-  hdr->sharpness_ = VP8GetValue(br, 3, "global-header");
-  hdr->use_lf_delta_ = VP8Get(br, "global-header");
-  if (hdr->use_lf_delta_) {
+  VP8FilterHeader* const hdr = &dec->filter_hdr;
+  hdr->simple    = VP8Get(br, "global-header");
+  hdr->level     = VP8GetValue(br, 6, "global-header");
+  hdr->sharpness = VP8GetValue(br, 3, "global-header");
+  hdr->use_lf_delta = VP8Get(br, "global-header");
+  if (hdr->use_lf_delta) {
    if (VP8Get(br, "global-header")) {   // update lf-delta?
      int i;
      for (i = 0; i < NUM_REF_LF_DELTAS; ++i) {
        if (VP8Get(br, "global-header")) {
-          hdr->ref_lf_delta_[i] = VP8GetSignedValue(br, 6, "global-header");
+          hdr->ref_lf_delta[i] = VP8GetSignedValue(br, 6, "global-header");
        }
      }
      for (i = 0; i < NUM_MODE_LF_DELTAS; ++i) {
        if (VP8Get(br, "global-header")) {
-          hdr->mode_lf_delta_[i] = VP8GetSignedValue(br, 6, "global-header");
+          hdr->mode_lf_delta[i] = VP8GetSignedValue(br, 6, "global-header");
        }
      }
    }
  }
-  dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
-  return !br->eof_;
+  dec->filter_type = (hdr->level == 0) ? 0 : hdr->simple ? 1 : 2;
+  return !br->eof;
 }

 // Topmost call
@@ -286,16 +297,16 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
  // Paragraph 9.1
  {
    const uint32_t bits = buf[0] | (buf[1] << 8) | (buf[2] << 16);
-    frm_hdr = &dec->frm_hdr_;
-    frm_hdr->key_frame_ = !(bits & 1);
-    frm_hdr->profile_ = (bits >> 1) & 7;
-    frm_hdr->show_ = (bits >> 4) & 1;
-    frm_hdr->partition_length_ = (bits >> 5);
-    if (frm_hdr->profile_ > 3) {
+    frm_hdr = &dec->frm_hdr;
+    frm_hdr->key_frame = !(bits & 1);
+    frm_hdr->profile = (bits >> 1) & 7;
+    frm_hdr->show = (bits >> 4) & 1;
+    frm_hdr->partition_length = (bits >> 5);
+    if (frm_hdr->profile > 3) {
      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
                         "Incorrect keyframe parameters.");
    }
-    if (!frm_hdr->show_) {
+    if (!frm_hdr->show) {
      return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
                         "Frame not displayable.");
    }
@@ -303,8 +314,8 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
    buf_size -= 3;
  }

-  pic_hdr = &dec->pic_hdr_;
-  if (frm_hdr->key_frame_) {
+  pic_hdr = &dec->pic_hdr;
+  if (frm_hdr->key_frame) {
    // Paragraph 9.2
    if (buf_size < 7) {
      return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
@@ -314,20 +325,20 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
                         "Bad code word");
    }
-    pic_hdr->width_ = ((buf[4] << 8) | buf[3]) & 0x3fff;
-    pic_hdr->xscale_ = buf[4] >> 6;   // ratio: 1, 5/4 5/3 or 2
-    pic_hdr->height_ = ((buf[6] << 8) | buf[5]) & 0x3fff;
-    pic_hdr->yscale_ = buf[6] >> 6;
+    pic_hdr->width = ((buf[4] << 8) | buf[3]) & 0x3fff;
+    pic_hdr->xscale = buf[4] >> 6;   // ratio: 1, 5/4 5/3 or 2
+    pic_hdr->height = ((buf[6] << 8) | buf[5]) & 0x3fff;
+    pic_hdr->yscale = buf[6] >> 6;
    buf += 7;
    buf_size -= 7;

-    dec->mb_w_ = (pic_hdr->width_ + 15) >> 4;
-    dec->mb_h_ = (pic_hdr->height_ + 15) >> 4;
+    dec->mb_w = (pic_hdr->width + 15) >> 4;
+    dec->mb_h = (pic_hdr->height + 15) >> 4;

    // Setup default output area (can be later modified during io->setup())
-    io->width = pic_hdr->width_;
-    io->height = pic_hdr->height_;
-    // IMPORTANT! use some sane dimensions in crop_* and scaled_* fields.
+    io->width = pic_hdr->width;
+    io->height = pic_hdr->height;
+    // IMPORTANT! use some sane dimensions in crop* and scaled* fields.
    // So they can be used interchangeably without always testing for
    // 'use_cropping'.
    io->use_cropping = 0;
@@ -342,27 +353,27 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
    io->mb_w = io->width;   // for soundness
    io->mb_h = io->height;  // ditto

-    VP8ResetProba(&dec->proba_);
-    ResetSegmentHeader(&dec->segment_hdr_);
+    VP8ResetProba(&dec->proba);
+    ResetSegmentHeader(&dec->segment_hdr);
  }

-  // Check if we have all the partition #0 available, and initialize dec->br_
+  // Check if we have all the partition #0 available, and initialize dec->br
  // to read this partition (and this partition only).
-  if (frm_hdr->partition_length_ > buf_size) {
+  if (frm_hdr->partition_length > buf_size) {
    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
                       "bad partition length");
  }

-  br = &dec->br_;
-  VP8InitBitReader(br, buf, frm_hdr->partition_length_);
-  buf += frm_hdr->partition_length_;
-  buf_size -= frm_hdr->partition_length_;
+  br = &dec->br;
+  VP8InitBitReader(br, buf, frm_hdr->partition_length);
+  buf += frm_hdr->partition_length;
+  buf_size -= frm_hdr->partition_length;

-  if (frm_hdr->key_frame_) {
-    pic_hdr->colorspace_ = VP8Get(br, "global-header");
-    pic_hdr->clamp_type_ = VP8Get(br, "global-header");
+  if (frm_hdr->key_frame) {
+    pic_hdr->colorspace = VP8Get(br, "global-header");
+    pic_hdr->clamp_type = VP8Get(br, "global-header");
  }
-  if (!ParseSegmentHeader(br, &dec->segment_hdr_, &dec->proba_)) {
+  if (!ParseSegmentHeader(br, &dec->segment_hdr, &dec->proba)) {
    return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
                       "cannot parse segment header");
  }
@@ -380,17 +391,17 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
  VP8ParseQuant(dec);

  // Frame buffer marking
-  if (!frm_hdr->key_frame_) {
+  if (!frm_hdr->key_frame) {
    return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
                       "Not a key frame.");
  }

-  VP8Get(br, "global-header");   // ignore the value of update_proba_
+  VP8Get(br, "global-header");   // ignore the value of 'update_proba'

  VP8ParseProba(br, dec);

  // sanitized state
-  dec->ready_ = 1;
+  dec->ready = 1;
  return 1;
 }

@@ -443,17 +454,17 @@ static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
 static int GetCoeffsFast(VP8BitReader* const br,
                         const VP8BandProbas* const prob[],
                         int ctx, const quant_t dq, int n, int16_t* out) {
-  const uint8_t* p = prob[n]->probas_[ctx];
+  const uint8_t* p = prob[n]->probas[ctx];
  for (; n < 16; ++n) {
    if (!VP8GetBit(br, p[0], "coeffs")) {
      return n;  // previous coeff was last non-zero coeff
    }
    while (!VP8GetBit(br, p[1], "coeffs")) {       // sequence of zero coeffs
-      p = prob[++n]->probas_[0];
+      p = prob[++n]->probas[0];
      if (n == 16) return 16;
    }
    {        // non zero coeff
-      const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
+      const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas[0];
      int v;
      if (!VP8GetBit(br, p[2], "coeffs")) {
        v = 1;
@@ -473,17 +484,17 @@ static int GetCoeffsFast(VP8BitReader* const br,
 static int GetCoeffsAlt(VP8BitReader* const br,
                        const VP8BandProbas* const prob[],
                        int ctx, const quant_t dq, int n, int16_t* out) {
-  const uint8_t* p = prob[n]->probas_[ctx];
+  const uint8_t* p = prob[n]->probas[ctx];
  for (; n < 16; ++n) {
    if (!VP8GetBitAlt(br, p[0], "coeffs")) {
      return n;  // previous coeff was last non-zero coeff
    }
    while (!VP8GetBitAlt(br, p[1], "coeffs")) {       // sequence of zero coeffs
-      p = prob[++n]->probas_[0];
+      p = prob[++n]->probas[0];
      if (n == 16) return 16;
    }
    {        // non zero coeff
-      const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
+      const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas[0];
      int v;
      if (!VP8GetBitAlt(br, p[2], "coeffs")) {
        v = 1;
@@ -516,12 +527,12 @@ static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {

 static int ParseResiduals(VP8Decoder* const dec,
                          VP8MB* const mb, VP8BitReader* const token_br) {
-  const VP8BandProbas* (* const bands)[16 + 1] = dec->proba_.bands_ptr_;
+  const VP8BandProbas* (* const bands)[16 + 1] = dec->proba.bands_ptr;
  const VP8BandProbas* const * ac_proba;
-  VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
-  const VP8QuantMatrix* const q = &dec->dqm_[block->segment_];
-  int16_t* dst = block->coeffs_;
-  VP8MB* const left_mb = dec->mb_info_ - 1;
+  VP8MBData* const block = dec->mb_data + dec->mb_x;
+  const VP8QuantMatrix* const q = &dec->dqm[block->segment];
+  int16_t* dst = block->coeffs;
+  VP8MB* const left_mb = dec->mb_info - 1;
  uint8_t tnz, lnz;
  uint32_t non_zero_y = 0;
  uint32_t non_zero_uv = 0;
@@ -530,11 +541,11 @@ static int ParseResiduals(VP8Decoder* const dec,
  int first;

  memset(dst, 0, 384 * sizeof(*dst));
-  if (!block->is_i4x4_) {    // parse DC
+  if (!block->is_i4x4) {    // parse DC
    int16_t dc[16] = { 0 };
-    const int ctx = mb->nz_dc_ + left_mb->nz_dc_;
-    const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat_, 0, dc);
-    mb->nz_dc_ = left_mb->nz_dc_ = (nz > 0);
+    const int ctx = mb->nz_dc + left_mb->nz_dc;
+    const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat, 0, dc);
+    mb->nz_dc = left_mb->nz_dc = (nz > 0);
    if (nz > 1) {   // more than just the DC -> perform the full transform
      VP8TransformWHT(dc, dst);
    } else {        // only DC is non-zero -> inlined simplified transform
@@ -549,14 +560,14 @@ static int ParseResiduals(VP8Decoder* const dec,
    ac_proba = bands[3];
  }

-  tnz = mb->nz_ & 0x0f;
-  lnz = left_mb->nz_ & 0x0f;
+  tnz = mb->nz & 0x0f;
+  lnz = left_mb->nz & 0x0f;
  for (y = 0; y < 4; ++y) {
    int l = lnz & 1;
    uint32_t nz_coeffs = 0;
    for (x = 0; x < 4; ++x) {
      const int ctx = l + (tnz & 1);
-      const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat_, first, dst);
+      const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat, first, dst);
      l = (nz > first);
      tnz = (tnz >> 1) | (l << 7);
      nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
@@ -571,13 +582,13 @@ static int ParseResiduals(VP8Decoder* const dec,

  for (ch = 0; ch < 4; ch += 2) {
    uint32_t nz_coeffs = 0;
-    tnz = mb->nz_ >> (4 + ch);
-    lnz = left_mb->nz_ >> (4 + ch);
+    tnz = mb->nz >> (4 + ch);
+    lnz = left_mb->nz >> (4 + ch);
    for (y = 0; y < 2; ++y) {
      int l = lnz & 1;
      for (x = 0; x < 2; ++x) {
        const int ctx = l + (tnz & 1);
-        const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat_, 0, dst);
+        const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat, 0, dst);
        l = (nz > 0);
        tnz = (tnz >> 1) | (l << 3);
        nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
@@ -591,16 +602,16 @@ static int ParseResiduals(VP8Decoder* const dec,
    out_t_nz |= (tnz << 4) << ch;
    out_l_nz |= (lnz & 0xf0) << ch;
  }
-  mb->nz_ = out_t_nz;
-  left_mb->nz_ = out_l_nz;
+  mb->nz = out_t_nz;
+  left_mb->nz = out_l_nz;

-  block->non_zero_y_ = non_zero_y;
-  block->non_zero_uv_ = non_zero_uv;
+  block->non_zero_y = non_zero_y;
+  block->non_zero_uv = non_zero_uv;

  // We look at the mode-code of each block and check if some blocks have less
  // than three non-zero coeffs (code < 2). This is to avoid dithering flat and
  // empty blocks.
-  block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_;
+  block->dither = (non_zero_uv & 0xaaaa) ? 0 : q->dither;

  return !(non_zero_y | non_zero_uv);  // will be used for further optimization
 }
@@ -609,50 +620,50 @@ static int ParseResiduals(VP8Decoder* const dec,
 // Main loop

 int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
-  VP8MB* const left = dec->mb_info_ - 1;
-  VP8MB* const mb = dec->mb_info_ + dec->mb_x_;
-  VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
-  int skip = dec->use_skip_proba_ ? block->skip_ : 0;
+  VP8MB* const left = dec->mb_info - 1;
+  VP8MB* const mb = dec->mb_info + dec->mb_x;
+  VP8MBData* const block = dec->mb_data + dec->mb_x;
+  int skip = dec->use_skip_proba ? block->skip : 0;

  if (!skip) {
    skip = ParseResiduals(dec, mb, token_br);
  } else {
-    left->nz_ = mb->nz_ = 0;
-    if (!block->is_i4x4_) {
-      left->nz_dc_ = mb->nz_dc_ = 0;
+    left->nz = mb->nz = 0;
+    if (!block->is_i4x4) {
+      left->nz_dc = mb->nz_dc = 0;
    }
-    block->non_zero_y_ = 0;
-    block->non_zero_uv_ = 0;
-    block->dither_ = 0;
+    block->non_zero_y = 0;
+    block->non_zero_uv = 0;
+    block->dither = 0;
  }

-  if (dec->filter_type_ > 0) {  // store filter info
-    VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
-    *finfo = dec->fstrengths_[block->segment_][block->is_i4x4_];
-    finfo->f_inner_ |= !skip;
+  if (dec->filter_type > 0) {  // store filter info
+    VP8FInfo* const finfo = dec->f_info + dec->mb_x;
+    *finfo = dec->fstrengths[block->segment][block->is_i4x4];
+    finfo->f_inner |= !skip;
  }

-  return !token_br->eof_;
+  return !token_br->eof;
 }

 void VP8InitScanline(VP8Decoder* const dec) {
-  VP8MB* const left = dec->mb_info_ - 1;
-  left->nz_ = 0;
-  left->nz_dc_ = 0;
-  memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_));
-  dec->mb_x_ = 0;
+  VP8MB* const left = dec->mb_info - 1;
+  left->nz = 0;
+  left->nz_dc = 0;
+  memset(dec->intra_l, B_DC_PRED, sizeof(dec->intra_l));
+  dec->mb_x = 0;
 }

 static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
-  for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) {
+  for (dec->mb_y = 0; dec->mb_y < dec->br_mb_y; ++dec->mb_y) {
    // Parse bitstream for this row.
    VP8BitReader* const token_br =
-        &dec->parts_[dec->mb_y_ & dec->num_parts_minus_one_];
-    if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
+        &dec->parts[dec->mb_y & dec->num_parts_minus_one];
+    if (!VP8ParseIntraModeRow(&dec->br, dec)) {
      return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
                         "Premature end-of-partition0 encountered.");
    }
-    for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
+    for (; dec->mb_x < dec->mb_w; ++dec->mb_x) {
      if (!VP8DecodeMB(dec, token_br)) {
        return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
                           "Premature end-of-file encountered.");
@@ -665,8 +676,8 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
      return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
    }
  }
-  if (dec->mt_method_ > 0) {
-    if (!WebPGetWorkerInterface()->Sync(&dec->worker_)) return 0;
+  if (dec->mt_method > 0) {
+    if (!WebPGetWorkerInterface()->Sync(&dec->worker)) return 0;
  }

  return 1;
@@ -683,12 +694,12 @@ int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
                       "NULL VP8Io parameter in VP8Decode().");
  }

-  if (!dec->ready_) {
+  if (!dec->ready) {
    if (!VP8GetHeaders(dec, io)) {
      return 0;
    }
  }
-  assert(dec->ready_);
+  assert(dec->ready);

  // Finish setting up the decoding parameter. Will call io->setup().
  ok = (VP8EnterCritical(dec, io) == VP8_STATUS_OK);
@@ -708,7 +719,7 @@ int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
    return 0;
  }

-  dec->ready_ = 0;
+  dec->ready = 0;
  return ok;
 }

@@ -716,13 +727,13 @@ void VP8Clear(VP8Decoder* const dec) {
  if (dec == NULL) {
    return;
  }
-  WebPGetWorkerInterface()->End(&dec->worker_);
+  WebPGetWorkerInterface()->End(&dec->worker);
  WebPDeallocateAlphaMemory(dec);
-  WebPSafeFree(dec->mem_);
-  dec->mem_ = NULL;
-  dec->mem_size_ = 0;
-  memset(&dec->br_, 0, sizeof(dec->br_));
-  dec->ready_ = 0;
+  WebPSafeFree(dec->mem);
+  dec->mem = NULL;
+  dec->mem_size = 0;
+  memset(&dec->br, 0, sizeof(dec->br));
+  dec->ready = 0;
 }

 //------------------------------------------------------------------------------
--- a/thirdparty/libwebp/src/dec/vp8_dec.h
+++ b/thirdparty/libwebp/src/dec/vp8_dec.h
@@ -14,6 +14,8 @@
 #ifndef WEBP_DEC_VP8_DEC_H_
 #define WEBP_DEC_VP8_DEC_H_

+#include <stddef.h>
+
 #include "src/webp/decode.h"
 #include "src/webp/types.h"

--- a/thirdparty/libwebp/src/dec/vp8i_dec.h
+++ b/thirdparty/libwebp/src/dec/vp8i_dec.h
@@ -15,12 +15,16 @@
 #define WEBP_DEC_VP8I_DEC_H_

 #include <string.h>     // for memcpy()
+
 #include "src/dec/common_dec.h"
+#include "src/dec/vp8_dec.h"
 #include "src/dec/vp8li_dec.h"
+#include "src/dec/webpi_dec.h"
+#include "src/dsp/dsp.h"
 #include "src/utils/bit_reader_utils.h"
 #include "src/utils/random_utils.h"
 #include "src/utils/thread_utils.h"
-#include "src/dsp/dsp.h"
+#include "src/webp/decode.h"
 #include "src/webp/types.h"

 #ifdef __cplusplus
@@ -32,7 +36,7 @@ extern "C" {

 // version numbers
 #define DEC_MAJ_VERSION 1
-#define DEC_MIN_VERSION 5
+#define DEC_MIN_VERSION 6
 #define DEC_REV_VERSION 0

 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
@@ -69,85 +73,85 @@ extern "C" {
 // Headers

 typedef struct {
-  uint8_t key_frame_;
-  uint8_t profile_;
-  uint8_t show_;
-  uint32_t partition_length_;
+  uint8_t key_frame;
+  uint8_t profile;
+  uint8_t show;
+  uint32_t partition_length;
 } VP8FrameHeader;

 typedef struct {
-  uint16_t width_;
-  uint16_t height_;
-  uint8_t xscale_;
-  uint8_t yscale_;
-  uint8_t colorspace_;   // 0 = YCbCr
-  uint8_t clamp_type_;
+  uint16_t width;
+  uint16_t height;
+  uint8_t xscale;
+  uint8_t yscale;
+  uint8_t colorspace;   // 0 = YCbCr
+  uint8_t clamp_type;
 } VP8PictureHeader;

 // segment features
 typedef struct {
-  int use_segment_;
-  int update_map_;        // whether to update the segment map or not
-  int absolute_delta_;    // absolute or delta values for quantizer and filter
-  int8_t quantizer_[NUM_MB_SEGMENTS];        // quantization changes
-  int8_t filter_strength_[NUM_MB_SEGMENTS];  // filter strength for segments
+  int use_segment;
+  int update_map;        // whether to update the segment map or not
+  int absolute_delta;    // absolute or delta values for quantizer and filter
+  int8_t quantizer[NUM_MB_SEGMENTS];        // quantization changes
+  int8_t filter_strength[NUM_MB_SEGMENTS];  // filter strength for segments
 } VP8SegmentHeader;

 // probas associated to one of the contexts
 typedef uint8_t VP8ProbaArray[NUM_PROBAS];

 typedef struct {   // all the probas associated to one band
-  VP8ProbaArray probas_[NUM_CTX];
+  VP8ProbaArray probas[NUM_CTX];
 } VP8BandProbas;

 // Struct collecting all frame-persistent probabilities.
 typedef struct {
-  uint8_t segments_[MB_FEATURE_TREE_PROBS];
+  uint8_t segments[MB_FEATURE_TREE_PROBS];
  // Type: 0:Intra16-AC  1:Intra16-DC   2:Chroma   3:Intra4
-  VP8BandProbas bands_[NUM_TYPES][NUM_BANDS];
-  const VP8BandProbas* bands_ptr_[NUM_TYPES][16 + 1];
+  VP8BandProbas bands[NUM_TYPES][NUM_BANDS];
+  const VP8BandProbas* bands_ptr[NUM_TYPES][16 + 1];
 } VP8Proba;

 // Filter parameters
 typedef struct {
-  int simple_;                  // 0=complex, 1=simple
-  int level_;                   // [0..63]
-  int sharpness_;               // [0..7]
-  int use_lf_delta_;
-  int ref_lf_delta_[NUM_REF_LF_DELTAS];
-  int mode_lf_delta_[NUM_MODE_LF_DELTAS];
+  int simple;                  // 0=complex, 1=simple
+  int level;                   // [0..63]
+  int sharpness;               // [0..7]
+  int use_lf_delta;
+  int ref_lf_delta[NUM_REF_LF_DELTAS];
+  int mode_lf_delta[NUM_MODE_LF_DELTAS];
 } VP8FilterHeader;

 //------------------------------------------------------------------------------
 // Informations about the macroblocks.

 typedef struct {  // filter specs
-  uint8_t f_limit_;      // filter limit in [3..189], or 0 if no filtering
-  uint8_t f_ilevel_;     // inner limit in [1..63]
-  uint8_t f_inner_;      // do inner filtering?
-  uint8_t hev_thresh_;   // high edge variance threshold in [0..2]
+  uint8_t f_limit;      // filter limit in [3..189], or 0 if no filtering
+  uint8_t f_ilevel;     // inner limit in [1..63]
+  uint8_t f_inner;      // do inner filtering?
+  uint8_t hev_thresh;   // high edge variance threshold in [0..2]
 } VP8FInfo;

 typedef struct {  // Top/Left Contexts used for syntax-parsing
-  uint8_t nz_;        // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
-  uint8_t nz_dc_;     // non-zero DC coeff (1bit)
+  uint8_t nz;        // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
+  uint8_t nz_dc;     // non-zero DC coeff (1bit)
 } VP8MB;

 // Dequantization matrices
 typedef int quant_t[2];      // [DC / AC].  Can be 'uint16_t[2]' too (~slower).
 typedef struct {
-  quant_t y1_mat_, y2_mat_, uv_mat_;
+  quant_t y1_mat, y2_mat, uv_mat;

-  int uv_quant_;   // U/V quantizer value
-  int dither_;     // dithering amplitude (0 = off, max=255)
+  int uv_quant;   // U/V quantizer value
+  int dither;     // dithering amplitude (0 = off, max=255)
 } VP8QuantMatrix;

 // Data needed to reconstruct a macroblock
 typedef struct {
-  int16_t coeffs_[384];   // 384 coeffs = (16+4+4) * 4*4
-  uint8_t is_i4x4_;       // true if intra4x4
-  uint8_t imodes_[16];    // one 16x16 mode (#0) or sixteen 4x4 modes
-  uint8_t uvmode_;        // chroma prediction mode
+  int16_t coeffs[384];   // 384 coeffs = (16+4+4) * 4*4
+  uint8_t is_i4x4;       // true if intra4x4
+  uint8_t imodes[16];    // one 16x16 mode (#0) or sixteen 4x4 modes
+  uint8_t uvmode;        // chroma prediction mode
  // bit-wise info about the content of each sub-4x4 blocks (in decoding order).
  // Each of the 4x4 blocks for y/u/v is associated with a 2b code according to:
  //   code=0 -> no coefficient
@@ -155,21 +159,21 @@ typedef struct {
  //   code=2 -> first three coefficients are non-zero
  //   code=3 -> more than three coefficients are non-zero
  // This allows to call specialized transform functions.
-  uint32_t non_zero_y_;
-  uint32_t non_zero_uv_;
-  uint8_t dither_;      // local dithering strength (deduced from non_zero_*)
-  uint8_t skip_;
-  uint8_t segment_;
+  uint32_t non_zero_y;
+  uint32_t non_zero_uv;
+  uint8_t dither;      // local dithering strength (deduced from non_zero_*)
+  uint8_t skip;
+  uint8_t segment;
 } VP8MBData;

 // Persistent information needed by the parallel processing
 typedef struct {
-  int id_;              // cache row to process (in [0..2])
-  int mb_y_;            // macroblock position of the row
-  int filter_row_;      // true if row-filtering is needed
-  VP8FInfo* f_info_;    // filter strengths (swapped with dec->f_info_)
-  VP8MBData* mb_data_;  // reconstruction data (swapped with dec->mb_data_)
-  VP8Io io_;            // copy of the VP8Io to pass to put()
+  int id;              // cache row to process (in [0..2])
+  int mb_y;            // macroblock position of the row
+  int filter_row;      // true if row-filtering is needed
+  VP8FInfo* f_info;    // filter strengths (swapped with dec->f_info)
+  VP8MBData* mb_data;  // reconstruction data (swapped with dec->mb_data)
+  VP8Io io;            // copy of the VP8Io to pass to put()
 } VP8ThreadContext;

 // Saved top samples, per macroblock. Fits into a cache-line.
@@ -181,89 +185,89 @@ typedef struct {
 // VP8Decoder: the main opaque structure handed over to user

 struct VP8Decoder {
-  VP8StatusCode status_;
-  int ready_;     // true if ready to decode a picture with VP8Decode()
-  const char* error_msg_;  // set when status_ is not OK.
+  VP8StatusCode status;
+  int ready;     // true if ready to decode a picture with VP8Decode()
+  const char* error_msg;  // set when status is not OK.

  // Main data source
-  VP8BitReader br_;
-  int incremental_;  // if true, incremental decoding is expected
+  VP8BitReader br;
+  int incremental;  // if true, incremental decoding is expected

  // headers
-  VP8FrameHeader   frm_hdr_;
-  VP8PictureHeader pic_hdr_;
-  VP8FilterHeader  filter_hdr_;
-  VP8SegmentHeader segment_hdr_;
+  VP8FrameHeader   frm_hdr;
+  VP8PictureHeader pic_hdr;
+  VP8FilterHeader  filter_hdr;
+  VP8SegmentHeader segment_hdr;

  // Worker
-  WebPWorker worker_;
-  int mt_method_;      // multi-thread method: 0=off, 1=[parse+recon][filter]
-                       // 2=[parse][recon+filter]
-  int cache_id_;       // current cache row
-  int num_caches_;     // number of cached rows of 16 pixels (1, 2 or 3)
-  VP8ThreadContext thread_ctx_;  // Thread context
+  WebPWorker worker;
+  int mt_method;      // multi-thread method: 0=off, 1=[parse+recon][filter]
+                      // 2=[parse][recon+filter]
+  int cache_id;       // current cache row
+  int num_caches;     // number of cached rows of 16 pixels (1, 2 or 3)
+  VP8ThreadContext thread_ctx;  // Thread context

  // dimension, in macroblock units.
-  int mb_w_, mb_h_;
+  int mb_w, mb_h;

  // Macroblock to process/filter, depending on cropping and filter_type.
-  int tl_mb_x_, tl_mb_y_;  // top-left MB that must be in-loop filtered
-  int br_mb_x_, br_mb_y_;  // last bottom-right MB that must be decoded
+  int tl_mb_x, tl_mb_y;  // top-left MB that must be in-loop filtered
+  int br_mb_x, br_mb_y;  // last bottom-right MB that must be decoded

  // number of partitions minus one.
-  uint32_t num_parts_minus_one_;
+  uint32_t num_parts_minus_one;
  // per-partition boolean decoders.
-  VP8BitReader parts_[MAX_NUM_PARTITIONS];
+  VP8BitReader parts[MAX_NUM_PARTITIONS];

  // Dithering strength, deduced from decoding options
-  int dither_;                // whether to use dithering or not
-  VP8Random dithering_rg_;    // random generator for dithering
+  int dither;                // whether to use dithering or not
+  VP8Random dithering_rg;    // random generator for dithering

  // dequantization (one set of DC/AC dequant factor per segment)
-  VP8QuantMatrix dqm_[NUM_MB_SEGMENTS];
+  VP8QuantMatrix dqm[NUM_MB_SEGMENTS];

  // probabilities
-  VP8Proba proba_;
-  int use_skip_proba_;
-  uint8_t skip_p_;
+  VP8Proba proba;
+  int use_skip_proba;
+  uint8_t skip_p;

  // Boundary data cache and persistent buffers.
-  uint8_t* intra_t_;      // top intra modes values: 4 * mb_w_
-  uint8_t  intra_l_[4];   // left intra modes values
+  uint8_t* intra_t;      // top intra modes values: 4 * mb_w
+  uint8_t  intra_l[4];   // left intra modes values

-  VP8TopSamples* yuv_t_;  // top y/u/v samples
+  VP8TopSamples* yuv_t;  // top y/u/v samples

-  VP8MB* mb_info_;        // contextual macroblock info (mb_w_ + 1)
-  VP8FInfo* f_info_;      // filter strength info
-  uint8_t* yuv_b_;        // main block for Y/U/V (size = YUV_SIZE)
+  VP8MB* mb_info;        // contextual macroblock info (mb_w + 1)
+  VP8FInfo* f_info;      // filter strength info
+  uint8_t* yuv_b;        // main block for Y/U/V (size = YUV_SIZE)

-  uint8_t* cache_y_;      // macroblock row for storing unfiltered samples
-  uint8_t* cache_u_;
-  uint8_t* cache_v_;
-  int cache_y_stride_;
-  int cache_uv_stride_;
+  uint8_t* cache_y;      // macroblock row for storing unfiltered samples
+  uint8_t* cache_u;
+  uint8_t* cache_v;
+  int cache_y_stride;
+  int cache_uv_stride;

  // main memory chunk for the above data. Persistent.
-  void* mem_;
-  size_t mem_size_;
+  void* mem;
+  size_t mem_size;

  // Per macroblock non-persistent infos.
-  int mb_x_, mb_y_;       // current position, in macroblock units
-  VP8MBData* mb_data_;    // parsed reconstruction data
+  int mb_x, mb_y;        // current position, in macroblock units
+  VP8MBData* mb_data;    // parsed reconstruction data

  // Filtering side-info
-  int filter_type_;                          // 0=off, 1=simple, 2=complex
-  VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2];  // precalculated per-segment/type
+  int filter_type;                          // 0=off, 1=simple, 2=complex
+  VP8FInfo fstrengths[NUM_MB_SEGMENTS][2];  // precalculated per-segment/type

  // Alpha
-  struct ALPHDecoder* alph_dec_;  // alpha-plane decoder object
-  const uint8_t* alpha_data_;     // compressed alpha data (if present)
-  size_t alpha_data_size_;
-  int is_alpha_decoded_;      // true if alpha_data_ is decoded in alpha_plane_
-  uint8_t* alpha_plane_mem_;  // memory allocated for alpha_plane_
-  uint8_t* alpha_plane_;      // output. Persistent, contains the whole data.
-  const uint8_t* alpha_prev_line_;  // last decoded alpha row (or NULL)
-  int alpha_dithering_;       // derived from decoding options (0=off, 100=full)
+  struct ALPHDecoder* alph_dec;  // alpha-plane decoder object
+  const uint8_t* alpha_data;     // compressed alpha data (if present)
+  size_t alpha_data_size;
+  int is_alpha_decoded;      // true if alpha_data is decoded in alpha_plane
+  uint8_t* alpha_plane_mem;  // memory allocated for alpha_plane
+  uint8_t* alpha_plane;      // output. Persistent, contains the whole data.
+  const uint8_t* alpha_prev_line;  // last decoded alpha row (or NULL)
+  int alpha_dithering;       // derived from decoding options (0=off, 100=full)
 };

 //------------------------------------------------------------------------------
--- a/thirdparty/libwebp/src/dec/vp8l_dec.c
+++ b/thirdparty/libwebp/src/dec/vp8l_dec.c
--- a/thirdparty/libwebp/src/dec/vp8li_dec.h
+++ b/thirdparty/libwebp/src/dec/vp8li_dec.h
@@ -16,10 +16,15 @@
 #define WEBP_DEC_VP8LI_DEC_H_

 #include <string.h>     // for memcpy()
+
+#include "src/dec/vp8_dec.h"
 #include "src/dec/webpi_dec.h"
 #include "src/utils/bit_reader_utils.h"
 #include "src/utils/color_cache_utils.h"
 #include "src/utils/huffman_utils.h"
+#include "src/utils/rescaler_utils.h"
+#include "src/webp/decode.h"
+#include "src/webp/format_constants.h"
 #include "src/webp/types.h"

 #ifdef __cplusplus
@@ -34,58 +39,58 @@ typedef enum {

 typedef struct VP8LTransform VP8LTransform;
 struct VP8LTransform {
-  VP8LImageTransformType type_;   // transform type.
-  int                    bits_;   // subsampling bits defining transform window.
-  int                    xsize_;  // transform window X index.
-  int                    ysize_;  // transform window Y index.
-  uint32_t*              data_;   // transform data.
+  VP8LImageTransformType type;   // transform type.
+  int                    bits;   // subsampling bits defining transform window.
+  int                    xsize;  // transform window X index.
+  int                    ysize;  // transform window Y index.
+  uint32_t*              data;   // transform data.
 };

 typedef struct {
-  int             color_cache_size_;
-  VP8LColorCache  color_cache_;
-  VP8LColorCache  saved_color_cache_;  // for incremental
+  int             color_cache_size;
+  VP8LColorCache  color_cache;
+  VP8LColorCache  saved_color_cache;  // for incremental

-  int             huffman_mask_;
-  int             huffman_subsample_bits_;
-  int             huffman_xsize_;
-  uint32_t*       huffman_image_;
-  int             num_htree_groups_;
-  HTreeGroup*     htree_groups_;
-  HuffmanTables   huffman_tables_;
+  int             huffman_mask;
+  int             huffman_subsample_bits;
+  int             huffman_xsize;
+  uint32_t*       huffman_image;
+  int             num_htree_groups;
+  HTreeGroup*     htree_groups;
+  HuffmanTables   huffman_tables;
 } VP8LMetadata;

 typedef struct VP8LDecoder VP8LDecoder;
 struct VP8LDecoder {
-  VP8StatusCode    status_;
-  VP8LDecodeState  state_;
-  VP8Io*           io_;
+  VP8StatusCode    status;
+  VP8LDecodeState  state;
+  VP8Io*           io;

-  const WebPDecBuffer* output_;    // shortcut to io->opaque->output
+  const WebPDecBuffer* output;    // shortcut to io->opaque->output

-  uint32_t*        pixels_;        // Internal data: either uint8_t* for alpha
-                                   // or uint32_t* for BGRA.
-  uint32_t*        argb_cache_;    // Scratch buffer for temporary BGRA storage.
+  uint32_t*        pixels;        // Internal data: either uint8_t* for alpha
+                                  // or uint32_t* for BGRA.
+  uint32_t*        argb_cache;    // Scratch buffer for temporary BGRA storage.

-  VP8LBitReader    br_;
-  int              incremental_;   // if true, incremental decoding is expected
-  VP8LBitReader    saved_br_;      // note: could be local variables too
-  int              saved_last_pixel_;
+  VP8LBitReader    br;
+  int              incremental;   // if true, incremental decoding is expected
+  VP8LBitReader    saved_br;      // note: could be local variables too
+  int              saved_last_pixel;

-  int              width_;
-  int              height_;
-  int              last_row_;      // last input row decoded so far.
-  int              last_pixel_;    // last pixel decoded so far. However, it may
-                                   // not be transformed, scaled and
-                                   // color-converted yet.
-  int              last_out_row_;  // last row output so far.
+  int              width;
+  int              height;
+  int              last_row;      // last input row decoded so far.
+  int              last_pixel;    // last pixel decoded so far. However, it may
+                                  // not be transformed, scaled and
+                                  // color-converted yet.
+  int              last_out_row;  // last row output so far.

-  VP8LMetadata     hdr_;
+  VP8LMetadata     hdr;

-  int              next_transform_;
-  VP8LTransform    transforms_[NUM_TRANSFORMS];
+  int              next_transform;
+  VP8LTransform    transforms[NUM_TRANSFORMS];
  // or'd bitset storing the transforms types.
-  uint32_t         transforms_seen_;
+  uint32_t         transforms_seen;

  uint8_t*         rescaler_memory;  // Working memory for rescaling work.
  WebPRescaler*    rescaler;         // Common rescaler for all channels.
@@ -118,7 +123,7 @@ WEBP_NODISCARD VP8LDecoder* VP8LNew(void);
 WEBP_NODISCARD int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io);

 // Decodes an image. It's required to decode the lossless header before calling
-// this function. Returns false in case of error, with updated dec->status_.
+// this function. Returns false in case of error, with updated dec->status.
 WEBP_NODISCARD int VP8LDecodeImage(VP8LDecoder* const dec);

 // Clears and deallocate a lossless decoder instance.
--- a/thirdparty/libwebp/src/dec/webp_dec.c
+++ b/thirdparty/libwebp/src/dec/webp_dec.c
@@ -11,15 +11,20 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include <assert.h>
 #include <stdlib.h>
+#include <string.h>

+#include "src/dec/common_dec.h"
 #include "src/dec/vp8_dec.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/dec/vp8li_dec.h"
 #include "src/dec/webpi_dec.h"
+#include "src/utils/rescaler_utils.h"
 #include "src/utils/utils.h"
-#include "src/webp/mux_types.h"  // ALPHA_FLAG
 #include "src/webp/decode.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/mux_types.h"  // ALPHA_FLAG
 #include "src/webp/types.h"

 //------------------------------------------------------------------------------
@@ -475,23 +480,23 @@ WEBP_NODISCARD static VP8StatusCode DecodeInto(const uint8_t* const data,
    if (dec == NULL) {
      return VP8_STATUS_OUT_OF_MEMORY;
    }
-    dec->alpha_data_ = headers.alpha_data;
-    dec->alpha_data_size_ = headers.alpha_data_size;
+    dec->alpha_data = headers.alpha_data;
+    dec->alpha_data_size = headers.alpha_data_size;

    // Decode bitstream header, update io->width/io->height.
    if (!VP8GetHeaders(dec, &io)) {
-      status = dec->status_;   // An error occurred. Grab error status.
+      status = dec->status;   // An error occurred. Grab error status.
    } else {
      // Allocate/check output buffers.
      status = WebPAllocateDecBuffer(io.width, io.height, params->options,
                                     params->output);
      if (status == VP8_STATUS_OK) {  // Decode
        // This change must be done before calling VP8Decode()
-        dec->mt_method_ = VP8GetThreadMethod(params->options, &headers,
-                                             io.width, io.height);
+        dec->mt_method = VP8GetThreadMethod(params->options, &headers,
+                                            io.width, io.height);
        VP8InitDithering(params->options, dec);
        if (!VP8Decode(dec, &io)) {
-          status = dec->status_;
+          status = dec->status;
        }
      }
    }
@@ -502,14 +507,14 @@ WEBP_NODISCARD static VP8StatusCode DecodeInto(const uint8_t* const data,
      return VP8_STATUS_OUT_OF_MEMORY;
    }
    if (!VP8LDecodeHeader(dec, &io)) {
-      status = dec->status_;   // An error occurred. Grab error status.
+      status = dec->status;   // An error occurred. Grab error status.
    } else {
      // Allocate/check output buffers.
      status = WebPAllocateDecBuffer(io.width, io.height, params->options,
                                     params->output);
      if (status == VP8_STATUS_OK) {  // Decode
        if (!VP8LDecodeImage(dec)) {
-          status = dec->status_;
+          status = dec->status;
        }
      }
    }
@@ -747,6 +752,61 @@ int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
  return 1;
 }

+static int WebPCheckCropDimensionsBasic(int x, int y, int w, int h) {
+  return !(x < 0 || y < 0 || w <= 0 || h <= 0);
+}
+
+int WebPValidateDecoderConfig(const WebPDecoderConfig* config) {
+  const WebPDecoderOptions* options;
+  if (config == NULL) return 0;
+  if (!IsValidColorspace(config->output.colorspace)) {
+    return 0;
+  }
+
+  options = &config->options;
+  // bypass_filtering, no_fancy_upsampling, use_cropping, use_scaling,
+  // use_threads, flip can be any integer and are interpreted as boolean.
+
+  // Check for cropping.
+  if (options->use_cropping && !WebPCheckCropDimensionsBasic(
+                                   options->crop_left, options->crop_top,
+                                   options->crop_width, options->crop_height)) {
+    return 0;
+  }
+  // Check for scaling.
+  if (options->use_scaling &&
+      (options->scaled_width < 0 || options->scaled_height < 0 ||
+       (options->scaled_width == 0 && options->scaled_height == 0))) {
+    return 0;
+  }
+
+  // In case the WebPBitstreamFeatures has been filled in, check further.
+  if (config->input.width > 0 || config->input.height > 0) {
+    int scaled_width = options->scaled_width;
+    int scaled_height = options->scaled_height;
+    if (options->use_cropping &&
+        !WebPCheckCropDimensions(config->input.width, config->input.height,
+                                 options->crop_left, options->crop_top,
+                                 options->crop_width, options->crop_height)) {
+      return 0;
+    }
+    if (options->use_scaling && !WebPRescalerGetScaledDimensions(
+                                    config->input.width, config->input.height,
+                                    &scaled_width, &scaled_height)) {
+      return 0;
+    }
+  }
+
+  // Check for dithering.
+  if (options->dithering_strength < 0 || options->dithering_strength > 100 ||
+      options->alpha_dithering_strength < 0 ||
+      options->alpha_dithering_strength > 100) {
+    return 0;
+  }
+
+  return 1;
+}
+
 VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
                                      WebPBitstreamFeatures* features,
                                      int version) {
@@ -806,8 +866,8 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,

 int WebPCheckCropDimensions(int image_width, int image_height,
                            int x, int y, int w, int h) {
-  return !(x < 0 || y < 0 || w <= 0 || h <= 0 ||
-           x >= image_width || w > image_width || w > image_width - x ||
+  return WebPCheckCropDimensionsBasic(x, y, w, h) &&
+         !(x >= image_width || w > image_width || w > image_width - x ||
           y >= image_height || h > image_height || h > image_height - y);
 }

--- a/thirdparty/libwebp/src/dec/webpi_dec.h
+++ b/thirdparty/libwebp/src/dec/webpi_dec.h
@@ -18,9 +18,12 @@
 extern "C" {
 #endif

-#include "src/utils/rescaler_utils.h"
+#include <stddef.h>
+
 #include "src/dec/vp8_dec.h"
+#include "src/utils/rescaler_utils.h"
 #include "src/webp/decode.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // WebPDecParams: Decoding output parameters. Transient internal object.
--- a/thirdparty/libwebp/src/demux/anim_decode.c
+++ b/thirdparty/libwebp/src/demux/anim_decode.c
@@ -20,6 +20,8 @@
 #include "src/utils/utils.h"
 #include "src/webp/decode.h"
 #include "src/webp/demux.h"
+#include "src/webp/mux.h"
+#include "src/webp/mux_types.h"
 #include "src/webp/types.h"

 #define NUM_CHANNELS 4
@@ -39,18 +41,18 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
                                 int num_pixels);

 struct WebPAnimDecoder {
-  WebPDemuxer* demux_;             // Demuxer created from given WebP bitstream.
-  WebPDecoderConfig config_;       // Decoder config.
+  WebPDemuxer* demux;              // Demuxer created from given WebP bitstream.
+  WebPDecoderConfig config;        // Decoder config.
  // Note: we use a pointer to a function blending multiple pixels at a time to
  // allow possible inlining of per-pixel blending function.
-  BlendRowFunc blend_func_;        // Pointer to the chose blend row function.
-  WebPAnimInfo info_;              // Global info about the animation.
-  uint8_t* curr_frame_;            // Current canvas (not disposed).
-  uint8_t* prev_frame_disposed_;   // Previous canvas (properly disposed).
-  int prev_frame_timestamp_;       // Previous frame timestamp (milliseconds).
-  WebPIterator prev_iter_;         // Iterator object for previous frame.
-  int prev_frame_was_keyframe_;    // True if previous frame was a keyframe.
-  int next_frame_;                 // Index of the next frame to be decoded
+  BlendRowFunc blend_func;         // Pointer to the chosen blend row function.
+  WebPAnimInfo info;               // Global info about the animation.
+  uint8_t* curr_frame;             // Current canvas (not disposed).
+  uint8_t* prev_frame_disposed;    // Previous canvas (properly disposed).
+  int prev_frame_timestamp;        // Previous frame timestamp (milliseconds).
+  WebPIterator prev_iter;          // Iterator object for previous frame.
+  int prev_frame_was_keyframe;     // True if previous frame was a keyframe.
+  int next_frame;                  // Index of the next frame to be decoded
                                   // (starting from 1).
 };

@@ -73,7 +75,7 @@ WEBP_NODISCARD static int ApplyDecoderOptions(
    const WebPAnimDecoderOptions* const dec_options,
    WebPAnimDecoder* const dec) {
  WEBP_CSP_MODE mode;
-  WebPDecoderConfig* config = &dec->config_;
+  WebPDecoderConfig* config = &dec->config;
  assert(dec_options != NULL);

  mode = dec_options->color_mode;
@@ -81,9 +83,9 @@ WEBP_NODISCARD static int ApplyDecoderOptions(
      mode != MODE_rgbA && mode != MODE_bgrA) {
    return 0;
  }
-  dec->blend_func_ = (mode == MODE_RGBA || mode == MODE_BGRA)
-                         ? &BlendPixelRowNonPremult
-                         : &BlendPixelRowPremult;
+  dec->blend_func = (mode == MODE_RGBA || mode == MODE_BGRA)
+                        ? &BlendPixelRowNonPremult
+                        : &BlendPixelRowPremult;
  if (!WebPInitDecoderConfig(config)) {
    return 0;
  }
@@ -123,22 +125,22 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal(
  }
  if (!ApplyDecoderOptions(&options, dec)) goto Error;

-  dec->demux_ = WebPDemux(webp_data);
-  if (dec->demux_ == NULL) goto Error;
+  dec->demux = WebPDemux(webp_data);
+  if (dec->demux == NULL) goto Error;

-  dec->info_.canvas_width = WebPDemuxGetI(dec->demux_, WEBP_FF_CANVAS_WIDTH);
-  dec->info_.canvas_height = WebPDemuxGetI(dec->demux_, WEBP_FF_CANVAS_HEIGHT);
-  dec->info_.loop_count = WebPDemuxGetI(dec->demux_, WEBP_FF_LOOP_COUNT);
-  dec->info_.bgcolor = WebPDemuxGetI(dec->demux_, WEBP_FF_BACKGROUND_COLOR);
-  dec->info_.frame_count = WebPDemuxGetI(dec->demux_, WEBP_FF_FRAME_COUNT);
+  dec->info.canvas_width = WebPDemuxGetI(dec->demux, WEBP_FF_CANVAS_WIDTH);
+  dec->info.canvas_height = WebPDemuxGetI(dec->demux, WEBP_FF_CANVAS_HEIGHT);
+  dec->info.loop_count = WebPDemuxGetI(dec->demux, WEBP_FF_LOOP_COUNT);
+  dec->info.bgcolor = WebPDemuxGetI(dec->demux, WEBP_FF_BACKGROUND_COLOR);
+  dec->info.frame_count = WebPDemuxGetI(dec->demux, WEBP_FF_FRAME_COUNT);

  // Note: calloc() because we fill frame with zeroes as well.
-  dec->curr_frame_ = (uint8_t*)WebPSafeCalloc(
-      dec->info_.canvas_width * NUM_CHANNELS, dec->info_.canvas_height);
-  if (dec->curr_frame_ == NULL) goto Error;
-  dec->prev_frame_disposed_ = (uint8_t*)WebPSafeCalloc(
-      dec->info_.canvas_width * NUM_CHANNELS, dec->info_.canvas_height);
-  if (dec->prev_frame_disposed_ == NULL) goto Error;
+  dec->curr_frame = (uint8_t*)WebPSafeCalloc(
+      dec->info.canvas_width * NUM_CHANNELS, dec->info.canvas_height);
+  if (dec->curr_frame == NULL) goto Error;
+  dec->prev_frame_disposed = (uint8_t*)WebPSafeCalloc(
+      dec->info.canvas_width * NUM_CHANNELS, dec->info.canvas_height);
+  if (dec->prev_frame_disposed == NULL) goto Error;

  WebPAnimDecoderReset(dec);
  return dec;
@@ -150,7 +152,7 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal(

 int WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec, WebPAnimInfo* info) {
  if (dec == NULL || info == NULL) return 0;
-  *info = dec->info_;
+  *info = dec->info;
  return 1;
 }

@@ -338,25 +340,25 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
  if (dec == NULL || buf_ptr == NULL || timestamp_ptr == NULL) return 0;
  if (!WebPAnimDecoderHasMoreFrames(dec)) return 0;

-  width = dec->info_.canvas_width;
-  height = dec->info_.canvas_height;
-  blend_row = dec->blend_func_;
+  width = dec->info.canvas_width;
+  height = dec->info.canvas_height;
+  blend_row = dec->blend_func;

  // Get compressed frame.
-  if (!WebPDemuxGetFrame(dec->demux_, dec->next_frame_, &iter)) {
+  if (!WebPDemuxGetFrame(dec->demux, dec->next_frame, &iter)) {
    return 0;
  }
-  timestamp = dec->prev_frame_timestamp_ + iter.duration;
+  timestamp = dec->prev_frame_timestamp + iter.duration;

  // Initialize.
-  is_key_frame = IsKeyFrame(&iter, &dec->prev_iter_,
-                            dec->prev_frame_was_keyframe_, width, height);
+  is_key_frame = IsKeyFrame(&iter, &dec->prev_iter,
+                            dec->prev_frame_was_keyframe, width, height);
  if (is_key_frame) {
-    if (!ZeroFillCanvas(dec->curr_frame_, width, height)) {
+    if (!ZeroFillCanvas(dec->curr_frame, width, height)) {
      goto Error;
    }
  } else {
-    if (!CopyCanvas(dec->prev_frame_disposed_, dec->curr_frame_,
+    if (!CopyCanvas(dec->prev_frame_disposed, dec->curr_frame,
                    width, height)) {
      goto Error;
    }
@@ -370,12 +372,12 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
    const uint64_t out_offset = (uint64_t)iter.y_offset * stride +
                                (uint64_t)iter.x_offset * NUM_CHANNELS;  // 53b
    const uint64_t size = (uint64_t)iter.height * stride;  // at most 25 + 27b
-    WebPDecoderConfig* const config = &dec->config_;
+    WebPDecoderConfig* const config = &dec->config;
    WebPRGBABuffer* const buf = &config->output.u.RGBA;
    if ((size_t)size != size) goto Error;
    buf->stride = (int)stride;
    buf->size = (size_t)size;
-    buf->rgba = dec->curr_frame_ + out_offset;
+    buf->rgba = dec->curr_frame + out_offset;

    if (WebPDecode(in, in_size, config) != VP8_STATUS_OK) {
      goto Error;
@@ -388,18 +390,18 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
  // that pixel in the previous frame if blending method of is WEBP_MUX_BLEND.
  if (iter.frame_num > 1 && iter.blend_method == WEBP_MUX_BLEND &&
      !is_key_frame) {
-    if (dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_NONE) {
+    if (dec->prev_iter.dispose_method == WEBP_MUX_DISPOSE_NONE) {
      int y;
      // Blend transparent pixels with pixels in previous canvas.
      for (y = 0; y < iter.height; ++y) {
        const size_t offset =
            (iter.y_offset + y) * width + iter.x_offset;
-        blend_row((uint32_t*)dec->curr_frame_ + offset,
-                  (uint32_t*)dec->prev_frame_disposed_ + offset, iter.width);
+        blend_row((uint32_t*)dec->curr_frame + offset,
+                  (uint32_t*)dec->prev_frame_disposed + offset, iter.width);
      }
    } else {
      int y;
-      assert(dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND);
+      assert(dec->prev_iter.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND);
      // We need to blend a transparent pixel with its value just after
      // initialization. That is, blend it with:
      // * Fully transparent pixel if it belongs to prevRect <-- No-op.
@@ -407,39 +409,39 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
      for (y = 0; y < iter.height; ++y) {
        const int canvas_y = iter.y_offset + y;
        int left1, width1, left2, width2;
-        FindBlendRangeAtRow(&iter, &dec->prev_iter_, canvas_y, &left1, &width1,
+        FindBlendRangeAtRow(&iter, &dec->prev_iter, canvas_y, &left1, &width1,
                            &left2, &width2);
        if (width1 > 0) {
          const size_t offset1 = canvas_y * width + left1;
-          blend_row((uint32_t*)dec->curr_frame_ + offset1,
-                    (uint32_t*)dec->prev_frame_disposed_ + offset1, width1);
+          blend_row((uint32_t*)dec->curr_frame + offset1,
+                    (uint32_t*)dec->prev_frame_disposed + offset1, width1);
        }
        if (width2 > 0) {
          const size_t offset2 = canvas_y * width + left2;
-          blend_row((uint32_t*)dec->curr_frame_ + offset2,
-                    (uint32_t*)dec->prev_frame_disposed_ + offset2, width2);
+          blend_row((uint32_t*)dec->curr_frame + offset2,
+                    (uint32_t*)dec->prev_frame_disposed + offset2, width2);
        }
      }
    }
  }

  // Update info of the previous frame and dispose it for the next iteration.
-  dec->prev_frame_timestamp_ = timestamp;
-  WebPDemuxReleaseIterator(&dec->prev_iter_);
-  dec->prev_iter_ = iter;
-  dec->prev_frame_was_keyframe_ = is_key_frame;
-  if (!CopyCanvas(dec->curr_frame_, dec->prev_frame_disposed_, width, height)) {
+  dec->prev_frame_timestamp = timestamp;
+  WebPDemuxReleaseIterator(&dec->prev_iter);
+  dec->prev_iter = iter;
+  dec->prev_frame_was_keyframe = is_key_frame;
+  if (!CopyCanvas(dec->curr_frame, dec->prev_frame_disposed, width, height)) {
    goto Error;
  }
-  if (dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
-    ZeroFillFrameRect(dec->prev_frame_disposed_, width * NUM_CHANNELS,
-                      dec->prev_iter_.x_offset, dec->prev_iter_.y_offset,
-                      dec->prev_iter_.width, dec->prev_iter_.height);
+  if (dec->prev_iter.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
+    ZeroFillFrameRect(dec->prev_frame_disposed, width * NUM_CHANNELS,
+                      dec->prev_iter.x_offset, dec->prev_iter.y_offset,
+                      dec->prev_iter.width, dec->prev_iter.height);
  }
-  ++dec->next_frame_;
+  ++dec->next_frame;

  // All OK, fill in the values.
-  *buf_ptr = dec->curr_frame_;
+  *buf_ptr = dec->curr_frame;
  *timestamp_ptr = timestamp;
  return 1;

@@ -450,30 +452,30 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,

 int WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec) {
  if (dec == NULL) return 0;
-  return (dec->next_frame_ <= (int)dec->info_.frame_count);
+  return (dec->next_frame <= (int)dec->info.frame_count);
 }

 void WebPAnimDecoderReset(WebPAnimDecoder* dec) {
  if (dec != NULL) {
-    dec->prev_frame_timestamp_ = 0;
-    WebPDemuxReleaseIterator(&dec->prev_iter_);
-    memset(&dec->prev_iter_, 0, sizeof(dec->prev_iter_));
-    dec->prev_frame_was_keyframe_ = 0;
-    dec->next_frame_ = 1;
+    dec->prev_frame_timestamp = 0;
+    WebPDemuxReleaseIterator(&dec->prev_iter);
+    memset(&dec->prev_iter, 0, sizeof(dec->prev_iter));
+    dec->prev_frame_was_keyframe = 0;
+    dec->next_frame = 1;
  }
 }

 const WebPDemuxer* WebPAnimDecoderGetDemuxer(const WebPAnimDecoder* dec) {
  if (dec == NULL) return NULL;
-  return dec->demux_;
+  return dec->demux;
 }

 void WebPAnimDecoderDelete(WebPAnimDecoder* dec) {
  if (dec != NULL) {
-    WebPDemuxReleaseIterator(&dec->prev_iter_);
-    WebPDemuxDelete(dec->demux_);
-    WebPSafeFree(dec->curr_frame_);
-    WebPSafeFree(dec->prev_frame_disposed_);
+    WebPDemuxReleaseIterator(&dec->prev_iter);
+    WebPDemuxDelete(dec->demux);
+    WebPSafeFree(dec->curr_frame);
+    WebPSafeFree(dec->prev_frame_disposed);
    WebPSafeFree(dec);
  }
 }
--- a/thirdparty/libwebp/src/demux/demux.c
+++ b/thirdparty/libwebp/src/demux/demux.c
@@ -22,55 +22,58 @@
 #include "src/webp/decode.h"     // WebPGetFeatures
 #include "src/webp/demux.h"
 #include "src/webp/format_constants.h"
+#include "src/webp/mux.h"
+#include "src/webp/mux_types.h"
+#include "src/webp/types.h"

 #define DMUX_MAJ_VERSION 1
-#define DMUX_MIN_VERSION 5
+#define DMUX_MIN_VERSION 6
 #define DMUX_REV_VERSION 0

 typedef struct {
-  size_t start_;        // start location of the data
-  size_t end_;          // end location
-  size_t riff_end_;     // riff chunk end location, can be > end_.
-  size_t buf_size_;     // size of the buffer
-  const uint8_t* buf_;
+  size_t start;         // start location of the data
+  size_t end;           // end location
+  size_t riff_end;      // riff chunk end location, can be > end.
+  size_t buf_size;      // size of the buffer
+  const uint8_t* buf;
 } MemBuffer;

 typedef struct {
-  size_t offset_;
-  size_t size_;
+  size_t offset;
+  size_t size;
 } ChunkData;

 typedef struct Frame {
-  int x_offset_, y_offset_;
-  int width_, height_;
-  int has_alpha_;
-  int duration_;
-  WebPMuxAnimDispose dispose_method_;
-  WebPMuxAnimBlend blend_method_;
-  int frame_num_;
-  int complete_;   // img_components_ contains a full image.
-  ChunkData img_components_[2];  // 0=VP8{,L} 1=ALPH
-  struct Frame* next_;
+  int x_offset, y_offset;
+  int width, height;
+  int has_alpha;
+  int duration;
+  WebPMuxAnimDispose dispose_method;
+  WebPMuxAnimBlend blend_method;
+  int frame_num;
+  int complete;   // img_components contains a full image.
+  ChunkData img_components[2];  // 0=VP8{,L} 1=ALPH
+  struct Frame* next;
 } Frame;

 typedef struct Chunk {
-  ChunkData data_;
-  struct Chunk* next_;
+  ChunkData data;
+  struct Chunk* next;
 } Chunk;

 struct WebPDemuxer {
-  MemBuffer mem_;
-  WebPDemuxState state_;
-  int is_ext_format_;
-  uint32_t feature_flags_;
-  int canvas_width_, canvas_height_;
-  int loop_count_;
-  uint32_t bgcolor_;
-  int num_frames_;
-  Frame* frames_;
-  Frame** frames_tail_;
-  Chunk* chunks_;  // non-image chunks
-  Chunk** chunks_tail_;
+  MemBuffer mem;
+  WebPDemuxState state;
+  int is_ext_format;
+  uint32_t feature_flags;
+  int canvas_width, canvas_height;
+  int loop_count;
+  uint32_t bgcolor;
+  int num_frames;
+  Frame* frames;
+  Frame** frames_tail;
+  Chunk* chunks;  // non-image chunks
+  Chunk** chunks_tail;
 };

 typedef enum {
@@ -108,10 +111,10 @@ int WebPGetDemuxVersion(void) {

 static int RemapMemBuffer(MemBuffer* const mem,
                          const uint8_t* data, size_t size) {
-  if (size < mem->buf_size_) return 0;  // can't remap to a shorter buffer!
+  if (size < mem->buf_size) return 0;  // can't remap to a shorter buffer!

-  mem->buf_ = data;
-  mem->end_ = mem->buf_size_ = size;
+  mem->buf = data;
+  mem->end = mem->buf_size = size;
  return 1;
 }

@@ -123,49 +126,49 @@ static int InitMemBuffer(MemBuffer* const mem,

 // Return the remaining data size available in 'mem'.
 static WEBP_INLINE size_t MemDataSize(const MemBuffer* const mem) {
-  return (mem->end_ - mem->start_);
+  return (mem->end - mem->start);
 }

 // Return true if 'size' exceeds the end of the RIFF chunk.
 static WEBP_INLINE int SizeIsInvalid(const MemBuffer* const mem, size_t size) {
-  return (size > mem->riff_end_ - mem->start_);
+  return (size > mem->riff_end - mem->start);
 }

 static WEBP_INLINE void Skip(MemBuffer* const mem, size_t size) {
-  mem->start_ += size;
+  mem->start += size;
 }

 static WEBP_INLINE void Rewind(MemBuffer* const mem, size_t size) {
-  mem->start_ -= size;
+  mem->start -= size;
 }

 static WEBP_INLINE const uint8_t* GetBuffer(MemBuffer* const mem) {
-  return mem->buf_ + mem->start_;
+  return mem->buf + mem->start;
 }

 // Read from 'mem' and skip the read bytes.
 static WEBP_INLINE uint8_t ReadByte(MemBuffer* const mem) {
-  const uint8_t byte = mem->buf_[mem->start_];
+  const uint8_t byte = mem->buf[mem->start];
  Skip(mem, 1);
  return byte;
 }

 static WEBP_INLINE int ReadLE16s(MemBuffer* const mem) {
-  const uint8_t* const data = mem->buf_ + mem->start_;
+  const uint8_t* const data = mem->buf + mem->start;
  const int val = GetLE16(data);
  Skip(mem, 2);
  return val;
 }

 static WEBP_INLINE int ReadLE24s(MemBuffer* const mem) {
-  const uint8_t* const data = mem->buf_ + mem->start_;
+  const uint8_t* const data = mem->buf + mem->start;
  const int val = GetLE24(data);
  Skip(mem, 3);
  return val;
 }

 static WEBP_INLINE uint32_t ReadLE32(MemBuffer* const mem) {
-  const uint8_t* const data = mem->buf_ + mem->start_;
+  const uint8_t* const data = mem->buf + mem->start;
  const uint32_t val = GetLE32(data);
  Skip(mem, 4);
  return val;
@@ -175,20 +178,20 @@ static WEBP_INLINE uint32_t ReadLE32(MemBuffer* const mem) {
 // Secondary chunk parsing

 static void AddChunk(WebPDemuxer* const dmux, Chunk* const chunk) {
-  *dmux->chunks_tail_ = chunk;
-  chunk->next_ = NULL;
-  dmux->chunks_tail_ = &chunk->next_;
+  *dmux->chunks_tail = chunk;
+  chunk->next = NULL;
+  dmux->chunks_tail = &chunk->next;
 }

 // Add a frame to the end of the list, ensuring the last frame is complete.
 // Returns true on success, false otherwise.
 static int AddFrame(WebPDemuxer* const dmux, Frame* const frame) {
-  const Frame* const last_frame = *dmux->frames_tail_;
-  if (last_frame != NULL && !last_frame->complete_) return 0;
+  const Frame* const last_frame = *dmux->frames_tail;
+  if (last_frame != NULL && !last_frame->complete) return 0;

-  *dmux->frames_tail_ = frame;
-  frame->next_ = NULL;
-  dmux->frames_tail_ = &frame->next_;
+  *dmux->frames_tail = frame;
+  frame->next = NULL;
+  dmux->frames_tail = &frame->next;
  return 1;
 }

@@ -196,13 +199,13 @@ static void SetFrameInfo(size_t start_offset, size_t size,
                         int frame_num, int complete,
                         const WebPBitstreamFeatures* const features,
                         Frame* const frame) {
-  frame->img_components_[0].offset_ = start_offset;
-  frame->img_components_[0].size_ = size;
-  frame->width_ = features->width;
-  frame->height_ = features->height;
-  frame->has_alpha_ |= features->has_alpha;
-  frame->frame_num_ = frame_num;
-  frame->complete_ = complete;
+  frame->img_components[0].offset = start_offset;
+  frame->img_components[0].size = size;
+  frame->width = features->width;
+  frame->height = features->height;
+  frame->has_alpha |= features->has_alpha;
+  frame->frame_num = frame_num;
+  frame->complete = complete;
 }

 // Store image bearing chunks to 'frame'. 'min_size' is an optional size
@@ -218,7 +221,7 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
  if (done) return PARSE_NEED_MORE_DATA;

  do {
-    const size_t chunk_start_offset = mem->start_;
+    const size_t chunk_start_offset = mem->start;
    const uint32_t fourcc = ReadLE32(mem);
    const uint32_t payload_size = ReadLE32(mem);
    uint32_t payload_size_padded;
@@ -238,10 +241,10 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
      case MKFOURCC('A', 'L', 'P', 'H'):
        if (alpha_chunks == 0) {
          ++alpha_chunks;
-          frame->img_components_[1].offset_ = chunk_start_offset;
-          frame->img_components_[1].size_ = chunk_size;
-          frame->has_alpha_ = 1;
-          frame->frame_num_ = frame_num;
+          frame->img_components[1].offset = chunk_start_offset;
+          frame->img_components[1].size = chunk_size;
+          frame->has_alpha = 1;
+          frame->frame_num = frame_num;
          Skip(mem, payload_available);
        } else {
          goto Done;
@@ -256,7 +259,7 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
          // is incomplete.
          WebPBitstreamFeatures features;
          const VP8StatusCode vp8_status =
-              WebPGetFeatures(mem->buf_ + chunk_start_offset, chunk_size,
+              WebPGetFeatures(mem->buf + chunk_start_offset, chunk_size,
                              &features);
          if (status == PARSE_NEED_MORE_DATA &&
              vp8_status == VP8_STATUS_NOT_ENOUGH_DATA) {
@@ -281,7 +284,7 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
        break;
    }

-    if (mem->start_ == mem->riff_end_) {
+    if (mem->start == mem->riff_end) {
      done = 1;
    } else if (MemDataSize(mem) < CHUNK_HEADER_SIZE) {
      status = PARSE_NEED_MORE_DATA;
@@ -310,42 +313,42 @@ static ParseStatus NewFrame(const MemBuffer* const mem,
 // 'frame_chunk_size' is the previously validated, padded chunk size.
 static ParseStatus ParseAnimationFrame(
    WebPDemuxer* const dmux, uint32_t frame_chunk_size) {
-  const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
+  const int is_animation = !!(dmux->feature_flags & ANIMATION_FLAG);
  const uint32_t anmf_payload_size = frame_chunk_size - ANMF_CHUNK_SIZE;
  int added_frame = 0;
  int bits;
-  MemBuffer* const mem = &dmux->mem_;
+  MemBuffer* const mem = &dmux->mem;
  Frame* frame;
  size_t start_offset;
  ParseStatus status =
      NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
  if (status != PARSE_OK) return status;

-  frame->x_offset_       = 2 * ReadLE24s(mem);
-  frame->y_offset_       = 2 * ReadLE24s(mem);
-  frame->width_          = 1 + ReadLE24s(mem);
-  frame->height_         = 1 + ReadLE24s(mem);
-  frame->duration_       = ReadLE24s(mem);
+  frame->x_offset       = 2 * ReadLE24s(mem);
+  frame->y_offset       = 2 * ReadLE24s(mem);
+  frame->width          = 1 + ReadLE24s(mem);
+  frame->height         = 1 + ReadLE24s(mem);
+  frame->duration       = ReadLE24s(mem);
  bits = ReadByte(mem);
-  frame->dispose_method_ =
+  frame->dispose_method =
      (bits & 1) ? WEBP_MUX_DISPOSE_BACKGROUND : WEBP_MUX_DISPOSE_NONE;
-  frame->blend_method_ = (bits & 2) ? WEBP_MUX_NO_BLEND : WEBP_MUX_BLEND;
-  if (frame->width_ * (uint64_t)frame->height_ >= MAX_IMAGE_AREA) {
+  frame->blend_method = (bits & 2) ? WEBP_MUX_NO_BLEND : WEBP_MUX_BLEND;
+  if (frame->width * (uint64_t)frame->height >= MAX_IMAGE_AREA) {
    WebPSafeFree(frame);
    return PARSE_ERROR;
  }

  // Store a frame only if the animation flag is set there is some data for
  // this frame is available.
-  start_offset = mem->start_;
-  status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame);
-  if (status != PARSE_ERROR && mem->start_ - start_offset > anmf_payload_size) {
+  start_offset = mem->start;
+  status = StoreFrame(dmux->num_frames + 1, anmf_payload_size, mem, frame);
+  if (status != PARSE_ERROR && mem->start - start_offset > anmf_payload_size) {
    status = PARSE_ERROR;
  }
-  if (status != PARSE_ERROR && is_animation && frame->frame_num_ > 0) {
+  if (status != PARSE_ERROR && is_animation && frame->frame_num > 0) {
    added_frame = AddFrame(dmux, frame);
    if (added_frame) {
-      ++dmux->num_frames_;
+      ++dmux->num_frames;
    } else {
      status = PARSE_ERROR;
    }
@@ -364,8 +367,8 @@ static int StoreChunk(WebPDemuxer* const dmux,
  Chunk* const chunk = (Chunk*)WebPSafeCalloc(1ULL, sizeof(*chunk));
  if (chunk == NULL) return 0;

-  chunk->data_.offset_ = start_offset;
-  chunk->data_.size_ = size;
+  chunk->data.offset = start_offset;
+  chunk->data.size = size;
  AddChunk(dmux, chunk);
  return 1;
 }
@@ -389,9 +392,9 @@ static ParseStatus ReadHeader(MemBuffer* const mem) {
  if (riff_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;

  // There's no point in reading past the end of the RIFF chunk
-  mem->riff_end_ = riff_size + CHUNK_HEADER_SIZE;
-  if (mem->buf_size_ > mem->riff_end_) {
-    mem->buf_size_ = mem->end_ = mem->riff_end_;
+  mem->riff_end = riff_size + CHUNK_HEADER_SIZE;
+  if (mem->buf_size > mem->riff_end) {
+    mem->buf_size = mem->end = mem->riff_end;
  }

  Skip(mem, RIFF_HEADER_SIZE);
@@ -400,12 +403,12 @@ static ParseStatus ReadHeader(MemBuffer* const mem) {

 static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
  const size_t min_size = CHUNK_HEADER_SIZE;
-  MemBuffer* const mem = &dmux->mem_;
+  MemBuffer* const mem = &dmux->mem;
  Frame* frame;
  ParseStatus status;
  int image_added = 0;

-  if (dmux->frames_ != NULL) return PARSE_ERROR;
+  if (dmux->frames != NULL) return PARSE_ERROR;
  if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
  if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;

@@ -414,29 +417,29 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {

  // For the single image case we allow parsing of a partial frame, so no
  // minimum size is imposed here.
-  status = StoreFrame(1, 0, &dmux->mem_, frame);
+  status = StoreFrame(1, 0, &dmux->mem, frame);
  if (status != PARSE_ERROR) {
-    const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
+    const int has_alpha = !!(dmux->feature_flags & ALPHA_FLAG);
    // Clear any alpha when the alpha flag is missing.
-    if (!has_alpha && frame->img_components_[1].size_ > 0) {
-      frame->img_components_[1].offset_ = 0;
-      frame->img_components_[1].size_ = 0;
-      frame->has_alpha_ = 0;
+    if (!has_alpha && frame->img_components[1].size > 0) {
+      frame->img_components[1].offset = 0;
+      frame->img_components[1].size = 0;
+      frame->has_alpha = 0;
    }

    // Use the frame width/height as the canvas values for non-vp8x files.
    // Also, set ALPHA_FLAG if this is a lossless image with alpha.
-    if (!dmux->is_ext_format_ && frame->width_ > 0 && frame->height_ > 0) {
-      dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
-      dmux->canvas_width_ = frame->width_;
-      dmux->canvas_height_ = frame->height_;
-      dmux->feature_flags_ |= frame->has_alpha_ ? ALPHA_FLAG : 0;
+    if (!dmux->is_ext_format && frame->width > 0 && frame->height > 0) {
+      dmux->state = WEBP_DEMUX_PARSED_HEADER;
+      dmux->canvas_width = frame->width;
+      dmux->canvas_height = frame->height;
+      dmux->feature_flags |= frame->has_alpha ? ALPHA_FLAG : 0;
    }
    if (!AddFrame(dmux, frame)) {
      status = PARSE_ERROR;  // last frame was left incomplete
    } else {
      image_added = 1;
-      dmux->num_frames_ = 1;
+      dmux->num_frames = 1;
    }
  }

@@ -445,14 +448,14 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
 }

 static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
-  const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
-  MemBuffer* const mem = &dmux->mem_;
+  const int is_animation = !!(dmux->feature_flags & ANIMATION_FLAG);
+  MemBuffer* const mem = &dmux->mem;
  int anim_chunks = 0;
  ParseStatus status = PARSE_OK;

  do {
    int store_chunk = 1;
-    const size_t chunk_start_offset = mem->start_;
+    const size_t chunk_start_offset = mem->start;
    const uint32_t fourcc = ReadLE32(mem);
    const uint32_t chunk_size = ReadLE32(mem);
    uint32_t chunk_size_padded;
@@ -483,8 +486,8 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
          status = PARSE_NEED_MORE_DATA;
        } else if (anim_chunks == 0) {
          ++anim_chunks;
-          dmux->bgcolor_ = ReadLE32(mem);
-          dmux->loop_count_ = ReadLE16s(mem);
+          dmux->bgcolor = ReadLE32(mem);
+          dmux->loop_count = ReadLE16s(mem);
          Skip(mem, chunk_size_padded - ANIM_CHUNK_SIZE);
        } else {
          store_chunk = 0;
@@ -498,15 +501,15 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
        break;
      }
      case MKFOURCC('I', 'C', 'C', 'P'): {
-        store_chunk = !!(dmux->feature_flags_ & ICCP_FLAG);
+        store_chunk = !!(dmux->feature_flags & ICCP_FLAG);
        goto Skip;
      }
      case MKFOURCC('E', 'X', 'I', 'F'): {
-        store_chunk = !!(dmux->feature_flags_ & EXIF_FLAG);
+        store_chunk = !!(dmux->feature_flags & EXIF_FLAG);
        goto Skip;
      }
      case MKFOURCC('X', 'M', 'P', ' '): {
-        store_chunk = !!(dmux->feature_flags_ & XMP_FLAG);
+        store_chunk = !!(dmux->feature_flags & XMP_FLAG);
        goto Skip;
      }
 Skip:
@@ -527,7 +530,7 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
      }
    }

-    if (mem->start_ == mem->riff_end_) {
+    if (mem->start == mem->riff_end) {
      break;
    } else if (MemDataSize(mem) < CHUNK_HEADER_SIZE) {
      status = PARSE_NEED_MORE_DATA;
@@ -538,12 +541,12 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
 }

 static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
-  MemBuffer* const mem = &dmux->mem_;
+  MemBuffer* const mem = &dmux->mem;
  uint32_t vp8x_size;

  if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;

-  dmux->is_ext_format_ = 1;
+  dmux->is_ext_format = 1;
  Skip(mem, TAG_SIZE);  // VP8X
  vp8x_size = ReadLE32(mem);
  if (vp8x_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
@@ -552,15 +555,15 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
  if (SizeIsInvalid(mem, vp8x_size)) return PARSE_ERROR;
  if (MemDataSize(mem) < vp8x_size) return PARSE_NEED_MORE_DATA;

-  dmux->feature_flags_ = ReadByte(mem);
+  dmux->feature_flags = ReadByte(mem);
  Skip(mem, 3);  // Reserved.
-  dmux->canvas_width_  = 1 + ReadLE24s(mem);
-  dmux->canvas_height_ = 1 + ReadLE24s(mem);
-  if (dmux->canvas_width_ * (uint64_t)dmux->canvas_height_ >= MAX_IMAGE_AREA) {
+  dmux->canvas_width  = 1 + ReadLE24s(mem);
+  dmux->canvas_height = 1 + ReadLE24s(mem);
+  if (dmux->canvas_width * (uint64_t)dmux->canvas_height >= MAX_IMAGE_AREA) {
    return PARSE_ERROR;  // image final dimension is too large
  }
  Skip(mem, vp8x_size - VP8X_CHUNK_SIZE);  // skip any trailing data.
-  dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
+  dmux->state = WEBP_DEMUX_PARSED_HEADER;

  if (SizeIsInvalid(mem, CHUNK_HEADER_SIZE)) return PARSE_ERROR;
  if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
@@ -572,13 +575,13 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
 // Format validation

 static int IsValidSimpleFormat(const WebPDemuxer* const dmux) {
-  const Frame* const frame = dmux->frames_;
-  if (dmux->state_ == WEBP_DEMUX_PARSING_HEADER) return 1;
+  const Frame* const frame = dmux->frames;
+  if (dmux->state == WEBP_DEMUX_PARSING_HEADER) return 1;

-  if (dmux->canvas_width_ <= 0 || dmux->canvas_height_ <= 0) return 0;
-  if (dmux->state_ == WEBP_DEMUX_DONE && frame == NULL) return 0;
+  if (dmux->canvas_width <= 0 || dmux->canvas_height <= 0) return 0;
+  if (dmux->state == WEBP_DEMUX_DONE && frame == NULL) return 0;

-  if (frame->width_ <= 0 || frame->height_ <= 0) return 0;
+  if (frame->width <= 0 || frame->height <= 0) return 0;
  return 1;
 }

@@ -587,65 +590,65 @@ static int IsValidSimpleFormat(const WebPDemuxer* const dmux) {
 static int CheckFrameBounds(const Frame* const frame, int exact,
                            int canvas_width, int canvas_height) {
  if (exact) {
-    if (frame->x_offset_ != 0 || frame->y_offset_ != 0) {
+    if (frame->x_offset != 0 || frame->y_offset != 0) {
      return 0;
    }
-    if (frame->width_ != canvas_width || frame->height_ != canvas_height) {
+    if (frame->width != canvas_width || frame->height != canvas_height) {
      return 0;
    }
  } else {
-    if (frame->x_offset_ < 0 || frame->y_offset_ < 0) return 0;
-    if (frame->width_ + frame->x_offset_ > canvas_width) return 0;
-    if (frame->height_ + frame->y_offset_ > canvas_height) return 0;
+    if (frame->x_offset < 0 || frame->y_offset < 0) return 0;
+    if (frame->width + frame->x_offset > canvas_width) return 0;
+    if (frame->height + frame->y_offset > canvas_height) return 0;
  }
  return 1;
 }

 static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
-  const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
-  const Frame* f = dmux->frames_;
+  const int is_animation = !!(dmux->feature_flags & ANIMATION_FLAG);
+  const Frame* f = dmux->frames;

-  if (dmux->state_ == WEBP_DEMUX_PARSING_HEADER) return 1;
+  if (dmux->state == WEBP_DEMUX_PARSING_HEADER) return 1;

-  if (dmux->canvas_width_ <= 0 || dmux->canvas_height_ <= 0) return 0;
-  if (dmux->loop_count_ < 0) return 0;
-  if (dmux->state_ == WEBP_DEMUX_DONE && dmux->frames_ == NULL) return 0;
-  if (dmux->feature_flags_ & ~ALL_VALID_FLAGS) return 0;  // invalid bitstream
+  if (dmux->canvas_width <= 0 || dmux->canvas_height <= 0) return 0;
+  if (dmux->loop_count < 0) return 0;
+  if (dmux->state == WEBP_DEMUX_DONE && dmux->frames == NULL) return 0;
+  if (dmux->feature_flags & ~ALL_VALID_FLAGS) return 0;  // invalid bitstream

  while (f != NULL) {
-    const int cur_frame_set = f->frame_num_;
+    const int cur_frame_set = f->frame_num;

    // Check frame properties.
-    for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
-      const ChunkData* const image = f->img_components_;
-      const ChunkData* const alpha = f->img_components_ + 1;
+    for (; f != NULL && f->frame_num == cur_frame_set; f = f->next) {
+      const ChunkData* const image = f->img_components;
+      const ChunkData* const alpha = f->img_components + 1;

-      if (!is_animation && f->frame_num_ > 1) return 0;
+      if (!is_animation && f->frame_num > 1) return 0;

-      if (f->complete_) {
-        if (alpha->size_ == 0 && image->size_ == 0) return 0;
+      if (f->complete) {
+        if (alpha->size == 0 && image->size == 0) return 0;
        // Ensure alpha precedes image bitstream.
-        if (alpha->size_ > 0 && alpha->offset_ > image->offset_) {
+        if (alpha->size > 0 && alpha->offset > image->offset) {
          return 0;
        }

-        if (f->width_ <= 0 || f->height_ <= 0) return 0;
+        if (f->width <= 0 || f->height <= 0) return 0;
      } else {
        // There shouldn't be a partial frame in a complete file.
-        if (dmux->state_ == WEBP_DEMUX_DONE) return 0;
+        if (dmux->state == WEBP_DEMUX_DONE) return 0;

        // Ensure alpha precedes image bitstream.
-        if (alpha->size_ > 0 && image->size_ > 0 &&
-            alpha->offset_ > image->offset_) {
+        if (alpha->size > 0 && image->size > 0 &&
+            alpha->offset > image->offset) {
          return 0;
        }
        // There shouldn't be any frames after an incomplete one.
-        if (f->next_ != NULL) return 0;
+        if (f->next != NULL) return 0;
      }

-      if (f->width_ > 0 && f->height_ > 0 &&
+      if (f->width > 0 && f->height > 0 &&
          !CheckFrameBounds(f, !is_animation,
-                            dmux->canvas_width_, dmux->canvas_height_)) {
+                            dmux->canvas_width, dmux->canvas_height)) {
        return 0;
      }
    }
@@ -657,21 +660,21 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
 // WebPDemuxer object

 static void InitDemux(WebPDemuxer* const dmux, const MemBuffer* const mem) {
-  dmux->state_ = WEBP_DEMUX_PARSING_HEADER;
-  dmux->loop_count_ = 1;
-  dmux->bgcolor_ = 0xFFFFFFFF;  // White background by default.
-  dmux->canvas_width_ = -1;
-  dmux->canvas_height_ = -1;
-  dmux->frames_tail_ = &dmux->frames_;
-  dmux->chunks_tail_ = &dmux->chunks_;
-  dmux->mem_ = *mem;
+  dmux->state = WEBP_DEMUX_PARSING_HEADER;
+  dmux->loop_count = 1;
+  dmux->bgcolor = 0xFFFFFFFF;  // White background by default.
+  dmux->canvas_width = -1;
+  dmux->canvas_height = -1;
+  dmux->frames_tail = &dmux->frames;
+  dmux->chunks_tail = &dmux->chunks;
+  dmux->mem = *mem;
 }

 static ParseStatus CreateRawImageDemuxer(MemBuffer* const mem,
                                         WebPDemuxer** demuxer) {
  WebPBitstreamFeatures features;
  const VP8StatusCode status =
-      WebPGetFeatures(mem->buf_, mem->buf_size_, &features);
+      WebPGetFeatures(mem->buf, mem->buf_size, &features);
  *demuxer = NULL;
  if (status != VP8_STATUS_OK) {
    return (status == VP8_STATUS_NOT_ENOUGH_DATA) ? PARSE_NEED_MORE_DATA
@@ -683,14 +686,14 @@ static ParseStatus CreateRawImageDemuxer(MemBuffer* const mem,
    Frame* const frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
    if (dmux == NULL || frame == NULL) goto Error;
    InitDemux(dmux, mem);
-    SetFrameInfo(0, mem->buf_size_, 1 /*frame_num*/, 1 /*complete*/, &features,
+    SetFrameInfo(0, mem->buf_size, 1 /*frame_num*/, 1 /*complete*/, &features,
                 frame);
    if (!AddFrame(dmux, frame)) goto Error;
-    dmux->state_ = WEBP_DEMUX_DONE;
-    dmux->canvas_width_ = frame->width_;
-    dmux->canvas_height_ = frame->height_;
-    dmux->feature_flags_ |= frame->has_alpha_ ? ALPHA_FLAG : 0;
-    dmux->num_frames_ = 1;
+    dmux->state = WEBP_DEMUX_DONE;
+    dmux->canvas_width = frame->width;
+    dmux->canvas_height = frame->height;
+    dmux->feature_flags |= frame->has_alpha ? ALPHA_FLAG : 0;
+    dmux->num_frames = 1;
    assert(IsValidSimpleFormat(dmux));
    *demuxer = dmux;
    return PARSE_OK;
@@ -734,7 +737,7 @@ WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
    return NULL;
  }

-  partial = (mem.buf_size_ < mem.riff_end_);
+  partial = (mem.buf_size < mem.riff_end);
  if (!allow_partial && partial) return NULL;

  dmux = (WebPDemuxer*)WebPSafeCalloc(1ULL, sizeof(*dmux));
@@ -743,16 +746,16 @@ WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,

  status = PARSE_ERROR;
  for (parser = kMasterChunks; parser->parse != NULL; ++parser) {
-    if (!memcmp(parser->id, GetBuffer(&dmux->mem_), TAG_SIZE)) {
+    if (!memcmp(parser->id, GetBuffer(&dmux->mem), TAG_SIZE)) {
      status = parser->parse(dmux);
-      if (status == PARSE_OK) dmux->state_ = WEBP_DEMUX_DONE;
+      if (status == PARSE_OK) dmux->state = WEBP_DEMUX_DONE;
      if (status == PARSE_NEED_MORE_DATA && !partial) status = PARSE_ERROR;
      if (status != PARSE_ERROR && !parser->valid(dmux)) status = PARSE_ERROR;
-      if (status == PARSE_ERROR) dmux->state_ = WEBP_DEMUX_PARSE_ERROR;
+      if (status == PARSE_ERROR) dmux->state = WEBP_DEMUX_PARSE_ERROR;
      break;
    }
  }
-  if (state != NULL) *state = dmux->state_;
+  if (state != NULL) *state = dmux->state;

  if (status == PARSE_ERROR) {
    WebPDemuxDelete(dmux);
@@ -766,14 +769,14 @@ void WebPDemuxDelete(WebPDemuxer* dmux) {
  Frame* f;
  if (dmux == NULL) return;

-  for (f = dmux->frames_; f != NULL;) {
+  for (f = dmux->frames; f != NULL;) {
    Frame* const cur_frame = f;
-    f = f->next_;
+    f = f->next;
    WebPSafeFree(cur_frame);
  }
-  for (c = dmux->chunks_; c != NULL;) {
+  for (c = dmux->chunks; c != NULL;) {
    Chunk* const cur_chunk = c;
-    c = c->next_;
+    c = c->next;
    WebPSafeFree(cur_chunk);
  }
  WebPSafeFree(dmux);
@@ -785,12 +788,12 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
  if (dmux == NULL) return 0;

  switch (feature) {
-    case WEBP_FF_FORMAT_FLAGS:     return dmux->feature_flags_;
-    case WEBP_FF_CANVAS_WIDTH:     return (uint32_t)dmux->canvas_width_;
-    case WEBP_FF_CANVAS_HEIGHT:    return (uint32_t)dmux->canvas_height_;
-    case WEBP_FF_LOOP_COUNT:       return (uint32_t)dmux->loop_count_;
-    case WEBP_FF_BACKGROUND_COLOR: return dmux->bgcolor_;
-    case WEBP_FF_FRAME_COUNT:      return (uint32_t)dmux->num_frames_;
+    case WEBP_FF_FORMAT_FLAGS:     return dmux->feature_flags;
+    case WEBP_FF_CANVAS_WIDTH:     return (uint32_t)dmux->canvas_width;
+    case WEBP_FF_CANVAS_HEIGHT:    return (uint32_t)dmux->canvas_height;
+    case WEBP_FF_LOOP_COUNT:       return (uint32_t)dmux->loop_count;
+    case WEBP_FF_BACKGROUND_COLOR: return dmux->bgcolor;
+    case WEBP_FF_FRAME_COUNT:      return (uint32_t)dmux->num_frames;
  }
  return 0;
 }
@@ -800,8 +803,8 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {

 static const Frame* GetFrame(const WebPDemuxer* const dmux, int frame_num) {
  const Frame* f;
-  for (f = dmux->frames_; f != NULL; f = f->next_) {
-    if (frame_num == f->frame_num_) break;
+  for (f = dmux->frames; f != NULL; f = f->next) {
+    if (frame_num == f->frame_num) break;
  }
  return f;
 }
@@ -811,19 +814,19 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
                                      size_t* const data_size) {
  *data_size = 0;
  if (frame != NULL) {
-    const ChunkData* const image = frame->img_components_;
-    const ChunkData* const alpha = frame->img_components_ + 1;
-    size_t start_offset = image->offset_;
-    *data_size = image->size_;
+    const ChunkData* const image = frame->img_components;
+    const ChunkData* const alpha = frame->img_components + 1;
+    size_t start_offset = image->offset;
+    *data_size = image->size;

    // if alpha exists it precedes image, update the size allowing for
    // intervening chunks.
-    if (alpha->size_ > 0) {
-      const size_t inter_size = (image->offset_ > 0)
-                              ? image->offset_ - (alpha->offset_ + alpha->size_)
+    if (alpha->size > 0) {
+      const size_t inter_size = (image->offset > 0)
+                              ? image->offset - (alpha->offset + alpha->size)
                              : 0;
-      start_offset = alpha->offset_;
-      *data_size  += alpha->size_ + inter_size;
+      start_offset = alpha->offset;
+      *data_size  += alpha->size + inter_size;
    }
    return mem_buf + start_offset;
  }
@@ -834,23 +837,23 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
 static int SynthesizeFrame(const WebPDemuxer* const dmux,
                           const Frame* const frame,
                           WebPIterator* const iter) {
-  const uint8_t* const mem_buf = dmux->mem_.buf_;
+  const uint8_t* const mem_buf = dmux->mem.buf;
  size_t payload_size = 0;
  const uint8_t* const payload = GetFramePayload(mem_buf, frame, &payload_size);
  if (payload == NULL) return 0;
  assert(frame != NULL);

-  iter->frame_num      = frame->frame_num_;
-  iter->num_frames     = dmux->num_frames_;
-  iter->x_offset       = frame->x_offset_;
-  iter->y_offset       = frame->y_offset_;
-  iter->width          = frame->width_;
-  iter->height         = frame->height_;
-  iter->has_alpha      = frame->has_alpha_;
-  iter->duration       = frame->duration_;
-  iter->dispose_method = frame->dispose_method_;
-  iter->blend_method   = frame->blend_method_;
-  iter->complete       = frame->complete_;
+  iter->frame_num      = frame->frame_num;
+  iter->num_frames     = dmux->num_frames;
+  iter->x_offset       = frame->x_offset;
+  iter->y_offset       = frame->y_offset;
+  iter->width          = frame->width;
+  iter->height         = frame->height;
+  iter->has_alpha      = frame->has_alpha;
+  iter->duration       = frame->duration;
+  iter->dispose_method = frame->dispose_method;
+  iter->blend_method   = frame->blend_method;
+  iter->complete       = frame->complete;
  iter->fragment.bytes = payload;
  iter->fragment.size  = payload_size;
  return 1;
@@ -860,8 +863,8 @@ static int SetFrame(int frame_num, WebPIterator* const iter) {
  const Frame* frame;
  const WebPDemuxer* const dmux = (WebPDemuxer*)iter->private_;
  if (dmux == NULL || frame_num < 0) return 0;
-  if (frame_num > dmux->num_frames_) return 0;
-  if (frame_num == 0) frame_num = dmux->num_frames_;
+  if (frame_num > dmux->num_frames) return 0;
+  if (frame_num == 0) frame_num = dmux->num_frames;

  frame = GetFrame(dmux, frame_num);
  if (frame == NULL) return 0;
@@ -896,11 +899,11 @@ void WebPDemuxReleaseIterator(WebPIterator* iter) {
 // Chunk iteration

 static int ChunkCount(const WebPDemuxer* const dmux, const char fourcc[4]) {
-  const uint8_t* const mem_buf = dmux->mem_.buf_;
+  const uint8_t* const mem_buf = dmux->mem.buf;
  const Chunk* c;
  int count = 0;
-  for (c = dmux->chunks_; c != NULL; c = c->next_) {
-    const uint8_t* const header = mem_buf + c->data_.offset_;
+  for (c = dmux->chunks; c != NULL; c = c->next) {
+    const uint8_t* const header = mem_buf + c->data.offset;
    if (!memcmp(header, fourcc, TAG_SIZE)) ++count;
  }
  return count;
@@ -908,11 +911,11 @@ static int ChunkCount(const WebPDemuxer* const dmux, const char fourcc[4]) {

 static const Chunk* GetChunk(const WebPDemuxer* const dmux,
                             const char fourcc[4], int chunk_num) {
-  const uint8_t* const mem_buf = dmux->mem_.buf_;
+  const uint8_t* const mem_buf = dmux->mem.buf;
  const Chunk* c;
  int count = 0;
-  for (c = dmux->chunks_; c != NULL; c = c->next_) {
-    const uint8_t* const header = mem_buf + c->data_.offset_;
+  for (c = dmux->chunks; c != NULL; c = c->next) {
+    const uint8_t* const header = mem_buf + c->data.offset;
    if (!memcmp(header, fourcc, TAG_SIZE)) ++count;
    if (count == chunk_num) break;
  }
@@ -930,10 +933,10 @@ static int SetChunk(const char fourcc[4], int chunk_num,
  if (chunk_num == 0) chunk_num = count;

  if (chunk_num <= count) {
-    const uint8_t* const mem_buf = dmux->mem_.buf_;
+    const uint8_t* const mem_buf = dmux->mem.buf;
    const Chunk* const chunk = GetChunk(dmux, fourcc, chunk_num);
-    iter->chunk.bytes = mem_buf + chunk->data_.offset_ + CHUNK_HEADER_SIZE;
-    iter->chunk.size  = chunk->data_.size_ - CHUNK_HEADER_SIZE;
+    iter->chunk.bytes = mem_buf + chunk->data.offset + CHUNK_HEADER_SIZE;
+    iter->chunk.size  = chunk->data.size - CHUNK_HEADER_SIZE;
    iter->num_chunks  = count;
    iter->chunk_num   = chunk_num;
    return 1;
@@ -972,4 +975,3 @@ int WebPDemuxPrevChunk(WebPChunkIterator* iter) {
 void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) {
  (void)iter;
 }
-
--- a/thirdparty/libwebp/src/dsp/alpha_processing.c
+++ b/thirdparty/libwebp/src/dsp/alpha_processing.c
@@ -12,7 +12,11 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include <assert.h>
+#include <stddef.h>
+
+#include "src/dsp/cpu.h"
 #include "src/dsp/dsp.h"
+#include "src/webp/types.h"

 // Tables can be faster on some platform but incur some extra binary size (~2k).
 #if !defined(USE_TABLES_FOR_ALPHA_MULT)
--- a/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c
+++ b/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c
@@ -16,6 +16,9 @@
 #if defined(WEBP_USE_SSE2)
 #include <emmintrin.h>

+#include "src/webp/types.h"
+#include "src/dsp/cpu.h"
+
 //------------------------------------------------------------------------------

 static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
@@ -26,38 +29,44 @@ static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
  uint32_t alpha_and = 0xff;
  int i, j;
  const __m128i zero = _mm_setzero_si128();
-  const __m128i rgb_mask = _mm_set1_epi32((int)0xffffff00);  // to preserve RGB
-  const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0);
-  __m128i all_alphas = all_0xff;
+  const __m128i alpha_mask = _mm_set1_epi32((int)0xff);  // to preserve A
+  const __m128i all_0xff = _mm_set1_epi8((char)0xff);
+  __m128i all_alphas16 = all_0xff;
+  __m128i all_alphas8 = all_0xff;

  // We must be able to access 3 extra bytes after the last written byte
  // 'dst[4 * width - 4]', because we don't know if alpha is the first or the
  // last byte of the quadruplet.
-  const int limit = (width - 1) & ~7;
-
  for (j = 0; j < height; ++j) {
-    __m128i* out = (__m128i*)dst;
-    for (i = 0; i < limit; i += 8) {
+    char* ptr = (char*)dst;
+    for (i = 0; i + 16 <= width - 1; i += 16) {
+      // load 16 alpha bytes
+      const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
+      const __m128i a1_lo = _mm_unpacklo_epi8(a0, zero);
+      const __m128i a1_hi = _mm_unpackhi_epi8(a0, zero);
+      const __m128i a2_lo_lo = _mm_unpacklo_epi16(a1_lo, zero);
+      const __m128i a2_lo_hi = _mm_unpackhi_epi16(a1_lo, zero);
+      const __m128i a2_hi_lo = _mm_unpacklo_epi16(a1_hi, zero);
+      const __m128i a2_hi_hi = _mm_unpackhi_epi16(a1_hi, zero);
+      _mm_maskmoveu_si128(a2_lo_lo, alpha_mask, ptr + 0);
+      _mm_maskmoveu_si128(a2_lo_hi, alpha_mask, ptr + 16);
+      _mm_maskmoveu_si128(a2_hi_lo, alpha_mask, ptr + 32);
+      _mm_maskmoveu_si128(a2_hi_hi, alpha_mask, ptr + 48);
+      // accumulate 16 alpha 'and' in parallel
+      all_alphas16 = _mm_and_si128(all_alphas16, a0);
+      ptr += 64;
+    }
+    if (i + 8 <= width - 1) {
      // load 8 alpha bytes
      const __m128i a0 = _mm_loadl_epi64((const __m128i*)&alpha[i]);
      const __m128i a1 = _mm_unpacklo_epi8(a0, zero);
      const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);
      const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
-      // load 8 dst pixels (32 bytes)
-      const __m128i b0_lo = _mm_loadu_si128(out + 0);
-      const __m128i b0_hi = _mm_loadu_si128(out + 1);
-      // mask dst alpha values
-      const __m128i b1_lo = _mm_and_si128(b0_lo, rgb_mask);
-      const __m128i b1_hi = _mm_and_si128(b0_hi, rgb_mask);
-      // combine
-      const __m128i b2_lo = _mm_or_si128(b1_lo, a2_lo);
-      const __m128i b2_hi = _mm_or_si128(b1_hi, a2_hi);
-      // store
-      _mm_storeu_si128(out + 0, b2_lo);
-      _mm_storeu_si128(out + 1, b2_hi);
-      // accumulate eight alpha 'and' in parallel
-      all_alphas = _mm_and_si128(all_alphas, a0);
-      out += 2;
+      _mm_maskmoveu_si128(a2_lo, alpha_mask, ptr);
+      _mm_maskmoveu_si128(a2_hi, alpha_mask, ptr + 16);
+      // accumulate 8 alpha 'and' in parallel
+      all_alphas8 = _mm_and_si128(all_alphas8, a0);
+      i += 8;
    }
    for (; i < width; ++i) {
      const uint32_t alpha_value = alpha[i];
@@ -68,8 +77,9 @@ static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
    dst += dst_stride;
  }
  // Combine the eight alpha 'and' into a 8-bit mask.
-  alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
-  return (alpha_and != 0xff);
+  alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas8, all_0xff)) & 0xff;
+  return (alpha_and != 0xff ||
+          _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas16, all_0xff)) != 0xffff);
 }

 static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha,
--- a/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c
+++ b/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c
@@ -11,10 +11,12 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include "src/dsp/cpu.h"
+#include "src/webp/types.h"
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE41)
-
+#include <emmintrin.h>
 #include <smmintrin.h>

 //------------------------------------------------------------------------------
--- a/thirdparty/libwebp/src/dsp/cost.c
+++ b/thirdparty/libwebp/src/dsp/cost.c
@@ -9,8 +9,15 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "src/dsp/cpu.h"
+#include "src/webp/types.h"
 #include "src/dsp/dsp.h"
 #include "src/enc/cost_enc.h"
+#include "src/enc/vp8i_enc.h"

 //------------------------------------------------------------------------------
 // Boolean-cost cost table
--- a/thirdparty/libwebp/src/dsp/cost_sse2.c
+++ b/thirdparty/libwebp/src/dsp/cost_sse2.c
@@ -16,6 +16,10 @@
 #if defined(WEBP_USE_SSE2)
 #include <emmintrin.h>

+#include <assert.h>
+
+#include "src/webp/types.h"
+#include "src/dsp/cpu.h"
 #include "src/enc/cost_enc.h"
 #include "src/enc/vp8i_enc.h"
 #include "src/utils/utils.h"
--- a/thirdparty/libwebp/src/dsp/cpu.c
+++ b/thirdparty/libwebp/src/dsp/cpu.c
@@ -22,6 +22,10 @@
 #include <cpu-features.h>
 #endif

+#include <stddef.h>
+
+#include "src/webp/types.h"
+
 //------------------------------------------------------------------------------
 // SSE2 detection.
 //
--- a/thirdparty/libwebp/src/dsp/cpu.h
+++ b/thirdparty/libwebp/src/dsp/cpu.h
@@ -56,6 +56,11 @@
    (defined(_M_X64) || defined(_M_IX86))
 #define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
 #endif
+
+#if defined(_MSC_VER) && !defined(__clang__) && _MSC_VER >= 1700 && \
+    (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_AVX2  // Visual C++ AVX2 targets
+#endif
 #endif

 // WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
@@ -80,6 +85,16 @@
 #define WEBP_HAVE_SSE41
 #endif

+#if (defined(__AVX2__) || defined(WEBP_MSC_AVX2)) && \
+    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_AVX2))
+#define WEBP_USE_AVX2
+#endif
+
+#if defined(WEBP_USE_AVX2) && !defined(WEBP_HAVE_AVX2)
+#define WEBP_HAVE_AVX2
+#endif
+
+#undef WEBP_MSC_AVX2
 #undef WEBP_MSC_SSE41
 #undef WEBP_MSC_SSE2

--- a/thirdparty/libwebp/src/dsp/dec.c
+++ b/thirdparty/libwebp/src/dsp/dec.c
@@ -12,10 +12,15 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include <assert.h>
+#include <stddef.h>
+#include <string.h>

-#include "src/dsp/dsp.h"
+#include "src/dec/common_dec.h"
 #include "src/dec/vp8i_dec.h"
+#include "src/dsp/cpu.h"
+#include "src/dsp/dsp.h"
 #include "src/utils/utils.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------

--- a/thirdparty/libwebp/src/dsp/dec_clip_tables.c
+++ b/thirdparty/libwebp/src/dsp/dec_clip_tables.c
@@ -11,6 +11,8 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include "src/dsp/cpu.h"
+#include "src/webp/types.h"
 #include "src/dsp/dsp.h"

 // define to 0 to have run-time table initialization
--- a/thirdparty/libwebp/src/dsp/dec_sse2.c
+++ b/thirdparty/libwebp/src/dsp/dec_sse2.c
@@ -23,9 +23,12 @@
 #endif

 #include <emmintrin.h>
-#include "src/dsp/common_sse2.h"
+
 #include "src/dec/vp8i_dec.h"
+#include "src/dsp/common_sse2.h"
+#include "src/dsp/cpu.h"
 #include "src/utils/utils.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
--- a/thirdparty/libwebp/src/dsp/dec_sse41.c
+++ b/thirdparty/libwebp/src/dsp/dec_sse41.c
@@ -14,9 +14,12 @@
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE41)
-
+#include <emmintrin.h>
 #include <smmintrin.h>
+
+#include "src/webp/types.h"
 #include "src/dec/vp8i_dec.h"
+#include "src/dsp/cpu.h"
 #include "src/utils/utils.h"

 static void HE16_SSE41(uint8_t* dst) {     // horizontal
--- a/thirdparty/libwebp/src/dsp/enc.c
+++ b/thirdparty/libwebp/src/dsp/enc.c
@@ -13,9 +13,13 @@

 #include <assert.h>
 #include <stdlib.h>  // for abs()
+#include <string.h>

+#include "src/dsp/cpu.h"
 #include "src/dsp/dsp.h"
 #include "src/enc/vp8i_enc.h"
+#include "src/utils/utils.h"
+#include "src/webp/types.h"

 static WEBP_INLINE uint8_t clip_8b(int v) {
  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
@@ -688,11 +692,11 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
  for (n = 0; n < 16; ++n) {
    const int j = kZigzag[n];
    const int sign = (in[j] < 0);
-    const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
-    if (coeff > mtx->zthresh_[j]) {
-      const uint32_t Q = mtx->q_[j];
-      const uint32_t iQ = mtx->iq_[j];
-      const uint32_t B = mtx->bias_[j];
+    const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen[j];
+    if (coeff > mtx->zthresh[j]) {
+      const uint32_t Q = mtx->q[j];
+      const uint32_t iQ = mtx->iq[j];
+      const uint32_t B = mtx->bias[j];
      int level = QUANTDIV(coeff, iQ, B);
      if (level > MAX_LEVEL) level = MAX_LEVEL;
      if (sign) level = -level;
--- a/thirdparty/libwebp/src/dsp/enc_mips32.c
+++ b/thirdparty/libwebp/src/dsp/enc_mips32.c
@@ -193,11 +193,11 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],

  int16_t* ppin             = &in[0];
  int16_t* pout             = &out[0];
-  const uint16_t* ppsharpen = &mtx->sharpen_[0];
-  const uint32_t* ppzthresh = &mtx->zthresh_[0];
-  const uint16_t* ppq       = &mtx->q_[0];
-  const uint16_t* ppiq      = &mtx->iq_[0];
-  const uint32_t* ppbias    = &mtx->bias_[0];
+  const uint16_t* ppsharpen = &mtx->sharpen[0];
+  const uint32_t* ppzthresh = &mtx->zthresh[0];
+  const uint16_t* ppq       = &mtx->q[0];
+  const uint16_t* ppiq      = &mtx->iq[0];
+  const uint32_t* ppbias    = &mtx->bias[0];

  __asm__ volatile(
    QUANTIZE_ONE( 0,  0,  0)
--- a/thirdparty/libwebp/src/dsp/enc_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/enc_mips_dsp_r2.c
@@ -1296,11 +1296,11 @@ static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],

  int16_t* ppin             = &in[0];
  int16_t* pout             = &out[0];
-  const uint16_t* ppsharpen = &mtx->sharpen_[0];
-  const uint32_t* ppzthresh = &mtx->zthresh_[0];
-  const uint16_t* ppq       = &mtx->q_[0];
-  const uint16_t* ppiq      = &mtx->iq_[0];
-  const uint32_t* ppbias    = &mtx->bias_[0];
+  const uint16_t* ppsharpen = &mtx->sharpen[0];
+  const uint32_t* ppzthresh = &mtx->zthresh[0];
+  const uint16_t* ppq       = &mtx->q[0];
+  const uint16_t* ppiq      = &mtx->iq[0];
+  const uint32_t* ppbias    = &mtx->bias[0];

  __asm__ volatile (
    QUANTIZE_ONE( 0,  0,  0,  2)
--- a/thirdparty/libwebp/src/dsp/enc_msa.c
+++ b/thirdparty/libwebp/src/dsp/enc_msa.c
@@ -845,7 +845,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
  const v8i16 maxlevel = __msa_fill_h(MAX_LEVEL);

  LD_SH2(&in[0], 8, in0, in1);
-  LD_SH2(&mtx->sharpen_[0], 8, sh0, sh1);
+  LD_SH2(&mtx->sharpen[0], 8, sh0, sh1);
  tmp4 = __msa_add_a_h(in0, zero);
  tmp5 = __msa_add_a_h(in1, zero);
  ILVRL_H2_SH(sh0, tmp4, tmp0, tmp1);
@@ -853,10 +853,10 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
  HADD_SH4_SW(tmp0, tmp1, tmp2, tmp3, s0, s1, s2, s3);
  sign0 = (in0 < zero);
  sign1 = (in1 < zero);                           // sign
-  LD_SH2(&mtx->iq_[0], 8, tmp0, tmp1);            // iq
+  LD_SH2(&mtx->iq[0], 8, tmp0, tmp1);             // iq
  ILVRL_H2_SW(zero, tmp0, t0, t1);
  ILVRL_H2_SW(zero, tmp1, t2, t3);
-  LD_SW4(&mtx->bias_[0], 4, b0, b1, b2, b3);      // bias
+  LD_SW4(&mtx->bias[0], 4, b0, b1, b2, b3);       // bias
  MUL4(t0, s0, t1, s1, t2, s2, t3, s3, t0, t1, t2, t3);
  ADD4(b0, t0, b1, t1, b2, t2, b3, t3, b0, b1, b2, b3);
  SRAI_W4_SW(b0, b1, b2, b3, 17);
@@ -868,7 +868,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
  SUB2(zero, tmp2, zero, tmp3, tmp0, tmp1);
  tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)tmp0, (v16u8)sign0);
  tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)tmp1, (v16u8)sign1);
-  LD_SW4(&mtx->zthresh_[0], 4, t0, t1, t2, t3);   // zthresh
+  LD_SW4(&mtx->zthresh[0], 4, t0, t1, t2, t3);    // zthresh
  t0 = (s0 > t0);
  t1 = (s1 > t1);
  t2 = (s2 > t2);
@@ -876,7 +876,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
  PCKEV_H2_SH(t1, t0, t3, t2, tmp0, tmp1);
  tmp4 = (v8i16)__msa_bmnz_v((v16u8)zero, (v16u8)tmp2, (v16u8)tmp0);
  tmp5 = (v8i16)__msa_bmnz_v((v16u8)zero, (v16u8)tmp3, (v16u8)tmp1);
-  LD_SH2(&mtx->q_[0], 8, tmp0, tmp1);
+  LD_SH2(&mtx->q[0], 8, tmp0, tmp1);
  MUL2(tmp4, tmp0, tmp5, tmp1, in0, in1);
  VSHF_H2_SH(tmp4, tmp5, tmp4, tmp5, zigzag0, zigzag1, out0, out1);
  ST_SH2(in0, in1, &in[0], 8);
--- a/thirdparty/libwebp/src/dsp/enc_neon.c
+++ b/thirdparty/libwebp/src/dsp/enc_neon.c
@@ -841,11 +841,11 @@ static int SSE4x4_NEON(const uint8_t* WEBP_RESTRICT a,
 static int16x8_t Quantize_NEON(int16_t* WEBP_RESTRICT const in,
                               const VP8Matrix* WEBP_RESTRICT const mtx,
                               int offset) {
-  const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
-  const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
-  const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
-  const uint32x4_t bias0 = vld1q_u32(&mtx->bias_[offset + 0]);
-  const uint32x4_t bias1 = vld1q_u32(&mtx->bias_[offset + 4]);
+  const uint16x8_t sharp = vld1q_u16(&mtx->sharpen[offset]);
+  const uint16x8_t q = vld1q_u16(&mtx->q[offset]);
+  const uint16x8_t iq = vld1q_u16(&mtx->iq[offset]);
+  const uint32x4_t bias0 = vld1q_u32(&mtx->bias[offset + 0]);
+  const uint32x4_t bias1 = vld1q_u32(&mtx->bias[offset + 4]);

  const int16x8_t a = vld1q_s16(in + offset);                // in
  const uint16x8_t b = vreinterpretq_u16_s16(vabsq_s16(a));  // coeff = abs(in)
@@ -945,6 +945,28 @@ static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
    vst1q_u8(dst, r);                                                          \
  } while (0)

+static WEBP_INLINE uint8x8x2_t Vld1U8x2(const uint8_t* ptr) {
+#if LOCAL_CLANG_PREREQ(3, 4) || LOCAL_GCC_PREREQ(8, 5) || defined(_MSC_VER)
+  return vld1_u8_x2(ptr);
+#else
+  uint8x8x2_t res;
+  INIT_VECTOR2(res, vld1_u8(ptr + 0 * 8), vld1_u8(ptr + 1 * 8));
+  return res;
+#endif
+}
+
+static WEBP_INLINE uint8x16x4_t Vld1qU8x4(const uint8_t* ptr) {
+#if LOCAL_CLANG_PREREQ(3, 4) || LOCAL_GCC_PREREQ(9, 4) || defined(_MSC_VER)
+  return vld1q_u8_x4(ptr);
+#else
+  uint8x16x4_t res;
+  INIT_VECTOR4(res,
+               vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),
+               vld1q_u8(ptr + 2 * 16), vld1q_u8(ptr + 3 * 16));
+  return res;
+#endif
+}
+
 static void Intra4Preds_NEON(uint8_t* WEBP_RESTRICT dst,
                             const uint8_t* WEBP_RESTRICT top) {
  // 0   1   2   3   4   5   6   7   8   9  10  11  12  13
@@ -971,9 +993,9 @@ static void Intra4Preds_NEON(uint8_t* WEBP_RESTRICT dst,
    30, 30, 30, 30,  0,  0,  0,  0, 21, 22, 23, 24, 16, 16, 16, 16
  };

-  const uint8x16x4_t lookup_avgs1 = vld1q_u8_x4(kLookupTbl1);
-  const uint8x16x4_t lookup_avgs2 = vld1q_u8_x4(kLookupTbl2);
-  const uint8x16x4_t lookup_avgs3 = vld1q_u8_x4(kLookupTbl3);
+  const uint8x16x4_t lookup_avgs1 = Vld1qU8x4(kLookupTbl1);
+  const uint8x16x4_t lookup_avgs2 = Vld1qU8x4(kLookupTbl2);
+  const uint8x16x4_t lookup_avgs3 = Vld1qU8x4(kLookupTbl3);

  const uint8x16_t preload = vld1q_u8(top - 5);
  uint8x16x2_t qcombined;
@@ -1167,7 +1189,7 @@ static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, const uint8_t* left,

  // Neither left nor top are NULL.
  a = vdupq_n_u16(left[-1]);
-  inner = vld1_u8_x2(top);
+  inner = Vld1U8x2(top);

  for (i = 0; i < 4; i++) {
    const uint8x8x4_t outer = vld4_dup_u8(&left[i * 4]);
--- a/thirdparty/libwebp/src/dsp/enc_sse2.c
+++ b/thirdparty/libwebp/src/dsp/enc_sse2.c
@@ -14,13 +14,18 @@
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE2)
-#include <assert.h>
-#include <stdlib.h>  // for abs()
 #include <emmintrin.h>

+#include <assert.h>
+#include <stdlib.h>  // for abs()
+#include <string.h>
+
 #include "src/dsp/common_sse2.h"
+#include "src/dsp/cpu.h"
 #include "src/enc/cost_enc.h"
 #include "src/enc/vp8i_enc.h"
+#include "src/utils/utils.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
@@ -1410,10 +1415,10 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(
  // Load all inputs.
  __m128i in0 = _mm_loadu_si128((__m128i*)&in[0]);
  __m128i in8 = _mm_loadu_si128((__m128i*)&in[8]);
-  const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq_[0]);
-  const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq_[8]);
-  const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q_[0]);
-  const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q_[8]);
+  const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq[0]);
+  const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq[8]);
+  const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q[0]);
+  const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q[8]);

  // extract sign(in)  (0x0000 if positive, 0xffff if negative)
  const __m128i sign0 = _mm_cmpgt_epi16(zero, in0);
@@ -1446,10 +1451,10 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(
    __m128i out_08 = _mm_unpacklo_epi16(coeff_iQ8L, coeff_iQ8H);
    __m128i out_12 = _mm_unpackhi_epi16(coeff_iQ8L, coeff_iQ8H);
    // out = (coeff * iQ + B)
-    const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias_[0]);
-    const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias_[4]);
-    const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias_[8]);
-    const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias_[12]);
+    const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias[0]);
+    const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias[4]);
+    const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias[8]);
+    const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias[12]);
    out_00 = _mm_add_epi32(out_00, bias_00);
    out_04 = _mm_add_epi32(out_04, bias_04);
    out_08 = _mm_add_epi32(out_08, bias_08);
@@ -1512,7 +1517,7 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(

 static int QuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
                              const VP8Matrix* WEBP_RESTRICT const mtx) {
-  return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen_[0], mtx);
+  return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen[0], mtx);
 }

 static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
@@ -1523,7 +1528,7 @@ static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
 static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
                                const VP8Matrix* WEBP_RESTRICT const mtx) {
  int nz;
-  const uint16_t* const sharpen = &mtx->sharpen_[0];
+  const uint16_t* const sharpen = &mtx->sharpen[0];
  nz  = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
  nz |= DoQuantizeBlock_SSE2(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
  return nz;
--- a/thirdparty/libwebp/src/dsp/enc_sse41.c
+++ b/thirdparty/libwebp/src/dsp/enc_sse41.c
@@ -14,11 +14,15 @@
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE41)
+#include <emmintrin.h>
 #include <smmintrin.h>
+
 #include <stdlib.h>  // for abs()

 #include "src/dsp/common_sse2.h"
+#include "src/dsp/cpu.h"
 #include "src/enc/vp8i_enc.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // Compute susceptibility based on DCT-coeff histograms.
@@ -211,10 +215,10 @@ static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
  // Load all inputs.
  __m128i in0 = _mm_loadu_si128((__m128i*)&in[0]);
  __m128i in8 = _mm_loadu_si128((__m128i*)&in[8]);
-  const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq_[0]);
-  const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq_[8]);
-  const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q_[0]);
-  const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q_[8]);
+  const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq[0]);
+  const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq[8]);
+  const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q[0]);
+  const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q[8]);

  // coeff = abs(in)
  __m128i coeff0 = _mm_abs_epi16(in0);
@@ -241,10 +245,10 @@ static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
    __m128i out_08 = _mm_unpacklo_epi16(coeff_iQ8L, coeff_iQ8H);
    __m128i out_12 = _mm_unpackhi_epi16(coeff_iQ8L, coeff_iQ8H);
    // out = (coeff * iQ + B)
-    const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias_[0]);
-    const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias_[4]);
-    const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias_[8]);
-    const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias_[12]);
+    const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias[0]);
+    const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias[4]);
+    const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias[8]);
+    const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias[12]);
    out_00 = _mm_add_epi32(out_00, bias_00);
    out_04 = _mm_add_epi32(out_04, bias_04);
    out_08 = _mm_add_epi32(out_08, bias_08);
@@ -305,7 +309,7 @@ static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],

 static int QuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
                               const VP8Matrix* WEBP_RESTRICT const mtx) {
-  return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen_[0], mtx);
+  return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen[0], mtx);
 }

 static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
@@ -316,7 +320,7 @@ static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
 static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
                                 const VP8Matrix* WEBP_RESTRICT const mtx) {
  int nz;
-  const uint16_t* const sharpen = &mtx->sharpen_[0];
+  const uint16_t* const sharpen = &mtx->sharpen[0];
  nz  = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
  nz |= DoQuantizeBlock_SSE41(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
  return nz;
--- a/thirdparty/libwebp/src/dsp/filters.c
+++ b/thirdparty/libwebp/src/dsp/filters.c
@@ -11,11 +11,14 @@
 //
 // Author: Urvang (urvang@google.com)

-#include "src/dsp/dsp.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>

+#include "src/dsp/cpu.h"
+#include "src/dsp/dsp.h"
+#include "src/webp/types.h"
+
 //------------------------------------------------------------------------------
 // Helpful macro.

--- a/thirdparty/libwebp/src/dsp/filters_sse2.c
+++ b/thirdparty/libwebp/src/dsp/filters_sse2.c
@@ -20,6 +20,9 @@
 #include <stdlib.h>
 #include <string.h>

+#include "src/dsp/cpu.h"
+#include "src/webp/types.h"
+
 //------------------------------------------------------------------------------
 // Helpful macro.

--- a/thirdparty/libwebp/src/dsp/lossless.c
+++ b/thirdparty/libwebp/src/dsp/lossless.c
@@ -13,15 +13,21 @@
 //          Jyrki Alakuijala (jyrki@google.com)
 //          Urvang Joshi (urvang@google.com)

-#include "src/dsp/dsp.h"
+#include "src/dsp/lossless.h"

 #include <assert.h>
-#include <math.h>
 #include <stdlib.h>
+#include <string.h>
+
 #include "src/dec/vp8li_dec.h"
-#include "src/utils/endian_inl_utils.h"
-#include "src/dsp/lossless.h"
+#include "src/dsp/cpu.h"
+#include "src/dsp/dsp.h"
 #include "src/dsp/lossless_common.h"
+#include "src/utils/endian_inl_utils.h"
+#include "src/utils/utils.h"
+#include "src/webp/decode.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // Image transforms.
@@ -215,7 +221,7 @@ GENERATE_PREDICTOR_ADD(VP8LPredictor13_C, PredictorAdd13_C)
 static void PredictorInverseTransform_C(const VP8LTransform* const transform,
                                        int y_start, int y_end,
                                        const uint32_t* in, uint32_t* out) {
-  const int width = transform->xsize_;
+  const int width = transform->xsize;
  if (y_start == 0) {  // First Row follows the L (mode=1) mode.
    PredictorAdd0_C(in, NULL, 1, out);
    PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
@@ -226,11 +232,11 @@ static void PredictorInverseTransform_C(const VP8LTransform* const transform,

  {
    int y = y_start;
-    const int tile_width = 1 << transform->bits_;
+    const int tile_width = 1 << transform->bits;
    const int mask = tile_width - 1;
-    const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
+    const int tiles_per_row = VP8LSubSampleSize(width, transform->bits);
    const uint32_t* pred_mode_base =
-        transform->data_ + (y >> transform->bits_) * tiles_per_row;
+        transform->data + (y >> transform->bits) * tiles_per_row;

    while (y < y_end) {
      const uint32_t* pred_mode_src = pred_mode_base;
@@ -278,9 +284,9 @@ static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,

 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
                                               VP8LMultipliers* const m) {
-  m->green_to_red_  = (color_code >>  0) & 0xff;
-  m->green_to_blue_ = (color_code >>  8) & 0xff;
-  m->red_to_blue_   = (color_code >> 16) & 0xff;
+  m->green_to_red  = (color_code >>  0) & 0xff;
+  m->green_to_blue = (color_code >>  8) & 0xff;
+  m->red_to_blue   = (color_code >> 16) & 0xff;
 }

 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
@@ -293,10 +299,10 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
    const uint32_t red = argb >> 16;
    int new_red = red & 0xff;
    int new_blue = argb & 0xff;
-    new_red += ColorTransformDelta((int8_t)m->green_to_red_, green);
+    new_red += ColorTransformDelta((int8_t)m->green_to_red, green);
    new_red &= 0xff;
-    new_blue += ColorTransformDelta((int8_t)m->green_to_blue_, green);
-    new_blue += ColorTransformDelta((int8_t)m->red_to_blue_, (int8_t)new_red);
+    new_blue += ColorTransformDelta((int8_t)m->green_to_blue, green);
+    new_blue += ColorTransformDelta((int8_t)m->red_to_blue, (int8_t)new_red);
    new_blue &= 0xff;
    dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
  }
@@ -306,15 +312,15 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
 static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
                                         int y_start, int y_end,
                                         const uint32_t* src, uint32_t* dst) {
-  const int width = transform->xsize_;
-  const int tile_width = 1 << transform->bits_;
+  const int width = transform->xsize;
+  const int tile_width = 1 << transform->bits;
  const int mask = tile_width - 1;
  const int safe_width = width & ~mask;
  const int remaining_width = width - safe_width;
-  const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
+  const int tiles_per_row = VP8LSubSampleSize(width, transform->bits);
  int y = y_start;
  const uint32_t* pred_row =
-      transform->data_ + (y >> transform->bits_) * tiles_per_row;
+      transform->data + (y >> transform->bits) * tiles_per_row;

  while (y < y_end) {
    const uint32_t* pred = pred_row;
@@ -356,11 +362,11 @@ STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
                           int y_start, int y_end, const TYPE* src,            \
                           TYPE* dst) {                                        \
  int y;                                                                       \
-  const int bits_per_pixel = 8 >> transform->bits_;                            \
-  const int width = transform->xsize_;                                         \
-  const uint32_t* const color_map = transform->data_;                          \
+  const int bits_per_pixel = 8 >> transform->bits;                             \
+  const int width = transform->xsize;                                          \
+  const uint32_t* const color_map = transform->data;                           \
  if (bits_per_pixel < 8) {                                                    \
-    const int pixels_per_byte = 1 << transform->bits_;                         \
+    const int pixels_per_byte = 1 << transform->bits;                          \
    const int count_mask = pixels_per_byte - 1;                                \
    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
    for (y = y_start; y < y_end; ++y) {                                        \
@@ -391,16 +397,16 @@ COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
 void VP8LInverseTransform(const VP8LTransform* const transform,
                          int row_start, int row_end,
                          const uint32_t* const in, uint32_t* const out) {
-  const int width = transform->xsize_;
+  const int width = transform->xsize;
  assert(row_start < row_end);
-  assert(row_end <= transform->ysize_);
-  switch (transform->type_) {
+  assert(row_end <= transform->ysize);
+  switch (transform->type) {
    case SUBTRACT_GREEN_TRANSFORM:
      VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
      break;
    case PREDICTOR_TRANSFORM:
      PredictorInverseTransform_C(transform, row_start, row_end, in, out);
-      if (row_end != transform->ysize_) {
+      if (row_end != transform->ysize) {
        // The last predicted row in this iteration will be the top-pred row
        // for the first row in next iteration.
        memcpy(out - width, out + (row_end - row_start - 1) * width,
@@ -411,15 +417,15 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
      ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out);
      break;
    case COLOR_INDEXING_TRANSFORM:
-      if (in == out && transform->bits_ > 0) {
+      if (in == out && transform->bits > 0) {
        // Move packed pixels to the end of unpacked region, so that unpacking
        // can occur seamlessly.
        // Also, note that this is the only transform that applies on
-        // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
-        // transforms work on effective width of xsize_.
+        // the effective width of VP8LSubSampleSize(xsize, bits). All other
+        // transforms work on effective width of 'xsize'.
        const int out_stride = (row_end - row_start) * width;
        const int in_stride = (row_end - row_start) *
-            VP8LSubSampleSize(transform->xsize_, transform->bits_);
+            VP8LSubSampleSize(transform->xsize, transform->bits);
        uint32_t* const src = out + out_stride - in_stride;
        memmove(src, out, in_stride * sizeof(*src));
        ColorIndexInverseTransform_C(transform, row_start, row_end, src, out);
@@ -571,16 +577,21 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
 //------------------------------------------------------------------------------

 VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
+VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed_SSE;
 VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
+VP8LPredictorAddSubFunc VP8LPredictorsAdd_SSE[16];
 VP8LPredictorFunc VP8LPredictors[16];

 // exposed plain-C implementations
 VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];

 VP8LTransformColorInverseFunc VP8LTransformColorInverse;
+VP8LTransformColorInverseFunc VP8LTransformColorInverse_SSE;

 VP8LConvertFunc VP8LConvertBGRAToRGB;
+VP8LConvertFunc VP8LConvertBGRAToRGB_SSE;
 VP8LConvertFunc VP8LConvertBGRAToRGBA;
+VP8LConvertFunc VP8LConvertBGRAToRGBA_SSE;
 VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
 VP8LConvertFunc VP8LConvertBGRAToRGB565;
 VP8LConvertFunc VP8LConvertBGRAToBGR;
@@ -591,6 +602,7 @@ VP8LMapAlphaFunc VP8LMapColor8b;
 extern VP8CPUInfo VP8GetCPUInfo;
 extern void VP8LDspInitSSE2(void);
 extern void VP8LDspInitSSE41(void);
+extern void VP8LDspInitAVX2(void);
 extern void VP8LDspInitNEON(void);
 extern void VP8LDspInitMIPSdspR2(void);
 extern void VP8LDspInitMSA(void);
@@ -643,6 +655,11 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) {
 #if defined(WEBP_HAVE_SSE41)
      if (VP8GetCPUInfo(kSSE4_1)) {
        VP8LDspInitSSE41();
+#if defined(WEBP_HAVE_AVX2)
+        if (VP8GetCPUInfo(kAVX2)) {
+          VP8LDspInitAVX2();
+        }
+#endif
      }
 #endif
    }
--- a/thirdparty/libwebp/src/dsp/lossless.h
+++ b/thirdparty/libwebp/src/dsp/lossless.h
@@ -15,13 +15,10 @@
 #ifndef WEBP_DSP_LOSSLESS_H_
 #define WEBP_DSP_LOSSLESS_H_

+#include "src/dsp/dsp.h"
 #include "src/webp/types.h"
 #include "src/webp/decode.h"

-#include "src/dsp/dsp.h"
-#include "src/enc/histogram_enc.h"
-#include "src/utils/utils.h"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -64,22 +61,25 @@ typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
                                        uint32_t* WEBP_RESTRICT out);
 extern VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
 extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
+extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_SSE[16];

 typedef void (*VP8LProcessDecBlueAndRedFunc)(const uint32_t* src,
                                             int num_pixels, uint32_t* dst);
 extern VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
+extern VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed_SSE;

 typedef struct {
  // Note: the members are uint8_t, so that any negative values are
  // automatically converted to "mod 256" values.
-  uint8_t green_to_red_;
-  uint8_t green_to_blue_;
-  uint8_t red_to_blue_;
+  uint8_t green_to_red;
+  uint8_t green_to_blue;
+  uint8_t red_to_blue;
 } VP8LMultipliers;
 typedef void (*VP8LTransformColorInverseFunc)(const VP8LMultipliers* const m,
                                              const uint32_t* src,
                                              int num_pixels, uint32_t* dst);
 extern VP8LTransformColorInverseFunc VP8LTransformColorInverse;
+extern VP8LTransformColorInverseFunc VP8LTransformColorInverse_SSE;

 struct VP8LTransform;  // Defined in dec/vp8li.h.

@@ -99,6 +99,8 @@ extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
 extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
 extern VP8LConvertFunc VP8LConvertBGRAToRGB565;
 extern VP8LConvertFunc VP8LConvertBGRAToBGR;
+extern VP8LConvertFunc VP8LConvertBGRAToRGB_SSE;
+extern VP8LConvertFunc VP8LConvertBGRAToRGBA_SSE;

 // Converts from BGRA to other color spaces.
 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
@@ -149,21 +151,25 @@ void VP8LDspInit(void);

 typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
 extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed_SSE;
 typedef void (*VP8LTransformColorFunc)(
    const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT dst,
    int num_pixels);
 extern VP8LTransformColorFunc VP8LTransformColor;
+extern VP8LTransformColorFunc VP8LTransformColor_SSE;
 typedef void (*VP8LCollectColorBlueTransformsFunc)(
    const uint32_t* WEBP_RESTRICT argb, int stride,
    int tile_width, int tile_height,
    int green_to_blue, int red_to_blue, uint32_t histo[]);
 extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
+extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms_SSE;

 typedef void (*VP8LCollectColorRedTransformsFunc)(
    const uint32_t* WEBP_RESTRICT argb, int stride,
    int tile_width, int tile_height,
    int green_to_red, uint32_t histo[]);
 extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
+extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms_SSE;

 // Expose some C-only fallback functions
 void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
@@ -181,20 +187,17 @@ void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,

 extern VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
 extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
+extern VP8LPredictorAddSubFunc VP8LPredictorsSub_SSE[16];

 // -----------------------------------------------------------------------------
 // Huffman-cost related functions.

 typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
-typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* WEBP_RESTRICT X,
-                                         const uint32_t* WEBP_RESTRICT Y,
-                                         int length);
 typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
                                                   const uint32_t Y[256]);
 typedef uint64_t (*VP8LShannonEntropyFunc)(const uint32_t* X, int length);

 extern VP8LCostFunc VP8LExtraCost;
-extern VP8LCostCombinedFunc VP8LExtraCostCombined;
 extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
 extern VP8LShannonEntropyFunc VP8LShannonEntropy;

@@ -239,9 +242,6 @@ extern VP8LAddVectorFunc VP8LAddVector;
 typedef void (*VP8LAddVectorEqFunc)(const uint32_t* WEBP_RESTRICT a,
                                    uint32_t* WEBP_RESTRICT out, int size);
 extern VP8LAddVectorEqFunc VP8LAddVectorEq;
-void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
-                      const VP8LHistogram* WEBP_RESTRICT const b,
-                      VP8LHistogram* WEBP_RESTRICT const out);

 // -----------------------------------------------------------------------------
 // PrefixEncode()
@@ -255,6 +255,7 @@ typedef void (*VP8LBundleColorMapFunc)(const uint8_t* WEBP_RESTRICT const row,
                                       int width, int xbits,
                                       uint32_t* WEBP_RESTRICT dst);
 extern VP8LBundleColorMapFunc VP8LBundleColorMap;
+extern VP8LBundleColorMapFunc VP8LBundleColorMap_SSE;
 void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
                          int width, int xbits, uint32_t* WEBP_RESTRICT dst);

--- a/thirdparty/libwebp/src/dsp/lossless_avx2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_avx2.c
@@ -0,0 +1,443 @@
+// Copyright 2025 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// AVX2 variant of methods for lossless decoder
+//
+// Author: Vincent Rabaud (vrabaud@google.com)
+
+#include "src/dsp/dsp.h"
+
+#if defined(WEBP_USE_AVX2)
+
+#include <stddef.h>
+#include <immintrin.h>
+
+#include "src/dsp/cpu.h"
+#include "src/dsp/lossless.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"
+
+//------------------------------------------------------------------------------
+// Predictor Transform
+
+static WEBP_INLINE void Average2_m256i(const __m256i* const a0,
+                                       const __m256i* const a1,
+                                       __m256i* const avg) {
+  // (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1)
+  const __m256i ones = _mm256_set1_epi8(1);
+  const __m256i avg1 = _mm256_avg_epu8(*a0, *a1);
+  const __m256i one = _mm256_and_si256(_mm256_xor_si256(*a0, *a1), ones);
+  *avg = _mm256_sub_epi8(avg1, one);
+}
+
+// Batch versions of those functions.
+
+// Predictor0: ARGB_BLACK.
+static void PredictorAdd0_AVX2(const uint32_t* in, const uint32_t* upper,
+                               int num_pixels, uint32_t* WEBP_RESTRICT out) {
+  int i;
+  const __m256i black = _mm256_set1_epi32((int)ARGB_BLACK);
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
+    const __m256i res = _mm256_add_epi8(src, black);
+    _mm256_storeu_si256((__m256i*)&out[i], res);
+  }
+  if (i != num_pixels) {
+    VP8LPredictorsAdd_SSE[0](in + i, NULL, num_pixels - i, out + i);
+  }
+  (void)upper;
+}
+
+// Predictor1: left.
+static void PredictorAdd1_AVX2(const uint32_t* in, const uint32_t* upper,
+                               int num_pixels, uint32_t* WEBP_RESTRICT out) {
+  int i;
+  __m256i prev = _mm256_set1_epi32((int)out[-1]);
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    // h | g | f | e | d | c | b | a
+    const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
+    // g | f | e | 0 | c | b | a | 0
+    const __m256i shift0 = _mm256_slli_si256(src, 4);
+    // g + h | f + g | e + f | e | c + d | b + c | a + b | a
+    const __m256i sum0 = _mm256_add_epi8(src, shift0);
+    // e + f | e | 0 | 0 | a + b | a | 0 | 0
+    const __m256i shift1 = _mm256_slli_si256(sum0, 8);
+    // e + f + g + h | e + f + g | e + f | e | a + b + c + d | a + b + c | a + b
+    // | a
+    const __m256i sum1 = _mm256_add_epi8(sum0, shift1);
+    // Add a + b + c + d to the upper lane.
+    const int32_t sum_abcd = _mm256_extract_epi32(sum1, 3);
+    const __m256i sum2 = _mm256_add_epi8(
+        sum1,
+        _mm256_set_epi32(sum_abcd, sum_abcd, sum_abcd, sum_abcd, 0, 0, 0, 0));
+
+    const __m256i res = _mm256_add_epi8(sum2, prev);
+    _mm256_storeu_si256((__m256i*)&out[i], res);
+    // replicate last res output in prev.
+    prev = _mm256_permutevar8x32_epi32(
+        res, _mm256_set_epi32(7, 7, 7, 7, 7, 7, 7, 7));
+  }
+  if (i != num_pixels) {
+    VP8LPredictorsAdd_SSE[1](in + i, upper + i, num_pixels - i, out + i);
+  }
+}
+
+// Macro that adds 32-bit integers from IN using mod 256 arithmetic
+// per 8 bit channel.
+#define GENERATE_PREDICTOR_1(X, IN)                                         \
+  static void PredictorAdd##X##_AVX2(const uint32_t* in,                    \
+                                     const uint32_t* upper, int num_pixels, \
+                                     uint32_t* WEBP_RESTRICT out) {         \
+    int i;                                                                  \
+    for (i = 0; i + 8 <= num_pixels; i += 8) {                              \
+      const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);       \
+      const __m256i other = _mm256_loadu_si256((const __m256i*)&(IN));      \
+      const __m256i res = _mm256_add_epi8(src, other);                      \
+      _mm256_storeu_si256((__m256i*)&out[i], res);                          \
+    }                                                                       \
+    if (i != num_pixels) {                                                  \
+      VP8LPredictorsAdd_SSE[(X)](in + i, upper + i, num_pixels - i, out + i); \
+    }                                                                       \
+  }
+
+// Predictor2: Top.
+GENERATE_PREDICTOR_1(2, upper[i])
+// Predictor3: Top-right.
+GENERATE_PREDICTOR_1(3, upper[i + 1])
+// Predictor4: Top-left.
+GENERATE_PREDICTOR_1(4, upper[i - 1])
+#undef GENERATE_PREDICTOR_1
+
+// Due to averages with integers, values cannot be accumulated in parallel for
+// predictors 5 to 7.
+
+#define GENERATE_PREDICTOR_2(X, IN)                                         \
+  static void PredictorAdd##X##_AVX2(const uint32_t* in,                    \
+                                     const uint32_t* upper, int num_pixels, \
+                                     uint32_t* WEBP_RESTRICT out) {         \
+    int i;                                                                  \
+    for (i = 0; i + 8 <= num_pixels; i += 8) {                              \
+      const __m256i Tother = _mm256_loadu_si256((const __m256i*)&(IN));     \
+      const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);      \
+      const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);       \
+      __m256i avg, res;                                                     \
+      Average2_m256i(&T, &Tother, &avg);                                    \
+      res = _mm256_add_epi8(avg, src);                                      \
+      _mm256_storeu_si256((__m256i*)&out[i], res);                          \
+    }                                                                       \
+    if (i != num_pixels) {                                                  \
+      VP8LPredictorsAdd_SSE[(X)](in + i, upper + i, num_pixels - i, out + i); \
+    }                                                                       \
+  }
+// Predictor8: average TL T.
+GENERATE_PREDICTOR_2(8, upper[i - 1])
+// Predictor9: average T TR.
+GENERATE_PREDICTOR_2(9, upper[i + 1])
+#undef GENERATE_PREDICTOR_2
+
+// Predictor10: average of (average of (L,TL), average of (T, TR)).
+#define DO_PRED10(OUT)                                  \
+  do {                                                  \
+    __m256i avgLTL, avg;                                \
+    Average2_m256i(&L, &TL, &avgLTL);                   \
+    Average2_m256i(&avgTTR, &avgLTL, &avg);             \
+    L = _mm256_add_epi8(avg, src);                      \
+    out[i + (OUT)] = (uint32_t)_mm256_cvtsi256_si32(L); \
+  } while (0)
+
+#define DO_PRED10_SHIFT                                         \
+  do {                                                          \
+    /* Rotate the pre-computed values for the next iteration.*/ \
+    avgTTR = _mm256_srli_si256(avgTTR, 4);                      \
+    TL = _mm256_srli_si256(TL, 4);                              \
+    src = _mm256_srli_si256(src, 4);                            \
+  } while (0)
+
+static void PredictorAdd10_AVX2(const uint32_t* in, const uint32_t* upper,
+                                int num_pixels, uint32_t* WEBP_RESTRICT out) {
+  int i, j;
+  __m256i L = _mm256_setr_epi32((int)out[-1], 0, 0, 0, 0, 0, 0, 0);
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
+    __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
+    const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
+    const __m256i TR = _mm256_loadu_si256((const __m256i*)&upper[i + 1]);
+    __m256i avgTTR;
+    Average2_m256i(&T, &TR, &avgTTR);
+    {
+      const __m256i avgTTR_bak = avgTTR;
+      const __m256i TL_bak = TL;
+      const __m256i src_bak = src;
+      for (j = 0; j < 4; ++j) {
+        DO_PRED10(j);
+        DO_PRED10_SHIFT;
+      }
+      avgTTR = _mm256_permute2x128_si256(avgTTR_bak, avgTTR_bak, 1);
+      TL = _mm256_permute2x128_si256(TL_bak, TL_bak, 1);
+      src = _mm256_permute2x128_si256(src_bak, src_bak, 1);
+      for (; j < 8; ++j) {
+        DO_PRED10(j);
+        DO_PRED10_SHIFT;
+      }
+    }
+  }
+  if (i != num_pixels) {
+    VP8LPredictorsAdd_SSE[10](in + i, upper + i, num_pixels - i, out + i);
+  }
+}
+#undef DO_PRED10
+#undef DO_PRED10_SHIFT
+
+// Predictor11: select.
+#define DO_PRED11(OUT)                                                      \
+  do {                                                                      \
+    const __m256i L_lo = _mm256_unpacklo_epi32(L, T);                       \
+    const __m256i TL_lo = _mm256_unpacklo_epi32(TL, T);                     \
+    const __m256i pb = _mm256_sad_epu8(L_lo, TL_lo); /* pb = sum |L-TL|*/   \
+    const __m256i mask = _mm256_cmpgt_epi32(pb, pa);                        \
+    const __m256i A = _mm256_and_si256(mask, L);                            \
+    const __m256i B = _mm256_andnot_si256(mask, T);                         \
+    const __m256i pred = _mm256_or_si256(A, B); /* pred = pb > pa ? L : T*/ \
+    L = _mm256_add_epi8(src, pred);                                         \
+    out[i + (OUT)] = (uint32_t)_mm256_cvtsi256_si32(L);                     \
+  } while (0)
+
+#define DO_PRED11_SHIFT                                       \
+  do {                                                        \
+    /* Shift the pre-computed value for the next iteration.*/ \
+    T = _mm256_srli_si256(T, 4);                              \
+    TL = _mm256_srli_si256(TL, 4);                            \
+    src = _mm256_srli_si256(src, 4);                          \
+    pa = _mm256_srli_si256(pa, 4);                            \
+  } while (0)
+
+static void PredictorAdd11_AVX2(const uint32_t* in, const uint32_t* upper,
+                                int num_pixels, uint32_t* WEBP_RESTRICT out) {
+  int i, j;
+  __m256i pa;
+  __m256i L = _mm256_setr_epi32((int)out[-1], 0, 0, 0, 0, 0, 0, 0);
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
+    __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
+    __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
+    {
+      // We can unpack with any value on the upper 32 bits, provided it's the
+      // same on both operands (so that their sum of abs diff is zero). Here we
+      // use T.
+      const __m256i T_lo = _mm256_unpacklo_epi32(T, T);
+      const __m256i TL_lo = _mm256_unpacklo_epi32(TL, T);
+      const __m256i T_hi = _mm256_unpackhi_epi32(T, T);
+      const __m256i TL_hi = _mm256_unpackhi_epi32(TL, T);
+      const __m256i s_lo = _mm256_sad_epu8(T_lo, TL_lo);
+      const __m256i s_hi = _mm256_sad_epu8(T_hi, TL_hi);
+      pa = _mm256_packs_epi32(s_lo, s_hi);  // pa = sum |T-TL|
+    }
+    {
+      const __m256i T_bak = T;
+      const __m256i TL_bak = TL;
+      const __m256i src_bak = src;
+      const __m256i pa_bak = pa;
+      for (j = 0; j < 4; ++j) {
+        DO_PRED11(j);
+        DO_PRED11_SHIFT;
+      }
+      T = _mm256_permute2x128_si256(T_bak, T_bak, 1);
+      TL = _mm256_permute2x128_si256(TL_bak, TL_bak, 1);
+      src = _mm256_permute2x128_si256(src_bak, src_bak, 1);
+      pa = _mm256_permute2x128_si256(pa_bak, pa_bak, 1);
+      for (; j < 8; ++j) {
+        DO_PRED11(j);
+        DO_PRED11_SHIFT;
+      }
+    }
+  }
+  if (i != num_pixels) {
+    VP8LPredictorsAdd_SSE[11](in + i, upper + i, num_pixels - i, out + i);
+  }
+}
+#undef DO_PRED11
+#undef DO_PRED11_SHIFT
+
+// Predictor12: ClampedAddSubtractFull.
+#define DO_PRED12(DIFF, OUT)                              \
+  do {                                                    \
+    const __m256i all = _mm256_add_epi16(L, (DIFF));      \
+    const __m256i alls = _mm256_packus_epi16(all, all);   \
+    const __m256i res = _mm256_add_epi8(src, alls);       \
+    out[i + (OUT)] = (uint32_t)_mm256_cvtsi256_si32(res); \
+    L = _mm256_unpacklo_epi8(res, zero);                  \
+  } while (0)
+
+#define DO_PRED12_SHIFT(DIFF, LANE)                           \
+  do {                                                        \
+    /* Shift the pre-computed value for the next iteration.*/ \
+    if ((LANE) == 0) (DIFF) = _mm256_srli_si256(DIFF, 8);     \
+    src = _mm256_srli_si256(src, 4);                          \
+  } while (0)
+
+static void PredictorAdd12_AVX2(const uint32_t* in, const uint32_t* upper,
+                                int num_pixels, uint32_t* WEBP_RESTRICT out) {
+  int i;
+  const __m256i zero = _mm256_setzero_si256();
+  const __m256i L8 = _mm256_setr_epi32((int)out[-1], 0, 0, 0, 0, 0, 0, 0);
+  __m256i L = _mm256_unpacklo_epi8(L8, zero);
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    // Load 8 pixels at a time.
+    __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
+    const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
+    const __m256i T_lo = _mm256_unpacklo_epi8(T, zero);
+    const __m256i T_hi = _mm256_unpackhi_epi8(T, zero);
+    const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
+    const __m256i TL_lo = _mm256_unpacklo_epi8(TL, zero);
+    const __m256i TL_hi = _mm256_unpackhi_epi8(TL, zero);
+    __m256i diff_lo = _mm256_sub_epi16(T_lo, TL_lo);
+    __m256i diff_hi = _mm256_sub_epi16(T_hi, TL_hi);
+    const __m256i diff_lo_bak = diff_lo;
+    const __m256i diff_hi_bak = diff_hi;
+    const __m256i src_bak = src;
+    DO_PRED12(diff_lo, 0);
+    DO_PRED12_SHIFT(diff_lo, 0);
+    DO_PRED12(diff_lo, 1);
+    DO_PRED12_SHIFT(diff_lo, 0);
+    DO_PRED12(diff_hi, 2);
+    DO_PRED12_SHIFT(diff_hi, 0);
+    DO_PRED12(diff_hi, 3);
+    DO_PRED12_SHIFT(diff_hi, 0);
+
+    // Process the upper lane.
+    diff_lo = _mm256_permute2x128_si256(diff_lo_bak, diff_lo_bak, 1);
+    diff_hi = _mm256_permute2x128_si256(diff_hi_bak, diff_hi_bak, 1);
+    src = _mm256_permute2x128_si256(src_bak, src_bak, 1);
+
+    DO_PRED12(diff_lo, 4);
+    DO_PRED12_SHIFT(diff_lo, 0);
+    DO_PRED12(diff_lo, 5);
+    DO_PRED12_SHIFT(diff_lo, 1);
+    DO_PRED12(diff_hi, 6);
+    DO_PRED12_SHIFT(diff_hi, 0);
+    DO_PRED12(diff_hi, 7);
+  }
+  if (i != num_pixels) {
+    VP8LPredictorsAdd_SSE[12](in + i, upper + i, num_pixels - i, out + i);
+  }
+}
+#undef DO_PRED12
+#undef DO_PRED12_SHIFT
+
+// Due to averages with integers, values cannot be accumulated in parallel for
+// predictor 13.
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+static void AddGreenToBlueAndRed_AVX2(const uint32_t* const src, int num_pixels,
+                                      uint32_t* dst) {
+  int i;
+  const __m256i kCstShuffle = _mm256_set_epi8(
+      -1, 29, -1, 29, -1, 25, -1, 25, -1, 21, -1, 21, -1, 17, -1, 17, -1, 13,
+      -1, 13, -1, 9, -1, 9, -1, 5, -1, 5, -1, 1, -1, 1);
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    const __m256i in = _mm256_loadu_si256((const __m256i*)&src[i]);  // argb
+    const __m256i in_0g0g = _mm256_shuffle_epi8(in, kCstShuffle);    // 0g0g
+    const __m256i out = _mm256_add_epi8(in, in_0g0g);
+    _mm256_storeu_si256((__m256i*)&dst[i], out);
+  }
+  // fallthrough and finish off with SSE.
+  if (i != num_pixels) {
+    VP8LAddGreenToBlueAndRed_SSE(src + i, num_pixels - i, dst + i);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Color Transform
+
+static void TransformColorInverse_AVX2(const VP8LMultipliers* const m,
+                                       const uint32_t* const src,
+                                       int num_pixels, uint32_t* dst) {
+// sign-extended multiplying constants, pre-shifted by 5.
+#define CST(X)  (((int16_t)(m->X << 8)) >> 5)   // sign-extend
+  const __m256i mults_rb =
+      _mm256_set1_epi32((int)((uint32_t)CST(green_to_red) << 16 |
+                              (CST(green_to_blue) & 0xffff)));
+  const __m256i mults_b2 = _mm256_set1_epi32(CST(red_to_blue));
+#undef CST
+  const __m256i mask_ag = _mm256_set1_epi32((int)0xff00ff00);
+  const __m256i perm1 = _mm256_setr_epi8(
+      -1, 1, -1, 1, -1, 5, -1, 5, -1, 9, -1, 9, -1, 13, -1, 13, -1, 17, -1, 17,
+      -1, 21, -1, 21, -1, 25, -1, 25, -1, 29, -1, 29);
+  const __m256i perm2 = _mm256_setr_epi8(
+      -1, 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1, 18, -1,
+      -1, -1, 22, -1, -1, -1, 26, -1, -1, -1, 30, -1, -1);
+  int i;
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    const __m256i A = _mm256_loadu_si256((const __m256i*)(src + i));
+    const __m256i B = _mm256_shuffle_epi8(A, perm1);  // argb -> g0g0
+    const __m256i C = _mm256_mulhi_epi16(B, mults_rb);
+    const __m256i D = _mm256_add_epi8(A, C);
+    const __m256i E = _mm256_shuffle_epi8(D, perm2);
+    const __m256i F = _mm256_mulhi_epi16(E, mults_b2);
+    const __m256i G = _mm256_add_epi8(D, F);
+    const __m256i out = _mm256_blendv_epi8(G, A, mask_ag);
+    _mm256_storeu_si256((__m256i*)&dst[i], out);
+  }
+  // Fall-back to SSE-version for left-overs.
+  if (i != num_pixels) {
+    VP8LTransformColorInverse_SSE(m, src + i, num_pixels - i, dst + i);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Color-space conversion functions
+
+static void ConvertBGRAToRGBA_AVX2(const uint32_t* WEBP_RESTRICT src,
+                                   int num_pixels, uint8_t* WEBP_RESTRICT dst) {
+  const __m256i* in = (const __m256i*)src;
+  __m256i* out = (__m256i*)dst;
+  while (num_pixels >= 8) {
+    const __m256i A = _mm256_loadu_si256(in++);
+    const __m256i B = _mm256_shuffle_epi8(
+        A,
+        _mm256_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2,
+                        15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2));
+    _mm256_storeu_si256(out++, B);
+    num_pixels -= 8;
+  }
+  // left-overs
+  if (num_pixels > 0) {
+    VP8LConvertBGRAToRGBA_SSE((const uint32_t*)in, num_pixels, (uint8_t*)out);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LDspInitAVX2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitAVX2(void) {
+  VP8LPredictorsAdd[0] = PredictorAdd0_AVX2;
+  VP8LPredictorsAdd[1] = PredictorAdd1_AVX2;
+  VP8LPredictorsAdd[2] = PredictorAdd2_AVX2;
+  VP8LPredictorsAdd[3] = PredictorAdd3_AVX2;
+  VP8LPredictorsAdd[4] = PredictorAdd4_AVX2;
+  VP8LPredictorsAdd[8] = PredictorAdd8_AVX2;
+  VP8LPredictorsAdd[9] = PredictorAdd9_AVX2;
+  VP8LPredictorsAdd[10] = PredictorAdd10_AVX2;
+  VP8LPredictorsAdd[11] = PredictorAdd11_AVX2;
+  VP8LPredictorsAdd[12] = PredictorAdd12_AVX2;
+
+  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_AVX2;
+  VP8LTransformColorInverse = TransformColorInverse_AVX2;
+  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_AVX2;
+}
+
+#else  // !WEBP_USE_AVX2
+
+WEBP_DSP_INIT_STUB(VP8LDspInitAVX2)
+
+#endif  // WEBP_USE_AVX2
--- a/thirdparty/libwebp/src/dsp/lossless_common.h
+++ b/thirdparty/libwebp/src/dsp/lossless_common.h
@@ -16,6 +16,9 @@
 #ifndef WEBP_DSP_LOSSLESS_COMMON_H_
 #define WEBP_DSP_LOSSLESS_COMMON_H_

+#include <assert.h>
+#include <stddef.h>
+
 #include "src/dsp/cpu.h"
 #include "src/utils/utils.h"
 #include "src/webp/types.h"
@@ -137,8 +140,8 @@ static WEBP_INLINE void VP8LPrefixEncodeNoLUT(int distance, int* const code,

 #define PREFIX_LOOKUP_IDX_MAX   512
 typedef struct {
-  int8_t code_;
-  int8_t extra_bits_;
+  int8_t code;
+  int8_t extra_bits;
 } VP8LPrefixCode;

 // These tables are derived using VP8LPrefixEncodeNoLUT.
@@ -148,8 +151,8 @@ static WEBP_INLINE void VP8LPrefixEncodeBits(int distance, int* const code,
                                             int* const extra_bits) {
  if (distance < PREFIX_LOOKUP_IDX_MAX) {
    const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance];
-    *code = prefix_code.code_;
-    *extra_bits = prefix_code.extra_bits_;
+    *code = prefix_code.code;
+    *extra_bits = prefix_code.extra_bits;
  } else {
    VP8LPrefixEncodeBitsNoLUT(distance, code, extra_bits);
  }
@@ -160,8 +163,8 @@ static WEBP_INLINE void VP8LPrefixEncode(int distance, int* const code,
                                         int* const extra_bits_value) {
  if (distance < PREFIX_LOOKUP_IDX_MAX) {
    const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance];
-    *code = prefix_code.code_;
-    *extra_bits = prefix_code.extra_bits_;
+    *code = prefix_code.code;
+    *extra_bits = prefix_code.extra_bits;
    *extra_bits_value = kPrefixEncodeExtraBitsValue[distance];
  } else {
    VP8LPrefixEncodeNoLUT(distance, code, extra_bits, extra_bits_value);
--- a/thirdparty/libwebp/src/dsp/lossless_enc.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc.c
@@ -13,16 +13,19 @@
 //          Jyrki Alakuijala (jyrki@google.com)
 //          Urvang Joshi (urvang@google.com)

-#include "src/dsp/dsp.h"
-
 #include <assert.h>
 #include <math.h>
 #include <stdlib.h>
-#include "src/dec/vp8li_dec.h"
-#include "src/utils/endian_inl_utils.h"
+#include <string.h>
+
+#include "src/dsp/cpu.h"
+#include "src/dsp/dsp.h"
 #include "src/dsp/lossless.h"
 #include "src/dsp/lossless_common.h"
-#include "src/dsp/yuv.h"
+#include "src/enc/histogram_enc.h"
+#include "src/utils/utils.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"

 // lookup table for small values of log2(int) * (1 << LOG_2_PRECISION_BITS).
 // Obtained in Python with:
@@ -479,10 +482,10 @@ void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
    const int8_t red   = U32ToS8(argb >> 16);
    int new_red = red & 0xff;
    int new_blue = argb & 0xff;
-    new_red -= ColorTransformDelta((int8_t)m->green_to_red_, green);
+    new_red -= ColorTransformDelta((int8_t)m->green_to_red, green);
    new_red &= 0xff;
-    new_blue -= ColorTransformDelta((int8_t)m->green_to_blue_, green);
-    new_blue -= ColorTransformDelta((int8_t)m->red_to_blue_, red);
+    new_blue -= ColorTransformDelta((int8_t)m->green_to_blue, green);
+    new_blue -= ColorTransformDelta((int8_t)m->red_to_blue, red);
    new_blue &= 0xff;
    data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
  }
@@ -580,20 +583,6 @@ static uint32_t ExtraCost_C(const uint32_t* population, int length) {
  return cost;
 }

-static uint32_t ExtraCostCombined_C(const uint32_t* WEBP_RESTRICT X,
-                                    const uint32_t* WEBP_RESTRICT Y,
-                                    int length) {
-  int i;
-  uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
-  assert(length % 2 == 0);
-  for (i = 2; i < length / 2 - 1; ++i) {
-    const int xy0 = X[2 * i + 2] + Y[2 * i + 2];
-    const int xy1 = X[2 * i + 3] + Y[2 * i + 3];
-    cost += i * (xy0 + xy1);
-  }
-  return cost;
-}
-
 //------------------------------------------------------------------------------

 static void AddVector_C(const uint32_t* WEBP_RESTRICT a,
@@ -609,58 +598,6 @@ static void AddVectorEq_C(const uint32_t* WEBP_RESTRICT a,
  for (i = 0; i < size; ++i) out[i] += a[i];
 }

-#define ADD(X, ARG, LEN) do {                                                  \
-  if (a->is_used_[X]) {                                                        \
-    if (b->is_used_[X]) {                                                      \
-      VP8LAddVector(a->ARG, b->ARG, out->ARG, (LEN));                          \
-    } else {                                                                   \
-      memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0]));           \
-    }                                                                          \
-  } else if (b->is_used_[X]) {                                                 \
-    memcpy(&out->ARG[0], &b->ARG[0], (LEN) * sizeof(out->ARG[0]));             \
-  } else {                                                                     \
-    memset(&out->ARG[0], 0, (LEN) * sizeof(out->ARG[0]));                      \
-  }                                                                            \
-} while (0)
-
-#define ADD_EQ(X, ARG, LEN) do {                                               \
-  if (a->is_used_[X]) {                                                        \
-    if (out->is_used_[X]) {                                                    \
-      VP8LAddVectorEq(a->ARG, out->ARG, (LEN));                                \
-    } else {                                                                   \
-      memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0]));           \
-    }                                                                          \
-  }                                                                            \
-} while (0)
-
-void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
-                      const VP8LHistogram* WEBP_RESTRICT const b,
-                      VP8LHistogram* WEBP_RESTRICT const out) {
-  int i;
-  const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
-  assert(a->palette_code_bits_ == b->palette_code_bits_);
-
-  if (b != out) {
-    ADD(0, literal_, literal_size);
-    ADD(1, red_, NUM_LITERAL_CODES);
-    ADD(2, blue_, NUM_LITERAL_CODES);
-    ADD(3, alpha_, NUM_LITERAL_CODES);
-    ADD(4, distance_, NUM_DISTANCE_CODES);
-    for (i = 0; i < 5; ++i) {
-      out->is_used_[i] = (a->is_used_[i] | b->is_used_[i]);
-    }
-  } else {
-    ADD_EQ(0, literal_, literal_size);
-    ADD_EQ(1, red_, NUM_LITERAL_CODES);
-    ADD_EQ(2, blue_, NUM_LITERAL_CODES);
-    ADD_EQ(3, alpha_, NUM_LITERAL_CODES);
-    ADD_EQ(4, distance_, NUM_DISTANCE_CODES);
-    for (i = 0; i < 5; ++i) out->is_used_[i] |= a->is_used_[i];
-  }
-}
-#undef ADD
-#undef ADD_EQ
-
 //------------------------------------------------------------------------------
 // Image transforms.

@@ -710,17 +647,20 @@ GENERATE_PREDICTOR_SUB(13)
 //------------------------------------------------------------------------------

 VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed_SSE;

 VP8LTransformColorFunc VP8LTransformColor;
+VP8LTransformColorFunc VP8LTransformColor_SSE;

 VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
+VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms_SSE;
 VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
+VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms_SSE;

 VP8LFastLog2SlowFunc VP8LFastLog2Slow;
 VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;

 VP8LCostFunc VP8LExtraCost;
-VP8LCostCombinedFunc VP8LExtraCostCombined;
 VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
 VP8LShannonEntropyFunc VP8LShannonEntropy;

@@ -732,13 +672,16 @@ VP8LAddVectorEqFunc VP8LAddVectorEq;

 VP8LVectorMismatchFunc VP8LVectorMismatch;
 VP8LBundleColorMapFunc VP8LBundleColorMap;
+VP8LBundleColorMapFunc VP8LBundleColorMap_SSE;

 VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
 VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
+VP8LPredictorAddSubFunc VP8LPredictorsSub_SSE[16];

 extern VP8CPUInfo VP8GetCPUInfo;
 extern void VP8LEncDspInitSSE2(void);
 extern void VP8LEncDspInitSSE41(void);
+extern void VP8LEncDspInitAVX2(void);
 extern void VP8LEncDspInitNEON(void);
 extern void VP8LEncDspInitMIPS32(void);
 extern void VP8LEncDspInitMIPSdspR2(void);
@@ -760,7 +703,6 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
  VP8LFastSLog2Slow = FastSLog2Slow_C;

  VP8LExtraCost = ExtraCost_C;
-  VP8LExtraCostCombined = ExtraCostCombined_C;
  VP8LCombinedShannonEntropy = CombinedShannonEntropy_C;
  VP8LShannonEntropy = ShannonEntropy_C;

@@ -815,6 +757,11 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
 #if defined(WEBP_HAVE_SSE41)
      if (VP8GetCPUInfo(kSSE4_1)) {
        VP8LEncDspInitSSE41();
+#if defined(WEBP_HAVE_AVX2)
+        if (VP8GetCPUInfo(kAVX2)) {
+          VP8LEncDspInitAVX2();
+        }
+#endif
      }
 #endif
    }
@@ -850,7 +797,6 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
  assert(VP8LFastLog2Slow != NULL);
  assert(VP8LFastSLog2Slow != NULL);
  assert(VP8LExtraCost != NULL);
-  assert(VP8LExtraCostCombined != NULL);
  assert(VP8LCombinedShannonEntropy != NULL);
  assert(VP8LShannonEntropy != NULL);
  assert(VP8LGetEntropyUnrefined != NULL);
--- a/thirdparty/libwebp/src/dsp/lossless_enc_avx2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_avx2.c
@@ -0,0 +1,736 @@
+// Copyright 2025 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// AVX2 variant of methods for lossless encoder
+//
+// Author: Vincent Rabaud (vrabaud@google.com)
+
+#include "src/dsp/dsp.h"
+
+#if defined(WEBP_USE_AVX2)
+#include <emmintrin.h>
+#include <immintrin.h>
+
+#include <assert.h>
+#include <stddef.h>
+
+#include "src/dsp/cpu.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/lossless_common.h"
+#include "src/utils/utils.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+static void SubtractGreenFromBlueAndRed_AVX2(uint32_t* argb_data,
+                                             int num_pixels) {
+  int i;
+  const __m256i kCstShuffle = _mm256_set_epi8(  // green -> bytes 0 and 2, else 0
+      -1, 29, -1, 29, -1, 25, -1, 25, -1, 21, -1, 21, -1, 17, -1, 17, -1, 13,
+      -1, 13, -1, 9, -1, 9, -1, 5, -1, 5, -1, 1, -1, 1);
+  for (i = 0; i + 8 <= num_pixels; i += 8) {  // 8 pixels per iteration, in place
+    const __m256i in = _mm256_loadu_si256((__m256i*)&argb_data[i]);  // argb
+    const __m256i in_0g0g = _mm256_shuffle_epi8(in, kCstShuffle);
+    const __m256i out = _mm256_sub_epi8(in, in_0g0g);  // per-byte, wraps mod 256
+    _mm256_storeu_si256((__m256i*)&argb_data[i], out);
+  }
+  // Finish the remaining (fewer than 8) pixels with the SSE implementation.
+  if (i != num_pixels) {
+    VP8LSubtractGreenFromBlueAndRed_SSE(argb_data + i, num_pixels - i);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Color Transform
+
+// For sign-extended multiplying constants, pre-shifted by 5:
+#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)
+
+#define MK_CST_16(HI, LO) \
+  _mm256_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
+
+static void TransformColor_AVX2(const VP8LMultipliers* WEBP_RESTRICT const m,
+                                uint32_t* WEBP_RESTRICT argb_data,
+                                int num_pixels) {
+  const __m256i mults_rb =
+      MK_CST_16(CST_5b(m->green_to_red), CST_5b(m->green_to_blue));
+  const __m256i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue), 0);
+  const __m256i mask_rb = _mm256_set1_epi32(0x00ff00ff);  // red-blue masks
+  const __m256i kCstShuffle = _mm256_set_epi8(  // green -> bytes 1 and 3, else 0
+      29, -1, 29, -1, 25, -1, 25, -1, 21, -1, 21, -1, 17, -1, 17, -1, 13, -1,
+      13, -1, 9, -1, 9, -1, 5, -1, 5, -1, 1, -1, 1, -1);
+  int i;
+  for (i = 0; i + 8 <= num_pixels; i += 8) {  // 8 pixels per iteration, in place
+    const __m256i in = _mm256_loadu_si256((__m256i*)&argb_data[i]);  // argb
+    const __m256i A = _mm256_shuffle_epi8(in, kCstShuffle);          // g0g0
+    const __m256i B = _mm256_mulhi_epi16(A, mults_rb);  // x dr  x db1
+    const __m256i C = _mm256_slli_epi16(in, 8);         // r 0   b   0
+    const __m256i D = _mm256_mulhi_epi16(C, mults_b2);  // x db2 0   0
+    const __m256i E = _mm256_srli_epi32(D, 16);         // 0 0   x db2
+    const __m256i F = _mm256_add_epi8(E, B);            // x dr  x  db
+    const __m256i G = _mm256_and_si256(F, mask_rb);     // 0 dr  0  db
+    const __m256i out = _mm256_sub_epi8(in, G);
+    _mm256_storeu_si256((__m256i*)&argb_data[i], out);
+  }
+  // fallthrough and finish off the remaining (< 8) pixels with plain-SSE
+  if (i != num_pixels) {
+    VP8LTransformColor_SSE(m, argb_data + i, num_pixels - i);
+  }
+}
+
+//------------------------------------------------------------------------------
+#define SPAN 16
+static void CollectColorBlueTransforms_AVX2(const uint32_t* WEBP_RESTRICT argb,
+                                            int stride, int tile_width,
+                                            int tile_height, int green_to_blue,
+                                            int red_to_blue, uint32_t histo[]) {
+  const __m256i mult =
+      MK_CST_16(CST_5b(red_to_blue) + 256, CST_5b(green_to_blue));
+  const __m256i perm = _mm256_setr_epi8(  // bytes 1 and 2 of each pixel -> hi
+      -1, 1, -1, 2, -1, 5, -1, 6, -1, 9, -1, 10, -1, 13, -1, 14, -1, 17, -1, 18,
+      -1, 21, -1, 22, -1, 25, -1, 26, -1, 29, -1, 30);
+  if (tile_width >= 8) {  // vector path: whole groups of 8 pixels per row
+    int y, i;
+    for (y = 0; y < tile_height; ++y) {
+      uint8_t values[32];  // scratch copy of E, read back one byte per pixel
+      const uint32_t* const src = argb + y * stride;
+      const __m256i A1 = _mm256_loadu_si256((const __m256i*)src);
+      const __m256i B1 = _mm256_shuffle_epi8(A1, perm);
+      const __m256i C1 = _mm256_mulhi_epi16(B1, mult);
+      const __m256i D1 = _mm256_sub_epi16(A1, C1);
+      __m256i E = _mm256_add_epi16(_mm256_srli_epi32(D1, 16), D1);
+      int x;
+      for (x = 8; x + 8 <= tile_width; x += 8) {  // next group loaded early
+        const __m256i A2 = _mm256_loadu_si256((const __m256i*)(src + x));
+        __m256i B2, C2, D2;
+        _mm256_storeu_si256((__m256i*)values, E);
+        for (i = 0; i < 32; i += 4) ++histo[values[i]];  // byte 0 of each pixel
+        B2 = _mm256_shuffle_epi8(A2, perm);
+        C2 = _mm256_mulhi_epi16(B2, mult);
+        D2 = _mm256_sub_epi16(A2, C2);
+        E = _mm256_add_epi16(_mm256_srli_epi32(D2, 16), D2);
+      }
+      _mm256_storeu_si256((__m256i*)values, E);  // flush the last pending group
+      for (i = 0; i < 32; i += 4) ++histo[values[i]];
+    }
+  }
+  {
+    const int left_over = tile_width & 7;  // columns not covered by 8-wide steps
+    if (left_over > 0) {
+      VP8LCollectColorBlueTransforms_SSE(argb + tile_width - left_over, stride,
+                                         left_over, tile_height, green_to_blue,
+                                         red_to_blue, histo);
+    }
+  }
+}
+
+static void CollectColorRedTransforms_AVX2(const uint32_t* WEBP_RESTRICT argb,
+                                           int stride, int tile_width,
+                                           int tile_height, int green_to_red,
+                                           uint32_t histo[]) {
+  const __m256i mult = MK_CST_16(0, CST_5b(green_to_red));
+  const __m256i mask_g = _mm256_set1_epi32(0x0000ff00);  // keeps byte 1 (green)
+  if (tile_width >= 8) {  // vector path: whole groups of 8 pixels per row
+    int y, i;
+    for (y = 0; y < tile_height; ++y) {
+      uint8_t values[32];  // scratch copy of D, read back one byte per pixel
+      const uint32_t* const src = argb + y * stride;
+      const __m256i A1 = _mm256_loadu_si256((const __m256i*)src);
+      const __m256i B1 = _mm256_and_si256(A1, mask_g);
+      const __m256i C1 = _mm256_madd_epi16(B1, mult);
+      __m256i D = _mm256_sub_epi16(A1, C1);
+      int x;
+      for (x = 8; x + 8 <= tile_width; x += 8) {  // next group loaded early
+        const __m256i A2 = _mm256_loadu_si256((const __m256i*)(src + x));
+        __m256i B2, C2;
+        _mm256_storeu_si256((__m256i*)values, D);
+        for (i = 2; i < 32; i += 4) ++histo[values[i]];  // byte 2 of each pixel
+        B2 = _mm256_and_si256(A2, mask_g);
+        C2 = _mm256_madd_epi16(B2, mult);
+        D = _mm256_sub_epi16(A2, C2);
+      }
+      _mm256_storeu_si256((__m256i*)values, D);  // flush the last pending group
+      for (i = 2; i < 32; i += 4) ++histo[values[i]];
+    }
+  }
+  {
+    const int left_over = tile_width & 7;  // columns not covered by 8-wide steps
+    if (left_over > 0) {
+      VP8LCollectColorRedTransforms_SSE(argb + tile_width - left_over, stride,
+                                        left_over, tile_height, green_to_red,
+                                        histo);
+    }
+  }
+}
+#undef SPAN
+#undef MK_CST_16
+
+//------------------------------------------------------------------------------
+
+// Note we are adding uint32_t's as *signed* int32's (using _mm256_add_epi32).
+// But that's ok since the histogram values are less than 1<<28 (max picture
+// size).
+static void AddVector_AVX2(const uint32_t* WEBP_RESTRICT a,
+                           const uint32_t* WEBP_RESTRICT b,
+                           uint32_t* WEBP_RESTRICT out, int size) {
+  int i = 0;
+  int aligned_size = size & ~31;  // largest multiple of 32 not above size
+  // Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
+  // NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
+  // 2). See the usage in VP8LHistogramAdd().
+  assert(size >= 32);  // the do/while below always runs at least once
+  assert(size % 2 == 0);
+
+  do {  // 32 elements (four 256-bit registers) per iteration
+    const __m256i a0 = _mm256_loadu_si256((const __m256i*)&a[i + 0]);
+    const __m256i a1 = _mm256_loadu_si256((const __m256i*)&a[i + 8]);
+    const __m256i a2 = _mm256_loadu_si256((const __m256i*)&a[i + 16]);
+    const __m256i a3 = _mm256_loadu_si256((const __m256i*)&a[i + 24]);
+    const __m256i b0 = _mm256_loadu_si256((const __m256i*)&b[i + 0]);
+    const __m256i b1 = _mm256_loadu_si256((const __m256i*)&b[i + 8]);
+    const __m256i b2 = _mm256_loadu_si256((const __m256i*)&b[i + 16]);
+    const __m256i b3 = _mm256_loadu_si256((const __m256i*)&b[i + 24]);
+    _mm256_storeu_si256((__m256i*)&out[i + 0], _mm256_add_epi32(a0, b0));
+    _mm256_storeu_si256((__m256i*)&out[i + 8], _mm256_add_epi32(a1, b1));
+    _mm256_storeu_si256((__m256i*)&out[i + 16], _mm256_add_epi32(a2, b2));
+    _mm256_storeu_si256((__m256i*)&out[i + 24], _mm256_add_epi32(a3, b3));
+    i += 32;
+  } while (i != aligned_size);
+
+  if ((size & 16) != 0) {  // one optional 16-element step
+    const __m256i a0 = _mm256_loadu_si256((const __m256i*)&a[i + 0]);
+    const __m256i a1 = _mm256_loadu_si256((const __m256i*)&a[i + 8]);
+    const __m256i b0 = _mm256_loadu_si256((const __m256i*)&b[i + 0]);
+    const __m256i b1 = _mm256_loadu_si256((const __m256i*)&b[i + 8]);
+    _mm256_storeu_si256((__m256i*)&out[i + 0], _mm256_add_epi32(a0, b0));
+    _mm256_storeu_si256((__m256i*)&out[i + 8], _mm256_add_epi32(a1, b1));
+    i += 16;
+  }
+
+  size &= 15;  // elements still to process (0..15)
+  if (size == 8) {  // exactly one register left
+    const __m256i a0 = _mm256_loadu_si256((const __m256i*)&a[i]);
+    const __m256i b0 = _mm256_loadu_si256((const __m256i*)&b[i]);
+    _mm256_storeu_si256((__m256i*)&out[i], _mm256_add_epi32(a0, b0));
+  } else {  // any other remainder is handled element by element
+    for (; size--; ++i) {
+      out[i] = a[i] + b[i];
+    }
+  }
+}
+
+static void AddVectorEq_AVX2(const uint32_t* WEBP_RESTRICT a,
+                             uint32_t* WEBP_RESTRICT out, int size) {
+  int i = 0;
+  int aligned_size = size & ~31;  // largest multiple of 32 not above size
+  // Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
+  // NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
+  // 2). See the usage in VP8LHistogramAdd().
+  assert(size >= 32);  // the do/while below always runs at least once
+  assert(size % 2 == 0);
+
+  do {  // 32 elements per iteration: out[i..i+31] += a[i..i+31]
+    const __m256i a0 = _mm256_loadu_si256((const __m256i*)&a[i + 0]);
+    const __m256i a1 = _mm256_loadu_si256((const __m256i*)&a[i + 8]);
+    const __m256i a2 = _mm256_loadu_si256((const __m256i*)&a[i + 16]);
+    const __m256i a3 = _mm256_loadu_si256((const __m256i*)&a[i + 24]);
+    const __m256i b0 = _mm256_loadu_si256((const __m256i*)&out[i + 0]);
+    const __m256i b1 = _mm256_loadu_si256((const __m256i*)&out[i + 8]);
+    const __m256i b2 = _mm256_loadu_si256((const __m256i*)&out[i + 16]);
+    const __m256i b3 = _mm256_loadu_si256((const __m256i*)&out[i + 24]);
+    _mm256_storeu_si256((__m256i*)&out[i + 0], _mm256_add_epi32(a0, b0));
+    _mm256_storeu_si256((__m256i*)&out[i + 8], _mm256_add_epi32(a1, b1));
+    _mm256_storeu_si256((__m256i*)&out[i + 16], _mm256_add_epi32(a2, b2));
+    _mm256_storeu_si256((__m256i*)&out[i + 24], _mm256_add_epi32(a3, b3));
+    i += 32;
+  } while (i != aligned_size);
+
+  if ((size & 16) != 0) {  // one optional 16-element step
+    const __m256i a0 = _mm256_loadu_si256((const __m256i*)&a[i + 0]);
+    const __m256i a1 = _mm256_loadu_si256((const __m256i*)&a[i + 8]);
+    const __m256i b0 = _mm256_loadu_si256((const __m256i*)&out[i + 0]);
+    const __m256i b1 = _mm256_loadu_si256((const __m256i*)&out[i + 8]);
+    _mm256_storeu_si256((__m256i*)&out[i + 0], _mm256_add_epi32(a0, b0));
+    _mm256_storeu_si256((__m256i*)&out[i + 8], _mm256_add_epi32(a1, b1));
+    i += 16;
+  }
+
+  size &= 15;  // elements still to process (0..15)
+  if (size == 8) {  // exactly one register left
+    const __m256i a0 = _mm256_loadu_si256((const __m256i*)&a[i]);
+    const __m256i b0 = _mm256_loadu_si256((const __m256i*)&out[i]);
+    _mm256_storeu_si256((__m256i*)&out[i], _mm256_add_epi32(a0, b0));
+  } else {  // any other remainder is handled element by element
+    for (; size--; ++i) {
+      out[i] += a[i];
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entropy
+
+#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
+
+static uint64_t CombinedShannonEntropy_AVX2(const uint32_t X[256],
+                                            const uint32_t Y[256]) {
+  int i;
+  uint64_t retval = 0;
+  uint32_t sumX = 0, sumXY = 0;
+  const __m256i zero = _mm256_setzero_si256();
+
+  for (i = 0; i < 256; i += 32) {  // 32 entries of X and Y per iteration
+    const __m256i x0 = _mm256_loadu_si256((const __m256i*)(X + i + 0));
+    const __m256i y0 = _mm256_loadu_si256((const __m256i*)(Y + i + 0));
+    const __m256i x1 = _mm256_loadu_si256((const __m256i*)(X + i + 8));
+    const __m256i y1 = _mm256_loadu_si256((const __m256i*)(Y + i + 8));
+    const __m256i x2 = _mm256_loadu_si256((const __m256i*)(X + i + 16));
+    const __m256i y2 = _mm256_loadu_si256((const __m256i*)(Y + i + 16));
+    const __m256i x3 = _mm256_loadu_si256((const __m256i*)(X + i + 24));
+    const __m256i y3 = _mm256_loadu_si256((const __m256i*)(Y + i + 24));
+    const __m256i x4 = _mm256_packs_epi16(_mm256_packs_epi32(x0, x1),
+                                          _mm256_packs_epi32(x2, x3));
+    const __m256i y4 = _mm256_packs_epi16(_mm256_packs_epi32(y0, y1),
+                                          _mm256_packs_epi32(y2, y3));
+    // Packed entries are actually in order: ... 17 16 11 10 9 8 3 2 1 0
+    const __m256i x5 = _mm256_permutevar8x32_epi32(  // restore index order
+        x4, _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0));
+    const __m256i y5 = _mm256_permutevar8x32_epi32(
+        y4, _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0));
+    const uint32_t mx =  // bit j is set when the packed X[i + j] is > 0
+        (uint32_t)_mm256_movemask_epi8(_mm256_cmpgt_epi8(x5, zero));
+    uint32_t my =  // bit j is set when packed X[i + j] or Y[i + j] is > 0
+        (uint32_t)_mm256_movemask_epi8(_mm256_cmpgt_epi8(y5, zero)) | mx;
+    while (my) {  // visit only the flagged entries
+      const int32_t j = BitsCtz(my);
+      uint32_t xy;
+      if ((mx >> j) & 1) {
+        const int x = X[i + j];
+        sumXY += x;
+        retval += VP8LFastSLog2(x);
+      }
+      xy = X[i + j] + Y[i + j];
+      sumX += xy;
+      retval += VP8LFastSLog2(xy);
+      my &= my - 1;  // clear the lowest set bit
+    }
+  }
+  retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval;
+  return retval;
+}
+
+#else
+
+#define DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC   // won't be faster
+
+#endif
+
+//------------------------------------------------------------------------------
+
+static int VectorMismatch_AVX2(const uint32_t* const array1,
+                               const uint32_t* const array2, int length) {
+  int match_len;  // count of leading elements known to be equal
+
+  if (length >= 24) {  // room for the first load plus two look-ahead loads
+    __m256i A0 = _mm256_loadu_si256((const __m256i*)&array1[0]);
+    __m256i A1 = _mm256_loadu_si256((const __m256i*)&array2[0]);
+    match_len = 0;
+    do {
+      // Loop unrolling and early load both provide a speedup of 10% for the
+      // current function. Also, max_limit can be MAX_LENGTH=4096 at most.
+      const __m256i cmpA = _mm256_cmpeq_epi32(A0, A1);
+      const __m256i B0 =
+          _mm256_loadu_si256((const __m256i*)&array1[match_len + 8]);
+      const __m256i B1 =
+          _mm256_loadu_si256((const __m256i*)&array2[match_len + 8]);
+      if ((uint32_t)_mm256_movemask_epi8(cmpA) != 0xffffffff) break;
+      match_len += 8;
+
+      {
+        const __m256i cmpB = _mm256_cmpeq_epi32(B0, B1);
+        A0 = _mm256_loadu_si256((const __m256i*)&array1[match_len + 8]);
+        A1 = _mm256_loadu_si256((const __m256i*)&array2[match_len + 8]);
+        if ((uint32_t)_mm256_movemask_epi8(cmpB) != 0xffffffff) break;
+        match_len += 8;
+      }
+    } while (match_len + 24 < length);  // keeps look-ahead loads below length
+  } else {
+    match_len = 0;
+    // Unroll the potential first two loops.
+    if (length >= 8 &&
+        (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi32(
+            _mm256_loadu_si256((const __m256i*)&array1[0]),
+            _mm256_loadu_si256((const __m256i*)&array2[0]))) == 0xffffffff) {
+      match_len = 8;
+      if (length >= 16 &&
+          (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi32(
+              _mm256_loadu_si256((const __m256i*)&array1[8]),
+              _mm256_loadu_si256((const __m256i*)&array2[8]))) == 0xffffffff) {
+        match_len = 16;
+      }
+    }
+  }
+
+  while (match_len < length && array1[match_len] == array2[match_len]) {
+    ++match_len;  // scalar scan pins down the exact first mismatch (or length)
+  }
+  return match_len;
+}
+
+// Bundles multiple (1, 2, 4 or 8) pixels, i.e. 1 << xbits, into a single pixel.
+static void BundleColorMap_AVX2(const uint8_t* WEBP_RESTRICT const row,
+                                int width, int xbits,
+                                uint32_t* WEBP_RESTRICT dst) {
+  int x = 0;  // number of row[] entries consumed by the AVX2 loops
+  assert(xbits >= 0);
+  assert(xbits <= 3);
+  switch (xbits) {
+    case 0: {
+      const __m256i ff = _mm256_set1_epi16((short)0xff00);
+      const __m256i zero = _mm256_setzero_si256();
+      // Store 0xff000000 | (row[x] << 8).
+      for (x = 0; x + 32 <= width; x += 32, dst += 32) {
+        const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
+        const __m256i in_lo = _mm256_unpacklo_epi8(zero, in);
+        const __m256i dst0 = _mm256_unpacklo_epi16(in_lo, ff);
+        const __m256i dst1 = _mm256_unpackhi_epi16(in_lo, ff);
+        const __m256i in_hi = _mm256_unpackhi_epi8(zero, in);
+        const __m256i dst2 = _mm256_unpacklo_epi16(in_hi, ff);
+        const __m256i dst3 = _mm256_unpackhi_epi16(in_hi, ff);
+        _mm256_storeu2_m128i((__m128i*)&dst[16], (__m128i*)&dst[0], dst0);
+        _mm256_storeu2_m128i((__m128i*)&dst[20], (__m128i*)&dst[4], dst1);
+        _mm256_storeu2_m128i((__m128i*)&dst[24], (__m128i*)&dst[8], dst2);
+        _mm256_storeu2_m128i((__m128i*)&dst[28], (__m128i*)&dst[12], dst3);
+      }  // unpacks work per 128-bit lane, hence the split 128-bit stores
+      break;
+    }
+    case 1: {
+      const __m256i ff = _mm256_set1_epi16((short)0xff00);
+      const __m256i mul = _mm256_set1_epi16(0x110);
+      for (x = 0; x + 32 <= width; x += 32, dst += 16) {
+        // 0a0b | (where a/b are 4 bits).
+        const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
+        const __m256i tmp = _mm256_mullo_epi16(in, mul);  // aba0
+        const __m256i pack = _mm256_and_si256(tmp, ff);   // ab00
+        const __m256i dst0 = _mm256_unpacklo_epi16(pack, ff);
+        const __m256i dst1 = _mm256_unpackhi_epi16(pack, ff);
+        _mm256_storeu2_m128i((__m128i*)&dst[8], (__m128i*)&dst[0], dst0);
+        _mm256_storeu2_m128i((__m128i*)&dst[12], (__m128i*)&dst[4], dst1);
+      }
+      break;
+    }
+    case 2: {
+      const __m256i mask_or = _mm256_set1_epi32((int)0xff000000);
+      const __m256i mul_cst = _mm256_set1_epi16(0x0104);
+      const __m256i mask_mul = _mm256_set1_epi16(0x0f00);
+      for (x = 0; x + 32 <= width; x += 32, dst += 8) {
+        // 000a000b000c000d | (where a/b/c/d are 2 bits).
+        const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
+        const __m256i mul =
+            _mm256_mullo_epi16(in, mul_cst);  // 00ab00b000cd00d0
+        const __m256i tmp =
+            _mm256_and_si256(mul, mask_mul);               //  00ab000000cd0000
+        const __m256i shift = _mm256_srli_epi32(tmp, 12);  // 00000000ab000000
+        const __m256i pack = _mm256_or_si256(shift, tmp);  // 00000000abcd0000
+        // Convert to 0xff00**00.
+        const __m256i res = _mm256_or_si256(pack, mask_or);
+        _mm256_storeu_si256((__m256i*)dst, res);
+      }
+      break;
+    }
+    default: {
+      assert(xbits == 3);
+      for (x = 0; x + 32 <= width; x += 32, dst += 4) {
+        // 0000000a00000000b... | (where a/b are 1 bit).
+        const __m256i in = _mm256_loadu_si256((const __m256i*)&row[x]);
+        const __m256i shift = _mm256_slli_epi64(in, 7);  // bit 0 -> byte's MSB
+        const uint32_t move = _mm256_movemask_epi8(shift);  // one bit per byte
+        dst[0] = 0xff000000 | ((move & 0xff) << 8);
+        dst[1] = 0xff000000 | (move & 0xff00);
+        dst[2] = 0xff000000 | ((move & 0xff0000) >> 8);
+        dst[3] = 0xff000000 | ((move & 0xff000000) >> 16);
+      }
+      break;
+    }
+  }
+  if (x != width) {  // fewer than 32 entries left: finish with the SSE version
+    VP8LBundleColorMap_SSE(row + x, width - x, xbits, dst);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Batch version of Predictor Transform subtraction
+
+static WEBP_INLINE void Average2_m256i(const __m256i* const a0,
+                                       const __m256i* const a1,
+                                       __m256i* const avg) {
+  // (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1), applied to every byte.
+  const __m256i ones = _mm256_set1_epi8(1);
+  const __m256i avg1 = _mm256_avg_epu8(*a0, *a1);  // rounded-up average
+  const __m256i one = _mm256_and_si256(_mm256_xor_si256(*a0, *a1), ones);
+  *avg = _mm256_sub_epi8(avg1, one);  // remove the rounding -> floor average
+}
+
+// Predictor0: ARGB_BLACK. Writes in[i] - ARGB_BLACK (per byte) to out[i].
+static void PredictorSub0_AVX2(const uint32_t* in, const uint32_t* upper,
+                               int num_pixels, uint32_t* WEBP_RESTRICT out) {
+  int i;
+  const __m256i black = _mm256_set1_epi32((int)ARGB_BLACK);
+  for (i = 0; i + 8 <= num_pixels; i += 8) {  // 8 pixels per iteration
+    const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
+    const __m256i res = _mm256_sub_epi8(src, black);
+    _mm256_storeu_si256((__m256i*)&out[i], res);
+  }
+  if (i != num_pixels) {  // remaining (< 8) pixels go to the SSE predictor
+    VP8LPredictorsSub_SSE[0](in + i, NULL, num_pixels - i, out + i);
+  }
+  (void)upper;  // unused: the prediction is a constant
+}
+
+#define GENERATE_PREDICTOR_1(X, IN)                                          \
+  static void PredictorSub##X##_AVX2(                                        \
+      const uint32_t* const in, const uint32_t* const upper, int num_pixels, \
+      uint32_t* WEBP_RESTRICT const out) {                                   \
+    int i;                                                                   \
+    for (i = 0; i + 8 <= num_pixels; i += 8) {                               \
+      const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);        \
+      const __m256i pred = _mm256_loadu_si256((const __m256i*)&(IN));        \
+      const __m256i res = _mm256_sub_epi8(src, pred);                        \
+      _mm256_storeu_si256((__m256i*)&out[i], res);                           \
+    }                                                                        \
+    if (i != num_pixels) {                                                   \
+      VP8LPredictorsSub_SSE[(X)](in + i, WEBP_OFFSET_PTR(upper, i),          \
+                                 num_pixels - i, out + i);                   \
+    }                                                                        \
+  }
+
+GENERATE_PREDICTOR_1(1, in[i - 1])       // Predictor1: L (left neighbor)
+GENERATE_PREDICTOR_1(2, upper[i])        // Predictor2: T (same index, upper row)
+GENERATE_PREDICTOR_1(3, upper[i + 1])    // Predictor3: TR (upper row, index + 1)
+GENERATE_PREDICTOR_1(4, upper[i - 1])    // Predictor4: TL (upper row, index - 1)
+#undef GENERATE_PREDICTOR_1
+
+// Predictor5: out = in - avg2(avg2(L, TR), T), byte-wise, 8 pixels per pass.
+static void PredictorSub5_AVX2(const uint32_t* in, const uint32_t* upper,
+                               int num_pixels, uint32_t* WEBP_RESTRICT out) {
+  int i;
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);  // left
+    const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
+    const __m256i TR = _mm256_loadu_si256((const __m256i*)&upper[i + 1]);
+    const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
+    __m256i avg, pred, res;
+    Average2_m256i(&L, &TR, &avg);    // inner average: avg2(L, TR)
+    Average2_m256i(&avg, &T, &pred);  // outer average with T gives the prediction
+    res = _mm256_sub_epi8(src, pred);
+    _mm256_storeu_si256((__m256i*)&out[i], res);
+  }
+  if (i != num_pixels) {  // under 8 pixels left: use the SSE-table routine
+    VP8LPredictorsSub_SSE[5](in + i, upper + i, num_pixels - i, out + i);
+  }
+}
+
+#define GENERATE_PREDICTOR_2(X, A, B)                                         \
+  static void PredictorSub##X##_AVX2(const uint32_t* in,                      \
+                                     const uint32_t* upper, int num_pixels,   \
+                                     uint32_t* WEBP_RESTRICT out) {           \
+    int i; /* A, B name the two predictor pixels averaged for in[i] */        \
+    for (i = 0; i + 8 <= num_pixels; i += 8) {                                \
+      const __m256i tA = _mm256_loadu_si256((const __m256i*)&(A));            \
+      const __m256i tB = _mm256_loadu_si256((const __m256i*)&(B));            \
+      const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);         \
+      __m256i pred, res;                                                      \
+      Average2_m256i(&tA, &tB, &pred);                                        \
+      res = _mm256_sub_epi8(src, pred);                                       \
+      _mm256_storeu_si256((__m256i*)&out[i], res);                            \
+    }                                                                         \
+    if (i != num_pixels) { /* under 8 pixels left: SSE-table routine */       \
+      VP8LPredictorsSub_SSE[(X)](in + i, upper + i, num_pixels - i, out + i); \
+    }                                                                         \
+  }
+
+GENERATE_PREDICTOR_2(6, in[i - 1], upper[i - 1])   // Predictor6: avg(L, TL)
+GENERATE_PREDICTOR_2(7, in[i - 1], upper[i])       // Predictor7: avg(L, T)
+GENERATE_PREDICTOR_2(8, upper[i - 1], upper[i])    // Predictor8: avg(TL, T)
+GENERATE_PREDICTOR_2(9, upper[i], upper[i + 1])    // Predictor9: avg(T, TR)
+#undef GENERATE_PREDICTOR_2
+
+// Predictor10: out = in - avg(avg(L, TL), avg(T, TR)), 8 pixels per pass.
+static void PredictorSub10_AVX2(const uint32_t* in, const uint32_t* upper,
+                                int num_pixels, uint32_t* WEBP_RESTRICT out) {
+  int i;
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);  // left
+    const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
+    const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
+    const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
+    const __m256i TR = _mm256_loadu_si256((const __m256i*)&upper[i + 1]);
+    __m256i avgTTR, avgLTL, avg, res;
+    Average2_m256i(&T, &TR, &avgTTR);
+    Average2_m256i(&L, &TL, &avgLTL);
+    Average2_m256i(&avgTTR, &avgLTL, &avg);  // average of the two averages
+    res = _mm256_sub_epi8(src, avg);
+    _mm256_storeu_si256((__m256i*)&out[i], res);
+  }
+  if (i != num_pixels) {  // under 8 pixels left: use the SSE-table routine
+    VP8LPredictorsSub_SSE[10](in + i, upper + i, num_pixels - i, out + i);
+  }
+}
+
+// Predictor11: select. Helper: per-pixel sum of |A - B| over the pixel's bytes.
+static void GetSumAbsDiff32_AVX2(const __m256i* const A, const __m256i* const B,
+                                 __m256i* const out) {
+  // We can unpack with any value on the upper 32 bits, provided it's the same
+  // on both operands (so that their sum of abs diff is zero). Here we use *A.
+  const __m256i A_lo = _mm256_unpacklo_epi32(*A, *A);
+  const __m256i B_lo = _mm256_unpacklo_epi32(*B, *A);
+  const __m256i A_hi = _mm256_unpackhi_epi32(*A, *A);
+  const __m256i B_hi = _mm256_unpackhi_epi32(*B, *A);
+  const __m256i s_lo = _mm256_sad_epu8(A_lo, B_lo);  // one sum per 64-bit group
+  const __m256i s_hi = _mm256_sad_epu8(A_hi, B_hi);
+  *out = _mm256_packs_epi32(s_lo, s_hi);  // narrows back to one sum per pixel
+}
+
+static void PredictorSub11_AVX2(const uint32_t* in, const uint32_t* upper,
+                                int num_pixels, uint32_t* WEBP_RESTRICT out) {
+  int i;  // Per pixel: predict L when sum|L-TL| > sum|T-TL|, otherwise T.
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);
+    const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
+    const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
+    const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
+    __m256i pa, pb;
+    GetSumAbsDiff32_AVX2(&T, &TL, &pa);  // pa = sum |T-TL|
+    GetSumAbsDiff32_AVX2(&L, &TL, &pb);  // pb = sum |L-TL|
+    {
+      const __m256i mask = _mm256_cmpgt_epi32(pb, pa);
+      const __m256i A = _mm256_and_si256(mask, L);
+      const __m256i B = _mm256_andnot_si256(mask, T);
+      const __m256i pred = _mm256_or_si256(A, B);  // pred = (pb > pa) ? L : T
+      const __m256i res = _mm256_sub_epi8(src, pred);
+      _mm256_storeu_si256((__m256i*)&out[i], res);
+    }
+  }
+  if (i != num_pixels) {  // under 8 pixels left: use the SSE-table routine
+    VP8LPredictorsSub_SSE[11](in + i, upper + i, num_pixels - i, out + i);
+  }
+}
+
+// Predictor12: out = in - clamp(L + T - TL), computed per 8-bit channel.
+static void PredictorSub12_AVX2(const uint32_t* in, const uint32_t* upper,
+                                int num_pixels, uint32_t* WEBP_RESTRICT out) {
+  int i;
+  const __m256i zero = _mm256_setzero_si256();
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
+    const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);
+    const __m256i L_lo = _mm256_unpacklo_epi8(L, zero);  // bytes -> 16-bit
+    const __m256i L_hi = _mm256_unpackhi_epi8(L, zero);
+    const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
+    const __m256i T_lo = _mm256_unpacklo_epi8(T, zero);
+    const __m256i T_hi = _mm256_unpackhi_epi8(T, zero);
+    const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
+    const __m256i TL_lo = _mm256_unpacklo_epi8(TL, zero);
+    const __m256i TL_hi = _mm256_unpackhi_epi8(TL, zero);
+    const __m256i diff_lo = _mm256_sub_epi16(T_lo, TL_lo);  // T - TL, signed
+    const __m256i diff_hi = _mm256_sub_epi16(T_hi, TL_hi);
+    const __m256i pred_lo = _mm256_add_epi16(L_lo, diff_lo);  // L + (T - TL)
+    const __m256i pred_hi = _mm256_add_epi16(L_hi, diff_hi);
+    const __m256i pred = _mm256_packus_epi16(pred_lo, pred_hi);  // clamp to 0..255
+    const __m256i res = _mm256_sub_epi8(src, pred);
+    _mm256_storeu_si256((__m256i*)&out[i], res);
+  }
+  if (i != num_pixels) {  // under 8 pixels left: use the SSE-table routine
+    VP8LPredictorsSub_SSE[12](in + i, upper + i, num_pixels - i, out + i);
+  }
+}
+
+// Predictor13: ClampedAddSubtractHalf, i.e. clamp(avg + (avg - TL) / 2), avg = (T + L) >> 1
+static void PredictorSub13_AVX2(const uint32_t* in, const uint32_t* upper,
+                                int num_pixels, uint32_t* WEBP_RESTRICT out) {
+  int i;
+  const __m256i zero = _mm256_setzero_si256();
+  for (i = 0; i + 8 <= num_pixels; i += 8) {
+    const __m256i L = _mm256_loadu_si256((const __m256i*)&in[i - 1]);
+    const __m256i src = _mm256_loadu_si256((const __m256i*)&in[i]);
+    const __m256i T = _mm256_loadu_si256((const __m256i*)&upper[i]);
+    const __m256i TL = _mm256_loadu_si256((const __m256i*)&upper[i - 1]);
+    // lo: bytes taken by unpacklo, widened to 16 bits with zeros.
+    const __m256i L_lo = _mm256_unpacklo_epi8(L, zero);
+    const __m256i T_lo = _mm256_unpacklo_epi8(T, zero);
+    const __m256i TL_lo = _mm256_unpacklo_epi8(TL, zero);
+    const __m256i sum_lo = _mm256_add_epi16(T_lo, L_lo);
+    const __m256i avg_lo = _mm256_srli_epi16(sum_lo, 1);  // (T + L) >> 1
+    const __m256i A1_lo = _mm256_sub_epi16(avg_lo, TL_lo);  // avg - TL, signed
+    const __m256i bit_fix_lo = _mm256_cmpgt_epi16(TL_lo, avg_lo);  // -1 if TL > avg
+    const __m256i A2_lo = _mm256_sub_epi16(A1_lo, bit_fix_lo);  // +1 when negative
+    const __m256i A3_lo = _mm256_srai_epi16(A2_lo, 1);  // halves, toward zero
+    const __m256i A4_lo = _mm256_add_epi16(avg_lo, A3_lo);
+    // hi: same computation on the bytes taken by unpackhi.
+    const __m256i L_hi = _mm256_unpackhi_epi8(L, zero);
+    const __m256i T_hi = _mm256_unpackhi_epi8(T, zero);
+    const __m256i TL_hi = _mm256_unpackhi_epi8(TL, zero);
+    const __m256i sum_hi = _mm256_add_epi16(T_hi, L_hi);
+    const __m256i avg_hi = _mm256_srli_epi16(sum_hi, 1);
+    const __m256i A1_hi = _mm256_sub_epi16(avg_hi, TL_hi);
+    const __m256i bit_fix_hi = _mm256_cmpgt_epi16(TL_hi, avg_hi);
+    const __m256i A2_hi = _mm256_sub_epi16(A1_hi, bit_fix_hi);
+    const __m256i A3_hi = _mm256_srai_epi16(A2_hi, 1);
+    const __m256i A4_hi = _mm256_add_epi16(avg_hi, A3_hi);
+
+    const __m256i pred = _mm256_packus_epi16(A4_lo, A4_hi);  // clamp to 0..255
+    const __m256i res = _mm256_sub_epi8(src, pred);
+    _mm256_storeu_si256((__m256i*)&out[i], res);
+  }
+  if (i != num_pixels) {  // under 8 pixels left: use the SSE-table routine
+    VP8LPredictorsSub_SSE[13](in + i, upper + i, num_pixels - i, out + i);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LEncDspInitAVX2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitAVX2(void) {  // installs AVX2 hooks
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_AVX2;
+  VP8LTransformColor = TransformColor_AVX2;
+  VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_AVX2;
+  VP8LCollectColorRedTransforms = CollectColorRedTransforms_AVX2;
+  VP8LAddVector = AddVector_AVX2;
+  VP8LAddVectorEq = AddVectorEq_AVX2;
+  VP8LCombinedShannonEntropy = CombinedShannonEntropy_AVX2;
+  VP8LVectorMismatch = VectorMismatch_AVX2;
+  VP8LBundleColorMap = BundleColorMap_AVX2;
+  // Predictor-subtraction table: slots 0..13 get their own AVX2 routine.
+  VP8LPredictorsSub[0] = PredictorSub0_AVX2;
+  VP8LPredictorsSub[1] = PredictorSub1_AVX2;
+  VP8LPredictorsSub[2] = PredictorSub2_AVX2;
+  VP8LPredictorsSub[3] = PredictorSub3_AVX2;
+  VP8LPredictorsSub[4] = PredictorSub4_AVX2;
+  VP8LPredictorsSub[5] = PredictorSub5_AVX2;
+  VP8LPredictorsSub[6] = PredictorSub6_AVX2;
+  VP8LPredictorsSub[7] = PredictorSub7_AVX2;
+  VP8LPredictorsSub[8] = PredictorSub8_AVX2;
+  VP8LPredictorsSub[9] = PredictorSub9_AVX2;
+  VP8LPredictorsSub[10] = PredictorSub10_AVX2;
+  VP8LPredictorsSub[11] = PredictorSub11_AVX2;
+  VP8LPredictorsSub[12] = PredictorSub12_AVX2;
+  VP8LPredictorsSub[13] = PredictorSub13_AVX2;
+  VP8LPredictorsSub[14] = PredictorSub0_AVX2;  // <- padding security sentinels
+  VP8LPredictorsSub[15] = PredictorSub0_AVX2;  // (slots 14 and 15 reuse slot 0's routine)
+}
+
+#else  // !WEBP_USE_AVX2
+
+WEBP_DSP_INIT_STUB(VP8LEncDspInitAVX2)
+
+#endif  // WEBP_USE_AVX2
--- a/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
@@ -133,60 +133,6 @@ static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
  return ((int64_t)temp0 << 32 | temp1);
 }

-// C version of this function:
-//   int i = 0;
-//   int64_t cost = 0;
-//   const uint32_t* pX = &X[4];
-//   const uint32_t* pY = &Y[4];
-//   const uint32_t* LoopEnd = &X[length];
-//   while (pX != LoopEnd) {
-//     const uint32_t xy0 = *pX + *pY;
-//     const uint32_t xy1 = *(pX + 1) + *(pY + 1);
-//     ++i;
-//     cost += i * xy0;
-//     cost += i * xy1;
-//     pX += 2;
-//     pY += 2;
-//   }
-//   return cost;
-static uint32_t ExtraCostCombined_MIPS32(const uint32_t* WEBP_RESTRICT const X,
-                                         const uint32_t* WEBP_RESTRICT const Y,
-                                         int length) {
-  int i, temp0, temp1, temp2, temp3;
-  const uint32_t* pX = &X[4];
-  const uint32_t* pY = &Y[4];
-  const uint32_t* const LoopEnd = &X[length];
-
-  __asm__ volatile(
-    "mult   $zero,    $zero                  \n\t"
-    "xor    %[i],     %[i],       %[i]       \n\t"
-    "beq    %[pX],    %[LoopEnd], 2f         \n\t"
-  "1:                                        \n\t"
-    "lw     %[temp0], 0(%[pX])               \n\t"
-    "lw     %[temp1], 0(%[pY])               \n\t"
-    "lw     %[temp2], 4(%[pX])               \n\t"
-    "lw     %[temp3], 4(%[pY])               \n\t"
-    "addiu  %[i],     %[i],       1          \n\t"
-    "addu   %[temp0], %[temp0],   %[temp1]   \n\t"
-    "addu   %[temp2], %[temp2],   %[temp3]   \n\t"
-    "addiu  %[pX],    %[pX],      8          \n\t"
-    "addiu  %[pY],    %[pY],      8          \n\t"
-    "madd   %[i],     %[temp0]               \n\t"
-    "madd   %[i],     %[temp2]               \n\t"
-    "bne    %[pX],    %[LoopEnd], 1b         \n\t"
-  "2:                                        \n\t"
-    "mfhi   %[temp0]                         \n\t"
-    "mflo   %[temp1]                         \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
-      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
-      [i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY)
-    : [LoopEnd]"r"(LoopEnd)
-    : "memory", "hi", "lo"
-  );
-
-  return ((int64_t)temp0 << 32 | temp1);
-}
-
 #define HUFFMAN_COST_PASS                                 \
  __asm__ volatile(                                       \
    "sll   %[temp1],  %[temp0],    3           \n\t"      \
@@ -299,7 +245,7 @@ static void GetCombinedEntropyUnrefined_MIPS32(
 // A..D - offsets
 // E - temp variable to tell macro
 //     if pointer should be incremented
-// literal_ and successive histograms could be unaligned
+// 'literal' and successive histograms could be unaligned
 // so we must use ulw and usw
 #define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2)           \
    "ulw    %[temp0], " #A "(%[" #P0 "])    \n\t"       \
@@ -388,7 +334,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {
  VP8LFastSLog2Slow = FastSLog2Slow_MIPS32;
  VP8LFastLog2Slow = FastLog2Slow_MIPS32;
  VP8LExtraCost = ExtraCost_MIPS32;
-  VP8LExtraCostCombined = ExtraCostCombined_MIPS32;
  VP8LGetEntropyUnrefined = GetEntropyUnrefined_MIPS32;
  VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_MIPS32;
  VP8LAddVector = AddVector_MIPS32;
--- a/thirdparty/libwebp/src/dsp/lossless_enc_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_mips_dsp_r2.c
@@ -83,9 +83,9 @@ static void TransformColor_MIPSdspR2(
    int num_pixels) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  uint32_t argb, argb1, new_red, new_red1;
-  const uint32_t G_to_R = m->green_to_red_;
-  const uint32_t G_to_B = m->green_to_blue_;
-  const uint32_t R_to_B = m->red_to_blue_;
+  const uint32_t G_to_R = m->green_to_red;
+  const uint32_t G_to_B = m->green_to_blue;
+  const uint32_t R_to_B = m->red_to_blue;
  uint32_t* const p_loop_end = data + (num_pixels & ~1);
  __asm__ volatile (
    ".set            push                                    \n\t"
@@ -152,10 +152,10 @@ static void TransformColor_MIPSdspR2(
    const uint32_t red = argb_ >> 16;
    uint32_t new_blue = argb_;
    new_red = red;
-    new_red -= ColorTransformDelta(m->green_to_red_, green);
+    new_red -= ColorTransformDelta(m->green_to_red, green);
    new_red &= 0xff;
-    new_blue -= ColorTransformDelta(m->green_to_blue_, green);
-    new_blue -= ColorTransformDelta(m->red_to_blue_, red);
+    new_blue -= ColorTransformDelta(m->green_to_blue, green);
+    new_blue -= ColorTransformDelta(m->red_to_blue, red);
    new_blue &= 0xff;
    data[0] = (argb_ & 0xff00ff00u) | (new_red << 16) | (new_blue);
  }
--- a/thirdparty/libwebp/src/dsp/lossless_enc_msa.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_msa.c
@@ -51,9 +51,9 @@
 static void TransformColor_MSA(const VP8LMultipliers* WEBP_RESTRICT const m,
                               uint32_t* WEBP_RESTRICT data, int num_pixels) {
  v16u8 src0, dst0;
-  const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
-                                         (m->green_to_red_ << 16));
-  const v16i8 r2b = (v16i8)__msa_fill_w(m->red_to_blue_);
+  const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue |
+                                         (m->green_to_red << 16));
+  const v16i8 r2b = (v16i8)__msa_fill_w(m->red_to_blue);
  const v16u8 mask0 = { 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255,
                        13, 255, 13, 255 };
  const v16u8 mask1 = { 16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11,
--- a/thirdparty/libwebp/src/dsp/lossless_enc_neon.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_neon.c
@@ -78,15 +78,15 @@ static void TransformColor_NEON(const VP8LMultipliers* WEBP_RESTRICT const m,
  // sign-extended multiplying constants, pre-shifted by 6.
 #define CST(X)  (((int16_t)(m->X << 8)) >> 6)
  const int16_t rb[8] = {
-    CST(green_to_blue_), CST(green_to_red_),
-    CST(green_to_blue_), CST(green_to_red_),
-    CST(green_to_blue_), CST(green_to_red_),
-    CST(green_to_blue_), CST(green_to_red_)
+    CST(green_to_blue), CST(green_to_red),
+    CST(green_to_blue), CST(green_to_red),
+    CST(green_to_blue), CST(green_to_red),
+    CST(green_to_blue), CST(green_to_red)
  };
  const int16x8_t mults_rb = vld1q_s16(rb);
  const int16_t b2[8] = {
-    0, CST(red_to_blue_), 0, CST(red_to_blue_),
-    0, CST(red_to_blue_), 0, CST(red_to_blue_),
+    0, CST(red_to_blue), 0, CST(red_to_blue),
+    0, CST(red_to_blue), 0, CST(red_to_blue),
  };
  const int16x8_t mults_b2 = vld1q_s16(b2);
 #undef CST
--- a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
@@ -14,11 +14,17 @@
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE2)
-#include <assert.h>
 #include <emmintrin.h>
+
+#include <assert.h>
+#include <string.h>
+
+#include "src/dsp/cpu.h"
 #include "src/dsp/lossless.h"
-#include "src/dsp/common_sse2.h"
 #include "src/dsp/lossless_common.h"
+#include "src/utils/utils.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"

 // For sign-extended multiplying constants, pre-shifted by 5:
 #define CST_5b(X)  (((int16_t)((uint16_t)(X) << 8)) >> 5)
@@ -52,9 +58,9 @@ static void SubtractGreenFromBlueAndRed_SSE2(uint32_t* argb_data,
 static void TransformColor_SSE2(const VP8LMultipliers* WEBP_RESTRICT const m,
                                uint32_t* WEBP_RESTRICT argb_data,
                                int num_pixels) {
-  const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_),
-                                     CST_5b(m->green_to_blue_));
-  const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0);
+  const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red),
+                                     CST_5b(m->green_to_blue));
+  const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue), 0);
  const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00);  // alpha-green masks
  const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff);       // red-blue masks
  int i;
@@ -645,25 +651,43 @@ static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper,
                                int num_pixels, uint32_t* WEBP_RESTRICT out) {
  int i;
  const __m128i zero = _mm_setzero_si128();
-  for (i = 0; i + 2 <= num_pixels; i += 2) {
-    // we can only process two pixels at a time
-    const __m128i L = _mm_loadl_epi64((const __m128i*)&in[i - 1]);
-    const __m128i src = _mm_loadl_epi64((const __m128i*)&in[i]);
-    const __m128i T = _mm_loadl_epi64((const __m128i*)&upper[i]);
-    const __m128i TL = _mm_loadl_epi64((const __m128i*)&upper[i - 1]);
-    const __m128i L_lo = _mm_unpacklo_epi8(L, zero);
-    const __m128i T_lo = _mm_unpacklo_epi8(T, zero);
-    const __m128i TL_lo = _mm_unpacklo_epi8(TL, zero);
-    const __m128i sum = _mm_add_epi16(T_lo, L_lo);
-    const __m128i avg = _mm_srli_epi16(sum, 1);
-    const __m128i A1 = _mm_sub_epi16(avg, TL_lo);
-    const __m128i bit_fix = _mm_cmpgt_epi16(TL_lo, avg);
-    const __m128i A2 = _mm_sub_epi16(A1, bit_fix);
-    const __m128i A3 = _mm_srai_epi16(A2, 1);
-    const __m128i A4 = _mm_add_epi16(avg, A3);
-    const __m128i pred = _mm_packus_epi16(A4, A4);
-    const __m128i res = _mm_sub_epi8(src, pred);
-    _mm_storel_epi64((__m128i*)&out[i], res);
+  for (i = 0; i + 4 <= num_pixels; i += 4) {
+    const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
+    const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
+    const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
+    const __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
+    __m128i A4_lo, A4_hi;
+    // lo.
+    {
+      const __m128i L_lo = _mm_unpacklo_epi8(L, zero);
+      const __m128i T_lo = _mm_unpacklo_epi8(T, zero);
+      const __m128i TL_lo = _mm_unpacklo_epi8(TL, zero);
+      const __m128i sum_lo = _mm_add_epi16(T_lo, L_lo);
+      const __m128i avg_lo = _mm_srli_epi16(sum_lo, 1);
+      const __m128i A1_lo = _mm_sub_epi16(avg_lo, TL_lo);
+      const __m128i bit_fix_lo = _mm_cmpgt_epi16(TL_lo, avg_lo);
+      const __m128i A2_lo = _mm_sub_epi16(A1_lo, bit_fix_lo);
+      const __m128i A3_lo = _mm_srai_epi16(A2_lo, 1);
+      A4_lo = _mm_add_epi16(avg_lo, A3_lo);
+    }
+    // hi.
+    {
+      const __m128i L_hi = _mm_unpackhi_epi8(L, zero);
+      const __m128i T_hi = _mm_unpackhi_epi8(T, zero);
+      const __m128i TL_hi = _mm_unpackhi_epi8(TL, zero);
+      const __m128i sum_hi = _mm_add_epi16(T_hi, L_hi);
+      const __m128i avg_hi = _mm_srli_epi16(sum_hi, 1);
+      const __m128i A1_hi = _mm_sub_epi16(avg_hi, TL_hi);
+      const __m128i bit_fix_hi = _mm_cmpgt_epi16(TL_hi, avg_hi);
+      const __m128i A2_hi = _mm_sub_epi16(A1_hi, bit_fix_hi);
+      const __m128i A3_hi = _mm_srai_epi16(A2_hi, 1);
+      A4_hi = _mm_add_epi16(avg_hi, A3_hi);
+    }
+    {
+      const __m128i pred = _mm_packus_epi16(A4_lo, A4_hi);
+      const __m128i res = _mm_sub_epi8(src, pred);
+      _mm_storeu_si128((__m128i*)&out[i], res);
+    }
  }
  if (i != num_pixels) {
    VP8LPredictorsSub_C[13](in + i, upper + i, num_pixels - i, out + i);
@@ -704,6 +728,15 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
  VP8LPredictorsSub[13] = PredictorSub13_SSE2;
  VP8LPredictorsSub[14] = PredictorSub0_SSE2;  // <- padding security sentinels
  VP8LPredictorsSub[15] = PredictorSub0_SSE2;
+
+  // SSE exports for AVX and above.
+  VP8LSubtractGreenFromBlueAndRed_SSE = SubtractGreenFromBlueAndRed_SSE2;
+  VP8LTransformColor_SSE = TransformColor_SSE2;
+  VP8LCollectColorBlueTransforms_SSE = CollectColorBlueTransforms_SSE2;
+  VP8LCollectColorRedTransforms_SSE = CollectColorRedTransforms_SSE2;
+  VP8LBundleColorMap_SSE = BundleColorMap_SSE2;
+
+  memcpy(VP8LPredictorsSub_SSE, VP8LPredictorsSub, sizeof(VP8LPredictorsSub));
 }

 #else  // !WEBP_USE_SSE2
--- a/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c
@@ -14,9 +14,14 @@
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE41)
-#include <assert.h>
+#include <emmintrin.h>
 #include <smmintrin.h>
+
+#include <assert.h>
+
+#include "src/dsp/cpu.h"
 #include "src/dsp/lossless.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // Cost operations.
@@ -44,29 +49,6 @@ static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) {
  return HorizontalSum_SSE41(cost);
 }

-static uint32_t ExtraCostCombined_SSE41(const uint32_t* WEBP_RESTRICT const a,
-                                        const uint32_t* WEBP_RESTRICT const b,
-                                        int length) {
-  int i;
-  __m128i cost = _mm_add_epi32(_mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]),
-                               _mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4]));
-  assert(length % 8 == 0);
-
-  for (i = 8; i + 8 <= length; i += 8) {
-    const int j = (i - 2) >> 1;
-    const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
-    const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
-    const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i]);
-    const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
-    const __m128i w = _mm_set_epi32(j + 3, j + 2, j + 1, j);
-    const __m128i a2 = _mm_hadd_epi32(a0, a1);
-    const __m128i b2 = _mm_hadd_epi32(b0, b1);
-    const __m128i mul = _mm_mullo_epi32(_mm_add_epi32(a2, b2), w);
-    cost = _mm_add_epi32(mul, cost);
-  }
-  return HorizontalSum_SSE41(cost);
-}
-
 //------------------------------------------------------------------------------
 // Subtract-Green Transform

@@ -195,10 +177,14 @@ extern void VP8LEncDspInitSSE41(void);

 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {
  VP8LExtraCost = ExtraCost_SSE41;
-  VP8LExtraCostCombined = ExtraCostCombined_SSE41;
  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41;
  VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE41;
  VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE41;
+
+  // SSE exports for AVX and above.
+  VP8LSubtractGreenFromBlueAndRed_SSE = SubtractGreenFromBlueAndRed_SSE41;
+  VP8LCollectColorBlueTransforms_SSE = CollectColorBlueTransforms_SSE41;
+  VP8LCollectColorRedTransforms_SSE = CollectColorRedTransforms_SSE41;
 }

 #else  // !WEBP_USE_SSE41
--- a/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
@@ -299,9 +299,9 @@ static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m,
                                            uint32_t* dst) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  uint32_t argb, argb1, new_red;
-  const uint32_t G_to_R = m->green_to_red_;
-  const uint32_t G_to_B = m->green_to_blue_;
-  const uint32_t R_to_B = m->red_to_blue_;
+  const uint32_t G_to_R = m->green_to_red;
+  const uint32_t G_to_B = m->green_to_blue;
+  const uint32_t R_to_B = m->red_to_blue;
  const uint32_t* const p_loop_end = src + (num_pixels & ~1);
  __asm__ volatile (
    ".set            push                                    \n\t"
--- a/thirdparty/libwebp/src/dsp/lossless_msa.c
+++ b/thirdparty/libwebp/src/dsp/lossless_msa.c
@@ -290,9 +290,9 @@ static void TransformColorInverse_MSA(const VP8LMultipliers* const m,
                                      const uint32_t* src, int num_pixels,
                                      uint32_t* dst) {
  v16u8 src0, dst0;
-  const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
-                                         (m->green_to_red_ << 16));
-  const v16i8 r2b = (v16i8)__msa_fill_w(m->red_to_blue_);
+  const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue |
+                                         (m->green_to_red << 16));
+  const v16i8 r2b = (v16i8)__msa_fill_w(m->red_to_blue);
  const v16u8 mask0 = { 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255,
                        13, 255, 13, 255 };
  const v16u8 mask1 = { 16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11,
--- a/thirdparty/libwebp/src/dsp/lossless_neon.c
+++ b/thirdparty/libwebp/src/dsp/lossless_neon.c
@@ -19,6 +19,7 @@

 #include "src/dsp/lossless.h"
 #include "src/dsp/neon.h"
+#include "src/webp/format_constants.h"

 //------------------------------------------------------------------------------
 // Colorspace conversion functions
@@ -551,15 +552,15 @@ static void TransformColorInverse_NEON(const VP8LMultipliers* const m,
 // sign-extended multiplying constants, pre-shifted by 6.
 #define CST(X)  (((int16_t)(m->X << 8)) >> 6)
  const int16_t rb[8] = {
-    CST(green_to_blue_), CST(green_to_red_),
-    CST(green_to_blue_), CST(green_to_red_),
-    CST(green_to_blue_), CST(green_to_red_),
-    CST(green_to_blue_), CST(green_to_red_)
+    CST(green_to_blue), CST(green_to_red),
+    CST(green_to_blue), CST(green_to_red),
+    CST(green_to_blue), CST(green_to_red),
+    CST(green_to_blue), CST(green_to_red)
  };
  const int16x8_t mults_rb = vld1q_s16(rb);
  const int16_t b2[8] = {
-    0, CST(red_to_blue_), 0, CST(red_to_blue_),
-    0, CST(red_to_blue_), 0, CST(red_to_blue_),
+    0, CST(red_to_blue), 0, CST(red_to_blue),
+    0, CST(red_to_blue), 0, CST(red_to_blue),
  };
  const int16x8_t mults_b2 = vld1q_s16(b2);
 #undef CST
--- a/thirdparty/libwebp/src/dsp/lossless_sse2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_sse2.c
@@ -15,10 +15,15 @@

 #if defined(WEBP_USE_SSE2)

+#include <emmintrin.h>
+#include <string.h>
+
 #include "src/dsp/common_sse2.h"
+#include "src/dsp/cpu.h"
 #include "src/dsp/lossless.h"
 #include "src/dsp/lossless_common.h"
-#include <emmintrin.h>
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // Predictor Transform
@@ -462,8 +467,8 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
 #define CST(X)  (((int16_t)(m->X << 8)) >> 5)   // sign-extend
 #define MK_CST_16(HI, LO) \
  _mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
-  const __m128i mults_rb = MK_CST_16(CST(green_to_red_), CST(green_to_blue_));
-  const __m128i mults_b2 = MK_CST_16(CST(red_to_blue_), 0);
+  const __m128i mults_rb = MK_CST_16(CST(green_to_red), CST(green_to_blue));
+  const __m128i mults_b2 = MK_CST_16(CST(red_to_blue), 0);
 #undef MK_CST_16
 #undef CST
  const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00);  // alpha-green masks
@@ -707,6 +712,15 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE2(void) {
  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_SSE2;
  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_SSE2;
  VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE2;
+
+  // SSE exports for AVX and above.
+  memcpy(VP8LPredictorsAdd_SSE, VP8LPredictorsAdd, sizeof(VP8LPredictorsAdd));
+
+  VP8LAddGreenToBlueAndRed_SSE = AddGreenToBlueAndRed_SSE2;
+  VP8LTransformColorInverse_SSE = TransformColorInverse_SSE2;
+
+  VP8LConvertBGRAToRGB_SSE = ConvertBGRAToRGB_SSE2;
+  VP8LConvertBGRAToRGBA_SSE = ConvertBGRAToRGBA_SSE2;
 }

 #else  // !WEBP_USE_SSE2
--- a/thirdparty/libwebp/src/dsp/lossless_sse41.c
+++ b/thirdparty/libwebp/src/dsp/lossless_sse41.c
@@ -12,10 +12,12 @@
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE41)
+#include <emmintrin.h>
+#include <smmintrin.h>

-#include "src/dsp/common_sse41.h"
+#include "src/webp/types.h"
+#include "src/dsp/cpu.h"
 #include "src/dsp/lossless.h"
-#include "src/dsp/lossless_common.h"

 //------------------------------------------------------------------------------
 // Color-space conversion functions
@@ -26,9 +28,9 @@ static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,
 // sign-extended multiplying constants, pre-shifted by 5.
 #define CST(X)  (((int16_t)(m->X << 8)) >> 5)   // sign-extend
  const __m128i mults_rb =
-      _mm_set1_epi32((int)((uint32_t)CST(green_to_red_) << 16 |
-                           (CST(green_to_blue_) & 0xffff)));
-  const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue_));
+      _mm_set1_epi32((int)((uint32_t)CST(green_to_red) << 16 |
+                           (CST(green_to_blue) & 0xffff)));
+  const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue));
 #undef CST
  const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00);
  const __m128i perm1 = _mm_setr_epi8(-1, 1, -1, 1, -1, 5, -1, 5,
@@ -124,6 +126,10 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE41(void) {
  VP8LTransformColorInverse = TransformColorInverse_SSE41;
  VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE41;
  VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE41;
+
+  // SSE exports for AVX and above.
+  VP8LTransformColorInverse_SSE = TransformColorInverse_SSE41;
+  VP8LConvertBGRAToRGB_SSE = ConvertBGRAToRGB_SSE41;
 }

 #else  // !WEBP_USE_SSE41
--- a/thirdparty/libwebp/src/dsp/rescaler.c
+++ b/thirdparty/libwebp/src/dsp/rescaler.c
@@ -12,7 +12,10 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include <assert.h>
+#include <stddef.h>

+#include "src/dsp/cpu.h"
+#include "src/webp/types.h"
 #include "src/dsp/dsp.h"
 #include "src/utils/rescaler_utils.h"

--- a/thirdparty/libwebp/src/dsp/rescaler_sse2.c
+++ b/thirdparty/libwebp/src/dsp/rescaler_sse2.c
@@ -17,8 +17,12 @@
 #include <emmintrin.h>

 #include <assert.h>
+#include <stddef.h>
+
+#include "src/dsp/cpu.h"
 #include "src/utils/rescaler_utils.h"
 #include "src/utils/utils.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // Implementations of critical functions ImportRow / ExportRow
--- a/thirdparty/libwebp/src/dsp/ssim.c
+++ b/thirdparty/libwebp/src/dsp/ssim.c
@@ -14,7 +14,9 @@
 #include <assert.h>
 #include <stdlib.h>  // for abs()

+#include "src/dsp/cpu.h"
 #include "src/dsp/dsp.h"
+#include "src/webp/types.h"

 #if !defined(WEBP_REDUCE_SIZE)

--- a/thirdparty/libwebp/src/dsp/ssim_sse2.c
+++ b/thirdparty/libwebp/src/dsp/ssim_sse2.c
@@ -14,11 +14,13 @@
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE2)
-
-#include <assert.h>
 #include <emmintrin.h>

+#include <assert.h>
+
 #include "src/dsp/common_sse2.h"
+#include "src/dsp/cpu.h"
+#include "src/webp/types.h"

 #if !defined(WEBP_DISABLE_STATS)

--- a/thirdparty/libwebp/src/dsp/upsampling.c
+++ b/thirdparty/libwebp/src/dsp/upsampling.c
@@ -11,10 +11,14 @@
 //
 // Author: somnath@google.com (Somnath Banerjee)

+#include <assert.h>
+#include <stddef.h>
+
+#include "src/dsp/cpu.h"
+#include "src/webp/types.h"
 #include "src/dsp/dsp.h"
 #include "src/dsp/yuv.h"
-
-#include <assert.h>
+#include "src/webp/decode.h"

 //------------------------------------------------------------------------------
 // Fancy upsampler
--- a/thirdparty/libwebp/src/dsp/upsampling_sse2.c
+++ b/thirdparty/libwebp/src/dsp/upsampling_sse2.c
@@ -14,11 +14,15 @@
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE2)
+#include <emmintrin.h>

 #include <assert.h>
-#include <emmintrin.h>
 #include <string.h>
+
+#include "src/webp/types.h"
+#include "src/dsp/cpu.h"
 #include "src/dsp/yuv.h"
+#include "src/webp/decode.h"

 #ifdef FANCY_UPSAMPLING

--- a/thirdparty/libwebp/src/dsp/upsampling_sse41.c
+++ b/thirdparty/libwebp/src/dsp/upsampling_sse41.c
@@ -14,11 +14,15 @@
 #include "src/dsp/dsp.h"

 #if defined(WEBP_USE_SSE41)
+#include <smmintrin.h>

 #include <assert.h>
-#include <smmintrin.h>
 #include <string.h>
+
+#include "src/webp/types.h"
+#include "src/dsp/cpu.h"
 #include "src/dsp/yuv.h"
+#include "src/webp/decode.h"

 #ifdef FANCY_UPSAMPLING

--- a/thirdparty/libwebp/src/dsp/yuv.c
+++ b/thirdparty/libwebp/src/dsp/yuv.c
@@ -11,11 +11,15 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include "src/dsp/yuv.h"
-
 #include <assert.h>
 #include <stdlib.h>

+#include "src/dsp/cpu.h"
+#include "src/webp/types.h"
+#include "src/dsp/dsp.h"
+#include "src/dsp/yuv.h"
+#include "src/webp/decode.h"
+
 //-----------------------------------------------------------------------------
 // Plain-C version

--- a/thirdparty/libwebp/src/dsp/yuv.h
+++ b/thirdparty/libwebp/src/dsp/yuv.h
@@ -35,8 +35,10 @@
 #ifndef WEBP_DSP_YUV_H_
 #define WEBP_DSP_YUV_H_

-#include "src/dsp/dsp.h"
 #include "src/dec/vp8_dec.h"
+#include "src/dsp/cpu.h"
+#include "src/dsp/dsp.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // YUV -> RGB conversion
--- a/thirdparty/libwebp/src/dsp/yuv_neon.c
+++ b/thirdparty/libwebp/src/dsp/yuv_neon.c
@@ -18,6 +18,7 @@
 #include <assert.h>
 #include <stdlib.h>

+#include "src/dsp/dsp.h"
 #include "src/dsp/neon.h"

 //-----------------------------------------------------------------------------
--- a/thirdparty/libwebp/src/dsp/yuv_sse2.c
+++ b/thirdparty/libwebp/src/dsp/yuv_sse2.c
@@ -14,12 +14,16 @@
 #include "src/dsp/yuv.h"

 #if defined(WEBP_USE_SSE2)
-
-#include <stdlib.h>
 #include <emmintrin.h>

+#include <stdlib.h>
+
 #include "src/dsp/common_sse2.h"
+#include "src/dsp/cpu.h"
+#include "src/dsp/dsp.h"
 #include "src/utils/utils.h"
+#include "src/webp/decode.h"
+#include "src/webp/types.h"

 //-----------------------------------------------------------------------------
 // Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
--- a/thirdparty/libwebp/src/dsp/yuv_sse41.c
+++ b/thirdparty/libwebp/src/dsp/yuv_sse41.c
@@ -14,12 +14,17 @@
 #include "src/dsp/yuv.h"

 #if defined(WEBP_USE_SSE41)
-
-#include <stdlib.h>
+#include <emmintrin.h>
 #include <smmintrin.h>

+#include <stdlib.h>
+
 #include "src/dsp/common_sse41.h"
+#include "src/dsp/cpu.h"
+#include "src/dsp/dsp.h"
 #include "src/utils/utils.h"
+#include "src/webp/decode.h"
+#include "src/webp/types.h"

 //-----------------------------------------------------------------------------
 // Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
--- a/thirdparty/libwebp/src/enc/alpha_enc.c
+++ b/thirdparty/libwebp/src/enc/alpha_enc.c
@@ -15,10 +15,13 @@
 #include <stdlib.h>
 #include <string.h>

-#include "src/enc/vp8i_enc.h"
 #include "src/dsp/dsp.h"
+#include "src/webp/types.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/utils/bit_writer_utils.h"
 #include "src/utils/filters_utils.h"
 #include "src/utils/quant_levels_utils.h"
+#include "src/utils/thread_utils.h"
 #include "src/utils/utils.h"
 #include "src/webp/encode.h"
 #include "src/webp/format_constants.h"
@@ -86,7 +89,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,

  ok = VP8LEncodeStream(&config, &picture, bw);
  WebPPictureFree(&picture);
-  ok = ok && !bw->error_;
+  ok = ok && !bw->error;
  if (!ok) {
    VP8LBitWriterWipeOut(bw);
    return 0;
@@ -138,7 +141,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
                              !reduce_levels, &tmp_bw, &result->stats);
    if (ok) {
      output = VP8LBitWriterFinish(&tmp_bw);
-      if (tmp_bw.error_) {
+      if (tmp_bw.error) {
        VP8LBitWriterWipeOut(&tmp_bw);
        memset(&result->bw, 0, sizeof(result->bw));
        return 0;
@@ -173,7 +176,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
  if (method != ALPHA_NO_COMPRESSION) {
    VP8LBitWriterWipeOut(&tmp_bw);
  }
-  ok = ok && !result->bw.error_;
+  ok = ok && !result->bw.error;
  result->score = VP8BitWriterSize(&result->bw);
  return ok;
 }
@@ -298,7 +301,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
                       int quality, int method, int filter,
                       int effort_level,
                       uint8_t** const output, size_t* const output_size) {
-  const WebPPicture* const pic = enc->pic_;
+  const WebPPicture* const pic = enc->pic;
  const int width = pic->width;
  const int height = pic->height;

@@ -357,7 +360,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
 #if !defined(WEBP_DISABLE_STATS)
    if (pic->stats != NULL) {  // need stats?
      pic->stats->coded_size += (int)(*output_size);
-      enc->sse_[3] = sse;
+      enc->sse[3] = sse;
    }
 #endif
  }
@@ -371,7 +374,7 @@ static int EncodeAlpha(VP8Encoder* const enc,

 static int CompressAlphaJob(void* arg1, void* unused) {
  VP8Encoder* const enc = (VP8Encoder*)arg1;
-  const WebPConfig* config = enc->config_;
+  const WebPConfig* config = enc->config;
  uint8_t* alpha_data = NULL;
  size_t alpha_size = 0;
  const int effort_level = config->method;  // maps to [0..6]
@@ -387,19 +390,19 @@ static int CompressAlphaJob(void* arg1, void* unused) {
    WebPSafeFree(alpha_data);
    return 0;
  }
-  enc->alpha_data_size_ = (uint32_t)alpha_size;
-  enc->alpha_data_ = alpha_data;
+  enc->alpha_data_size = (uint32_t)alpha_size;
+  enc->alpha_data = alpha_data;
  (void)unused;
  return 1;
 }

 void VP8EncInitAlpha(VP8Encoder* const enc) {
  WebPInitAlphaProcessing();
-  enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
-  enc->alpha_data_ = NULL;
-  enc->alpha_data_size_ = 0;
-  if (enc->thread_level_ > 0) {
-    WebPWorker* const worker = &enc->alpha_worker_;
+  enc->has_alpha = WebPPictureHasTransparency(enc->pic);
+  enc->alpha_data = NULL;
+  enc->alpha_data_size = 0;
+  if (enc->thread_level > 0) {
+    WebPWorker* const worker = &enc->alpha_worker;
    WebPGetWorkerInterface()->Init(worker);
    worker->data1 = enc;
    worker->data2 = NULL;
@@ -408,12 +411,12 @@ void VP8EncInitAlpha(VP8Encoder* const enc) {
 }

 int VP8EncStartAlpha(VP8Encoder* const enc) {
-  if (enc->has_alpha_) {
-    if (enc->thread_level_ > 0) {
-      WebPWorker* const worker = &enc->alpha_worker_;
+  if (enc->has_alpha) {
+    if (enc->thread_level > 0) {
+      WebPWorker* const worker = &enc->alpha_worker;
      // Makes sure worker is good to go.
      if (!WebPGetWorkerInterface()->Reset(worker)) {
-        return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+        return WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
      }
      WebPGetWorkerInterface()->Launch(worker);
      return 1;
@@ -425,27 +428,27 @@ int VP8EncStartAlpha(VP8Encoder* const enc) {
 }

 int VP8EncFinishAlpha(VP8Encoder* const enc) {
-  if (enc->has_alpha_) {
-    if (enc->thread_level_ > 0) {
-      WebPWorker* const worker = &enc->alpha_worker_;
+  if (enc->has_alpha) {
+    if (enc->thread_level > 0) {
+      WebPWorker* const worker = &enc->alpha_worker;
      if (!WebPGetWorkerInterface()->Sync(worker)) return 0;  // error
    }
  }
-  return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+  return WebPReportProgress(enc->pic, enc->percent + 20, &enc->percent);
 }

 int VP8EncDeleteAlpha(VP8Encoder* const enc) {
  int ok = 1;
-  if (enc->thread_level_ > 0) {
-    WebPWorker* const worker = &enc->alpha_worker_;
+  if (enc->thread_level > 0) {
+    WebPWorker* const worker = &enc->alpha_worker;
    // finish anything left in flight
    ok = WebPGetWorkerInterface()->Sync(worker);
    // still need to end the worker, even if !ok
    WebPGetWorkerInterface()->End(worker);
  }
-  WebPSafeFree(enc->alpha_data_);
-  enc->alpha_data_ = NULL;
-  enc->alpha_data_size_ = 0;
-  enc->has_alpha_ = 0;
+  WebPSafeFree(enc->alpha_data);
+  enc->alpha_data = NULL;
+  enc->alpha_data_size = 0;
+  enc->has_alpha = 0;
  return ok;
 }
--- a/thirdparty/libwebp/src/enc/analysis_enc.c
+++ b/thirdparty/libwebp/src/enc/analysis_enc.c
@@ -11,13 +11,17 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include <assert.h>
 #include <stdlib.h>
 #include <string.h>
-#include <assert.h>

+#include "src/dec/common_dec.h"
+#include "src/dsp/dsp.h"
 #include "src/enc/vp8i_enc.h"
-#include "src/enc/cost_enc.h"
+#include "src/utils/thread_utils.h"
 #include "src/utils/utils.h"
+#include "src/webp/encode.h"
+#include "src/webp/types.h"

 #define MAX_ITERS_K_MEANS  6

@@ -27,8 +31,8 @@

 static void SmoothSegmentMap(VP8Encoder* const enc) {
  int n, x, y;
-  const int w = enc->mb_w_;
-  const int h = enc->mb_h_;
+  const int w = enc->mb_w;
+  const int h = enc->mb_h;
  const int majority_cnt_3_x_3_grid = 5;
  uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp));
  assert((uint64_t)(w * h) == (uint64_t)w * h);   // no overflow, as per spec
@@ -37,17 +41,17 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
  for (y = 1; y < h - 1; ++y) {
    for (x = 1; x < w - 1; ++x) {
      int cnt[NUM_MB_SEGMENTS] = { 0 };
-      const VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
-      int majority_seg = mb->segment_;
+      const VP8MBInfo* const mb = &enc->mb_info[x + w * y];
+      int majority_seg = mb->segment;
      // Check the 8 neighbouring segment values.
-      cnt[mb[-w - 1].segment_]++;  // top-left
-      cnt[mb[-w + 0].segment_]++;  // top
-      cnt[mb[-w + 1].segment_]++;  // top-right
-      cnt[mb[   - 1].segment_]++;  // left
-      cnt[mb[   + 1].segment_]++;  // right
-      cnt[mb[ w - 1].segment_]++;  // bottom-left
-      cnt[mb[ w + 0].segment_]++;  // bottom
-      cnt[mb[ w + 1].segment_]++;  // bottom-right
+      cnt[mb[-w - 1].segment]++;  // top-left
+      cnt[mb[-w + 0].segment]++;  // top
+      cnt[mb[-w + 1].segment]++;  // top-right
+      cnt[mb[   - 1].segment]++;  // left
+      cnt[mb[   + 1].segment]++;  // right
+      cnt[mb[ w - 1].segment]++;  // bottom-left
+      cnt[mb[ w + 0].segment]++;  // bottom
+      cnt[mb[ w + 1].segment]++;  // bottom-right
      for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
        if (cnt[n] >= majority_cnt_3_x_3_grid) {
          majority_seg = n;
@@ -59,15 +63,15 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
  }
  for (y = 1; y < h - 1; ++y) {
    for (x = 1; x < w - 1; ++x) {
-      VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
-      mb->segment_ = tmp[x + y * w];
+      VP8MBInfo* const mb = &enc->mb_info[x + w * y];
+      mb->segment = tmp[x + y * w];
    }
  }
  WebPSafeFree(tmp);
 }

 //------------------------------------------------------------------------------
-// set segment susceptibility alpha_ / beta_
+// set segment susceptibility 'alpha' / 'beta'

 static WEBP_INLINE int clip(int v, int m, int M) {
  return (v < m) ? m : (v > M) ? M : v;
@@ -76,7 +80,7 @@ static WEBP_INLINE int clip(int v, int m, int M) {
 static void SetSegmentAlphas(VP8Encoder* const enc,
                             const int centers[NUM_MB_SEGMENTS],
                             int mid) {
-  const int nb = enc->segment_hdr_.num_segments_;
+  const int nb = enc->segment_hdr.num_segments;
  int min = centers[0], max = centers[0];
  int n;

@@ -91,8 +95,8 @@ static void SetSegmentAlphas(VP8Encoder* const enc,
  for (n = 0; n < nb; ++n) {
    const int alpha = 255 * (centers[n] - mid) / (max - min);
    const int beta = 255 * (centers[n] - min) / (max - min);
-    enc->dqm_[n].alpha_ = clip(alpha, -127, 127);
-    enc->dqm_[n].beta_ = clip(beta, 0, 255);
+    enc->dqm[n].alpha = clip(alpha, -127, 127);
+    enc->dqm[n].beta = clip(beta, 0, 255);
  }
 }

@@ -131,11 +135,11 @@ static void InitHistogram(VP8Histogram* const histo) {

 static void AssignSegments(VP8Encoder* const enc,
                           const int alphas[MAX_ALPHA + 1]) {
-  // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
+  // 'num_segments' is previously validated and <= NUM_MB_SEGMENTS, but an
  // explicit check is needed to avoid spurious warning about 'n + 1' exceeding
  // array bounds of 'centers' with some compilers (noticed with gcc-4.9).
-  const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ?
-                 enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS;
+  const int nb = (enc->segment_hdr.num_segments < NUM_MB_SEGMENTS) ?
+                 enc->segment_hdr.num_segments : NUM_MB_SEGMENTS;
  int centers[NUM_MB_SEGMENTS];
  int weighted_average = 0;
  int map[MAX_ALPHA + 1];
@@ -200,15 +204,15 @@ static void AssignSegments(VP8Encoder* const enc,
  }

  // Map each original value to the closest centroid
-  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
-    VP8MBInfo* const mb = &enc->mb_info_[n];
-    const int alpha = mb->alpha_;
-    mb->segment_ = map[alpha];
-    mb->alpha_ = centers[map[alpha]];  // for the record.
+  for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
+    VP8MBInfo* const mb = &enc->mb_info[n];
+    const int alpha = mb->alpha;
+    mb->segment = map[alpha];
+    mb->alpha = centers[map[alpha]];  // for the record.
  }

  if (nb > 1) {
-    const int smooth = (enc->config_->preprocessing & 1);
+    const int smooth = (enc->config->preprocessing & 1);
    if (smooth) SmoothSegmentMap(enc);
  }

@@ -220,7 +224,7 @@ static void AssignSegments(VP8Encoder* const enc,
 // susceptibility and set best modes for this macroblock.
 // Segment assignment is done later.

-// Number of modes to inspect for alpha_ evaluation. We don't need to test all
+// Number of modes to inspect for 'alpha' evaluation. We don't need to test all
 // the possible modes during the analysis phase: we risk falling into a local
 // optimum, or be subject to boundary effect
 #define MAX_INTRA16_MODE 2
@@ -239,8 +243,8 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
    int alpha;

    InitHistogram(&histo);
-    VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC,
-                        it->yuv_p_ + VP8I16ModeOffsets[mode],
+    VP8CollectHistogram(it->yuv_in + Y_OFF_ENC,
+                        it->yuv_p + VP8I16ModeOffsets[mode],
                        0, 16, &histo);
    alpha = GetAlpha(&histo);
    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
@@ -255,12 +259,12 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
 static int FastMBAnalyze(VP8EncIterator* const it) {
  // Empirical cut-off value, should be around 16 (~=block size). We use the
  // [8-17] range and favor intra4 at high quality, intra16 for low quality.
-  const int q = (int)it->enc_->config_->quality;
+  const int q = (int)it->enc->config->quality;
  const uint32_t kThreshold = 8 + (17 - 8) * q / 100;
  int k;
  uint32_t dc[16], m, m2;
  for (k = 0; k < 16; k += 4) {
-    VP8Mean16x4(it->yuv_in_ + Y_OFF_ENC + k * BPS, &dc[k]);
+    VP8Mean16x4(it->yuv_in + Y_OFF_ENC + k * BPS, &dc[k]);
  }
  for (m = 0, m2 = 0, k = 0; k < 16; ++k) {
    m += dc[k];
@@ -287,8 +291,8 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
    VP8Histogram histo;
    int alpha;
    InitHistogram(&histo);
-    VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC,
-                        it->yuv_p_ + VP8UVModeOffsets[mode],
+    VP8CollectHistogram(it->yuv_in + U_OFF_ENC,
+                        it->yuv_p + VP8UVModeOffsets[mode],
                        16, 16 + 4 + 4, &histo);
    alpha = GetAlpha(&histo);
    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
@@ -307,14 +311,14 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
 static void MBAnalyze(VP8EncIterator* const it,
                      int alphas[MAX_ALPHA + 1],
                      int* const alpha, int* const uv_alpha) {
-  const VP8Encoder* const enc = it->enc_;
+  const VP8Encoder* const enc = it->enc;
  int best_alpha, best_uv_alpha;

  VP8SetIntra16Mode(it, 0);  // default: Intra16, DC_PRED
  VP8SetSkip(it, 0);         // not skipped
  VP8SetSegment(it, 0);      // default segment, spec-wise.

-  if (enc->method_ <= 1) {
+  if (enc->method <= 1) {
    best_alpha = FastMBAnalyze(it);
  } else {
    best_alpha = MBAnalyzeBestIntra16Mode(it);
@@ -325,7 +329,7 @@ static void MBAnalyze(VP8EncIterator* const it,
  best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;
  best_alpha = FinalAlphaValue(best_alpha);
  alphas[best_alpha]++;
-  it->mb_->alpha_ = best_alpha;   // for later remapping.
+  it->mb->alpha = best_alpha;   // for later remapping.

  // Accumulate for later complexity analysis.
  *alpha += best_alpha;   // mixed susceptibility (not just luma)
@@ -333,11 +337,11 @@ static void MBAnalyze(VP8EncIterator* const it,
 }

 static void DefaultMBInfo(VP8MBInfo* const mb) {
-  mb->type_ = 1;     // I16x16
-  mb->uv_mode_ = 0;
-  mb->skip_ = 0;     // not skipped
-  mb->segment_ = 0;  // default segment
-  mb->alpha_ = 0;
+  mb->type = 1;     // I16x16
+  mb->uv_mode = 0;
+  mb->skip = 0;     // not skipped
+  mb->segment = 0;  // default segment
+  mb->alpha = 0;
 }

 //------------------------------------------------------------------------------
@@ -352,16 +356,16 @@ static void DefaultMBInfo(VP8MBInfo* const mb) {

 static void ResetAllMBInfo(VP8Encoder* const enc) {
  int n;
-  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
-    DefaultMBInfo(&enc->mb_info_[n]);
+  for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
+    DefaultMBInfo(&enc->mb_info[n]);
  }
  // Default susceptibilities.
-  enc->dqm_[0].alpha_ = 0;
-  enc->dqm_[0].beta_ = 0;
-  // Note: we can't compute this alpha_ / uv_alpha_ -> set to default value.
-  enc->alpha_ = 0;
-  enc->uv_alpha_ = 0;
-  WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+  enc->dqm[0].alpha = 0;
+  enc->dqm[0].beta = 0;
+  // Note: we can't compute this 'alpha' / 'uv_alpha' -> set to default value.
+  enc->alpha = 0;
+  enc->uv_alpha = 0;
+  WebPReportProgress(enc->pic, enc->percent + 20, &enc->percent);
 }

 // struct used to collect job result
@@ -409,7 +413,7 @@ static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
  job->worker.hook = DoSegmentsJob;
  VP8IteratorInit(enc, &job->it);
  VP8IteratorSetRow(&job->it, start_row);
-  VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w_);
+  VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w);
  memset(job->alphas, 0, sizeof(job->alphas));
  job->alpha = 0;
  job->uv_alpha = 0;
@@ -422,17 +426,17 @@ static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
 int VP8EncAnalyze(VP8Encoder* const enc) {
  int ok = 1;
  const int do_segments =
-      enc->config_->emulate_jpeg_size ||   // We need the complexity evaluation.
-      (enc->segment_hdr_.num_segments_ > 1) ||
-      (enc->method_ <= 1);  // for method 0 - 1, we need preds_[] to be filled.
+      enc->config->emulate_jpeg_size ||   // We need the complexity evaluation.
+      (enc->segment_hdr.num_segments > 1) ||
+      (enc->method <= 1);  // for method 0 - 1, we need preds[] to be filled.
  if (do_segments) {
-    const int last_row = enc->mb_h_;
-    const int total_mb = last_row * enc->mb_w_;
+    const int last_row = enc->mb_h;
+    const int total_mb = last_row * enc->mb_w;
 #ifdef WEBP_USE_THREAD
    // We give a little more than a half work to the main thread.
    const int split_row = (9 * last_row + 15) >> 4;
    const int kMinSplitRow = 2;  // minimal rows needed for mt to be worth it
-    const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow);
+    const int do_mt = (enc->thread_level > 0) && (split_row >= kMinSplitRow);
 #else
    const int do_mt = 0;
 #endif
@@ -467,17 +471,16 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
    }
    worker_interface->End(&main_job.worker);
    if (ok) {
-      enc->alpha_ = main_job.alpha / total_mb;
-      enc->uv_alpha_ = main_job.uv_alpha / total_mb;
+      enc->alpha = main_job.alpha / total_mb;
+      enc->uv_alpha = main_job.uv_alpha / total_mb;
      AssignSegments(enc, main_job.alphas);
    }
  } else {   // Use only one default segment.
    ResetAllMBInfo(enc);
  }
  if (!ok) {
-    return WebPEncodingSetError(enc->pic_,
+    return WebPEncodingSetError(enc->pic,
                                VP8_ENC_ERROR_OUT_OF_MEMORY);  // imprecise
  }
  return ok;
 }
-
--- a/thirdparty/libwebp/src/enc/backward_references_cost_enc.c
+++ b/thirdparty/libwebp/src/enc/backward_references_cost_enc.c
@@ -22,6 +22,8 @@
 #include "src/enc/histogram_enc.h"
 #include "src/utils/color_cache_utils.h"
 #include "src/utils/utils.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"

 #define VALUES_IN_BYTE 256

@@ -31,11 +33,11 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
                                      const PixOrCopy v);

 typedef struct {
-  uint32_t alpha_[VALUES_IN_BYTE];
-  uint32_t red_[VALUES_IN_BYTE];
-  uint32_t blue_[VALUES_IN_BYTE];
-  uint32_t distance_[NUM_DISTANCE_CODES];
-  uint32_t* literal_;
+  uint32_t alpha[VALUES_IN_BYTE];
+  uint32_t red[VALUES_IN_BYTE];
+  uint32_t blue[VALUES_IN_BYTE];
+  uint32_t distance[NUM_DISTANCE_CODES];
+  uint32_t* literal;
 } CostModel;

 static void ConvertPopulationCountTableToBitEstimates(
@@ -62,30 +64,25 @@ static void ConvertPopulationCountTableToBitEstimates(
 static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
                          const VP8LBackwardRefs* const refs) {
  int ok = 0;
-  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
  VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
  if (histo == NULL) goto Error;

  // The following code is similar to VP8LHistogramCreate but converts the
  // distance to plane code.
  VP8LHistogramInit(histo, cache_bits, /*init_arrays=*/ 1);
-  while (VP8LRefsCursorOk(&c)) {
-    VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos, VP8LDistanceToPlaneCode,
-                                    xsize);
-    VP8LRefsCursorNext(&c);
-  }
+  VP8LHistogramStoreRefs(refs, VP8LDistanceToPlaneCode, xsize, histo);

  ConvertPopulationCountTableToBitEstimates(
-      VP8LHistogramNumCodes(histo->palette_code_bits_), histo->literal_,
-      m->literal_);
+      VP8LHistogramNumCodes(histo->palette_code_bits), histo->literal,
+      m->literal);
  ConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo->red_, m->red_);
+      VALUES_IN_BYTE, histo->red, m->red);
  ConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo->blue_, m->blue_);
+      VALUES_IN_BYTE, histo->blue, m->blue);
  ConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo->alpha_, m->alpha_);
+      VALUES_IN_BYTE, histo->alpha, m->alpha);
  ConvertPopulationCountTableToBitEstimates(
-      NUM_DISTANCE_CODES, histo->distance_, m->distance_);
+      NUM_DISTANCE_CODES, histo->distance, m->distance);
  ok = 1;

 Error:
@@ -95,21 +92,21 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,

 static WEBP_INLINE int64_t GetLiteralCost(const CostModel* const m,
                                          uint32_t v) {
-  return (int64_t)m->alpha_[v >> 24] + m->red_[(v >> 16) & 0xff] +
-         m->literal_[(v >> 8) & 0xff] + m->blue_[v & 0xff];
+  return (int64_t)m->alpha[v >> 24] + m->red[(v >> 16) & 0xff] +
+         m->literal[(v >> 8) & 0xff] + m->blue[v & 0xff];
 }

 static WEBP_INLINE int64_t GetCacheCost(const CostModel* const m,
                                        uint32_t idx) {
  const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
-  return (int64_t)m->literal_[literal_idx];
+  return (int64_t)m->literal[literal_idx];
 }

 static WEBP_INLINE int64_t GetLengthCost(const CostModel* const m,
                                         uint32_t length) {
  int code, extra_bits;
  VP8LPrefixEncodeBits(length, &code, &extra_bits);
-  return (int64_t)m->literal_[VALUES_IN_BYTE + code] +
+  return (int64_t)m->literal[VALUES_IN_BYTE + code] +
         ((int64_t)extra_bits << LOG_2_PRECISION_BITS);
 }

@@ -117,7 +114,7 @@ static WEBP_INLINE int64_t GetDistanceCost(const CostModel* const m,
                                           uint32_t distance) {
  int code, extra_bits;
  VP8LPrefixEncodeBits(distance, &code, &extra_bits);
-  return (int64_t)m->distance_[code] +
+  return (int64_t)m->distance[code] +
         ((int64_t)extra_bits << LOG_2_PRECISION_BITS);
 }

@@ -147,84 +144,84 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel(
 // Empirical value to avoid high memory consumption but good for performance.
 #define COST_CACHE_INTERVAL_SIZE_MAX 500

-// To perform backward reference every pixel at index index_ is considered and
+// To perform backward reference every pixel at index 'index' is considered and
 // the cost for the MAX_LENGTH following pixels computed. Those following pixels
-// at index index_ + k (k from 0 to MAX_LENGTH) have a cost of:
-//     cost_ = distance cost at index + GetLengthCost(cost_model, k)
+// at index 'index' + k (k from 0 to MAX_LENGTH) have a cost of:
+//     cost = distance cost at index + GetLengthCost(cost_model, k)
 // and the minimum value is kept. GetLengthCost(cost_model, k) is cached in an
 // array of size MAX_LENGTH.
 // Instead of performing MAX_LENGTH comparisons per pixel, we keep track of the
 // minimal values using intervals of constant cost.
-// An interval is defined by the index_ of the pixel that generated it and
-// is only useful in a range of indices from start_ to end_ (exclusive), i.e.
-// it contains the minimum value for pixels between start_ and end_.
-// Intervals are stored in a linked list and ordered by start_. When a new
+// An interval is defined by the 'index' of the pixel that generated it and
+// is only useful in a range of indices from 'start' to 'end' (exclusive), i.e.
+// it contains the minimum value for pixels between start and end.
+// Intervals are stored in a linked list and ordered by 'start'. When a new
 // interval has a better value, old intervals are split or removed. There are
 // therefore no overlapping intervals.
 typedef struct CostInterval CostInterval;
 struct CostInterval {
-  int64_t cost_;
-  int start_;
-  int end_;
-  int index_;
-  CostInterval* previous_;
-  CostInterval* next_;
+  int64_t cost;
+  int start;
+  int end;
+  int index;
+  CostInterval* previous;
+  CostInterval* next;
 };

 // The GetLengthCost(cost_model, k) are cached in a CostCacheInterval.
 typedef struct {
-  int64_t cost_;
-  int start_;
-  int end_;       // Exclusive.
+  int64_t cost;
+  int start;
+  int end;       // Exclusive.
 } CostCacheInterval;

 // This structure is in charge of managing intervals and costs.
 // It caches the different CostCacheInterval, caches the different
-// GetLengthCost(cost_model, k) in cost_cache_ and the CostInterval's (whose
-// count_ is limited by COST_CACHE_INTERVAL_SIZE_MAX).
+// GetLengthCost(cost_model, k) in cost_cache and the CostInterval's (whose
+// 'count' is limited by COST_CACHE_INTERVAL_SIZE_MAX).
 #define COST_MANAGER_MAX_FREE_LIST 10
 typedef struct {
-  CostInterval* head_;
-  int count_;  // The number of stored intervals.
-  CostCacheInterval* cache_intervals_;
-  size_t cache_intervals_size_;
+  CostInterval* head;
+  int count;  // The number of stored intervals.
+  CostCacheInterval* cache_intervals;
+  size_t cache_intervals_size;
  // Contains the GetLengthCost(cost_model, k).
-  int64_t cost_cache_[MAX_LENGTH];
-  int64_t* costs_;
-  uint16_t* dist_array_;
+  int64_t cost_cache[MAX_LENGTH];
+  int64_t* costs;
+  uint16_t* dist_array;
  // Most of the time, we only need few intervals -> use a free-list, to avoid
  // fragmentation with small allocs in most common cases.
-  CostInterval intervals_[COST_MANAGER_MAX_FREE_LIST];
-  CostInterval* free_intervals_;
+  CostInterval intervals[COST_MANAGER_MAX_FREE_LIST];
+  CostInterval* free_intervals;
  // These are regularly malloc'd remains. This list can't grow larger than than
  // size COST_CACHE_INTERVAL_SIZE_MAX - COST_MANAGER_MAX_FREE_LIST, note.
-  CostInterval* recycled_intervals_;
+  CostInterval* recycled_intervals;
 } CostManager;

 static void CostIntervalAddToFreeList(CostManager* const manager,
                                      CostInterval* const interval) {
-  interval->next_ = manager->free_intervals_;
-  manager->free_intervals_ = interval;
+  interval->next = manager->free_intervals;
+  manager->free_intervals = interval;
 }

 static int CostIntervalIsInFreeList(const CostManager* const manager,
                                    const CostInterval* const interval) {
-  return (interval >= &manager->intervals_[0] &&
-          interval <= &manager->intervals_[COST_MANAGER_MAX_FREE_LIST - 1]);
+  return (interval >= &manager->intervals[0] &&
+          interval <= &manager->intervals[COST_MANAGER_MAX_FREE_LIST - 1]);
 }

 static void CostManagerInitFreeList(CostManager* const manager) {
  int i;
-  manager->free_intervals_ = NULL;
+  manager->free_intervals = NULL;
  for (i = 0; i < COST_MANAGER_MAX_FREE_LIST; ++i) {
-    CostIntervalAddToFreeList(manager, &manager->intervals_[i]);
+    CostIntervalAddToFreeList(manager, &manager->intervals[i]);
  }
 }

 static void DeleteIntervalList(CostManager* const manager,
                               const CostInterval* interval) {
  while (interval != NULL) {
-    const CostInterval* const next = interval->next_;
+    const CostInterval* const next = interval->next;
    if (!CostIntervalIsInFreeList(manager, interval)) {
      WebPSafeFree((void*)interval);
    }  // else: do nothing
@@ -235,16 +232,16 @@ static void DeleteIntervalList(CostManager* const manager,
 static void CostManagerClear(CostManager* const manager) {
  if (manager == NULL) return;

-  WebPSafeFree(manager->costs_);
-  WebPSafeFree(manager->cache_intervals_);
+  WebPSafeFree(manager->costs);
+  WebPSafeFree(manager->cache_intervals);

  // Clear the interval lists.
-  DeleteIntervalList(manager, manager->head_);
-  manager->head_ = NULL;
-  DeleteIntervalList(manager, manager->recycled_intervals_);
-  manager->recycled_intervals_ = NULL;
+  DeleteIntervalList(manager, manager->head);
+  manager->head = NULL;
+  DeleteIntervalList(manager, manager->recycled_intervals);
+  manager->recycled_intervals = NULL;

-  // Reset pointers, count_ and cache_intervals_size_.
+  // Reset pointers, 'count' and 'cache_intervals_size'.
  memset(manager, 0, sizeof(*manager));
  CostManagerInitFreeList(manager);
 }
@@ -255,25 +252,25 @@ static int CostManagerInit(CostManager* const manager,
  int i;
  const int cost_cache_size = (pix_count > MAX_LENGTH) ? MAX_LENGTH : pix_count;

-  manager->costs_ = NULL;
-  manager->cache_intervals_ = NULL;
-  manager->head_ = NULL;
-  manager->recycled_intervals_ = NULL;
-  manager->count_ = 0;
-  manager->dist_array_ = dist_array;
+  manager->costs = NULL;
+  manager->cache_intervals = NULL;
+  manager->head = NULL;
+  manager->recycled_intervals = NULL;
+  manager->count = 0;
+  manager->dist_array = dist_array;
  CostManagerInitFreeList(manager);

-  // Fill in the cost_cache_.
+  // Fill in the 'cost_cache'.
  // Has to be done in two passes due to a GCC bug on i686
  // related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
  for (i = 0; i < cost_cache_size; ++i) {
-    manager->cost_cache_[i] = GetLengthCost(cost_model, i);
+    manager->cost_cache[i] = GetLengthCost(cost_model, i);
  }
-  manager->cache_intervals_size_ = 1;
+  manager->cache_intervals_size = 1;
  for (i = 1; i < cost_cache_size; ++i) {
    // Get the number of bound intervals.
-    if (manager->cost_cache_[i] != manager->cost_cache_[i - 1]) {
-      ++manager->cache_intervals_size_;
+    if (manager->cost_cache[i] != manager->cost_cache[i - 1]) {
+      ++manager->cache_intervals_size;
    }
  }

@@ -281,46 +278,46 @@ static int CostManagerInit(CostManager* const manager,
  // The worst case scenario with a cost model would be if every length has a
  // different cost, hence MAX_LENGTH but that is impossible with the current
  // implementation that spirals around a pixel.
-  assert(manager->cache_intervals_size_ <= MAX_LENGTH);
-  manager->cache_intervals_ = (CostCacheInterval*)WebPSafeMalloc(
-      manager->cache_intervals_size_, sizeof(*manager->cache_intervals_));
-  if (manager->cache_intervals_ == NULL) {
+  assert(manager->cache_intervals_size <= MAX_LENGTH);
+  manager->cache_intervals = (CostCacheInterval*)WebPSafeMalloc(
+      manager->cache_intervals_size, sizeof(*manager->cache_intervals));
+  if (manager->cache_intervals == NULL) {
    CostManagerClear(manager);
    return 0;
  }

-  // Fill in the cache_intervals_.
+  // Fill in the 'cache_intervals'.
  {
-    CostCacheInterval* cur = manager->cache_intervals_;
+    CostCacheInterval* cur = manager->cache_intervals;

-    // Consecutive values in cost_cache_ are compared and if a big enough
+    // Consecutive values in 'cost_cache' are compared and if a big enough
    // difference is found, a new interval is created and bounded.
-    cur->start_ = 0;
-    cur->end_ = 1;
-    cur->cost_ = manager->cost_cache_[0];
+    cur->start = 0;
+    cur->end = 1;
+    cur->cost = manager->cost_cache[0];
    for (i = 1; i < cost_cache_size; ++i) {
-      const int64_t cost_val = manager->cost_cache_[i];
-      if (cost_val != cur->cost_) {
+      const int64_t cost_val = manager->cost_cache[i];
+      if (cost_val != cur->cost) {
        ++cur;
        // Initialize an interval.
-        cur->start_ = i;
-        cur->cost_ = cost_val;
+        cur->start = i;
+        cur->cost = cost_val;
      }
-      cur->end_ = i + 1;
+      cur->end = i + 1;
    }
-    assert((size_t)(cur - manager->cache_intervals_) + 1 ==
-           manager->cache_intervals_size_);
+    assert((size_t)(cur - manager->cache_intervals) + 1 ==
+           manager->cache_intervals_size);
  }

-  manager->costs_ =
-      (int64_t*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));
-  if (manager->costs_ == NULL) {
+  manager->costs =
+      (int64_t*)WebPSafeMalloc(pix_count, sizeof(*manager->costs));
+  if (manager->costs == NULL) {
    CostManagerClear(manager);
    return 0;
  }
-  // Set the initial costs_ to INT64_MAX for every pixel as we will keep the
+  // Set the initial 'costs' to INT64_MAX for every pixel as we will keep the
  // minimum.
-  for (i = 0; i < pix_count; ++i) manager->costs_[i] = WEBP_INT64_MAX;
+  for (i = 0; i < pix_count; ++i) manager->costs[i] = WEBP_INT64_MAX;

  return 1;
 }
@@ -332,9 +329,9 @@ static WEBP_INLINE void UpdateCost(CostManager* const manager, int i,
  const int k = i - position;
  assert(k >= 0 && k < MAX_LENGTH);

-  if (manager->costs_[i] > cost) {
-    manager->costs_[i] = cost;
-    manager->dist_array_[i] = k + 1;
+  if (manager->costs[i] > cost) {
+    manager->costs[i] = cost;
+    manager->dist_array[i] = k + 1;
  }
 }

@@ -352,12 +349,12 @@ static WEBP_INLINE void ConnectIntervals(CostManager* const manager,
                                         CostInterval* const prev,
                                         CostInterval* const next) {
  if (prev != NULL) {
-    prev->next_ = next;
+    prev->next = next;
  } else {
-    manager->head_ = next;
+    manager->head = next;
  }

-  if (next != NULL) next->previous_ = prev;
+  if (next != NULL) next->previous = prev;
 }

 // Pop an interval in the manager.
@@ -365,15 +362,15 @@ static WEBP_INLINE void PopInterval(CostManager* const manager,
                                    CostInterval* const interval) {
  if (interval == NULL) return;

-  ConnectIntervals(manager, interval->previous_, interval->next_);
+  ConnectIntervals(manager, interval->previous, interval->next);
  if (CostIntervalIsInFreeList(manager, interval)) {
    CostIntervalAddToFreeList(manager, interval);
  } else {  // recycle regularly malloc'd intervals too
-    interval->next_ = manager->recycled_intervals_;
-    manager->recycled_intervals_ = interval;
+    interval->next = manager->recycled_intervals;
+    manager->recycled_intervals = interval;
  }
-  --manager->count_;
-  assert(manager->count_ >= 0);
+  --manager->count;
+  assert(manager->count >= 0);
 }

 // Update the cost at index i by going over all the stored intervals that
@@ -382,17 +379,17 @@ static WEBP_INLINE void PopInterval(CostManager* const manager,
 // end before 'i' will be popped.
 static WEBP_INLINE void UpdateCostAtIndex(CostManager* const manager, int i,
                                          int do_clean_intervals) {
-  CostInterval* current = manager->head_;
+  CostInterval* current = manager->head;

-  while (current != NULL && current->start_ <= i) {
-    CostInterval* const next = current->next_;
-    if (current->end_ <= i) {
+  while (current != NULL && current->start <= i) {
+    CostInterval* const next = current->next;
+    if (current->end <= i) {
      if (do_clean_intervals) {
        // We have an outdated interval, remove it.
        PopInterval(manager, current);
      }
    } else {
-      UpdateCost(manager, i, current->index_, current->cost_);
+      UpdateCost(manager, i, current->index, current->cost);
    }
    current = next;
  }
@@ -400,31 +397,31 @@ static WEBP_INLINE void UpdateCostAtIndex(CostManager* const manager, int i,

 // Given a current orphan interval and its previous interval, before
 // it was orphaned (which can be NULL), set it at the right place in the list
-// of intervals using the start_ ordering and the previous interval as a hint.
+// of intervals using the 'start' ordering and the previous interval as a hint.
 static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager,
                                               CostInterval* const current,
                                               CostInterval* previous) {
  assert(current != NULL);

-  if (previous == NULL) previous = manager->head_;
-  while (previous != NULL && current->start_ < previous->start_) {
-    previous = previous->previous_;
+  if (previous == NULL) previous = manager->head;
+  while (previous != NULL && current->start < previous->start) {
+    previous = previous->previous;
  }
-  while (previous != NULL && previous->next_ != NULL &&
-         previous->next_->start_ < current->start_) {
-    previous = previous->next_;
+  while (previous != NULL && previous->next != NULL &&
+         previous->next->start < current->start) {
+    previous = previous->next;
  }

  if (previous != NULL) {
-    ConnectIntervals(manager, current, previous->next_);
+    ConnectIntervals(manager, current, previous->next);
  } else {
-    ConnectIntervals(manager, current, manager->head_);
+    ConnectIntervals(manager, current, manager->head);
  }
  ConnectIntervals(manager, previous, current);
 }

 // Insert an interval in the list contained in the manager by starting at
-// interval_in as a hint. The intervals are sorted by start_ value.
+// 'interval_in' as a hint. The intervals are sorted by 'start' value.
 static WEBP_INLINE void InsertInterval(CostManager* const manager,
                                       CostInterval* const interval_in,
                                       int64_t cost, int position, int start,
@@ -432,17 +429,17 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager,
  CostInterval* interval_new;

  if (start >= end) return;
-  if (manager->count_ >= COST_CACHE_INTERVAL_SIZE_MAX) {
+  if (manager->count >= COST_CACHE_INTERVAL_SIZE_MAX) {
    // Serialize the interval if we cannot store it.
    UpdateCostPerInterval(manager, start, end, position, cost);
    return;
  }
-  if (manager->free_intervals_ != NULL) {
-    interval_new = manager->free_intervals_;
-    manager->free_intervals_ = interval_new->next_;
-  } else if (manager->recycled_intervals_ != NULL) {
-    interval_new = manager->recycled_intervals_;
-    manager->recycled_intervals_ = interval_new->next_;
+  if (manager->free_intervals != NULL) {
+    interval_new = manager->free_intervals;
+    manager->free_intervals = interval_new->next;
+  } else if (manager->recycled_intervals != NULL) {
+    interval_new = manager->recycled_intervals;
+    manager->recycled_intervals = interval_new->next;
  } else {  // malloc for good
    interval_new = (CostInterval*)WebPSafeMalloc(1, sizeof(*interval_new));
    if (interval_new == NULL) {
@@ -452,13 +449,13 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager,
    }
  }

-  interval_new->cost_ = cost;
-  interval_new->index_ = position;
-  interval_new->start_ = start;
-  interval_new->end_ = end;
+  interval_new->cost = cost;
+  interval_new->index = position;
+  interval_new->start = start;
+  interval_new->end = end;
  PositionOrphanInterval(manager, interval_new, interval_in);

-  ++manager->count_;
+  ++manager->count;
 }

 // Given a new cost interval defined by its start at position, its length value
@@ -469,10 +466,10 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
                                     int64_t distance_cost, int position,
                                     int len) {
  size_t i;
-  CostInterval* interval = manager->head_;
+  CostInterval* interval = manager->head;
  CostInterval* interval_next;
  const CostCacheInterval* const cost_cache_intervals =
-      manager->cache_intervals_;
+      manager->cache_intervals;
  // If the interval is small enough, no need to deal with the heavy
  // interval logic, just serialize it right away. This constant is empirical.
  const int kSkipDistance = 10;
@@ -483,84 +480,84 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
      const int k = j - position;
      int64_t cost_tmp;
      assert(k >= 0 && k < MAX_LENGTH);
-      cost_tmp = distance_cost + manager->cost_cache_[k];
+      cost_tmp = distance_cost + manager->cost_cache[k];

-      if (manager->costs_[j] > cost_tmp) {
-        manager->costs_[j] = cost_tmp;
-        manager->dist_array_[j] = k + 1;
+      if (manager->costs[j] > cost_tmp) {
+        manager->costs[j] = cost_tmp;
+        manager->dist_array[j] = k + 1;
      }
    }
    return;
  }

-  for (i = 0; i < manager->cache_intervals_size_ &&
-              cost_cache_intervals[i].start_ < len;
+  for (i = 0; i < manager->cache_intervals_size &&
+              cost_cache_intervals[i].start < len;
       ++i) {
    // Define the intersection of the ith interval with the new one.
-    int start = position + cost_cache_intervals[i].start_;
-    const int end = position + (cost_cache_intervals[i].end_ > len
+    int start = position + cost_cache_intervals[i].start;
+    const int end = position + (cost_cache_intervals[i].end > len
                                 ? len
-                                 : cost_cache_intervals[i].end_);
-    const int64_t cost = distance_cost + cost_cache_intervals[i].cost_;
+                                 : cost_cache_intervals[i].end);
+    const int64_t cost = distance_cost + cost_cache_intervals[i].cost;

-    for (; interval != NULL && interval->start_ < end;
+    for (; interval != NULL && interval->start < end;
         interval = interval_next) {
-      interval_next = interval->next_;
+      interval_next = interval->next;

      // Make sure we have some overlap
-      if (start >= interval->end_) continue;
+      if (start >= interval->end) continue;

-      if (cost >= interval->cost_) {
+      if (cost >= interval->cost) {
        // When intervals are represented, the lower, the better.
        // [**********************************************************[
        // start                                                    end
        //                   [----------------------------------[
-        //                   interval->start_       interval->end_
+        //                   interval->start        interval->end
        // If we are worse than what we already have, add whatever we have so
        // far up to interval.
-        const int start_new = interval->end_;
+        const int start_new = interval->end;
        InsertInterval(manager, interval, cost, position, start,
-                       interval->start_);
+                       interval->start);
        start = start_new;
        if (start >= end) break;
        continue;
      }

-      if (start <= interval->start_) {
-        if (interval->end_ <= end) {
+      if (start <= interval->start) {
+        if (interval->end <= end) {
          //                   [----------------------------------[
-          //                   interval->start_       interval->end_
+          //                   interval->start        interval->end
          // [**************************************************************[
          // start                                                        end
          // We can safely remove the old interval as it is fully included.
          PopInterval(manager, interval);
        } else {
          //              [------------------------------------[
-          //              interval->start_        interval->end_
+          //              interval->start          interval->end
          // [*****************************[
          // start                       end
-          interval->start_ = end;
+          interval->start = end;
          break;
        }
      } else {
-        if (end < interval->end_) {
+        if (end < interval->end) {
          // [--------------------------------------------------------------[
-          // interval->start_                                  interval->end_
+          // interval->start                                    interval->end
          //                     [*****************************[
          //                     start                       end
          // We have to split the old interval as it fully contains the new one.
-          const int end_original = interval->end_;
-          interval->end_ = start;
-          InsertInterval(manager, interval, interval->cost_, interval->index_,
+          const int end_original = interval->end;
+          interval->end = start;
+          InsertInterval(manager, interval, interval->cost, interval->index,
                         end, end_original);
-          interval = interval->next_;
+          interval = interval->next;
          break;
        } else {
          // [------------------------------------[
-          // interval->start_        interval->end_
+          // interval->start          interval->end
          //                     [*****************************[
          //                     start                       end
-          interval->end_ = start;
+          interval->end = start;
        }
      }
    }
@@ -579,7 +576,7 @@ static int BackwardReferencesHashChainDistanceOnly(
  const int pix_count = xsize * ysize;
  const int use_color_cache = (cache_bits > 0);
  const size_t literal_array_size =
-      sizeof(*((CostModel*)NULL)->literal_) * VP8LHistogramNumCodes(cache_bits);
+      sizeof(*((CostModel*)NULL)->literal) * VP8LHistogramNumCodes(cache_bits);
  const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
  CostModel* const cost_model =
      (CostModel*)WebPSafeCalloc(1ULL, cost_model_size);
@@ -593,7 +590,7 @@ static int BackwardReferencesHashChainDistanceOnly(

  if (cost_model == NULL || cost_manager == NULL) goto Error;

-  cost_model->literal_ = (uint32_t*)(cost_model + 1);
+  cost_model->literal = (uint32_t*)(cost_model + 1);
  if (use_color_cache) {
    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    if (!cc_init) goto Error;
@@ -613,17 +610,17 @@ static int BackwardReferencesHashChainDistanceOnly(
  // Add first pixel as literal.
  AddSingleLiteralWithCostModel(argb, &hashers, cost_model, /*idx=*/0,
                                use_color_cache, /*prev_cost=*/0,
-                                cost_manager->costs_, dist_array);
+                                cost_manager->costs, dist_array);

  for (i = 1; i < pix_count; ++i) {
-    const int64_t prev_cost = cost_manager->costs_[i - 1];
+    const int64_t prev_cost = cost_manager->costs[i - 1];
    int offset, len;
    VP8LHashChainFindCopy(hash_chain, i, &offset, &len);

    // Try adding the pixel as a literal.
    AddSingleLiteralWithCostModel(argb, &hashers, cost_model, i,
                                  use_color_cache, prev_cost,
-                                  cost_manager->costs_, dist_array);
+                                  cost_manager->costs, dist_array);

    // If we are dealing with a non-literal.
    if (len >= 2) {
@@ -671,7 +668,7 @@ static int BackwardReferencesHashChainDistanceOnly(
          UpdateCostAtIndex(cost_manager, j - 1, 0);
          UpdateCostAtIndex(cost_manager, j, 0);

-          PushInterval(cost_manager, cost_manager->costs_[j - 1] + offset_cost,
+          PushInterval(cost_manager, cost_manager->costs[j - 1] + offset_cost,
                       j, len_j);
          reach = j + len_j - 1;
        }
@@ -683,7 +680,7 @@ static int BackwardReferencesHashChainDistanceOnly(
    len_prev = len;
  }

-  ok = !refs->error_;
+  ok = !refs->error;
 Error:
  if (cc_init) VP8LColorCacheClear(&hashers);
  CostManagerClear(cost_manager);
@@ -756,7 +753,7 @@ static int BackwardReferencesHashChainFollowChosenPath(
      ++i;
    }
  }
-  ok = !refs->error_;
+  ok = !refs->error;
 Error:
  if (cc_init) VP8LColorCacheClear(&hashers);
  return ok;
--- a/thirdparty/libwebp/src/enc/backward_references_enc.c
+++ b/thirdparty/libwebp/src/enc/backward_references_enc.c
@@ -13,8 +13,9 @@
 #include "src/enc/backward_references_enc.h"

 #include <assert.h>
+#include <string.h>

-#include "src/dsp/dsp.h"
+#include "src/dsp/cpu.h"
 #include "src/dsp/lossless.h"
 #include "src/dsp/lossless_common.h"
 #include "src/enc/histogram_enc.h"
@@ -22,6 +23,8 @@
 #include "src/utils/color_cache_utils.h"
 #include "src/utils/utils.h"
 #include "src/webp/encode.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"

 #define MIN_BLOCK_SIZE 256  // minimum block size for backward references

@@ -76,30 +79,30 @@ static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
 //  VP8LBackwardRefs

 struct PixOrCopyBlock {
-  PixOrCopyBlock* next_;   // next block (or NULL)
-  PixOrCopy* start_;       // data start
-  int size_;               // currently used size
+  PixOrCopyBlock* next;   // next block (or NULL)
+  PixOrCopy* start;       // data start
+  int size;               // currently used size
 };

 extern void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs);
 void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) {
  assert(refs != NULL);
-  if (refs->tail_ != NULL) {
-    *refs->tail_ = refs->free_blocks_;  // recycle all blocks at once
+  if (refs->tail != NULL) {
+    *refs->tail = refs->free_blocks;  // recycle all blocks at once
  }
-  refs->free_blocks_ = refs->refs_;
-  refs->tail_ = &refs->refs_;
-  refs->last_block_ = NULL;
-  refs->refs_ = NULL;
+  refs->free_blocks = refs->refs;
+  refs->tail = &refs->refs;
+  refs->last_block = NULL;
+  refs->refs = NULL;
 }

 void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
  assert(refs != NULL);
  VP8LClearBackwardRefs(refs);
-  while (refs->free_blocks_ != NULL) {
-    PixOrCopyBlock* const next = refs->free_blocks_->next_;
-    WebPSafeFree(refs->free_blocks_);
-    refs->free_blocks_ = next;
+  while (refs->free_blocks != NULL) {
+    PixOrCopyBlock* const next = refs->free_blocks->next;
+    WebPSafeFree(refs->free_blocks);
+    refs->free_blocks = next;
  }
 }

@@ -107,79 +110,79 @@ void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
 static void BackwardRefsSwap(VP8LBackwardRefs* const refs1,
                             VP8LBackwardRefs* const refs2) {
  const int point_to_refs1 =
-      (refs1->tail_ != NULL && refs1->tail_ == &refs1->refs_);
+      (refs1->tail != NULL && refs1->tail == &refs1->refs);
  const int point_to_refs2 =
-      (refs2->tail_ != NULL && refs2->tail_ == &refs2->refs_);
+      (refs2->tail != NULL && refs2->tail == &refs2->refs);
  const VP8LBackwardRefs tmp = *refs1;
  *refs1 = *refs2;
  *refs2 = tmp;
-  if (point_to_refs2) refs1->tail_ = &refs1->refs_;
-  if (point_to_refs1) refs2->tail_ = &refs2->refs_;
+  if (point_to_refs2) refs1->tail = &refs1->refs;
+  if (point_to_refs1) refs2->tail = &refs2->refs;
 }

 void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
  assert(refs != NULL);
  memset(refs, 0, sizeof(*refs));
-  refs->tail_ = &refs->refs_;
-  refs->block_size_ =
+  refs->tail = &refs->refs;
+  refs->block_size =
      (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size;
 }

 VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
  VP8LRefsCursor c;
-  c.cur_block_ = refs->refs_;
-  if (refs->refs_ != NULL) {
-    c.cur_pos = c.cur_block_->start_;
-    c.last_pos_ = c.cur_pos + c.cur_block_->size_;
+  c.cur_block = refs->refs;
+  if (refs->refs != NULL) {
+    c.cur_pos = c.cur_block->start;
+    c.last_pos = c.cur_pos + c.cur_block->size;
  } else {
    c.cur_pos = NULL;
-    c.last_pos_ = NULL;
+    c.last_pos = NULL;
  }
  return c;
 }

 void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
-  PixOrCopyBlock* const b = c->cur_block_->next_;
-  c->cur_pos = (b == NULL) ? NULL : b->start_;
-  c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_;
-  c->cur_block_ = b;
+  PixOrCopyBlock* const b = c->cur_block->next;
+  c->cur_pos = (b == NULL) ? NULL : b->start;
+  c->last_pos = (b == NULL) ? NULL : b->start + b->size;
+  c->cur_block = b;
 }

 // Create a new block, either from the free list or allocated
 static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
-  PixOrCopyBlock* b = refs->free_blocks_;
+  PixOrCopyBlock* b = refs->free_blocks;
  if (b == NULL) {   // allocate new memory chunk
    const size_t total_size =
-        sizeof(*b) + refs->block_size_ * sizeof(*b->start_);
+        sizeof(*b) + refs->block_size * sizeof(*b->start);
    b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
    if (b == NULL) {
-      refs->error_ |= 1;
+      refs->error |= 1;
      return NULL;
    }
-    b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b));  // not always aligned
+    b->start = (PixOrCopy*)((uint8_t*)b + sizeof(*b));  // not always aligned
  } else {  // recycle from free-list
-    refs->free_blocks_ = b->next_;
+    refs->free_blocks = b->next;
  }
-  *refs->tail_ = b;
-  refs->tail_ = &b->next_;
-  refs->last_block_ = b;
-  b->next_ = NULL;
-  b->size_ = 0;
+  *refs->tail = b;
+  refs->tail = &b->next;
+  refs->last_block = b;
+  b->next = NULL;
+  b->size = 0;
  return b;
 }

 // Return 1 on success, 0 on error.
 static int BackwardRefsClone(const VP8LBackwardRefs* const from,
                             VP8LBackwardRefs* const to) {
-  const PixOrCopyBlock* block_from = from->refs_;
+  const PixOrCopyBlock* block_from = from->refs;
  VP8LClearBackwardRefs(to);
  while (block_from != NULL) {
    PixOrCopyBlock* const block_to = BackwardRefsNewBlock(to);
    if (block_to == NULL) return 0;
-    memcpy(block_to->start_, block_from->start_,
-           block_from->size_ * sizeof(PixOrCopy));
-    block_to->size_ = block_from->size_;
-    block_from = block_from->next_;
+    memcpy(block_to->start, block_from->start,
+           block_from->size * sizeof(PixOrCopy));
+    block_to->size = block_from->size;
+    block_from = block_from->next;
  }
  return 1;
 }
@@ -188,35 +191,35 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
                                      const PixOrCopy v);
 void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
                               const PixOrCopy v) {
-  PixOrCopyBlock* b = refs->last_block_;
-  if (b == NULL || b->size_ == refs->block_size_) {
+  PixOrCopyBlock* b = refs->last_block;
+  if (b == NULL || b->size == refs->block_size) {
    b = BackwardRefsNewBlock(refs);
-    if (b == NULL) return;   // refs->error_ is set
+    if (b == NULL) return;   // refs->error is set
  }
-  b->start_[b->size_++] = v;
+  b->start[b->size++] = v;
 }

 // -----------------------------------------------------------------------------
 // Hash chains

 int VP8LHashChainInit(VP8LHashChain* const p, int size) {
-  assert(p->size_ == 0);
-  assert(p->offset_length_ == NULL);
+  assert(p->size == 0);
+  assert(p->offset_length == NULL);
  assert(size > 0);
-  p->offset_length_ =
-      (uint32_t*)WebPSafeMalloc(size, sizeof(*p->offset_length_));
-  if (p->offset_length_ == NULL) return 0;
-  p->size_ = size;
+  p->offset_length =
+      (uint32_t*)WebPSafeMalloc(size, sizeof(*p->offset_length));
+  if (p->offset_length == NULL) return 0;
+  p->size = size;

  return 1;
 }

 void VP8LHashChainClear(VP8LHashChain* const p) {
  assert(p != NULL);
-  WebPSafeFree(p->offset_length_);
+  WebPSafeFree(p->offset_length);

-  p->size_ = 0;
-  p->offset_length_ = NULL;
+  p->size = 0;
+  p->offset_length = NULL;
 }

 // -----------------------------------------------------------------------------
@@ -265,14 +268,14 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
  int argb_comp;
  uint32_t base_position;
  int32_t* hash_to_first_index;
-  // Temporarily use the p->offset_length_ as a hash chain.
-  int32_t* chain = (int32_t*)p->offset_length_;
+  // Temporarily use the p->offset_length as a hash chain.
+  int32_t* chain = (int32_t*)p->offset_length;
  assert(size > 0);
-  assert(p->size_ != 0);
-  assert(p->offset_length_ != NULL);
+  assert(p->size != 0);
+  assert(p->offset_length != NULL);

  if (size <= 2) {
-    p->offset_length_[0] = p->offset_length_[size - 1] = 0;
+    p->offset_length[0] = p->offset_length[size - 1] = 0;
    return 1;
  }

@@ -351,7 +354,7 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
  // (hence a best length of 0) and the left-most pixel nothing to the left
  // (hence an offset of 0).
  assert(size > 2);
-  p->offset_length_[0] = p->offset_length_[size - 1] = 0;
+  p->offset_length[0] = p->offset_length[size - 1] = 0;
  for (base_position = size - 2; base_position > 0;) {
    const int max_len = MaxFindCopyLength(size - 1 - base_position);
    const uint32_t* const argb_start = argb + base_position;
@@ -411,7 +414,7 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
    while (1) {
      assert(best_length <= MAX_LENGTH);
      assert(best_distance <= WINDOW_SIZE);
-      p->offset_length_[base_position] =
+      p->offset_length[base_position] =
          (best_distance << MAX_LENGTH_BITS) | (uint32_t)best_length;
      --base_position;
      // Stop if we don't have a match or if we are out of bounds.
@@ -505,7 +508,7 @@ static int BackwardReferencesRle(int xsize, int ysize,
    }
  }
  if (use_color_cache) VP8LColorCacheClear(&hashers);
-  return !refs->error_;
+  return !refs->error;
 }

 static int BackwardReferencesLz77(int xsize, int ysize,
@@ -570,7 +573,7 @@ static int BackwardReferencesLz77(int xsize, int ysize,
    i += len;
  }

-  ok = !refs->error_;
+  ok = !refs->error;
 Error:
  if (cc_init) VP8LColorCacheClear(&hashers);
  return ok;
@@ -645,7 +648,7 @@ static int BackwardReferencesLz77Box(int xsize, int ysize,
    }
  }

-  hash_chain->offset_length_[0] = 0;
+  hash_chain->offset_length[0] = 0;
  for (i = 1; i < pix_count; ++i) {
    int ind;
    int best_length = VP8LHashChainFindLength(hash_chain_best, i);
@@ -712,17 +715,17 @@ static int BackwardReferencesLz77Box(int xsize, int ysize,
    assert(i + best_length <= pix_count);
    assert(best_length <= MAX_LENGTH);
    if (best_length <= MIN_LENGTH) {
-      hash_chain->offset_length_[i] = 0;
+      hash_chain->offset_length[i] = 0;
      best_offset_prev = 0;
      best_length_prev = 0;
    } else {
-      hash_chain->offset_length_[i] =
+      hash_chain->offset_length[i] =
          (best_offset << MAX_LENGTH_BITS) | (uint32_t)best_length;
      best_offset_prev = best_offset;
      best_length_prev = best_length;
    }
  }
-  hash_chain->offset_length_[0] = 0;
+  hash_chain->offset_length[0] = 0;
  WebPSafeFree(counts_ini);

  return BackwardReferencesLz77(xsize, ysize, argb, cache_bits, hash_chain,
@@ -793,20 +796,20 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
      // The keys of the caches can be derived from the longest one.
      int key = VP8LHashPix(pix, 32 - cache_bits_max);
      // Do not use the color cache for cache_bits = 0.
-      ++histos[0]->blue_[b];
-      ++histos[0]->literal_[g];
-      ++histos[0]->red_[r];
-      ++histos[0]->alpha_[a];
+      ++histos[0]->blue[b];
+      ++histos[0]->literal[g];
+      ++histos[0]->red[r];
+      ++histos[0]->alpha[a];
      // Deal with cache_bits > 0.
      for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
        if (VP8LColorCacheLookup(&hashers[i], key) == pix) {
-          ++histos[i]->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
+          ++histos[i]->literal[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
        } else {
          VP8LColorCacheSet(&hashers[i], key, pix);
-          ++histos[i]->blue_[b];
-          ++histos[i]->literal_[g];
-          ++histos[i]->red_[r];
-          ++histos[i]->alpha_[a];
+          ++histos[i]->blue[b];
+          ++histos[i]->literal[g];
+          ++histos[i]->red[r];
+          ++histos[i]->alpha[a];
        }
      }
    } else {
@@ -815,12 +818,12 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
      // histograms but those are the same independently from the cache size.
      // As those constant contributions are in the end added to the other
      // histogram contributions, we can ignore them, except for the length
-      // prefix that is part of the literal_ histogram.
+      // prefix that is part of the 'literal' histogram.
      int len = PixOrCopyLength(v);
      uint32_t argb_prev = *argb ^ 0xffffffffu;
      VP8LPrefixEncode(len, &code, &extra_bits, &extra_bits_value);
      for (i = 0; i <= cache_bits_max; ++i) {
-        ++histos[i]->literal_[NUM_LITERAL_CODES + code];
+        ++histos[i]->literal[NUM_LITERAL_CODES + code];
      }
      // Update the color caches.
      do {
@@ -828,7 +831,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
          // Efficiency: insert only if the color changes.
          int key = VP8LHashPix(*argb, 32 - cache_bits_max);
          for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
-            hashers[i].colors_[key] = *argb;
+            hashers[i].colors[key] = *argb;
          }
          argb_prev = *argb;
        }
--- a/thirdparty/libwebp/src/enc/backward_references_enc.h
+++ b/thirdparty/libwebp/src/enc/backward_references_enc.h
@@ -15,9 +15,10 @@

 #include <assert.h>
 #include <stdlib.h>
-#include "src/webp/types.h"
+
 #include "src/webp/encode.h"
 #include "src/webp/format_constants.h"
+#include "src/webp/types.h"

 #ifdef __cplusplus
 extern "C" {
@@ -126,10 +127,10 @@ struct VP8LHashChain {
  // (through WINDOW_SIZE = 1<<20).
  // The lower 12 bits contain the length of the match. The 12 bit limit is
  // defined in MaxFindCopyLength with MAX_LENGTH=4096.
-  uint32_t* offset_length_;
+  uint32_t* offset_length;
  // This is the maximum size of the hash_chain that can be constructed.
  // Typically this is the pixel count (width x height) for a given image.
-  int size_;
+  int size;
 };

 // Must be called first, to set size.
@@ -143,12 +144,12 @@ void VP8LHashChainClear(VP8LHashChain* const p);  // release memory

 static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p,
                                               const int base_position) {
-  return p->offset_length_[base_position] >> MAX_LENGTH_BITS;
+  return p->offset_length[base_position] >> MAX_LENGTH_BITS;
 }

 static WEBP_INLINE int VP8LHashChainFindLength(const VP8LHashChain* const p,
                                               const int base_position) {
-  return p->offset_length_[base_position] & ((1U << MAX_LENGTH_BITS) - 1);
+  return p->offset_length[base_position] & ((1U << MAX_LENGTH_BITS) - 1);
 }

 static WEBP_INLINE void VP8LHashChainFindCopy(const VP8LHashChain* const p,
@@ -170,12 +171,12 @@ typedef struct VP8LBackwardRefs VP8LBackwardRefs;

 // Container for blocks chain
 struct VP8LBackwardRefs {
-  int block_size_;               // common block-size
-  int error_;                    // set to true if some memory error occurred
-  PixOrCopyBlock* refs_;         // list of currently used blocks
-  PixOrCopyBlock** tail_;        // for list recycling
-  PixOrCopyBlock* free_blocks_;  // free-list
-  PixOrCopyBlock* last_block_;   // used for adding new refs (internal)
+  int block_size;               // common block-size
+  int error;                    // set to true if some memory error occurred
+  PixOrCopyBlock* refs;         // list of currently used blocks
+  PixOrCopyBlock** tail;        // for list recycling
+  PixOrCopyBlock* free_blocks;  // free-list
+  PixOrCopyBlock* last_block;   // used for adding new refs (internal)
 };

 // Initialize the object. 'block_size' is the common block size to store
@@ -189,8 +190,8 @@ typedef struct {
  // public:
  PixOrCopy* cur_pos;           // current position
  // private:
-  PixOrCopyBlock* cur_block_;   // current block in the refs list
-  const PixOrCopy* last_pos_;   // sentinel for switching to next block
+  PixOrCopyBlock* cur_block;    // current block in the refs list
+  const PixOrCopy* last_pos;    // sentinel for switching to next block
 } VP8LRefsCursor;

 // Returns a cursor positioned at the beginning of the references list.
@@ -205,7 +206,7 @@ void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c);
 static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) {
  assert(c != NULL);
  assert(VP8LRefsCursorOk(c));
-  if (++c->cur_pos == c->last_pos_) VP8LRefsCursorNextBlock(c);
+  if (++c->cur_pos == c->last_pos) VP8LRefsCursorNextBlock(c);
 }

 // -----------------------------------------------------------------------------
--- a/thirdparty/libwebp/src/enc/config_enc.c
+++ b/thirdparty/libwebp/src/enc/config_enc.c
@@ -15,7 +15,10 @@
 #include "src/webp/config.h"
 #endif

+#include <stddef.h>
+
 #include "src/webp/encode.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // WebPConfig
@@ -135,8 +138,8 @@ int WebPValidateConfig(const WebPConfig* config) {

 // Mapping between -z level and -m / -q parameter settings.
 static const struct {
-  uint8_t method_;
-  uint8_t quality_;
+  uint8_t method;
+  uint8_t quality;
 } kLosslessPresets[MAX_LEVEL + 1] = {
  { 0,  0 }, { 1, 20 }, { 2, 25 }, { 3, 30 }, { 3, 50 },
  { 4, 50 }, { 4, 75 }, { 4, 90 }, { 5, 90 }, { 6, 100 }
@@ -145,8 +148,8 @@ static const struct {
 int WebPConfigLosslessPreset(WebPConfig* config, int level) {
  if (config == NULL || level < 0 || level > MAX_LEVEL) return 0;
  config->lossless = 1;
-  config->method = kLosslessPresets[level].method_;
-  config->quality = kLosslessPresets[level].quality_;
+  config->method = kLosslessPresets[level].method;
+  config->quality = kLosslessPresets[level].quality;
  return 1;
 }

--- a/thirdparty/libwebp/src/enc/cost_enc.c
+++ b/thirdparty/libwebp/src/enc/cost_enc.c
@@ -11,7 +11,13 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

+#include <stdlib.h>
+
+#include "src/dec/common_dec.h"
+#include "src/webp/types.h"
+#include "src/dsp/dsp.h"
 #include "src/enc/cost_enc.h"
+#include "src/enc/vp8i_enc.h"

 //------------------------------------------------------------------------------
 // Level cost tables
@@ -60,14 +66,14 @@ static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) {
 void VP8CalculateLevelCosts(VP8EncProba* const proba) {
  int ctype, band, ctx;

-  if (!proba->dirty_) return;  // nothing to do.
+  if (!proba->dirty) return;  // nothing to do.

  for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
    int n;
    for (band = 0; band < NUM_BANDS; ++band) {
      for (ctx = 0; ctx < NUM_CTX; ++ctx) {
-        const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
-        uint16_t* const table = proba->level_cost_[ctype][band][ctx];
+        const uint8_t* const p = proba->coeffs[ctype][band][ctx];
+        uint16_t* const table = proba->level_cost[ctype][band][ctx];
        const int cost0 = (ctx > 0) ? VP8BitCost(1, p[0]) : 0;
        const int cost_base = VP8BitCost(1, p[1]) + cost0;
        int v;
@@ -81,12 +87,12 @@ void VP8CalculateLevelCosts(VP8EncProba* const proba) {
    }
    for (n = 0; n < 16; ++n) {    // replicate bands. We don't need to sentinel.
      for (ctx = 0; ctx < NUM_CTX; ++ctx) {
-        proba->remapped_costs_[ctype][n][ctx] =
-            proba->level_cost_[ctype][VP8EncBands[n]][ctx];
+        proba->remapped_costs[ctype][n][ctx] =
+            proba->level_cost[ctype][VP8EncBands[n]][ctx];
      }
    }
  }
-  proba->dirty_ = 0;
+  proba->dirty = 0;
 }

 //------------------------------------------------------------------------------
@@ -206,9 +212,9 @@ const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = {
 void VP8InitResidual(int first, int coeff_type,
                     VP8Encoder* const enc, VP8Residual* const res) {
  res->coeff_type = coeff_type;
-  res->prob  = enc->proba_.coeffs_[coeff_type];
-  res->stats = enc->proba_.stats_[coeff_type];
-  res->costs = enc->proba_.remapped_costs_[coeff_type];
+  res->prob  = enc->proba.coeffs[coeff_type];
+  res->stats = enc->proba.stats[coeff_type];
+  res->costs = enc->proba.remapped_costs[coeff_type];
  res->first = first;
 }

@@ -216,14 +222,14 @@ void VP8InitResidual(int first, int coeff_type,
 // Mode costs

 int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {
-  const int x = (it->i4_ & 3), y = (it->i4_ >> 2);
+  const int x = (it->i4 & 3), y = (it->i4 >> 2);
  VP8Residual res;
-  VP8Encoder* const enc = it->enc_;
+  VP8Encoder* const enc = it->enc;
  int R = 0;
  int ctx;

  VP8InitResidual(0, 3, enc, &res);
-  ctx = it->top_nz_[x] + it->left_nz_[y];
+  ctx = it->top_nz[x] + it->left_nz[y];
  VP8SetResidualCoeffs(levels, &res);
  R += VP8GetResidualCost(ctx, &res);
  return R;
@@ -231,7 +237,7 @@ int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {

 int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
  VP8Residual res;
-  VP8Encoder* const enc = it->enc_;
+  VP8Encoder* const enc = it->enc;
  int x, y;
  int R = 0;

@@ -240,16 +246,16 @@ int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
  // DC
  VP8InitResidual(0, 1, enc, &res);
  VP8SetResidualCoeffs(rd->y_dc_levels, &res);
-  R += VP8GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res);
+  R += VP8GetResidualCost(it->top_nz[8] + it->left_nz[8], &res);

  // AC
  VP8InitResidual(1, 0, enc, &res);
  for (y = 0; y < 4; ++y) {
    for (x = 0; x < 4; ++x) {
-      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      const int ctx = it->top_nz[x] + it->left_nz[y];
      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
      R += VP8GetResidualCost(ctx, &res);
-      it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0);
+      it->top_nz[x] = it->left_nz[y] = (res.last >= 0);
    }
  }
  return R;
@@ -257,7 +263,7 @@ int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {

 int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
  VP8Residual res;
-  VP8Encoder* const enc = it->enc_;
+  VP8Encoder* const enc = it->enc;
  int ch, x, y;
  int R = 0;

@@ -267,10 +273,10 @@ int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
  for (ch = 0; ch <= 2; ch += 2) {
    for (y = 0; y < 2; ++y) {
      for (x = 0; x < 2; ++x) {
-        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        const int ctx = it->top_nz[4 + ch + x] + it->left_nz[4 + ch + y];
        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
        R += VP8GetResidualCost(ctx, &res);
-        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0);
+        it->top_nz[4 + ch + x] = it->left_nz[4 + ch + y] = (res.last >= 0);
      }
    }
  }
--- a/thirdparty/libwebp/src/enc/cost_enc.h
+++ b/thirdparty/libwebp/src/enc/cost_enc.h
@@ -16,7 +16,11 @@

 #include <assert.h>
 #include <stdlib.h>
+
+#include "src/dec/common_dec.h"
+#include "src/dsp/dsp.h"
 #include "src/enc/vp8i_enc.h"
+#include "src/webp/types.h"

 #ifdef __cplusplus
 extern "C" {
--- a/thirdparty/libwebp/src/enc/filter_enc.c
+++ b/thirdparty/libwebp/src/enc/filter_enc.c
@@ -12,8 +12,13 @@
 // Author: somnath@google.com (Somnath Banerjee)

 #include <assert.h>
-#include "src/enc/vp8i_enc.h"
+#include <stddef.h>
+#include <string.h>
+
+#include "src/dec/common_dec.h"
+#include "src/webp/types.h"
 #include "src/dsp/dsp.h"
+#include "src/enc/vp8i_enc.h"

 // This table gives, for a given sharpness, the filtering strength to be
 // used (at least) in order to filter a given edge step delta.
@@ -83,18 +88,18 @@ static int GetILevel(int sharpness, int level) {
 }

 static void DoFilter(const VP8EncIterator* const it, int level) {
-  const VP8Encoder* const enc = it->enc_;
-  const int ilevel = GetILevel(enc->config_->filter_sharpness, level);
+  const VP8Encoder* const enc = it->enc;
+  const int ilevel = GetILevel(enc->config->filter_sharpness, level);
  const int limit = 2 * level + ilevel;

-  uint8_t* const y_dst = it->yuv_out2_ + Y_OFF_ENC;
-  uint8_t* const u_dst = it->yuv_out2_ + U_OFF_ENC;
-  uint8_t* const v_dst = it->yuv_out2_ + V_OFF_ENC;
+  uint8_t* const y_dst = it->yuv_out2 + Y_OFF_ENC;
+  uint8_t* const u_dst = it->yuv_out2 + U_OFF_ENC;
+  uint8_t* const v_dst = it->yuv_out2 + V_OFF_ENC;

-  // copy current block to yuv_out2_
-  memcpy(y_dst, it->yuv_out_, YUV_SIZE_ENC * sizeof(uint8_t));
+  // copy current block to yuv_out2
+  memcpy(y_dst, it->yuv_out, YUV_SIZE_ENC * sizeof(uint8_t));

-  if (enc->filter_hdr_.simple_ == 1) {   // simple
+  if (enc->filter_hdr.simple == 1) {   // simple
    VP8SimpleHFilter16i(y_dst, BPS, limit);
    VP8SimpleVFilter16i(y_dst, BPS, limit);
  } else {    // complex
@@ -139,11 +144,11 @@ static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) {

 void VP8InitFilter(VP8EncIterator* const it) {
 #if !defined(WEBP_REDUCE_SIZE)
-  if (it->lf_stats_ != NULL) {
+  if (it->lf_stats != NULL) {
    int s, i;
    for (s = 0; s < NUM_MB_SEGMENTS; s++) {
      for (i = 0; i < MAX_LF_LEVELS; i++) {
-        (*it->lf_stats_)[s][i] = 0;
+        (*it->lf_stats)[s][i] = 0;
      }
    }
    VP8SSIMDspInit();
@@ -156,16 +161,16 @@ void VP8InitFilter(VP8EncIterator* const it) {
 void VP8StoreFilterStats(VP8EncIterator* const it) {
 #if !defined(WEBP_REDUCE_SIZE)
  int d;
-  VP8Encoder* const enc = it->enc_;
-  const int s = it->mb_->segment_;
-  const int level0 = enc->dqm_[s].fstrength_;
+  VP8Encoder* const enc = it->enc;
+  const int s = it->mb->segment;
+  const int level0 = enc->dqm[s].fstrength;

  // explore +/-quant range of values around level0
-  const int delta_min = -enc->dqm_[s].quant_;
-  const int delta_max = enc->dqm_[s].quant_;
+  const int delta_min = -enc->dqm[s].quant;
+  const int delta_max = enc->dqm[s].quant;
  const int step_size = (delta_max - delta_min >= 4) ? 4 : 1;

-  if (it->lf_stats_ == NULL) return;
+  if (it->lf_stats == NULL) return;

  // NOTE: Currently we are applying filter only across the sublock edges
  // There are two reasons for that.
@@ -173,10 +178,10 @@ void VP8StoreFilterStats(VP8EncIterator* const it) {
  // the left and top macro blocks. That will be hard to restore
  // 2. Macro Blocks on the bottom and right are not yet compressed. So we
  // cannot apply filter on the right and bottom macro block edges.
-  if (it->mb_->type_ == 1 && it->mb_->skip_) return;
+  if (it->mb->type == 1 && it->mb->skip) return;

  // Always try filter level  zero
-  (*it->lf_stats_)[s][0] += GetMBSSIM(it->yuv_in_, it->yuv_out_);
+  (*it->lf_stats)[s][0] += GetMBSSIM(it->yuv_in, it->yuv_out);

  for (d = delta_min; d <= delta_max; d += step_size) {
    const int level = level0 + d;
@@ -184,7 +189,7 @@ void VP8StoreFilterStats(VP8EncIterator* const it) {
      continue;
    }
    DoFilter(it, level);
-    (*it->lf_stats_)[s][level] += GetMBSSIM(it->yuv_in_, it->yuv_out2_);
+    (*it->lf_stats)[s][level] += GetMBSSIM(it->yuv_in, it->yuv_out2);
  }
 #else  // defined(WEBP_REDUCE_SIZE)
  (void)it;
@@ -192,43 +197,43 @@ void VP8StoreFilterStats(VP8EncIterator* const it) {
 }

 void VP8AdjustFilterStrength(VP8EncIterator* const it) {
-  VP8Encoder* const enc = it->enc_;
+  VP8Encoder* const enc = it->enc;
 #if !defined(WEBP_REDUCE_SIZE)
-  if (it->lf_stats_ != NULL) {
+  if (it->lf_stats != NULL) {
    int s;
    for (s = 0; s < NUM_MB_SEGMENTS; s++) {
      int i, best_level = 0;
      // Improvement over filter level 0 should be at least 1e-5 (relatively)
-      double best_v = 1.00001 * (*it->lf_stats_)[s][0];
+      double best_v = 1.00001 * (*it->lf_stats)[s][0];
      for (i = 1; i < MAX_LF_LEVELS; i++) {
-        const double v = (*it->lf_stats_)[s][i];
+        const double v = (*it->lf_stats)[s][i];
        if (v > best_v) {
          best_v = v;
          best_level = i;
        }
      }
-      enc->dqm_[s].fstrength_ = best_level;
+      enc->dqm[s].fstrength = best_level;
    }
    return;
  }
 #endif  // !defined(WEBP_REDUCE_SIZE)
-  if (enc->config_->filter_strength > 0) {
+  if (enc->config->filter_strength > 0) {
    int max_level = 0;
    int s;
    for (s = 0; s < NUM_MB_SEGMENTS; s++) {
-      VP8SegmentInfo* const dqm = &enc->dqm_[s];
+      VP8SegmentInfo* const dqm = &enc->dqm[s];
      // this '>> 3' accounts for some inverse WHT scaling
-      const int delta = (dqm->max_edge_ * dqm->y2_.q_[1]) >> 3;
+      const int delta = (dqm->max_edge * dqm->y2.q[1]) >> 3;
      const int level =
-          VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, delta);
-      if (level > dqm->fstrength_) {
-        dqm->fstrength_ = level;
+          VP8FilterStrengthFromDelta(enc->filter_hdr.sharpness, delta);
+      if (level > dqm->fstrength) {
+        dqm->fstrength = level;
      }
-      if (max_level < dqm->fstrength_) {
-        max_level = dqm->fstrength_;
+      if (max_level < dqm->fstrength) {
+        max_level = dqm->fstrength;
      }
    }
-    enc->filter_hdr_.level_ = max_level;
+    enc->filter_hdr.level = max_level;
  }
 }

--- a/thirdparty/libwebp/src/enc/frame_enc.c
+++ b/thirdparty/libwebp/src/enc/frame_enc.c
@@ -11,12 +11,17 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)

-#include <string.h>
+#include <assert.h>
 #include <math.h>
+#include <string.h>

+#include "src/dec/common_dec.h"
+#include "src/webp/types.h"
+#include "src/dsp/dsp.h"
 #include "src/enc/cost_enc.h"
 #include "src/enc/vp8i_enc.h"
-#include "src/dsp/dsp.h"
+#include "src/utils/bit_writer_utils.h"
+#include "src/webp/encode.h"
 #include "src/webp/format_constants.h"  // RIFF constants

 #define SEGMENT_VISU 0
@@ -46,15 +51,15 @@ typedef struct {  // struct for organizing convergence in either size or PSNR
 } PassStats;

 static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) {
-  const uint64_t target_size = (uint64_t)enc->config_->target_size;
+  const uint64_t target_size = (uint64_t)enc->config->target_size;
  const int do_size_search = (target_size != 0);
-  const float target_PSNR = enc->config_->target_PSNR;
+  const float target_PSNR = enc->config->target_PSNR;

  s->is_first = 1;
  s->dq = 10.f;
-  s->qmin = 1.f * enc->config_->qmin;
-  s->qmax = 1.f * enc->config_->qmax;
-  s->q = s->last_q = Clamp(enc->config_->quality, s->qmin, s->qmax);
+  s->qmin = 1.f * enc->config->qmin;
+  s->qmax = 1.f * enc->config->qmax;
+  s->q = s->last_q = Clamp(enc->config->quality, s->qmin, s->qmax);
  s->target = do_size_search ? (double)target_size
            : (target_PSNR > 0.) ? target_PSNR
            : 40.;   // default, just in case
@@ -95,9 +100,9 @@ const uint8_t VP8Cat6[] =
 // Reset the statistics about: number of skips, token proba, level cost,...

 static void ResetStats(VP8Encoder* const enc) {
-  VP8EncProba* const proba = &enc->proba_;
+  VP8EncProba* const proba = &enc->proba;
  VP8CalculateLevelCosts(proba);
-  proba->nb_skip_ = 0;
+  proba->nb_skip = 0;
 }

 //------------------------------------------------------------------------------
@@ -111,17 +116,17 @@ static int CalcSkipProba(uint64_t nb, uint64_t total) {

 // Returns the bit-cost for coding the skip probability.
 static int FinalizeSkipProba(VP8Encoder* const enc) {
-  VP8EncProba* const proba = &enc->proba_;
-  const int nb_mbs = enc->mb_w_ * enc->mb_h_;
-  const int nb_events = proba->nb_skip_;
+  VP8EncProba* const proba = &enc->proba;
+  const int nb_mbs = enc->mb_w * enc->mb_h;
+  const int nb_events = proba->nb_skip;
  int size;
-  proba->skip_proba_ = CalcSkipProba(nb_events, nb_mbs);
-  proba->use_skip_proba_ = (proba->skip_proba_ < SKIP_PROBA_THRESHOLD);
+  proba->skip_proba = CalcSkipProba(nb_events, nb_mbs);
+  proba->use_skip_proba = (proba->skip_proba < SKIP_PROBA_THRESHOLD);
  size = 256;   // 'use_skip_proba' bit
-  if (proba->use_skip_proba_) {
-    size +=  nb_events * VP8BitCost(1, proba->skip_proba_)
-         + (nb_mbs - nb_events) * VP8BitCost(0, proba->skip_proba_);
-    size += 8 * 256;   // cost of signaling the skip_proba_ itself.
+  if (proba->use_skip_proba) {
+    size +=  nb_events * VP8BitCost(1, proba->skip_proba)
+         + (nb_mbs - nb_events) * VP8BitCost(0, proba->skip_proba);
+    size += 8 * 256;   // cost of signaling the 'skip_proba' itself.
  }
  return size;
 }
@@ -139,8 +144,8 @@ static int BranchCost(int nb, int total, int proba) {
 }

 static void ResetTokenStats(VP8Encoder* const enc) {
-  VP8EncProba* const proba = &enc->proba_;
-  memset(proba->stats_, 0, sizeof(proba->stats_));
+  VP8EncProba* const proba = &enc->proba;
+  memset(proba->stats, 0, sizeof(proba->stats));
 }

 static int FinalizeTokenProbas(VP8EncProba* const proba) {
@@ -151,7 +156,7 @@ static int FinalizeTokenProbas(VP8EncProba* const proba) {
    for (b = 0; b < NUM_BANDS; ++b) {
      for (c = 0; c < NUM_CTX; ++c) {
        for (p = 0; p < NUM_PROBAS; ++p) {
-          const proba_t stats = proba->stats_[t][b][c][p];
+          const proba_t stats = proba->stats[t][b][c][p];
          const int nb = (stats >> 0) & 0xffff;
          const int total = (stats >> 16) & 0xffff;
          const int update_proba = VP8CoeffsUpdateProba[t][b][c][p];
@@ -165,17 +170,17 @@ static int FinalizeTokenProbas(VP8EncProba* const proba) {
          const int use_new_p = (old_cost > new_cost);
          size += VP8BitCost(use_new_p, update_proba);
          if (use_new_p) {  // only use proba that seem meaningful enough.
-            proba->coeffs_[t][b][c][p] = new_p;
+            proba->coeffs[t][b][c][p] = new_p;
            has_changed |= (new_p != old_p);
            size += 8 * 256;
          } else {
-            proba->coeffs_[t][b][c][p] = old_p;
+            proba->coeffs[t][b][c][p] = old_p;
          }
        }
      }
    }
  }
-  proba->dirty_ = has_changed;
+  proba->dirty = has_changed;
  return size;
 }

@@ -190,8 +195,8 @@ static int GetProba(int a, int b) {

 static void ResetSegments(VP8Encoder* const enc) {
  int n;
-  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
-    enc->mb_info_[n].segment_ = 0;
+  for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
+    enc->mb_info[n].segment = 0;
  }
 }

@@ -199,34 +204,34 @@ static void SetSegmentProbas(VP8Encoder* const enc) {
  int p[NUM_MB_SEGMENTS] = { 0 };
  int n;

-  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
-    const VP8MBInfo* const mb = &enc->mb_info_[n];
-    ++p[mb->segment_];
+  for (n = 0; n < enc->mb_w * enc->mb_h; ++n) {
+    const VP8MBInfo* const mb = &enc->mb_info[n];
+    ++p[mb->segment];
  }
 #if !defined(WEBP_DISABLE_STATS)
-  if (enc->pic_->stats != NULL) {
+  if (enc->pic->stats != NULL) {
    for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
-      enc->pic_->stats->segment_size[n] = p[n];
+      enc->pic->stats->segment_size[n] = p[n];
    }
  }
 #endif
-  if (enc->segment_hdr_.num_segments_ > 1) {
-    uint8_t* const probas = enc->proba_.segments_;
+  if (enc->segment_hdr.num_segments > 1) {
+    uint8_t* const probas = enc->proba.segments;
    probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);
    probas[1] = GetProba(p[0], p[1]);
    probas[2] = GetProba(p[2], p[3]);

-    enc->segment_hdr_.update_map_ =
+    enc->segment_hdr.update_map =
        (probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255);
-    if (!enc->segment_hdr_.update_map_) ResetSegments(enc);
-    enc->segment_hdr_.size_ =
+    if (!enc->segment_hdr.update_map) ResetSegments(enc);
+    enc->segment_hdr.size =
        p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
        p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
        p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) +
        p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2]));
  } else {
-    enc->segment_hdr_.update_map_ = 0;
-    enc->segment_hdr_.size_ = 0;
+    enc->segment_hdr.update_map = 0;
+    enc->segment_hdr.size = 0;
  }
 }

@@ -311,9 +316,9 @@ static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
  int x, y, ch;
  VP8Residual res;
  uint64_t pos1, pos2, pos3;
-  const int i16 = (it->mb_->type_ == 1);
-  const int segment = it->mb_->segment_;
-  VP8Encoder* const enc = it->enc_;
+  const int i16 = (it->mb->type == 1);
+  const int segment = it->mb->segment;
+  VP8Encoder* const enc = it->enc;

  VP8IteratorNzToBytes(it);

@@ -321,8 +326,8 @@ static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
  if (i16) {
    VP8InitResidual(0, 1, enc, &res);
    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
-    it->top_nz_[8] = it->left_nz_[8] =
-      PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res);
+    it->top_nz[8] = it->left_nz[8] =
+        PutCoeffs(bw, it->top_nz[8] + it->left_nz[8], &res);
    VP8InitResidual(1, 0, enc, &res);
  } else {
    VP8InitResidual(0, 3, enc, &res);
@@ -331,9 +336,9 @@ static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
  // luma-AC
  for (y = 0; y < 4; ++y) {
    for (x = 0; x < 4; ++x) {
-      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      const int ctx = it->top_nz[x] + it->left_nz[y];
      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
-      it->top_nz_[x] = it->left_nz_[y] = PutCoeffs(bw, ctx, &res);
+      it->top_nz[x] = it->left_nz[y] = PutCoeffs(bw, ctx, &res);
    }
  }
  pos2 = VP8BitWriterPos(bw);
@@ -343,18 +348,18 @@ static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
  for (ch = 0; ch <= 2; ch += 2) {
    for (y = 0; y < 2; ++y) {
      for (x = 0; x < 2; ++x) {
-        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        const int ctx = it->top_nz[4 + ch + x] + it->left_nz[4 + ch + y];
        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
-        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+        it->top_nz[4 + ch + x] = it->left_nz[4 + ch + y] =
            PutCoeffs(bw, ctx, &res);
      }
    }
  }
  pos3 = VP8BitWriterPos(bw);
-  it->luma_bits_ = pos2 - pos1;
-  it->uv_bits_ = pos3 - pos2;
-  it->bit_count_[segment][i16] += it->luma_bits_;
-  it->bit_count_[segment][2] += it->uv_bits_;
+  it->luma_bits = pos2 - pos1;
+  it->uv_bits = pos3 - pos2;
+  it->bit_count[segment][i16] += it->luma_bits;
+  it->bit_count[segment][2] += it->uv_bits;
  VP8IteratorBytesToNz(it);
 }

@@ -364,15 +369,15 @@ static void RecordResiduals(VP8EncIterator* const it,
                            const VP8ModeScore* const rd) {
  int x, y, ch;
  VP8Residual res;
-  VP8Encoder* const enc = it->enc_;
+  VP8Encoder* const enc = it->enc;

  VP8IteratorNzToBytes(it);

-  if (it->mb_->type_ == 1) {   // i16x16
+  if (it->mb->type == 1) {   // i16x16
    VP8InitResidual(0, 1, enc, &res);
    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
-    it->top_nz_[8] = it->left_nz_[8] =
-      VP8RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res);
+    it->top_nz[8] = it->left_nz[8] =
+        VP8RecordCoeffs(it->top_nz[8] + it->left_nz[8], &res);
    VP8InitResidual(1, 0, enc, &res);
  } else {
    VP8InitResidual(0, 3, enc, &res);
@@ -381,9 +386,9 @@ static void RecordResiduals(VP8EncIterator* const it,
  // luma-AC
  for (y = 0; y < 4; ++y) {
    for (x = 0; x < 4; ++x) {
-      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      const int ctx = it->top_nz[x] + it->left_nz[y];
      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
-      it->top_nz_[x] = it->left_nz_[y] = VP8RecordCoeffs(ctx, &res);
+      it->top_nz[x] = it->left_nz[y] = VP8RecordCoeffs(ctx, &res);
    }
  }

@@ -392,9 +397,9 @@ static void RecordResiduals(VP8EncIterator* const it,
  for (ch = 0; ch <= 2; ch += 2) {
    for (y = 0; y < 2; ++y) {
      for (x = 0; x < 2; ++x) {
-        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        const int ctx = it->top_nz[4 + ch + x] + it->left_nz[4 + ch + y];
        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
-        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+        it->top_nz[4 + ch + x] = it->left_nz[4 + ch + y] =
            VP8RecordCoeffs(ctx, &res);
      }
    }
@@ -412,14 +417,14 @@ static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
                        VP8TBuffer* const tokens) {
  int x, y, ch;
  VP8Residual res;
-  VP8Encoder* const enc = it->enc_;
+  VP8Encoder* const enc = it->enc;

  VP8IteratorNzToBytes(it);
-  if (it->mb_->type_ == 1) {   // i16x16
-    const int ctx = it->top_nz_[8] + it->left_nz_[8];
+  if (it->mb->type == 1) {   // i16x16
+    const int ctx = it->top_nz[8] + it->left_nz[8];
    VP8InitResidual(0, 1, enc, &res);
    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
-    it->top_nz_[8] = it->left_nz_[8] =
+    it->top_nz[8] = it->left_nz[8] =
        VP8RecordCoeffTokens(ctx, &res, tokens);
    VP8InitResidual(1, 0, enc, &res);
  } else {
@@ -429,9 +434,9 @@ static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
  // luma-AC
  for (y = 0; y < 4; ++y) {
    for (x = 0; x < 4; ++x) {
-      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      const int ctx = it->top_nz[x] + it->left_nz[y];
      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
-      it->top_nz_[x] = it->left_nz_[y] =
+      it->top_nz[x] = it->left_nz[y] =
          VP8RecordCoeffTokens(ctx, &res, tokens);
    }
  }
@@ -441,15 +446,15 @@ static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
  for (ch = 0; ch <= 2; ch += 2) {
    for (y = 0; y < 2; ++y) {
      for (x = 0; x < 2; ++x) {
-        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        const int ctx = it->top_nz[4 + ch + x] + it->left_nz[4 + ch + y];
        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
-        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+        it->top_nz[4 + ch + x] = it->left_nz[4 + ch + y] =
            VP8RecordCoeffTokens(ctx, &res, tokens);
      }
    }
  }
  VP8IteratorBytesToNz(it);
-  return !tokens->error_;
+  return !tokens->error;
 }

 #endif    // !DISABLE_TOKEN_BUFFER
@@ -470,64 +475,64 @@ static void SetBlock(uint8_t* p, int value, int size) {
 #endif

 static void ResetSSE(VP8Encoder* const enc) {
-  enc->sse_[0] = 0;
-  enc->sse_[1] = 0;
-  enc->sse_[2] = 0;
-  // Note: enc->sse_[3] is managed by alpha.c
-  enc->sse_count_ = 0;
+  enc->sse[0] = 0;
+  enc->sse[1] = 0;
+  enc->sse[2] = 0;
+  // Note: enc->sse[3] is managed by alpha.c
+  enc->sse_count = 0;
 }

 static void StoreSSE(const VP8EncIterator* const it) {
-  VP8Encoder* const enc = it->enc_;
-  const uint8_t* const in = it->yuv_in_;
-  const uint8_t* const out = it->yuv_out_;
+  VP8Encoder* const enc = it->enc;
+  const uint8_t* const in = it->yuv_in;
+  const uint8_t* const out = it->yuv_out;
  // Note: not totally accurate at boundary. And doesn't include in-loop filter.
-  enc->sse_[0] += VP8SSE16x16(in + Y_OFF_ENC, out + Y_OFF_ENC);
-  enc->sse_[1] += VP8SSE8x8(in + U_OFF_ENC, out + U_OFF_ENC);
-  enc->sse_[2] += VP8SSE8x8(in + V_OFF_ENC, out + V_OFF_ENC);
-  enc->sse_count_ += 16 * 16;
+  enc->sse[0] += VP8SSE16x16(in + Y_OFF_ENC, out + Y_OFF_ENC);
+  enc->sse[1] += VP8SSE8x8(in + U_OFF_ENC, out + U_OFF_ENC);
+  enc->sse[2] += VP8SSE8x8(in + V_OFF_ENC, out + V_OFF_ENC);
+  enc->sse_count += 16 * 16;
 }

 static void StoreSideInfo(const VP8EncIterator* const it) {
-  VP8Encoder* const enc = it->enc_;
-  const VP8MBInfo* const mb = it->mb_;
-  WebPPicture* const pic = enc->pic_;
+  VP8Encoder* const enc = it->enc;
+  const VP8MBInfo* const mb = it->mb;
+  WebPPicture* const pic = enc->pic;

  if (pic->stats != NULL) {
    StoreSSE(it);
-    enc->block_count_[0] += (mb->type_ == 0);
-    enc->block_count_[1] += (mb->type_ == 1);
-    enc->block_count_[2] += (mb->skip_ != 0);
+    enc->block_count[0] += (mb->type == 0);
+    enc->block_count[1] += (mb->type == 1);
+    enc->block_count[2] += (mb->skip != 0);
  }

  if (pic->extra_info != NULL) {
-    uint8_t* const info = &pic->extra_info[it->x_ + it->y_ * enc->mb_w_];
+    uint8_t* const info = &pic->extra_info[it->x + it->y * enc->mb_w];
    switch (pic->extra_info_type) {
-      case 1: *info = mb->type_; break;
-      case 2: *info = mb->segment_; break;
-      case 3: *info = enc->dqm_[mb->segment_].quant_; break;
-      case 4: *info = (mb->type_ == 1) ? it->preds_[0] : 0xff; break;
-      case 5: *info = mb->uv_mode_; break;
+      case 1: *info = mb->type; break;
+      case 2: *info = mb->segment; break;
+      case 3: *info = enc->dqm[mb->segment].quant; break;
+      case 4: *info = (mb->type == 1) ? it->preds[0] : 0xff; break;
+      case 5: *info = mb->uv_mode; break;
      case 6: {
-        const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3);
+        const int b = (int)((it->luma_bits + it->uv_bits + 7) >> 3);
        *info = (b > 255) ? 255 : b; break;
      }
-      case 7: *info = mb->alpha_; break;
+      case 7: *info = mb->alpha; break;
      default: *info = 0; break;
    }
  }
 #if SEGMENT_VISU  // visualize segments and prediction modes
-  SetBlock(it->yuv_out_ + Y_OFF_ENC, mb->segment_ * 64, 16);
-  SetBlock(it->yuv_out_ + U_OFF_ENC, it->preds_[0] * 64, 8);
-  SetBlock(it->yuv_out_ + V_OFF_ENC, mb->uv_mode_ * 64, 8);
+  SetBlock(it->yuv_out + Y_OFF_ENC, mb->segment * 64, 16);
+  SetBlock(it->yuv_out + U_OFF_ENC, it->preds[0] * 64, 8);
+  SetBlock(it->yuv_out + V_OFF_ENC, mb->uv_mode * 64, 8);
 #endif
 }

 static void ResetSideInfo(const VP8EncIterator* const it) {
-  VP8Encoder* const enc = it->enc_;
-  WebPPicture* const pic = enc->pic_;
+  VP8Encoder* const enc = it->enc;
+  WebPPicture* const pic = enc->pic;
  if (pic->stats != NULL) {
-    memset(enc->block_count_, 0, sizeof(enc->block_count_));
+    memset(enc->block_count, 0, sizeof(enc->block_count));
  }
  ResetSSE(enc);
 }
@@ -536,12 +541,12 @@ static void ResetSSE(VP8Encoder* const enc) {
  (void)enc;
 }
 static void StoreSideInfo(const VP8EncIterator* const it) {
-  VP8Encoder* const enc = it->enc_;
-  WebPPicture* const pic = enc->pic_;
+  VP8Encoder* const enc = it->enc;
+  WebPPicture* const pic = enc->pic;
  if (pic->extra_info != NULL) {
-    if (it->x_ == 0 && it->y_ == 0) {   // only do it once, at start
+    if (it->x == 0 && it->y == 0) {   // only do it once, at start
      memset(pic->extra_info, 0,
-             enc->mb_w_ * enc->mb_h_ * sizeof(*pic->extra_info));
+             enc->mb_w * enc->mb_h * sizeof(*pic->extra_info));
    }
  }
 }
@@ -587,7 +592,7 @@ static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
    VP8IteratorImport(&it, NULL);
    if (VP8Decimate(&it, &info, rd_opt)) {
      // Just record the number of skips and act like skip_proba is not used.
-      ++enc->proba_.nb_skip_;
+      ++enc->proba.nb_skip;
    }
    RecordResiduals(&it, &info);
    size += info.R + info.H;
@@ -599,10 +604,10 @@ static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
    VP8IteratorSaveBoundary(&it);
  } while (VP8IteratorNext(&it) && --nb_mbs > 0);

-  size_p0 += enc->segment_hdr_.size_;
+  size_p0 += enc->segment_hdr.size;
  if (s->do_size_search) {
    size += FinalizeSkipProba(enc);
-    size += FinalizeTokenProbas(&enc->proba_);
+    size += FinalizeTokenProbas(&enc->proba);
    size = ((size + size_p0 + 1024) >> 11) + HEADER_SIZE_ESTIMATE;
    s->value = (double)size;
  } else {
@@ -612,17 +617,17 @@ static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
 }

 static int StatLoop(VP8Encoder* const enc) {
-  const int method = enc->method_;
-  const int do_search = enc->do_search_;
+  const int method = enc->method;
+  const int do_search = enc->do_search;
  const int fast_probe = ((method == 0 || method == 3) && !do_search);
-  int num_pass_left = enc->config_->pass;
+  int num_pass_left = enc->config->pass;
  const int task_percent = 20;
  const int percent_per_pass =
      (task_percent + num_pass_left / 2) / num_pass_left;
-  const int final_percent = enc->percent_ + task_percent;
+  const int final_percent = enc->percent + task_percent;
  const VP8RDLevel rd_opt =
      (method >= 3 || do_search) ? RD_OPT_BASIC : RD_OPT_NONE;
-  int nb_mbs = enc->mb_w_ * enc->mb_h_;
+  int nb_mbs = enc->mb_w * enc->mb_h;
  PassStats stats;

  InitPassStats(enc, &stats);
@@ -640,7 +645,7 @@ static int StatLoop(VP8Encoder* const enc) {
  while (num_pass_left-- > 0) {
    const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
                             (num_pass_left == 0) ||
-                             (enc->max_i4_header_bits_ == 0);
+                             (enc->max_i4_header_bits == 0);
    const uint64_t size_p0 =
        OneStatPass(enc, rd_opt, nb_mbs, percent_per_pass, &stats);
    if (size_p0 == 0) return 0;
@@ -648,9 +653,9 @@ static int StatLoop(VP8Encoder* const enc) {
    printf("#%d value:%.1lf -> %.1lf   q:%.2f -> %.2f\n",
           num_pass_left, stats.last_value, stats.value, stats.last_q, stats.q);
 #endif
-    if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
+    if (enc->max_i4_header_bits > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
      ++num_pass_left;
-      enc->max_i4_header_bits_ >>= 1;  // strengthen header bit limitation...
+      enc->max_i4_header_bits >>= 1;   // strengthen header bit limitation...
      continue;                        // ...and start over
    }
    if (is_last_pass) {
@@ -665,10 +670,10 @@ static int StatLoop(VP8Encoder* const enc) {
  if (!do_search || !stats.do_size_search) {
    // Need to finalize probas now, since it wasn't done during the search.
    FinalizeSkipProba(enc);
-    FinalizeTokenProbas(&enc->proba_);
+    FinalizeTokenProbas(&enc->proba);
  }
-  VP8CalculateLevelCosts(&enc->proba_);  // finalize costs
-  return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
+  VP8CalculateLevelCosts(&enc->proba);  // finalize costs
+  return WebPReportProgress(enc->pic, final_percent, &enc->percent);
 }

 //------------------------------------------------------------------------------
@@ -680,37 +685,37 @@ static const uint8_t kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };
 static int PreLoopInitialize(VP8Encoder* const enc) {
  int p;
  int ok = 1;
-  const int average_bytes_per_MB = kAverageBytesPerMB[enc->base_quant_ >> 4];
+  const int average_bytes_per_MB = kAverageBytesPerMB[enc->base_quant >> 4];
  const int bytes_per_parts =
-      enc->mb_w_ * enc->mb_h_ * average_bytes_per_MB / enc->num_parts_;
+      enc->mb_w * enc->mb_h * average_bytes_per_MB / enc->num_parts;
  // Initialize the bit-writers
-  for (p = 0; ok && p < enc->num_parts_; ++p) {
-    ok = VP8BitWriterInit(enc->parts_ + p, bytes_per_parts);
+  for (p = 0; ok && p < enc->num_parts; ++p) {
+    ok = VP8BitWriterInit(enc->parts + p, bytes_per_parts);
  }
  if (!ok) {
    VP8EncFreeBitWriters(enc);  // malloc error occurred
-    return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    return WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
  }
  return ok;
 }

 static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
-  VP8Encoder* const enc = it->enc_;
+  VP8Encoder* const enc = it->enc;
  if (ok) {      // Finalize the partitions, check for extra errors.
    int p;
-    for (p = 0; p < enc->num_parts_; ++p) {
-      VP8BitWriterFinish(enc->parts_ + p);
-      ok &= !enc->parts_[p].error_;
+    for (p = 0; p < enc->num_parts; ++p) {
+      VP8BitWriterFinish(enc->parts + p);
+      ok &= !enc->parts[p].error;
    }
  }

  if (ok) {      // All good. Finish up.
 #if !defined(WEBP_DISABLE_STATS)
-    if (enc->pic_->stats != NULL) {  // finalize byte counters...
+    if (enc->pic->stats != NULL) {  // finalize byte counters...
      int i, s;
      for (i = 0; i <= 2; ++i) {
        for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-          enc->residual_bytes_[i][s] = (int)((it->bit_count_[s][i] + 7) >> 3);
+          enc->residual_bytes[i][s] = (int)((it->bit_count[s][i] + 7) >> 3);
        }
      }
    }
@@ -719,7 +724,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
  } else {
    // Something bad happened -> need to do some memory cleanup.
    VP8EncFreeBitWriters(enc);
-    return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    return WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
  }
  return ok;
 }
@@ -728,11 +733,11 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
 //  VP8EncLoop(): does the final bitstream coding.

 static void ResetAfterSkip(VP8EncIterator* const it) {
-  if (it->mb_->type_ == 1) {
-    *it->nz_ = 0;  // reset all predictors
-    it->left_nz_[8] = 0;
+  if (it->mb->type == 1) {
+    *it->nz = 0;  // reset all predictors
+    it->left_nz[8] = 0;
  } else {
-    *it->nz_ &= (1 << 24);  // preserve the dc_nz bit
+    *it->nz &= (1 << 24);  // preserve the dc_nz bit
  }
 }

@@ -747,16 +752,16 @@ int VP8EncLoop(VP8Encoder* const enc) {
  VP8InitFilter(&it);
  do {
    VP8ModeScore info;
-    const int dont_use_skip = !enc->proba_.use_skip_proba_;
-    const VP8RDLevel rd_opt = enc->rd_opt_level_;
+    const int dont_use_skip = !enc->proba.use_skip_proba;
+    const VP8RDLevel rd_opt = enc->rd_opt_level;

    VP8IteratorImport(&it, NULL);
    // Warning! order is important: first call VP8Decimate() and
    // *then* decide how to code the skip decision if there's one.
    if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
-      CodeResiduals(it.bw_, &it, &info);
-      if (it.bw_->error_) {
-        // enc->pic_->error_code is set in PostLoopFinalize().
+      CodeResiduals(it.bw, &it, &info);
+      if (it.bw->error) {
+        // enc->pic->error_code is set in PostLoopFinalize().
        ok = 0;
        break;
      }
@@ -782,14 +787,14 @@ int VP8EncLoop(VP8Encoder* const enc) {

 int VP8EncTokenLoop(VP8Encoder* const enc) {
  // Roughly refresh the proba eight times per pass
-  int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
-  int num_pass_left = enc->config_->pass;
+  int max_count = (enc->mb_w * enc->mb_h) >> 3;
+  int num_pass_left = enc->config->pass;
  int remaining_progress = 40;  // percents
-  const int do_search = enc->do_search_;
+  const int do_search = enc->do_search;
  VP8EncIterator it;
-  VP8EncProba* const proba = &enc->proba_;
-  const VP8RDLevel rd_opt = enc->rd_opt_level_;
-  const uint64_t pixel_count = (uint64_t)enc->mb_w_ * enc->mb_h_ * 384;
+  VP8EncProba* const proba = &enc->proba;
+  const VP8RDLevel rd_opt = enc->rd_opt_level;
+  const uint64_t pixel_count = (uint64_t)enc->mb_w * enc->mb_h * 384;
  PassStats stats;
  int ok;

@@ -799,16 +804,16 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {

  if (max_count < MIN_COUNT) max_count = MIN_COUNT;

-  assert(enc->num_parts_ == 1);
-  assert(enc->use_tokens_);
-  assert(proba->use_skip_proba_ == 0);
+  assert(enc->num_parts == 1);
+  assert(enc->use_tokens);
+  assert(proba->use_skip_proba == 0);
  assert(rd_opt >= RD_OPT_BASIC);   // otherwise, token-buffer won't be useful
  assert(num_pass_left > 0);

  while (ok && num_pass_left-- > 0) {
    const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
                             (num_pass_left == 0) ||
-                             (enc->max_i4_header_bits_ == 0);
+                             (enc->max_i4_header_bits == 0);
    uint64_t size_p0 = 0;
    uint64_t distortion = 0;
    int cnt = max_count;
@@ -821,7 +826,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
      ResetTokenStats(enc);
      VP8InitFilter(&it);  // don't collect stats until last pass (too costly)
    }
-    VP8TBufferClear(&enc->tokens_);
+    VP8TBufferClear(&enc->tokens);
    do {
      VP8ModeScore info;
      VP8IteratorImport(&it, NULL);
@@ -831,9 +836,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
        cnt = max_count;
      }
      VP8Decimate(&it, &info, rd_opt);
-      ok = RecordTokens(&it, &info, &enc->tokens_);
+      ok = RecordTokens(&it, &info, &enc->tokens);
      if (!ok) {
-        WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+        WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
        break;
      }
      size_p0 += info.H;
@@ -848,11 +853,11 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
    } while (ok && VP8IteratorNext(&it));
    if (!ok) break;

-    size_p0 += enc->segment_hdr_.size_;
+    size_p0 += enc->segment_hdr.size;
    if (stats.do_size_search) {
-      uint64_t size = FinalizeTokenProbas(&enc->proba_);
-      size += VP8EstimateTokenSize(&enc->tokens_,
-                                   (const uint8_t*)proba->coeffs_);
+      uint64_t size = FinalizeTokenProbas(&enc->proba);
+      size += VP8EstimateTokenSize(&enc->tokens,
+                                   (const uint8_t*)proba->coeffs);
      size = (size + size_p0 + 1024) >> 11;  // -> size in bytes
      size += HEADER_SIZE_ESTIMATE;
      stats.value = (double)size;
@@ -866,9 +871,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
           num_pass_left, stats.last_value, stats.value,
           stats.last_q, stats.q, stats.dq, stats.qmin, stats.qmax);
 #endif
-    if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
+    if (enc->max_i4_header_bits > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
      ++num_pass_left;
-      enc->max_i4_header_bits_ >>= 1;  // strengthen header bit limitation...
+      enc->max_i4_header_bits >>= 1;  // strengthen header bit limitation...
      if (is_last_pass) {
        ResetSideInfo(&it);
      }
@@ -883,13 +888,13 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
  }
  if (ok) {
    if (!stats.do_size_search) {
-      FinalizeTokenProbas(&enc->proba_);
+      FinalizeTokenProbas(&enc->proba);
    }
-    ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
-                       (const uint8_t*)proba->coeffs_, 1);
+    ok = VP8EmitTokens(&enc->tokens, enc->parts + 0,
+                       (const uint8_t*)proba->coeffs, 1);
  }
-  ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + remaining_progress,
-                                &enc->percent_);
+  ok = ok && WebPReportProgress(enc->pic, enc->percent + remaining_progress,
+                                &enc->percent);
  return PostLoopFinalize(&it, ok);
 }

--- a/thirdparty/libwebp/src/enc/histogram_enc.c
+++ b/thirdparty/libwebp/src/enc/histogram_enc.c
--- a/thirdparty/libwebp/src/enc/histogram_enc.h
+++ b/thirdparty/libwebp/src/enc/histogram_enc.h
@@ -14,9 +14,8 @@
 #ifndef WEBP_ENC_HISTOGRAM_ENC_H_
 #define WEBP_ENC_HISTOGRAM_ENC_H_

-#include <string.h>
-
 #include "src/enc/backward_references_enc.h"
+#include "src/webp/encode.h"
 #include "src/webp/format_constants.h"
 #include "src/webp/types.h"

@@ -25,26 +24,29 @@ extern "C" {
 #endif

 // Not a trivial literal symbol.
-#define VP8L_NON_TRIVIAL_SYM (0xffffffff)
+#define VP8L_NON_TRIVIAL_SYM ((uint16_t)(0xffff))

 // A simple container for histograms of data.
 typedef struct {
-  // literal_ contains green literal, palette-code and
+  // 'literal' contains green literal, palette-code and
  // copy-length-prefix histogram
-  uint32_t* literal_;         // Pointer to the allocated buffer for literal.
-  uint32_t red_[NUM_LITERAL_CODES];
-  uint32_t blue_[NUM_LITERAL_CODES];
-  uint32_t alpha_[NUM_LITERAL_CODES];
+  uint32_t* literal;        // Pointer to the allocated buffer for literal.
+  uint32_t red[NUM_LITERAL_CODES];
+  uint32_t blue[NUM_LITERAL_CODES];
+  uint32_t alpha[NUM_LITERAL_CODES];
  // Backward reference prefix-code histogram.
-  uint32_t distance_[NUM_DISTANCE_CODES];
-  int palette_code_bits_;
-  uint32_t trivial_symbol_;  // True, if histograms for Red, Blue & Alpha
-                             // literal symbols are single valued.
-  uint64_t bit_cost_;        // cached value of bit cost.
-  uint64_t literal_cost_;    // Cached values of dominant entropy costs:
-  uint64_t red_cost_;        // literal, red & blue.
-  uint64_t blue_cost_;
-  uint8_t is_used_[5];       // 5 for literal, red, blue, alpha, distance
+  uint32_t distance[NUM_DISTANCE_CODES];
+  int palette_code_bits;
+  // The following members are only used within VP8LGetHistoImageSymbols.
+
+  // Index of the unique value of a histogram if any, VP8L_NON_TRIVIAL_SYM
+  // otherwise.
+  uint16_t trivial_symbol[5];
+  uint64_t bit_cost;        // Cached value of total bit cost.
+  // Cached values of entropy costs: literal, red, blue, alpha, distance
+  uint64_t costs[5];
+  uint8_t is_used[5];       // 5 for literal, red, blue, alpha, distance
+  uint16_t bin_id;          // entropy bin index.
 } VP8LHistogram;

 // Collection of histograms with fixed capacity, allocated as one
@@ -60,17 +62,21 @@ typedef struct {
 // The input data is the PixOrCopy data, which models the literals, stop
 // codes and backward references (both distances and lengths).  Also: if
 // palette_code_bits is >= 0, initialize the histogram with this value.
-void VP8LHistogramCreate(VP8LHistogram* const p,
+void VP8LHistogramCreate(VP8LHistogram* const h,
                         const VP8LBackwardRefs* const refs,
                         int palette_code_bits);

 // Set the palette_code_bits and reset the stats.
 // If init_arrays is true, the arrays are also filled with 0's.
-void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
+void VP8LHistogramInit(VP8LHistogram* const h, int palette_code_bits,
                       int init_arrays);

 // Collect all the references into a histogram (without reset)
+// The distance modifier function is applied to the distance before
+// the histogram is updated. It can be NULL.
 void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
+                            int (*const distance_modifier)(int, int),
+                            int distance_modifier_arg0,
                            VP8LHistogram* const histo);

 // Free the memory allocated for the histogram.
@@ -91,12 +97,6 @@ void VP8LHistogramSetClear(VP8LHistogramSet* const set);
 // Special case of VP8LAllocateHistogramSet, with size equals 1.
 VP8LHistogram* VP8LAllocateHistogram(int cache_bits);

-// Accumulate a token 'v' into a histogram.
-void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
-                                     const PixOrCopy* const v,
-                                     int (*const distance_modifier)(int, int),
-                                     int distance_modifier_arg0);
-
 static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
  return NUM_LITERAL_CODES + NUM_LENGTH_CODES +
      ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0);
@@ -118,7 +118,7 @@ uint64_t VP8LBitsEntropy(const uint32_t* const array, int n);

 // Estimate how many bits the combined entropy of literals and distance
 // approximately maps to.
-uint64_t VP8LHistogramEstimateBits(VP8LHistogram* const p);
+uint64_t VP8LHistogramEstimateBits(const VP8LHistogram* const h);

 #ifdef __cplusplus
 }
--- a/thirdparty/libwebp/src/enc/iterator_enc.c
+++ b/thirdparty/libwebp/src/enc/iterator_enc.c
@@ -14,88 +14,91 @@
 #include <string.h>

 #include "src/dsp/cpu.h"
+#include "src/dsp/dsp.h"
 #include "src/enc/vp8i_enc.h"
+#include "src/utils/utils.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // VP8Iterator
 //------------------------------------------------------------------------------

 static void InitLeft(VP8EncIterator* const it) {
-  it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] =
-      (it->y_ > 0) ? 129 : 127;
-  memset(it->y_left_, 129, 16);
-  memset(it->u_left_, 129, 8);
-  memset(it->v_left_, 129, 8);
-  it->left_nz_[8] = 0;
-  if (it->top_derr_ != NULL) {
-    memset(&it->left_derr_, 0, sizeof(it->left_derr_));
+  it->y_left[-1] = it->u_left[-1] = it->v_left[-1] =
+      (it->y > 0) ? 129 : 127;
+  memset(it->y_left, 129, 16);
+  memset(it->u_left, 129, 8);
+  memset(it->v_left, 129, 8);
+  it->left_nz[8] = 0;
+  if (it->top_derr != NULL) {
+    memset(&it->left_derr, 0, sizeof(it->left_derr));
  }
 }

 static void InitTop(VP8EncIterator* const it) {
-  const VP8Encoder* const enc = it->enc_;
-  const size_t top_size = enc->mb_w_ * 16;
-  memset(enc->y_top_, 127, 2 * top_size);
-  memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));
-  if (enc->top_derr_ != NULL) {
-    memset(enc->top_derr_, 0, enc->mb_w_ * sizeof(*enc->top_derr_));
+  const VP8Encoder* const enc = it->enc;
+  const size_t top_size = enc->mb_w * 16;
+  memset(enc->y_top, 127, 2 * top_size);
+  memset(enc->nz, 0, enc->mb_w * sizeof(*enc->nz));
+  if (enc->top_derr != NULL) {
+    memset(enc->top_derr, 0, enc->mb_w * sizeof(*enc->top_derr));
  }
 }

 void VP8IteratorSetRow(VP8EncIterator* const it, int y) {
-  VP8Encoder* const enc = it->enc_;
-  it->x_ = 0;
-  it->y_ = y;
-  it->bw_ = &enc->parts_[y & (enc->num_parts_ - 1)];
-  it->preds_ = enc->preds_ + y * 4 * enc->preds_w_;
-  it->nz_ = enc->nz_;
-  it->mb_ = enc->mb_info_ + y * enc->mb_w_;
-  it->y_top_ = enc->y_top_;
-  it->uv_top_ = enc->uv_top_;
+  VP8Encoder* const enc = it->enc;
+  it->x = 0;
+  it->y = y;
+  it->bw = &enc->parts[y & (enc->num_parts - 1)];
+  it->preds = enc->preds + y * 4 * enc->preds_w;
+  it->nz = enc->nz;
+  it->mb = enc->mb_info + y * enc->mb_w;
+  it->y_top = enc->y_top;
+  it->uv_top = enc->uv_top;
  InitLeft(it);
 }

 // restart a scan
 static void VP8IteratorReset(VP8EncIterator* const it) {
-  VP8Encoder* const enc = it->enc_;
+  VP8Encoder* const enc = it->enc;
  VP8IteratorSetRow(it, 0);
-  VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_);  // default
+  VP8IteratorSetCountDown(it, enc->mb_w * enc->mb_h);  // default
  InitTop(it);
-  memset(it->bit_count_, 0, sizeof(it->bit_count_));
-  it->do_trellis_ = 0;
+  memset(it->bit_count, 0, sizeof(it->bit_count));
+  it->do_trellis = 0;
 }

 void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) {
-  it->count_down_ = it->count_down0_ = count_down;
+  it->count_down = it->count_down0 = count_down;
 }

 int VP8IteratorIsDone(const VP8EncIterator* const it) {
-  return (it->count_down_ <= 0);
+  return (it->count_down <= 0);
 }

 void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
-  it->enc_ = enc;
-  it->yuv_in_   = (uint8_t*)WEBP_ALIGN(it->yuv_mem_);
-  it->yuv_out_  = it->yuv_in_ + YUV_SIZE_ENC;
-  it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC;
-  it->yuv_p_    = it->yuv_out2_ + YUV_SIZE_ENC;
-  it->lf_stats_ = enc->lf_stats_;
-  it->percent0_ = enc->percent_;
-  it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1);
-  it->u_left_ = it->y_left_ + 16 + 16;
-  it->v_left_ = it->u_left_ + 16;
-  it->top_derr_ = enc->top_derr_;
+  it->enc = enc;
+  it->yuv_in   = (uint8_t*)WEBP_ALIGN(it->yuv_mem);
+  it->yuv_out  = it->yuv_in + YUV_SIZE_ENC;
+  it->yuv_out2 = it->yuv_out + YUV_SIZE_ENC;
+  it->yuv_p    = it->yuv_out2 + YUV_SIZE_ENC;
+  it->lf_stats = enc->lf_stats;
+  it->percent0 = enc->percent;
+  it->y_left = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem + 1);
+  it->u_left = it->y_left + 16 + 16;
+  it->v_left = it->u_left + 16;
+  it->top_derr = enc->top_derr;
  VP8IteratorReset(it);
 }

 int VP8IteratorProgress(const VP8EncIterator* const it, int delta) {
-  VP8Encoder* const enc = it->enc_;
-  if (delta && enc->pic_->progress_hook != NULL) {
-    const int done = it->count_down0_ - it->count_down_;
-    const int percent = (it->count_down0_ <= 0)
-                      ? it->percent0_
-                      : it->percent0_ + delta * done / it->count_down0_;
-    return WebPReportProgress(enc->pic_, percent, &enc->percent_);
+  VP8Encoder* const enc = it->enc;
+  if (delta && enc->pic->progress_hook != NULL) {
+    const int done = it->count_down0 - it->count_down;
+    const int percent = (it->count_down0 <= 0)
+                      ? it->percent0
+                      : it->percent0 + delta * done / it->count_down0;
+    return WebPReportProgress(enc->pic, percent, &enc->percent);
  }
  return 1;
 }
@@ -131,9 +134,9 @@ static void ImportLine(const uint8_t* src, int src_stride,
 }

 void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) {
-  const VP8Encoder* const enc = it->enc_;
-  const int x = it->x_, y = it->y_;
-  const WebPPicture* const pic = enc->pic_;
+  const VP8Encoder* const enc = it->enc;
+  const int x = it->x, y = it->y;
+  const WebPPicture* const pic = enc->pic;
  const uint8_t* const ysrc = pic->y + (y * pic->y_stride  + x) * 16;
  const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8;
  const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8;
@@ -142,9 +145,9 @@ void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) {
  const int uv_w = (w + 1) >> 1;
  const int uv_h = (h + 1) >> 1;

-  ImportBlock(ysrc, pic->y_stride,  it->yuv_in_ + Y_OFF_ENC, w, h, 16);
-  ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8);
-  ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8);
+  ImportBlock(ysrc, pic->y_stride,  it->yuv_in + Y_OFF_ENC, w, h, 16);
+  ImportBlock(usrc, pic->uv_stride, it->yuv_in + U_OFF_ENC, uv_w, uv_h, 8);
+  ImportBlock(vsrc, pic->uv_stride, it->yuv_in + V_OFF_ENC, uv_w, uv_h, 8);

  if (tmp_32 == NULL) return;

@@ -153,19 +156,19 @@ void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) {
    InitLeft(it);
  } else {
    if (y == 0) {
-      it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = 127;
+      it->y_left[-1] = it->u_left[-1] = it->v_left[-1] = 127;
    } else {
-      it->y_left_[-1] = ysrc[- 1 - pic->y_stride];
-      it->u_left_[-1] = usrc[- 1 - pic->uv_stride];
-      it->v_left_[-1] = vsrc[- 1 - pic->uv_stride];
+      it->y_left[-1] = ysrc[- 1 - pic->y_stride];
+      it->u_left[-1] = usrc[- 1 - pic->uv_stride];
+      it->v_left[-1] = vsrc[- 1 - pic->uv_stride];
    }
-    ImportLine(ysrc - 1, pic->y_stride,  it->y_left_, h,   16);
-    ImportLine(usrc - 1, pic->uv_stride, it->u_left_, uv_h, 8);
-    ImportLine(vsrc - 1, pic->uv_stride, it->v_left_, uv_h, 8);
+    ImportLine(ysrc - 1, pic->y_stride,  it->y_left, h,   16);
+    ImportLine(usrc - 1, pic->uv_stride, it->u_left, uv_h, 8);
+    ImportLine(vsrc - 1, pic->uv_stride, it->v_left, uv_h, 8);
  }

-  it->y_top_  = tmp_32 + 0;
-  it->uv_top_ = tmp_32 + 16;
+  it->y_top  = tmp_32 + 0;
+  it->uv_top = tmp_32 + 16;
  if (y == 0) {
    memset(tmp_32, 127, 32 * sizeof(*tmp_32));
  } else {
@@ -188,13 +191,13 @@ static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride,
 }

 void VP8IteratorExport(const VP8EncIterator* const it) {
-  const VP8Encoder* const enc = it->enc_;
-  if (enc->config_->show_compressed) {
-    const int x = it->x_, y = it->y_;
-    const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
-    const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC;
-    const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC;
-    const WebPPicture* const pic = enc->pic_;
+  const VP8Encoder* const enc = it->enc;
+  if (enc->config->show_compressed) {
+    const int x = it->x, y = it->y;
+    const uint8_t* const ysrc = it->yuv_out + Y_OFF_ENC;
+    const uint8_t* const usrc = it->yuv_out + U_OFF_ENC;
+    const uint8_t* const vsrc = it->yuv_out + V_OFF_ENC;
+    const WebPPicture* const pic = enc->pic;
    uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16;
    uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8;
    uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8;
@@ -234,9 +237,9 @@ void VP8IteratorExport(const VP8EncIterator* const it) {
 #define BIT(nz, n) (!!((nz) & (1 << (n))))

 void VP8IteratorNzToBytes(VP8EncIterator* const it) {
-  const int tnz = it->nz_[0], lnz = it->nz_[-1];
-  int* const top_nz = it->top_nz_;
-  int* const left_nz = it->left_nz_;
+  const int tnz = it->nz[0], lnz = it->nz[-1];
+  int* const top_nz = it->top_nz;
+  int* const left_nz = it->left_nz;

  // Top-Y
  top_nz[0] = BIT(tnz, 12);
@@ -268,8 +271,8 @@ void VP8IteratorNzToBytes(VP8EncIterator* const it) {

 void VP8IteratorBytesToNz(VP8EncIterator* const it) {
  uint32_t nz = 0;
-  const int* const top_nz = it->top_nz_;
-  const int* const left_nz = it->left_nz_;
+  const int* const top_nz = it->top_nz;
+  const int* const left_nz = it->left_nz;
  // top
  nz |= (top_nz[0] << 12) | (top_nz[1] << 13);
  nz |= (top_nz[2] << 14) | (top_nz[3] << 15);
@@ -281,7 +284,7 @@ void VP8IteratorBytesToNz(VP8EncIterator* const it) {
  nz |= (left_nz[2] << 11);
  nz |= (left_nz[4] << 17) | (left_nz[6] << 21);

-  *it->nz_ = nz;
+  *it->nz = nz;
 }

 #undef BIT
@@ -290,77 +293,77 @@ void VP8IteratorBytesToNz(VP8EncIterator* const it) {
 // Advance to the next position, doing the bookkeeping.

 void VP8IteratorSaveBoundary(VP8EncIterator* const it) {
-  VP8Encoder* const enc = it->enc_;
-  const int x = it->x_, y = it->y_;
-  const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
-  const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC;
-  if (x < enc->mb_w_ - 1) {   // left
+  VP8Encoder* const enc = it->enc;
+  const int x = it->x, y = it->y;
+  const uint8_t* const ysrc = it->yuv_out + Y_OFF_ENC;
+  const uint8_t* const uvsrc = it->yuv_out + U_OFF_ENC;
+  if (x < enc->mb_w - 1) {   // left
    int i;
    for (i = 0; i < 16; ++i) {
-      it->y_left_[i] = ysrc[15 + i * BPS];
+      it->y_left[i] = ysrc[15 + i * BPS];
    }
    for (i = 0; i < 8; ++i) {
-      it->u_left_[i] = uvsrc[7 + i * BPS];
-      it->v_left_[i] = uvsrc[15 + i * BPS];
+      it->u_left[i] = uvsrc[7 + i * BPS];
+      it->v_left[i] = uvsrc[15 + i * BPS];
    }
    // top-left (before 'top'!)
-    it->y_left_[-1] = it->y_top_[15];
-    it->u_left_[-1] = it->uv_top_[0 + 7];
-    it->v_left_[-1] = it->uv_top_[8 + 7];
+    it->y_left[-1] = it->y_top[15];
+    it->u_left[-1] = it->uv_top[0 + 7];
+    it->v_left[-1] = it->uv_top[8 + 7];
  }
-  if (y < enc->mb_h_ - 1) {  // top
-    memcpy(it->y_top_, ysrc + 15 * BPS, 16);
-    memcpy(it->uv_top_, uvsrc + 7 * BPS, 8 + 8);
+  if (y < enc->mb_h - 1) {  // top
+    memcpy(it->y_top, ysrc + 15 * BPS, 16);
+    memcpy(it->uv_top, uvsrc + 7 * BPS, 8 + 8);
  }
 }

 int VP8IteratorNext(VP8EncIterator* const it) {
-  if (++it->x_ == it->enc_->mb_w_) {
-    VP8IteratorSetRow(it, ++it->y_);
+  if (++it->x == it->enc->mb_w) {
+    VP8IteratorSetRow(it, ++it->y);
  } else {
-    it->preds_ += 4;
-    it->mb_ += 1;
-    it->nz_ += 1;
-    it->y_top_ += 16;
-    it->uv_top_ += 16;
+    it->preds += 4;
+    it->mb += 1;
+    it->nz += 1;
+    it->y_top += 16;
+    it->uv_top += 16;
  }
-  return (0 < --it->count_down_);
+  return (0 < --it->count_down);
 }

 //------------------------------------------------------------------------------
 // Helper function to set mode properties

 void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) {
-  uint8_t* preds = it->preds_;
+  uint8_t* preds = it->preds;
  int y;
  for (y = 0; y < 4; ++y) {
    memset(preds, mode, 4);
-    preds += it->enc_->preds_w_;
+    preds += it->enc->preds_w;
  }
-  it->mb_->type_ = 1;
+  it->mb->type = 1;
 }

 void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) {
-  uint8_t* preds = it->preds_;
+  uint8_t* preds = it->preds;
  int y;
  for (y = 4; y > 0; --y) {
    memcpy(preds, modes, 4 * sizeof(*modes));
-    preds += it->enc_->preds_w_;
+    preds += it->enc->preds_w;
    modes += 4;
  }
-  it->mb_->type_ = 0;
+  it->mb->type = 0;
 }

 void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) {
-  it->mb_->uv_mode_ = mode;
+  it->mb->uv_mode = mode;
 }

 void VP8SetSkip(const VP8EncIterator* const it, int skip) {
-  it->mb_->skip_ = skip;
+  it->mb->skip = skip;
 }

 void VP8SetSegment(const VP8EncIterator* const it, int segment) {
-  it->mb_->segment_ = segment;
+  it->mb->segment = segment;
 }

 //------------------------------------------------------------------------------
@@ -403,52 +406,52 @@ static const uint8_t VP8TopLeftI4[16] = {
 };

 void VP8IteratorStartI4(VP8EncIterator* const it) {
-  const VP8Encoder* const enc = it->enc_;
+  const VP8Encoder* const enc = it->enc;
  int i;

-  it->i4_ = 0;    // first 4x4 sub-block
-  it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[0];
+  it->i4 = 0;    // first 4x4 sub-block
+  it->i4_top = it->i4_boundary + VP8TopLeftI4[0];

  // Import the boundary samples
  for (i = 0; i < 17; ++i) {    // left
-    it->i4_boundary_[i] = it->y_left_[15 - i];
+    it->i4_boundary[i] = it->y_left[15 - i];
  }
  for (i = 0; i < 16; ++i) {    // top
-    it->i4_boundary_[17 + i] = it->y_top_[i];
+    it->i4_boundary[17 + i] = it->y_top[i];
  }
  // top-right samples have a special case on the far right of the picture
-  if (it->x_ < enc->mb_w_ - 1) {
+  if (it->x < enc->mb_w - 1) {
    for (i = 16; i < 16 + 4; ++i) {
-      it->i4_boundary_[17 + i] = it->y_top_[i];
+      it->i4_boundary[17 + i] = it->y_top[i];
    }
  } else {    // else, replicate the last valid pixel four times
    for (i = 16; i < 16 + 4; ++i) {
-      it->i4_boundary_[17 + i] = it->i4_boundary_[17 + 15];
+      it->i4_boundary[17 + i] = it->i4_boundary[17 + 15];
    }
  }
 #if WEBP_AARCH64 && BPS == 32 && defined(WEBP_MSAN)
-  // Intra4Preds_NEON() reads 3 uninitialized bytes from i4_boundary_ when top
+  // Intra4Preds_NEON() reads 3 uninitialized bytes from 'i4_boundary' when top
  // is positioned at offset 29 (VP8TopLeftI4[3]). The values are not used
  // meaningfully, but due to limitations in MemorySanitizer related to
  // modeling of tbl instructions, a warning will be issued. This can be
  // removed if MSan is updated to support the instructions. See
  // https://issues.webmproject.org/372109644.
-  memset(it->i4_boundary_ + sizeof(it->i4_boundary_) - 3, 0xaa, 3);
+  memset(it->i4_boundary + sizeof(it->i4_boundary) - 3, 0xaa, 3);
 #endif
  VP8IteratorNzToBytes(it);  // import the non-zero context
 }

 int VP8IteratorRotateI4(VP8EncIterator* const it,
                        const uint8_t* const yuv_out) {
-  const uint8_t* const blk = yuv_out + VP8Scan[it->i4_];
-  uint8_t* const top = it->i4_top_;
+  const uint8_t* const blk = yuv_out + VP8Scan[it->i4];
+  uint8_t* const top = it->i4_top;
  int i;

  // Update the cache with 7 fresh samples
  for (i = 0; i <= 3; ++i) {
    top[-4 + i] = blk[i + 3 * BPS];   // store future top samples
  }
-  if ((it->i4_ & 3) != 3) {  // if not on the right sub-blocks #3, #7, #11, #15
+  if ((it->i4 & 3) != 3) {  // if not on the right sub-blocks #3, #7, #11, #15
    for (i = 0; i <= 2; ++i) {        // store future left samples
      top[i] = blk[3 + (2 - i) * BPS];
    }
@@ -458,12 +461,12 @@ int VP8IteratorRotateI4(VP8EncIterator* const it,
    }
  }
  // move pointers to next sub-block
-  ++it->i4_;
-  if (it->i4_ == 16) {    // we're done
+  ++it->i4;
+  if (it->i4 == 16) {    // we're done
    return 0;
  }

-  it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_];
+  it->i4_top = it->i4_boundary + VP8TopLeftI4[it->i4];
  return 1;
 }

--- a/thirdparty/libwebp/src/enc/near_lossless_enc.c
+++ b/thirdparty/libwebp/src/enc/near_lossless_enc.c
@@ -16,10 +16,13 @@

 #include <assert.h>
 #include <stdlib.h>
+#include <string.h>

 #include "src/dsp/lossless_common.h"
-#include "src/utils/utils.h"
+#include "src/webp/types.h"
 #include "src/enc/vp8li_enc.h"
+#include "src/utils/utils.h"
+#include "src/webp/encode.h"

 #if (WEBP_NEAR_LOSSLESS == 1)

--- a/thirdparty/libwebp/src/enc/picture_csp_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_csp_enc.c
@@ -12,18 +12,21 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include <assert.h>
-#include <stdlib.h>
 #include <math.h>
+#include <stdlib.h>
+#include <string.h>

 #include "sharpyuv/sharpyuv.h"
 #include "sharpyuv/sharpyuv_csp.h"
-#include "src/enc/vp8i_enc.h"
-#include "src/utils/random_utils.h"
-#include "src/utils/utils.h"
+#include "src/dsp/cpu.h"
 #include "src/dsp/dsp.h"
 #include "src/dsp/lossless.h"
 #include "src/dsp/yuv.h"
-#include "src/dsp/cpu.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/utils/random_utils.h"
+#include "src/utils/utils.h"
+#include "src/webp/encode.h"
+#include "src/webp/types.h"

 #if defined(WEBP_USE_THREAD) && !defined(_WIN32)
 #include <pthread.h>
--- a/thirdparty/libwebp/src/enc/picture_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_enc.c
@@ -14,9 +14,12 @@
 #include <assert.h>
 #include <limits.h>
 #include <stdlib.h>
+#include <string.h>

 #include "src/enc/vp8i_enc.h"
 #include "src/utils/utils.h"
+#include "src/webp/encode.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // WebPPicture
@@ -226,9 +229,7 @@ int WebPMemoryWrite(const uint8_t* data, size_t data_size,
 void WebPMemoryWriterClear(WebPMemoryWriter* writer) {
  if (writer != NULL) {
    WebPSafeFree(writer->mem);
-    writer->mem = NULL;
-    writer->size = 0;
-    writer->max_size = 0;
+    WebPMemoryWriterInit(writer);
  }
 }

--- a/thirdparty/libwebp/src/enc/picture_psnr_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_psnr_enc.c
@@ -18,6 +18,7 @@
 #include <math.h>
 #include <stdlib.h>

+#include "src/webp/types.h"
 #include "src/dsp/dsp.h"
 #include "src/enc/vp8i_enc.h"
 #include "src/utils/utils.h"
--- a/thirdparty/libwebp/src/enc/picture_rescale_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_rescale_enc.c
@@ -16,6 +16,8 @@
 #include <assert.h>
 #include <stdlib.h>

+#include "src/webp/types.h"
+#include "src/dsp/dsp.h"
 #include "src/enc/vp8i_enc.h"

 #if !defined(WEBP_REDUCE_SIZE)
--- a/thirdparty/libwebp/src/enc/picture_tools_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_tools_enc.c
@@ -12,9 +12,14 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include <assert.h>
+#include <stddef.h>
+#include <string.h>

-#include "src/enc/vp8i_enc.h"
+#include "src/dsp/dsp.h"
 #include "src/dsp/yuv.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/webp/encode.h"
+#include "src/webp/types.h"

 //------------------------------------------------------------------------------
 // Helper: clean up fully transparent area to help compressibility.
--- a/thirdparty/libwebp/src/enc/predictor_enc.c
+++ b/thirdparty/libwebp/src/enc/predictor_enc.c
@@ -831,24 +831,24 @@ int VP8LResidualImage(int width, int height, int min_bits, int max_bits,
 // Color transform functions.

 static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
-  m->green_to_red_ = 0;
-  m->green_to_blue_ = 0;
-  m->red_to_blue_ = 0;
+  m->green_to_red = 0;
+  m->green_to_blue = 0;
+  m->red_to_blue = 0;
 }

 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
                                               VP8LMultipliers* const m) {
-  m->green_to_red_  = (color_code >>  0) & 0xff;
-  m->green_to_blue_ = (color_code >>  8) & 0xff;
-  m->red_to_blue_   = (color_code >> 16) & 0xff;
+  m->green_to_red  = (color_code >>  0) & 0xff;
+  m->green_to_blue = (color_code >>  8) & 0xff;
+  m->red_to_blue   = (color_code >> 16) & 0xff;
 }

 static WEBP_INLINE uint32_t MultipliersToColorCode(
    const VP8LMultipliers* const m) {
  return 0xff000000u |
-         ((uint32_t)(m->red_to_blue_) << 16) |
-         ((uint32_t)(m->green_to_blue_) << 8) |
-         m->green_to_red_;
+         ((uint32_t)(m->red_to_blue) << 16) |
+         ((uint32_t)(m->green_to_blue) << 8) |
+         m->green_to_red;
 }

 static int64_t PredictionCostCrossColor(const uint32_t accumulated[256],
@@ -871,11 +871,11 @@ static int64_t GetPredictionCostCrossColorRed(
                                green_to_red, histo);

  cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
-  if ((uint8_t)green_to_red == prev_x.green_to_red_) {
+  if ((uint8_t)green_to_red == prev_x.green_to_red) {
    // favor keeping the areas locally similar
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
  }
-  if ((uint8_t)green_to_red == prev_y.green_to_red_) {
+  if ((uint8_t)green_to_red == prev_y.green_to_red) {
    // favor keeping the areas locally similar
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
  }
@@ -913,7 +913,7 @@ static void GetBestGreenToRed(const uint32_t* argb, int stride, int tile_width,
      }
    }
  }
-  best_tx->green_to_red_ = (green_to_red_best & 0xff);
+  best_tx->green_to_red = (green_to_red_best & 0xff);
 }

 static int64_t GetPredictionCostCrossColorBlue(
@@ -927,19 +927,19 @@ static int64_t GetPredictionCostCrossColorBlue(
                                 green_to_blue, red_to_blue, histo);

  cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
-  if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
+  if ((uint8_t)green_to_blue == prev_x.green_to_blue) {
    // favor keeping the areas locally similar
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
  }
-  if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
+  if ((uint8_t)green_to_blue == prev_y.green_to_blue) {
    // favor keeping the areas locally similar
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
  }
-  if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
+  if ((uint8_t)red_to_blue == prev_x.red_to_blue) {
    // favor keeping the areas locally similar
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
  }
-  if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
+  if ((uint8_t)red_to_blue == prev_y.red_to_blue) {
    // favor keeping the areas locally similar
    cur_diff -= 3ll << LOG_2_PRECISION_BITS;
  }
@@ -997,8 +997,8 @@ static void GetBestGreenRedToBlue(const uint32_t* argb, int stride,
      break;  // out of iter-loop.
    }
  }
-  best_tx->green_to_blue_ = green_to_blue_best & 0xff;
-  best_tx->red_to_blue_ = red_to_blue_best & 0xff;
+  best_tx->green_to_blue = green_to_blue_best & 0xff;
+  best_tx->red_to_blue = red_to_blue_best & 0xff;
 }
 #undef kGreenRedToBlueMaxIters
 #undef kGreenRedToBlueNumAxis
--- a/thirdparty/libwebp/src/enc/quant_enc.c
+++ b/thirdparty/libwebp/src/enc/quant_enc.c
@@ -14,10 +14,14 @@
 #include <assert.h>
 #include <math.h>
 #include <stdlib.h>  // for abs()
+#include <string.h>

+#include "src/dec/common_dec.h"
+#include "src/dsp/dsp.h"
 #include "src/dsp/quant.h"
-#include "src/enc/vp8i_enc.h"
 #include "src/enc/cost_enc.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/webp/types.h"

 #define DO_TRELLIS_I4  1
 #define DO_TRELLIS_I16 1   // not a huge gain, but ok at low bitrate.
@@ -54,11 +58,11 @@
 static void PrintBlockInfo(const VP8EncIterator* const it,
                           const VP8ModeScore* const rd) {
  int i, j;
-  const int is_i16 = (it->mb_->type_ == 1);
-  const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC;
-  const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC;
-  const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC;
-  const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC;
+  const int is_i16 = (it->mb->type == 1);
+  const uint8_t* const y_in = it->yuv_in + Y_OFF_ENC;
+  const uint8_t* const y_out = it->yuv_out + Y_OFF_ENC;
+  const uint8_t* const uv_in = it->yuv_in + U_OFF_ENC;
+  const uint8_t* const uv_out = it->yuv_out + U_OFF_ENC;
  printf("SOURCE / OUTPUT / ABS DELTA\n");
  for (j = 0; j < 16; ++j) {
    for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]);
@@ -211,26 +215,26 @@ static int ExpandMatrix(VP8Matrix* const m, int type) {
  for (i = 0; i < 2; ++i) {
    const int is_ac_coeff = (i > 0);
    const int bias = kBiasMatrices[type][is_ac_coeff];
-    m->iq_[i] = (1 << QFIX) / m->q_[i];
-    m->bias_[i] = BIAS(bias);
-    // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
+    m->iq[i] = (1 << QFIX) / m->q[i];
+    m->bias[i] = BIAS(bias);
+    // zthresh is the exact value such that QUANTDIV(coeff, iQ, B) is:
    //   * zero if coeff <= zthresh
    //   * non-zero if coeff > zthresh
-    m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];
+    m->zthresh[i] = ((1 << QFIX) - 1 - m->bias[i]) / m->iq[i];
  }
  for (i = 2; i < 16; ++i) {
-    m->q_[i] = m->q_[1];
-    m->iq_[i] = m->iq_[1];
-    m->bias_[i] = m->bias_[1];
-    m->zthresh_[i] = m->zthresh_[1];
+    m->q[i] = m->q[1];
+    m->iq[i] = m->iq[1];
+    m->bias[i] = m->bias[1];
+    m->zthresh[i] = m->zthresh[1];
  }
  for (sum = 0, i = 0; i < 16; ++i) {
    if (type == 0) {  // we only use sharpening for AC luma coeffs
-      m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;
+      m->sharpen[i] = (kFreqSharpening[i] * m->q[i]) >> SHARPEN_BITS;
    } else {
-      m->sharpen_[i] = 0;
+      m->sharpen[i] = 0;
    }
-    sum += m->q_[i];
+    sum += m->q[i];
  }
  return (sum + 8) >> 4;
 }
@@ -240,49 +244,49 @@ static void CheckLambdaValue(int* const v) { if (*v < 1) *v = 1; }
 static void SetupMatrices(VP8Encoder* enc) {
  int i;
  const int tlambda_scale =
-    (enc->method_ >= 4) ? enc->config_->sns_strength
+    (enc->method >= 4) ? enc->config->sns_strength
                        : 0;
-  const int num_segments = enc->segment_hdr_.num_segments_;
+  const int num_segments = enc->segment_hdr.num_segments;
  for (i = 0; i < num_segments; ++i) {
-    VP8SegmentInfo* const m = &enc->dqm_[i];
-    const int q = m->quant_;
+    VP8SegmentInfo* const m = &enc->dqm[i];
+    const int q = m->quant;
    int q_i4, q_i16, q_uv;
-    m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
-    m->y1_.q_[1] = kAcTable[clip(q,                  0, 127)];
+    m->y1.q[0] = kDcTable[clip(q + enc->dq_y1_dc, 0, 127)];
+    m->y1.q[1] = kAcTable[clip(q,                  0, 127)];

-    m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
-    m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];
+    m->y2.q[0] = kDcTable[ clip(q + enc->dq_y2_dc, 0, 127)] * 2;
+    m->y2.q[1] = kAcTable2[clip(q + enc->dq_y2_ac, 0, 127)];

-    m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
-    m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
+    m->uv.q[0] = kDcTable[clip(q + enc->dq_uv_dc, 0, 117)];
+    m->uv.q[1] = kAcTable[clip(q + enc->dq_uv_ac, 0, 127)];

-    q_i4  = ExpandMatrix(&m->y1_, 0);
-    q_i16 = ExpandMatrix(&m->y2_, 1);
-    q_uv  = ExpandMatrix(&m->uv_, 2);
+    q_i4  = ExpandMatrix(&m->y1, 0);
+    q_i16 = ExpandMatrix(&m->y2, 1);
+    q_uv  = ExpandMatrix(&m->uv, 2);

-    m->lambda_i4_          = (3 * q_i4 * q_i4) >> 7;
-    m->lambda_i16_         = (3 * q_i16 * q_i16);
-    m->lambda_uv_          = (3 * q_uv * q_uv) >> 6;
-    m->lambda_mode_        = (1 * q_i4 * q_i4) >> 7;
-    m->lambda_trellis_i4_  = (7 * q_i4 * q_i4) >> 3;
-    m->lambda_trellis_i16_ = (q_i16 * q_i16) >> 2;
-    m->lambda_trellis_uv_  = (q_uv * q_uv) << 1;
-    m->tlambda_            = (tlambda_scale * q_i4) >> 5;
+    m->lambda_i4          = (3 * q_i4 * q_i4) >> 7;
+    m->lambda_i16         = (3 * q_i16 * q_i16);
+    m->lambda_uv          = (3 * q_uv * q_uv) >> 6;
+    m->lambda_mode        = (1 * q_i4 * q_i4) >> 7;
+    m->lambda_trellis_i4  = (7 * q_i4 * q_i4) >> 3;
+    m->lambda_trellis_i16 = (q_i16 * q_i16) >> 2;
+    m->lambda_trellis_uv  = (q_uv * q_uv) << 1;
+    m->tlambda            = (tlambda_scale * q_i4) >> 5;

    // none of these constants should be < 1
-    CheckLambdaValue(&m->lambda_i4_);
-    CheckLambdaValue(&m->lambda_i16_);
-    CheckLambdaValue(&m->lambda_uv_);
-    CheckLambdaValue(&m->lambda_mode_);
-    CheckLambdaValue(&m->lambda_trellis_i4_);
-    CheckLambdaValue(&m->lambda_trellis_i16_);
-    CheckLambdaValue(&m->lambda_trellis_uv_);
-    CheckLambdaValue(&m->tlambda_);
+    CheckLambdaValue(&m->lambda_i4);
+    CheckLambdaValue(&m->lambda_i16);
+    CheckLambdaValue(&m->lambda_uv);
+    CheckLambdaValue(&m->lambda_mode);
+    CheckLambdaValue(&m->lambda_trellis_i4);
+    CheckLambdaValue(&m->lambda_trellis_i16);
+    CheckLambdaValue(&m->lambda_trellis_uv);
+    CheckLambdaValue(&m->tlambda);

-    m->min_disto_ = 20 * m->y1_.q_[0];   // quantization-aware min disto
-    m->max_edge_  = 0;
+    m->min_disto = 20 * m->y1.q[0];   // quantization-aware min disto
+    m->max_edge  = 0;

-    m->i4_penalty_ = 1000 * q_i4 * q_i4;
+    m->i4_penalty = 1000 * q_i4 * q_i4;
  }
 }

@@ -296,21 +300,21 @@ static void SetupMatrices(VP8Encoder* enc) {
 static void SetupFilterStrength(VP8Encoder* const enc) {
  int i;
  // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
-  const int level0 = 5 * enc->config_->filter_strength;
+  const int level0 = 5 * enc->config->filter_strength;
  for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
-    VP8SegmentInfo* const m = &enc->dqm_[i];
+    VP8SegmentInfo* const m = &enc->dqm[i];
    // We focus on the quantization of AC coeffs.
-    const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;
+    const int qstep = kAcTable[clip(m->quant, 0, 127)] >> 2;
    const int base_strength =
-        VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
+        VP8FilterStrengthFromDelta(enc->filter_hdr.sharpness, qstep);
    // Segments with lower complexity ('beta') will be less filtered.
-    const int f = base_strength * level0 / (256 + m->beta_);
-    m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
+    const int f = base_strength * level0 / (256 + m->beta);
+    m->fstrength = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
  }
  // We record the initial strength (mainly for the case of 1-segment only).
-  enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
-  enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
-  enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
+  enc->filter_hdr.level = enc->dqm[0].fstrength;
+  enc->filter_hdr.simple = (enc->config->filter_type == 0);
+  enc->filter_hdr.sharpness = enc->config->filter_sharpness;
 }

 //------------------------------------------------------------------------------
@@ -356,25 +360,25 @@ static double QualityToJPEGCompression(double c, double alpha) {

 static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
                                 const VP8SegmentInfo* const S2) {
-  return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);
+  return (S1->quant == S2->quant) && (S1->fstrength == S2->fstrength);
 }

 static void SimplifySegments(VP8Encoder* const enc) {
  int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
-  // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
+  // 'num_segments' is previously validated and <= NUM_MB_SEGMENTS, but an
  // explicit check is needed to avoid a spurious warning about 'i' exceeding
-  // array bounds of 'dqm_' with some compilers (noticed with gcc-4.9).
-  const int num_segments = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS)
-                               ? enc->segment_hdr_.num_segments_
+  // array bounds of 'dqm' with some compilers (noticed with gcc-4.9).
+  const int num_segments = (enc->segment_hdr.num_segments < NUM_MB_SEGMENTS)
+                               ? enc->segment_hdr.num_segments
                               : NUM_MB_SEGMENTS;
  int num_final_segments = 1;
  int s1, s2;
  for (s1 = 1; s1 < num_segments; ++s1) {    // find similar segments
-    const VP8SegmentInfo* const S1 = &enc->dqm_[s1];
+    const VP8SegmentInfo* const S1 = &enc->dqm[s1];
    int found = 0;
    // check if we already have similar segment
    for (s2 = 0; s2 < num_final_segments; ++s2) {
-      const VP8SegmentInfo* const S2 = &enc->dqm_[s2];
+      const VP8SegmentInfo* const S2 = &enc->dqm[s2];
      if (SegmentsAreEquivalent(S1, S2)) {
        found = 1;
        break;
@@ -383,18 +387,18 @@ static void SimplifySegments(VP8Encoder* const enc) {
    map[s1] = s2;
    if (!found) {
      if (num_final_segments != s1) {
-        enc->dqm_[num_final_segments] = enc->dqm_[s1];
+        enc->dqm[num_final_segments] = enc->dqm[s1];
      }
      ++num_final_segments;
    }
  }
  if (num_final_segments < num_segments) {  // Remap
-    int i = enc->mb_w_ * enc->mb_h_;
-    while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
-    enc->segment_hdr_.num_segments_ = num_final_segments;
+    int i = enc->mb_w * enc->mb_h;
+    while (i-- > 0) enc->mb_info[i].segment = map[enc->mb_info[i].segment];
+    enc->segment_hdr.num_segments = num_final_segments;
    // Replicate the trailing segment infos (it's mostly cosmetics)
    for (i = num_final_segments; i < num_segments; ++i) {
-      enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
+      enc->dqm[i] = enc->dqm[num_final_segments - 1];
    }
  }
 }
@@ -402,50 +406,50 @@ static void SimplifySegments(VP8Encoder* const enc) {
 void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
  int i;
  int dq_uv_ac, dq_uv_dc;
-  const int num_segments = enc->segment_hdr_.num_segments_;
-  const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
+  const int num_segments = enc->segment_hdr.num_segments;
+  const double amp = SNS_TO_DQ * enc->config->sns_strength / 100. / 128.;
  const double Q = quality / 100.;
-  const double c_base = enc->config_->emulate_jpeg_size ?
-      QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
+  const double c_base = enc->config->emulate_jpeg_size ?
+      QualityToJPEGCompression(Q, enc->alpha / 255.) :
      QualityToCompression(Q);
  for (i = 0; i < num_segments; ++i) {
    // We modulate the base coefficient to accommodate for the quantization
    // susceptibility and allow denser segments to be quantized more.
-    const double expn = 1. - amp * enc->dqm_[i].alpha_;
+    const double expn = 1. - amp * enc->dqm[i].alpha;
    const double c = pow(c_base, expn);
    const int q = (int)(127. * (1. - c));
    assert(expn > 0.);
-    enc->dqm_[i].quant_ = clip(q, 0, 127);
+    enc->dqm[i].quant = clip(q, 0, 127);
  }

  // purely indicative in the bitstream (except for the 1-segment case)
-  enc->base_quant_ = enc->dqm_[0].quant_;
+  enc->base_quant = enc->dqm[0].quant;

  // fill-in values for the unused segments (required by the syntax)
  for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
-    enc->dqm_[i].quant_ = enc->base_quant_;
+    enc->dqm[i].quant = enc->base_quant;
  }

-  // uv_alpha_ is normally spread around ~60. The useful range is
+  // uv_alpha is normally spread around ~60. The useful range is
  // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
  // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
-  dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
-                                          / (MAX_ALPHA - MIN_ALPHA);
+  dq_uv_ac = (enc->uv_alpha - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
+                                         / (MAX_ALPHA - MIN_ALPHA);
  // we rescale by the user-defined strength of adaptation
-  dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
+  dq_uv_ac = dq_uv_ac * enc->config->sns_strength / 100;
  // and make it safe.
  dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
  // We also boost the dc-uv-quant a little, based on sns-strength, since
  // U/V channels are quite more reactive to high quants (flat DC-blocks
  // tend to appear, and are unpleasant).
-  dq_uv_dc = -4 * enc->config_->sns_strength / 100;
+  dq_uv_dc = -4 * enc->config->sns_strength / 100;
  dq_uv_dc = clip(dq_uv_dc, -15, 15);   // 4bit-signed max allowed

-  enc->dq_y1_dc_ = 0;       // TODO(skal): dq-lum
-  enc->dq_y2_dc_ = 0;
-  enc->dq_y2_ac_ = 0;
-  enc->dq_uv_dc_ = dq_uv_dc;
-  enc->dq_uv_ac_ = dq_uv_ac;
+  enc->dq_y1_dc = 0;       // TODO(skal): dq-lum
+  enc->dq_y2_dc = 0;
+  enc->dq_y2_ac = 0;
+  enc->dq_uv_dc = dq_uv_dc;
+  enc->dq_uv_ac = dq_uv_ac;

  SetupFilterStrength(enc);   // initialize segments' filtering, eventually

@@ -467,21 +471,21 @@ static const uint16_t VP8I4ModeOffsets[NUM_BMODES] = {
 };

 void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
-  const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
-  const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
-  VP8EncPredLuma16(it->yuv_p_, left, top);
+  const uint8_t* const left = it->x ? it->y_left : NULL;
+  const uint8_t* const top = it->y ? it->y_top : NULL;
+  VP8EncPredLuma16(it->yuv_p, left, top);
 }

 void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
-  const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
-  const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
-  VP8EncPredChroma8(it->yuv_p_, left, top);
+  const uint8_t* const left = it->x ? it->u_left : NULL;
+  const uint8_t* const top = it->y ? it->uv_top : NULL;
+  VP8EncPredChroma8(it->yuv_p, left, top);
 }

-// Form all the ten Intra4x4 predictions in the yuv_p_ cache
-// for the 4x4 block it->i4_
+// Form all the ten Intra4x4 predictions in the 'yuv_p' cache
+// for the 4x4 block it->i4
 static void MakeIntra4Preds(const VP8EncIterator* const it) {
-  VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
+  VP8EncPredLuma4(it->yuv_p, it->i4_top);
 }

 //------------------------------------------------------------------------------
@@ -600,9 +604,9 @@ static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
                                int ctx0, int coeff_type,
                                const VP8Matrix* WEBP_RESTRICT const mtx,
                                int lambda) {
-  const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
+  const ProbaArray* const probas = enc->proba.coeffs[coeff_type];
  CostArrayPtr const costs =
-      (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
+      (CostArrayPtr)enc->proba.remapped_costs[coeff_type];
  const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0;
  Node nodes[16][NUM_NODES];
  ScoreState score_states[2][NUM_NODES];
@@ -614,7 +618,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,

  {
    score_t cost;
-    const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
+    const int thresh = mtx->q[1] * mtx->q[1] / 4;
    const int last_proba = probas[VP8EncBands[first]][ctx0][0];

    // compute the position of the last interesting coefficient
@@ -646,13 +650,13 @@ static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
  // traverse trellis.
  for (n = first; n <= last; ++n) {
    const int j = kZigzag[n];
-    const uint32_t Q  = mtx->q_[j];
-    const uint32_t iQ = mtx->iq_[j];
+    const uint32_t Q  = mtx->q[j];
+    const uint32_t iQ = mtx->iq[j];
    const uint32_t B = BIAS(0x00);     // neutral bias
    // note: it's important to take sign of the _original_ coeff,
    // so we don't have to consider level < 0 afterward.
    const int sign = (in[j] < 0);
-    const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
+    const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen[j];
    int level0 = QUANTDIV(coeff0, iQ, B);
    int thresh_level = QUANTDIV(coeff0, iQ, BIAS(0x80));
    if (thresh_level > MAX_LEVEL) thresh_level = MAX_LEVEL;
@@ -760,7 +764,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
      const int j = kZigzag[n];
      out[n] = node->sign ? -node->level : node->level;
      nz |= node->level;
-      in[j] = out[n] * mtx->q_[j];
+      in[j] = out[n] * mtx->q[j];
      best_node = node->prev;
    }
    return (nz != 0);
@@ -778,10 +782,10 @@ static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it,
                              VP8ModeScore* WEBP_RESTRICT const rd,
                              uint8_t* WEBP_RESTRICT const yuv_out,
                              int mode) {
-  const VP8Encoder* const enc = it->enc_;
-  const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
-  const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
-  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  const VP8Encoder* const enc = it->enc;
+  const uint8_t* const ref = it->yuv_p + VP8I16ModeOffsets[mode];
+  const uint8_t* const src = it->yuv_in + Y_OFF_ENC;
+  const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
  int nz = 0;
  int n;
  int16_t tmp[16][16], dc_tmp[16];
@@ -790,18 +794,18 @@ static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it,
    VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
  }
  VP8FTransformWHT(tmp[0], dc_tmp);
-  nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
+  nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2) << 24;

-  if (DO_TRELLIS_I16 && it->do_trellis_) {
+  if (DO_TRELLIS_I16 && it->do_trellis) {
    int x, y;
    VP8IteratorNzToBytes(it);
    for (y = 0, n = 0; y < 4; ++y) {
      for (x = 0; x < 4; ++x, ++n) {
-        const int ctx = it->top_nz_[x] + it->left_nz_[y];
+        const int ctx = it->top_nz[x] + it->left_nz[y];
        const int non_zero = TrellisQuantizeBlock(
-            enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1_,
-            dqm->lambda_trellis_i16_);
-        it->top_nz_[x] = it->left_nz_[y] = non_zero;
+            enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1,
+            dqm->lambda_trellis_i16);
+        it->top_nz[x] = it->left_nz[y] = non_zero;
        rd->y_ac_levels[n][0] = 0;
        nz |= non_zero << n;
      }
@@ -811,7 +815,7 @@ static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it,
      // Zero-out the first coeff, so that: a) nz is correct below, and
      // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
      tmp[n][0] = tmp[n + 1][0] = 0;
-      nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
+      nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1) << n;
      assert(rd->y_ac_levels[n + 0][0] == 0);
      assert(rd->y_ac_levels[n + 1][0] == 0);
    }
@@ -831,20 +835,20 @@ static int ReconstructIntra4(VP8EncIterator* WEBP_RESTRICT const it,
                             const uint8_t* WEBP_RESTRICT const src,
                             uint8_t* WEBP_RESTRICT const yuv_out,
                             int mode) {
-  const VP8Encoder* const enc = it->enc_;
-  const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
-  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  const VP8Encoder* const enc = it->enc;
+  const uint8_t* const ref = it->yuv_p + VP8I4ModeOffsets[mode];
+  const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
  int nz = 0;
  int16_t tmp[16];

  VP8FTransform(src, ref, tmp);
-  if (DO_TRELLIS_I4 && it->do_trellis_) {
-    const int x = it->i4_ & 3, y = it->i4_ >> 2;
-    const int ctx = it->top_nz_[x] + it->left_nz_[y];
-    nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1_,
-                              dqm->lambda_trellis_i4_);
+  if (DO_TRELLIS_I4 && it->do_trellis) {
+    const int x = it->i4 & 3, y = it->i4 >> 2;
+    const int ctx = it->top_nz[x] + it->left_nz[y];
+    nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1,
+                              dqm->lambda_trellis_i4);
  } else {
-    nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
+    nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1);
  }
  VP8ITransform(ref, tmp, yuv_out, 0);
  return nz;
@@ -867,8 +871,8 @@ static int QuantizeSingle(int16_t* WEBP_RESTRICT const v,
  int V = *v;
  const int sign = (V < 0);
  if (sign) V = -V;
-  if (V > (int)mtx->zthresh_[0]) {
-    const int qV = QUANTDIV(V, mtx->iq_[0], mtx->bias_[0]) * mtx->q_[0];
+  if (V > (int)mtx->zthresh[0]) {
+    const int qV = QUANTDIV(V, mtx->iq[0], mtx->bias[0]) * mtx->q[0];
    const int err = (V - qV);
    *v = sign ? -qV : qV;
    return (sign ? -err : err) >> DSCALE;
@@ -890,8 +894,8 @@ static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it,
  // as top[]/left[] on the next block.
  int ch;
  for (ch = 0; ch <= 1; ++ch) {
-    const int8_t* const top = it->top_derr_[it->x_][ch];
-    const int8_t* const left = it->left_derr_[ch];
+    const int8_t* const top = it->top_derr[it->x][ch];
+    const int8_t* const left = it->left_derr[ch];
    int16_t (* const c)[16] = &tmp[ch * 4];
    int err0, err1, err2, err3;
    c[0][0] += (C1 * top[0] + C2 * left[0]) >> (DSHIFT - DSCALE);
@@ -902,7 +906,7 @@ static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it,
    err2 = QuantizeSingle(&c[2][0], mtx);
    c[3][0] += (C1 * err1 + C2 * err2) >> (DSHIFT - DSCALE);
    err3 = QuantizeSingle(&c[3][0], mtx);
-    // error 'err' is bounded by mtx->q_[0] which is 132 at max. Hence
+    // error 'err' is bounded by mtx->q[0] which is 132 at max. Hence
    // err >> DSCALE will fit in an int8_t type if DSCALE>=1.
    assert(abs(err1) <= 127 && abs(err2) <= 127 && abs(err3) <= 127);
    rd->derr[ch][0] = (int8_t)err1;
@@ -915,8 +919,8 @@ static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it,
                                 const VP8ModeScore* WEBP_RESTRICT const rd) {
  int ch;
  for (ch = 0; ch <= 1; ++ch) {
-    int8_t* const top = it->top_derr_[it->x_][ch];
-    int8_t* const left = it->left_derr_[ch];
+    int8_t* const top = it->top_derr[it->x][ch];
+    int8_t* const left = it->left_derr[ch];
    left[0] = rd->derr[ch][0];            // restore err1
    left[1] = 3 * rd->derr[ch][2] >> 2;   //     ... 3/4th of err3
    top[0]  = rd->derr[ch][1];            //     ... err2
@@ -934,10 +938,10 @@ static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it,
 static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it,
                         VP8ModeScore* WEBP_RESTRICT const rd,
                         uint8_t* WEBP_RESTRICT const yuv_out, int mode) {
-  const VP8Encoder* const enc = it->enc_;
-  const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
-  const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
-  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  const VP8Encoder* const enc = it->enc;
+  const uint8_t* const ref = it->yuv_p + VP8UVModeOffsets[mode];
+  const uint8_t* const src = it->yuv_in + U_OFF_ENC;
+  const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
  int nz = 0;
  int n;
  int16_t tmp[8][16];
@@ -945,25 +949,25 @@ static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it,
  for (n = 0; n < 8; n += 2) {
    VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
  }
-  if (it->top_derr_ != NULL) CorrectDCValues(it, &dqm->uv_, tmp, rd);
+  if (it->top_derr != NULL) CorrectDCValues(it, &dqm->uv, tmp, rd);

-  if (DO_TRELLIS_UV && it->do_trellis_) {
+  if (DO_TRELLIS_UV && it->do_trellis) {
    int ch, x, y;
    for (ch = 0, n = 0; ch <= 2; ch += 2) {
      for (y = 0; y < 2; ++y) {
        for (x = 0; x < 2; ++x, ++n) {
-          const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+          const int ctx = it->top_nz[4 + ch + x] + it->left_nz[4 + ch + y];
          const int non_zero = TrellisQuantizeBlock(
-              enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv_,
-              dqm->lambda_trellis_uv_);
-          it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
+              enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv,
+              dqm->lambda_trellis_uv);
+          it->top_nz[4 + ch + x] = it->left_nz[4 + ch + y] = non_zero;
          nz |= non_zero << n;
        }
      }
    }
  } else {
    for (n = 0; n < 8; n += 2) {
-      nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
+      nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv) << n;
    }
  }

@@ -985,7 +989,7 @@ static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
  const int v2 = abs(DCs[4]);
  int max_v = (v1 > v0) ? v1 : v0;
  max_v = (v2 > max_v) ? v2 : max_v;
-  if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
+  if (max_v > dqm->max_edge) dqm->max_edge = max_v;
 }

 static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) {
@@ -1001,25 +1005,25 @@ static void SwapPtr(uint8_t** a, uint8_t** b) {
 }

 static void SwapOut(VP8EncIterator* const it) {
-  SwapPtr(&it->yuv_out_, &it->yuv_out2_);
+  SwapPtr(&it->yuv_out, &it->yuv_out2);
 }

 static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it,
                            VP8ModeScore* WEBP_RESTRICT rd) {
  const int kNumBlocks = 16;
-  VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
-  const int lambda = dqm->lambda_i16_;
-  const int tlambda = dqm->tlambda_;
-  const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
+  VP8SegmentInfo* const dqm = &it->enc->dqm[it->mb->segment];
+  const int lambda = dqm->lambda_i16;
+  const int tlambda = dqm->tlambda;
+  const uint8_t* const src = it->yuv_in + Y_OFF_ENC;
  VP8ModeScore rd_tmp;
  VP8ModeScore* rd_cur = &rd_tmp;
  VP8ModeScore* rd_best = rd;
  int mode;
-  int is_flat = IsFlatSource16(it->yuv_in_ + Y_OFF_ENC);
+  int is_flat = IsFlatSource16(it->yuv_in + Y_OFF_ENC);

  rd->mode_i16 = -1;
  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
-    uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC;  // scratch buffer
+    uint8_t* const tmp_dst = it->yuv_out2 + Y_OFF_ENC;  // scratch buffer
    rd_cur->mode_i16 = mode;

    // Reconstruct
@@ -1051,13 +1055,13 @@ static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it,
  if (rd_best != rd) {
    memcpy(rd, rd_best, sizeof(*rd));
  }
-  SetRDScore(dqm->lambda_mode_, rd);   // finalize score for mode decision.
+  SetRDScore(dqm->lambda_mode, rd);   // finalize score for mode decision.
  VP8SetIntra16Mode(it, rd->mode_i16);

  // we have a blocky macroblock (only DCs are non-zero) with fairly high
  // distortion, record max delta so we can later adjust the minimal filtering
  // strength needed to smooth these blocks out.
-  if ((rd->nz & 0x100ffff) == 0x1000000 && rd->D > dqm->min_disto_) {
+  if ((rd->nz & 0x100ffff) == 0x1000000 && rd->D > dqm->min_disto) {
    StoreMaxDelta(dqm, rd->y_dc_levels);
  }
 }
@@ -1067,41 +1071,41 @@ static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it,
 // return the cost array corresponding to the surrounding prediction modes.
 static const uint16_t* GetCostModeI4(VP8EncIterator* WEBP_RESTRICT const it,
                                     const uint8_t modes[16]) {
-  const int preds_w = it->enc_->preds_w_;
-  const int x = (it->i4_ & 3), y = it->i4_ >> 2;
-  const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
-  const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
+  const int preds_w = it->enc->preds_w;
+  const int x = (it->i4 & 3), y = it->i4 >> 2;
+  const int left = (x == 0) ? it->preds[y * preds_w - 1] : modes[it->i4 - 1];
+  const int top = (y == 0) ? it->preds[-preds_w + x] : modes[it->i4 - 4];
  return VP8FixedCostsI4[top][left];
 }

 static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
                          VP8ModeScore* WEBP_RESTRICT const rd) {
-  const VP8Encoder* const enc = it->enc_;
-  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
-  const int lambda = dqm->lambda_i4_;
-  const int tlambda = dqm->tlambda_;
-  const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC;
-  uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC;
+  const VP8Encoder* const enc = it->enc;
+  const VP8SegmentInfo* const dqm = &enc->dqm[it->mb->segment];
+  const int lambda = dqm->lambda_i4;
+  const int tlambda = dqm->tlambda;
+  const uint8_t* const src0 = it->yuv_in + Y_OFF_ENC;
+  uint8_t* const best_blocks = it->yuv_out2 + Y_OFF_ENC;
  int total_header_bits = 0;
  VP8ModeScore rd_best;

-  if (enc->max_i4_header_bits_ == 0) {
+  if (enc->max_i4_header_bits == 0) {
    return 0;
  }

  InitScore(&rd_best);
  rd_best.H = 211;  // '211' is the value of VP8BitCost(0, 145)
-  SetRDScore(dqm->lambda_mode_, &rd_best);
+  SetRDScore(dqm->lambda_mode, &rd_best);
  VP8IteratorStartI4(it);
  do {
    const int kNumBlocks = 1;
    VP8ModeScore rd_i4;
    int mode;
    int best_mode = -1;
-    const uint8_t* const src = src0 + VP8Scan[it->i4_];
+    const uint8_t* const src = src0 + VP8Scan[it->i4];
    const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
-    uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
-    uint8_t* tmp_dst = it->yuv_p_ + I4TMP;    // scratch buffer.
+    uint8_t* best_block = best_blocks + VP8Scan[it->i4];
+    uint8_t* tmp_dst = it->yuv_p + I4TMP;    // scratch buffer.

    InitScore(&rd_i4);
    MakeIntra4Preds(it);
@@ -1111,7 +1115,7 @@ static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,

      // Reconstruct
      rd_tmp.nz =
-          ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
+          ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4;

      // Compute RD-score
      rd_tmp.D = VP8SSE4x4(src, tmp_dst);
@@ -1140,25 +1144,25 @@ static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
        CopyScore(&rd_i4, &rd_tmp);
        best_mode = mode;
        SwapPtr(&tmp_dst, &best_block);
-        memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels,
-               sizeof(rd_best.y_ac_levels[it->i4_]));
+        memcpy(rd_best.y_ac_levels[it->i4], tmp_levels,
+               sizeof(rd_best.y_ac_levels[it->i4]));
      }
    }
-    SetRDScore(dqm->lambda_mode_, &rd_i4);
+    SetRDScore(dqm->lambda_mode, &rd_i4);
    AddScore(&rd_best, &rd_i4);
    if (rd_best.score >= rd->score) {
      return 0;
    }
    total_header_bits += (int)rd_i4.H;   // <- equal to mode_costs[best_mode];
-    if (total_header_bits > enc->max_i4_header_bits_) {
+    if (total_header_bits > enc->max_i4_header_bits) {
      return 0;
    }
    // Copy selected samples if not in the right place already.
-    if (best_block != best_blocks + VP8Scan[it->i4_]) {
-      VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
+    if (best_block != best_blocks + VP8Scan[it->i4]) {
+      VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4]);
    }
-    rd->modes_i4[it->i4_] = best_mode;
-    it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
+    rd->modes_i4[it->i4] = best_mode;
+    it->top_nz[it->i4 & 3] = it->left_nz[it->i4 >> 2] = (rd_i4.nz ? 1 : 0);
  } while (VP8IteratorRotateI4(it, best_blocks));

  // finalize state
@@ -1174,11 +1178,11 @@ static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
 static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
                       VP8ModeScore* WEBP_RESTRICT const rd) {
  const int kNumBlocks = 8;
-  const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
-  const int lambda = dqm->lambda_uv_;
-  const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
-  uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC;  // scratch buffer
-  uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC;
+  const VP8SegmentInfo* const dqm = &it->enc->dqm[it->mb->segment];
+  const int lambda = dqm->lambda_uv;
+  const uint8_t* const src = it->yuv_in + U_OFF_ENC;
+  uint8_t* tmp_dst = it->yuv_out2 + U_OFF_ENC;  // scratch buffer
+  uint8_t* dst0 = it->yuv_out + U_OFF_ENC;
  uint8_t* dst = dst0;
  VP8ModeScore rd_best;
  int mode;
@@ -1205,7 +1209,7 @@ static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
      CopyScore(&rd_best, &rd_uv);
      rd->mode_uv = mode;
      memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
-      if (it->top_derr_ != NULL) {
+      if (it->top_derr != NULL) {
        memcpy(rd->derr, rd_uv.derr, sizeof(rd_uv.derr));
      }
      SwapPtr(&dst, &tmp_dst);
@@ -1216,7 +1220,7 @@ static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
  if (dst != dst0) {   // copy 16x8 block if needed
    VP8Copy16x8(dst, dst0);
  }
-  if (it->top_derr_ != NULL) {  // store diffusion errors for next block
+  if (it->top_derr != NULL) {  // store diffusion errors for next block
    StoreDiffusionErrors(it, rd);
  }
 }
@@ -1226,26 +1230,26 @@ static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,

 static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it,
                           VP8ModeScore* WEBP_RESTRICT const rd) {
-  const VP8Encoder* const enc = it->enc_;
-  const int is_i16 = (it->mb_->type_ == 1);
+  const VP8Encoder* const enc = it->enc;
+  const int is_i16 = (it->mb->type == 1);
  int nz = 0;

  if (is_i16) {
-    nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
+    nz = ReconstructIntra16(it, rd, it->yuv_out + Y_OFF_ENC, it->preds[0]);
  } else {
    VP8IteratorStartI4(it);
    do {
      const int mode =
-          it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
-      const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
-      uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
+          it->preds[(it->i4 & 3) + (it->i4 >> 2) * enc->preds_w];
+      const uint8_t* const src = it->yuv_in + Y_OFF_ENC + VP8Scan[it->i4];
+      uint8_t* const dst = it->yuv_out + Y_OFF_ENC + VP8Scan[it->i4];
      MakeIntra4Preds(it);
-      nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
-                              src, dst, mode) << it->i4_;
-    } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
+      nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4],
+                              src, dst, mode) << it->i4;
+    } while (VP8IteratorRotateI4(it, it->yuv_out + Y_OFF_ENC));
  }

-  nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
+  nz |= ReconstructUV(it, rd, it->yuv_out + U_OFF_ENC, it->mb->uv_mode);
  rd->nz = nz;
 }

@@ -1256,23 +1260,23 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
  score_t best_score = MAX_COST;
  int nz = 0;
  int mode;
-  int is_i16 = try_both_modes || (it->mb_->type_ == 1);
+  int is_i16 = try_both_modes || (it->mb->type == 1);

-  const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
+  const VP8SegmentInfo* const dqm = &it->enc->dqm[it->mb->segment];
  // Some empiric constants, of approximate order of magnitude.
  const int lambda_d_i16 = 106;
  const int lambda_d_i4 = 11;
  const int lambda_d_uv = 120;
-  score_t score_i4 = dqm->i4_penalty_;
+  score_t score_i4 = dqm->i4_penalty;
  score_t i4_bit_sum = 0;
-  const score_t bit_limit = try_both_modes ? it->enc_->mb_header_limit_
+  const score_t bit_limit = try_both_modes ? it->enc->mb_header_limit
                                           : MAX_COST;  // no early-out allowed

  if (is_i16) {   // First, evaluate Intra16 distortion
    int best_mode = -1;
-    const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
+    const uint8_t* const src = it->yuv_in + Y_OFF_ENC;
    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
-      const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
+      const uint8_t* const ref = it->yuv_p + VP8I16ModeOffsets[mode];
      const score_t score = (score_t)VP8SSE16x16(src, ref) * RD_DISTO_MULT
                          + VP8FixedCostsI16[mode] * lambda_d_i16;
      if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) {
@@ -1284,10 +1288,10 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
        best_score = score;
      }
    }
-    if (it->x_ == 0 || it->y_ == 0) {
+    if (it->x == 0 || it->y == 0) {
      // avoid starting a checkerboard resonance from the border. See bug #432.
      if (IsFlatSource16(src)) {
-        best_mode = (it->x_ == 0) ? 0 : 2;
+        best_mode = (it->x == 0) ? 0 : 2;
        try_both_modes = 0;  // stick to i16
      }
    }
@@ -1304,12 +1308,12 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
    do {
      int best_i4_mode = -1;
      score_t best_i4_score = MAX_COST;
-      const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
+      const uint8_t* const src = it->yuv_in + Y_OFF_ENC + VP8Scan[it->i4];
      const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);

      MakeIntra4Preds(it);
      for (mode = 0; mode < NUM_BMODES; ++mode) {
-        const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
+        const uint8_t* const ref = it->yuv_p + VP8I4ModeOffsets[mode];
        const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT
                            + mode_costs[mode] * lambda_d_i4;
        if (score < best_i4_score) {
@@ -1318,18 +1322,18 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
        }
      }
      i4_bit_sum += mode_costs[best_i4_mode];
-      rd->modes_i4[it->i4_] = best_i4_mode;
+      rd->modes_i4[it->i4] = best_i4_mode;
      score_i4 += best_i4_score;
      if (score_i4 >= best_score || i4_bit_sum > bit_limit) {
        // Intra4 won't be better than Intra16. Bail out and pick Intra16.
        is_i16 = 1;
        break;
-      } else {  // reconstruct partial block inside yuv_out2_ buffer
-        uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];
-        nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
-                                src, tmp_dst, best_i4_mode) << it->i4_;
+      } else {  // reconstruct partial block inside yuv_out2 buffer
+        uint8_t* const tmp_dst = it->yuv_out2 + Y_OFF_ENC + VP8Scan[it->i4];
+        nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4],
+                                src, tmp_dst, best_i4_mode) << it->i4;
      }
-    } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));
+    } while (VP8IteratorRotateI4(it, it->yuv_out2 + Y_OFF_ENC));
  }

  // Final reconstruction, depending on which mode is selected.
@@ -1338,16 +1342,16 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
    SwapOut(it);
    best_score = score_i4;
  } else {
-    nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
+    nz = ReconstructIntra16(it, rd, it->yuv_out + Y_OFF_ENC, it->preds[0]);
  }

  // ... and UV!
  if (refine_uv_mode) {
    int best_mode = -1;
    score_t best_uv_score = MAX_COST;
-    const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
+    const uint8_t* const src = it->yuv_in + U_OFF_ENC;
    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
-      const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
+      const uint8_t* const ref = it->yuv_p + VP8UVModeOffsets[mode];
      const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT
                          + VP8FixedCostsUV[mode] * lambda_d_uv;
      if (score < best_uv_score) {
@@ -1357,7 +1361,7 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
    }
    VP8SetIntraUVMode(it, best_mode);
  }
-  nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
+  nz |= ReconstructUV(it, rd, it->yuv_out + U_OFF_ENC, it->mb->uv_mode);

  rd->nz = nz;
  rd->score = best_score;
@@ -1370,7 +1374,7 @@ int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
                VP8ModeScore* WEBP_RESTRICT const rd,
                VP8RDLevel rd_opt) {
  int is_skipped;
-  const int method = it->enc_->method_;
+  const int method = it->enc->method;

  InitScore(rd);

@@ -1380,14 +1384,14 @@ int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
  VP8MakeChroma8Preds(it);

  if (rd_opt > RD_OPT_NONE) {
-    it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
+    it->do_trellis = (rd_opt >= RD_OPT_TRELLIS_ALL);
    PickBestIntra16(it, rd);
    if (method >= 2) {
      PickBestIntra4(it, rd);
    }
    PickBestUV(it, rd);
    if (rd_opt == RD_OPT_TRELLIS) {   // finish off with trellis-optim now
-      it->do_trellis_ = 1;
+      it->do_trellis = 1;
      SimpleQuantize(it, rd);
    }
  } else {
--- a/thirdparty/libwebp/src/enc/syntax_enc.c
+++ b/thirdparty/libwebp/src/enc/syntax_enc.c
@@ -12,18 +12,23 @@
 // Author: Skal (pascal.massimino@gmail.com)

 #include <assert.h>
+#include <stddef.h>

+#include "src/dec/common_dec.h"
+#include "src/webp/types.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/utils/bit_writer_utils.h"
 #include "src/utils/utils.h"
+#include "src/webp/encode.h"
 #include "src/webp/format_constants.h"  // RIFF constants
 #include "src/webp/mux_types.h"         // ALPHA_FLAG
-#include "src/enc/vp8i_enc.h"

 //------------------------------------------------------------------------------
 // Helper functions

 static int IsVP8XNeeded(const VP8Encoder* const enc) {
-  return !!enc->has_alpha_;  // Currently the only case when VP8X is needed.
-                             // This could change in the future.
+  return !!enc->has_alpha;  // Currently the only case when VP8X is needed.
+                            // This could change in the future.
 }

 static int PutPaddingByte(const WebPPicture* const pic) {
@@ -36,7 +41,7 @@ static int PutPaddingByte(const WebPPicture* const pic) {

 static WebPEncodingError PutRIFFHeader(const VP8Encoder* const enc,
                                       size_t riff_size) {
-  const WebPPicture* const pic = enc->pic_;
+  const WebPPicture* const pic = enc->pic;
  uint8_t riff[RIFF_HEADER_SIZE] = {
    'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P'
  };
@@ -49,7 +54,7 @@ static WebPEncodingError PutRIFFHeader(const VP8Encoder* const enc,
 }

 static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
-  const WebPPicture* const pic = enc->pic_;
+  const WebPPicture* const pic = enc->pic;
  uint8_t vp8x[CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE] = {
    'V', 'P', '8', 'X'
  };
@@ -59,7 +64,7 @@ static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
  assert(pic->width >= 1 && pic->height >= 1);
  assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE);

-  if (enc->has_alpha_) {
+  if (enc->has_alpha) {
    flags |= ALPHA_FLAG;
  }

@@ -74,26 +79,26 @@ static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
 }

 static WebPEncodingError PutAlphaChunk(const VP8Encoder* const enc) {
-  const WebPPicture* const pic = enc->pic_;
+  const WebPPicture* const pic = enc->pic;
  uint8_t alpha_chunk_hdr[CHUNK_HEADER_SIZE] = {
    'A', 'L', 'P', 'H'
  };

-  assert(enc->has_alpha_);
+  assert(enc->has_alpha);

  // Alpha chunk header.
-  PutLE32(alpha_chunk_hdr + TAG_SIZE, enc->alpha_data_size_);
+  PutLE32(alpha_chunk_hdr + TAG_SIZE, enc->alpha_data_size);
  if (!pic->writer(alpha_chunk_hdr, sizeof(alpha_chunk_hdr), pic)) {
    return VP8_ENC_ERROR_BAD_WRITE;
  }

  // Alpha chunk data.
-  if (!pic->writer(enc->alpha_data_, enc->alpha_data_size_, pic)) {
+  if (!pic->writer(enc->alpha_data, enc->alpha_data_size, pic)) {
    return VP8_ENC_ERROR_BAD_WRITE;
  }

  // Padding.
-  if ((enc->alpha_data_size_ & 1) && !PutPaddingByte(pic)) {
+  if ((enc->alpha_data_size & 1) && !PutPaddingByte(pic)) {
    return VP8_ENC_ERROR_BAD_WRITE;
  }
  return VP8_ENC_OK;
@@ -148,7 +153,7 @@ static WebPEncodingError PutVP8FrameHeader(const WebPPicture* const pic,
 // WebP Headers.
 static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
                          size_t vp8_size, size_t riff_size) {
-  WebPPicture* const pic = enc->pic_;
+  WebPPicture* const pic = enc->pic;
  WebPEncodingError err = VP8_ENC_OK;

  // RIFF header.
@@ -162,7 +167,7 @@ static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
  }

  // Alpha.
-  if (enc->has_alpha_) {
+  if (enc->has_alpha) {
    err = PutAlphaChunk(enc);
    if (err != VP8_ENC_OK) goto Error;
  }
@@ -172,7 +177,7 @@ static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
  if (err != VP8_ENC_OK) goto Error;

  // VP8 frame header.
-  err = PutVP8FrameHeader(pic, enc->profile_, size0);
+  err = PutVP8FrameHeader(pic, enc->profile, size0);
  if (err != VP8_ENC_OK) goto Error;

  // All OK.
@@ -186,27 +191,27 @@ static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
 // Segmentation header
 static void PutSegmentHeader(VP8BitWriter* const bw,
                             const VP8Encoder* const enc) {
-  const VP8EncSegmentHeader* const hdr = &enc->segment_hdr_;
-  const VP8EncProba* const proba = &enc->proba_;
-  if (VP8PutBitUniform(bw, (hdr->num_segments_ > 1))) {
+  const VP8EncSegmentHeader* const hdr = &enc->segment_hdr;
+  const VP8EncProba* const proba = &enc->proba;
+  if (VP8PutBitUniform(bw, (hdr->num_segments > 1))) {
    // We always 'update' the quant and filter strength values
    const int update_data = 1;
    int s;
-    VP8PutBitUniform(bw, hdr->update_map_);
+    VP8PutBitUniform(bw, hdr->update_map);
    if (VP8PutBitUniform(bw, update_data)) {
      // we always use absolute values, not relative ones
      VP8PutBitUniform(bw, 1);   // (segment_feature_mode = 1. Paragraph 9.3.)
      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        VP8PutSignedBits(bw, enc->dqm_[s].quant_, 7);
+        VP8PutSignedBits(bw, enc->dqm[s].quant, 7);
      }
      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        VP8PutSignedBits(bw, enc->dqm_[s].fstrength_, 6);
+        VP8PutSignedBits(bw, enc->dqm[s].fstrength, 6);
      }
    }
-    if (hdr->update_map_) {
+    if (hdr->update_map) {
      for (s = 0; s < 3; ++s) {
-        if (VP8PutBitUniform(bw, (proba->segments_[s] != 255u))) {
-          VP8PutBits(bw, proba->segments_[s], 8);
+        if (VP8PutBitUniform(bw, (proba->segments[s] != 255u))) {
+          VP8PutBits(bw, proba->segments[s], 8);
        }
      }
    }
@@ -216,18 +221,18 @@ static void PutSegmentHeader(VP8BitWriter* const bw,
 // Filtering parameters header
 static void PutFilterHeader(VP8BitWriter* const bw,
                            const VP8EncFilterHeader* const hdr) {
-  const int use_lf_delta = (hdr->i4x4_lf_delta_ != 0);
-  VP8PutBitUniform(bw, hdr->simple_);
-  VP8PutBits(bw, hdr->level_, 6);
-  VP8PutBits(bw, hdr->sharpness_, 3);
+  const int use_lf_delta = (hdr->i4x4_lf_delta != 0);
+  VP8PutBitUniform(bw, hdr->simple);
+  VP8PutBits(bw, hdr->level, 6);
+  VP8PutBits(bw, hdr->sharpness, 3);
  if (VP8PutBitUniform(bw, use_lf_delta)) {
-    // '0' is the default value for i4x4_lf_delta_ at frame #0.
-    const int need_update = (hdr->i4x4_lf_delta_ != 0);
+    // '0' is the default value for i4x4_lf_delta at frame #0.
+    const int need_update = (hdr->i4x4_lf_delta != 0);
    if (VP8PutBitUniform(bw, need_update)) {
      // we don't use ref_lf_delta => emit four 0 bits
      VP8PutBits(bw, 0, 4);
      // we use mode_lf_delta for i4x4
-      VP8PutSignedBits(bw, hdr->i4x4_lf_delta_, 6);
+      VP8PutSignedBits(bw, hdr->i4x4_lf_delta, 6);
      VP8PutBits(bw, 0, 3);    // all others unused
    }
  }
@@ -236,12 +241,12 @@ static void PutFilterHeader(VP8BitWriter* const bw,
 // Nominal quantization parameters
 static void PutQuant(VP8BitWriter* const bw,
                     const VP8Encoder* const enc) {
-  VP8PutBits(bw, enc->base_quant_, 7);
-  VP8PutSignedBits(bw, enc->dq_y1_dc_, 4);
-  VP8PutSignedBits(bw, enc->dq_y2_dc_, 4);
-  VP8PutSignedBits(bw, enc->dq_y2_ac_, 4);
-  VP8PutSignedBits(bw, enc->dq_uv_dc_, 4);
-  VP8PutSignedBits(bw, enc->dq_uv_ac_, 4);
+  VP8PutBits(bw, enc->base_quant, 7);
+  VP8PutSignedBits(bw, enc->dq_y1_dc, 4);
+  VP8PutSignedBits(bw, enc->dq_y2_dc, 4);
+  VP8PutSignedBits(bw, enc->dq_y2_ac, 4);
+  VP8PutSignedBits(bw, enc->dq_uv_dc, 4);
+  VP8PutSignedBits(bw, enc->dq_uv_ac, 4);
 }

 // Partition sizes
@@ -249,8 +254,8 @@ static int EmitPartitionsSize(const VP8Encoder* const enc,
                              WebPPicture* const pic) {
  uint8_t buf[3 * (MAX_NUM_PARTITIONS - 1)];
  int p;
-  for (p = 0; p < enc->num_parts_ - 1; ++p) {
-    const size_t part_size = VP8BitWriterSize(enc->parts_ + p);
+  for (p = 0; p < enc->num_parts - 1; ++p) {
+    const size_t part_size = VP8BitWriterSize(enc->parts + p);
    if (part_size >= VP8_MAX_PARTITION_SIZE) {
      return WebPEncodingSetError(pic, VP8_ENC_ERROR_PARTITION_OVERFLOW);
    }
@@ -267,25 +272,25 @@ static int EmitPartitionsSize(const VP8Encoder* const enc,
 //------------------------------------------------------------------------------

 static int GeneratePartition0(VP8Encoder* const enc) {
-  VP8BitWriter* const bw = &enc->bw_;
-  const int mb_size = enc->mb_w_ * enc->mb_h_;
+  VP8BitWriter* const bw = &enc->bw;
+  const int mb_size = enc->mb_w * enc->mb_h;
  uint64_t pos1, pos2, pos3;

  pos1 = VP8BitWriterPos(bw);
  if (!VP8BitWriterInit(bw, mb_size * 7 / 8)) {        // ~7 bits per macroblock
-    return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    return WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
  }
  VP8PutBitUniform(bw, 0);   // colorspace
  VP8PutBitUniform(bw, 0);   // clamp type

  PutSegmentHeader(bw, enc);
-  PutFilterHeader(bw, &enc->filter_hdr_);
-  VP8PutBits(bw, enc->num_parts_ == 8 ? 3 :
-                 enc->num_parts_ == 4 ? 2 :
-                 enc->num_parts_ == 2 ? 1 : 0, 2);
+  PutFilterHeader(bw, &enc->filter_hdr);
+  VP8PutBits(bw, enc->num_parts == 8 ? 3 :
+                 enc->num_parts == 4 ? 2 :
+                 enc->num_parts == 2 ? 1 : 0, 2);
  PutQuant(bw, enc);
  VP8PutBitUniform(bw, 0);   // no proba update
-  VP8WriteProbas(bw, &enc->proba_);
+  VP8WriteProbas(bw, &enc->proba);
  pos2 = VP8BitWriterPos(bw);
  VP8CodeIntraModes(enc);
  VP8BitWriterFinish(bw);
@@ -293,36 +298,36 @@ static int GeneratePartition0(VP8Encoder* const enc) {
  pos3 = VP8BitWriterPos(bw);

 #if !defined(WEBP_DISABLE_STATS)
-  if (enc->pic_->stats) {
-    enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3);
-    enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3);
-    enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_;
+  if (enc->pic->stats) {
+    enc->pic->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3);
+    enc->pic->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3);
+    enc->pic->stats->alpha_data_size = (int)enc->alpha_data_size;
  }
 #else
  (void)pos1;
  (void)pos2;
  (void)pos3;
 #endif
-  if (bw->error_) {
-    return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+  if (bw->error) {
+    return WebPEncodingSetError(enc->pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
  }
  return 1;
 }

 void VP8EncFreeBitWriters(VP8Encoder* const enc) {
  int p;
-  VP8BitWriterWipeOut(&enc->bw_);
-  for (p = 0; p < enc->num_parts_; ++p) {
-    VP8BitWriterWipeOut(enc->parts_ + p);
+  VP8BitWriterWipeOut(&enc->bw);
+  for (p = 0; p < enc->num_parts; ++p) {
+    VP8BitWriterWipeOut(enc->parts + p);
  }
 }

 int VP8EncWrite(VP8Encoder* const enc) {
-  WebPPicture* const pic = enc->pic_;
-  VP8BitWriter* const bw = &enc->bw_;
+  WebPPicture* const pic = enc->pic;
+  VP8BitWriter* const bw = &enc->bw;
  const int task_percent = 19;
-  const int percent_per_part = task_percent / enc->num_parts_;
-  const int final_percent = enc->percent_ + task_percent;
+  const int percent_per_part = task_percent / enc->num_parts;
+  const int final_percent = enc->percent + task_percent;
  int ok = 0;
  size_t vp8_size, pad, riff_size;
  int p;
@@ -334,9 +339,9 @@ int VP8EncWrite(VP8Encoder* const enc) {
  // Compute VP8 size
  vp8_size = VP8_FRAME_HEADER_SIZE +
             VP8BitWriterSize(bw) +
-             3 * (enc->num_parts_ - 1);
-  for (p = 0; p < enc->num_parts_; ++p) {
-    vp8_size += VP8BitWriterSize(enc->parts_ + p);
+             3 * (enc->num_parts - 1);
+  for (p = 0; p < enc->num_parts; ++p) {
+    vp8_size += VP8BitWriterSize(enc->parts + p);
  }
  pad = vp8_size & 1;
  vp8_size += pad;
@@ -347,9 +352,9 @@ int VP8EncWrite(VP8Encoder* const enc) {
  if (IsVP8XNeeded(enc)) {  // Add size for: VP8X header + data.
    riff_size += CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
  }
-  if (enc->has_alpha_) {  // Add size for: ALPH header + data.
-    const uint32_t padded_alpha_size = enc->alpha_data_size_ +
-                                       (enc->alpha_data_size_ & 1);
+  if (enc->has_alpha) {  // Add size for: ALPH header + data.
+    const uint32_t padded_alpha_size = enc->alpha_data_size +
+                                       (enc->alpha_data_size & 1);
    riff_size += CHUNK_HEADER_SIZE + padded_alpha_size;
  }
  // RIFF size should fit in 32-bits.
@@ -368,13 +373,13 @@ int VP8EncWrite(VP8Encoder* const enc) {
  }

  // Token partitions
-  for (p = 0; p < enc->num_parts_; ++p) {
-    const uint8_t* const buf = VP8BitWriterBuf(enc->parts_ + p);
-    const size_t size = VP8BitWriterSize(enc->parts_ + p);
+  for (p = 0; p < enc->num_parts; ++p) {
+    const uint8_t* const buf = VP8BitWriterBuf(enc->parts + p);
+    const size_t size = VP8BitWriterSize(enc->parts + p);
    if (size) ok = ok && pic->writer(buf, size, pic);
-    VP8BitWriterWipeOut(enc->parts_ + p);    // will free the internal buffer.
-    ok = ok && WebPReportProgress(pic, enc->percent_ + percent_per_part,
-                                  &enc->percent_);
+    VP8BitWriterWipeOut(enc->parts + p);    // will free the internal buffer.
+    ok = ok && WebPReportProgress(pic, enc->percent + percent_per_part,
+                                  &enc->percent);
  }

  // Padding byte
@@ -382,11 +387,10 @@ int VP8EncWrite(VP8Encoder* const enc) {
    ok = PutPaddingByte(pic);
  }

-  enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size);
-  ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_);
+  enc->coded_size = (int)(CHUNK_HEADER_SIZE + riff_size);
+  ok = ok && WebPReportProgress(pic, final_percent, &enc->percent);
  if (!ok) WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
  return ok;
 }

 //------------------------------------------------------------------------------
-
--- a/Show More
+++ b/Show More