Android: Make .gz sync faster by only checking last bytes

In particular, some angledata.gz files are hundreds of megabytes, which
results in multiple seconds spent on some traces, which adds up to a few
minutes when running all traces.

Gzip format includes an 8-byte trailer with the CRC-32 and the length of
the uncompressed data
https://en.wikipedia.org/wiki/Gzip#File_format

Instead of checking hash(file), check hash(tail(file)). Check more than
just 8 bytes (arbitrarily picked 4096) so that in the rare event of a
collision we're still likely to see different bytes, as compressed
streams tend to diverge when data changes.

This reduces the already-fully-synced sync from ~130s to ~60s
in my tests.

Bug: b/276742336
Change-Id: I899b80ac90ef4def498c8cb52d6b096d8b1ef826
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4691962
Commit-Queue: Roman Lavrov <romanl@google.com>
Reviewed-by: Cody Northrop <cnorthrop@google.com>
This commit is contained in:
Roman Lavrov
2023-07-17 17:07:52 -04:00
committed by Angle LUCI CQ
parent d7d78adb2a
commit 2fd33fc443

View File

@@ -210,27 +210,41 @@ def _GetDeviceApkPath():
return device_apk_path
def _LocalFileHash(local_path, gz_tail_size):
    """Return the SHA-256 hex digest of a local file, or of its tail for .gz files.

    Args:
        local_path: Path of the file to hash.
        gz_tail_size: Number of trailing bytes to hash when local_path ends
            with '.gz'. Ignored for any other file name.

    Returns:
        The hex digest string. For '.gz' paths only the last gz_tail_size
        bytes are hashed (the whole file when it is not longer than that);
        all other files are hashed in full.
    """
    h = hashlib.sha256()
    with open(local_path, 'rb') as f:
        if local_path.endswith('.gz'):
            # Equivalent of `tail -c {gz_tail_size}`. The size is taken from the
            # open handle so the offset always refers to the file being read.
            offset = os.fstat(f.fileno()).st_size - gz_tail_size
            if offset > 0:
                f.seek(offset)
        # Read in chunks so a large non-gz file is never held in memory at once.
        for data in iter(lambda: f.read(65536), b''):
            h.update(data)
    return h.hexdigest()
def _CompareHashes(local_path, device_path):
    """Return True if the device file's hash matches the local file's hash.

    For paths ending in '.gz' only the last gz_tail_size bytes are hashed on
    both sides; every other file is hashed in full.

    Args:
        local_path: Path of the file on the host.
        device_path: Path of the file on the device.

    Returns:
        False when the device command produces no output (treated as "file not
        on device") or when the hashes differ; True when they are equal.
    """
    # The last 8 bytes of gzip contain CRC-32 and the initial file size and the preceding
    # bytes should be affected by changes in the middle if we happen to run into a collision
    gz_tail_size = 4096

    # `test -f ... || true` makes a missing file yield empty output rather than an error.
    # NOTE(review): the output is compared directly to a hex digest, so `sha256sum -b`
    # presumably prints only the hash on the device -- confirm.
    if local_path.endswith('.gz'):
        cmd = 'test -f {path} && tail -c {gz_tail_size} {path} | sha256sum -b || true'.format(
            path=device_path, gz_tail_size=gz_tail_size)
    else:
        cmd = 'test -f {path} && sha256sum -b {path} || true'.format(path=device_path)

    if device_path.startswith('/data'):
        # Use run-as for files that reside on /data, which aren't accessible without root
        cmd = "run-as {TEST_PACKAGE_NAME} sh -c '{cmd}'".format(
            TEST_PACKAGE_NAME=TEST_PACKAGE_NAME, cmd=cmd)

    device_hash = _AdbShell(cmd).decode().strip()
    if not device_hash:
        logging.debug('_CompareHashes: File not found on device')
        return False  # file not on device

    return _LocalFileHash(local_path, gz_tail_size) == device_hash
def _PrepareTestSuite(suite_name):