From 4e62b6a728f02e5a6ff37bbdb215ceff434ce04a Mon Sep 17 00:00:00 2001 From: Brennon Thomas Date: Mon, 9 Feb 2026 11:25:40 -0600 Subject: [PATCH] Fix ssdeep/ppdeep hash mismatches --- Dockerfile | 39 +++ compare_ssdeep_ppdeep.py | 521 +++++++++++++++++++++++++++++++++++++++ ppdeep.py | 35 ++- 3 files changed, 587 insertions(+), 8 deletions(-) create mode 100644 Dockerfile create mode 100644 compare_ssdeep_ppdeep.py diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..5a5c554 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,39 @@ +FROM docker.io/python:3.13.11-slim-bookworm + +ARG APP_HOME=/app + +ENV PYTHONUNBUFFERED=1 +ENV PYTHONDONTWRITEBYTECODE=1 + +RUN apt-get update && apt-get install --no-install-recommends -y \ + build-essential \ + # ssdeep / fuzzy hashing dependencies. + libfuzzy-dev \ + # actual ssdeep binary incase you want a CLI option + ssdeep + +WORKDIR ${APP_HOME} + +RUN pip install --upgrade \ + pip \ + "setuptools<70" \ + wheel + +# Set constraint to ensure ssdeep build uses compatible setuptools. +ENV PIP_CONSTRAINT=/tmp/constraints.txt +RUN echo "setuptools<70" > /tmp/constraints.txt + +RUN pip install ssdeep==3.4 + +COPY ./compare_ssdeep_ppdeep.py . + +# Pick a ppdeep version... + +# 1) Current 20251115 ppdeep version +RUN pip install ppdeep==20251115 + +# 2) Updated ppdeep PR +# COPY ./ppdeep.py . +# COPY ./setup.py . +# COPY ./README.md . +# RUN python setup.py install diff --git a/compare_ssdeep_ppdeep.py b/compare_ssdeep_ppdeep.py new file mode 100644 index 0000000..266dc1d --- /dev/null +++ b/compare_ssdeep_ppdeep.py @@ -0,0 +1,521 @@ +#!/usr/bin/env python +""" +Compare ssdeep and ppdeep libraries to ensure they produce identical results. +Tests context triggered piecewise hashes (CTPH) / fuzzy hashes on both string and bytes objects. +""" + +import argparse +import os +import sys + + +def generate_test_data(): + """Generate 70 test objects: 35 strings and 35 bytes.""" + test_objects = [] + + # 35 string test cases (25 regular + 5 hex escape + 5 unicode escape) + string_tests = [ + "Hello, World!", + "The quick brown fox jumps over the lazy dog", + "Lorem ipsum dolor sit amet, consectetur adipiscing elit", + "a" * 100, + "b" * 500, + "Test with special chars: !@#$%^&*()_+-={}[]|:;<>?,./", + "Multiline\nstring\nwith\nnewlines", + "Tab\tseparated\tvalues", + "Unicode test: café, naïve, 日本語", + "Email: test@example.com, URL: https://example.com", + 'JSON-like: {"key": "value", "number": 123}', + 'XML-like: content', + "Base64-like: SGVsbG8gV29ybGQh", + "Hex-like: 48656c6c6f20576f726c6421", + "Long repeated pattern: " + "pattern" * 100, + "Mixed case: AbCdEfGhIjKlMnOpQrStUvWxYz", + "Numbers only: 1234567890" * 10, + "Whitespace variations: spaces tabs\ttabs ", + "Path-like: /usr/local/bin/python3.13", + "Windows path: C:\\Users\\Admin\\Documents\\file.txt", + "SQL-like: SELECT * FROM users WHERE id = 1", + "HTML:
<html><body><h1>Title</h1></body></html>
", + "Empty string", + "Single char: x", + "Very long string: " + "x" * 10000, + # Hex escape sequences + "\x48\x65\x6c\x6c\x6f", # "Hello" in hex + "\x00\x01\x02\x03\x04", # Control characters + "Null byte test: \x00 middle", + "\xff\xfe\xfd", # High byte values + "Mixed: \x41\x42\x43 ABC", # Hex + regular + # Unicode escape sequences + "\u00e9\u00e0\u00fc", # é à ü + "\u4e2d\u6587", # 中文 (Chinese) + "Emoji: \U0001f600\U0001f44d", # 😀👍 + "\u03b1\u03b2\u03b3", # αβγ (Greek) + "Mixed: \u2665 hearts \u2660 spades", # ♥ ♠ + ] + + for i, s in enumerate(string_tests): + test_objects.append( + { + "type": "string", + "id": f"str_{i + 1}", + "data": s, + } + ) + + # Bytes test cases. Some are byte literals, some are UTF-8 encoded strings, and some are binary data. + bytes_tests = [ + b"Hello, World!", + b"The quick brown fox jumps over the lazy dog", + b"Lorem ipsum dolor sit amet, consectetur adipiscing elit", + b"a" * 100, + b"b" * 500, + b"Test with special chars: !@#$%^&*()_+-={}[]|:;<>?,./", + b"Multiline\nstring\nwith\nnewlines", + b"Tab\tseparated\tvalues", + b"Email: test@example.com, URL: https://example.com", + b'JSON-like: {"key": "value", "number": 123}', + b'XML-like: content', + b"Base64-like: SGVsbG8gV29ybGQh", + b"Hex-like: 48656c6c6f20576f726c6421", + b"Long repeated pattern: " + b"pattern" * 100, + b"Mixed case: AbCdEfGhIjKlMnOpQrStUvWxYz", + b"Numbers only: 1234567890" * 10, + b"Whitespace variations: spaces tabs\ttabs ", + b"Path-like: /usr/local/bin/python3.13", + b"Windows path: C:\\Users\\Admin\\Documents\\file.txt", + b"SQL-like: SELECT * FROM users WHERE id = 1", + b"HTML:
<html><body><h1>Title</h1></body></html>
", + b"Empty bytes", + b"Single char: x", + b"Very long bytes: " + b"x" * 10000, + bytes(range(256)), # All possible byte values + # Hex escape sequences + b"\x48\x65\x6c\x6c\x6f", # b"Hello" in hex + b"\x00\x01\x02\x03\x04", # Control characters + b"Null byte test: \x00 middle", + b"\xff\xfe\xfd", # High byte values + b"Mixed: \x41\x42\x43 ABC", # Hex + regular + # Unicode escape sequences (as UTF-8 encoded bytes) + "\u00e9\u00e0\u00fc".encode("utf-8"), # é à ü + "\u4e2d\u6587".encode("utf-8"), # 中文 (Chinese) + "Emoji: \U0001f600\U0001f44d".encode("utf-8"), # 😀👍 + "\u03b1\u03b2\u03b3".encode("utf-8"), # αβγ (Greek) + "Mixed: \u2665 hearts \u2660 spades".encode("utf-8"), # ♥ ♠ + # 32 + b"\xc1C*\xa3 \xb3D@\xe4\x08\xab\xbc\x94\xc0W\x8d\x9e\xbc}\\{\x8d*\x07\x9f\xf9\xc8\x04\t\xba2\xa9", + b"\xa6`\x02\xda\x9aB\xf1Up\x1f\x876Ay\x07\xf7}\x10\xd7\xb7\xfa\x8fWs\x9d\\}X\xff\xe2\x9c\x8e", + b"\xa3\xcf\x99\xdd[\x9a?e\x0f\xbf]\xdd\x9e\xcb.\x17V`3\xbf\xed&T\xa6\xecN\x10\xfd\xc5\xda8\x1d", + b'\\\x07\xc4O\xf05\xb9\x19Z\xb9\xdb\x9a\xd5\xed\x93\x9d\xc7`\xab\xb6\xa8\x99\xc4\x98"\xde\xde9\xfdb<\x9b', + b"\x98\x11\x8d]\x93\x82\xaaEx~<}\\\x1a\xf9!\xae\xcc\x8cn-E\xe9\xa8\xe3\x0f\x0f\xa6\xa1\xdc\xd2\xe9\xa1", + # 33 + b"\x98\x11\x8d]\x93\x82\xaaEx~<}\\\x1a\xf9!\xae\xcc\x8cn-E\xe9\xa8\xe3\x0f\x0f\xa6\xa1\xdc\xd2\xe9\xa1", + # 64 + b"z\x06vf\xbb\xf9J*|4\\\xdd\x17\xd7\x8f9\xb3\x9a\r\xd2\xa2\xf0\xe3\x0f\xe4\xb5\\|\x7f\x1cq\xd0\x01\xaf\x86\x8b\xd1~\xf8*-\xf7\x12\xb5):Q\xa5z\xdc\xcb\x0bv|\x06c\xf0\xd2s\x18\rb\xd8\xed" + # 65 + b"\x14\xbc\x91V2K\x8a\xce>\xdb\xf1\xe8\x1e\xef\xc0F\xaf\xb6\xd6(\xd2\xda\xd4#\xf6\x7fl\n\x7fT`-m\xd7\x1c;\x90X\x91\x80\x88\x99\xb6-h-\xd9\xdcx\xfb\xa6Tn\x87Pw\xfa\x9e:\x00*\\g\x1f\x80" + b"\x00\x7f\xc5@\xc7\x18\x04\x995\x03\x9a\x0e\x8e\xb39\x13\x17\xb1SQ\xe2\xab\xb9\xe0D\x86,\x11\x9d7\xb1\xa2<\x95\x8b\x9e\xb8\xfe;\x9c\xca\xd3\x82'\x91\xe8\xd8f\xe6+\x9f\x12w\x16S\xbah\xa9\xee\xbd!\xc4+\xa9\xfe", + # 127 + b'\xfa\xe8p\x08\x8b)T[\xc0\xeaS\x05-\xea\xa1\xed\x85V\xe0\xee\xab\xef\x17\x16(k\x14\rZB)\xbe\xf5!"\xa3R\xb2\x9a\x0c\xd0\xbb\xa5\x81\xcbq\x9eP_L\xc4\x9aP\xdf\x1a\xbcz\xb9\xb1\xa1\x07\x9eC\x12\xb1\xe6{\xf8\x18\x02\xf0B\xe1s\xbf\xb7\x9c\xf8e\\\x11_-\xef2o\xea\x8c<\x05\t\x10\xbdI=(\xf8\n\xffa\x8d\xc4\xd7\x11N\xe3\xf2\xd5\x9bQ#\x94\xe5\xf9\xc7\x1a\xda\xbeR{\xe9\xcf@\xf8\tZM' + # 128 + b"cal\xbc\xaa\xfb\xc3@\x9a\x9euCi\xaf\xc5\xd8$\x8a\xe5\xabE\x85D\xd3\x161i)\xe5\xd4Uj\xdd\xf6\xe6\x08\x1e\xeb\xa8\x8eLd\x12\x81\xdd\xbbF\xc4\xc1\x17\xfd\xda\xb4W\xad_\x90\xadB\x140\xbdFI\xbeL\x9e\xc2\xc6\x03z-t\xbf\x84\xf33\xcd\xaa\x1ds0L\x1c\xaa\x16o\x1d\x078\xa8\x9ez\xa4\xb2\xe3on\xd5*\xbb\x9e?\x1dvf\xc8\xa0\xceHl\xd1\x1b_{\xe7\xdc\x19\x0c2)\r\xed\xa3\xf3\x13aw", + ] + + for i, b in enumerate(bytes_tests): + test_objects.append( + { + "type": "bytes", + "id": f"bytes_{i + 1}", + "data": b, + } + ) + + # Add file tests + file_paths = [ + "/usr/bin/setsid", + "/usr/bin/locale", + "/usr/bin/last", + "/usr/bin/perl", + # Add more file paths here as needed + ] + + for idx, file_path in enumerate(file_paths, start=1): + try: + with open(file_path, "rb") as f: + file_data = f.read() + test_objects.append( + { + "type": "file", + "id": f"file_{idx}", + "data": file_data, + } + ) + print(f"Added file test {idx}: {file_path} ({len(file_data)} bytes)") + except FileNotFoundError: + print(f"Warning: File '{file_path}' not found, skipping") + except Exception as e: + print(f"Warning: Could not read file '{file_path}': {e}") + + return test_objects + + +def compare_libraries(): + """Compare ssdeep and ppdeep hash results.""" + + # 
Try importing both libraries + try: + import ssdeep + + has_ssdeep = True + except ImportError as e: + print(f"Warning: ssdeep not available: {e}") + has_ssdeep = False + + try: + import ppdeep + + has_ppdeep = True + except ImportError as e: + print(f"Warning: ppdeep not available: {e}") + has_ppdeep = False + + if not has_ssdeep and not has_ppdeep: + print("Error: Neither ssdeep nor ppdeep is installed!") + sys.exit(1) + + if not has_ssdeep: + print("Warning: Only ppdeep is available. Cannot compare.") + return + + if not has_ppdeep: + print("Warning: Only ssdeep is available. Cannot compare.") + return + + print("=" * 80) + print("SSDEEP vs PPDEEP COMPARISON") + print("=" * 80) + print(f"ssdeep version: {ssdeep.__version__ if hasattr(ssdeep, '__version__') else 'unknown'}") + print(f"ppdeep version: {ppdeep.__version__ if hasattr(ppdeep, '__version__') else 'unknown'}") + print() + + # Generate test data + test_objects = generate_test_data() + string_count = len([t for t in test_objects if t["type"] == "string"]) + bytes_count = len([t for t in test_objects if t["type"] == "bytes"]) + file_count = len([t for t in test_objects if t["type"] == "file"]) + print(f"Testing {len(test_objects)} objects ({string_count} strings, {bytes_count} bytes, {file_count} files)\n") + + # Track results + total_tests = 0 + matching = 0 + mismatches = [] + ssdeep_errors = [] + ppdeep_errors = [] + + # Test each object + for obj in test_objects: + total_tests += 1 + obj_id = obj["id"] + obj_type = obj["type"] + data = obj["data"] + + # Get ssdeep hash + try: + if obj_type == "string": + ssdeep_hash = ssdeep.hash(data) + else: + ssdeep_hash = ssdeep.hash(data) + except Exception as e: + ssdeep_hash = None + ssdeep_errors.append({"id": obj_id, "error": str(e)}) + + # Get ppdeep hash + try: + if obj_type == "string": + ppdeep_hash = ppdeep.hash(data) + else: + ppdeep_hash = ppdeep.hash(data) + except Exception as e: + ppdeep_hash = None + ppdeep_errors.append({"id": obj_id, "error": str(e)}) + + # Compare results + if ssdeep_hash is not None and ppdeep_hash is not None: + if ssdeep_hash == ppdeep_hash: + matching += 1 + print(f"✓ {obj_id:15} MATCH") + else: + mismatches.append( + { + "id": obj_id, + "type": obj_type, + "ssdeep": ssdeep_hash, + "ppdeep": ppdeep_hash, + } + ) + print(f"✗ {obj_id:15} MISMATCH") + elif ssdeep_hash is None and ppdeep_hash is None: + print(f"⚠ {obj_id:15} BOTH FAILED") + elif ssdeep_hash is None: + print(f"⚠ {obj_id:15} SSDEEP FAILED") + else: + print(f"⚠ {obj_id:15} PPDEEP FAILED") + + # Print summary + print("\n" + "=" * 80) + print("SUMMARY") + print("=" * 80) + print(f"Total tests: {total_tests}") + print(f"Matching hashes: {matching} ({matching / total_tests * 100:.1f}%)") + print(f"Mismatches: {len(mismatches)}") + print(f"ssdeep errors: {len(ssdeep_errors)}") + print(f"ppdeep errors: {len(ppdeep_errors)}") + + # Print detailed mismatch information + if mismatches: + print("\n" + "=" * 80) + print("MISMATCHES DETAIL") + print("=" * 80) + for mismatch in mismatches: + print(f"\nID: {mismatch['id']} ({mismatch['type']})") + print(f" ssdeep: {mismatch['ssdeep']}") + print(f" ppdeep: {mismatch['ppdeep']}") + + # Print error details + if ssdeep_errors: + print("\n" + "=" * 80) + print("SSDEEP ERRORS") + print("=" * 80) + for error in ssdeep_errors: + print(f"{error['id']}: {error['error']}") + + if ppdeep_errors: + print("\n" + "=" * 80) + print("PPDEEP ERRORS") + print("=" * 80) + for error in ppdeep_errors: + print(f"{error['id']}: {error['error']}") + + # Final verdict + 
print("\n" + "=" * 80) + if matching == total_tests: + print("✓ RESULT: All hashes match! Libraries are compatible.") + print("=" * 80) + sys.exit(0) + else: + print("✗ RESULT: Differences detected! Review mismatches before swapping libraries.") + print("=" * 80) + sys.exit(1) + + +def find_mismatch_file(start_path="/"): + """ + Recursively search filesystem for a file where ssdeep and ppdeep produce different hashes. + + Args: + start_path: Directory to start searching from (default: "/") + """ + try: + import ssdeep + except ImportError: + print("Error: ssdeep not installed") + sys.exit(1) + + try: + import ppdeep + except ImportError: + print("Error: ppdeep not installed") + sys.exit(1) + + print(f"Searching for hash mismatch starting from: {start_path}") + print("Press Ctrl+C to stop\n") + + files_checked = 0 + errors_skipped = 0 + + for root, dirs, files in os.walk(start_path): + # Skip common system/virtual directories + dirs[:] = [ + d + for d in dirs + if d + not in [ + ".git", + "node_modules", + "__pycache__", + ".venv", + "venv", + "Library", + "Applications", + "System", + "Volumes", + "dev", + "proc", + "sys", + ] + ] + + for filename in files: + filepath = os.path.join(root, filename) + + # Skip symlinks and non-regular files + try: + if not os.path.isfile(filepath) or os.path.islink(filepath): + continue + except (OSError, PermissionError): + continue + + files_checked += 1 + if files_checked % 100 == 0: + print(f"Checked {files_checked} files...", end="\r") + + try: + ssdeep_hash = ssdeep.hash_from_file(filepath) + ppdeep_hash = ppdeep.hash_from_file(filepath) + + if ssdeep_hash != ppdeep_hash: + print(f"\n\n{'=' * 80}") + print("MISMATCH FOUND!") + print(f"{'=' * 80}") + print(f"File: {filepath}") + print(f"Size: {os.path.getsize(filepath)} bytes") + print(f"ssdeep: {ssdeep_hash}") + print(f"ppdeep: {ppdeep_hash}") + print(f"{'=' * 80}") + print(f"Total files checked: {files_checked}") + # return filepath + + except (PermissionError, OSError, IOError, Exception): + errors_skipped += 1 + continue + + print("\n\nSearch complete. No mismatches found.") + print(f"Files checked: {files_checked}") + print(f"Errors skipped: {errors_skipped}") + return None + + +def find_random_mismatch(num_tests=10000, length=32): + """ + Generate random byte strings and test for hash mismatches. 
+ + Args: + num_tests: Number of random strings to generate (default: 10000) + length: Length of each random byte string (default: 32) + """ + import random + + try: + import ssdeep + except ImportError: + print("Error: ssdeep not installed") + sys.exit(1) + + try: + import ppdeep + except ImportError: + print("Error: ppdeep not installed") + sys.exit(1) + + print(f"Generating {num_tests} random byte strings of length {length}") + print("Press Ctrl+C to stop\n") + + tests_run = 0 + matches = 0 + mismatches_found = [] + + for i in range(num_tests): + tests_run += 1 + if tests_run % 100 == 0: + print(f"Tested {tests_run}/{num_tests} random strings...", end="\r") + + # Generate random bytes + random_bytes = bytes(random.randint(0, 255) for _ in range(length)) + + try: + ssdeep_hash = ssdeep.hash(random_bytes) + ppdeep_hash = ppdeep.hash(random_bytes) + + if ssdeep_hash == ppdeep_hash: + matches += 1 + else: + mismatches_found.append( + { + "test_num": tests_run, + "data": random_bytes, + "ssdeep": ssdeep_hash, + "ppdeep": ppdeep_hash, + } + ) + print(f"\n\n{'=' * 80}") + print("MISMATCH FOUND!") + print(f"{'=' * 80}") + print(f"Test number: {tests_run}") + print(f"Random bytes (hex): {random_bytes.hex()}") + print(f"Random bytes (repr): {random_bytes!r}") + print(f"Length: {len(random_bytes)}") + print(f"ssdeep: {ssdeep_hash}") + print(f"ppdeep: {ppdeep_hash}") + print(f"{'=' * 80}") + # Don't return immediately, continue testing to find all mismatches + + except Exception as e: + print(f"\nError testing random bytes at iteration {tests_run}: {e}") + continue + + # Print summary + print("\n\nRandom testing complete.") + print(f"Tests run: {tests_run}") + print(f"Matches: {matches}") + print(f"Mismatches: {len(mismatches_found)}") + + if len(mismatches_found) == 0: + print("\n✓ No mismatches found! Libraries appear compatible.") + else: + print(f"\n✗ Found {len(mismatches_found)} mismatch(es)!") + print("\nAll mismatches:") + for idx, mismatch in enumerate(mismatches_found, 1): + print(f"\n {idx}. 
Test #{mismatch['test_num']}: {mismatch['data'].hex()[:60]}...") + + return mismatches_found + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Compare ssdeep and ppdeep library hash outputs") + parser.add_argument( + "--find-mismatch", + action="store_true", + help="Recursively search filesystem for a file with mismatched hashes", + ) + parser.add_argument( + "--start-path", + type=str, + default="/", + help="Starting directory for mismatch search (default: /)", + ) + parser.add_argument( + "--random-test", + action="store_true", + help="Generate random byte strings to find hash mismatches", + ) + parser.add_argument( + "--num-tests", + type=int, + default=10000, + help="Number of random tests to run (default: 10000)", + ) + parser.add_argument( + "--length", + type=int, + default=32, + help="Length of random byte strings (default: 32)", + ) + + args = parser.parse_args() + + if args.find_mismatch: + find_mismatch_file(args.start_path) + elif args.random_test: + find_random_mismatch(args.num_tests, args.length) + else: + compare_libraries() diff --git a/ppdeep.py b/ppdeep.py index 4ddd77e..a037783 100755 --- a/ppdeep.py +++ b/ppdeep.py @@ -28,7 +28,7 @@ ''' __title__ = 'ppdeep' -__version__ = '20251115' +__version__ = '20260209' __author__ = 'Marcin Ulikowski' import os @@ -78,6 +78,8 @@ def _spamsum(stream, slen): block_hash1 = block_hash2 = int(HASH_INIT) hash_string1 = hash_string2 = str() + # Track the last character stored at each reset point (for rh==0 case at end) + last_char1 = last_char2 = str() stream.seek(0) buf = stream.read(STREAM_BUFF_SIZE) @@ -88,21 +90,30 @@ def _spamsum(stream, slen): block_hash2 = sum_table[block_hash2][c] roll_n = next(roll_c) - roll_h2 = roll_h2 - roll_h1 + (ROLL_WINDOW * b) - roll_h1 = roll_h1 + b - roll_win[roll_n] + # Must use 32-bit unsigned arithmetic to match C's uint32_t behavior + # In C, subtraction that goes negative wraps to large positive values + roll_h2 = (roll_h2 - roll_h1 + (ROLL_WINDOW * b)) & 0xFFFFFFFF + roll_h1 = (roll_h1 + b - roll_win[roll_n]) & 0xFFFFFFFF roll_win[roll_n] = b - roll_h3 = (roll_h3 << 5) & 0xFFFFFFFF - roll_h3 ^= b + roll_h3 = ((roll_h3 << 5) ^ b) & 0xFFFFFFFF - rh = roll_h1 + roll_h2 + roll_h3 + rh = (roll_h1 + roll_h2 + roll_h3) & 0xFFFFFFFF if (rh % block_size) == (block_size - 1): + # Always store the character (C stores to digest[dindex]) + last_char1 = B64[block_hash1] if len(hash_string1) < (SPAMSUM_LENGTH - 1): - hash_string1 += B64[block_hash1] + hash_string1 += last_char1 + last_char1 = str() # Clear after appending block_hash1 = HASH_INIT + # Only track halfdigest while dindex < SPAMSUM_LENGTH/2 + if len(hash_string1) < (SPAMSUM_LENGTH // 2): + last_char2 = str() # Clear like C's halfdigest = '\0' if (rh % (block_size * 2)) == ((block_size * 2) - 1): + last_char2 = B64[block_hash2] if len(hash_string2) < ((SPAMSUM_LENGTH // 2) - 1): - hash_string2 += B64[block_hash2] + hash_string2 += last_char2 + last_char2 = str() # Clear after appending block_hash2 = HASH_INIT buf = stream.read(STREAM_BUFF_SIZE) @@ -110,9 +121,17 @@ def _spamsum(stream, slen): if block_size > BLOCKSIZE_MIN and len(hash_string1) < (SPAMSUM_LENGTH // 2): block_size = (block_size // 2) else: + # Append final character - two paths like C code: + # 1. If rh != 0: use current hash value + # 2. 
If rh == 0 but we have a stored char: use that if rh != 0: hash_string1 += B64[block_hash1] hash_string2 += B64[block_hash2] + else: + if last_char1: + hash_string1 += last_char1 + if last_char2: + hash_string2 += last_char2 break return '{0}:{1}:{2}'.format(block_size, hash_string1, hash_string2)
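
Reviewer note: below is a minimal, standalone sketch (not part of the patch) of the uint32 wraparound that the new `& 0xFFFFFFFF` masks emulate. `wrap32`, `incoming`, and `outgoing` are illustrative names invented for this sketch only; `roll_h1`, `rh`, and `block_size` refer to the variables in _spamsum().

# Standalone illustration of the C uint32_t wraparound the patch emulates.
# wrap32 is a hypothetical helper for this sketch; ppdeep applies the mask
# inline in _spamsum().
def wrap32(value):
    """Reduce a Python int to the value a C uint32_t variable would hold."""
    return value & 0xFFFFFFFF

# Rolling-hash style update: add the byte entering the window, subtract the
# byte leaving it. In C this runs on uint32_t and silently wraps; in Python
# the same expression yields a negative arbitrary-precision int.
roll_h1, incoming, outgoing = 5, 3, 200
unmasked = roll_h1 + incoming - outgoing        # -192 in plain Python
masked = wrap32(roll_h1 + incoming - outgoing)  # 4294967104, matching C

print(unmasked, masked)

# rh = roll_h1 + roll_h2 + roll_h3 drives the `rh % block_size` trigger test,
# so a negative intermediate shifts which offsets reset the piecewise hash
# and makes ppdeep's digest diverge from libfuzzy/ssdeep on some inputs.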