Hash Functions
#️⃣ Blake3 High-Performance Hash Function
📖 Overview
Blake3 is a cryptographic hash function that is much faster than MD5, SHA-1, SHA-2, SHA-3, and Blake2. It's designed for high performance while maintaining strong security properties, with built-in support for parallelism, streaming, and extensible output.
Blake3 combines the original Blake2 design with a tree structure that allows for unlimited parallelism and incremental updates. It’s a single algorithm that securely covers all use cases, including hashing, keyed hashing (MAC), key derivation (KDF), and extensible output (XOF).
Key Features
- Extremely Fast: Faster than Blake2 and all SHA variants
- Parallelizable: Scales with CPU cores using tree hashing
- Streaming: Constant memory usage for any input size
- Incremental: Supports incremental updates efficiently
- Versatile: Single algorithm for hash, MAC, KDF, and XOF
- No Configuration: One algorithm, no variants to choose
Technical Details
- Output Size: 256 bits default, unlimited available
- Block Size: 64 bytes
- Chunk Size: 1024 bytes
- Tree Structure: Binary tree whose depth grows with the input length
- Security Level: 128-bit collision resistance on the default 256-bit output; not vulnerable to length-extension attacks
Algorithm Parameters
| Parameter | Value |
|---|---|
| Default Output | 256 bits (32 bytes) |
| Block Size | 64 bytes |
| Chunk Size | 1024 bytes |
| Key Size (Keyed Mode) | 256 bits |
| Rounds per Block | 7 |
| Parallelism | Unlimited |
Usage Examples
Basic Hashing
from metamui_crypto import Blake3
# Simple hashing
message = b"Hello, Blake3!"
hash_value = Blake3.hash(message)
print(f"Blake3: {hash_value.hex()}")
# Hash with custom output length
hash_512 = Blake3.hash(message, length=64) # 512 bits
print(f"Blake3-512: {hash_512.hex()}")
# Streaming large files
def hash_file_blake3(filepath):
    hasher = Blake3.new()
    with open(filepath, 'rb') as f:
        while chunk := f.read(65536):  # 64KB chunks
            hasher.update(chunk)
    return hasher.finalize()
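A quick call of the streaming helper above (the file path is just a placeholder):
# Hash a large file without loading it all into memory
digest = hash_file_blake3("backups/archive.tar")
print(f"File digest: {digest.hex()}")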
Keyed Hashing (MAC)
from metamui_crypto import Blake3
import hmac
import os
# Generate random key
key = os.urandom(32) # 256-bit key
# Keyed hashing (MAC)
hasher = Blake3.new_keyed(key)
hasher.update(b"Message to authenticate")
mac = hasher.finalize()
# One-shot keyed hash
mac2 = Blake3.keyed_hash(key, b"Message to authenticate")
assert mac == mac2
# Verify MAC
def verify_blake3_mac(key, message, expected_mac):
    computed = Blake3.keyed_hash(key, message)
    # Constant-time comparison to avoid timing side channels
    return hmac.compare_digest(computed, expected_mac)
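A quick usage check of verify_blake3_mac above, showing that tampering with the message or using the wrong key fails verification:
message = b"Message to authenticate"
assert verify_blake3_mac(key, message, mac)                  # genuine message verifies
assert not verify_blake3_mac(key, b"Tampered message", mac)  # modified message is rejected
assert not verify_blake3_mac(os.urandom(32), message, mac)   # wrong key is rejected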
Key Derivation (KDF)
from metamui_crypto import Blake3
import os
# Derive keys from a master key
master_key = os.urandom(32)
# Derive multiple keys with different contexts
encryption_key = Blake3.derive_key(
    context="MyApp v1.0 encryption key",
    key_material=master_key,
    length=32
)
mac_key = Blake3.derive_key(
    context="MyApp v1.0 MAC key",
    key_material=master_key,
    length=32
)
# Extended output for multiple keys
kdf = Blake3.new_derive_key("MyApp v1.0 key derivation")
kdf.update(master_key)
# Generate multiple keys from single KDF
key1 = kdf.finalize(length=32)
kdf.seek(32) # Move to next 32 bytes
key2 = kdf.finalize(length=32)
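A common pattern is deriving one independent subkey per purpose from a table of context strings. This is a minimal sketch that assumes the same Blake3.derive_key signature used above:
# Derive one independent subkey per purpose from a single master key
contexts = {
    "encryption": "MyApp v1.0 encryption key",
    "mac": "MyApp v1.0 MAC key",
    "tokens": "MyApp v1.0 token signing key",
}
subkeys = {
    name: Blake3.derive_key(context=ctx, key_material=master_key, length=32)
    for name, ctx in contexts.items()
}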
Extensible Output (XOF)
from metamui_crypto import Blake3
# Use as XOF (eXtensible Output Function)
hasher = Blake3.new()
hasher.update(b"seed data")
# Generate arbitrary length output
output_128_bits = hasher.finalize(length=16)
output_256_bits = hasher.finalize(length=32)
output_1024_bits = hasher.finalize(length=128)
# Streaming output
hasher = Blake3.new()
hasher.update(b"seed")
xof = hasher.finalize_xof()
# Read output in chunks
chunk1 = xof.read(32) # First 32 bytes
chunk2 = xof.read(32) # Next 32 bytes
chunk3 = xof.read(64) # Next 64 bytes
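One common XOF use is expanding a short seed into a reproducible byte stream of arbitrary length, e.g. for deterministic test data. A minimal sketch using the finalize_xof reader shown above:
def deterministic_bytes(seed: bytes, n: int) -> bytes:
    """Expand a seed into n reproducible pseudorandom bytes via the Blake3 XOF."""
    hasher = Blake3.new()
    hasher.update(seed)
    return hasher.finalize_xof().read(n)

# The same seed always yields the same stream
assert deterministic_bytes(b"test seed", 64) == deterministic_bytes(b"test seed", 64)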
Parallel Hashing
from metamui_crypto import Blake3
import concurrent.futures
import multiprocessing
def parallel_blake3(data_chunks):
    """Hash chunks in parallel, then combine them using Blake3's tree structure"""
    # Hash each chunk in parallel
    with concurrent.futures.ThreadPoolExecutor() as executor:
        chunk_hashes = list(executor.map(Blake3.hash, data_chunks))
    # Combine the per-chunk hashes using the library's tree-combine helper
    return Blake3.combine_hashes(chunk_hashes)
# Example: Hash large file in parallel
def parallel_file_hash(filepath, chunk_size=1024*1024):  # 1MB chunks
    chunks = []
    with open(filepath, 'rb') as f:
        while chunk := f.read(chunk_size):
            chunks.append(chunk)
    if len(chunks) == 1:
        return Blake3.hash(chunks[0])
    return parallel_blake3(chunks)
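If the library's hash() accepts a threads argument (as used in the benchmark and pitfall examples later in this page), letting it parallelize internally is usually simpler and safer than chunking by hand. A hedged sketch:
import multiprocessing

def simple_parallel_file_hash(filepath):
    # Reads the whole file into memory; for constant-memory hashing use the
    # streaming hash_file_blake3 helper shown earlier instead
    with open(filepath, 'rb') as f:
        data = f.read()
    return Blake3.hash(data, threads=multiprocessing.cpu_count())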
Incremental Hashing
from metamui_crypto import Blake3
class IncrementalHasher:
    """Incremental hashing with Blake3"""

    def __init__(self):
        self.hasher = Blake3.new()
        self.finalized = False

    def update(self, data):
        if self.finalized:
            raise ValueError("Cannot update after finalization")
        self.hasher.update(data)
        return self

    def clone(self):
        """Clone the current state for branching"""
        new_hasher = IncrementalHasher()
        new_hasher.hasher = self.hasher.clone()
        return new_hasher

    def finalize(self, length=32):
        """Get the hash without preventing further updates"""
        cloned = self.hasher.clone()
        return cloned.finalize(length=length)

    def finalize_and_reset(self, length=32):
        """Get the hash and reset for reuse"""
        result = self.hasher.finalize(length=length)
        self.hasher = Blake3.new()
        return result
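A usage sketch of the clone-based branching above: hash a shared prefix once, then branch the state for two different suffixes without reprocessing the prefix:
# Hash a large common prefix only once
base = IncrementalHasher()
base.update(b"common header: " * 1000)

# Branch the state for two different payloads
branch_a = base.clone().update(b"payload A")
branch_b = base.clone().update(b"payload B")

digest_a = branch_a.finalize()
digest_b = branch_b.finalize()
assert digest_a != digest_b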
Implementation Details
Tree Structure
Blake3 uses a binary tree structure for parallel processing:
                  Root Hash
                 /         \
          Hash AB           Hash CD
          /     \           /     \
     Hash A   Hash B   Hash C   Hash D
       |        |        |        |
    Chunk A  Chunk B  Chunk C  Chunk D
Compression Function
# Blake3 compression function (simplified; message-word scheduling omitted)
def compress(chaining_value, block, counter, block_len, flags):
    # Initialize the 16-word state: 8-word chaining value, 4 IV constants,
    # the 64-bit block counter, the block length, and the domain/position flags
    state = list(chaining_value) + list(IV[:4]) + [
        counter & 0xFFFFFFFF,
        (counter >> 32) & 0xFFFFFFFF,
        block_len,
        flags,
    ]
    # 7 rounds of mixing; in the real algorithm each g() call also mixes in
    # two words of the block according to the round's message schedule
    for _ in range(7):
        # Column step
        g(state, 0, 4, 8, 12)
        g(state, 1, 5, 9, 13)
        g(state, 2, 6, 10, 14)
        g(state, 3, 7, 11, 15)
        # Diagonal step
        g(state, 0, 5, 10, 15)
        g(state, 1, 6, 11, 12)
        g(state, 2, 7, 8, 13)
        g(state, 3, 4, 9, 14)
    # Feed-forward: fold the two halves of the state together
    for i in range(8):
        state[i] ^= state[i + 8]
        state[i + 8] ^= chaining_value[i]
    # The first 8 words are the new chaining value
    return state[:8]
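The g() mixing function used above is Blake3's quarter-round (the same structure as Blake2s, with rotation constants 16, 12, 8, and 7). A sketch follows; mx and my are the two message words the real algorithm feeds into every call, which the simplified compress above omits:
def rotr32(x, n):
    """Rotate a 32-bit word right by n bits."""
    return ((x >> n) | (x << (32 - n))) & 0xFFFFFFFF

def g(state, a, b, c, d, mx=0, my=0):
    """Mix four state words and two message words."""
    state[a] = (state[a] + state[b] + mx) & 0xFFFFFFFF
    state[d] = rotr32(state[d] ^ state[a], 16)
    state[c] = (state[c] + state[d]) & 0xFFFFFFFF
    state[b] = rotr32(state[b] ^ state[c], 12)
    state[a] = (state[a] + state[b] + my) & 0xFFFFFFFF
    state[d] = rotr32(state[d] ^ state[a], 8)
    state[c] = (state[c] + state[d]) & 0xFFFFFFFF
    state[b] = rotr32(state[b] ^ state[c], 7)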
Chunk Processing
# Process 1024-byte chunks
CHUNK_SIZE = 1024
BLOCK_SIZE = 64
def process_chunk(chunk_data, key_words, chunk_counter):
    # Every chunk starts from the key words (the IV constants in unkeyed mode);
    # the chunk_counter passed to compress() is what distinguishes chunks
    chaining_value = key_words
    blocks = [chunk_data[i:i+BLOCK_SIZE] for i in range(0, len(chunk_data), BLOCK_SIZE)]
    for i, block in enumerate(blocks):
        # Flag the first and last block of the chunk (a one-block chunk gets both)
        flags = 0
        if i == 0:
            flags |= CHUNK_START
        if i == len(blocks) - 1:
            flags |= CHUNK_END
        chaining_value = compress(
            chaining_value,
            block,
            chunk_counter,
            len(block),
            flags
        )
    return chaining_value
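The chunk chaining values then become the leaves of the tree shown earlier; each parent node costs one additional compression over its two children's chaining values, and the topmost compression is flagged as the root when the final output is produced. A minimal sketch reusing the simplified compress above (PARENT stands for Blake3's parent-node flag; key_words are the same values passed to process_chunk):
def parent_cv(left_child_cv, right_child_cv, key_words):
    """Combine two child chaining values into their parent's chaining value."""
    # The two 32-byte child CVs form a single 64-byte block; parent nodes
    # always use counter 0 and the PARENT flag
    block = left_child_cv + right_child_cv
    return compress(key_words, block, 0, 64, PARENT)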
Performance Characteristics
Speed Benchmarks
| Algorithm | Speed (GB/s) | Relative Performance |
|---|---|---|
| Blake3 | 3.0-15.0 | 1.0x (baseline) |
| Blake2b | 1.0-1.2 | 0.3x |
| SHA-256 | 0.3-0.5 | 0.1x |
| SHA-512 | 0.5-0.8 | 0.2x |
| SHA-3-256 | 0.2-0.4 | 0.08x |
| MD5 | 0.6-0.8 | 0.2x |
Parallelism Scaling
import os
import time
import multiprocessing
from metamui_crypto import Blake3

def benchmark_parallelism():
    data = os.urandom(100 * 1024 * 1024)  # 100MB
    # Single-threaded
    start = time.time()
    hash_single = Blake3.hash(data)
    single_time = time.time() - start
    # Multi-threaded
    cores = multiprocessing.cpu_count()
    start = time.time()
    hash_parallel = Blake3.hash(data, threads=cores)
    parallel_time = time.time() - start
    assert hash_single == hash_parallel  # Result is independent of thread count
    print(f"Single-threaded: {single_time:.3f}s")
    print(f"Multi-threaded ({cores} cores): {parallel_time:.3f}s")
    print(f"Speedup: {single_time/parallel_time:.2f}x")
Security Properties
Cryptographic Properties
- Collision Resistance: 128-bit minimum
- Preimage Resistance: 256-bit for full output
- Second Preimage: 256-bit for full output
- Length Extension: Not vulnerable (output comes from a distinct root finalization, not from exposing the internal chaining state)
- Indifferentiability: Indifferentiable from a random oracle
Security Features
# Domain separation for different uses
DOMAIN_HASH = 0
DOMAIN_KEYED_HASH = 1
DOMAIN_DERIVE_KEY = 2
def blake3_with_domain(data, domain):
    """Hash with a manual domain-separation prefix (illustrative only;
    Blake3's own modes already separate domains via internal flags)"""
    hasher = Blake3.new()
    hasher.update(bytes([domain]))
    hasher.update(data)
    return hasher.finalize()
# Automatic domain separation in API
hash_output = Blake3.hash(data) # Domain: hash
mac_output = Blake3.keyed_hash(key, data) # Domain: keyed_hash
kdf_output = Blake3.derive_key("context", key_material) # Domain: derive_key
Common Use Cases
1. File Integrity Verification
import json
from pathlib import Path
from metamui_crypto import Blake3

class FileIntegrityChecker:
    def __init__(self, manifest_path):
        self.manifest_path = manifest_path
        self.hashes = {}

    def scan_directory(self, directory):
        """Scan directory and compute Blake3 hashes"""
        for file_path in Path(directory).rglob('*'):
            if file_path.is_file():
                hash_value = self._hash_file(file_path)
                self.hashes[str(file_path)] = {
                    'hash': hash_value.hex(),
                    'size': file_path.stat().st_size,
                    'mtime': file_path.stat().st_mtime
                }

    def _hash_file(self, file_path):
        """Hash file using Blake3 with progress output"""
        hasher = Blake3.new()
        file_size = file_path.stat().st_size
        bytes_read = 0
        with open(file_path, 'rb') as f:
            while chunk := f.read(1024 * 1024):  # 1MB chunks
                hasher.update(chunk)
                bytes_read += len(chunk)
                # Progress output
                if file_size > 0:
                    progress = bytes_read / file_size * 100
                    print(f"\rHashing {file_path.name}: {progress:.1f}%", end='')
        print()  # New line after progress
        return hasher.finalize()

    def save_manifest(self):
        """Save integrity manifest"""
        with open(self.manifest_path, 'w') as f:
            json.dump(self.hashes, f, indent=2)

    def verify_integrity(self):
        """Verify files against manifest"""
        with open(self.manifest_path, 'r') as f:
            manifest = json.load(f)
        for file_path, expected in manifest.items():
            if not Path(file_path).exists():
                print(f"MISSING: {file_path}")
                continue
            actual_hash = self._hash_file(Path(file_path))
            if actual_hash.hex() != expected['hash']:
                print(f"MODIFIED: {file_path}")
            else:
                print(f"OK: {file_path}")
2. Content-Addressed Storage
from pathlib import Path
from metamui_crypto import Blake3

class ContentAddressedStorage:
    """Store data by Blake3 hash"""

    def __init__(self, storage_dir):
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(exist_ok=True)

    def store(self, data):
        """Store data and return its hash"""
        hash_value = Blake3.hash(data)
        hash_hex = hash_value.hex()
        # Use the hash as the filename (with subdirectories for scaling)
        subdir = self.storage_dir / hash_hex[:2] / hash_hex[2:4]
        subdir.mkdir(parents=True, exist_ok=True)
        file_path = subdir / hash_hex
        if not file_path.exists():
            file_path.write_bytes(data)
        return hash_hex

    def retrieve(self, hash_hex):
        """Retrieve data by hash"""
        file_path = self.storage_dir / hash_hex[:2] / hash_hex[2:4] / hash_hex
        if file_path.exists():
            data = file_path.read_bytes()
            # Verify integrity before returning
            if Blake3.hash(data).hex() == hash_hex:
                return data
            raise ValueError("Data corruption detected")
        return None

    def deduplicate(self, files):
        """Store files with deduplication"""
        stored = {}
        saved_bytes = 0
        for file_path in files:
            data = Path(file_path).read_bytes()
            hash_hex = self.store(data)
            if hash_hex in stored:
                saved_bytes += len(data)
                print(f"Duplicate: {file_path} -> {stored[hash_hex]}")
            else:
                stored[hash_hex] = file_path
        print(f"Saved {saved_bytes / 1024 / 1024:.1f} MB through deduplication")
3. Streaming Authentication
import hmac
from metamui_crypto import Blake3

class StreamAuthenticator:
    """Authenticate streaming data with Blake3"""

    def __init__(self, key):
        self.key = key

    def create_authenticated_stream(self, data_stream):
        """Add authentication to a data stream"""
        for i, chunk in enumerate(data_stream):
            # Compute a MAC over the chunk together with its sequence number
            hasher = Blake3.new_keyed(self.key)
            hasher.update(i.to_bytes(8, 'little'))  # Sequence number
            hasher.update(chunk)
            mac = hasher.finalize(length=16)  # 128-bit MAC
            # Yield the authenticated chunk
            yield {
                'sequence': i,
                'data': chunk,
                'mac': mac
            }

    def verify_authenticated_stream(self, auth_stream):
        """Verify and extract data from an authenticated stream"""
        expected_sequence = 0
        for auth_chunk in auth_stream:
            # Verify the sequence number (detects reordering and dropped chunks)
            if auth_chunk['sequence'] != expected_sequence:
                raise ValueError(
                    f"Sequence error: expected {expected_sequence}, got {auth_chunk['sequence']}"
                )
            # Verify the MAC
            hasher = Blake3.new_keyed(self.key)
            hasher.update(auth_chunk['sequence'].to_bytes(8, 'little'))
            hasher.update(auth_chunk['data'])
            expected_mac = hasher.finalize(length=16)
            if not hmac.compare_digest(auth_chunk['mac'], expected_mac):
                raise ValueError(f"MAC verification failed for chunk {expected_sequence}")
            expected_sequence += 1
            yield auth_chunk['data']
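Usage sketch: authenticate a sequence of chunks and verify it on the receiving side (the key and chunks are illustrative):
import os

key = os.urandom(32)
auth = StreamAuthenticator(key)

chunks = [b"first chunk", b"second chunk", b"third chunk"]
authenticated = list(auth.create_authenticated_stream(chunks))

# Verification yields the original chunks in order, or raises on tampering
recovered = list(auth.verify_authenticated_stream(authenticated))
assert recovered == chunks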
4. Password Hashing
# While Argon2 is recommended for passwords, Blake3 can be used
# in a pinch with a proper iterated construction
from metamui_crypto import Blake3

def blake3_password_hash(password, salt, iterations=100000):
    """Simple password hashing with Blake3 (use Argon2 in production)"""
    # Initial hash binds the salt and password under a dedicated KDF context
    hasher = Blake3.new_derive_key("password hashing v1")
    hasher.update(salt)
    hasher.update(password.encode())
    state = hasher.finalize(length=64)
    # Iterate for time hardness (note: no memory hardness, unlike Argon2)
    for i in range(iterations):
        hasher = Blake3.new()
        hasher.update(state)
        hasher.update(i.to_bytes(4, 'little'))
        state = hasher.finalize(length=64)
    return state[:32]  # Return a 256-bit key
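A matching verification helper for the construction above, using a constant-time comparison; the salt and iteration count must match those used when the hash was stored (this helper is illustrative, not part of the library):
import hmac

def blake3_password_verify(password, salt, stored_key, iterations=100000):
    """Recompute the password hash and compare in constant time."""
    candidate = blake3_password_hash(password, salt, iterations)
    return hmac.compare_digest(candidate, stored_key)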
Comparison with Other Hash Functions
| Feature | Blake3 | Blake2b | SHA-256 | SHA-3 |
|---|---|---|---|---|
| Speed | Fastest | Fast | Medium | Slow |
| Parallel | Yes | No | No | No |
| Keyed Hash | Native | Native | HMAC | HMAC |
| XOF | Yes | No | No | SHAKE |
| Tree Mode | Always | Optional | No | No |
| Incremental | Yes | Yes | Yes | Yes |
Migration Guide
From Blake2
# Blake2b code (Python standard library)
import hashlib
hasher = hashlib.blake2b(digest_size=32)
hasher.update(data)
hash_blake2 = hasher.digest()
# Blake3 equivalent
from metamui_crypto import Blake3
hasher = Blake3.new()
hasher.update(data)
hash_blake3 = hasher.finalize(length=32)
# Keyed hashing
# Blake2b
hasher = hashlib.blake2b(key=key, digest_size=32)
# Blake3
hasher = Blake3.new_keyed(key)
From SHA-256
# SHA-256 code
import hashlib
hash_sha = hashlib.sha256(data).digest()
# Blake3 equivalent (much faster)
hash_blake3 = Blake3.hash(data)
# HMAC-SHA256
import hmac
mac = hmac.new(key, data, hashlib.sha256).digest()
# Blake3 keyed hash (faster, simpler)
mac = Blake3.keyed_hash(key, data)
Best Practices
- Use Default Output Size: 256 bits is sufficient for most uses
- Leverage Parallelism: Use multi-threading for large inputs
- Streaming for Large Data: Process in chunks to limit memory
- Key Derivation Contexts: Always use descriptive contexts
- Avoid Weak Keys: Use proper random keys for keyed hashing
Security Considerations
# Good: Use different contexts for different purposes
enc_key = Blake3.derive_key("MyApp v1.0 encryption", master_key)
mac_key = Blake3.derive_key("MyApp v1.0 authentication", master_key)
# Bad: Reusing keys for different purposes
# key = Blake3.hash(master_key) # Don't do this!
# Good: Proper domain separation
user_token = Blake3.keyed_hash(server_key, f"user:{user_id}".encode())
api_token = Blake3.keyed_hash(server_key, f"api:{api_key}".encode())
Common Pitfalls
1. Not Using Keyed Mode for MACs
# BAD: Vulnerable construction
mac = Blake3.hash(key + message)
# GOOD: Use keyed mode
mac = Blake3.keyed_hash(key, message)
2. Incorrect Parallelism Usage
# BAD: Manual chunking breaks tree structure
chunks = [data[i:i+1024] for i in range(0, len(data), 1024)]
hashes = [Blake3.hash(chunk) for chunk in chunks]
final = Blake3.hash(b''.join(hashes)) # Wrong!
# GOOD: Let Blake3 handle parallelism
final = Blake3.hash(data, threads=8)
3. Key Reuse Across Contexts
# BAD: Same key for different purposes
key = os.urandom(32)
mac1 = Blake3.keyed_hash(key, message1)
kdf1 = Blake3.derive_key("context1", key) # Key confusion!
# GOOD: Separate keys or proper context
mac_key = Blake3.derive_key("MAC key", master_key)
kdf_key = Blake3.derive_key("KDF key", master_key)
Test Vectors
# Official Blake3 test vectors (unkeyed, 256-bit output)
test_vectors = [
    {
        "input": b"",
        "hash": "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"
    },
    {
        "input": b"abc",
        "hash": "6437b3ac38465133ffb63b75273a8db548c558465d79db03fd359c6cd5bd9d85"
    }
]

# Verify implementation
for vector in test_vectors:
    result = Blake3.hash(vector["input"]).hex()
    assert result == vector["hash"], f"Test failed for input length {len(vector['input'])}"
    print(f"✓ Blake3 test passed for {len(vector['input'])} bytes")

# For full conformance testing, including the keyed-hash and derive-key modes,
# use the complete official vector set published as test_vectors.json in the
# Blake3 reference repository.
Resources
- Blake3 Paper - Algorithm specification
- Blake3 GitHub - Reference implementation
- Performance Analysis - Detailed benchmarks
- Security Analysis - Security properties
- Cryptanalysis - Third-party security analysis