HKDF (HMAC-based Key Derivation Function)

Overview

HKDF (HMAC-based Extract-and-Expand Key Derivation Function) is a simple, fast key derivation function based on HMAC. It follows the “extract-then-expand” paradigm, where the KDF logically consists of two modules: the first stage takes the input keying material and “extracts” from it a fixed-length pseudorandom key, and the second stage “expands” this key into several additional pseudorandom keys.

Key Features

Common Use Cases

Algorithm Details

Parameters

Parameter Description Default Range/Type
hash_function Underlying hash function SHA-256 SHA-256, SHA-512, BLAKE2b
ikm Input keying material Required Bytes of any length
salt Optional salt value Empty Bytes (recommended: hash length)
info Optional context information Empty Bytes (max 255 * hash_length)
length Desired output length Required 1 to 255 * hash_length

Security Properties

Implementation

Python Example

from metamui_crypto import HKDF, SHA256, SHA512, BLAKE2b
import os

# Basic usage with SHA-256
# One HKDF instance is created here and reused by every call below.
hkdf = HKDF(hash_function=SHA256())

# Derive a 32-byte key
ikm = b"input keying material"
salt = os.urandom(32)  # Random salt; 32 bytes matches the SHA-256 output size
info = b"application-specific context"

# One-shot derivation: extract and expand in a single call.
derived_key = hkdf.derive(
    ikm=ikm,
    salt=salt,
    info=info,
    length=32
)

# Extract and expand separately
# The PRK from extract() can be kept and fed to expand() several times with
# different info values.
prk = hkdf.extract(ikm=ikm, salt=salt)
expanded_key = hkdf.expand(prk=prk, info=info, length=64)

# Using SHA-512 for longer outputs
hkdf_sha512 = HKDF(hash_function=SHA512())
long_key = hkdf_sha512.derive(
    ikm=ikm,
    salt=salt,
    info=info,
    length=128  # Can derive up to 255 * 64 bytes with SHA-512
)

# Hierarchical key derivation
# Both keys come from the same master key; only the info label differs, and
# that label is what makes the two outputs distinct. No salt is passed, so
# the default (empty) salt is used.
master_key = os.urandom(32)
encryption_key = hkdf.derive(
    ikm=master_key,
    info=b"encryption",
    length=32
)
authentication_key = hkdf.derive(
    ikm=master_key,
    info=b"authentication",
    length=32
)

# Key versioning
# The version number is encoded into info (b"v1" here), so changing the
# version yields a different key from the same master key.
version = 1
versioned_key = hkdf.derive(
    ikm=master_key,
    info=f"v{version}".encode(),
    length=32
)

Advanced Usage

# Multi-level key hierarchy
class KeyHierarchy:
    """Two-level key tree: master key -> domain key -> purpose key.

    Every level is a 32-byte HKDF output; the level is identified by the
    ``info`` label (``domain:<name>`` or ``purpose:<name>``).
    """

    def __init__(self, master_key: bytes):
        self.hkdf = HKDF(hash_function=SHA256())
        self.master_key = master_key

    def derive_domain_key(self, domain: str) -> bytes:
        """Return the 32-byte key for ``domain``, derived from the master key."""
        domain_label = f"domain:{domain}".encode()
        return self.hkdf.derive(ikm=self.master_key, info=domain_label, length=32)

    def derive_purpose_key(self, domain: str, purpose: str) -> bytes:
        """Return the 32-byte key for ``purpose`` under ``domain``.

        The domain key is derived first and then used as the input keying
        material for the purpose-level derivation.
        """
        parent_key = self.derive_domain_key(domain)
        purpose_label = f"purpose:{purpose}".encode()
        return self.hkdf.derive(ikm=parent_key, info=purpose_label, length=32)

# Protocol key derivation
class ProtocolKeys:
    """Derives per-direction write keys and IVs for a protocol session."""

    def __init__(self, shared_secret: bytes, transcript: bytes):
        self.hkdf = HKDF(hash_function=SHA256())
        self.shared_secret = shared_secret
        self.transcript = transcript

    def derive_keys(self):
        """Return a dict with the client/server write keys and IVs.

        The shared secret is extracted once, using the transcript as the
        salt; each output is then expanded from that PRK under its own
        info label. Keys are 32 bytes, IVs are 12 bytes.
        """
        pseudorandom_key = self.hkdf.extract(
            ikm=self.shared_secret,
            salt=self.transcript,
        )

        # (result name, info label, output length) in derivation order
        schedule = (
            ('client_key', b"client write key", 32),
            ('server_key', b"server write key", 32),
            ('client_iv', b"client write iv", 12),
            ('server_iv', b"server write iv", 12),
        )
        return {
            name: self.hkdf.expand(prk=pseudorandom_key, info=label, length=size)
            for name, label, size in schedule
        }

# Deterministic key derivation
def derive_file_key(master_key: bytes, file_id: str) -> bytes:
    """Return a 32-byte key bound to ``file_id``.

    The derivation is deterministic: the same master key and file id
    always produce the same key, so nothing per-file has to be stored.
    """
    kdf = HKDF(hash_function=SHA256())
    file_label = f"file:{file_id}".encode()
    return kdf.derive(ikm=master_key, info=file_label, length=32)

# Key rotation
class RotatingKeys:
    """Key rotation helpers: per-epoch keys and a forward-only ratchet."""

    def __init__(self, root_key: bytes):
        self.hkdf = HKDF(hash_function=SHA256())
        self.root_key = root_key

    def get_key_for_epoch(self, epoch: int) -> bytes:
        """Return the 32-byte key for ``epoch``, derived from the root key."""
        epoch_label = f"epoch:{epoch}".encode()
        return self.hkdf.derive(ikm=self.root_key, info=epoch_label, length=32)

    def rotate_forward(self, current_key: bytes) -> bytes:
        """Return the next 32-byte key in the ratchet after ``current_key``.

        Only ``current_key`` feeds the derivation; the root key is not used.
        """
        return self.hkdf.derive(ikm=current_key, info=b"ratchet", length=32)

Implementation Details

# HKDF internals (simplified)
class HKDF:
    """Simplified HKDF (extract-then-expand) over a pluggable hash function.

    ``hash_function`` must expose ``digest_size``. The HMAC primitive is
    the ``hmac(hash_function, key, message)`` helper, which is not defined
    in this snippet and is assumed to return the raw MAC bytes.
    """

    def __init__(self, hash_function):
        self.hash_function = hash_function
        self.hash_length = hash_function.digest_size

    def extract(self, ikm: bytes, salt: bytes = b"") -> bytes:
        """HKDF-Extract(salt, IKM) -> PRK

        An empty salt is replaced by ``hash_length`` zero bytes.
        """
        if not salt:
            salt = bytes(self.hash_length)  # Zero salt

        # PRK = HMAC-Hash(salt, IKM)
        return hmac(self.hash_function, salt, ikm)

    def expand(self, prk: bytes, info: bytes = b"", length: int = 32) -> bytes:
        """HKDF-Expand(PRK, info, L) -> OKM

        Raises:
            ValueError: if ``length`` is below 1 or above
                ``255 * hash_length``.
        """
        max_length = 255 * self.hash_length
        # A zero or negative length would otherwise fall through the loop and
        # hand the caller an empty "key" without any error.
        if length < 1:
            raise ValueError("Output length must be at least 1 byte")
        if length > max_length:
            raise ValueError(f"Output too long (max {max_length} bytes)")

        # Number of hash-sized blocks needed (ceiling division).
        n = (length + self.hash_length - 1) // self.hash_length
        blocks = []
        t = b""

        # T(i) = HMAC(PRK, T(i-1) | info | i); the counter is a single byte,
        # which is why at most 255 blocks can be produced.
        for i in range(1, n + 1):
            t = hmac(self.hash_function, prk, t + info + bytes([i]))
            blocks.append(t)

        return b"".join(blocks)[:length]

    def derive(self, ikm: bytes, salt: bytes = b"",
               info: bytes = b"", length: int = 32) -> bytes:
        """Complete HKDF: Extract-then-Expand"""
        prk = self.extract(ikm, salt)
        return self.expand(prk, info, length)

Security Considerations

Best Practices

  1. Salt Usage
  2. Info Parameter
  3. Input Keying Material
  4. Output Length

Common Pitfalls

# DON'T: Use HKDF for password hashing
password = "user_password"
# WRONG: HKDF is not designed for low-entropy inputs
key = hkdf.derive(ikm=password.encode(), length=32)

# DO: Use PBKDF2 first for passwords
# The password is stretched with PBKDF2 first; HKDF is then applied only to
# the stretched key.
# NOTE(review): `salt` is not defined in this snippet; presumably the random
# salt from the basic-usage example -- confirm.
from metamui_crypto import PBKDF2
pbkdf2 = PBKDF2(iterations=100000)
password_key = pbkdf2.derive(password.encode(), salt)
derived_key = hkdf.derive(ikm=password_key, info=b"encryption", length=32)

# DON'T: Reuse keys across contexts
shared_key = hkdf.derive(ikm=master_key, length=32)
# WRONG: Same key for different purposes
# (encrypt_with_key / authenticate_with_key are placeholders, not defined here)
encrypt_with_key(data1, shared_key)
authenticate_with_key(data2, shared_key)

# DO: Derive separate keys for each purpose
# Same master key, different info labels -> different keys.
encryption_key = hkdf.derive(ikm=master_key, info=b"encrypt", length=32)
auth_key = hkdf.derive(ikm=master_key, info=b"auth", length=32)

# DON'T: Ignore the maximum output length
# WRONG: Exceeds maximum for SHA-256 (255 * 32 = 8160 bytes)
# The request is rejected with ValueError; it is swallowed here only to keep
# the example running.
try:
    huge_key = hkdf.derive(ikm=master_key, length=10000)
except ValueError:
    pass

# DO: Use appropriate hash function for desired length
# With SHA-512 the limit is 255 * 64 = 16320 bytes, so 10000 fits.
hkdf_sha512 = HKDF(hash_function=SHA512())
long_key = hkdf_sha512.derive(ikm=master_key, length=10000)  # OK with SHA-512

Performance Characteristics

Benchmarks

Operation Input Size Output Size Time (μs) Throughput
Extract (SHA-256) 32 bytes 32 bytes 2.1 15.2 MB/s
Expand (SHA-256) 32 bytes 32 bytes 2.3 13.9 MB/s
Expand (SHA-256) 32 bytes 256 bytes 18.4 13.9 MB/s
Derive (SHA-256) 32 bytes 32 bytes 4.4 7.3 MB/s
Extract (SHA-512) 64 bytes 64 bytes 2.8 22.9 MB/s
Expand (SHA-512) 64 bytes 512 bytes 22.1 23.2 MB/s
Derive (BLAKE2b) 32 bytes 256 bytes 15.2 16.8 MB/s

Performance Considerations

  1. Hash Function Choice
  2. Output Length
  3. Optimization Strategies
    # Efficient: Derive once, slice as needed
    # A single 96-byte output is split into three non-overlapping 32-byte keys.
    all_keys = hkdf.derive(ikm=master_key, info=b"keys", length=96)
    key1 = all_keys[0:32]
    key2 = all_keys[32:64]
    key3 = all_keys[64:96]

    # Less efficient: Multiple derivations
    # Each call repeats the full derivation. These calls use different info
    # labels from the sliced variant above, so the resulting key values are
    # not the same as key1/key2/key3 there.
    key1 = hkdf.derive(ikm=master_key, info=b"key1", length=32)
    key2 = hkdf.derive(ikm=master_key, info=b"key2", length=32)
    key3 = hkdf.derive(ikm=master_key, info=b"key3", length=32)
    

Use Cases

1. TLS 1.3 Key Derivation

def tls13_key_schedule(shared_secret: bytes, handshake_hash: bytes):
    """Simplified TLS 1.3 key derivation

    Returns ``(client_handshake_key, server_handshake_key)``, each 32 bytes.
    The handshake hash is appended to each info label, binding the keys to
    the handshake.
    """
    kdf = HKDF(hash_function=SHA256())

    # Early secret: extracted from empty input and empty salt.
    early = kdf.extract(ikm=b"", salt=b"")

    # Handshake secret: the shared secret, salted with the early secret.
    hs_secret = kdf.extract(ikm=shared_secret, salt=early)

    # Client key is expanded first, then the server key.
    labels = (b"tls13 client handshake key", b"tls13 server handshake key")
    client_key, server_key = (
        kdf.expand(prk=hs_secret, info=label + handshake_hash, length=32)
        for label in labels
    )
    return client_key, server_key

2. Signal Protocol Double Ratchet

class DoubleRatchet:
    """HKDF-based KDF steps for a Double Ratchet style key schedule."""

    def __init__(self):
        self.hkdf = HKDF(hash_function=SHA256())

    def kdf_rk(self, rk: bytes, dh_out: bytes):
        """Root key ratchet step

        Derives 64 bytes from the DH output, salted with the current root
        key, and returns ``(new_root_key, chain_key)``.
        """
        okm = self.hkdf.derive(ikm=dh_out, salt=rk, info=b"root chain", length=64)
        new_root_key, chain_key = okm[:32], okm[32:]
        return new_root_key, chain_key

    def kdf_ck(self, ck: bytes):
        """Chain key ratchet step

        Expands the current chain key twice, once per label, and returns
        ``(next_chain_key, message_key)``.
        """
        message_key = self.hkdf.expand(prk=ck, info=b"message key", length=32)
        next_chain_key = self.hkdf.expand(prk=ck, info=b"chain key", length=32)
        return next_chain_key, message_key

3. Encrypted Storage System

class EncryptedStorage:
    """Derives per-file and metadata keys from one storage master key."""

    def __init__(self, master_key: bytes):
        self.hkdf = HKDF(hash_function=SHA256())
        self.master_key = master_key

    def derive_file_keys(self, file_id: str, file_metadata: bytes):
        """Derive encryption and MAC keys for a file

        Returns a dict with ``encryption_key`` and ``mac_key``, 32 bytes
        each, cut from a single 64-byte derivation.
        """
        # The context is the encoded "file:<id>" label immediately followed
        # by the raw metadata bytes.
        file_label = f"file:{file_id}".encode()
        okm = self.hkdf.derive(
            ikm=self.master_key,
            info=file_label + file_metadata,
            length=64,
        )

        # First half encrypts, second half authenticates.
        return {'encryption_key': okm[:32], 'mac_key': okm[32:64]}

    def derive_metadata_key(self):
        """Derive key for encrypting file metadata"""
        metadata_label = b"metadata encryption"
        return self.hkdf.derive(ikm=self.master_key, info=metadata_label, length=32)

4. API Key Derivation

class APIKeyManager:
    """Derives client API keys and per-request signing keys from a root secret."""

    def __init__(self, root_secret: bytes):
        self.hkdf = HKDF(hash_function=SHA256())
        self.root_secret = root_secret

    def generate_api_key(self, client_id: str, permissions: list) -> str:
        """Generate client-specific API key

        The key is bound to the client id and to the permission list in the
        order given, so a different order produces a different key.

        Returns:
            The 32-byte derived key as a URL-safe base64 string.
        """
        # Imported here because this example has no module-level
        # ``import base64``; without it the return line raises NameError.
        import base64

        context = f"client:{client_id}:perms:{','.join(permissions)}".encode()

        api_key = self.hkdf.derive(
            ikm=self.root_secret,
            info=context,
            length=32
        )

        # Convert to base64 for use as API key
        return base64.urlsafe_b64encode(api_key).decode()

    def derive_request_key(self, api_key: bytes, request_id: str) -> bytes:
        """Derive per-request signing key

        ``api_key`` is used as the input keying material and must be bytes;
        the string returned by ``generate_api_key`` has to be converted by
        the caller first.
        """
        return self.hkdf.derive(
            ikm=api_key,
            info=f"request:{request_id}".encode(),
            length=32
        )

Comparison with Other KDFs

HKDF vs PBKDF2

Feature HKDF PBKDF2
Purpose General key derivation Password-based derivation
Speed Fast Intentionally slow
Iterations None Configurable (high)
Salt Optional Required
Use Case High-entropy inputs Low-entropy passwords

HKDF vs Argon2

Feature HKDF Argon2
Memory Usage Minimal Configurable (high)
Parallelism No Yes
Resistance PRF security Memory-hard
Complexity Simple Complex
Use Case Key expansion Password hashing

When to Use Each

# NOTE(review): `hkdf`, `salt`, `PBKDF2`, `Argon2`, `ecdh_key_exchange` and
# `get_user_password` are not defined in this snippet; they are presumably
# provided by earlier examples or the application -- confirm.

# Use HKDF for high-entropy key material
shared_secret = ecdh_key_exchange()  # High entropy
session_keys = hkdf.derive(ikm=shared_secret, info=b"session", length=64)

# Use PBKDF2 for passwords with moderate security needs
password = get_user_password()  # Low entropy
pbkdf2 = PBKDF2(iterations=100000)
key = pbkdf2.derive(password.encode(), salt)

# Use Argon2 for passwords with high security needs
password = get_user_password()  # Low entropy
argon2 = Argon2(memory_cost=65536, time_cost=3)
key = argon2.derive(password.encode(), salt)

# Combine for password-based encryption
# The password hash supplies the stretching; HKDF then turns the stretched
# key into a purpose-specific key.
password_key = argon2.derive(password.encode(), salt)
encryption_key = hkdf.derive(ikm=password_key, info=b"encryption", length=32)

Migration Guide

From Custom KDF

# Before: Custom hash-and-counter KDF (insecure)
def custom_kdf(key: bytes, context: bytes, length: int) -> bytes:
    """Legacy ad-hoc KDF kept as the "before" example of the migration.

    Concatenates ``sha256(key + context + counter)`` blocks, with the
    counter encoded as 4 big-endian bytes starting at 0, and truncates the
    result to ``length`` bytes.
    """
    stream = b""
    block_index = 0
    while True:
        if len(stream) >= length:
            return stream[:length]
        stream += sha256(key + context + block_index.to_bytes(4, 'big'))
        block_index += 1

# After: HKDF (secure)
# The old function's arguments map directly: key -> ikm, context -> info.
# NOTE(review): the output bytes differ from custom_kdf, so anything derived
# with the old function presumably has to be re-derived -- confirm.
hkdf = HKDF(hash_function=SHA256())
output = hkdf.derive(ikm=key, info=context, length=length)

From Simple Hash

# Before: Simple hash concatenation
def derive_keys(master_key: bytes):
    """Legacy derivation: hash the master key with a purpose suffix.

    Returns ``(encryption_key, auth_key)``.
    """
    purpose_suffixes = (b"encryption", b"authentication")
    encryption_key, auth_key = (
        sha256(master_key + suffix) for suffix in purpose_suffixes
    )
    return encryption_key, auth_key

# After: HKDF with proper domain separation
def derive_keys(master_key: bytes):
    """Derive the encryption and authentication keys with HKDF.

    A single 64-byte output is derived under the ``b"keys"`` label and
    split in half. Returns ``(encryption_key, auth_key)``, 32 bytes each.
    """
    okm = HKDF(hash_function=SHA256()).derive(
        ikm=master_key,
        info=b"keys",
        length=64,
    )
    encryption_key, auth_key = okm[:32], okm[32:]
    return encryption_key, auth_key

From HMAC-based Derivation

# Before: Manual HMAC chain
def hmac_kdf(key: bytes, info: bytes, length: int) -> bytes:
    """Hand-rolled HMAC feedback chain kept as the "before" example.

    Each block is ``hmac_sha256(key, previous_block + info + counter)``,
    with a one-byte counter starting at 1. Blocks are concatenated and the
    result is truncated to ``length`` bytes.
    """
    stream = b""
    last_block = b""
    block_number = 1
    while True:
        if len(stream) >= length:
            return stream[:length]
        last_block = hmac_sha256(key, last_block + info + bytes([block_number]))
        stream += last_block
        block_number += 1

# After: HKDF (standardized)
# Only the expand step is used: the existing key is passed as the PRK, with
# no extract step in front of it.
hkdf = HKDF(hash_function=SHA256())
output = hkdf.expand(prk=key, info=info, length=length)

Test Vectors

RFC 5869 Test Vector 1 (SHA-256)

# Test Case 1
# Short inputs, SHA-256, 42 bytes of output.
ikm = bytes.fromhex("0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b")
salt = bytes.fromhex("000102030405060708090a0b0c")
info = bytes.fromhex("f0f1f2f3f4f5f6f7f8f9")
length = 42

hkdf = HKDF(hash_function=SHA256())

# Extract step
# The intermediate PRK is checked as well as the final output.
prk = hkdf.extract(ikm=ikm, salt=salt)
assert prk.hex() == "077709362c2e32df0ddc3f0dc47bba6390b6c73bb50f9c3122ec844ad7c2b3e5"

# Expand step
okm = hkdf.expand(prk=prk, info=info, length=length)
assert okm.hex() == "3cb25f25faacd57a90434f64d0362f2a2d2d0a90cf1a5a4c5db02d56ecc4c5bf34007208d5b887185865"

# Complete derivation
# derive() must give the same output as extract() followed by expand().
okm = hkdf.derive(ikm=ikm, salt=salt, info=info, length=length)
assert okm.hex() == "3cb25f25faacd57a90434f64d0362f2a2d2d0a90cf1a5a4c5db02d56ecc4c5bf34007208d5b887185865"

RFC 5869 Test Vector 2 (SHA-256, long inputs)

# Test Case 2
# Long inputs; the 82-byte output is longer than two SHA-256 blocks, so the
# expand step has to produce three blocks and truncate.
ikm = bytes.fromhex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f")
salt = bytes.fromhex("606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeaf")
info = bytes.fromhex("b0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff")
length = 82

hkdf = HKDF(hash_function=SHA256())
okm = hkdf.derive(ikm=ikm, salt=salt, info=info, length=length)
assert okm.hex() == "b11e398dc80327a1c8e7f78c596a49344f012eda2d4efad8a050cc4c19afa97c59045a99cac7827271cb41c65e590e09da3275600c2f09b8367793a9aca3db71cc30c58179ec3e87c14c01d5c1f3434f1d87"

Zero Salt Test

# Test Case 3: No salt
# Exercises the defaults: both salt and info are zero-length.
ikm = bytes.fromhex("0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b")
salt = b""  # Empty salt
info = b""  # Empty info
length = 42

hkdf = HKDF(hash_function=SHA256())
okm = hkdf.derive(ikm=ikm, salt=salt, info=info, length=length)
assert okm.hex() == "8da4e775a563c18f715f802a063c5a31b8a11f5c5ee1879ec3454e5f3c738d2d9d201395faa4b61a96c8"

References