.WAV file integrity test script (Python)

Analyzes the structure of a .wav file and reports any inconsistencies it finds.

# ======================================
# .WAV file structure integrity test
# Alex Radzishevsky
# www.radzishevsky.com
# February 2026
# ======================================

import struct
import os
import sys

def get_audio_format(code):
    """Translate a WAVE format tag into a human-readable label.

    Args:
        code: Integer format tag as stored in the 'fmt ' chunk.

    Returns:
        The label of a known tag, or an "Unknown/Compressed" string that
        embeds the tag as four lowercase hex digits.
    """
    labels_by_tag = dict([
        (0x0001, "PCM (Integer)"),
        (0x0003, "IEEE Float"),
        (0x0006, "ALAW"),
        (0x0007, "MULAW"),
        (0xFFFE, "WAVE_FORMAT_EXTENSIBLE"),
    ])
    if code in labels_by_tag:
        return labels_by_tag[code]
    return f"Unknown/Compressed (0x{code:04x})"

def hex_dump(data, length=64):
    """Render the leading bytes of *data* as a hex/ASCII dump.

    Each output row shows a four-digit hex offset, up to 16 bytes in hex
    (padded to a fixed width) and the same bytes as ASCII, with every
    byte outside 32..126 shown as '.'.

    Args:
        data: Bytes-like object to dump.
        length: Maximum number of bytes to show; values <= 0 show nothing.

    Returns:
        The rows joined with newlines, or an empty string when there is
        nothing to show.
    """
    # Clamp first: slicing rows straight out of `data` would let the last
    # row run past `length` whenever `length` is not a multiple of 16.
    window = data[:max(length, 0)]
    rows = []
    for offset in range(0, len(window), 16):
        row = window[offset:offset + 16]
        hex_part = " ".join(f"{b:02x}" for b in row)
        ascii_part = "".join(chr(b) if 32 <= b <= 126 else "." for b in row)
        rows.append(f"  {offset:04x}:  {hex_part:<48}  |{ascii_part}|")
    return "\n".join(rows)

def _report_fmt_chunk(fmt_data):
    """Print the decoded fields of a 'fmt ' chunk body.

    Args:
        fmt_data: Raw bytes of the chunk body.

    Returns:
        A tuple ``(audio_params, ok)``. ``audio_params`` is a dict with the
        keys 's_rate', 'n_ch' and 'bps', or an empty dict when the body is
        too short to decode. ``ok`` is False when a problem was reported.
    """
    if len(fmt_data) < 16:
        # The fixed part of the chunk is 16 bytes; report instead of letting
        # struct.unpack raise on a short buffer.
        print(f"    !! ERROR: fmt chunk holds only {len(fmt_data)} bytes; at least 16 are required.")
        return {}, False

    a_fmt, n_ch, s_rate, b_rate, b_align, bps = struct.unpack('<HHIIHH', fmt_data[:16])
    calc_align = n_ch * (bps // 8)
    calc_rate = s_rate * calc_align
    ok = True

    print(f"    > Format Category: {get_audio_format(a_fmt)}")
    print(f"    > Channels:        {n_ch}")
    print(f"    > Sample Rate:     {s_rate} Hz")
    print(f"    > Bits Per Sample: {bps} bit")
    print(f"    > Byte Rate:       {b_rate} (Calculated: {calc_rate})")
    print(f"    > Block Align:     {b_align} (Calculated: {calc_align})")

    if b_rate != calc_rate:
        print("    !! ERROR: Byte Rate calculation is incorrect.")
        ok = False

    if a_fmt == 0xFFFE and len(fmt_data) >= 40:
        # Extensible layout: valid-bits field at offset 18, channel mask at 20.
        v_bps, mask = struct.unpack('<HI', fmt_data[18:24])
        print(f"    > Extensible Info: Valid Bits: {v_bps}, Channel Mask: {hex(mask)}")

    return {'s_rate': s_rate, 'n_ch': n_ch, 'bps': bps}, ok


def _report_list_chunk(f, c_size):
    """Print the type of a LIST chunk and, for INFO lists, each entry.

    Reading never goes past the end of the chunk body. The caller is
    expected to reposition the file to the end of the chunk afterwards.

    Args:
        f: Binary file object positioned at the start of the chunk body.
        c_size: Declared size of the chunk body in bytes.

    Returns:
        False when an entry declares more bytes than the chunk still
        holds, True otherwise.
    """
    chunk_end = f.tell() + c_size
    l_type = f.read(min(4, c_size)).decode(errors='ignore')
    print(f"    > List Type: {l_type}")
    if l_type != 'INFO':
        return True

    while chunk_end - f.tell() >= 8:
        m_id, m_sz = struct.unpack('<4sI', f.read(8))
        m_name = m_id.decode(errors='ignore')
        if m_sz > chunk_end - f.tell():
            print(f"    [!] Warning: INFO entry [{m_name}] declares {m_sz} bytes, "
                  f"more than remain in the LIST chunk.")
            return False
        m_val = f.read(m_sz).decode('ascii', errors='ignore').strip('\x00')
        print(f"      - {m_name}: {m_val}")
        # Odd-sized entries are followed by one pad byte.
        if m_sz % 2 != 0 and f.tell() < chunk_end:
            f.seek(1, 1)
    return True


def inspect_wav_comprehensive(file_path):
    """Print a structural integrity report for the WAV file at *file_path*.

    The report goes to stdout in four sections: a hex preview of the first
    64 bytes, the global RIFF header, a walk over every chunk in the file,
    and a final verdict. The verdict is SUCCESS only when both mandatory
    chunks were found, the walk consumed the file exactly, and no alert or
    warning was printed along the way.

    Args:
        file_path: Path of the file to analyse.

    Returns:
        None. A missing path, or a path that is not a regular file, is
        reported with an error message instead of raising.
    """
    if not os.path.exists(file_path):
        print(f"Error: File '{file_path}' not found.")
        return
    if not os.path.isfile(file_path):
        # open() would raise on a directory; report it like a missing file.
        print(f"Error: '{file_path}' is not a regular file.")
        return

    file_size = os.path.getsize(file_path)
    print(f"\n{'='*85}")
    print(f" ANALYSIS REPORT: {os.path.basename(file_path)}")
    print(f" SYSTEM FILE SIZE: {file_size} bytes")
    print(f"{'='*85}\n")

    # Cleared by every alert so the verdict cannot contradict the report.
    clean = True

    with open(file_path, 'rb') as f:
        # --- 1. INITIAL HEX DUMP ---
        print("[01] HEADER HEX PREVIEW (First 64 Bytes)")
        print(hex_dump(f.read(64)))
        f.seek(0)
        print("-" * 85)

        # --- 2. GLOBAL RIFF HEADER ---
        header = f.read(12)
        if len(header) < 12:
            print("[CRITICAL ERROR] File is smaller than a 12-byte RIFF header. Investigation aborted.")
            return

        tag, r_size, w_tag = struct.unpack('<4sI4s', header)

        print("[02] GLOBAL CONTAINER")
        print(f"  Container ID:    {tag.decode(errors='ignore')} (Expected: RIFF)")
        print(f"  Declared Size:   {r_size} bytes")
        print(f"  Format Type:     {w_tag.decode(errors='ignore')} (Expected: WAVE)")

        if tag != b'RIFF' or w_tag != b'WAVE':
            print("  !! INTEGRITY ALERT: Container ID / Format Type do not match RIFF/WAVE.")
            clean = False

        if r_size != file_size - 8:
            diff = (file_size - 8) - r_size
            status = "TRAILING DATA" if diff > 0 else "TRUNCATED"
            print(f"  !! INTEGRITY ALERT: Size mismatch. System reports {file_size-8}, Header reports {r_size}.")
            print(f"     File status appears: {status} by {abs(diff)} bytes.")
            clean = False
        else:
            print("  Structure: Global header matches system file size.")
        print("-" * 85)

        # --- 3. CHUNK ITERATOR ---
        print("[03] CHUNK EXPLORER")
        mandatory = {'fmt ': False, 'data': False}
        audio_params = {}

        while f.tell() < file_size:
            pos = f.tell()

            # Boundary Check: a chunk header needs 8 bytes.
            if file_size - pos < 8:
                print(f"\n  [!] TRAILING DATA: Found {file_size - pos} unexplained bytes at offset {pos}.")
                clean = False
                break

            c_id_raw, c_size = struct.unpack('<4sI', f.read(8))
            # strip() turns the on-disk id 'fmt ' into 'fmt' for the comparisons below.
            c_id = c_id_raw.decode('ascii', errors='ignore').strip()
            body_start = f.tell()

            print(f"\n  CHUNK: [{c_id:<4}] | Offset: {pos:<8} | Size: {c_size} bytes")

            # Validate chunk doesn't overrun file
            if body_start + c_size > file_size:
                missing = (body_start + c_size) - file_size
                print(f"  [CRITICAL] Error: Chunk [{c_id}] is truncated. {missing} bytes are missing from the file.")
                clean = False
                break

            # Process Specific Chunks
            if c_id == 'fmt':
                mandatory['fmt '] = True
                audio_params, fmt_ok = _report_fmt_chunk(f.read(c_size))
                clean = clean and fmt_ok

            elif c_id == 'data':
                mandatory['data'] = True
                print("    > Content: Raw Audio Payload.")
                if audio_params:
                    bytes_per_second = (audio_params['s_rate'] * audio_params['n_ch']
                                        * (audio_params['bps'] / 8))
                    if bytes_per_second > 0:
                        duration = c_size / bytes_per_second
                        print(f"    > Calculated Audio Length: {duration:.3f} seconds")
                    else:
                        # A zero rate, channel count or bit depth would divide by zero.
                        print("    [!] Warning: Cannot compute audio length (fmt parameters give a zero byte rate).")
                        clean = False

            elif c_id == 'LIST':
                list_ok = _report_list_chunk(f, c_size)
                clean = clean and list_ok

            else:
                print("    > Action: Skipping unhandled chunk.")

            # Resynchronise on the declared end of the chunk body, however
            # much of it the branch above actually consumed.
            f.seek(body_start + c_size)

            # Padding Verification: odd-sized chunks carry one pad byte.
            if c_size % 2 != 0:
                if f.tell() < file_size:
                    pad = f.read(1)
                    if pad != b'\x00':
                        print(f"    [!] Warning: Padding byte at {f.tell()-1} is non-zero (0x{pad.hex()}).")
                        clean = False
                else:
                    print("    [!] Warning: Missing padding byte for odd-sized chunk at end of file.")
                    clean = False

        # --- 4. FINAL SUMMARY ---
        print("\n" + "-" * 85)
        print("[04] FINAL VERDICT")
        missing_mandatory = [k for k, v in mandatory.items() if not v]
        if missing_mandatory:
            print(f"  FAILED: Missing mandatory chunks: {missing_mandatory}")
        elif clean and f.tell() == file_size:
            print("  SUCCESS: RIFF structure is clean and perfectly aligned.")
        else:
            print("  COMPLETE: Structure parsed, but identified integrity warnings (see above).")
        print("=" * 85 + "\n")

if __name__ == "__main__":
    # Inspect the file named by the first command-line argument; without
    # one, show the usage line instead.
    cli_args = sys.argv[1:]
    if cli_args:
        inspect_wav_comprehensive(cli_args[0])
    else:
        print("Usage: python inspect_wav.py <filename.wav>")