#!/bin/bash
#
#  File: cov-analysis
#
#  Version: 1.0.0
#
#  Purpose: Generate LLVM source-based code coverage reports from AFL++ corpus.
#           Replays queue/crash/timeout files through a coverage-instrumented
#           binary, merges .profraw profiles, and produces HTML/text/JSON
#           reports via llvm-profdata and llvm-cov.
#
#  Copyright (c) 2026 by Marc "vanHauser" Heuse (vh@thc.org)
#
#  License: GNU Affero General Public License 3 (or any later version)
#

# Strict mode: abort on unhandled command failure (-e), on expansion of an
# unset variable (-u), and let any failing pipeline stage fail the pipeline.
set -euo pipefail

# Tool version; printed as "cov-analysis-$VERSION" by the -V option handlers.
VERSION="1.0.0"

# ── helpers ───────────────────────────────────────────────────────────────────

# Output verbosity switches read by the logging helpers below:
#   QUIET=1   silences log(), loge() and logerr()   (err() always prints)
#   VERBOSE=1 enables logv()
QUIET=0
VERBOSE=0
# Corpus layout consumed by find_queue_files / find_crash_timeout_files, which
# treat an empty value as "afl". detect_fuzzer_layout may also report "empty",
# which callers handle as an error rather than store as a usable layout.
FUZZER_LAYOUT=""   # set by cmd_report; one of: afl | flat

# Informational message on stdout; silenced when QUIET is non-zero.
# Always returns 0 so it is safe to call under `set -e`.
log() {
  if test "$QUIET" -eq 0; then
    echo "[+] $*" || true
  fi
  return 0
}

# Verbose-only message on stdout; printed only when VERBOSE is 1.
logv() {
  if test "$VERBOSE" -eq 1; then
    echo "[*] $*" || true
  fi
  return 0
}

# Error message on stderr; never silenced.
err() {
  echo "[-] $*" >&2
}

# stderr twin of log(): for commands (e.g. search) whose stdout has to stay
# machine-readable because it is meant to be piped.
loge() {
  if test "$QUIET" -eq 0; then
    echo "[+] $*" >&2 || true
  fi
  return 0
}

# Like err(), but honours QUIET.
logerr() {
  if test "$QUIET" -eq 0; then
    echo "[-] $*" >&2 || true
  fi
  return 0
}

# Guard for options that take a value.
# Usage: need_arg <opt-name> <value-or-empty>
# Reports an error via err() and exits the shell with status 1 when the value
# is missing/empty or starts with '-' (i.e. it looks like the next flag).
# Returns 0 when the value is acceptable.
need_arg() {
  local flag="$1"
  local value="${2-}"

  if [[ -z "$value" ]]; then
    err "Option $flag requires an argument"
    exit 1
  elif [[ "$value" == -* ]]; then
    err "Option $flag requires an argument (got '$value' which looks like a flag)"
    exit 1
  fi
}

# Echo the LLVM version that tool selection should match, or nothing.
# LLVM tools must match the clang that produced the .profraw, otherwise
# `llvm-profdata merge` fails with a version mismatch. We derive the version
# from the selected compiler so e.g. CC=clang-22 maps to llvm-profdata-22:
#   1. an explicit version suffix in CC/CXX  (clang-22 -> 22, clang-6.0 -> 6.0)
#   2. the version reported by the selected clang (CC, else plain `clang`)
# Reads globals: CC, CXX (both optional). Always returns 0.
llvm_version_hint() {
  local c base suffix v
  # Only a purely numeric (optionally dotted) tail counts as a version.
  local ver_re='^[0-9]+(\.[0-9]+)*$'
  for c in "${CC-}" "${CXX-}"; do
    test -n "$c" || continue
    base="$(basename -- "$c" 2>/dev/null)" || continue
    case "$base" in
      *-*) suffix="${base##*-}" ;;
      *)   continue ;;
    esac
    # Validate the text after the LAST hyphen itself. Checking for "-<digit>"
    # anywhere in the name would accept e.g. "my-2stage-clang" or
    # "clang-22 -m32" and then echo "clang" / "m32" as the version.
    if [[ "$suffix" =~ $ver_re ]]; then
      echo "$suffix"
      return 0
    fi
  done
  c="${CC:-clang}"
  command -v "$c" >/dev/null 2>&1 || return 0
  # grep exits non-zero for a non-clang compiler; tolerate that so the
  # assignment cannot trip errexit/pipefail when called outside $(...).
  v="$("$c" --version 2>/dev/null | grep -oiE 'clang version [0-9]+' | grep -oE '[0-9]+' | head -n1)" || true
  if test -n "$v"; then
    echo "$v"
  fi
  return 0
}

# Locate an LLVM tool on PATH and echo the name under which it was found.
# Candidates are tried in this order, first hit wins:
#   1. <tool>-<hint>, where <hint> comes from llvm_version_hint (if any)
#   2. the bare <tool>
#   3. <tool>-26 down to <tool>-11
# Returns 0 after echoing a match, 1 (echoing nothing) if none exists.
find_tool() {
  local name="$1"
  local wanted candidate major
  local -a candidates=()

  wanted="$(llvm_version_hint)"
  if test -n "$wanted"; then
    candidates+=("${name}-${wanted}")
  fi
  candidates+=("$name")
  for major in 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11; do
    candidates+=("${name}-${major}")
  done

  for candidate in "${candidates[@]}"; do
    if command -v "$candidate" >/dev/null 2>&1; then
      echo "$candidate"
      return 0
    fi
  done
  return 1
}

# Echo the program path contained in a coverage command string: the first
# whitespace-separated word that is neither a KEY=VALUE assignment nor the
# @@ placeholder. Returns 0 when a word was echoed, 1 otherwise.
extract_binary() {
  local cmd="$1"
  local word

  # The command string is split unquoted on purpose; switch pathname
  # expansion off meanwhile so `*`, `?` or `[` in it cannot match cwd files.
  set -f
  for word in $cmd; do
    if [[ "$word" == *=* || "$word" == "@@" ]]; then
      continue
    fi
    set +f
    echo "$word"
    return 0
  done
  set +f
  return 1
}

# Succeed when the given file contains the fixed signature string that the
# cov-analysis replay driver (coverage_driver.c) embeds. Such binaries take
# their inputs as file arguments (the @@ placeholder), never from stdin.
# Returns 1 for an empty path; otherwise the status of the grep lookup
# (non-zero when the signature is absent or the file cannot be read).
is_cov_driver_binary() {
  local candidate="$1"
  local marker='###SIGNATURE_LLVMFUZZERTESTONEINPUT_COVERAGE###'

  if test -z "$candidate"; then
    return 1
  fi
  # -a: scan binary data as text, -F: literal match, -q: status only.
  grep -q -a -F "$marker" "$candidate" 2>/dev/null
}

# Echo the coverage command (without a trailing newline), appending " @@"
# when the command has no @@ placeholder AND the binary is a cov-analysis
# driver, which only reads inputs from argv. This supplies a forgotten
# placeholder; a trailing @@ also lets the replay use fast batch mode.
# Args: <command string> <binary path>
add_at_if_driver() {
  local cmd="$1" bin="$2"
  local tail=""

  if [[ "$cmd" != *@@* ]] && is_cov_driver_binary "$bin"; then
    tail=" @@"
  fi
  printf '%s%s' "$cmd" "$tail"
}

# Classify the directory in $AFL_DIR and echo exactly one of:
#   afl    — AFL++ layout: <dir>/queue is a directory or <dir>/*/queue exists
#   flat   — libFuzzer/libafl/honggfuzz flat corpus or crash dir
#            (at least one regular file directly inside <dir>)
#   empty  — neither of the above (empty or unreadable directory)
# Reads global: AFL_DIR. Always returns 0.
detect_fuzzer_layout() {
  local kind="empty"

  if test -d "$AFL_DIR/queue" \
     || compgen -G "$AFL_DIR/*/queue" >/dev/null 2>&1; then
    kind="afl"
  elif find "$AFL_DIR" -maxdepth 1 -type f -print -quit 2>/dev/null | grep -q .; then
    # find stops at the first regular file; grep only checks for any output.
    kind="flat"
  fi

  echo "$kind"
}

# Emit null-delimited queue/corpus file paths for the detected layout.
# Reads globals: AFL_DIR, FUZZER_LAYOUT (empty/unset is treated as "afl").
#   afl  — entries named id:* directly inside <dir>/queue and <dir>/*/queue
#   flat — regular files directly inside <dir>, minus crash artifacts/metadata
# Any other layout value emits nothing. find errors are ignored (best effort).
find_queue_files() {
  case "${FUZZER_LAYOUT:-afl}" in
    afl)
      local paths=()
      local p
      if test -d "$AFL_DIR/queue"; then
        paths+=("$AFL_DIR/queue")
      fi
      # An unmatched glob stays literal and is rejected by `test -d`, so the
      # caller's nullglob setting does not have to be toggled here.
      for p in "$AFL_DIR"/*/queue; do
        test -d "$p" || continue
        paths+=("$p")
      done
      if test "${#paths[@]}" -gt 0; then
        # -maxdepth 1: AFL++ keeps bookkeeping under queue/.state/* in marker
        # files named exactly like the queue entries; recursing would replay
        # and count those (often empty) duplicates as corpus inputs.
        find "${paths[@]}" -maxdepth 1 -name 'id:*' ! -type d \
          -print0 2>/dev/null || true
      fi
      ;;
    flat)
      # libFuzzer/libafl/honggfuzz flat corpus: every regular file that isn't a
      # known crash artifact or metadata file.
      find "$AFL_DIR" -maxdepth 1 -type f \
        ! -name 'crash-*' \
        ! -name 'leak-*' \
        ! -name 'oom-*' \
        ! -name 'timeout-*' \
        ! -name 'slow-unit-*' \
        ! -name 'SIG*.fuzz' \
        ! -name 'HONGGFUZZ.REPORT.TXT' \
        -print0 2>/dev/null || true
      ;;
  esac
}

# Emit null-delimited crash/timeout file paths for the detected layout.
# Reads globals: AFL_DIR, FUZZER_LAYOUT (empty/unset is treated as "afl").
#   afl  — id:* entries under crashes/, timeouts/ and hangs/, both directly
#          in <dir> and in per-instance subdirectories <dir>/*/
#   flat — libFuzzer artifacts and honggfuzz crash files directly in <dir>
# Any other layout value emits nothing. find errors are ignored (best effort).
find_crash_timeout_files() {
  case "${FUZZER_LAYOUT:-afl}" in
    afl)
      local paths=()
      local kind p
      # AFL++ stores inputs that timed out in "hangs"; "timeouts" is kept for
      # output directories that use that name.
      for kind in crashes timeouts hangs; do
        if test -d "$AFL_DIR/$kind"; then
          paths+=("$AFL_DIR/$kind")
        fi
      done
      # An unmatched glob stays literal and is rejected by `test -d`, so the
      # caller's nullglob setting does not have to be toggled here.
      for kind in crashes timeouts hangs; do
        for p in "$AFL_DIR"/*/"$kind"; do
          test -d "$p" || continue
          paths+=("$p")
        done
      done
      if test "${#paths[@]}" -gt 0; then
        find "${paths[@]}" -name 'id:*' -print0 2>/dev/null || true
      fi
      ;;
    flat)
      # libFuzzer artifacts (crash-*/leak-*/oom-*/timeout-*/slow-unit-*)
      # and honggfuzz crash files (SIG*.fuzz).
      find "$AFL_DIR" -maxdepth 1 -type f \
        \( -name 'crash-*' \
           -o -name 'leak-*' \
           -o -name 'oom-*' \
           -o -name 'timeout-*' \
           -o -name 'slow-unit-*' \
           -o -name 'SIG*.fuzz' \) \
        -print0 2>/dev/null || true
      ;;
  esac
}

# Run the given command (normally one of the find_*_files functions) and print
# how many NUL-terminated records it wrote to stdout.
count_files() {
  # Drop every byte except NUL, then count what is left.
  "$@" \
    | tr -d -c '\000' \
    | wc -c
}

# Split a "FILE:LINE" target spec at its LAST ':' (so FILE may itself contain
# colons) and print "FILE<TAB>LINE".
# Returns 0 on success. On a malformed spec — no ':', empty FILE, LINE that is
# empty / non-numeric / zero — reports via err() and returns 1.
parse_target_spec() {
  local spec="$1"
  local src_file lineno

  if [[ "$spec" != *:* ]]; then
    err "Target must be FILE:LINE (got '$spec')"
    return 1
  fi

  src_file="${spec%:*}"
  lineno="${spec##*:}"

  if [[ -z "$src_file" ]]; then
    err "Target FILE part is empty in '$spec'"
    return 1
  fi
  if [[ -z "$lineno" || "$lineno" == *[!0-9]* ]]; then
    err "Target LINE must be a positive integer (got '$lineno')"
    return 1
  fi
  # Digits only at this point; reject zero in any spelling (0, 00, ...).
  if test "$lineno" -eq 0; then
    err "Target LINE must be >= 1 (got '$lineno')"
    return 1
  fi

  printf '%s\t%s\n' "$src_file" "$lineno"
}

# Read LCOV text on stdin and report how one source line is covered.
# Args: FILE LINE. Prints exactly one word:
#   covered   — some matching DA:LINE,COUNT record has COUNT > 0
#   uncovered — matching DA:LINE records exist, all with COUNT == 0
#   absent    — no DA:LINE record inside any matching SF: block
# An SF: block matches when its path equals FILE or ends in "/FILE", i.e. a
# suffix match that must start on a path boundary. Returns awk's status;
# the classification itself is only communicated via stdout.
lcov_line_state() {
  local file="$1" line="$2"
  awk -v want_file="$file" -v want_line="$line" '
    BEGIN {
      state = "absent"
      active = 0
      suffix = "/" want_file
      slen = length(suffix)
    }
    /^SF:/ {
      path = substr($0, 4)
      plen = length(path)
      active = 0
      if (path == want_file)
        active = 1
      else if (plen >= slen && substr(path, plen - slen + 1) == suffix)
        active = 1
      next
    }
    /^end_of_record/ { active = 0; next }
    active && /^DA:/ {
      split(substr($0, 4), rec, ",")
      if (rec[1] == want_line) {
        # "covered" is sticky: a later zero count must not downgrade it.
        if (rec[2] + 0 > 0)
          state = "covered"
        else if (state != "covered")
          state = "uncovered"
      }
    }
    END { print state }
  '
}

# Emit the coverage replay driver C source (coverage_driver.c) on stdout.
# The heredoc delimiter is quoted, so the C text below is written verbatim
# with no shell expansion. What the emitted program does:
#   - a constructor installs one handler for SIGABRT/SIGSEGV/SIGBUS/SIGFPE/
#     SIGILL/SIGTERM; the handler calls __llvm_profile_write_file() and then
#     re-raises the signal (SA_RESETHAND restores the default action first)
#   - main() calls LLVMFuzzerInitialize when that weak symbol is linked in,
#     then reads every file named in argv fully into memory and passes it to
#     LLVMFuzzerTestOneInput; files that cannot be opened or are empty are
#     skipped
#   - the "###SIGNATURE_...###" string literal is the marker that
#     is_cov_driver_binary greps for in compiled binaries
emit_driver() {
  cat << 'DRIVER_EOF'
/* coverage_driver.c - Replay driver for LLVMFuzzerTestOneInput harnesses.
 * Reads files from command-line arguments and calls LLVMFuzzerTestOneInput.
 * Crash handler flushes coverage data on signals so crashing inputs still
 * contribute to the report.
 *
 * Compile and link example:
 *   clang -fprofile-instr-generate -fcoverage-mapping \
 *     -c coverage_driver.c -o coverage_driver.o
 *   clang -fprofile-instr-generate \
 *     coverage_driver.o -L./build -ltarget -o cov
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>

int LLVMFuzzerInitialize(int *argc, char ***argv) __attribute__((weak));
int LLVMFuzzerTestOneInput(const unsigned char*, size_t);

extern int __llvm_profile_write_file(void);

static void crash_handler(int sig) {
    __llvm_profile_write_file();
    fprintf(stderr, "ERROR: Coverage gathering aborted due to signal!\n");
    raise(sig);
}

__attribute__((constructor))
static void install_crash_handlers(void) {
    const int sigs[] = { SIGABRT, SIGSEGV, SIGBUS, SIGFPE, SIGILL, SIGTERM };
    struct sigaction sa = {
        .sa_handler = crash_handler,
        .sa_flags   = SA_RESETHAND,
    };
    sigemptyset(&sa.sa_mask);
    for (int i = 0; i < (int)(sizeof(sigs) / sizeof(sigs[0])); i++)
        sigaction(sigs[i], &sa, NULL);
}

int main(int argc, char **argv) {
    // needed for auto-detection in compiled binaries:
    if (argc == 2 && strcmp(argv[1], "--printsignature") == 0) {
        printf("###SIGNATURE_LLVMFUZZERTESTONEINPUT_COVERAGE###\n");
    }

    if (LLVMFuzzerInitialize) {
        fprintf(stderr, "Running LLVMFuzzerInitialize ...\n");
        LLVMFuzzerInitialize(&argc, &argv);
    }

    for (int i = 1; i < argc; i++) {
        FILE *f = fopen(argv[i], "rb");
        if (f) {
            fseek(f, 0, SEEK_END);
            long len = ftell(f);
            if (len > 0) {
                fseek(f, 0, SEEK_SET);
                unsigned char *buf = (unsigned char *)malloc((size_t)len);
                if (buf) {
                    size_t n_read = fread(buf, 1, (size_t)len, f);
                    if (n_read > 0) {
                        fprintf(stderr, "Running: %s (%d/%d) %zu bytes\n",
                                argv[i], i, argc - 1, n_read);
                        LLVMFuzzerTestOneInput((const unsigned char*)buf, n_read);
                    } else {
                        fprintf(stderr, "Error: Read failed for %s\n", argv[i]);
                    }
                    free(buf);
                }
            }
            fclose(f);
        }
    }

    fprintf(stderr, "Done.\n");
    return 0;
}
DRIVER_EOF
}

# ── usage ─────────────────────────────────────────────────────────────────────

# Print the top-level help on stdout: the command overview, followed by the
# help of the default command "report" (via usage_report).
usage_main() {
  cat << 'EOF'
Usage: cov-analysis [command] [options]

Commands:
  report      Generate coverage report from AFL++ corpus (DEFAULT)
  build       Build target with LLVM coverage instrumentation
  driver      Emit coverage_driver.c for LLVMFuzzerTestOneInput harnesses
  diff        Compare coverage between two llvm-cov JSON exports
  stability   Analyze corpus stability (per-line non-deterministic hit counts)
  search      List corpus entries that reach a given FILE:LINE

Run 'cov-analysis <command> --help' for command-specific options.

Global options:
  -V          Print version and exit
  -h, --help  Print this help and exit
EOF

  # Blank line, heading, blank line — then the report help itself.
  printf '\n%s\n\n' \
    'Help for default command "report" (does not need to be specified):'
  usage_report
}

# Print the help text of the "report" command (the default command) on stdout.
# The heredoc delimiter is quoted, so the text is emitted verbatim without any
# shell expansion.
usage_report() {
  cat << 'EOF'
Usage: cov-analysis [report] [options]

Required:
  -d <dir>    Fuzzing output directory (AFL++, libFuzzer, libafl, or honggfuzz)
  -e <cmd>    Coverage command. Use @@ as input file placeholder.
              Omit @@ to feed input via stdin instead. For a cov-analysis
              driver binary (which reads files, not stdin), @@ is appended
              automatically when omitted.

Optional:
  -o <dir>           Report output directory (default: <afl-dir>/cov)
  -t <num>           Parallel replay workers/forks (default: 1)
  -T <secs>          Timeout in seconds for crash/timeout replay (default: 5)
  --layout <kind>    Force layout: 'afl' or 'flat' (default: auto-detect)
  --ignore-regex <r> Filename regex to exclude from llvm-cov reports
                     (default: /usr/include/)
  -v                 Verbose output
  -q                 Quiet mode (suppress all [+] output)
  -V                 Print version and exit
  -h, --help         Print this help and exit

Examples:
  # Standard AFL++ replay with file-based input
  cov-analysis -d out -e "./cov @@"

  # With env vars, custom report dir, 1s timeout
  cov-analysis -d out -e "LD_LIBRARY_PATH=./lib ./cov @@" -o coverage -T 1

  # Replay coverage with 8 parallel workers
  cov-analysis -d out -e "./cov @@" -t 8

  # stdin input (binary reads from stdin)
  cov-analysis -d out -e "./cov"

  # Exclude test files and system headers from report
  cov-analysis -d out -e "./cov @@" --ignore-regex '(/usr/include/|/test/)'

  # libFuzzer corpus (flat directory of corpus files)
  cov-analysis -d ./corpus -e "./cov @@"

  # libafl corpus (flat directory of corpus files), stdin method
  cov-analysis -d ./corpus -e "./cov"

  # honggfuzz workspace (corpus + SIG*.fuzz crash files)
  cov-analysis -d ./hfuzz-workdir -e "./cov @@"
EOF
}

# Print the help text of the "build" command on stdout. The heredoc delimiter
# is quoted, so "$(nproc)" in the example is printed literally rather than
# being executed. The list of variables matches what cmd_build exports,
# including CPPFLAGS.
usage_build() {
  cat << 'EOF'
Usage: cov-analysis build <build-command> [args...]

  Sets CC/CXX/CFLAGS/CXXFLAGS/CPPFLAGS/LDFLAGS for LLVM source-based coverage
  and runs the given build command. Must be run once per build step, e.g.:
    cov-analysis build ./configure --disable-shared
    cov-analysis build make -j$(nproc)

  Set CC/CXX environment variables to override the auto-detected clang.

  The report/diff/stability commands pick llvm-profdata / llvm-cov to match
  the selected clang version (e.g. CC=clang-22 -> llvm-profdata-22), so raw
  profiles merge without a version mismatch. Keep CC/CXX consistent between
  building and reporting, or set CC to the versioned clang when reporting.

Options:
  -h, --help    Show this help
  -V            Print version and exit
EOF
}

# Print the help text of the "driver" command on stdout. The heredoc
# delimiter is quoted, so the text is emitted verbatim without any shell
# expansion.
usage_driver() {
  cat << 'EOF'
Usage: cov-analysis driver [-o output.c]

  Emits coverage_driver.c source to stdout (or to -o FILE).
  Use this for LLVMFuzzerTestOneInput harnesses to replay corpus files.

  The driver loops over all file arguments, calls LLVMFuzzerTestOneInput
  for each, and installs a crash handler that flushes profiling data so
  crashing inputs still contribute to the coverage report.

Options:
  -o <file>     Write driver source to FILE instead of stdout
  -h, --help    Show this help
  -V            Print version and exit

Example:
  cov-analysis driver -o coverage_driver.c
  clang -fprofile-instr-generate -fcoverage-mapping \
    coverage_driver.c -L./build -ltarget -o cov
EOF
}

# Print the help text of the "diff" command on stdout. The heredoc delimiter
# is quoted, so the text is emitted verbatim without any shell expansion.
usage_diff() {
  cat << 'EOF'
Usage: cov-analysis diff [-o <report-dir>] [<OLD_JSON> <NEW_JSON>]

  Compare coverage between two llvm-cov JSON exports and generate an
  HTML diff report showing newly covered, lost, and still-uncovered
  lines and functions.

  Arguments:
    OLD_JSON  Path to the baseline coverage JSON
              (default: <report-dir>/coverage_old.json)
    NEW_JSON  Path to the updated coverage JSON
              (default: <report-dir>/coverage.json)

Options:
  -o <dir>    Report directory for default-path lookups and HTML output
              (default: .). The HTML is written to <dir>/coverage_diff.html.
  -h, --help  Print this help and exit
EOF
}

# Print the help text of the "stability" command on stdout. The heredoc
# delimiter is quoted, so the text is emitted verbatim without any shell
# expansion. cmd_stability also prints this text (to stderr) after an
# unknown-option error.
usage_stability() {
  cat << 'EOF'
Usage: cov-analysis stability [options]

  Run each corpus input N times with LLVM coverage, collect per-line hit
  counts, and flag lines where counts vary across runs as "unstable."
  Reports a stability percentage. If instability is found with the default
  4 runs, reruns for a total of 8 to confirm.

  Resilient to flaky passes: a pass whose profiles cannot be collected or
  merged (e.g. a crashing input that left a truncated .profraw behind) is
  skipped and the run continues with the remaining passes, as long as at
  least 2 passes succeed.

Required:
  -d <dir>    Fuzzing output directory (AFL++, libFuzzer, libafl, or honggfuzz)
  -e <cmd>    Coverage command. Use @@ as input file placeholder.
              Omit @@ to feed input via stdin instead. For a cov-analysis
              driver binary (which reads files, not stdin), @@ is appended
              automatically when omitted.

Optional:
  -n <num>           Number of runs per corpus pass (default: 4)
  -s <prefix>        Only consider source lines whose file path contains
                     this prefix (e.g. -s src/)
  -t <num>           Parallel replay workers (default: 1)
  -T <secs>          Per-input timeout in seconds (default: 5)
  --layout <kind>    Force layout: 'afl' or 'flat' (default: auto-detect)
  -v                 Verbose output
  -q                 Quiet mode (suppress all [+] output)
  -V                 Print version and exit
  -h, --help         Print this help and exit

Examples:
  cov-analysis stability -d out -e "./cov @@"
  cov-analysis stability -d out -e "./cov @@" -n 8 -s src/
  cov-analysis stability -d ./corpus -e "./cov @@" -t 4
EOF
}

# Print the help text of the "search" command on stdout. The heredoc
# delimiter is quoted, so the text is emitted verbatim without any shell
# expansion.
usage_search() {
  cat << 'EOF'
Usage: cov-analysis search FILE:LINE -d <dir> -e "<cmd>" [options]

  Report which corpus entries reach a given source line. Each input is replayed
  in isolation through the coverage binary; an input "reaches" FILE:LINE when its
  line-execution count for that line is > 0.

  Matching input paths are printed to stdout (one per line, sorted), so the
  output pipes cleanly. Progress and the summary go to stderr.

Required:
  FILE:LINE   Source location, e.g. src/foo.c:123 (single line; split on last ':')
  -d <dir>    Fuzzing output directory (AFL++, libFuzzer, libafl, or honggfuzz)
  -e <cmd>    Coverage command. Use @@ as input file placeholder.
              Omit @@ to feed input via stdin instead. For a cov-analysis
              driver binary (which reads files, not stdin), @@ is appended
              automatically when omitted.

Optional:
  --crashes          Also scan crash and timeout inputs (default: corpus only)
  -t <num>           Parallel workers for the per-input scan (default: 1)
  -T <secs>          Per-input replay timeout in seconds (default: 5)
  --layout <kind>    Force layout: 'afl' or 'flat' (default: auto-detect)
  -v                 Verbose output
  -q                 Quiet mode (suppress all [+] output)
  -V                 Print version and exit
  -h, --help         Print this help and exit

Examples:
  cov-analysis search src/parser.c:142 -d out -e "./cov @@"
  cov-analysis search src/parser.c:142 -d out -e "./cov @@" --crashes -t 8
  cov-analysis search src/parser.c:142 -d ./corpus -e "./cov"     # stdin input

  # Feed the reaching inputs into another tool:
  cov-analysis search src/parser.c:142 -d out -e "./cov @@" | xargs -I{} cp {} ./hits/
EOF
}

# ── command: build ────────────────────────────────────────────────────────────

# Run a build command with the environment set up for LLVM source-based
# coverage.
# Args: <build-command> [args...]; a leading -h/--help or -V is handled here.
# Reads:   CC, CXX (optional overrides), VERSION
# Exports: CC, CXX, CFLAGS, CXXFLAGS, CPPFLAGS, LDFLAGS
# Never returns: replaces the shell with the build command via exec, or exits
# (1 on missing arguments, AFL++ compiler in CC/CXX, or no clang found).
cmd_build() {
  if test $# -eq 0; then usage_build; exit 1; fi
  case "$1" in
    -h|--help) usage_build; exit 0 ;;
    -V)        echo "cov-analysis-$VERSION"; exit 0 ;;
  esac

  # Build mode: refuse if an AFL++ compiler is already set. Match on basename
  # against the known afl-* wrappers rather than a substring search, so paths
  # like /opt/waffle/bin/clang do not trigger a false positive.
  local _cc_base _cxx_base ver
  _cc_base="$(basename -- "${CC-}" 2>/dev/null || true)"
  _cxx_base="$(basename -- "${CXX-}" 2>/dev/null || true)"
  case "$_cc_base $_cxx_base" in
    *afl-clang*|*afl-gcc*|*afl-g++*|*afl-cc*|*afl-c++*)
      err "AFL++ compiler is set in CC/CXX — unset it before building a coverage binary."
      exit 1
      ;;
  esac

  # Auto-detect clang if CC is not set: bare name first, then versioned.
  if test -z "${CC-}"; then
    if command -v clang >/dev/null 2>&1; then
      export CC=clang
      export CXX=clang++
    else
      for ver in 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11; do
        if command -v "clang-$ver" >/dev/null 2>&1; then
          export CC="clang-$ver"
          export CXX="clang++-$ver"
          break
        fi
      done
    fi
  fi

  if test -z "${CC-}"; then
    err "clang not found. Install clang or set CC/CXX."
    exit 1
  fi

  # CC given without CXX: derive the matching C++ driver from a clang CC
  # (clang-22 -> clang++-22, /opt/llvm/bin/clang -> /opt/llvm/bin/clang++).
  # Without this, the "$CXX" expansion below aborts the script under `set -u`.
  # _cc_base still describes CC here, because auto-detection always sets CXX.
  if test -z "${CXX-}"; then
    case "$_cc_base" in
      *clang++*) export CXX="$CC" ;;
      *clang*)   export CXX="${CC%"$_cc_base"}${_cc_base/clang/clang++}" ;;
    esac
  fi
  echo "[+] Using compiler: $CC / ${CXX:-<not set>}" >&2

  export CFLAGS="-fprofile-instr-generate -fcoverage-mapping -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"
  export CXXFLAGS="$CFLAGS"
  export CPPFLAGS="$CFLAGS"
  export LDFLAGS="-fprofile-instr-generate"

  exec "$@"
}

# ── command: driver ──────────────────────────────────────────────────────────

# "driver" command: write the replay driver source to stdout, or to the file
# given with -o.
# Options (accepted in any position): -o <file>, -h/--help, -V.
# Anything else is rejected with an error plus the usage text on stderr and
# exit status 1, matching how the other commands treat unknown options.
# Always exits the shell: 0 on success/help/version, 1 on a usage error.
cmd_driver() {
  local out_file=""

  while test $# -gt 0; do
    case "$1" in
      -h|--help) usage_driver; exit 0 ;;
      -V)        echo "cov-analysis-$VERSION"; exit 0 ;;
      -o)        need_arg "-o" "${2-}"; out_file="$2"; shift 2 ;;
      *) err "Unknown option: $1"; echo "" >&2; usage_driver >&2; exit 1 ;;
    esac
  done

  if test -n "$out_file"; then
    emit_driver > "$out_file"
    echo "[+] coverage_driver.c written to: $out_file" >&2
  else
    emit_driver
  fi
  exit 0
}

# ── command: stability ───────────────────────────────────────────────────────

cmd_stability() {
  local AFL_DIR=""
  local COVERAGE_CMD=""
  local RUNS=4
  local SOURCE_FILTER=""
  local FORKS=1
  local TIMEOUT=5
  local LLVM_PROFDATA=""
  local LLVM_COV=""
  local STAB_DIR=""
  local COV_BINARY=""
  local USER_RUNS=""   # set if -n was explicitly provided
  local FUZZER_LAYOUT="${FUZZER_LAYOUT:-}"
  local GAWK=""

  if test $# -eq 0; then usage_stability; exit 1; fi

  while [ $# -gt 0 ]; do
    case "$1" in
      -d)        need_arg "-d" "${2-}"; AFL_DIR="$2";                  shift 2 ;;
      -e)        need_arg "-e" "${2-}"; COVERAGE_CMD="$2";             shift 2 ;;
      -n)        need_arg "-n" "${2-}"; RUNS="$2"; USER_RUNS="1";      shift 2 ;;
      -s)        need_arg "-s" "${2-}"; SOURCE_FILTER="$2";            shift 2 ;;
      -t)        need_arg "-t" "${2-}"; FORKS="$2";                    shift 2 ;;
      -T)        need_arg "-T" "${2-}"; TIMEOUT="$2";                  shift 2 ;;
      -v)        VERBOSE=1;                     shift   ;;
      -q)        QUIET=1;                       shift   ;;
      -V)        echo "cov-analysis-$VERSION"; exit 0 ;;
      -h|--help) usage_stability; exit 0 ;;
      --layout)  need_arg "--layout" "${2-}"; FUZZER_LAYOUT="$2";      shift 2 ;;
      *) err "Unknown option: $1"; echo "" >&2; usage_stability >&2; exit 1 ;;
    esac
  done

  # ── validate inputs ────────────────────────────────────────────────────────
  if test -z "$AFL_DIR"; then
    err "Must specify input directory with -d"
    exit 1
  fi
  if test -z "$COVERAGE_CMD"; then
    err "Must specify coverage command with -e"
    exit 1
  fi
  if ! test -d "$AFL_DIR"; then
    err "Input directory does not exist: $AFL_DIR"
    exit 1
  fi
  case "$RUNS" in
    ''|*[!0-9]*|0|1)
      err "Run count (-n) must be a positive integer >= 2: $RUNS"
      exit 1
      ;;
  esac
  case "$FORKS" in
    ''|*[!0-9]*|0)
      err "Replay worker count must be a positive integer: $FORKS"
      exit 1
      ;;
  esac
  case "$TIMEOUT" in
    ''|*[!0-9]*|0)
      err "Timeout (-T) must be a positive integer in seconds: $TIMEOUT"
      exit 1
      ;;
  esac

  if test -n "$FUZZER_LAYOUT"; then
    case "$FUZZER_LAYOUT" in
      afl|flat) ;;
      *) err "--layout must be 'afl' or 'flat' (got '$FUZZER_LAYOUT')"; exit 1 ;;
    esac
  fi

  COV_BINARY="$(extract_binary "$COVERAGE_CMD")"
  if test -z "$COV_BINARY"; then
    err "Could not extract binary path from coverage command: $COVERAGE_CMD"
    exit 1
  fi
  if ! test -x "$COV_BINARY"; then
    err "Coverage binary not found or not executable: $COV_BINARY"
    exit 1
  fi
  logv "Coverage binary : $COV_BINARY"

  # Forgotten @@ on a driver binary → supply it automatically (argv-only input).
  COVERAGE_CMD="$(add_at_if_driver "$COVERAGE_CMD" "$COV_BINARY")"
  logv "Replay command  : $COVERAGE_CMD"

  # ── detect LLVM tools ──────────────────────────────────────────────────────
  LLVM_PROFDATA="$(find_tool llvm-profdata)" || {
    err "llvm-profdata not found. Install LLVM (apt install llvm / dnf install llvm)."
    exit 1
  }
  LLVM_COV="$(find_tool llvm-cov)" || {
    err "llvm-cov not found. Install LLVM (apt install llvm / dnf install llvm)."
    exit 1
  }
  log "LLVM tools      : $LLVM_PROFDATA, $LLVM_COV"

  # Stability aggregation needs gawk (multi-dim arrays, ARGIND). mawk and
  # busybox awk will not work. Prefer an explicit `gawk` binary if the
  # system `awk` is not gawk.
  if awk --version 2>/dev/null | grep -qi '^GNU Awk'; then
    GAWK="awk"
  elif command -v gawk >/dev/null 2>&1; then
    GAWK="gawk"
  else
    err "cov-analysis stability requires GNU awk (gawk)."
    err "Install it (apt install gawk / dnf install gawk)."
    exit 1
  fi
  logv "Using gawk      : $GAWK"

  # ── detect or honor fuzzer layout ──────────────────────────────────────────
  if test -z "$FUZZER_LAYOUT"; then
    FUZZER_LAYOUT="$(detect_fuzzer_layout)"
  fi
  case "$FUZZER_LAYOUT" in
    afl)   log "Fuzzer layout   : AFL++ (queue/crashes/timeouts)" ;;
    flat)  log "Fuzzer layout   : flat (libFuzzer/libafl/honggfuzz)" ;;
    empty)
      err "No input files found in $AFL_DIR"
      err "Expected one of: AFL++ out dir, libFuzzer/libafl corpus, honggfuzz workspace."
      err "Override detection with --layout afl|flat if needed."
      exit 1
      ;;
  esac

  # ── prepare workspace ──────────────────────────────────────────────────────
  STAB_DIR="$(mktemp -d /tmp/cov-analysis-stability.XXXXXX)"
  trap "rm -rf '$STAB_DIR'" EXIT INT TERM

  local CORPUS_SIZE
  CORPUS_SIZE=$(count_files find_queue_files)
  if test "$CORPUS_SIZE" -eq 0; then
    err "No corpus files found in $AFL_DIR"
    exit 1
  fi

  log "Input directory : $AFL_DIR"
  log "Corpus size     : $CORPUS_SIZE inputs"
  log "Coverage binary : $COV_BINARY"
  log "Runs            : $RUNS"
  test -n "$SOURCE_FILTER" && log "Source filter   : $SOURCE_FILTER"

  # ── per-run coverage collection ────────────────────────────────────────────
  # Collect one pass: replay all corpus, merge profraw, export LCOV, parse hits.
  # Accepts run index as $1; reads globals COVERAGE_CMD, FORKS, STAB_DIR,
  # COV_BINARY, SOURCE_FILTER, LLVM_PROFDATA, LLVM_COV, RUNS.
  _stab_collect_pass() {
    local idx="$1"
    local run_dir="$STAB_DIR/run_${idx}"
    mkdir -p "$run_dir"

    log "Collecting pass $idx/$RUNS..."
    export LLVM_PROFILE_FILE="$run_dir/cov-%p.profraw"

    case "$COVERAGE_CMD" in
      *@@*)
        find_queue_files | xargs -0 -r -n 1 -P "$FORKS" \
          bash -c 'timeout --signal=SIGTERM "${2}s" bash -c "${1/@@/$3}"' \
          _ "$COVERAGE_CMD" "$TIMEOUT" \
          >/dev/null 2>&1 || true
        ;;
      *)
        find_queue_files | xargs -0 -r -n 1 -P "$FORKS" \
          bash -c 'timeout --signal=SIGTERM "${2}s" bash -c "$1" < "$3"' \
          _ "$COVERAGE_CMD" "$TIMEOUT" \
          >/dev/null 2>&1 || true
        ;;
    esac

    # A single failed pass must never abort the whole run: stability is a
    # multi-sample measurement, so we skip a bad pass (return 1) and let the
    # caller carry on with the rest. The most common cause is a crashing or
    # timed-out input that left a truncated .profraw behind.
    local profraw_count
    profraw_count=$(find "$run_dir" -name '*.profraw' -printf . 2>/dev/null | wc -c)
    if test "$profraw_count" -eq 0; then
      err "No .profraw files generated in pass $idx; skipping it."
      return 1
    fi
    logv "Pass $idx: merging $profraw_count profraw file(s)..."

    # --failure-mode=all: tolerate individual corrupt/truncated .profraw files
    # (drop them, keep the valid ones); only fail if every profile is invalid.
    "$LLVM_PROFDATA" merge -sparse --failure-mode=all "$run_dir"/*.profraw \
      -o "$STAB_DIR/merged_run_${idx}.profdata" 2>/dev/null || {
      err "llvm-profdata merge failed for pass $idx; skipping it."
      return 1
    }

    logv "Pass $idx: exporting LCOV..."
    "$LLVM_COV" export "$COV_BINARY" \
      --format=lcov \
      "-instr-profile=$STAB_DIR/merged_run_${idx}.profdata" \
      > "$STAB_DIR/run_${idx}.lcov" 2>/dev/null || {
      err "llvm-cov export (lcov) failed for pass $idx; skipping it."
      return 1
    }

    logv "Pass $idx: parsing hit counts..."
    awk -v filter="$SOURCE_FILTER" '
      /^SF:/ { file = substr($0, 4) }
      /^DA:/ {
        if (filter != "" && index(file, filter) == 0) next
        split(substr($0, 4), a, ",")
        print file ":" a[1] "\t" a[2]
      }
    ' "$STAB_DIR/run_${idx}.lcov" | sort > "$STAB_DIR/run_${idx}.hits"
    return 0
  }

  # stab_passes holds the run indices that produced usable per-line hit counts.
  # Failed passes are skipped (see _stab_collect_pass) instead of aborting.
  local stab_passes=()
  local run_idx
  for run_idx in $(seq 1 "$RUNS"); do
    if _stab_collect_pass "$run_idx"; then
      stab_passes+=("$run_idx")
    fi
  done

  log "Collected all data, analyzing ..."

  # Comparing hit counts across runs needs at least two successful passes.
  if test "${#stab_passes[@]}" -lt 2; then
    err "Only ${#stab_passes[@]} of $RUNS pass(es) produced coverage data — need at least 2 to assess stability."
    err "Check that the binary is instrumented with -fprofile-instr-generate (see 'cov-analysis build')."
    exit 1
  fi
  if test "${#stab_passes[@]}" -lt "$RUNS"; then
    err "$((RUNS - ${#stab_passes[@]})) of $RUNS pass(es) failed and were skipped; analyzing the ${#stab_passes[@]} that succeeded."
  fi

  # ── stability analysis ─────────────────────────────────────────────────────
  # Args: the run indices of the successful passes (e.g. 1 3 4). Reads the
  # matching run_<idx>.hits files from $STAB_DIR. Uses gawk ARGIND (1..n in
  # argument order) to track which file each record belongs to.
  # Outputs: one "file:linenum" per unstable line, then "STABILITY_STATS S T".
  _stab_analyze() {
    local indices=("$@")
    local n="${#indices[@]}"
    local hit_files=()
    local i
    for i in "${indices[@]}"; do
      hit_files+=("$STAB_DIR/run_${i}.hits")
    done
    "$GAWK" -v n="$n" '
      { counts[$1][ARGIND] = $2 }
      END {
        total = 0; stable_count = 0
        for (key in counts) {
          total++
          ref = (1 in counts[key]) ? counts[key][1] : ""
          stable = 1
          for (i = 2; i <= n; i++) {
            val = (i in counts[key]) ? counts[key][i] : ""
            if (val != ref) { stable = 0; break }
          }
          if (stable) { stable_count++ }
          else { print key }
        }
        printf "STABILITY_STATS %d %d\n", stable_count, total
      }
    ' "${hit_files[@]}"
  }

  log "Analyzing stability across ${#stab_passes[@]} runs..."
  local analysis_out
  analysis_out="$(_stab_analyze "${stab_passes[@]}")"

  local stable_count total_lines unstable_lines
  stable_count=$(printf '%s\n' "$analysis_out" | awk '/^STABILITY_STATS/{print $2}')
  total_lines=$(printf '%s\n'  "$analysis_out" | awk '/^STABILITY_STATS/{print $3}')
  # grep -v exits 1 (and, under pipefail, aborts) when every line is the
  # STABILITY_STATS record — i.e. when coverage is perfectly stable. Tolerate it.
  unstable_lines="$(printf '%s\n' "$analysis_out" | grep -v '^STABILITY_STATS' | sort -V || true)"
  : "${stable_count:=0}" "${total_lines:=0}"

  # ── extend to double the runs if instability found (default -n only) ───────
  if test -n "$unstable_lines" && test -z "$USER_RUNS"; then
    local extra_end=$((RUNS * 2))
    log "Instability detected, extending from $RUNS to $extra_end runs..."
    for run_idx in $(seq $((RUNS + 1)) "$extra_end"); do
      if _stab_collect_pass "$run_idx"; then
        stab_passes+=("$run_idx")
      fi
    done
    RUNS="$extra_end"

    log "Re-analyzing stability across ${#stab_passes[@]} runs..."
    analysis_out="$(_stab_analyze "${stab_passes[@]}")"
    stable_count=$(printf '%s\n' "$analysis_out" | awk '/^STABILITY_STATS/{print $2}')
    total_lines=$(printf '%s\n'  "$analysis_out" | awk '/^STABILITY_STATS/{print $3}')
    unstable_lines="$(printf '%s\n' "$analysis_out" | grep -v '^STABILITY_STATS' | sort -V || true)"
    : "${stable_count:=0}" "${total_lines:=0}"
  fi

  # ── collapse consecutive unstable lines into ranges ────────────────────────
  local ranges=""
  if test -n "$unstable_lines"; then
    ranges="$(printf '%s\n' "$unstable_lines" | awk '
      BEGIN { prev_file = ""; prev_line = -999; range_start = -999 }
      {
        n = split($0, parts, ":")
        linenum = parts[n] + 0
        file = $0; sub(":" parts[n] "$", "", file)

        if (file != prev_file || linenum != prev_line + 1) {
          if (prev_file != "") {
            if (range_start == prev_line) printf "  %s:%d\n", prev_file, range_start
            else printf "  %s:%d-%d\n", prev_file, range_start, prev_line
          }
          prev_file = file
          range_start = linenum
        }
        prev_line = linenum
      }
      END {
        if (prev_file != "") {
          if (range_start == prev_line) printf "  %s:%d\n", prev_file, range_start
          else printf "  %s:%d-%d\n", prev_file, range_start, prev_line
        }
      }
    ')"
  fi

  # ── print report ───────────────────────────────────────────────────────────
  local unstable_count=$((total_lines - stable_count))
  local pct
  if test "$total_lines" -gt 0; then
    pct="$(awk -v s="$stable_count" -v t="$total_lines" \
      'BEGIN { printf "%.1f", 100.0 * s / t }')"
  else
    pct="n/a"
  fi

  if test "$QUIET" -eq 0; then
    echo ""
    echo "Stability Report"
    echo "--------------------------------------------------------"
    printf "Corpus size : %s inputs\n" "$CORPUS_SIZE"
    if test "${#stab_passes[@]}" -eq "$RUNS"; then
      printf "Runs        : %s\n" "$RUNS"
    else
      printf "Runs        : %d analyzed (%d of %d failed and were skipped)\n" \
        "${#stab_passes[@]}" "$((RUNS - ${#stab_passes[@]}))" "$RUNS"
    fi
    printf "Stability   : %s%% (%d/%d executed lines stable)\n" \
      "$pct" "$stable_count" "$total_lines"

    if test -n "$unstable_lines"; then
      echo ""
      printf "~~ Variable-count lines (%d lines):\n" "$unstable_count"
      echo "   Lines with varying hit counts:"
      echo ""
      printf '%s\n' "$ranges"
      echo ""
      echo "[!] Unstable coverage detected."
    else
      echo ""
      echo "[+] All executed lines are perfectly stable."
    fi
  fi
}

# ── command: search ────────────────────────────────────────────────────────

# Replay ALL selected inputs into one profraw dir for the union pre-check.
# Mirrors cmd_report's replay (trailing-@@ batch when the binary carries the
# LLVMFuzzerTestOneInput signature; mid-@@ loop; stdin loop). Crash/timeout
# inputs are replayed under the -T timeout when INCLUDE_CRASHES=1.
# Reads globals: COVERAGE_CMD, COV_BINARY, FORKS, TIMEOUT, INCLUDE_CRASHES.
_search_replay_union() {
  local outdir="$1"
  export LLVM_PROFILE_FILE="$outdir/cov-%p.profraw"

  # Work out once how an input reaches the target:
  #   batch - command ends in @@ and the binary is a coverage driver, so many
  #           files are appended to a single invocation
  #   subst - @@ is present but cannot be batched; it is replaced per input
  #   stdin - no @@ at all; each input is fed on standard input
  local replay_mode="stdin"
  if [[ "$COVERAGE_CMD" == *@@* ]]; then
    replay_mode="subst"
    if [[ "${COVERAGE_CMD##* }" == "@@" ]] \
       && is_cov_driver_binary "$COV_BINARY"; then
      replay_mode="batch"
    fi
  fi

  # Queue inputs are replayed without a timeout; failures are ignored.
  case "$replay_mode" in
    batch)
      local batch_cmd="${COVERAGE_CMD% @@}"
      # shellcheck disable=SC2016
      find_queue_files | xargs -0 -r -n 128 -P "$FORKS" \
        sh -c "${batch_cmd}"' "$@"' -- >/dev/null 2>&1 || true
      ;;
    subst)
      find_queue_files | xargs -0 -r -n 1 -P "$FORKS" \
        bash -c 'eval "${1/@@/$2}"' _ "$COVERAGE_CMD" >/dev/null 2>&1 || true
      ;;
    stdin)
      find_queue_files | xargs -0 -r -n 1 -P "$FORKS" \
        bash -c 'eval "$1" < "$2"' _ "$COVERAGE_CMD" >/dev/null 2>&1 || true
      ;;
  esac

  # Crash/timeout inputs are opt-in and always run one at a time under the
  # -T timeout.
  test "$INCLUDE_CRASHES" -eq 1 || return 0

  if [[ "$replay_mode" == "stdin" ]]; then
    find_crash_timeout_files | xargs -0 -r -n 1 -P "$FORKS" \
      bash -c 'timeout --signal=SIGTERM "${2}s" bash -c "$1" < "$3"' \
      _ "$COVERAGE_CMD" "$TIMEOUT" >/dev/null 2>&1 || true
  else
    find_crash_timeout_files | xargs -0 -r -n 1 -P "$FORKS" \
      bash -c 'timeout --signal=SIGTERM "${2}s" bash -c "${1/@@/$3}"' \
      _ "$COVERAGE_CMD" "$TIMEOUT" >/dev/null 2>&1 || true
  fi
}

# Per-input worker for the parallel scan. Replays ONE input in isolation, merges
# its profile, and prints the input path iff the target line is `covered`.
# The input path is $1; all other config is read from exported COV_SEARCH_* env
# vars (cmd_search exports them before the xargs fan-out). This function and
# lcov_line_state are `export -f`'d so the xargs `bash -c` subshells can call them.
# Always best-effort: never aborts on a single bad input.
_search_one_input() {
  local input_path="$1"
  local scratch
  local state="absent"

  # Each input gets a private scratch dir below the shared work root so that
  # parallel workers never see each other's profiles.
  if ! scratch="$(mktemp -d "${COV_SEARCH_WORKROOT}/w.XXXXXX")"; then
    return 0
  fi
  export LLVM_PROFILE_FILE="$scratch/cov-%p.profraw"

  # Replay under the timeout: via @@ substitution when the command has one,
  # otherwise on stdin. A failing or hanging target is not an error here.
  if [[ "$COV_SEARCH_CMD" == *@@* ]]; then
    timeout --signal=SIGTERM "${COV_SEARCH_TIMEOUT}s" \
      bash -c "${COV_SEARCH_CMD/@@/$input_path}" >/dev/null 2>&1 || true
  else
    timeout --signal=SIGTERM "${COV_SEARCH_TIMEOUT}s" \
      bash -c "$COV_SEARCH_CMD" < "$input_path" >/dev/null 2>&1 || true
  fi

  # Only ask llvm-cov when the run left a profile behind and it merged cleanly.
  if compgen -G "$scratch/cov-"'*.profraw' >/dev/null 2>&1 \
     && "$COV_SEARCH_PROFDATA" merge -sparse "$scratch"/cov-*.profraw \
          -o "$scratch/m.profdata" 2>/dev/null; then
    state="$("$COV_SEARCH_COV" export "$COV_SEARCH_BIN" --format=lcov \
               "-instr-profile=$scratch/m.profdata" 2>/dev/null \
             | lcov_line_state "$COV_SEARCH_FILE" "$COV_SEARCH_LINE")"
  fi

  rm -rf "$scratch"
  if test "$state" = "covered"; then
    printf '%s\n' "$input_path"
  fi
  return 0
}

# Entry point of the `search` command: print the corpus inputs that execute one
# source line.
#
# Arguments (parsed from "$@"):
#   -d DIR          input directory (required)
#   -e CMD          coverage replay command (required)
#   -t N            number of parallel replay workers (default 1)
#   -T SECS         per-input timeout in seconds (default 5)
#   --crashes       also replay crash/timeout inputs
#   --layout L      force the fuzzer layout: afl | flat
#   -v / -q         verbose / quiet diagnostics
#   -V, -h|--help   print version / usage and exit 0
#   FILE:LINE       the single target to look for (required)
#
# Output contract: stdout carries ONLY the sorted list of matching input paths,
# one per line, so it can be piped. Every diagnostic goes to stderr.
#
# Exit status: 0 when the search ran (even with zero matches); 1 on invalid
# arguments, missing tools/inputs, or when no usable profile could be produced.
cmd_search() {
  local AFL_DIR=""
  local COVERAGE_CMD=""
  local TARGET_SPEC=""
  local FORKS=1
  local TIMEOUT=5
  local INCLUDE_CRASHES=0
  local LLVM_PROFDATA=""
  local LLVM_COV=""
  local COV_BINARY=""
  local FUZZER_LAYOUT="${FUZZER_LAYOUT:-}"
  local TARGET_FILE="" TARGET_LINE=""

  if test $# -eq 0; then usage_search; exit 1; fi

  while [ $# -gt 0 ]; do
    case "$1" in
      -d)        need_arg "-d" "${2-}"; AFL_DIR="$2";       shift 2 ;;
      -e)        need_arg "-e" "${2-}"; COVERAGE_CMD="$2";  shift 2 ;;
      -t)        need_arg "-t" "${2-}"; FORKS="$2";         shift 2 ;;
      -T)        need_arg "-T" "${2-}"; TIMEOUT="$2";       shift 2 ;;
      --crashes) INCLUDE_CRASHES=1;                         shift   ;;
      -v)        VERBOSE=1;                                 shift   ;;
      -q)        QUIET=1;                                   shift   ;;
      -V)        echo "cov-analysis-$VERSION"; exit 0 ;;
      -h|--help) usage_search; exit 0 ;;
      --layout)  need_arg "--layout" "${2-}"; FUZZER_LAYOUT="$2"; shift 2 ;;
      -*)        err "Unknown option: $1"; echo "" >&2; usage_search >&2; exit 1 ;;
      *)
        # The only positional argument is the FILE:LINE target.
        if test -n "$TARGET_SPEC"; then
          err "Unexpected extra argument: $1 (only one FILE:LINE target is allowed)"
          exit 1
        fi
        TARGET_SPEC="$1"; shift ;;
    esac
  done

  # ── validate inputs ──────────────────────────────────────────────────────
  if test -z "$TARGET_SPEC"; then
    err "Must specify a FILE:LINE target (e.g. src/foo.c:123)"; exit 1
  fi
  if test -z "$AFL_DIR"; then
    err "Must specify input directory with -d"; exit 1
  fi
  if test -z "$COVERAGE_CMD"; then
    err "Must specify coverage command with -e"; exit 1
  fi
  if ! test -d "$AFL_DIR"; then
    err "Input directory does not exist: $AFL_DIR"; exit 1
  fi
  case "$FORKS" in
    ''|*[!0-9]*|0) err "Replay worker count must be a positive integer: $FORKS"; exit 1 ;;
  esac
  case "$TIMEOUT" in
    ''|*[!0-9]*|0) err "Timeout (-T) must be a positive integer in seconds: $TIMEOUT"; exit 1 ;;
  esac
  if test -n "$FUZZER_LAYOUT"; then
    case "$FUZZER_LAYOUT" in
      afl|flat) ;;
      *) err "--layout must be 'afl' or 'flat' (got '$FUZZER_LAYOUT')"; exit 1 ;;
    esac
  fi

  # Parse and split the target spec. parse_target_spec's output is split at
  # its last tab: file before it, line number after it.
  local parsed
  parsed="$(parse_target_spec "$TARGET_SPEC")" || exit 1
  TARGET_FILE="${parsed%$'\t'*}"
  TARGET_LINE="${parsed##*$'\t'}"

  COV_BINARY="$(extract_binary "$COVERAGE_CMD")"
  if test -z "$COV_BINARY"; then
    err "Could not extract binary path from coverage command: $COVERAGE_CMD"; exit 1
  fi
  if ! test -x "$COV_BINARY"; then
    err "Coverage binary not found or not executable: $COV_BINARY"; exit 1
  fi

  # Forgotten @@ on a driver binary → supply it automatically (argv-only input).
  COVERAGE_CMD="$(add_at_if_driver "$COVERAGE_CMD" "$COV_BINARY")"
  # logv writes to stdout; redirect it so -v cannot leak a diagnostic line
  # into the machine-readable match list.
  logv "Replay command  : $COVERAGE_CMD" >&2

  # ── detect LLVM tools ────────────────────────────────────────────────────
  LLVM_PROFDATA="$(find_tool llvm-profdata)" || {
    err "llvm-profdata not found. Install LLVM (apt install llvm / dnf install llvm)."; exit 1
  }
  LLVM_COV="$(find_tool llvm-cov)" || {
    err "llvm-cov not found. Install LLVM (apt install llvm / dnf install llvm)."; exit 1
  }
  loge "LLVM tools      : $LLVM_PROFDATA, $LLVM_COV"

  # ── detect or honor fuzzer layout ────────────────────────────────────────
  if test -z "$FUZZER_LAYOUT"; then
    FUZZER_LAYOUT="$(detect_fuzzer_layout)"
  fi
  case "$FUZZER_LAYOUT" in
    afl)  loge "Fuzzer layout   : AFL++ (queue/crashes/timeouts)" ;;
    flat) loge "Fuzzer layout   : flat (libFuzzer/libafl/honggfuzz)" ;;
    empty)
      err "No input files found in $AFL_DIR"
      err "Expected one of: AFL++ out dir, libFuzzer/libafl corpus, honggfuzz workspace."
      err "Override detection with --layout afl|flat if needed."
      exit 1 ;;
  esac

  # ── count selected inputs ────────────────────────────────────────────────
  local QUEUE_COUNT CRASH_COUNT=0 TOTAL
  QUEUE_COUNT=$(count_files find_queue_files)
  if test "$QUEUE_COUNT" -eq 0; then
    err "No corpus files found in $AFL_DIR"; exit 1
  fi
  if test "$INCLUDE_CRASHES" -eq 1; then
    CRASH_COUNT=$(count_files find_crash_timeout_files)
  fi
  TOTAL=$((QUEUE_COUNT + CRASH_COUNT))

  loge "Target          : $TARGET_FILE:$TARGET_LINE"
  loge "Inputs to scan  : $TOTAL (queue=$QUEUE_COUNT, crashes/timeouts=$CRASH_COUNT)"

  # ── workspace ──────────────────────────────────────────────────────────────
  local SEARCH_DIR
  SEARCH_DIR="$(mktemp -d /tmp/cov-analysis-search.XXXXXX)"
  # Cleanup runs on every exit path. INT/TERM must end the run explicitly: a
  # handler that only deletes the directory lets the script continue against a
  # workspace that no longer exists and fail later with a misleading error.
  trap "rm -rf -- '$SEARCH_DIR'" EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM

  # ── union pre-check ────────────────────────────────────────────────────────
  # One merged profile over all inputs answers "can anything reach the line?"
  # before the per-input scan is started.
  loge "Union pre-check : replaying $TOTAL input(s) to test reachability..."
  local UNION_DIR="$SEARCH_DIR/union"
  mkdir -p "$UNION_DIR"
  _search_replay_union "$UNION_DIR"

  local praw_count
  praw_count=$(find "$UNION_DIR" -name 'cov-*.profraw' -printf . 2>/dev/null | wc -c)
  if test "$praw_count" -eq 0; then
    err "No .profraw files generated."
    err "Check that the binary is instrumented with -fprofile-instr-generate."
    err "Use 'cov-analysis build' to build the coverage binary."
    exit 1
  fi

  # A failed merge (e.g. llvm-profdata not matching the compiler that produced
  # the profiles) says nothing about the target line, so report it as an error
  # rather than as "line not in the coverage data".
  local union_state="absent"
  if ! "$LLVM_PROFDATA" merge -sparse "$UNION_DIR"/cov-*.profraw \
         -o "$UNION_DIR/m.profdata" 2>/dev/null; then
    err "llvm-profdata failed to merge the replayed profiles."
    err "Check that $LLVM_PROFDATA matches the clang that built $COV_BINARY."
    exit 1
  fi
  union_state="$("$LLVM_COV" export "$COV_BINARY" --format=lcov \
                   "-instr-profile=$UNION_DIR/m.profdata" 2>/dev/null \
                 | lcov_line_state "$TARGET_FILE" "$TARGET_LINE")"

  if test "$union_state" != "covered"; then
    if test "$union_state" = "uncovered"; then
      logerr "$TARGET_FILE:$TARGET_LINE is executable but no selected input reaches it."
      test "$INCLUDE_CRASHES" -eq 0 && \
        logerr "(retry with --crashes to also scan crash/timeout inputs)"
    else
      logerr "$TARGET_FILE:$TARGET_LINE is not in the coverage data."
      logerr "The path or line number may be wrong, or the line is non-executable."
    fi
    logerr "0 of $TOTAL inputs reach $TARGET_FILE:$TARGET_LINE"
    exit 0
  fi

  # ── per-input scan ───────────────────────────────────────────────────────
  # The workers run in fresh `bash -c` shells spawned by xargs, so their
  # configuration travels through the environment and the two functions they
  # call are exported.
  loge "Reachable in union; scanning $TOTAL input(s) individually (workers=$FORKS)..."
  export COV_SEARCH_CMD="$COVERAGE_CMD" \
         COV_SEARCH_BIN="$COV_BINARY" \
         COV_SEARCH_PROFDATA="$LLVM_PROFDATA" \
         COV_SEARCH_COV="$LLVM_COV" \
         COV_SEARCH_TIMEOUT="$TIMEOUT" \
         COV_SEARCH_FILE="$TARGET_FILE" \
         COV_SEARCH_LINE="$TARGET_LINE" \
         COV_SEARCH_WORKROOT="$SEARCH_DIR"
  export -f _search_one_input lcov_line_state

  {
    find_queue_files
    test "$INCLUDE_CRASHES" -eq 1 && find_crash_timeout_files
  } | xargs -0 -r -n 1 -P "$FORKS" \
        bash -c '_search_one_input "$1"' _ \
    | sort > "$SEARCH_DIR/matches.txt" || true

  local match_count
  # grep -c prints 0 and exits 1 on an empty file; `|| true` keeps that single
  # "0" without appending a second one (which `|| echo 0` would do).
  match_count=$(grep -c . "$SEARCH_DIR/matches.txt" 2>/dev/null || true)
  : "${match_count:=0}"

  cat "$SEARCH_DIR/matches.txt"
  loge "$match_count of $TOTAL inputs reach $TARGET_FILE:$TARGET_LINE"
  exit 0
}

# ── command: diff ────────────────────────────────────────────────────────────

cmd_diff() {
  local REPORT_DIR="."
  # With no arguments and no default reports to diff in the current directory,
  # there is nothing to do — print help instead of erroring (as if -h).
  if test $# -eq 0 \
     && ! test -s "./coverage.json" \
     && ! test -s "./coverage_old.json"; then
    usage_diff
    exit 0
  fi
  case "${1-}" in
    -o)        need_arg "-o" "${2-}"; REPORT_DIR="$2"; shift 2 ;;
    -h|--help) usage_diff; exit 0 ;;
    -V)        echo "cov-analysis-$VERSION"; exit 0 ;;
  esac
  local OLD="${1-}"
  local NEW="${2-}"
  test -z "$OLD" && OLD="$REPORT_DIR/coverage_old.json"
  test -z "$NEW" && NEW="$REPORT_DIR/coverage.json"
  test -s "$OLD" || { err "The old JSON report does not exist: $OLD"; exit 1; }
  test -s "$NEW" || { err "The new JSON report does not exist: $NEW"; exit 1; }
  python3 - -o "$REPORT_DIR/coverage_diff.html" --only-changed "$OLD" "$NEW" << 'PYEOF'
#
# (c) 2026 Marc "vanHauser" Heuse
#
# License: GNU Affero General Public License 3
#

import argparse
import html
import json
import os
from pathlib import Path
from typing import Dict, List, Tuple, Set


def load_json(path: str):
    """Return the parsed content of the UTF-8 encoded JSON file at ``path``."""
    text = Path(path).read_text(encoding='utf-8')
    return json.loads(text)


def merge_line_state(current: int, new: int) -> int:
    """Combine two per-line states, keeping the strongest one.

    States: 0 = unknown/non-executable, 1 = uncovered, 2 = covered.
    Covered beats uncovered, which beats unknown; any other value counts as 0.
    """
    for state in (2, 1):
        if current == state or new == state:
            return state
    return 0


def extract_file_entries(root) -> Dict[str, List[dict]]:
    """Group the per-file records of an export document by filename.

    Walks every object under ``root['data']`` and collects the entries of its
    ``files`` list. Entries without a (non-empty) ``filename`` are dropped; a
    filename that occurs several times keeps all of its entries, in order.
    """
    grouped: Dict[str, List[dict]] = {}
    for export_obj in root.get('data', []):
        for file_entry in export_obj.get('files', []):
            filename = file_entry.get('filename')
            if filename:
                grouped.setdefault(filename, []).append(file_entry)
    return grouped


def line_states_from_segments(file_entry: dict) -> Dict[int, int]:
    """Approximate a per-line state map from a file entry's ``segments`` list.

    Each segment is read as [line, col, count, hasCount, ...]; only the first
    four fields are used, so extra trailing fields are tolerated. A segment
    that carries a count marks every line from its own line through the line
    of the following segment as covered (2) when the count is positive, else
    uncovered (1). The last segment has no successor and contributes nothing.
    When several segments touch the same line, the strongest state wins.
    """
    segments = file_entry.get('segments', []) or []
    states: Dict[int, int] = {}

    # Pair every segment with its successor.
    for cur, nxt in zip(segments, segments[1:]):
        if len(cur) < 4 or len(nxt) < 2:
            continue

        first_line, last_line = int(cur[0]), int(nxt[0])
        hits = int(cur[2])
        if not bool(cur[3]):
            continue

        seg_state = 2 if hits > 0 else 1
        for line in range(first_line, last_line + 1):
            # Stored states are only ever 1 or 2, so "strongest wins" is max().
            states[line] = max(states.get(line, 0), seg_state)

    return states


def merge_line_states(file_entries: List[dict]) -> Dict[int, int]:
    """Fold the line states of all entries for one file into a single map.

    Every entry is converted with ``line_states_from_segments``; states that
    land on the same line are combined with ``merge_line_state``.
    """
    merged: Dict[int, int] = {}
    per_entry_states = (line_states_from_segments(entry) for entry in file_entries)
    for states in per_entry_states:
        for line, state in states.items():
            merged[line] = merge_line_state(merged.get(line, 0), state)
    return merged


def extract_functions(root) -> Dict[str, Dict[str, bool]]:
    """Build a ``filename -> function name -> covered?`` map from an export.

    A function is covered when its ``count`` is positive (a missing or null
    count is treated as 0). Records without a name are filed under
    ``'<unknown>'``; records without filenames are ignored. When the same name
    shows up more than once for a file, it is covered if any record is.
    """
    coverage: Dict[str, Dict[str, bool]] = {}
    for export_obj in root.get('data', []):
        for fn in export_obj.get('functions', []) or []:
            fn_name = fn.get('name', '<unknown>')
            was_hit = int(fn.get('count', 0) or 0) > 0
            for source_file in fn.get('filenames', []) or []:
                per_file = coverage.setdefault(source_file, {})
                per_file[fn_name] = per_file.get(fn_name, False) or was_hit
    return coverage


def read_source_lines(filename: str) -> List[str]:
    """Return the lines of ``filename`` without line terminators.

    The file is decoded as UTF-8 with undecodable bytes replaced. Any OS-level
    failure (missing file, directory, permissions, ...) yields an empty list.
    """
    try:
        content = Path(filename).read_text(encoding='utf-8', errors='replace')
    except OSError:
        return []
    return content.splitlines()


def fmt_pct(numer: int, denom: int) -> str:
    """Format ``numer / denom`` as a percentage with one decimal place.

    Returns ``'n/a'`` when the denominator is zero or negative.
    """
    if denom <= 0:
        return 'n/a'
    ratio = 100.0 * numer / denom
    return '{:.1f}%'.format(ratio)


def compute_file_diff(filename: str, base_lines: Dict[int, int], upd_lines: Dict[int, int],
                      base_funcs: Dict[str, bool], upd_funcs: Dict[str, bool]) -> dict:
    """Compare the baseline and updated coverage of one file.

    ``base_lines``/``upd_lines`` map line number -> state (0 unknown,
    1 uncovered, 2 covered); ``base_funcs``/``upd_funcs`` map function name ->
    covered. Lines and functions missing from one side count as state 0 /
    not covered there.

    Returns a dict with the sorted line lists (``newly_covered``,
    ``no_longer_covered``, ``still_uncovered``, ``all_lines``), the sorted
    function-name lists (``newly_covered_fns``, ``lost_fns``,
    ``still_uncovered_fns``) and the executable/covered totals per side.
    """
    all_lines = sorted(set(base_lines) | set(upd_lines))
    # (line, state before, state now) for every line known to either side.
    line_view = [(ln, base_lines.get(ln, 0), upd_lines.get(ln, 0)) for ln in all_lines]

    newly_covered = [ln for ln, was, now in line_view if was != 2 and now == 2]
    no_longer_covered = [ln for ln, was, now in line_view if was == 2 and now != 2]
    still_uncovered = [ln for ln, was, now in line_view if was == 1 and now == 1]

    all_fn_names = sorted(set(base_funcs) | set(upd_funcs))
    # (name, covered before, covered now) for every function known to either side.
    fn_view = [(n, base_funcs.get(n, False), upd_funcs.get(n, False)) for n in all_fn_names]

    return {
        'filename': filename,
        'base_executable': sum(1 for _, was, _ in line_view if was in (1, 2)),
        'base_covered': sum(1 for _, was, _ in line_view if was == 2),
        'upd_executable': sum(1 for _, _, now in line_view if now in (1, 2)),
        'upd_covered': sum(1 for _, _, now in line_view if now == 2),
        'newly_covered': newly_covered,
        'no_longer_covered': no_longer_covered,
        'still_uncovered': still_uncovered,
        'all_lines': all_lines,
        'newly_covered_fns': [n for n, was, now in fn_view if not was and now],
        'lost_fns': [n for n, was, now in fn_view if was and not now],
        'still_uncovered_fns': [n for n, was, now in fn_view if not was and not now],
        'fn_total': len(all_fn_names),
        'fn_covered_before': sum(1 for _, was, _ in fn_view if was),
        'fn_covered_now': sum(1 for _, _, now in fn_view if now),
    }


def make_snippet_ranges(lines_of_interest: List[int], max_context: int = 2) -> List[Tuple[int, int]]:
    """Turn line numbers into merged, inclusive ``(start, end)`` display ranges.

    Every line is widened by ``max_context`` lines on both sides (never below
    line 1). Windows that overlap or directly touch the previous range are
    merged into it. The result is ordered by start line.
    """
    if not lines_of_interest:
        return []
    merged: List[Tuple[int, int]] = []
    for line in sorted(set(lines_of_interest)):
        lo, hi = max(1, line - max_context), line + max_context
        if merged and lo <= merged[-1][1] + 1:
            prev_lo, prev_hi = merged[-1]
            merged[-1] = (prev_lo, max(prev_hi, hi))
        else:
            merged.append((lo, hi))
    return merged


def render_code_snippets(filename: str, diff: dict, source_lines: List[str]) -> str:
    interesting = diff['newly_covered'] + diff['no_longer_covered'] + diff['still_uncovered']
    ranges = make_snippet_ranges(interesting)
    if not source_lines:
        if not interesting:
            return '<p class="muted">No line-level changes.</p>'
        lis = ''.join(f'<li>{ln}</li>' for ln in interesting)
        return f'<p class="muted">Source file not readable on disk. Interesting lines:</p><ul>{lis}</ul>'
    if not ranges:
        return '<p class="muted">No line-level changes.</p>'

    parts = []
    for start, end in ranges:
        parts.append('<div class="snippet">')
        parts.append(f'<div class="snippet-header">{html.escape(filename)}:{start}-{min(end, len(source_lines))}</div>')
        parts.append('<table class="code">')
        for ln in range(start, min(end, len(source_lines)) + 1):
            src = html.escape(source_lines[ln - 1])
            cls = []
            label = ''
            if ln in diff['newly_covered']:
                cls.append('new-covered')
                label = 'new'
            elif ln in diff['no_longer_covered']:
                cls.append('lost-covered')
                label = 'lost'
            elif ln in diff['still_uncovered']:
                cls.append('still-uncovered')
                label = 'still uncovered'
            state = ' '.join(cls)
            parts.append(
                f'<tr class="{state}"><td class="ln">{ln}</td><td class="tag">{label}</td>'
                f'<td class="src"><pre>{src}</pre></td></tr>'
            )
        parts.append('</table></div>')
    return ''.join(parts)


def render_html(diffs: List[dict], baseline_name: str, updated_name: str, output_path: str):
    """Write the complete HTML coverage-diff report to ``output_path``.

    ``diffs`` is a list of per-file dicts as produced by ``compute_file_diff``;
    they are rendered in the order given. ``baseline_name`` and
    ``updated_name`` are shown (HTML-escaped) in the page header.

    The page consists of: summary cards with totals over all files, one
    summary-table row per file linking to its section, and one section per
    file with function chips and the snippets from ``render_code_snippets``.
    Source files are read from disk via ``read_source_lines`` using the
    filename recorded in the diff. The output file is written as UTF-8.
    """
    # Totals for the summary cards.
    total_new = sum(len(d['newly_covered']) for d in diffs)
    total_lost = sum(len(d['no_longer_covered']) for d in diffs)
    total_still = sum(len(d['still_uncovered']) for d in diffs)
    total_new_fns = sum(len(d['newly_covered_fns']) for d in diffs)
    total_lost_fns = sum(len(d['lost_fns']) for d in diffs)

    # A file counts as changed when it gained or lost line or function coverage.
    changed_files = [d for d in diffs if d['newly_covered'] or d['no_longer_covered'] or d['newly_covered_fns'] or d['lost_fns']]
    changed_files_count = len(changed_files)

    # One summary-table row per file; any regression takes precedence over
    # improvements when choosing the row's highlight class.
    rows = []
    for d in diffs:
        row_class = 'regressed' if d['no_longer_covered'] or d['lost_fns'] else ('improved' if d['newly_covered'] or d['newly_covered_fns'] else '')
        rows.append(
            '<tr class="%s">'
            '<td><a href="#file-%s">%s</a></td>'
            '<td>%s</td>'
            '<td>%s</td>'
            '<td>%d</td>'
            '<td>%d</td>'
            '<td>%d</td>'
            '<td>%d</td>'
            '<td>%d</td>'
            '</tr>' % (
                row_class,
                # Anchor target; must be built exactly like file_id below.
                html.escape(d['filename'], quote=True).replace('/', '_').replace(' ', '_'),
                html.escape(d['filename']),
                fmt_pct(d['base_covered'], d['base_executable']),
                fmt_pct(d['upd_covered'], d['upd_executable']),
                len(d['newly_covered']),
                len(d['no_longer_covered']),
                len(d['still_uncovered']),
                len(d['newly_covered_fns']),
                len(d['lost_fns']),
            )
        )

    # One detail section per file.
    file_sections = []
    for d in diffs:
        file_id = html.escape(d['filename'], quote=True).replace('/', '_').replace(' ', '_')
        src_lines = read_source_lines(d['filename'])
        code = render_code_snippets(d['filename'], d, src_lines)

        def render_fn_list(title: str, items: List[str], cls: str) -> str:
            # Returns '' for an empty list; at most the first 200 names are shown.
            if not items:
                return ''
            chips = ''.join(f'<span class="chip {cls}">{html.escape(x)}</span>' for x in items[:200])
            return f'<div class="fn-group"><div class="fn-title">{html.escape(title)}</div><div class="chips">{chips}</div></div>'

        file_sections.append(f'''
<section id="file-{file_id}" class="file-card">
  <div class="file-head">
    <div>
      <h2>{html.escape(d['filename'])}</h2>
      <div class="muted">
        lines: {fmt_pct(d['base_covered'], d['base_executable'])} → {fmt_pct(d['upd_covered'], d['upd_executable'])}
        &nbsp;&nbsp; functions: {fmt_pct(d['fn_covered_before'], d['fn_total'])} → {fmt_pct(d['fn_covered_now'], d['fn_total'])}
      </div>
    </div>
    <div class="pill-row">
      <span class="pill green">+{len(d['newly_covered'])} new lines</span>
      <span class="pill red">-{len(d['no_longer_covered'])} lost lines</span>
      <span class="pill amber">{len(d['still_uncovered'])} still uncovered</span>
    </div>
  </div>
  {render_fn_list('Newly covered functions', d['newly_covered_fns'], 'chip-green')}
  {render_fn_list('No longer covered functions', d['lost_fns'], 'chip-red')}
  {render_fn_list('Still uncovered functions', d['still_uncovered_fns'], 'chip-amber')}
  {code}
</section>
''')

    # Page template. This is an f-string, so literal CSS braces are doubled.
    html_doc = f'''<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>LLVM coverage diff</title>
<style>
:root {{
  --bg: #0b1020;
  --panel: #121933;
  --panel-2: #0f1530;
  --text: #e8ecf8;
  --muted: #9ea8c7;
  --border: #27304f;
  --green: #163a26;
  --green-2: #1e7a45;
  --red: #3d1720;
  --red-2: #b0465d;
  --amber: #413315;
  --amber-2: #ba8f2a;
  --blue: #4d8cff;
}}
* {{ box-sizing: border-box; }}
body {{ margin: 0; font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, sans-serif; background: linear-gradient(180deg, #0b1020, #09101b 30%, #081018); color: var(--text); }}
.container {{ max-width: 1400px; margin: 0 auto; padding: 28px; }}
header {{ display: flex; justify-content: space-between; gap: 16px; align-items: end; margin-bottom: 22px; }}
h1 {{ margin: 0; font-size: 32px; }}
.sub {{ color: var(--muted); margin-top: 8px; }}
.cards {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 14px; margin: 20px 0 26px; }}
.card {{ background: rgba(18, 25, 51, 0.88); border: 1px solid var(--border); border-radius: 18px; padding: 18px; box-shadow: 0 10px 30px rgba(0,0,0,.25); }}
.card .k {{ color: var(--muted); font-size: 13px; text-transform: uppercase; letter-spacing: .08em; }}
.card .v {{ font-size: 28px; margin-top: 8px; font-weight: 700; }}
.table-wrap, .file-card {{ background: rgba(18, 25, 51, 0.88); border: 1px solid var(--border); border-radius: 18px; box-shadow: 0 10px 30px rgba(0,0,0,.25); }}
.table-wrap {{ overflow: hidden; margin-bottom: 26px; }}
table.summary {{ width: 100%; border-collapse: collapse; }}
table.summary th, table.summary td {{ padding: 12px 14px; border-bottom: 1px solid rgba(255,255,255,.06); text-align: left; font-variant-numeric: tabular-nums; }}
table.summary th {{ color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: .08em; background: rgba(255,255,255,.02); }}
table.summary tr:hover td {{ background: rgba(255,255,255,.025); }}
table.summary tr.improved td:first-child {{ border-left: 4px solid var(--green-2); }}
table.summary tr.regressed td:first-child {{ border-left: 4px solid var(--red-2); }}
a {{ color: #a8c7ff; text-decoration: none; }}
a:hover {{ text-decoration: underline; }}
.file-card {{ padding: 18px; margin: 18px 0; }}
.file-head {{ display: flex; justify-content: space-between; gap: 16px; align-items: start; margin-bottom: 16px; }}
.file-head h2 {{ margin: 0 0 8px; font-size: 20px; word-break: break-all; }}
.muted {{ color: var(--muted); }}
.pill-row {{ display: flex; gap: 8px; flex-wrap: wrap; }}
.pill {{ padding: 6px 10px; border-radius: 999px; font-size: 12px; border: 1px solid transparent; }}
.pill.green {{ background: rgba(30,122,69,.16); border-color: rgba(30,122,69,.45); }}
.pill.red {{ background: rgba(176,70,93,.16); border-color: rgba(176,70,93,.45); }}
.pill.amber {{ background: rgba(186,143,42,.16); border-color: rgba(186,143,42,.45); }}
.fn-group {{ margin: 14px 0; }}
.fn-title {{ color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: .08em; margin-bottom: 8px; }}
.chips {{ display: flex; gap: 8px; flex-wrap: wrap; }}
.chip {{ padding: 6px 10px; border-radius: 999px; font-size: 12px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }}
.chip-green {{ background: rgba(30,122,69,.18); border: 1px solid rgba(30,122,69,.45); }}
.chip-red {{ background: rgba(176,70,93,.18); border: 1px solid rgba(176,70,93,.45); }}
.chip-amber {{ background: rgba(186,143,42,.18); border: 1px solid rgba(186,143,42,.45); }}
.snippet {{ margin-top: 16px; border: 1px solid rgba(255,255,255,.06); border-radius: 14px; overflow: hidden; }}
.snippet-header {{ padding: 10px 12px; background: rgba(255,255,255,.03); color: var(--muted); font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 12px; }}
table.code {{ width: 100%; border-collapse: collapse; }}
table.code td {{ vertical-align: top; border-bottom: 1px solid rgba(255,255,255,.04); }}
table.code .ln {{ width: 72px; color: var(--muted); text-align: right; padding: 0 10px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; user-select: none; }}
table.code .tag {{ width: 120px; padding: 0 10px; color: var(--muted); text-transform: uppercase; font-size: 11px; letter-spacing: .08em; }}
table.code .src {{ width: auto; }}
table.code pre {{ margin: 0; padding: 0 12px 0 0; white-space: pre-wrap; word-break: break-word; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; line-height: 1.45; }}
tr.new-covered td {{ background: linear-gradient(90deg, rgba(30,122,69,.36), rgba(30,122,69,.12)); }}
tr.lost-covered td {{ background: linear-gradient(90deg, rgba(176,70,93,.34), rgba(176,70,93,.10)); }}
tr.still-uncovered td {{ background: linear-gradient(90deg, rgba(186,143,42,.25), rgba(186,143,42,.08)); }}
footer {{ color: var(--muted); margin-top: 24px; font-size: 12px; }}
@media (max-width: 920px) {{
  .file-head {{ flex-direction: column; }}
  table.summary {{ display: block; overflow-x: auto; }}
}}
</style>
</head>
<body>
<div class="container">
  <header>
    <div>
      <h1>LLVM coverage diff</h1>
      <div class="sub">baseline: {html.escape(baseline_name)} &nbsp;→&nbsp; updated: {html.escape(updated_name)}</div>
    </div>
  </header>

  <section class="cards">
    <div class="card"><div class="k">Files with changes</div><div class="v">{changed_files_count}</div></div>
    <div class="card"><div class="k">Newly covered lines</div><div class="v">{total_new}</div></div>
    <div class="card"><div class="k">No longer covered lines</div><div class="v">{total_lost}</div></div>
    <div class="card"><div class="k">Still uncovered lines</div><div class="v">{total_still}</div></div>
    <div class="card"><div class="k">Newly covered functions</div><div class="v">{total_new_fns}</div></div>
    <div class="card"><div class="k">No longer covered functions</div><div class="v">{total_lost_fns}</div></div>
  </section>

  <div class="table-wrap">
    <table class="summary">
      <thead>
        <tr>
          <th>file</th>
          <th>baseline lines</th>
          <th>updated lines</th>
          <th>new</th>
          <th>lost</th>
          <th>still uncovered</th>
          <th>new fns</th>
          <th>lost fns</th>
        </tr>
      </thead>
      <tbody>
        {''.join(rows)}
      </tbody>
    </table>
  </div>

  {''.join(file_sections)}

  <footer>
    Generated from llvm-cov export JSON. Line-level status is reconstructed from file segment data and is intended for practical diffing.
  </footer>
</div>
</body>
</html>'''

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html_doc)


def main():
    """Command-line entry point for the coverage diff generator.

    Reads a baseline and an updated llvm-cov export JSON file, computes a
    per-file diff for every filename that appears in either export (as a
    file entry or as the owner of a function), optionally drops files
    without coverage changes, orders the result and renders it to HTML.
    """
    parser = argparse.ArgumentParser(
        description='Generate an HTML diff report from two llvm-cov export JSON files.',
    )
    parser.add_argument('baseline_json')
    parser.add_argument('updated_json')
    parser.add_argument(
        '-o', '--output',
        default='coverage-diff.html',
        help='HTML output filename, default is "coverage-diff.html".',
    )
    parser.add_argument(
        '--only-changed',
        action='store_true',
        help='Only include files with line/function coverage changes in the report.',
    )
    opts = parser.parse_args()

    baseline_doc = load_json(opts.baseline_json)
    updated_doc = load_json(opts.updated_json)

    files_before = extract_file_entries(baseline_doc)
    files_after = extract_file_entries(updated_doc)
    fns_before = extract_functions(baseline_doc)
    fns_after = extract_functions(updated_doc)

    # Union of every filename known to either export.
    all_names = set(files_before)
    all_names.update(files_after, fns_before, fns_after)

    def has_changes(entry):
        # A file counts as changed when any gained/lost line or function list is non-empty.
        return bool(
            entry['newly_covered']
            or entry['no_longer_covered']
            or entry['newly_covered_fns']
            or entry['lost_fns']
        )

    def sort_rank(entry):
        # Files that lost coverage come first, then files with the most
        # newly covered lines/functions, then alphabetical by filename.
        lost_total = len(entry['no_longer_covered']) + len(entry['lost_fns'])
        gained_total = len(entry['newly_covered']) + len(entry['newly_covered_fns'])
        return (0 if lost_total > 0 else 1, -gained_total, entry['filename'])

    diffs: List[dict] = []
    for name in sorted(all_names):
        entry = compute_file_diff(
            name,
            merge_line_states(files_before.get(name, [])),
            merge_line_states(files_after.get(name, [])),
            fns_before.get(name, {}),
            fns_after.get(name, {}),
        )
        if opts.only_changed and not has_changes(entry):
            continue
        diffs.append(entry)

    diffs.sort(key=sort_rank)

    # Create the output directory on demand so -o may point into a new folder.
    destination = Path(opts.output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    render_html(
        diffs,
        os.path.basename(opts.baseline_json),
        os.path.basename(opts.updated_json),
        str(destination),
    )


# Run the diff generator only when this script is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
PYEOF
  log "Coverage difference report written to $REPORT_DIR/coverage_diff.html"
  exit 0
}

# ── command: report ───────────────────────────────────────────────────────────

# Replay a fuzzer corpus through a coverage-instrumented binary and write
# HTML, text, summary and JSON coverage reports into the report directory.
#
# Options (see the argument parser below):
#   -d DIR             input directory (required)
#   -e CMD             coverage command, may contain @@ as input-file marker
#                      (required); without @@ each input is fed on stdin
#   -o DIR             report directory (default: <input dir>/cov)
#   -t N               number of parallel replay workers (default: 1)
#   -T SECS            timeout per crash/timeout replay in seconds (default: 5)
#   -v / -q            verbose / quiet output
#   -V                 print the version and exit
#   -h, --help         print usage and exit
#   --ignore-regex RE  forwarded to llvm-cov as -ignore-filename-regex
#                      (default: /usr/include/)
#   --layout afl|flat  skip corpus layout auto-detection
#
# Exits with status 1 on invalid arguments, missing tools, an empty corpus,
# missing .profraw output or a failing llvm-profdata / llvm-cov step.
cmd_report() {
  # ── defaults ────────────────────────────────────────────────────────────────
  local AFL_DIR=""
  local COVERAGE_CMD=""
  local REPORT_DIR=""
  local TIMEOUT=5
  local FORKS=1
  local IGNORE_REGEX='/usr/include/'
  local PROFRAW_DIR=""
  local LLVM_PROFDATA=""
  local LLVM_COV=""
  local FUZZER_LAYOUT="${FUZZER_LAYOUT:-}"

  # ── argument parsing ───────────────────────────────────────────────────────
  if test $# -eq 0; then usage_report; exit 1; fi

  while [ $# -gt 0 ]; do
    case "$1" in
      -d)            need_arg "-d" "${2-}"; AFL_DIR="$2";       shift 2 ;;
      -e)            need_arg "-e" "${2-}"; COVERAGE_CMD="$2";  shift 2 ;;
      -o)            need_arg "-o" "${2-}"; REPORT_DIR="$2";    shift 2 ;;
      -t)            need_arg "-t" "${2-}"; FORKS="$2";         shift 2 ;;
      -T)            need_arg "-T" "${2-}"; TIMEOUT="$2";       shift 2 ;;
      -v)            VERBOSE=1;          shift   ;;
      -q)            QUIET=1;            shift   ;;
      -V)            echo "cov-analysis-$VERSION"; exit 0 ;;
      -h|--help)     usage_report; exit 0 ;;
      --ignore-regex) need_arg "--ignore-regex" "${2-}"; IGNORE_REGEX="$2"; shift 2 ;;
      --layout)      need_arg "--layout" "${2-}"; FUZZER_LAYOUT="$2";       shift 2 ;;
      *) err "Unknown option: $1"; echo "" >&2; usage_report >&2; exit 1 ;;
    esac
  done

  # ── validate inputs ────────────────────────────────────────────────────────
  if test -z "$AFL_DIR"; then
    err "Must specify input directory with -d (AFL++, libFuzzer, libafl, or honggfuzz)"
    exit 1
  fi
  if test -z "$COVERAGE_CMD"; then
    err "Must specify coverage command with -e"
    exit 1
  fi
  if ! test -d "$AFL_DIR"; then
    err "Input directory does not exist: $AFL_DIR"
    exit 1
  fi
  # Both numeric options must be plain positive decimal integers.
  case "$FORKS" in
    ''|*[!0-9]*|0)
      err "Replay worker count must be a positive integer: $FORKS"
      exit 1
      ;;
  esac
  case "$TIMEOUT" in
    ''|*[!0-9]*|0)
      err "Timeout (-T) must be a positive integer in seconds: $TIMEOUT"
      exit 1
      ;;
  esac

  # Validate explicit --layout override if supplied
  if test -n "$FUZZER_LAYOUT"; then
    case "$FUZZER_LAYOUT" in
      afl|flat) ;;
      *) err "--layout must be 'afl' or 'flat' (got '$FUZZER_LAYOUT')"; exit 1 ;;
    esac
  fi

  local COV_BINARY
  COV_BINARY="$(extract_binary "$COVERAGE_CMD")"
  if test -z "$COV_BINARY"; then
    err "Could not extract binary path from coverage command: $COVERAGE_CMD"
    exit 1
  fi
  if ! test -x "$COV_BINARY"; then
    err "Coverage binary not found or not executable: $COV_BINARY"
    exit 1
  fi
  logv "Coverage binary: $COV_BINARY"

  # Forgotten @@ on a driver binary → supply it automatically (argv-only input).
  COVERAGE_CMD="$(add_at_if_driver "$COVERAGE_CMD" "$COV_BINARY")"
  logv "Coverage command: $COVERAGE_CMD"

  test -z "$REPORT_DIR" && REPORT_DIR="$AFL_DIR/cov"

  # ── detect LLVM tools ──────────────────────────────────────────────────────
  LLVM_PROFDATA="$(find_tool llvm-profdata)" || {
    err "llvm-profdata not found. Install LLVM (apt install llvm / dnf install llvm)."
    exit 1
  }
  LLVM_COV="$(find_tool llvm-cov)" || {
    err "llvm-cov not found. Install LLVM (apt install llvm / dnf install llvm)."
    exit 1
  }
  log "LLVM tools: $LLVM_PROFDATA, $LLVM_COV"

  # ── detect or honor fuzzer layout ──────────────────────────────────────────
  if test -z "$FUZZER_LAYOUT"; then
    FUZZER_LAYOUT="$(detect_fuzzer_layout)"
  fi
  case "$FUZZER_LAYOUT" in
    afl)  log "Fuzzer layout   : AFL++ (queue/crashes/timeouts)" ;;
    flat) log "Fuzzer layout   : flat (libFuzzer/libafl/honggfuzz)" ;;
    empty)
      err "No input files found in $AFL_DIR"
      err "Expected one of: AFL++ out dir, libFuzzer/libafl corpus, honggfuzz workspace."
      err "Override detection with --layout afl|flat if needed."
      exit 1
      ;;
  esac

  # ── prepare workspace ──────────────────────────────────────────────────────
  PROFRAW_DIR="$(mktemp -d /tmp/cov-analysis-profraw.XXXXXX)"
  # The path is expanded into the trap text right now because PROFRAW_DIR is a
  # local and may no longer be in scope when the EXIT trap fires.
  trap "rm -rf '$PROFRAW_DIR'" EXIT
  # A signal handler that only cleans up would let the script carry on after
  # Ctrl-C with its temp dir gone. Turn signals into a real exit instead; the
  # EXIT trap above then does the cleanup.
  trap 'exit 130' INT
  trap 'exit 143' TERM

  # Guard against nuking the filesystem root or a bare "./html" / "./text"
  # that might belong to something else in cwd.
  local _abs_report
  _abs_report="$(cd "$REPORT_DIR" 2>/dev/null && pwd -P || echo "$REPORT_DIR")"
  case "$_abs_report" in
    /|""|"/usr"|"/usr/local"|"/etc"|"/var"|"/opt"|"/home"|"/root")
      err "Refusing to use report directory: $REPORT_DIR (resolves to '$_abs_report')."
      err "Pick a dedicated subdirectory with -o."
      exit 1
      ;;
  esac
  # _abs_report is a physical path, so compare it with the physical cwd too;
  # $PWD alone is the logical path and differs when cwd was entered through a
  # symlink, which would silently bypass this guard.
  if { test "$_abs_report" = "$PWD" || test "$_abs_report" = "$(pwd -P)"; } \
     && { test -e "$REPORT_DIR/html" || test -e "$REPORT_DIR/text"; }; then
    if ! test -f "$REPORT_DIR/summary.txt" && ! test -f "$REPORT_DIR/coverage.profdata"; then
      err "Refusing to delete '$REPORT_DIR/html' / '$REPORT_DIR/text' — they do not look"
      err "like a previous cov-analysis report (no summary.txt / coverage.profdata)."
      err "Pick a different -o directory."
      exit 1
    fi
  fi

  rm -rf "$REPORT_DIR/html" "$REPORT_DIR/text"
  mkdir -p "$REPORT_DIR/html" "$REPORT_DIR/text"
  # %p is replaced by the process id, so every replay process writes its own file.
  export LLVM_PROFILE_FILE="$PROFRAW_DIR/cov-%p.profraw"

  log "Input directory : $AFL_DIR"
  log "Report directory: $REPORT_DIR"
  log "Replay workers : $FORKS"
  logv "Profraw temp dir: $PROFRAW_DIR"

  # ── replay queue files ─────────────────────────────────────────────────────
  # Replay failures are deliberately ignored (output discarded, "|| true"):
  # the only thing that matters is which .profraw files exist afterwards.
  local QUEUE_COUNT
  QUEUE_COUNT=$(count_files find_queue_files)
  log "Replaying $QUEUE_COUNT queue files..."

  case "$COVERAGE_CMD" in
    *@@*)
      # Determine if @@ is the last token (enables fast batch mode)
      local LAST_TOKEN="${COVERAGE_CMD##* }"
      if test "$LAST_TOKEN" = "@@"; then
        # Batch mode is only used for driver binaries; anything else is
        # replayed one file per process.
        if ! is_cov_driver_binary "$COV_BINARY"; then
          LAST_TOKEN="x"
        fi
      fi
      if test "$LAST_TOKEN" = "@@"; then
        # Fast batch: strip trailing @@ and let xargs append all files at once.
        # sh -c handles env assignments (e.g. LD_LIBRARY_PATH=./lib ./cov) correctly.
        local CMD_NO_AT="${COVERAGE_CMD% @@}"
        logv "Queue replay: batch (xargs, workers=$FORKS)"
        # shellcheck disable=SC2016
        find_queue_files | xargs -0 -r -n 128 -P "$FORKS" \
          sh -c "${CMD_NO_AT}"' "$@"' -- >/dev/null 2>&1 || true
      else
        # @@ is embedded mid-command (e.g. "./cov -f @@ -extra") — loop mode.
        # Collapse bash -c + sh -c into a single bash -c that evals the
        # substituted command (one fork per file instead of two).
        # NOTE: the file path is substituted textually and then re-parsed by
        # the shell, so paths containing whitespace or shell metacharacters
        # do not survive in this mode.
        logv "Queue replay: loop (mid-command @@, workers=$FORKS)"
        find_queue_files | xargs -0 -r -n 1 -P "$FORKS" \
          bash -c 'eval "${1/@@/$2}"' _ "$COVERAGE_CMD" \
          >/dev/null 2>&1 || true
      fi
      ;;
    *)
      # No @@ — feed each file via stdin
      logv "Queue replay: stdin loop (workers=$FORKS)"
      find_queue_files | xargs -0 -r -n 1 -P "$FORKS" \
        bash -c 'eval "$1" < "$2"' _ "$COVERAGE_CMD" \
        >/dev/null 2>&1 || true
      ;;
  esac

  # ── replay crashes and timeouts ────────────────────────────────────────────
  # Always one file per process and wrapped in timeout(1), since these inputs
  # are expected to crash or hang.
  local CRASH_COUNT
  CRASH_COUNT=$(count_files find_crash_timeout_files)
  if test "$CRASH_COUNT" -gt 0; then
    log "Replaying $CRASH_COUNT crash/timeout files (timeout=${TIMEOUT}s each)..."
    case "$COVERAGE_CMD" in
      *@@*)
        find_crash_timeout_files | xargs -0 -r -n 1 -P "$FORKS" \
          bash -c 'timeout --signal=SIGTERM "${2}s" bash -c "${1/@@/$3}"' \
          _ "$COVERAGE_CMD" "$TIMEOUT" \
          >/dev/null 2>&1 || true
        ;;
      *)
        find_crash_timeout_files | xargs -0 -r -n 1 -P "$FORKS" \
          bash -c 'timeout --signal=SIGTERM "${2}s" bash -c "$1" < "$3"' \
          _ "$COVERAGE_CMD" "$TIMEOUT" \
          >/dev/null 2>&1 || true
        ;;
    esac
  fi

  # ── check profraw files were generated ────────────────────────────────────
  local PROFRAW_COUNT
  PROFRAW_COUNT=$(find "$PROFRAW_DIR" -name '*.profraw' -printf . 2>/dev/null | wc -c)
  if test "$PROFRAW_COUNT" -eq 0; then
    err "No .profraw files generated."
    err "Check that the binary is instrumented with -fprofile-instr-generate."
    err "Use 'cov-analysis build' to build the coverage binary."
    exit 1
  fi
  logv "Generated $PROFRAW_COUNT .profraw file(s)"

  # ── merge profiles ─────────────────────────────────────────────────────────
  log "Merging $PROFRAW_COUNT profile(s)..."
  # Pass the inputs through a list file instead of a shell glob: loop/stdin
  # replay can leave up to one .profraw per input, and expanding them all on
  # the command line fails with "Argument list too long" for a large corpus.
  local PROFRAW_LIST="$PROFRAW_DIR/profraw.list"
  find "$PROFRAW_DIR" -type f -name 'cov-*.profraw' -print > "$PROFRAW_LIST" || {
    err "Could not list .profraw files in $PROFRAW_DIR"
    exit 1
  }
  "$LLVM_PROFDATA" merge -sparse --input-files="$PROFRAW_LIST" \
    -o "$REPORT_DIR/coverage.profdata" || {
    err "llvm-profdata merge failed"
    exit 1
  }

  # ── generate reports ───────────────────────────────────────────────────────
  # Options shared by the HTML and the text rendering.
  local SHOW_OPTS=(
    "$COV_BINARY"
    "-instr-profile=$REPORT_DIR/coverage.profdata"
    "-show-directory-coverage"
    "-show-line-counts-or-regions"
    "-show-branches=count"
    "-ignore-filename-regex=$IGNORE_REGEX"
  )

  log "Generating HTML report..."
  "$LLVM_COV" show "${SHOW_OPTS[@]}" \
    -format=html \
    -output-dir="$REPORT_DIR/html" || {
    err "llvm-cov show (HTML) failed"
    exit 1
  }

  log "Generating text report..."
  "$LLVM_COV" show "${SHOW_OPTS[@]}" \
    -format=text \
    -output-dir="$REPORT_DIR/text" || {
    err "llvm-cov show (text) failed"
    exit 1
  }

  log "Generating summary..."
  "$LLVM_COV" report "$COV_BINARY" \
    "-instr-profile=$REPORT_DIR/coverage.profdata" \
    "-ignore-filename-regex=$IGNORE_REGEX" \
    > "$REPORT_DIR/summary.txt" || {
    err "llvm-cov report failed"
    exit 1
  }

  # Keep the previous (non-empty) export around as coverage_old.json before
  # it is overwritten below.
  test -s "$REPORT_DIR/coverage.json" && {
    log "Creating backup of previous coverage.json -> coverage_old.json"
    mv "$REPORT_DIR/coverage.json" "$REPORT_DIR/coverage_old.json"
  }

  log "Generating JSON export..."
  # llvm-cov quirk: --format=text emits JSON; --format=lcov emits LCOV.
  "$LLVM_COV" export "$COV_BINARY" \
    --format=text \
    "-instr-profile=$REPORT_DIR/coverage.profdata" \
    "-ignore-filename-regex=$IGNORE_REGEX" \
    > "$REPORT_DIR/coverage.json" || {
    err "llvm-cov export failed"
    exit 1
  }

  # ── print summary ──────────────────────────────────────────────────────────
  if test "$QUIET" -eq 0; then
    # A file:// URL needs an absolute path; a relative report directory would
    # otherwise have its first component read as a host name.
    local REPORT_URL_DIR="$REPORT_DIR"
    case "$REPORT_URL_DIR" in
      /*) ;;
      *)  REPORT_URL_DIR="$PWD/$REPORT_URL_DIR" ;;
    esac
    echo ""
    if test "$VERBOSE" -eq 1; then
      echo "=== Coverage Summary ==="
      cat "$REPORT_DIR/summary.txt"
      echo ""
    fi
    echo "[+] HTML report  : file://$REPORT_URL_DIR/html/index.html"
    echo "[+] Text report  : $REPORT_DIR/text/"
    echo "[+] Summary      : $REPORT_DIR/summary.txt"
    echo "[+] JSON export  : $REPORT_DIR/coverage.json"
    echo "[+] Profile data : $REPORT_DIR/coverage.profdata"
  fi
}

# ── main ──────────────────────────────────────────────────────────────────────

# Only run main logic when executed directly (not when sourced from tests).
if [ "${BASH_SOURCE[0]:-$0}" = "$0" ]; then
  # No arguments at all: show the top-level usage and fail.
  if test $# -eq 0; then
    usage_main
    exit 1
  fi

  # The first argument is a global flag, an explicit command name, or
  # (anything else) already an option of the implicit "report" command.
  case "$1" in
    -V)
      echo "cov-analysis-$VERSION"
      exit 0
      ;;
    -h|--help)
      usage_main
      exit 0
      ;;
    build|driver|report|diff|stability|search)
      COMMAND="$1"
      shift
      ;;
    *)
      COMMAND="report"
      ;;
  esac

  # Every command is implemented by a function named cmd_<command>.
  "cmd_$COMMAND" "$@"
fi
