#!/usr/bin/env bash
# db-07 cross-language byte-identity test.
#
# Builds two input SSTables (newer + older) using db-05 and db-06 binaries,
# then runs `compact OUT.sst newer.sst older.sst` in all three languages
# and asserts sha256 byte-identity. Spot-checks reads with db-06's sstable CLI.
# Strict mode: abort on failing commands, unset variables, and failed pipeline
# stages.
set -o errexit -o nounset -o pipefail

# ROOT is the parent of the directory that holds this script; the db-05 and
# db-06 projects are addressed as siblings of ROOT.
script_dir="$(dirname "$0")"
ROOT="$(cd "$script_dir/.." && pwd)"
DB05_ROOT="${ROOT}/../db-05-lsm-memtable"
DB06_ROOT="${ROOT}/../db-06-sstable-format"

# Scratch directory for generated artifacts; deleted when the script exits.
TMP="$(mktemp -d)"
remove_tmp() {
    rm -rf "$TMP"
}
trap remove_tmp EXIT

printf 'tmp dir: %s\n' "$TMP"
# Later relative paths (src/rust, src/go, src/cpp) are resolved from ROOT.
cd "$ROOT"

# ---------------------------------------------------------------------------
# Build everything (rust release bins for db-05, db-06, db-07; go + cpp for db-07)
# ---------------------------------------------------------------------------
# db-05: Rust release build; the resulting `memtable` binary is used to create
# the feedstock memtables.
echo "=== build db-05 rust ==="
(
    cd "$DB05_ROOT/src/rust" || exit
    cargo build --release >/dev/null
)
DB05_MT="$DB05_ROOT/src/rust/target/release/memtable"

# db-06: Rust release build; the resulting `sstable` binary is used both to
# build the input SSTables and to inspect the compaction outputs.
echo "=== build db-06 rust ==="
(
    cd "$DB06_ROOT/src/rust" || exit
    cargo build --release >/dev/null
)
DB06_SST="$DB06_ROOT/src/rust/target/release/sstable"

# db-07: the `compact` tool under test, built once per language.
echo "=== build db-07 (rust/go/cpp) ==="
(
    cd src/rust || exit
    cargo build --release >/dev/null
)
RUST_BIN="$ROOT/src/rust/target/release/compact"

# The Go binary is written straight into the scratch directory.
(
    cd src/go || exit
    go build -o "$TMP/go-compact" ./cmd/compact
)
GO_BIN="$TMP/go-compact"

# Configure, then build, the C++ target in src/cpp/build.
(
    cd src/cpp || exit
    cmake -S . -B build -DCMAKE_BUILD_TYPE=Release >/dev/null || exit
    cmake --build build -j >/dev/null
)
CPP_BIN="$ROOT/src/cpp/build/compact"

# ---------------------------------------------------------------------------
# Build inputs:
#   newer.mt  -> bulk 50, put key10=NEW-10, del key5
#   older.mt  -> bulk 100, put key50=OLD-50
# ---------------------------------------------------------------------------
echo "=== build feedstock memtables ==="
newer_mt="$TMP/newer.mt"
older_mt="$TMP/older.mt"

# newer: `bulk 50`, then overwrite key10 and delete key5.
"$DB05_MT" new  "$newer_mt"
"$DB05_MT" bulk "$newer_mt" 50
"$DB05_MT" put  "$newer_mt" key10 NEW-10
"$DB05_MT" del  "$newer_mt" key5

# older: `bulk 100`, then overwrite key50.
"$DB05_MT" new  "$older_mt"
"$DB05_MT" bulk "$older_mt" 100
"$DB05_MT" put  "$older_mt" key50 OLD-50

echo "=== promote to SSTables (db-06 rust) ==="
# Convert each memtable into an SSTable with the same base name.
for side in newer older; do
    "$DB06_SST" build "$TMP/$side.mt" "$TMP/$side.sst"
done

# Log the byte size of each input table.
for side in newer older; do
    echo "$side.sst $(wc -c < "$TMP/$side.sst") bytes"
done

# ---------------------------------------------------------------------------
# Run compaction in each language: drop=false
# ---------------------------------------------------------------------------
echo "=== run compact (drop=false) ==="
# Every implementation gets the same inputs in the same order (newer first).
"$RUST_BIN" "$TMP/rust.sst" "$TMP/newer.sst" "$TMP/older.sst"
"$GO_BIN" "$TMP/go.sst" "$TMP/newer.sst" "$TMP/older.sst"
"$CPP_BIN" "$TMP/cpp.sst" "$TMP/newer.sst" "$TMP/older.sst"

# shasum prints "<digest>  <path>"; strip everything from the first space on
# so only the digest remains.
sha_rust=$(shasum -a 256 "$TMP/rust.sst")
sha_rust=${sha_rust%% *}
sha_go=$(shasum -a 256 "$TMP/go.sst")
sha_go=${sha_go%% *}
sha_cpp=$(shasum -a 256 "$TMP/cpp.sst")
sha_cpp=${sha_cpp%% *}

echo "  rust: $sha_rust"
echo "  go  : $sha_go"
echo "  cpp : $sha_cpp"

# The rust digest is the reference; both other digests must equal it.
if [[ "$sha_rust" == "$sha_go" && "$sha_rust" == "$sha_cpp" ]]; then
    echo "  match: $sha_rust"
else
    echo "FAIL: sha256 mismatch (drop=false)"
    exit 1
fi

# ---------------------------------------------------------------------------
# Spot-checks using db-06's sstable CLI (any lang's output is a valid SST).
# Hex of "NEW-10" = 4e45572d3130
# Hex of "OLD-50" = 4f4c442d3530
# Hex of "val99"  = 76616c3939
# ---------------------------------------------------------------------------
# Phase banner for the read spot-checks.
printf '%s\n' "=== spot-check reads via db-06 sstable get ==="
#######################################
# Assert that a key in the rust-produced SSTable reads back as a live value.
# Globals:   DB06_SST (read) - sstable CLI used for the lookup
#            TMP (read)      - directory containing rust.sst
# Arguments: $1 - key to look up
#            $2 - expected value, hex-encoded
# Outputs:   "  <key> => <reply>" on success, a FAIL line on mismatch
# Returns:   exits the script with status 1 on mismatch
#######################################
expect_value() {
    local key="$1" want_hex="$2"
    local reply
    reply=$("$DB06_SST" get "$TMP/rust.sst" "$key")
    if [[ "$reply" == "value: $want_hex" ]]; then
        echo "  $key => $reply"
    else
        echo "FAIL: get $key => $reply (want value: $want_hex)"
        exit 1
    fi
}
#######################################
# Assert that a key in the rust-produced SSTable reads back as a tombstone.
# Globals:   DB06_SST (read) - sstable CLI used for the lookup
#            TMP (read)      - directory containing rust.sst
# Arguments: $1 - key to look up
# Outputs:   "  <key> => tombstone" on success, a FAIL line otherwise
# Returns:   exits the script with status 1 when the reply is anything else
#######################################
expect_tombstone() {
    local key="$1"
    local reply
    reply=$("$DB06_SST" get "$TMP/rust.sst" "$key")
    case "$reply" in
        tombstone)
            echo "  $key => $reply"
            ;;
        *)
            echo "FAIL: get $key => $reply (want tombstone)"
            exit 1
            ;;
    esac
}
# key10 holds NEW-10: the newer table's value wins.
expect_value key10 4e45572d3130
# key50 holds OLD-50: taken from the older table (newer has no key50).
expect_value key50 4f4c442d3530
# key99 holds val99: taken from the older table.
expect_value key99 76616c3939
# key5 was deleted in the newer table, so it reads back as a tombstone.
expect_tombstone key5

# ---------------------------------------------------------------------------
# Entry count: 100 (keys 0..99). key5 is a tombstone but is still emitted when
# tombstones are kept, so nothing is subtracted. num_blocks is expected to be 1
# (well under the 4096-byte target); only the entry count is asserted below.
# ---------------------------------------------------------------------------
echo "=== size & block count ==="
# Ask db-06's CLI for the summary of the merged table and echo it into the log.
size_line=$("$DB06_SST" size "$TMP/rust.sst")
echo "  $size_line"
# The count must be followed by a non-digit or the end of the text. A plain
# substring search for "entries=100" would also accept entries=1000,
# entries=1001, ... and let a wrong count pass.
entries_100_re='entries=100([^0-9]|$)'
if ! [[ "$size_line" =~ $entries_100_re ]]; then
    echo "FAIL: expected entries=100"
    exit 1
fi

# ---------------------------------------------------------------------------
# Now drop=true: tombstones removed → 99 entries.
# ---------------------------------------------------------------------------
echo "=== run compact (drop=true) ==="
# Same inputs as before, now with --drop-tombstones; outputs get a -d suffix.
"$RUST_BIN" --drop-tombstones "$TMP/rust-d.sst" "$TMP/newer.sst" "$TMP/older.sst"
"$GO_BIN" --drop-tombstones "$TMP/go-d.sst" "$TMP/newer.sst" "$TMP/older.sst"
"$CPP_BIN" --drop-tombstones "$TMP/cpp-d.sst" "$TMP/newer.sst" "$TMP/older.sst"

# shasum prints "<digest>  <path>"; strip everything from the first space on
# so only the digest remains.
sha_rust_d=$(shasum -a 256 "$TMP/rust-d.sst")
sha_rust_d=${sha_rust_d%% *}
sha_go_d=$(shasum -a 256 "$TMP/go-d.sst")
sha_go_d=${sha_go_d%% *}
sha_cpp_d=$(shasum -a 256 "$TMP/cpp-d.sst")
sha_cpp_d=${sha_cpp_d%% *}

echo "  rust: $sha_rust_d"
echo "  go  : $sha_go_d"
echo "  cpp : $sha_cpp_d"

# The rust digest is the reference; both other digests must equal it.
if [[ "$sha_rust_d" == "$sha_go_d" && "$sha_rust_d" == "$sha_cpp_d" ]]; then
    echo "  match: $sha_rust_d"
else
    echo "FAIL: sha256 mismatch (drop=true)"
    exit 1
fi

# Summary of the tombstone-free table, echoed into the log.
size_d=$("$DB06_SST" size "$TMP/rust-d.sst")
echo "  $size_d"
# The count must be followed by a non-digit or the end of the text. A plain
# substring search for "entries=99" would also accept entries=990..999 and let
# a wrong count pass.
entries_99_re='entries=99([^0-9]|$)'
if ! [[ "$size_d" =~ $entries_99_re ]]; then
    echo "FAIL: expected entries=99 after dropping tombstone"
    exit 1
fi
# With tombstones dropped, a lookup of key5 must report the key as absent.
got5=$("$DB06_SST" get "$TMP/rust-d.sst" "key5")
case "$got5" in
    absent)
        echo "  key5 => $got5"
        ;;
    *)
        echo "FAIL: key5 should be absent after drop, got: $got5"
        exit 1
        ;;
esac

# ---------------------------------------------------------------------------
# Iter check: db-06's `iter` dump of the rust output should equal its dump of
# the go and cpp outputs.
# (Byte-identity already implies this, but it confirms the output is portable.)
# ---------------------------------------------------------------------------
echo "=== 3-way iter portability check ==="
# Dump each implementation's drop=false output with the same db-06 reader.
# The dump file is named after the language's first letter (r, g, c).
for lang in rust go cpp; do
    "$DB06_SST" iter "$TMP/$lang.sst" > "$TMP/iter-${lang:0:1}.txt"
done

# The rust dump is the reference; any difference fails the test.
diff -q "$TMP/iter-r.txt" "$TMP/iter-g.txt" >/dev/null || {
    echo "FAIL: iter rust vs go differs"
    exit 1
}
diff -q "$TMP/iter-r.txt" "$TMP/iter-c.txt" >/dev/null || {
    echo "FAIL: iter rust vs cpp differs"
    exit 1
}
echo "  iter outputs identical across all 3 outputs"

# Blank line, then the final success marker.
printf '\n%s\n' "CROSS-TEST OK"
