Step 02 — Writer, Index, Footer
Goal
Wire the data-block builder into a full SstWriter that emits
[blocks...][index][footer].
Writer state
SstWriter {
out: Vec<u8>, // file bytes accumulated so far
block: BlockBuilder, // current data block
index: Vec<IndexEntry>, // one entry per flushed block
target: usize, // BLOCK_TARGET (default 4096)
last_key: Option<Vec<u8>>,
}
add
fn add(&mut self, key: &[u8], ty: u8, value: &[u8]) -> Result<(), Error> {
if let Some(prev) = &self.last_key {
if key <= prev.as_slice() { return Err(Error::Unsorted); }
}
if ty == 1 && !value.is_empty() { return Err(Error::BadTombstone); }
let sz = entry_size(key.len(), value.len());
if self.block.count > 0 && self.block.current_size() + sz > self.target {
self.flush_block();
}
self.block.add(key, ty, value);
self.last_key = Some(key.to_vec());
Ok(())
}
flush_block
fn flush_block(&mut self) {
let mut blk = std::mem::replace(&mut self.block, BlockBuilder::new());
let (bytes, first_key) = blk.finish();
let offset = self.out.len() as u64;
let size = bytes.len() as u64;
self.out.extend_from_slice(&bytes);
self.index.push(IndexEntry { key: first_key, offset, size });
}
finish
fn finish(mut self) -> Vec<u8> {
if self.block.count > 0 { self.flush_block(); }
let index_offset = self.out.len() as u64;
self.out.extend_from_slice(&(self.index.len() as u32).to_le_bytes());
for e in &self.index {
self.out.extend_from_slice(&(e.key.len() as u32).to_le_bytes());
self.out.extend_from_slice(&e.offset.to_le_bytes());
self.out.extend_from_slice(&e.size.to_le_bytes());
self.out.extend_from_slice(&e.key);
}
let index_size = self.out.len() as u64 - index_offset;
let num_blocks = self.index.len() as u64;
self.out.extend_from_slice(&index_offset.to_le_bytes());
self.out.extend_from_slice(&index_size.to_le_bytes());
self.out.extend_from_slice(&num_blocks.to_le_bytes());
self.out.extend_from_slice(b"SST1\0\0\0\0");
debug_assert_eq!(self.out.len() as u64,
index_offset + index_size + FOOTER_LEN as u64);
self.out
}
Footer parse
fn parse_footer(file: &[u8]) -> Result<Footer, Error> {
if file.len() < FOOTER_LEN { return Err(Error::Short); }
let tail = &file[file.len() - FOOTER_LEN..];
if &tail[24..32] != b"SST1\0\0\0\0" { return Err(Error::BadMagic); }
Ok(Footer {
index_offset: u64::from_le_bytes(tail[ 0.. 8].try_into().unwrap()),
index_size: u64::from_le_bytes(tail[ 8..16].try_into().unwrap()),
num_blocks: u64::from_le_bytes(tail[16..24].try_into().unwrap()),
})
}
The reader then verifies
footer.index_offset + footer.index_size + 32 == file.len() (returns
IndexOutOfRange otherwise) and parses the index block.
Index block parse
Identical structure to write:
let mut p = footer.index_offset as usize;
let count = read_u32_le(&file[p..]); p += 4;
let mut idx = Vec::with_capacity(count as usize);
for _ in 0..count {
let klen = read_u32_le(&file[p..]) as usize; p += 4;
let off = read_u64_le(&file[p..]); p += 8;
let sz = read_u64_le(&file[p..]); p += 8;
let key = file[p..p+klen].to_vec(); p += klen;
if off + sz > footer.index_offset { // beyond data region
return Err(Error::IndexOutOfRange);
}
idx.push(IndexEntry { key, offset: off, size: sz });
}
get with floor binary search
fn get(&self, key: &[u8]) -> Option<Entry> {
// Floor = rightmost index entry whose first_key <= key.
let pos = match self.index.binary_search_by(|e| e.key.as_slice().cmp(key)) {
Ok(i) => i, // exact first-key match
Err(0) => return None, // key precedes the smallest block
Err(i) => i - 1,
};
let blk = &self.index[pos];
let block_bytes = &self.file[blk.offset as usize
.. (blk.offset + blk.size) as usize];
scan_block(block_bytes, key)
}
scan_block decodes entries in order and returns the matching one
when found, None otherwise (a hit on a tombstone returns
Some(Entry::Tombstone) — the engine layer decides what tombstones
mean).
Self-check
- An empty writer:
finish()length is exactly 36 (4-byte empty index- 32-byte footer).
- After one
add, file length =4 + 9 + |k| + |v|(block) +4 + 4 + 8 + 8 + |k|(index) +32(footer). - For a target of 64 and entries crafted with sizes 30, 30, 30: the first add fits (4+30=34 ≤ 64), the second triggers flush (34+30=64? — equals target, no flush; 34+30=64 ≤ 64), then the third (64+30=94 > 64) → flush; result: two data blocks.