Level · vweevers · Jun 29, 2019 · Jun 28, 2019 · Jun 28, 2019 · Jun 28, 2019
diff --git a/README.md b/README.md
@@ -14,12 +14,12 @@
 
 ```
 npm i level-bench leveldown rocksdb
-npx level-bench run write leveldown
-npx level-bench run write rocksdb
-npx level-bench plot write
+npx level-bench run put leveldown
+npx level-bench run put rocksdb
+npx level-bench plot put
 ```
 
-Yields:
+Yields (outdated):
 
 ![Example plot](example.png)
 
@@ -39,9 +39,9 @@ Yields:
 
 Run a benchmark. The `benchmark` argument must be one of the named benchmarks listed below.
 
-The `target` argument should be a path or an npm package name that is installed nearby (for example `level-bench run write leveldown`). It defaults to the current working directory. A `package.json` must exist alongside the resolved `target`.
+The `target` argument should be a path or an npm package name that is installed nearby (for example `level-bench run put leveldown`). It defaults to the current working directory. A `package.json` must exist alongside the resolved `target`.
 
-To wrap `target` with `encoding-down` or `levelup` (you must install these dependencies yourself) pass `--encode` and/or `--levelup` (or `-el` for short). Alternatively `target` can be something that exports a `levelup` interface, for example `level-bench run write level`.
+To wrap `target` with `encoding-down` or `levelup` (you must install these dependencies yourself) pass `--encode` and/or `--levelup` (or `-el` for short). Alternatively `target` can be something that exports a `levelup` interface, for example `level-bench run put level`.
 
 If `target` does not create persistent databases (like `memdown` or `level-mem`) you must pass `--mem`.
 
@@ -57,46 +57,46 @@ We can compare the performance of two git branches:
 
 ```
 git checkout master && npm i
-level-bench run write
+level-bench run put
 
 git checkout wip && npm i
-level-bench run write
+level-bench run put
 ```
 
 Or check the overhead of `encoding-down`:
 
 ```
-level-bench run write memdown --mem
-level-bench run write memdown --mem --encode
+level-bench run put memdown --mem
+level-bench run put memdown --mem --encode
 ```
 
 Or a specific encoding:
 
 ```
-level-bench run write level --db [--valueEncoding utf8]
-level-bench run write level --db [--valueEncoding json]
+level-bench run put level --db [--valueEncoding utf8]
+level-bench run put level --db [--valueEncoding json]
 ```
 
 Or compare the effect of options:
 
 ```
-level-bench run write leveldown
-level-bench run write leveldown --db [ --no-compression ]
+level-bench run put leveldown
+level-bench run put leveldown --db [ --no-compression ]
 ```
 
 Then plot both (or more) runs with:
 
 ```
-level-bench plot write
+level-bench plot put
 ```
 
 <!-- Lastly, for the adventurous, you can swap out the prototype of `target` with for example some branch of `abstract-leveldown`:
 
 ```
 npm i memdown Level/abstract-leveldown#improved
 
-level-bench run write memdown --name baseline
-level-bench run write memdown --proto abstract-leveldown --name improved
+level-bench run put memdown --name baseline
+level-bench run put memdown --proto abstract-leveldown --name improved
 ``` -->
 
 #### Options
@@ -115,35 +115,35 @@ Yet to document.
 
 ## Benchmarks
 
-### `write`
+### `put`
 
-Perform concurrent `put()` operations on random string keys and values. Options:
+Perform concurrent `put()` operations on random or sequential string keys and values. Records the Simple Moving Average (SMA) of the duration of the last 1000 writes, as well as the Cumulative Moving Average (CMA) of the throughput in MB/s. Options:
 
 - `-n`: amount of operations, default 1e6
 - `--concurrency`: default 4
+- `--keys` (string): one of:
+  - `random` (default): generate pseudo-random numeric keys (0-N) with a certain probability `distribution`
+  - `seq`: non-random, sequential numeric keys (0-N)
+  - `seqReverse`: same keys but in reverse (N-0)
+- `--values` (string): one of:
+  - `random` (default): generate pseudo-random values
+  - `empty`: zero-length values, or zero-filled values if `valueSize` is set
+- `--seed` (string): seed to use for random numbers, defaults to `'seed'`
+- `--distribution` (string): one of [`zipfian`](https://github.com/vweevers/zipfian-integer), `uniform` (default)
+- `--skew` (floating-point number): Zipfian skew (default 0)
+- `--offset` (number): offset keys (for example to simulate timestamps)
 - `--valueSize`: size of value, as a number in bytes or string with unit (e.g. `--valueSize 1kb`)
+- `--keyAsBuffer`, `--valueAsBuffer` (boolean): if not set, keys and values are written as strings (hex encoded).
 
-_Previously known as `db-bench.js` in `leveldown`._
+Tips:
 
-### `write-random`
-
-Perform concurrent `put()` operations on random UUID string keys. Options:
-
-- `-n`: amount of operations, default 1e7
-- `--concurrency`: default 10
-- `--valueSize`: size of value, as a number in bytes or string with unit (e.g. `--valueSize 1kb`)
-
-### `write-sorted`
-
-Perform concurrent `put()` operations on sorted string keys. Options:
-
-- `-n`: amount of operations, default 1e7
-- `--concurrency`: default 10
-- `--valueSize`: size of value, as a number in bytes or string with unit (e.g. `--valueSize 1kb`)
+- To benchmark writing sorted data, use `--keys seq` or `--keys seqReverse`
+- Be mindful of `--concurrency` when using `--keys seq` or `--keys seqReverse`: high concurrency can counteract the performance benefits of writing keys sequentially
+- To use the `zipfian` distribution with a negative skew, specify it as `--skew=-1` rather than `--skew -1` (which would be interpreted as a flag).
 
 ### `batch-put`
 
-Same as `write`, but in batches rather than singular puts. Perform concurrent `batch()` operations on random string keys and values. Options:
+Same as `put`, but in batches rather than singular puts. Perform concurrent `batch()` operations on random string keys and values. Options:
 
 - `-n`: amount of operations, default 1e6
 - `--batchSize`: default 1000

diff --git a/benchmarks/batch-put.js b/benchmarks/batch-put.js
@@ -14,7 +14,7 @@ exports.defaults = {
   }
 }
 
-exports.plot = require('./write.plot')
+exports.plot = require('./batch-put.plot')
 
 exports.run = function (factory, stream, options) {
   stream.write('Elapsed (ms), Entries, Bytes, Last 1000 Avg Time, MB/s\n')

diff --git a/benchmarks/write.plot.js → benchmarks/batch-put.plot.js b/benchmarks/write.plot.js → benchmarks/batch-put.plot.js
@@ -2,7 +2,6 @@
 
 const e = require('../lib/escape-gnuplot-string')
 
-// Note: also used by batch-put.js
 module.exports = function (title, description, results) {
   const durations = results.map(function (res, i) {
     const file = res.csvFile

diff --git a/benchmarks/index.js b/benchmarks/index.js
@@ -1,8 +1,5 @@
 'use strict'
 
-exports.write = require('./write')
-exports['write-random'] = require('./write-random')
-exports['write-sorted'] = require('./write-sorted')
-
+exports['put'] = require('./put')
 exports['batch-put'] = require('./batch-put')
 exports['self-distribution'] = require('./self-distribution')
diff --git a/benchmarks/write.js → benchmarks/put.js b/benchmarks/write.js → benchmarks/put.js
@@ -1,28 +1,25 @@
 'use strict'
 
-const crypto = require('crypto')
+const keyspace = require('keyspace')
 const ldu = require('../lib/level-du')
-const keyTmpl = '0000000000000000'
 
 exports.defaults = {
   benchmark: {
     n: 1e6,
     concurrency: 4,
-    valueSize: 100
+    valueSize: 100,
+    keys: 'random',
+    values: 'random',
+    seed: 'seed'
   }
 }
 
-exports.plot = require('./write.plot')
+exports.plot = require('./put.plot')
 
 exports.run = function (factory, stream, options) {
-  stream.write('Elapsed (ms), Entries, Bytes, Last 1000 Avg Time, MB/s\n')
+  const generator = keyspace(options.n, options)
 
-  function make16CharPaddedKey () {
-    const r = Math.floor(Math.random() * options.n)
-    const k = keyTmpl + r
-
-    return k.substr(k.length - 16)
-  }
+  stream.write('Elapsed (ms), Entries, Bytes, SMA ms/write, CMA MB/s\n')
 
   function start (db) {
     const startTime = Date.now()
@@ -69,18 +66,14 @@ exports.run = function (factory, stream, options) {
           elapsed +
           ',' + totalWrites +
           ',' + totalBytes +
-          ',' + Math.floor(timesAccum / 1000) +
-          ',' + (Math.floor(((totalBytes / 1048576) / (elapsed / 1000)) * 100) / 100) +
+          ',' + (timesAccum / 1000 / 1e6).toFixed(3) +
+          ',' + ((totalBytes / 1048576) / (elapsed / 1e3)).toFixed(3) +
           '\n')
         timesAccum = 0
       }
 
-      // TODO: though we don't start the clock until after crypto.randomBytes(),
-      // due to concurrency there might be put() callbacks waiting in libuv
-      // while the main thread is blocked? hmz. Maybe use async randomBytes(),
-      // or pregenerated values (bonus: make them deterministic).
-      const key = make16CharPaddedKey()
-      const value = crypto.randomBytes(options.valueSize).toString('hex')
+      const key = generator.key(totalWrites - 1)
+      const value = generator.value()
       const start = process.hrtime()
 
       db.put(key, value, function (err) {
@@ -89,7 +82,8 @@ exports.run = function (factory, stream, options) {
         const duration = process.hrtime(start)
         const nano = (duration[0] * 1e9) + duration[1]
 
-        totalBytes += keyTmpl.length + options.valueSize
+        // TODO: expose something like last "<key|value>Length" on the generator?
+        totalBytes += Buffer.byteLength(key) + Buffer.byteLength(value)
         timesAccum += nano
         inProgress--
         process.nextTick(write)

diff --git a/benchmarks/write-random.plot.js → benchmarks/put.plot.js b/benchmarks/write-random.plot.js → benchmarks/put.plot.js
@@ -7,7 +7,14 @@ module.exports = function (title, description, results) {
     const file = res.csvFile
     const title = res.id(results)
 
-    return `'${e(file)}' using ($1/1000):($2/1000000) title '${e(title)}' ls ${i + 1} axes x1y1`
+    return `'${e(file)}' using ($1/1000):($4) title '${e(title)}' ls ${i + 1} axes x1y1`
+  })
+
+  const throughputs = results.map(function (res, i) {
+    const file = res.csvFile
+    const title = res.id(results)
+
+    return `'${e(file)}' using ($1/1000):($5) w lines title '${e(title)}' ls ${i + 1} axes x1y1`
   })
 
   return `
@@ -16,14 +23,12 @@ module.exports = function (title, description, results) {
   set datafile separator ','
 
   set autoscale y
-  set logscale y
-
+  set ytics mirror
+  set tics in
   set xlabel "Time (seconds)" tc rgb "#999999"
-  set ylabel "Milliseconds/write" tc rgb "#999999"
 
   set key outside tc rgb "#999999"
   set border lc rgb "#999999"
-  set grid
 
   # To plot more than 5 files, add more line styles
   set style line 1 lt 7 ps 0.8 lc rgb "#00FFFF"
@@ -32,8 +37,19 @@ module.exports = function (title, description, results) {
   set style line 4 lt 7 ps 0.8 lc rgb "#F5B700"
   set style line 5 lt 7 ps 0.8 lc rgb "#731DD8"
 
-  set title '${e(title)}' tc rgb "#cccccc" offset 0,0.7 font "Ubuntu Mono,12"
-  set label 1 '${e(description)}' tc rgb "#999999" at graph 0.5,1.04 center front
-
-  plot ${durations.join(', ')}`
+  set multiplot layout 2,1
+    set lmargin at screen 0.1
+
+    set title '${e(title)}' tc rgb "#cccccc" offset 0,0.7 font "Ubuntu Mono,12"
+    set label 1 '${e(description)}' tc rgb "#999999" at graph 0.5,1.10 center front
+    set ylabel 'SMA Milliseconds/write' tc rgb "#999999"
+    set logscale y
+    plot ${durations.join(', ')}
+
+    set title ""
+    set label 1 ""
+    set ylabel 'CMA Throughput MB/s' tc rgb "#999999"
+    set nologscale y
+    plot ${throughputs.join(', ')}
+  unset multiplot`
 }
diff --git a/benchmarks/self-distribution.plot.js b/benchmarks/self-distribution.plot.js
@@ -22,7 +22,6 @@ module.exports = function (title, description, results) {
     if (/seq/.test(res.meta.options.benchmark.keys)) return
 
     const file = res.csvFile
-    const title = results.length === 1 ? '' : res.id(results, 'default')
 
     return [
       `set size 1,${freqPlotsbHeight.toFixed(3)}`,

diff --git a/benchmarks/write-random.js b/benchmarks/write-random.js