From 76d67be3703371fdfed7e802cc1730f2360e7bea Mon Sep 17 00:00:00 2001 From: David Buchanan Date: Tue, 23 Dec 2025 06:19:34 +0000 Subject: [PATCH 1/3] test for unpaired surrogates --- test/test-3string.js | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/test-3string.js b/test/test-3string.js index 8d347b2..0ccd720 100644 --- a/test/test-3string.js +++ b/test/test-3string.js @@ -109,6 +109,10 @@ describe('string', () => { () => decode(fromHex('7ba5f702b3a5f702b34c6f72656d20697073756d20646f6c6f722073697420616d65742c20636f6e73656374657475722061646970697363696e6720656c69742e20446f6e6563206d692074656c6c75732c20696163756c6973206e656320766573746962756c756d20717569732c206665726d656e74756d206e6f6e2066656c69732e204d616563656e6173207574206a7573746f20706f73756572652e')), /CBOR decode error: 64-bit integer string lengths not supported/) }) + + it('decodes unpaired surrogates as U+FFFD', () => { + assert.strictEqual(decode(fromHex('63edb88e')), '\uFFFD\uFFFD\uFFFD') + }) } }) @@ -131,6 +135,20 @@ describe('string', () => { assert.strictEqual(toHex(encode(data)), expectedHex, `encode ${fixture.type}`) } }) + + it('should encode unpaired surrogates as U+FFFD', () => { + // short strings + assert.strictEqual( + toHex(encode('😎'.slice(1))), + toHex(encode('\uFFFD')) + ) + + // long strings (>64) + assert.strictEqual( + toHex(encode('A'.repeat(65) + '😎'.slice(1))), + toHex(encode('A'.repeat(65) + '\uFFFD')) + ) + }) } }) }) From a46ee5777d3793b023cf339fd5f22f6db6ebd07a Mon Sep 17 00:00:00 2001 From: David Buchanan Date: Tue, 23 Dec 2025 06:26:16 +0000 Subject: [PATCH 2/3] fix unpaired surrogate encoding --- lib/byte-utils.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/byte-utils.js b/lib/byte-utils.js index 2d81a22..b9f1879 100644 --- a/lib/byte-utils.js +++ b/lib/byte-utils.js @@ -301,6 +301,11 @@ function utf8ToBytes (str) { out[p++] = ((c >> 12) & 63) | 128 out[p++] = ((c >> 6) & 63) | 128 out[p++] = (c & 63) | 128 + } else if ((c >= 
0xD800) && (c <= 0xDFFF)) { + // Unpaired Surrogate (emit U+FFFD) + out[p++] = 239 + out[p++] = 191 + out[p++] = 189 } else { out[p++] = (c >> 12) | 224 out[p++] = ((c >> 6) & 63) | 128 From 6df69b9ce076a88a11135c600aabb3babd8fab00 Mon Sep 17 00:00:00 2001 From: David Buchanan Date: Tue, 23 Dec 2025 07:09:29 +0000 Subject: [PATCH 3/3] simplify --- lib/byte-utils.js | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/byte-utils.js b/lib/byte-utils.js index b9f1879..f38c09f 100644 --- a/lib/byte-utils.js +++ b/lib/byte-utils.js @@ -301,12 +301,10 @@ function utf8ToBytes (str) { out[p++] = ((c >> 12) & 63) | 128 out[p++] = ((c >> 6) & 63) | 128 out[p++] = (c & 63) | 128 - } else if ((c >= 0xD800) && (c <= 0xDFFF)) { - // Unpaired Surrogate (emit U+FFFD) - out[p++] = 239 - out[p++] = 191 - out[p++] = 189 } else { + if ((c >= 0xD800) && (c <= 0xDFFF)) { + c = 0xFFFD // Unpaired surrogate: encode U+FFFD (replacement character) instead + } out[p++] = (c >> 12) | 224 out[p++] = ((c >> 6) & 63) | 128 out[p++] = (c & 63) | 128