From 236a6989a70f602ff1fee1dbdd83be5fb802d43e Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Thu, 13 Mar 2025 19:28:05 +0100 Subject: [PATCH 1/7] initial commit --- examples/helia-101/102-cid-profiles.js | 50 ++++++++++++++++++++++++++ examples/helia-101/README.md | 8 +++++ examples/helia-101/package.json | 8 ++++- 3 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 examples/helia-101/102-cid-profiles.js diff --git a/examples/helia-101/102-cid-profiles.js b/examples/helia-101/102-cid-profiles.js new file mode 100644 index 00000000..41494121 --- /dev/null +++ b/examples/helia-101/102-cid-profiles.js @@ -0,0 +1,50 @@ +import { createReadStream } from 'node:fs' +import { unixfs } from '@helia/unixfs' +import { createHelia } from 'helia' +import { fixedSize } from 'ipfs-unixfs-importer/chunker' +import { balanced } from 'ipfs-unixfs-importer/layout' + + +// set up + +const helia = await createHelia() +const fs = unixfs(helia) + +// using a local copy of https://www.gutenberg.org/files/2600/2600-h/2600-h.htm , curled to this directory but not checked in +const bigHtmlFile = createReadStream('./2600-h.htm') + +// generate CID according to Helia defaults (~== kubo v1 profile) from bigHtmlFile: +const cidBigHTML = await fs.addFile( + { + //path: './bigHtmlFile.htm', + content: bigHtmlFile + }, { + wrapWithDirectory: true + } +) + +console.log('bigHtmlFile test-cid-v1 profile: ', cidBigHTML.toString()) +const stats = await fs.stat(cidBigHTML) +console.log('Stats:', stats) + +// generate CID according to Kubo legacy-cid-v0 profile from bigHtmlFile: +const bigHtmlFile2 = createReadStream('./2600-h.htm') +const cidBigHTML2 = await fs.addFile( + { + //path: './bigHtmlFile.htm', + content: bigHtmlFile2 + }, { + cidVersion: 0, + rawLeaves: false, + layout: balanced({ + maxChildrenPerNode: 174 + }), + chunker: fixedSize({ + chunkSize: 262_144 + }) + } +) + +console.log('bigHtmlFile legacy-cid-v0 profile: ', cidBigHTML2.toString()) +const stats2 = await 
fs.stat(cidBigHTML2) +console.log('Stats:', stats2) diff --git a/examples/helia-101/README.md b/examples/helia-101/README.md index cec81132..0f55589b 100644 --- a/examples/helia-101/README.md +++ b/examples/helia-101/README.md @@ -28,6 +28,7 @@ - [Running Examples](#running-examples) - [Usage](#usage) - [101 - Basics](#101---basics) + - [102 - CID Profiles](#102---cid-profiles) - [201 - Storage](#201---storage) - [Blockstore](#blockstore) - [Datastore](#datastore) @@ -132,6 +133,13 @@ That's it! We've created a Helia node, added a file to it, and retrieved that f Next we will look at where the bytes that make up the file go. +### 102 - CID Profiles + +The way UnixFS takes arbitrary inputs (files, folders, etc) and turns them into DAGs of smaller binary blocks (addressed by CID) involves a lot of tradeoffs and choices, particularly at the DAG level. +If you need multiple different implementations to produce the same CID for a given input, you need to configure the CID generation accordingly, matching certain configuration variables to those defined in "CID profiles", like those exposed in the `ipfs config profile apply` command in kubo ([docs](https://docs.ipfs.tech/reference/kubo/cli/#ipfs-config-profile-apply)). + +When experimenting with these options, it can be helpful to play with the DAG generator at dag.ipfs.tech. + ### 201 - Storage Out of the box Helia will store all data in-memory. This makes it easy to get started, and to create short-lived nodes that do not persist state between restarts, but what if you want to store large amounts of data for long amounts of time? 
diff --git a/examples/helia-101/package.json b/examples/helia-101/package.json index 1c4e64fb..bf7f4546 100644 --- a/examples/helia-101/package.json +++ b/examples/helia-101/package.json @@ -7,6 +7,7 @@ "license": "MIT", "scripts": { "101-basics": "node 101-basics.js", + "102-cid-profiles": "node 102-cid-profiles.js", "201-storage": "node 201-storage.js", "301-networking": "node 301-networking.js", "test": "test-node-example test/*" @@ -16,13 +17,18 @@ "@chainsafe/libp2p-yamux": "^7.0.1", "@helia/http": "^2.0.0", "@helia/unixfs": "^4.0.0", + "@ipld/unixfs": "^3.0.0", "@libp2p/bootstrap": "^11.0.7", "@libp2p/identify": "^3.0.7", "@libp2p/tcp": "^10.0.8", "blockstore-core": "^5.0.2", "datastore-core": "^10.0.2", "helia": "^5.0.0", - "libp2p": "^2.1.6" + "ipfs-unixfs": "^11.2.0", + "ipfs-unixfs-exporter": "^13.6.1", + "ipfs-unixfs-importer": "^15.3.1", + "libp2p": "^2.1.6", + "multiformats": "^13.3.1" }, "devDependencies": { "test-ipfs-example": "^1.0.0" From 38b975425b8029bd2c25d0d9f2a206d9cdf6cbda Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Thu, 13 Mar 2025 19:33:28 +0100 Subject: [PATCH 2/7] add expected CIDs to tutorial and .gitignore the sample input --- .gitignore | 1 + examples/helia-101/102-cid-profiles.js | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index caf14142..5d22925f 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ playwright-report .envrc .tool-versions .env +examples/helia-101/2600-h.htm diff --git a/examples/helia-101/102-cid-profiles.js b/examples/helia-101/102-cid-profiles.js index 41494121..6b59a100 100644 --- a/examples/helia-101/102-cid-profiles.js +++ b/examples/helia-101/102-cid-profiles.js @@ -23,6 +23,7 @@ const cidBigHTML = await fs.addFile( } ) +//CID: bafybeifrc2vrh76j7dccg2hgihoy66su7jw2vvxoihrswevbdaazlquhpq console.log('bigHtmlFile test-cid-v1 profile: ', cidBigHTML.toString()) const stats = await fs.stat(cidBigHTML) console.log('Stats:', stats) @@ -45,6 +46,7 @@ const cidBigHTML2 = await 
fs.addFile( } ) +//CID: QmaYSLS6tenji27mAV9Nzr69pZNapQ4PdDp48ESRToYXSr console.log('bigHtmlFile legacy-cid-v0 profile: ', cidBigHTML2.toString()) const stats2 = await fs.stat(cidBigHTML2) console.log('Stats:', stats2) From a71c65e9eeac785227958af73340b2b834d795d7 Mon Sep 17 00:00:00 2001 From: Bumblefudge Date: Fri, 14 Mar 2025 15:22:03 +0100 Subject: [PATCH 3/7] Update examples/helia-101/README.md Co-authored-by: Daniel Norman <1992255+2color@users.noreply.github.com> --- examples/helia-101/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/helia-101/README.md b/examples/helia-101/README.md index 0f55589b..86cf4b59 100644 --- a/examples/helia-101/README.md +++ b/examples/helia-101/README.md @@ -135,7 +135,7 @@ Next we will look at where the bytes that make up the file go. ### 102 - CID Profiles -The way UnixFS takes arbitrary inputs (files, folders, etc) and turns them into DAGs of smaller binary blocks (addressed by CID) involves a lot of tradeoffs and choices, particularly at the DAG level. +The way UnixFS takes arbitrary inputs (files, folders, etc) and turns them into [DAGs](https://proto.school/merkle-dags/03) of smaller binary blocks (addressed by CID) involves a lot of tradeoffs and choices, particularly in the shape and structure of the DAG. If you need multiple different implementations to produce the same CID for a given input, you need to configure the CID generation accordingly, matching certain configuration variables to those defined in "CID profiles", like those exposed in the `ipfs config profile apply` command in kubo ([docs](https://docs.ipfs.tech/reference/kubo/cli/#ipfs-config-profile-apply)). When experimenting with these options, it can be helpful to play with the DAG generator at dag.ipfs.tech. 
From bfefafd68ccedfc88476a7fdd1b5a650bf0937dc Mon Sep 17 00:00:00 2001 From: Bumblefudge Date: Fri, 14 Mar 2025 15:22:19 +0100 Subject: [PATCH 4/7] Update examples/helia-101/README.md Co-authored-by: Daniel Norman <1992255+2color@users.noreply.github.com> --- examples/helia-101/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/helia-101/README.md b/examples/helia-101/README.md index 86cf4b59..09d9d527 100644 --- a/examples/helia-101/README.md +++ b/examples/helia-101/README.md @@ -136,7 +136,7 @@ Next we will look at where the bytes that make up the file go. ### 102 - CID Profiles The way UnixFS takes arbitrary inputs (files, folders, etc) and turns them into [DAGs](https://proto.school/merkle-dags/03) of smaller binary blocks (addressed by CID) involves a lot of tradeoffs and choices, particularly in the shape and structure of the DAG. -If you need multiple different implementations to produce the same CID for a given input, you need to configure the CID generation accordingly, matching certain configuration variables to those defined in "CID profiles", like those exposed in the `ipfs config profile apply` command in kubo ([docs](https://docs.ipfs.tech/reference/kubo/cli/#ipfs-config-profile-apply)). +If you need multiple different implementations to produce the same CID for a given input, configure CID generation accordingly, matching certain configuration variables to those defined in CID profiles, like those exposed in the `ipfs config profile apply` command in kubo ([docs](https://docs.ipfs.tech/reference/kubo/cli/#ipfs-config-profile-apply)). When experimenting with these options, it can be helpful to play with the DAG generator at dag.ipfs.tech. 
From c754e86c0b3a9a6517adee598aab1c0286ad1a7a Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Mon, 17 Mar 2025 13:33:01 +0100 Subject: [PATCH 5/7] add tests for addDirectory --- examples/helia-101/102-cid-profiles.js | 63 ++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/examples/helia-101/102-cid-profiles.js b/examples/helia-101/102-cid-profiles.js index 6b59a100..366b3465 100644 --- a/examples/helia-101/102-cid-profiles.js +++ b/examples/helia-101/102-cid-profiles.js @@ -5,25 +5,29 @@ import { fixedSize } from 'ipfs-unixfs-importer/chunker' import { balanced } from 'ipfs-unixfs-importer/layout' -// set up +//--------------------------------set up const helia = await createHelia() const fs = unixfs(helia) +const bigDirectorySource = "https://sourceforge.net/projects/freetype/files/latest/download" + +//--------------------------------file inputs + // using a local copy of https://www.gutenberg.org/files/2600/2600-h/2600-h.htm , curled to this directory but not checked in const bigHtmlFile = createReadStream('./2600-h.htm') // generate CID according to Helia defaults (~== kubo v1 profile) from bigHtmlFile: const cidBigHTML = await fs.addFile( { - //path: './bigHtmlFile.htm', - content: bigHtmlFile + path: './bigHtmlFile.htm', + //content: bigHtmlFile }, { - wrapWithDirectory: true + wrapWithDirectory: false } ) -//CID: bafybeifrc2vrh76j7dccg2hgihoy66su7jw2vvxoihrswevbdaazlquhpq +//kubo CID: bafybeifrc2vrh76j7dccg2hgihoy66su7jw2vvxoihrswevbdaazlquhpq console.log('bigHtmlFile test-cid-v1 profile: ', cidBigHTML.toString()) const stats = await fs.stat(cidBigHTML) console.log('Stats:', stats) @@ -32,8 +36,8 @@ console.log('Stats:', stats) const bigHtmlFile2 = createReadStream('./2600-h.htm') const cidBigHTML2 = await fs.addFile( { - //path: './bigHtmlFile.htm', - content: bigHtmlFile2 + path: './bigHtmlFile.htm', + //content: bigHtmlFile2 }, { cidVersion: 0, rawLeaves: false, @@ -42,11 +46,52 @@ const cidBigHTML2 = await 
fs.addFile( }), chunker: fixedSize({ chunkSize: 262_144 - }) + }), + wrapWithDirectory: false } ) -//CID: QmaYSLS6tenji27mAV9Nzr69pZNapQ4PdDp48ESRToYXSr +//kubo CID: QmaYSLS6tenji27mAV9Nzr69pZNapQ4PdDp48ESRToYXSr console.log('bigHtmlFile legacy-cid-v0 profile: ', cidBigHTML2.toString()) const stats2 = await fs.stat(cidBigHTML2) console.log('Stats:', stats2) + +//--------------------------------directory inputs + +// big-directory populated with enough small files to trigger HAMT behavior + +// generate CID according to Helia defaults (~== kubo v1 profile) from a big directory: +const cidBigDirectory = await fs.addDirectory( + { + path: 'big-directory/*', + }, { + wrapWithDirectory: false + } +) + +//CID: bafybeid5dv43dj6iwwd5wddkwiztty2i7ln55ri2yz4za5oboqrjtw7x54 +console.log('bigDirectory test-cid-v1 profile: ', cidBigDirectory.toString()) +const stats3 = await fs.stat(cidBigDirectory) +console.log('Stats:', stats3) + +// generate CID according to Kubo legacy-cid-v0 profile from bif directory: +const cidBigDirectory2 = await fs.addDirectory( + { + path: 'big-directory/*', + }, { + cidVersion: 0, + rawLeaves: false, + layout: balanced({ + maxChildrenPerNode: 256 + }), + chunker: fixedSize({ + chunkSize: 262_144 + }), + wrapWithDirectory: false + } +) + +//kubo CID: QmeyiUNRgGQg5g68GvgGm817i7qMv6YqSNvNNLdZtbBsba +console.log('bigDirectory legacy-cid-v0 profile: ', cidBigDirectory2.toString()) +const stats4 = await fs.stat(cidBigDirectory2) +console.log('Stats:', stats4) \ No newline at end of file From fb0a578f459fceb02e964ac7971435d6f7d0ff79 Mon Sep 17 00:00:00 2001 From: Bumblefudge Date: Mon, 17 Mar 2025 13:33:44 +0100 Subject: [PATCH 6/7] Update examples/helia-101/README.md Co-authored-by: Daniel Norman <1992255+2color@users.noreply.github.com> --- examples/helia-101/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/helia-101/README.md b/examples/helia-101/README.md index 09d9d527..4ddf4d65 100644 --- 
a/examples/helia-101/README.md +++ b/examples/helia-101/README.md @@ -138,7 +138,7 @@ Next we will look at where the bytes that make up the file go. The way UnixFS takes arbitrary inputs (files, folders, etc) and turns them into [DAGs](https://proto.school/merkle-dags/03) of smaller binary blocks (addressed by CID) involves a lot of tradeoffs and choices, particularly in the shape and structure of the DAG. If you need multiple different implementations to produce the same CID for a given input, configure CID generation accordingly, matching certain configuration variables to those defined in CID profiles, like those exposed in the `ipfs config profile apply` command in kubo ([docs](https://docs.ipfs.tech/reference/kubo/cli/#ipfs-config-profile-apply)). -When experimenting with these options, it can be helpful to play with the DAG generator at dag.ipfs.tech. +When experimenting with these options, it can be helpful to play with the [DAG Builder](https://dag.ipfs.tech/). ### 201 - Storage From 5cb338d936c51a2f2993aed87e63a13ec0dd53f8 Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Wed, 2 Apr 2025 17:28:25 +0200 Subject: [PATCH 7/7] switch from addDirectory to globSource, remove extraneous comments --- examples/helia-101/102-cid-profiles.js | 83 +++++++++++++------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/examples/helia-101/102-cid-profiles.js b/examples/helia-101/102-cid-profiles.js index 366b3465..c4560528 100644 --- a/examples/helia-101/102-cid-profiles.js +++ b/examples/helia-101/102-cid-profiles.js @@ -1,5 +1,5 @@ import { createReadStream } from 'node:fs' -import { unixfs } from '@helia/unixfs' +import { globSource, unixfs } from '@helia/unixfs' import { createHelia } from 'helia' import { fixedSize } from 'ipfs-unixfs-importer/chunker' import { balanced } from 'ipfs-unixfs-importer/layout' @@ -27,7 +27,6 @@ const cidBigHTML = await fs.addFile( } ) -//kubo CID: bafybeifrc2vrh76j7dccg2hgihoy66su7jw2vvxoihrswevbdaazlquhpq 
console.log('bigHtmlFile test-cid-v1 profile: ', cidBigHTML.toString()) const stats = await fs.stat(cidBigHTML) console.log('Stats:', stats) @@ -51,47 +50,51 @@ const cidBigHTML2 = await fs.addFile( } ) -//kubo CID: QmaYSLS6tenji27mAV9Nzr69pZNapQ4PdDp48ESRToYXSr console.log('bigHtmlFile legacy-cid-v0 profile: ', cidBigHTML2.toString()) const stats2 = await fs.stat(cidBigHTML2) console.log('Stats:', stats2) //--------------------------------directory inputs -// big-directory populated with enough small files to trigger HAMT behavior - -// generate CID according to Helia defaults (~== kubo v1 profile) from a big directory: -const cidBigDirectory = await fs.addDirectory( - { - path: 'big-directory/*', - }, { - wrapWithDirectory: false - } -) - -//CID: bafybeid5dv43dj6iwwd5wddkwiztty2i7ln55ri2yz4za5oboqrjtw7x54 -console.log('bigDirectory test-cid-v1 profile: ', cidBigDirectory.toString()) -const stats3 = await fs.stat(cidBigDirectory) -console.log('Stats:', stats3) - -// generate CID according to Kubo legacy-cid-v0 profile from bif directory: -const cidBigDirectory2 = await fs.addDirectory( - { - path: 'big-directory/*', - }, { - cidVersion: 0, - rawLeaves: false, - layout: balanced({ - maxChildrenPerNode: 256 - }), - chunker: fixedSize({ - chunkSize: 262_144 - }), - wrapWithDirectory: false - } -) - -//kubo CID: QmeyiUNRgGQg5g68GvgGm817i7qMv6YqSNvNNLdZtbBsba -console.log('bigDirectory legacy-cid-v0 profile: ', cidBigDirectory2.toString()) -const stats4 = await fs.stat(cidBigDirectory2) -console.log('Stats:', stats4) \ No newline at end of file +// big-directory populated with many small files and subdirectories to trigger +// recursive encoding, but not enough to trigger a HAMT-directory + +// generate CID according to Helia defaults (~== kubo v1 profile) from a big +// directory via the globSource function (see +// https://ipfs.github.io/helia/functions/_helia_unixfs.index.globSource.html ): + +for await (const entry of fs.addAll(globSource( + './big_directory', + 
'**/*' + ), { + wrapWithDirectory: true + } + ) +){ + console.log('bigDirectory test-cid-v1 profile: ', entry.cid.toString()) + const stats3 = await fs.stat(entry.cid) + console.log('Stats:', stats3) +} + +// generate CID according to Kubo legacy-cid-v0 profile from same directory: + +for await (const entry of fs.addAll(globSource( + './big_directory', + '**/*' + ), { + cidVersion: 0, + rawLeaves: false, + layout: balanced({ + maxChildrenPerNode: 256 + }), + chunker: fixedSize({ + chunkSize: 262_144 + }), + wrapWithDirectory: true + } + ) +){ + console.log('bigDirectory kubo-cid-v0 profile: ', entry.cid.toString()) + const stats4 = await fs.stat(entry.cid) + console.log('Stats:', stats4) +} \ No newline at end of file