diff --git a/README.md b/README.md index e79bfa4..f755cd4 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,10 @@ $ pip install -e . ## Usage -The library contains functionality to generate MoBIE projects and add data to it. -Check out [the example notebook](https://github.com/mobie/mobie-utils-python/blob/master/examples/create_mobie_project.ipynb) to see how to generate a MoBIE project. +The library contains functionality to generate MoBIE projects, add data to it and create complex views. +For complete examples, please check out the [examples](https://github.com/mobie/mobie-utils-python/blob/master/examples): +- [normal project creation](https://github.com/mobie/mobie-utils-python/blob/master/examples/create_mobie_project.ipynb): generate a MoBIE project for multi-modal data from a CLEM experiment +- [htm project creation](https://github.com/mobie/mobie-utils-python/blob/master/examples/create_mobie_htm_project.ipynb): generate a MoBIE project for high-throughput microscopy from a imaging based SARS-CoV-2 antibody assay. Below is a short code snippet that shows how to use it in a python script. @@ -74,4 +76,4 @@ Run ` --help` to get more information on how to use them. ## Citation -If you use the MoBIE framework in your research, please cite [Whole-body integration of gene expression and single-cell morphology](https://www.biorxiv.org/content/10.1101/2020.02.26.961037v1). +If you use the MoBIE framework in your research, please cite [the MoBIE bioRxiv preprint](https://www.biorxiv.org/content/10.1101/2022.05.27.493763v1). diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..cc19913 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,6 @@ +# MoBIE python examples + +This folder contains notebooks that demonstrate the usage of the MoBIE python library. 
+Currently, we have the following two example notebooks: +- [create_mobie_project](https://github.com/mobie/mobie-utils-python/blob/master/examples/create_mobie_project.ipynb): generate a MoBIE project for multi-modal data from a CLEM experiment +- [create_mobie_htm_project](https://github.com/mobie/mobie-utils-python/blob/master/examples/create_mobie_htm_project.ipynb): generate a MoBIE project for high-throughput microscopy from a imaging based SARS-CoV-2 antibody assay. diff --git a/examples/create_mobie_htm_project.ipynb b/examples/create_mobie_htm_project.ipynb new file mode 100644 index 0000000..d8e724e --- /dev/null +++ b/examples/create_mobie_htm_project.ipynb @@ -0,0 +1,332 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6da32cff", + "metadata": {}, + "source": [ + "# Create MoBIE HTM Project\n", + "\n", + "Create a MoBIE project for high-throughput-microscopy data. The test data for this example is available here: https://owncloud.gwdg.de/index.php/s/eu8JMlUFZ82ccHT. It contains 3 wells of a plate from a immunofluorescence based SARS-CoV-2 antibody assay from https://onlinelibrary.wiley.com/doi/full/10.1002/bies.202000257." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38524f13", + "metadata": {}, + "outputs": [], + "source": [ + "# general imports\n", + "import os\n", + "import string\n", + "from glob import glob\n", + "\n", + "import mobie\n", + "import mobie.htm as htm\n", + "import pandas as pd\n", + "\n", + "# the location of the data\n", + "# adapt these paths to your system and the input data you are using\n", + "\n", + "# location of the input data. 
\n", + "# the example data used in this notebook is available via this link:\n", + "# https://oc.embl.de/index.php/s/IV1709ZlcUB1k99\n", + "example_input_folder = \"/home/pape/Work/data/htm-test-data\"\n", + "\n", + "# the location of the mobie project that will be created\n", + "# we recommend that the mobie project folders have the structure \n", + "# the folder 'data' will contain the sub-folders for individual datasets\n", + "mobie_project_folder = \"/home/pape/Work/data/mobie/mobie_htm_project/data\"\n", + "\n", + "# name of the dataset that will be created.\n", + "# one project can contain multiple datasets\n", + "dataset_name = \"example-dataset\"\n", + "dataset_folder = os.path.join(mobie_project_folder, dataset_name)\n", + "\n", + "# the platform and number of jobs used for computation.\n", + "# choose 'local' to run computations on your machine.\n", + "# for large data, it is also possible to run computation on a cluster;\n", + "# for this purpose 'slurm' (for slurm cluster) and 'lsf' (for lsf cluster) are currently supported\n", + "target = \"local\"\n", + "max_jobs = 4" + ] + }, + { + "cell_type": "markdown", + "id": "fe5a2a13", + "metadata": {}, + "source": [ + "## Adding image data\n", + "\n", + "First, we add all the image data for the 3 wells. Here, we have 3 channels:\n", + "- `serum`: showing the measured immunofluorescence of the human serum\n", + "- `marker`: showing a marker channel for viral RNA\n", + "- `nuclei`: showing the nuclei stained with DAPI\n", + "\n", + "The function `htm.add_images` will add sources to the dataset metadata for all `input_files` that are passed.\n", + "It **will not** add corresponding views to show the individual images. Instead, we will add a grid view below that recreates the plate layout and where all image (and segmentation) sources can be toggled on and off." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aeb0deb1", + "metadata": {}, + "outputs": [], + "source": [ + "# the individual images are stored as h5 files in the folder with the example data.\n", + "# each hdf5 file contains multiple datasets, each corresponding to a different image channel (or segmentation)\n", + "input_files = glob(os.path.join(example_input_folder, \"*.h5\"))\n", + "input_files.sort()\n", + "\n", + "# the resolution in micron for this data, as well as the downscaling factors and chunks to be used in the data conversion\n", + "resolution = [0.65, 0.65]\n", + "scale_factors = 4 * [[2, 2]]\n", + "chunks = [512, 512]\n", + "\n", + "# the 3 image channels (each stored as dataset in the h5 file corresponding to the site)\n", + "channels = [\"serum\", \"marker\", \"nuclei\"]\n", + "for channel_name in channels:\n", + " # image_names determines the names for the corresponding image sources in MoBIE\n", + " image_names = [os.path.splitext(os.path.basename(im))[0] for im in input_files]\n", + " image_names = [f\"{channel_name}-{name}\" for name in image_names]\n", + "\n", + " htm.add_images(input_files, mobie_project_folder, dataset_name,\n", + " image_names, resolution, scale_factors, chunks, key=channel_name,\n", + " target=target, max_jobs=max_jobs, file_format=\"ome.zarr\")" + ] + }, + { + "cell_type": "markdown", + "id": "dbfe819f", + "metadata": {}, + "source": [ + "## Add segmentation data\n", + "\n", + "Next, we add the segmentation data. Here, we have 2 segmentations per site:\n", + "- `cells`: the segmentation of individual cells\n", + "- `nuclei`: the segmentation of individual nuclei\n", + "\n", + "`htm.add_segmentations` works very similar to `htm.add_images`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "49d15bd5", + "metadata": {}, + "outputs": [], + "source": [ + "segmentation_names = [\"cells\", \"nuclei\"]\n", + "for seg_name in segmentation_names:\n", + " image_names = [os.path.splitext(os.path.basename(im))[0] for im in input_files]\n", + " image_names = [f\"segmentation-{seg_name}-{name}\" for name in image_names]\n", + " \n", + " htm.add_segmentations(input_files, mobie_project_folder, dataset_name,\n", + " image_names, resolution, scale_factors, chunks, key=f\"segmentation/{seg_name}\",\n", + " target=target, max_jobs=max_jobs, file_format=\"ome.zarr\")" + ] + }, + { + "cell_type": "markdown", + "id": "5d7e7a6f", + "metadata": {}, + "source": [ + "## Add views to create plate layout\n", + "\n", + "Finally, we create the view with the plate layout and data, using MoBIE `grid` transformations and `regionDisplays`.\n", + "In addition to the layout, we can also add tables associated with wells, or with individual sites (=image positions). 
Here, we can use the example table for our test data from: https://owncloud.gwdg.de/index.php/s/m1ILROJc7Chnu9h" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79793e3e", + "metadata": {}, + "outputs": [], + "source": [ + "# first, we need to define function that translate source names to site names, site_names to well names and \n", + "# that return the 2d grid position for a given well\n", + "\n", + "\n", + "# extract the site name (= Well name and position in well for an image)\n", + "# here, the site name comes in the source name after the source prefix, i.e.\n", + "# source_name = f\"{prefix}_{site_name}\"\n", + "def to_site_name(source_name, prefix):\n", + " return source_name[(len(prefix) + 1):]\n", + "\n", + "\n", + "# extract the well name from the site name.\n", + "# here, the site name consists of well name and position in the well, i.e.\n", + "# source_name = f\"{well_name}_{position_in_well}\"\n", + "def to_well_name(site_name):\n", + " return site_name.split(\"_\")[0]\n", + "\n", + "\n", + "# map the well name to its position in the 2d grid\n", + "# here, the Wells are called C01, C02, etc.\n", + "def to_position(well_name):\n", + " r,c = well_name[0], well_name[1:]\n", + " r = string.ascii_uppercase.index(r)\n", + " c = int(c) - 1\n", + " return [c, r]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa6e9438", + "metadata": {}, + "outputs": [], + "source": [ + "# all our source prefixes (= image channel / segmentation names)\n", + "# and the corresponding source types\n", + "source_prefixes = [\"nuclei\", \"serum\", \"marker\", \"segmentation-cells\", \"segmentation-nuclei\"]\n", + "source_types = [\"image\", \"image\", \"image\", \"segmentation\", \"segmentation\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "031d6629", + "metadata": {}, + "outputs": [], + "source": [ + "# compute the contrast limits for the image channels\n", + "# (this is not strictly necessaty, but 
usually very beneficial for htm data to obtain a reasonable visualization of the data)\n", + "clims_nuclei = htm.compute_contrast_limits(\"nuclei\", dataset_folder, lower_percentile=4, upper_percentile=96, n_threads=max_jobs)\n", + "clims_serum = htm.compute_contrast_limits(\"serum\", dataset_folder, lower_percentile=4, upper_percentile=96, n_threads=max_jobs)\n", + "clims_marker = htm.compute_contrast_limits(\"marker\", dataset_folder, lower_percentile=4, upper_percentile=96, n_threads=max_jobs)\n", + "\n", + "# specifiy the settings for all the sources\n", + "source_settings = [ \n", + " # nucleus channel: color blue\n", + " {\"color\": \"blue\", \"contrastLimits\": clims_nuclei, \"visible\": True},\n", + " # serum channel: color green\n", + " {\"color\": \"green\", \"contrastLimits\": clims_serum, \"visible\": False},\n", + " # marker channel: color red\n", + " {\"color\": \"red\", \"contrastLimits\": clims_marker, \"visible\": False},\n", + " # the settings for the 2 segmentations\n", + " {\"lut\": \"glasbey\", \"tables\": [\"default.tsv\"], \"visible\": False, \"showTable\": False},\n", + " {\"lut\": \"glasbey\", \"tables\": [\"default.tsv\"], \"visible\": False, \"showTable\": False},\n", + "] " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85ea862d", + "metadata": {}, + "outputs": [], + "source": [ + "# create table for the sites (individual images)\n", + "\n", + "# adapt this to where this path is on your system\n", + "site_table_path = \"/home/pape/Work/data/htm-test-data/site-table.tsv\"\n", + "table = pd.read_csv(site_table_path, sep=\"\\t\")\n", + "\n", + "# the tables should be saved in a path relative to the dataset root folder,\n", + "# and are usually stored in the subfolder 'tables', with another sub-folder for each source or view with table(s)\n", + "table_out_path = os.path.join(dataset_folder, \"tables\", \"sites\")\n", + "os.makedirs(table_out_path, exist_ok=True)\n", + "table_out_path = os.path.join(table_out_path, 
\"default.tsv\")\n", + "\n", + "# we need to rename the site name from its representation in the table (C01-0001) to our representation (C01-1)\n", + "def rename_site(site_name):\n", + " well, image_id = site_name.split(\"-\")\n", + " image_id = int(image_id)\n", + " return f\"{well}_{image_id}\"\n", + "\n", + "table[\"sites\"] = table[\"sites\"].apply(rename_site)\n", + "\n", + "# the first column in tables for a MoBIE region display (which is used internally by the grid view)\n", + "# has to be called \"regionId\"\n", + "table = table.rename(columns={\"sites\": \"region_id\"})\n", + "table.to_csv(table_out_path, sep=\"\\t\", index=False)\n", + "print(table)\n", + "\n", + "# this is the relative path to the table folder, in relation to the dataset folder\n", + "site_table_folder = os.path.split(os.path.relpath(table_out_path, dataset_folder))[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "829020cf", + "metadata": {}, + "outputs": [], + "source": [ + "# we can also create a table for the wells; the procedure here is similar to the case where the images were added\n", + "\n", + "well_table_path = \"/home/pape/Work/data/htm-test-data/well-table.tsv\"\n", + "table = pd.read_csv(well_table_path, sep=\"\\t\")\n", + "\n", + "table_out_path = os.path.join(dataset_folder, \"tables\", \"wells\")\n", + "os.makedirs(table_out_path, exist_ok=True)\n", + "table_out_path = os.path.join(table_out_path, \"default.tsv\")\n", + "\n", + "table = table.rename(columns={\"wells\": \"region_id\"})\n", + "table.to_csv(table_out_path, sep=\"\\t\", index=False)\n", + "print(table)\n", + "\n", + "well_table_folder = os.path.split(os.path.relpath(table_out_path, dataset_folder))[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0b36b9b", + "metadata": {}, + "outputs": [], + "source": [ + "# crate the plate grid view\n", + "dataset_folder = os.path.join(mobie_project_folder, dataset_name)\n", + "htm.add_plate_grid_view(dataset_folder, 
view_name=\"default\",\n", + " source_prefixes=source_prefixes, source_types=source_types, source_settings=source_settings,\n", + " source_name_to_site_name=to_site_name, site_name_to_well_name=to_well_name,\n", + " well_to_position=to_position, site_table=site_table_folder, well_table=well_table_folder,\n", + " sites_visible=False, menu_name=\"bookmark\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1901c4d1", + "metadata": {}, + "outputs": [], + "source": [ + "mobie.validation.validate_project(mobie_project_folder)" + ] + }, + { + "cell_type": "markdown", + "id": "7d685231", + "metadata": {}, + "source": [ + "For adding the necessary metadata to share the project via s3, and options for uploading it to s3, please check out the last cells of the `ceate_mobie_project.ipynb` notebook (in the same folder on github as this one)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/create_mobie_project.ipynb b/examples/create_mobie_project.ipynb index 4083eb0..9d53e20 100644 --- a/examples/create_mobie_project.ipynb +++ b/examples/create_mobie_project.ipynb @@ -31,23 +31,23 @@ "# location of the input data. 
\n", "# the example data used in this notebook is available via this link:\n", "# https://oc.embl.de/index.php/s/IV1709ZlcUB1k99\n", - "example_input_data = '/home/pape/Work/data/mobie/mobie-example-data'\n", + "example_input_data = \"/home/pape/Work/data/mobie/mobie-example-data\"\n", "\n", "# the location of the mobie project that will be created\n", - "# note that mobie project folders should always have the structure \n", + "# we recommend that the mobie project folders have the structure \n", "# the folder 'data' will contain the sub-folders for individual datasets\n", - "mobie_project_folder = '/home/pape/Work/data/mobie/mobie_example_project/data'\n", + "mobie_project_folder = \"/home/pape/Work/data/mobie/mobie_example_project/data\"\n", "\n", "# name of the dataset that will be created.\n", "# one project can contain multiple datasets\n", - "dataset_name = 'example-dataset'\n", + "dataset_name = \"example-dataset\"\n", "dataset_folder = os.path.join(mobie_project_folder, dataset_name)\n", "\n", "# the platform and number of jobs used for computation.\n", "# choose 'local' to run computations on your machine.\n", "# for large data, it is also possible to run computation on a cluster;\n", "# for this purpose 'slurm' (for slurm cluster) and 'lsf' (for lsf cluster) are currently supported\n", - "target = 'local'\n", + "target = \"local\"\n", "max_jobs = 4" ] }, @@ -81,10 +81,13 @@ "outputs": [], "source": [ "# The 'default' image for our example dataset is a 2d EM slice showing an overview of the dataset.\n", - "input_file = os.path.join(example_input_data, 'em_overview.tif')\n", + "input_file = os.path.join(example_input_data, \"em_overview.tif\")\n", "\n", "# This is the name that will be given to the image source in mobie.\n", - "raw_name = 'em-raw'\n", + "raw_name = \"raw\"\n", + "# The name of the menu from which the image can be added to the viewer.\n", + "# Here, we choose \"em\", because this is an EM image slice.\n", + "menu_name = \"em\"\n", "\n", "# 
We need some metadata to create the n5-file in big-data-viewer format:\n", "# - unit: the phyiscal unit of the coordinate system\n", @@ -97,7 +100,7 @@ "# Note that axes are always listed in the order ZYX here (in the java implementation of mobie / big-data-viewer the axis convention is XYZ).\n", "# Also note that the values for all three axes (ZYX) need to be specified. In the case of 2d data, the value\n", "# for Z should be set to 1.\n", - "unit = 'nanometer'\n", + "unit = \"nanometer\"\n", "resolution = (1., 10., 10.)\n", "chunks = (1, 512, 512)\n", "scale_factors = 4 * [[1, 2, 2]]\n", @@ -108,6 +111,7 @@ " root=mobie_project_folder,\n", " dataset_name=dataset_name,\n", " image_name=raw_name,\n", + " menu_name=menu_name,\n", " resolution=resolution,\n", " chunks=chunks,\n", " scale_factors=scale_factors,\n", @@ -137,12 +141,12 @@ "# These tomograms show small areas in higher detail and in 3d.\n", "\n", "# These are the two file names for the tomograms.\n", - "tomo_names = ['27_tomogram.tif', '29_tomogram.tif']\n", + "tomo_names = [\"27_tomogram.tif\", \"29_tomogram.tif\"]\n", "\n", "# We choose chunks and scale factors for 3d data, taking\n", "# into account that the tomograms have a larger extent in the\n", "# XY plane than in Z\n", - "unit = 'nanometer'\n", + "unit = \"nanometer\"\n", "resolution = [5., 5., 5.]\n", "chunks = (32, 128, 128)\n", "scale_factors = [[1, 2, 2], [1, 2, 2],\n", @@ -164,7 +168,7 @@ "\n", "# add the two tomograms\n", "for name, trafo in zip(tomo_names, transformations):\n", - " im_name = f\"em-{os.path.splitext(name)[0]}\"\n", + " im_name = os.path.splitext(name)[0]\n", " im_path = os.path.join(example_input_data, name)\n", " \n", " # we need to pass additional 'view' arguments for the tomograms.\n", @@ -183,6 +187,7 @@ " root=mobie_project_folder,\n", " dataset_name=dataset_name,\n", " image_name=im_name,\n", + " menu_name=\"em\", # also put the tomo sources in the em menu\n", " resolution=resolution,\n", " 
scale_factors=scale_factors,\n", " transformation=trafo,\n", @@ -202,25 +207,21 @@ "source": [ "# Next, we add a fluorescence image that is also part of the example dataset.\n", "\n", - "input_path = os.path.join(example_input_data, 'fluorescence_downsampled.tif')\n", + "input_path = os.path.join(example_input_data, \"fluorescence_downsampled.tif\")\n", "\n", "# The name of the image in mobie.\n", - "# Note that mobie will use the identifier in front of the first '-'\n", - "# to group images by name.\n", - "# So in this case we will have the two groups 'em' and 'lm'.\n", - "im_name = \"lm-fluorescence\"\n", + "im_name = \"fluorescence\"\n", + "# We choose 'lm' as menu name, because this is a lightmicroscopy source\n", + "menu_name = \"lm\"\n", "\n", "# This is again a 2d image, so we set all values for Z to 1.\n", - "unit = 'nanometer'\n", + "unit = \"nanometer\"\n", "resolution = [1., 100., 100.]\n", "scale_factors = [[1, 2, 2], [1, 2, 2], [1, 2, 2]]\n", "chunks = (1, 512, 512)\n", "\n", "# we set the default display color to green.\n", - "view = metadata.get_default_view(\n", - " \"image\", im_name,\n", - " color=\"green\"\n", - ")\n", + "view = metadata.get_default_view(\"image\", im_name, color=\"green\")\n", "\n", "mobie.add_image(\n", " input_path=input_path,\n", @@ -228,6 +229,7 @@ " root=mobie_project_folder,\n", " dataset_name=dataset_name,\n", " image_name=im_name,\n", + " menu_name=menu_name,\n", " resolution=resolution,\n", " scale_factors=scale_factors,\n", " view=view,\n", @@ -245,8 +247,8 @@ "outputs": [], "source": [ "# as last image, we add a binary mask for the foreground in the image\n", - "input_path = os.path.join(example_input_data, 'em_mask.tif')\n", - "mask_name = \"em-mask\"\n", + "input_path = os.path.join(example_input_data, \"em_mask.tif\")\n", + "mask_name = \"mask\"\n", "\n", "# again, the mask is 2d\n", "unit = \"nanometer\"\n", @@ -260,6 +262,7 @@ " root=mobie_project_folder,\n", " dataset_name=dataset_name,\n", " 
image_name=mask_name,\n", + " menu_name=\"em\",\n", " resolution=resolution,\n", " chunks=chunks,\n", " scale_factors=scale_factors,\n", @@ -286,9 +289,9 @@ }, "outputs": [], "source": [ - "# we add a segmentation for several objects visible in the em-overview image\n", - "input_path = os.path.join(example_input_data, 'em_segmentation.tif')\n", - "segmentation_name = \"em-segmentation\"\n", + "# we add a segmentation for several of the cells visible in the em-overview image\n", + "input_path = os.path.join(example_input_data, \"em_segmentation.tif\")\n", + "segmentation_name = \"cells\"\n", "\n", "unit = \"nanometer\"\n", "resolution = [1., 30., 30.]\n", @@ -301,6 +304,7 @@ " root=mobie_project_folder,\n", " dataset_name=dataset_name,\n", " segmentation_name=segmentation_name,\n", + " menu_name=\"em-segmentation\",\n", " resolution=resolution,\n", " chunks=chunks,\n", " scale_factors=scale_factors,\n", @@ -312,9 +316,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Adding and updating bookmarks\n", + "## Updating views\n", "\n", - "TODO desribe" + "The `create_view` function can be used to create new views or update existing views (if `overwrite=True` is set).\n", + "Here, we use it to update the default view for our dataset." 
] }, { @@ -328,13 +333,12 @@ "source_list = [[raw_name], [segmentation_name]]\n", "settings = [ \n", " {\"color\": \"white\", \"contrastLimits\": [0., 255.]},\n", - " {\"color\": \"glasbey\", \"opacity\": 0.75}\n", + " {\"lut\": \"glasbey\", \"opacity\": 0.75}\n", "]\n", - "mobie.metadata.add_dataset_bookmark(dataset_folder, \"default\",\n", - " sources=source_list, display_settings=settings,\n", - " overwrite=True)\n", "\n", - "# TODO add a bookmark with affine transform and a grid bookmark for the tomograms" + "mobie.create_view(dataset_folder, \"default\",\n", + " sources=source_list, display_settings=settings,\n", + " overwrite=True)" ] }, { @@ -366,12 +370,12 @@ "# this allows specifying object stores that are different from aws\n", "# here, we use the object store located at EMBL Heidelberg as service endpoint.\n", "# to use an aws s3 endpoint, set it to https://s3.amazonaws.com \n", - "bucket_name = 'my-test-bucket'\n", + "bucket_name = \"my-test-bucket\"\n", "\n", - "service_endpoint = 'https://s3.embl.de'\n", + "service_endpoint = \"https://s3.embl.de\"\n", "\n", "metadata.add_remote_project_metadata(\n", - " mobie_project_folder,\n", + "mobie_project_folder,\n", " bucket_name,\n", " service_endpoint\n", ")\n", @@ -415,9 +419,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python3.bkp" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -429,7 +433,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.8" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/examples/experimental/add_slice_grid_view.py b/examples/experimental/add_slice_grid_view.py new file mode 100644 index 0000000..d64bd5f --- /dev/null +++ b/examples/experimental/add_slice_grid_view.py @@ -0,0 +1,15 @@ +from mobie.metadata.slice_grid_view import create_slice_grid + + +def add_slice_grid_view(): + dataset_folder = 
"/home/pape/Work/data/mobie/mobie_example_project/data/example-dataset" + source = "em-27_tomogram" + create_slice_grid( + dataset_folder, source, n_slices=9, + view_name="slice-grid", menu_name="slice-grids", + overwrite=True + ) + + +if __name__ == "__main__": + add_slice_grid_view() diff --git a/examples/experimental/create_advanced_views.ipynb b/examples/experimental/create_advanced_views.ipynb new file mode 100644 index 0000000..49aff7c --- /dev/null +++ b/examples/experimental/create_advanced_views.ipynb @@ -0,0 +1,95 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c19a1d18", + "metadata": {}, + "source": [ + "# Create advanced views\n", + "\n", + "TODO" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "24255e3b", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import mobie\n", + "\n", + "# the location of the mobie project that will be created\n", + "# before running this example, you must first run \"create_mobie_project.ipynb\"\n", + "# so that this project is generated\n", + "# and of course you need to update the file path accordingly\n", + "mobie_project_folder = \"/home/pape/Work/data/mobie/mobie_example_project/data\"" + ] + }, + { + "cell_type": "markdown", + "id": "8f6b308f", + "metadata": {}, + "source": [ + "## Create a grid view\n", + "\n", + "TODO" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "13c536fc", + "metadata": {}, + "outputs": [], + "source": [ + "dataset_name = \"example-dataset\"\n", + "dataset_folder = os.path.join(mobie_project_folder, dataset_name)\n", + "\n", + "# TODO get the correct display settings\n", + "sources = [[\"27_tomogram\"], [\"29_tomogram\"]]\n", + "mobie.create_grid_view(dataset_folder, \"test-grid\", sources)" + ] + }, + { + "cell_type": "markdown", + "id": "34fd7797", + "metadata": {}, + "source": [ + "## Create slice grid view\n", + "\n", + "TODO" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04efbcef", + 
"metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/project_creation_script.py b/examples/scripts/create_mobie_project.py similarity index 100% rename from examples/project_creation_script.py rename to examples/scripts/create_mobie_project.py diff --git a/mobie/__init__.py b/mobie/__init__.py index 4c7ae6e..4f65e05 100644 --- a/mobie/__init__.py +++ b/mobie/__init__.py @@ -3,6 +3,6 @@ from .registration import add_registered_source from .segmentation import add_segmentation from .traces import add_traces -from .view_utils import combine_views, merge_view_file +from .view_utils import create_view, create_grid_view, combine_views, merge_view_file from .__version__ import __version__, SPEC_VERSION diff --git a/mobie/experimental.py b/mobie/experimental.py new file mode 100644 index 0000000..375ba89 --- /dev/null +++ b/mobie/experimental.py @@ -0,0 +1,230 @@ +import warnings + +import numpy as np +import elf.transformation as trafo_utils + +from . 
import metadata as mobie_metadata +from .view_utils import _write_view +from .metadata import source_metadata as source_utils +from .metadata import view_metadata as view_utils + + +def _get_slice_grid( + dataset_folder, + dataset_metadata, + source, + n_slices, + view_name, + menu_name, + initial_transforms, + display_settings, + is_exclusive, +): + sources = dataset_metadata["sources"] + views = dataset_metadata["views"] + + source_type, source_data = next(iter(sources[source].items())) + if source_type != "image": + raise ValueError(f"create_slice_grid is only supported for image sources, got {source_type}.") + + z_axis = 0 + shape = source_utils.get_shape(source_data["imageData"], dataset_folder) + ax_len = shape[z_axis] + # spacing_pixels = shape[z_axis] / n_slices + if ax_len % n_slices != 0: + msg = f"Can't evenly split volume with length {ax_len} into {n_slices} slices." + warnings.warn(msg) + + # load the data transformation and compose it with the + resolution = source_utils.get_resolution(source_data["imageData"], dataset_folder) + data_transform = source_utils.get_transformation(source_data["imageData"], dataset_folder, resolution=resolution) + if initial_transforms is not None: + # TODO + raise NotImplementedError + + shape_vector = np.array(list(shape) + [1], dtype="float64") + transformed_shape = data_transform @ shape_vector + assert transformed_shape.shape == (4,) + z_extent = shape_vector[0] + z_spacing = z_extent / n_slices + + # compute the individual slice transformations (shifts) + # to enable axis != 0 we would also need to do an axis rotation here + source_transformations = [] + grid_sources = [] + for slice_id in range(n_slices): + slice_trafo_params = trafo_utils.affine_matrix_3d(translation=[slice_id * z_spacing, 0.0, 0.0]) + # TODO do we need the resolution here?! 
+ slice_trafo_params = trafo_utils.native_to_bdv(slice_trafo_params) + name_after_trafo = f"{source}_z{slice_id}" + slice_trafo = view_utils.get_affine_source_transform( + [source], slice_trafo_params, source_names_after_transform=[name_after_trafo] + ) + source_transformations.append(slice_trafo) + grid_sources.append(name_after_trafo) + + # add the grid transformation + nested_grid_sources = [[src] for src in grid_sources] + grid = view_utils.get_transformed_grid_source_transform(nested_grid_sources) + source_transformations.append(grid) + + if display_settings is None: + display_name = f"{source}-slice-grid" + # if no display settings were passed, try to use the default view for this source + try: + reference_displays = views[source]["sourceDisplays"] + assert len(reference_displays) == 1 + display_settings = reference_displays[0] + assert "imageDisplay" in display_settings + display_settings["imageDisplay"]["name"] = display_name + display_settings["imageDisplay"]["sources"] = grid_sources + # and if we don't have a default view, use the default imageSource settings + except Exception: + warnings.warn(f"Could not parse the display settings for {source}, using default settings") + display_settings = view_utils.get_image_display(display_name, grid_sources) + else: + assert isinstance(display_settings, dict) + # there are two options of passing display settings: either as full 'imageDisplay' + # or as kwargs for 'get_image_display'. 
They need to be treated differently + if "imageDisplay" in display_settings: + display_name = display_settings["imageDisplay"]["name"] + elif "name" in display_settings: + display_name = display_settings["name"] + else: + display_name = f"{source}-slice-grid" + + new_view = view_utils.get_view([display_name], [source_type], [grid_sources], [display_settings], + is_exclusive=is_exclusive, menu_name=menu_name, + source_transforms=source_transformations) + return new_view + + +def create_slice_grid( + dataset_folder, + source, + n_slices, + view_name, + menu_name, + initial_transforms=None, + display_settings=None, + overwrite=False, + view_file=None, + is_exclusive=True, + return_view=False, +): + """Create a grid that aligns n slices of a source shifted so that each n-th slice is aligned with the origin. + + Arguments: + dataset_folder [str] - + source [str] - + n_slices [int] - + view_name [str] - name for the sliced view + menu_name [str] - menu name for the sliced view + initial_transforms [list[dict]] - list of transformations to be applied before slicing. (default: None) + display_settings [dict] - display settings for the resulting view. + By default will use the display settings of the default view for this source. (default: None) + overwrite [bool] - whether to overwrite existing views (default: False) + view_file [str] - file path for a view file to which the view should be saved. + By default it will be saved in the dataset metadata. 
(default: None) + is_exclusive [bool] - whether this is an exclusive view (default: True) + return_view [bool] - return the generated view instead of serializing it (default: False) + """ + dataset_metadata = mobie_metadata.read_dataset_metadata(dataset_folder) + if source not in dataset_metadata["sources"]: + raise ValueError(f"The source {source} could not be found in the dataset at {dataset_folder}.") + sliced_view = _get_slice_grid( + dataset_folder, dataset_metadata, source, n_slices, + view_name, menu_name, initial_transforms, display_settings, is_exclusive + ) + _write_view( + dataset_folder, dataset_metadata, view_file, view_name, sliced_view, overwrite, return_view + ) + + +def create_slice_grid_with_reference_view( + dataset_folder, + source, + reference_view, + n_slices, + view_name, + menu_name=None, + initial_transforms=None, + display_settings=None, + overwrite=False, + view_file=None, + is_exclusive=True, + return_view=False, +): + """Create a grid that aligns n slices of a source shifted so that each n-th slice is aligned with the origin, + taking other settings from a reference view. + + The reference view will be used to derive initial transformation, display settings and + other view specific parameters, unless over-written by a more explicit parameter. + + Arguments: + dataset_folder [str] - + source [str] - + reference_view [str] - + n_slices [int] - + view_name [str] - name for the sliced view. + menu_name [str] - menu name for the sliced view. + By default will be taken from the reference view (default: None). + initial_transforms [list[dict]] - list of transformations to be applied before slicing. + By default will be taken from the reference view. (default: None) + display_settings [dict] - display settings for the resulting view. 
+ By default will be taken from the reference view (default: None) + overwrite [bool] - whether to overwrite existing views (default: False) + view_file [str] - file path for a view file to which the view should be saved. + By default it will be saved in the dataset metadata. (default: None) + is_exclusive [bool] - whether this is an exclusive view (default: True) + return_view [bool] - return the generated view instead of serializing it (default: False) + """ + dataset_metadata = mobie_metadata.read_dataset_metadata(dataset_folder) + if source not in dataset_metadata["sources"]: + raise ValueError(f"The source {source} could not be found in the dataset at {dataset_folder}.") + + # load the reference view and use it for default settings, transformations etc. + views = dataset_metadata["views"] + if reference_view not in views: + raise ValueError(f"The reference view {reference_view} could not be found in the dataset at {dataset_folder}.") + ref_view = views[reference_view] + + if menu_name is None: + menu_name = ref_view["uiSelectionGroup"] + + if initial_transforms is None and "sourceTransforms" in ref_view: + # get the transformations for this source from the reference view + source_transforms = ref_view["sourceTransforms"] + initial_transforms = [] + for trafo in source_transforms: + trafo_type, trafo_params = next(iter(trafo.items())) + if trafo_type != "affine": + raise ValueError(f"Only support reference views with affine transformations, got {trafo_type}") + if source not in trafo_params["sources"]: + continue + initial_transforms.append( + view_utils.get_affine_source_transform([source], trafo_params["parameters"], + timepoints=trafo_params.get("timepoints", None), + name=trafo_params.get("name", None)) + ) + + if display_settings is None and "sourceDisplays" in ref_view: + # get the display settings for this source from the reference view + source_displays = ref_view["sourceDisplays"] + for display in source_displays: + if "imageDisplay" not in display: + continue + 
settings = next(iter(display.values())) + if source in settings["sources"]: + settings.pop("sources") + settings["sources"] = [source] + display_settings = view_utils.get_image_display(**settings) + break + + sliced_view = _get_slice_grid( + dataset_folder, dataset_metadata, source, n_slices, view_name, + menu_name, initial_transforms, display_settings, is_exclusive + ) + return _write_view( + dataset_folder, dataset_metadata, view_file, view_name, sliced_view, overwrite, return_view + ) diff --git a/mobie/htm/__init__.py b/mobie/htm/__init__.py index b7c5450..4f3bdae 100644 --- a/mobie/htm/__init__.py +++ b/mobie/htm/__init__.py @@ -1,2 +1,3 @@ from .data_import import add_images, add_segmentations from .grid_views import add_plate_grid_view, get_merged_plate_grid_view +from .utils import compute_contrast_limits diff --git a/mobie/htm/data_import.py b/mobie/htm/data_import.py index 4044764..a068380 100644 --- a/mobie/htm/data_import.py +++ b/mobie/htm/data_import.py @@ -42,12 +42,12 @@ def _copy_image_data(files, key, root, return input_names, metadata_paths -def _require_dataset(root, dataset_name, file_format, is_default_dataset): +def _require_dataset(root, dataset_name, file_format, is_default_dataset, is2d): ds_exists = utils.require_dataset(root, dataset_name, file_format) dataset_folder = os.path.join(root, dataset_name) if not ds_exists: metadata.create_dataset_structure(root, dataset_name, [file_format]) - metadata.create_dataset_metadata(dataset_folder) + metadata.create_dataset_metadata(dataset_folder, is2d=is2d) metadata.add_dataset(root, dataset_name, is_default_dataset) @@ -89,11 +89,13 @@ def add_images(files, root, resolution, scale_factors, chunks, key=None, file_format="bdv.n5", tmp_folder=None, target="local", max_jobs=multiprocessing.cpu_count(), - unit="micrometer", is_default_dataset=False): - assert len(files) == len(image_names) + unit="micrometer", is_default_dataset=False, is2d=None): + assert len(files) == len(image_names), 
f"{len(files)}, {len(image_names)}" # require the dataset - _require_dataset(root, dataset_name, file_format, is_default_dataset) + if is2d is None: + is2d = len(resolution) == 2 + _require_dataset(root, dataset_name, file_format, is_default_dataset, is2d=is2d) tmp_folder = f"tmp_{dataset_name}_{image_names[0]}" if tmp_folder is None else tmp_folder # copy all the image data into the dataset with the given file format @@ -114,11 +116,14 @@ def add_segmentations(files, root, resolution, scale_factors, chunks, key=None, file_format="bdv.n5", tmp_folder=None, target="local", max_jobs=multiprocessing.cpu_count(), - add_default_tables=True, unit="micrometer", is_default_dataset=False): + add_default_tables=True, unit="micrometer", + is_default_dataset=False, is2d=None): assert len(files) == len(segmentation_names) # require the dataset - _require_dataset(root, dataset_name, file_format, is_default_dataset) + if is2d is None: + is2d = len(resolution) == 2 + _require_dataset(root, dataset_name, file_format, is_default_dataset, is2d=is2d) tmp_folder = f"tmp_{dataset_name}_{segmentation_names[0]}" if tmp_folder\ is None else tmp_folder diff --git a/mobie/htm/grid_views.py b/mobie/htm/grid_views.py index 43d4b8b..a1c553f 100644 --- a/mobie/htm/grid_views.py +++ b/mobie/htm/grid_views.py @@ -2,7 +2,7 @@ import numpy as np import mobie -from ..tables import compute_source_annotation_table +from ..tables import compute_region_table def _get_display(name, source_type, sources, settings): @@ -53,7 +53,7 @@ def _get_sources_and_site_names(metadata, source_prefixes, source_name_to_site_n for source_prefix, sources in this_sources.items() } site_names = all_site_names[source_prefixes[0]] - assert all(snames == site_names for snames in all_site_names.values()) + assert all(snames == site_names for snames in all_site_names.values()), f"{site_names}, {all_site_names.values()}" return this_sources, site_names @@ -65,7 +65,7 @@ def get_transformed_plate_grid_view(metadata, 
source_prefixes, site_table=None, well_table=None, well_to_position=None, name_filter=None, sites_visible=True, wells_visible=True, - add_annotation_displays=True): + add_region_displays=True): assert len(source_prefixes) == len(source_types) == len(source_settings) this_sources, site_names = _get_sources_and_site_names(metadata, source_prefixes, source_name_to_site_name, name_filter) @@ -104,9 +104,9 @@ def get_transformed_plate_grid_view(metadata, source_prefixes, all_site_sources.update(well_sources) # create the annotation display for the sites - if add_annotation_displays: + if add_region_displays: assert site_table is not None - site_display = mobie.metadata.get_source_annotation_display( + site_display = mobie.metadata.get_region_display( "sites", all_site_sources, table_data={"tsv": {"relativePath": site_table}}, tables=["default.tsv"], @@ -125,9 +125,9 @@ def get_transformed_plate_grid_view(metadata, source_prefixes, source_transforms.append(plate_trafo) # create the annotation display for wells to plate - if add_annotation_displays: + if add_region_displays: assert well_table is not None - well_display = mobie.metadata.get_source_annotation_display( + well_display = mobie.metadata.get_region_display( "wells", plate_sources, table_data={"tsv": {"relativePath": well_table}}, tables=["default.tsv"], @@ -152,7 +152,7 @@ def get_merged_plate_grid_view(metadata, source_prefixes, source_types, site_table=None, well_table=None, well_to_position=None, name_filter=None, sites_visible=True, wells_visible=True, - add_annotation_displays=True): + add_region_displays=True): assert len(source_prefixes) == len(source_types) == len(source_settings) this_sources, site_names = _get_sources_and_site_names(metadata, source_prefixes, source_name_to_site_name, name_filter) @@ -206,11 +206,11 @@ def get_merged_plate_grid_view(metadata, source_prefixes, source_types, source_displays.append(display) # add the source annotation displays if configured - if add_annotation_displays: + 
if add_region_displays: # create the annotation display for the sites assert site_table is not None - site_display = mobie.metadata.get_source_annotation_display( + site_display = mobie.metadata.get_region_display( "sites", all_site_sources, table_data={"tsv": {"relativePath": site_table}}, tables=["default.tsv"], @@ -224,7 +224,7 @@ def get_merged_plate_grid_view(metadata, source_prefixes, source_types, all_plate_sources = {well: [f"{well}_{prefix}" for prefix in source_prefixes] for well in well_names} assert well_table is not None - well_display = mobie.metadata.get_source_annotation_display( + well_display = mobie.metadata.get_region_display( "wells", all_plate_sources, table_data={"tsv": {"relativePath": well_table}}, tables=["default.tsv"], @@ -256,7 +256,7 @@ def _get_default_site_table(ds_folder, metadata, source_prefixes, wells = [site_name_to_well_name(name) for name in site_names] sources = {name: source_prefixes for name in site_names} - compute_source_annotation_table(sources, table_path, wells=wells) + compute_region_table(sources, table_path, wells=wells) return rel_table_folder @@ -273,7 +273,7 @@ def _get_default_well_table(ds_folder, metadata, source_prefixes, wells = list(set([site_name_to_well_name(name) for name in site_names])) sources = {well: source_prefixes for well in wells} - compute_source_annotation_table(sources, table_path) + compute_region_table(sources, table_path) return rel_table_folder @@ -284,16 +284,16 @@ def add_plate_grid_view(ds_folder, view_name, menu_name, site_table=None, well_table=None, well_to_position=None, name_filter=None, sites_visible=True, wells_visible=True, - add_annotation_displays=True, + add_region_displays=True, use_transformed_grid=False): metadata = mobie.metadata.read_dataset_metadata(ds_folder) - if site_table is None and add_annotation_displays: + if site_table is None and add_region_displays: site_table = _get_default_site_table(ds_folder, metadata, source_prefixes, source_name_to_site_name, 
site_name_to_well_name, name_filter) - if well_table is None and add_annotation_displays: + if well_table is None and add_region_displays: well_table = _get_default_well_table(ds_folder, metadata, source_prefixes, source_name_to_site_name, site_name_to_well_name, @@ -308,7 +308,7 @@ def add_plate_grid_view(ds_folder, view_name, menu_name, site_table=site_table, well_table=well_table, name_filter=name_filter, sites_visible=sites_visible, wells_visible=wells_visible, - add_annotation_displays=add_annotation_displays) + add_region_displays=add_region_displays) else: view = get_merged_plate_grid_view(metadata, source_prefixes, source_types, source_settings, menu_name, @@ -318,6 +318,6 @@ def add_plate_grid_view(ds_folder, view_name, menu_name, name_filter=name_filter, site_table=site_table, well_table=well_table, sites_visible=sites_visible, wells_visible=wells_visible, - add_annotation_displays=add_annotation_displays) + add_region_displays=add_region_displays) metadata["views"][view_name] = view mobie.metadata.write_dataset_metadata(ds_folder, metadata) diff --git a/mobie/htm/utils.py b/mobie/htm/utils.py new file mode 100644 index 0000000..6ad3831 --- /dev/null +++ b/mobie/htm/utils.py @@ -0,0 +1,46 @@ +import json +import os +from concurrent import futures + +import numpy as np +from elf.io import open_file +from tqdm import tqdm +from ..metadata import read_dataset_metadata + + +def compute_contrast_limits( + source_prefix, dataset_folder, lower_percentile, upper_percentile, n_threads, cache_path=None +): + if cache_path is not None and os.path.exists(cache_path): + with open(cache_path) as f: + return json.load(f) + sources = read_dataset_metadata(dataset_folder)["sources"] + + def compute_clim_im(source_name): + path = os.path.join( + dataset_folder, + sources[source_name]["image"]["imageData"]["ome.zarr"]["relativePath"] + ) + with open_file(path, "r") as f: + data = f["s0"][:] + cmin = np.percentile(data, lower_percentile) + cmax = np.percentile(data, 
upper_percentile) + return cmin, cmax + + source_names = [name for name in sources.keys() if name.startswith(source_prefix)] + with futures.ThreadPoolExecutor(n_threads) as tp: + results = list(tqdm( + tp.map(compute_clim_im, source_names), + total=len(source_names), + desc=f"Compute contrast limits for {source_prefix}" + )) + + cmin = np.median([res[0] for res in results]) + cmax = np.median([res[1] for res in results]) + clim = [float(cmin), float(cmax)] + + if cache_path is not None: + with open(cache_path, "w") as f: + json.dump(clim, f) + + return clim diff --git a/mobie/metadata/__init__.py b/mobie/metadata/__init__.py index 82ce561..2e74db9 100644 --- a/mobie/metadata/__init__.py +++ b/mobie/metadata/__init__.py @@ -1,4 +1,3 @@ -from .bookmark_metadata import add_additional_bookmark, add_dataset_bookmark, add_grid_bookmark from .dataset_metadata import (add_view_to_dataset, copy_dataset_folder, create_dataset_structure, create_dataset_metadata, read_dataset_metadata, set_is2d, write_dataset_metadata) @@ -9,9 +8,9 @@ from .remote_metadata import (add_remote_dataset_metadata, add_remote_project_metadata, add_remote_source_metadata, upload_source) from .source_metadata import add_source_to_dataset, get_image_metadata, get_segmentation_metadata -from .view_metadata import (is_grid_view, create_source_annotation_display, +from .view_metadata import (is_grid_view, create_region_display, get_affine_source_transform, get_crop_source_transform, get_default_view, get_merged_grid_source_transform, - get_image_display, get_segmentation_display, get_source_annotation_display, + get_image_display, get_segmentation_display, get_region_display, get_transformed_grid_source_transform, get_grid_view, get_view, get_viewer_transform) diff --git a/mobie/metadata/bookmark_metadata.py b/mobie/metadata/bookmark_metadata.py deleted file mode 100644 index e292d10..0000000 --- a/mobie/metadata/bookmark_metadata.py +++ /dev/null @@ -1,150 +0,0 @@ -import os -import warnings - -from 
.dataset_metadata import add_view_to_dataset, read_dataset_metadata, write_dataset_metadata -from .utils import read_metadata, write_metadata -from .view_metadata import get_view, get_grid_view -from ..validation.utils import validate_with_schema - -# TODO add more convenience for source and viewer transforms ? - - -def create_bookmark_view(sources, all_sources, display_settings, - source_transforms, viewer_transform, display_group_names): - all_source_names = set(all_sources.keys()) - source_types = [] - for source_list in sources: - - invalid_source_names = list(set(source_list) - all_source_names) - if invalid_source_names: - raise ValueError(f"Invalid source names: {invalid_source_names}") - - this_source_types = list(set( - [list(all_sources[source].keys())[0] for source in source_list] - )) - if len(this_source_types) > 1: - raise ValueError(f"Inconsistent source types: {this_source_types}") - source_types.append(this_source_types[0]) - - if display_group_names is None: - display_group_names = [f'{source_type}-group-{i}' for i, source_type in enumerate(source_types)] - - menu_name = "bookmark" - view = get_view(display_group_names, source_types, - sources, display_settings, - is_exclusive=True, - menu_name=menu_name, - source_transforms=source_transforms, - viewer_transform=viewer_transform) - return view - - -def _check_bookmark(bookmark_name, bookmarks, overwrite): - if bookmark_name in bookmarks: - msg = f"Bookmark {bookmark_name} is already present." - if overwrite: - warnings.warn(msg) - else: - raise ValueError(msg) - - -def add_dataset_bookmark(dataset_folder, bookmark_name, - sources, display_settings, - source_transforms=None, viewer_transform=None, - display_group_names=None, overwrite=False): - """ Add or update a view in dataset.json:views. - - Views can reproduce any given viewer state. 
- - Arguments: - dataset_folder [str] - path to the dataset folder - bookmark_name [str] - name of the view - sources [list[list[str]]] - - display_settings [list[dict]] - - source_transforms [list[dict]] - - viewer_transform [dict] - - display_group_names [list[str]] - - overwrite [bool] - whether to overwrite existing views (default: False) - """ - all_sources = read_dataset_metadata(dataset_folder)['sources'] - view = create_bookmark_view(sources, all_sources, display_settings, - source_transforms, viewer_transform, - display_group_names) - validate_with_schema(view, 'view') - add_view_to_dataset(dataset_folder, bookmark_name, view, overwrite=overwrite) - - -def add_additional_bookmark(dataset_folder, bookmark_file_name, bookmark_name, - sources, display_settings, - source_transforms=None, viewer_transform=None, - display_group_names=None, overwrite=False): - """ Add or update a view in a bookmark file in /misc/bookmarks - - Views can reproduce any given viewer state. - - Arguments: - dataset_folder [str] - path to the dataset folder - bookmark_file_name [str] - name of the bookmark file - bookmark_name [str] - name of the bookmark - overwrite [bool] - whether to overwrite existing bookmarks (default: False) - """ - if not bookmark_file_name.endswith('.json'): - bookmark_file_name += '.json' - bookmark_file = os.path.join(dataset_folder, "misc", "views", bookmark_file_name) - - metadata = read_metadata(bookmark_file) - bookmarks = metadata.get("views", {}) - _check_bookmark(bookmark_name, bookmarks, overwrite) - - all_sources = read_dataset_metadata(dataset_folder)['sources'] - view = create_bookmark_view(sources, all_sources, display_settings, - source_transforms, viewer_transform, - display_group_names) - validate_with_schema(view, 'view') - - bookmarks[bookmark_name] = view - metadata['views'] = bookmarks - write_metadata(bookmark_file, metadata) - - -def add_grid_bookmark(dataset_folder, name, sources, table_folder=None, - display_groups=None, 
display_group_settings=None, - positions=None, bookmark_file_name=None, - overwrite=False): - """ Add or update a grid view. - - Arguments: - dataset_folder [str] - path to the dataset folder - name [str] - name of this bookmark - sources [list[list[str]]] - sources to be arranged in the grid - table_folder [str] - path to the table folder, relative to the dataset folder (default: None) - display_groups [dict[str, str] - (default: None) - display_group_settings [dict[str, dict]] - (default: None) - positions [list[list[int]]] - (default: None) - bookmark_file_name [str] - name of the bookmark file, - will be added to 'views' in datasets.json by default (default: None) - overwrite [bool] - whether to overwrite existing bookmarks (default: False) - """ - dataset_metadata = read_dataset_metadata(dataset_folder) - views = dataset_metadata['views'] - - if bookmark_file_name is None: # bookmark goes into dataset.json:bookmarks - bookmarks = views - else: # bookmark goes into extra bookmark file - if not bookmark_file_name.endswith('.json'): - bookmark_file_name += '.json' - bookmark_file = os.path.join(dataset_folder, "misc", "bookmarks", bookmark_file_name) - bookmarks = read_metadata(bookmark_file).get('bookmarks', {}) - _check_bookmark(name, bookmarks, overwrite) - - view = get_grid_view(dataset_folder, name, sources, menu_name="bookmark", - table_folder=table_folder, display_groups=display_groups, - display_group_settings=display_group_settings, positions=positions) - validate_with_schema(view, 'view') - - bookmarks[name] = view - if bookmark_file_name is None: - dataset_metadata['views'] = bookmarks - write_dataset_metadata(dataset_folder, dataset_metadata) - else: - write_metadata(bookmark_file_name, bookmarks) diff --git a/mobie/metadata/source_metadata.py b/mobie/metadata/source_metadata.py index 56d9c2e..a5ab0b1 100644 --- a/mobie/metadata/source_metadata.py +++ b/mobie/metadata/source_metadata.py @@ -1,19 +1,140 @@ +import json import os import warnings -from 
pybdv.metadata import get_bdv_format + +import elf.transformation as trafo_utils +from pybdv import metadata as bdv_metadata + from .dataset_metadata import read_dataset_metadata, write_dataset_metadata from .utils import get_table_metadata from .view_metadata import get_default_view from ..validation import validate_source_metadata, validate_view_metadata +from ..validation.utils import load_json_from_s3 + + +# +# functionality for querying source metadata +# + + +def _load_bdv_metadata(dataset_folder, storage): + xml_path = os.path.join(dataset_folder, storage["relativePath"]) + return xml_path + + +def _load_json_from_file(path): + if not os.path.exists(path): + return None + with open(path) as f: + attrs = json.load(f) + return attrs + + +def _load_ome_zarr_metadata(dataset_folder, storage, data_format): + if data_format == "ome.zarr": + attrs_path = os.path.join(dataset_folder, storage["relativePath"], ".zattrs") + attrs = _load_json_from_file(attrs_path) + else: + assert data_format == "ome.zarr.s3" + address = os.path.join(storage["s3Address"], ".zattrs") + try: + attrs = load_json_from_s3(address) + except Exception: + attrs = None + return None if attrs is None else attrs["multiscales"][0] + + +def _load_image_metadata(source_metadata, dataset_folder): + image_metadata = None + for data_format, storage in source_metadata.items(): + if data_format.startswith("bdv"): + image_metadata = _load_bdv_metadata(dataset_folder, storage) + elif data_format.startswith("ome.zarr"): + image_metadata = _load_ome_zarr_metadata(dataset_folder, storage, data_format) + if image_metadata is not None: + return data_format, image_metadata + raise RuntimeError(f"Could not load the image metadata for {image_metadata}") + + +def get_shape(source_metadata, dataset_folder): + data_format, image_metadata = _load_image_metadata(source_metadata, dataset_folder) + if data_format.startswith("bdv"): + shape = bdv_metadata.get_size(image_metadata, setup_id=0) + elif data_format == 
"ome.zarr": + dataset_path = image_metadata["datasets"][0]["path"] + array_path = os.path.join( + dataset_folder, source_metadata["storage"][data_format]["relativePath"], dataset_path, ".zarray" + ) + array_metadata = _load_json_from_file(array_path) + shape = array_metadata["shape"] + elif data_format == "ome.zarr.s3": + dataset_path = image_metadata["datasets"][0]["path"] + address = source_metadata[data_format]["s3Address"] + array_address = os.path.join(address, dataset_path, ".zarray") + array_metadata = load_json_from_s3(array_address) + shape = array_metadata["shape"] + else: + raise ValueError(f"Unsupported data format {data_format}") + return shape + + +def _bdv_transform_to_affine_matrix(transforms, resolution): + assert isinstance(transforms, dict) + transforms = list(transforms.values()) + # TODO do we need to pass the resolution here ???? + transforms = [trafo_utils.bdv_to_native(trafo, resolution=resolution) for trafo in transforms] + # TODO is this the correct order of concatenation? 
+ transform = transforms[0] + for trafo in transforms[1:]: + transform = transform @ trafo + return transform + + +# load the transformation from the metadata of this source +def get_transformation(source_metadata, dataset_folder, to_affine_matrix=True, resolution=None): + data_format, image_metadata = _load_image_metadata(source_metadata, dataset_folder) + if data_format.startswith("bdv"): + transform = bdv_metadata.get_affine(image_metadata, setup_id=0) + if to_affine_matrix: + # TODO + if resolution is None: + pass + transform = _bdv_transform_to_affine_matrix(transform, resolution) + elif data_format.startswith("ome.zarr"): + if to_affine_matrix: + transform = trafo_utils.ngff_to_native(image_metadata) + else: + raise ValueError(f"Unsupported data format {data_format}") + return transform + + +def get_resolution(source_metadata, dataset_folder): + data_format, image_metadata = _load_image_metadata(source_metadata, dataset_folder) + if data_format.startswith("bdv"): + resolution = bdv_metadata.get_resolution(image_metadata, setup_id=0) + elif data_format.startswith("ome.zarr"): + transforms = image_metadata["datasets"][0]["coordinateTransformations"] + resolution = [1.0, 1.0, 1.0] + for trafo in transforms: + if trafo["type"] == "scale": + resolution = trafo["scale"] + else: + raise ValueError(f"Unsupported data format {data_format}") + return resolution + + +# +# functionality for creating source metadata and adding it to datasets +# def _get_file_format(path): if not os.path.exists(path): raise ValueError(f"{path} does not exist.") - elif path.endswith('.xml'): - file_format = get_bdv_format(path) - elif path.endswith('.ome.zarr'): - file_format = 'ome.zarr' + elif path.endswith(".xml"): + file_format = bdv_metadata.get_bdv_format(path) + elif path.endswith(".ome.zarr"): + file_format = "ome.zarr" else: raise ValueError(f"Could not infer file format from {path}.") return file_format @@ -85,7 +206,7 @@ def add_source_to_dataset( view_metadata = 
dataset_metadata["views"] # validate the arguments - if source_type not in ('image', 'segmentation'): + if source_type not in ("image", "segmentation"): raise ValueError(f"Expect source_type to be 'image' or 'segmentation', got {source_type}") if source_name in sources_metadata or source_name in view_metadata: @@ -115,8 +236,3 @@ def add_source_to_dataset( dataset_metadata["views"] = view_metadata write_dataset_metadata(dataset_folder, dataset_metadata) - - -# TODO -def update_source_metadata(): - pass diff --git a/mobie/metadata/view_metadata.py b/mobie/metadata/view_metadata.py index d4fc45b..5bd80e1 100644 --- a/mobie/metadata/view_metadata.py +++ b/mobie/metadata/view_metadata.py @@ -5,7 +5,7 @@ import numpy as np from .dataset_metadata import read_dataset_metadata from .utils import get_table_metadata -from ..tables import check_source_annotation_table, compute_source_annotation_table +from ..tables import check_region_table, compute_region_table # @@ -61,7 +61,7 @@ def get_segmentation_display(name, sources, **kwargs): return {"segmentationDisplay": segmentation_display} -def get_source_annotation_display(name, sources, table_data, tables, **kwargs): +def get_region_display(name, sources, table_data, tables, **kwargs): opacity = kwargs.pop("opacity", 0.5) lut = kwargs.pop("lut", "glasbey") annotation_display = { @@ -257,7 +257,7 @@ def get_viewer_transform(affine=None, normalized_affine=None, position=None, nor def get_view(names, source_types, sources, display_settings, is_exclusive, menu_name, - source_transforms=None, viewer_transform=None, source_annotation_displays=None): + source_transforms=None, viewer_transform=None, region_displays=None): """ Create view for a multiple sources and optional transformations. 
Arguments: @@ -269,7 +269,7 @@ def get_view(names, source_types, sources, display_settings, menu_name [str] - menu name for this view source_transforms [list[dict]] - (default: None) viewer_transform [dict] - (default: None) - source_annotation_displays [list[dict]] - (default: None) + region_displays [list[dict]] - (default: None) """ if len(names) != len(source_types) != len(sources) != len(display_settings): @@ -311,12 +311,12 @@ def get_view(names, source_types, sources, display_settings, source_displays.append(display) - if source_annotation_displays is not None: - for name, settings in source_annotation_displays.items(): + if region_displays is not None: + for name, settings in region_displays.items(): source_map = settings.pop("sources") table_data = settings.pop("tableData") assert isinstance(source_map, dict) - display = get_source_annotation_display(name, source_map, table_data, **settings) + display = get_region_display(name, source_map, table_data, **settings) source_displays.append(display) view["sourceDisplays"] = source_displays @@ -390,8 +390,8 @@ def _to_merged_grid(sources, name, positions, center_at_origin, encode_source): return source_transforms -def create_source_annotation_display(name, sources, dataset_folder, table_folder=None, region_ids=None, **kwargs): - """Get a source annotation display and create the corresponding table. +def create_region_display(name, sources, dataset_folder, table_folder=None, region_ids=None, **kwargs): + """Get a region display and create the corresponding table. 
""" if isinstance(sources, list) and region_ids is None: sources = {ii: source_list for ii, source_list in enumerate(sources)} @@ -408,18 +408,18 @@ def create_source_annotation_display(name, sources, dataset_folder, table_folder os.makedirs(table_folder_path, exist_ok=True) default_table_path = os.path.join(table_folder_path, "default.tsv") if not os.path.exists(default_table_path): - compute_source_annotation_table(sources, default_table_path) - check_source_annotation_table(sources, default_table_path) + compute_region_table(sources, default_table_path) + check_region_table(sources, default_table_path) - source_annotation_display = get_source_annotation_display( + region_display = get_region_display( name, sources, table_data=get_table_metadata(table_folder), tables=["default.tsv"], **kwargs )["regionDisplay"] - source_annotation_display.pop("name") + region_display.pop("name") - return {name: source_annotation_display} + return {name: region_display} # supporting grid views with transform (if trafo names change) is currently rather cumbersome: @@ -529,8 +529,7 @@ def get_grid_view(dataset_folder, name, sources, menu_name=None, source_transforms = additional_source_transforms + source_transforms # create the source annotation display for this grid view, this will show the table for this grid view! 
- source_annotation_displays = create_source_annotation_display(name, grid_sources, dataset_folder, table_folder, - region_ids=region_ids) + region_displays = create_region_display(name, grid_sources, dataset_folder, table_folder, region_ids=region_ids) if menu_name is None: menu_name = "grid" @@ -539,7 +538,7 @@ def get_grid_view(dataset_folder, name, sources, menu_name=None, sources=display_sources, display_settings=display_settings, source_transforms=source_transforms, - source_annotation_displays=source_annotation_displays, + region_displays=region_displays, is_exclusive=True, menu_name=menu_name) return view diff --git a/mobie/migration/migrate_v2/intermediate/migrate_grid_spec.py b/mobie/migration/migrate_v2/intermediate/migrate_grid_spec.py index d8b094a..1f2519d 100644 --- a/mobie/migration/migrate_v2/intermediate/migrate_grid_spec.py +++ b/mobie/migration/migrate_v2/intermediate/migrate_grid_spec.py @@ -8,12 +8,12 @@ def update_grid_view(trafo, name): - params = trafo['grid'] + params = trafo["grid"] # update sources sources = { source_id: source_list - for source_id, source_list in enumerate(params['sources']) + for source_id, source_list in enumerate(params["sources"]) } grid_params = {"sources": sources} additional_fields = ["name", "sourceNamesAfterTransformation", "timepoints"] @@ -21,11 +21,11 @@ def update_grid_view(trafo, name): if add_field_name in params: grid_params[add_field_name] = params[add_field_name] if "positions" in params: - grid_params["positions"] = {source_id: pos for source_id, pos in enumerate(params['positions'])} + grid_params["positions"] = {source_id: pos for source_id, pos in enumerate(params["positions"])} table_data = params["tableData"] tables = ["default.tsv"] - annotation_display = metadata.view_metadata.get_source_annotation_display( + annotation_display = metadata.view_metadata.get_region_display( name, sources, table_data, tables ) @@ -35,20 +35,20 @@ def update_grid_view(trafo, name): def update_views(views): 
new_views = {} for name, view in views.items(): - has_source_trafo = 'sourceTransforms' in view + has_source_trafo = "sourceTransforms" in view if has_source_trafo: - trafos = view['sourceTransforms'] + trafos = view["sourceTransforms"] trafo_types = [list(trafo.keys())[0] for trafo in trafos] - has_grid_trafo = 'grid' in trafo_types + has_grid_trafo = "grid" in trafo_types if has_grid_trafo: new_view = deepcopy(view) new_trafos = [] for trafo in trafos: - if list(trafo.keys())[0] == 'grid': + if list(trafo.keys())[0] == "grid": trafo, annotation_display = update_grid_view(trafo, name) - new_view['sourceDisplays'].append(annotation_display) + new_view["sourceDisplays"].append(annotation_display) new_trafos.append(trafo) - new_view['sourceTransforms'] = new_trafos + new_view["sourceTransforms"] = new_trafos new_views[name] = new_view continue @@ -58,18 +58,18 @@ def update_views(views): def update_tables(views, dataset_folder): for name, view in views.items(): - displays = view['sourceDisplays'] + displays = view["sourceDisplays"] for disp in displays: - if list(disp.keys())[0] == 'sourceAnnotationDisplay': - props = disp['sourceAnnotationDisplay'] + if list(disp.keys())[0] == "sourceAnnotationDisplay": + props = disp["sourceAnnotationDisplay"] table_folder = os.path.join( - dataset_folder, props['tableData']['tsv']['relativePath'] + dataset_folder, props["tableData"]["tsv"]["relativePath"] ) tables = glob(os.path.join(table_folder, "*.tsv")) for table_path in tables: - table = pd.read_csv(table_path, sep='\t') - table = table.rename(columns={'grid_id': 'annotation_id'}) - table.to_csv(table_path, sep='\t', index=False) + table = pd.read_csv(table_path, sep="\t") + table = table.rename(columns={"grid_id": "annotation_id"}) + table.to_csv(table_path, sep="\t", index=False) def migrate_grid_spec(dataset_folder): @@ -78,17 +78,17 @@ def migrate_grid_spec(dataset_folder): See https://github.com/mobie/mobie-viewer-fiji/issues/343 for details """ ds_meta = 
metadata.read_dataset_metadata(dataset_folder) - views = ds_meta['views'] + views = ds_meta["views"] new_views = update_views(views) update_tables(new_views, dataset_folder) - ds_meta['views'] = new_views + ds_meta["views"] = new_views metadata.write_dataset_metadata(dataset_folder, ds_meta) - views_folder = os.path.join(dataset_folder, 'misc', 'views') - view_files = glob(os.path.join(views_folder, '*.json')) + views_folder = os.path.join(dataset_folder, "misc", "views") + view_files = glob(os.path.join(views_folder, "*.json")) for view_file in view_files: - with open(view_file, 'r') as f: - views = json.load(f)['views'] + with open(view_file, "r") as f: + views = json.load(f)["views"] new_views = update_views(views) update_tables(new_views, dataset_folder) - metadata.utils.write_metadata(view_file, {'views': new_views}) + metadata.utils.write_metadata(view_file, {"views": new_views}) diff --git a/mobie/tables/__init__.py b/mobie/tables/__init__.py index dd0de87..17e32d7 100644 --- a/mobie/tables/__init__.py +++ b/mobie/tables/__init__.py @@ -1,3 +1,3 @@ from .default_table import compute_default_table -from .source_annotation_table import compute_source_annotation_table, check_source_annotation_table +from .region_table import compute_region_table, check_region_table from .traces_table import compute_trace_default_table diff --git a/mobie/tables/source_annotation_table.py b/mobie/tables/region_table.py similarity index 92% rename from mobie/tables/source_annotation_table.py rename to mobie/tables/region_table.py index 0565243..f976c98 100644 --- a/mobie/tables/source_annotation_table.py +++ b/mobie/tables/region_table.py @@ -2,7 +2,7 @@ import pandas as pd -def compute_source_annotation_table(sources, table_path, **additional_columns): +def compute_region_table(sources, table_path, **additional_columns): first_col_name = "region_id" if isinstance(sources, list): @@ -23,7 +23,7 @@ def compute_source_annotation_table(sources, table_path, **additional_columns): 
table.to_csv(table_path, sep="\t", index=False, na_rep="nan") -def check_source_annotation_table(sources, table_path): +def check_region_table(sources, table_path): first_col_name = "region_id" table = pd.read_csv(table_path, sep="\t") diff --git a/mobie/validation/__init__.py b/mobie/validation/__init__.py index 19ede42..94a92d2 100644 --- a/mobie/validation/__init__.py +++ b/mobie/validation/__init__.py @@ -1,4 +1,5 @@ from .dataset import validate_dataset from .metadata import validate_source_metadata, validate_view_metadata from .project import validate_project +from .utils import validate_with_schema from .views import validate_views diff --git a/mobie/validation/dataset.py b/mobie/validation/dataset.py index 8d992aa..f4b3c83 100644 --- a/mobie/validation/dataset.py +++ b/mobie/validation/dataset.py @@ -42,7 +42,8 @@ def validate_dataset(dataset_folder, require_local_data=True, require_remote_dat desc=f"Check views for dataset {ds_name}" ): validate_view_metadata( - view, sources=all_sources, dataset_folder=dataset_folder, assert_true=assert_true + view, sources=all_sources, dataset_folder=dataset_folder, assert_true=assert_true, + dataset_metadata=dataset_metadata ) # check the (potential) additional view files @@ -53,7 +54,8 @@ def validate_dataset(dataset_folder, require_local_data=True, require_remote_dat views = json.load(f)["views"] for name, view in views.items(): validate_view_metadata( - view, sources=all_sources, dataset_folder=dataset_folder, assert_true=assert_true + view, sources=all_sources, dataset_folder=dataset_folder, assert_true=assert_true, + dataset_metadata=dataset_metadata ) diff --git a/mobie/validation/metadata.py b/mobie/validation/metadata.py index a919e1a..6574e80 100644 --- a/mobie/validation/metadata.py +++ b/mobie/validation/metadata.py @@ -1,15 +1,13 @@ import os -import json from glob import glob import numpy as np import pandas as pd -import s3fs from elf.io import open_file from jsonschema import ValidationError from 
pybdv.metadata import get_name, get_data_path -from .utils import _assert_true, _assert_equal, validate_with_schema +from .utils import _assert_true, _assert_equal, validate_with_schema, load_json_from_s3 from ..xml_utils import parse_s3_xml @@ -40,7 +38,7 @@ def _load_table(table_path): return pd.read_csv(table_path, sep="\t" if os.path.splitext(table_path)[1] == ".tsv" else ",") -def check_tables(table_folder, assert_true): +def check_segmentation_tables(table_folder, assert_true): msg = f"Could not find table root folder at {table_folder}" assert_true(os.path.isdir(table_folder), msg) @@ -64,27 +62,19 @@ def check_tables(table_folder, assert_true): def _check_bdv_n5_s3(xml, assert_true): path_in_bucket, server, bucket, _ = parse_s3_xml(xml) - address = os.path.join(server, bucket, path_in_bucket) + address = os.path.join(server, bucket, path_in_bucket, "attributes.json") try: - fs = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": server}) - store = s3fs.S3Map(root=os.path.join(bucket, path_in_bucket), s3=fs) - attrs = store["attributes.json"] + attrs = load_json_from_s3(address) except Exception: assert_true(False, f"Can't find bdv.n5.s3 file at {address}") - attrs = json.loads(attrs.decode("utf-8")) assert_true("n5" in attrs, "Invalid n5 file at {address}") def _check_ome_zarr_s3(address, name, assert_true, assert_equal): - server = "/".join(address.split("/")[:3]) - path = "/".join(address.split("/")[3:]) try: - fs = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": server}) - store = s3fs.S3Map(root=path, s3=fs) - attrs = store[".zattrs"] + attrs = load_json_from_s3(os.path.join(address, ".zattrs")) except Exception: - assert_true(False, f"Can't find ome.zarr..s3 file at {address}") - attrs = json.loads(attrs.decode("utf-8")) + assert_true(False, f"Can't find ome.zarr.s3 file at {address}") ome_name = attrs["multiscales"][0]["name"] assert_equal(name, ome_name, f"Source name and name in ngff metadata don't match: {name} != {ome_name}") 
@@ -147,11 +137,12 @@ def validate_source_metadata(name, metadata, dataset_folder=None, if "tableData" in metadata: table_folder = os.path.join(dataset_folder, metadata["tableData"]["tsv"]["relativePath"]) - check_tables(table_folder, assert_true) + check_segmentation_tables(table_folder, assert_true) -def check_annotation_tables(table_folder, tables, assert_true): +def check_region_tables(table_folder, tables, assert_true, expected_col=None): ref_grid_ids = None + have_expected_col = False for table_name in tables: table_path = os.path.join(table_folder, table_name) msg = f"Table {table_path} does not exist." @@ -172,8 +163,29 @@ msg = f"The grid ids for the table {table_path} are inconsistent with the grid ids in other tables" assert_true(np.array_equal(ref_grid_ids, this_grid_ids), msg) + if expected_col is not None: + have_expected_col = have_expected_col or expected_col in table -def validate_view_metadata(view, sources=None, dataset_folder=None, assert_true=_assert_true): + if expected_col is not None: + msg = f"Could not find the expected column {expected_col} in any of the tables in {table_folder}" + assert_true(have_expected_col, msg) + + +def check_expected_column(table_folder, tables, expected_col, assert_true): + have_expected_col = False + for table_name in tables: + table_path = os.path.join(table_folder, table_name) + msg = f"Table {table_path} does not exist." 
+ assert_true(os.path.exists(table_path), msg) + + table = _load_table(table_path) + have_expected_col = have_expected_col or expected_col in table + + msg = f"Could not find the expected column {expected_col} in any of the tables in {table_folder}" + assert_true(have_expected_col, msg) + + +def validate_view_metadata(view, sources=None, dataset_folder=None, assert_true=_assert_true, dataset_metadata=None): # static validation with json schema try: validate_with_schema(view, "view") @@ -224,7 +236,7 @@ def validate_view_metadata(view, sources=None, dataset_folder=None, assert_true= msg = f"Found wrong sources {wrong_sources} in sourceDisplay" assert_true(len(wrong_sources) == 0, msg) - # dynamic validation of annotation tables + # dynamic validation of tables in region displays if displays is not None and dataset_folder is not None: for display in displays: display_type = list(display.keys())[0] @@ -232,5 +244,25 @@ display_metadata = list(display.values())[0] table_folder = os.path.join(dataset_folder, display_metadata["tableData"]["tsv"]["relativePath"]) tables = display_metadata.get("tables") + color_by_col = display_metadata.get("colorByColumn", None) if tables is not None: - check_annotation_tables(table_folder, tables, assert_true) + check_region_tables(table_folder, tables, assert_true, color_by_col) + + # dynamic validation of tables in segmentation displays + if displays is not None and dataset_metadata is not None: + assert dataset_folder is not None + display_type = list(display.keys())[0] + if display_type == "segmentationDisplay": + display_metadata = list(display.values())[0] + color_by_col = display_metadata.get("colorByColumn", None) + if color_by_col is None: + return + tables = display_metadata.get("tables", None) + msg = f"colorByColumn is set to {color_by_col}, but no tables are set in the segmentation display" + assert_true(tables is not None, msg) + sources = 
display_metadata["sources"] + for source in sources: + table_folder = os.path.join( + dataset_folder, dataset_metadata["sources"][source]["tableData"]["tsv"]["relativePath"] + ) + check_expected_column(table_folder, tables, color_by_col, assert_true) diff --git a/mobie/validation/utils.py b/mobie/validation/utils.py index 6fb8eaa..dc36297 100644 --- a/mobie/validation/utils.py +++ b/mobie/validation/utils.py @@ -4,6 +4,7 @@ import jsonschema import requests +import s3fs SCHEMA_URLS = { @@ -54,6 +55,17 @@ def validate_with_schema(metadata, schema): jsonschema.validate(instance=metadata, schema=schema) +def load_json_from_s3(address): + server = "/".join(address.split("/")[:3]) + root_path = "/".join(address.split("/")[3:-1]) + fname = address.split("/")[-1] + fs = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": server}) + store = s3fs.S3Map(root=root_path, s3=fs) + attrs = store[fname] + attrs = json.loads(attrs.decode("utf-8")) + return attrs + + def _assert_equal(val, exp, msg=""): if val != exp: raise ValueError(msg) diff --git a/mobie/view_utils.py b/mobie/view_utils.py index 4778341..180ad3f 100644 --- a/mobie/view_utils.py +++ b/mobie/view_utils.py @@ -1,8 +1,158 @@ import argparse import json +import os import warnings + from . 
import metadata as mobie_metadata -from .validation import validate_view_metadata, validate_views +from .validation import validate_view_metadata, validate_views, validate_with_schema + +# +# view creation +# + + +def _create_view( + sources, all_sources, display_settings, source_transforms, viewer_transform, display_group_names, menu_name +): + all_source_names = set(all_sources.keys()) + source_types = [] + for source_list in sources: + + invalid_source_names = list(set(source_list) - all_source_names) + if invalid_source_names: + raise ValueError(f"Invalid source names: {invalid_source_names}") + + this_source_types = list(set( + [list(all_sources[source].keys())[0] for source in source_list] + )) + if len(this_source_types) > 1: + raise ValueError(f"Inconsistent source types: {this_source_types}") + source_types.append(this_source_types[0]) + + if display_group_names is None: + display_group_names = [f"{source_type}-group-{i}" for i, source_type in enumerate(source_types)] + + view = mobie_metadata.get_view( + display_group_names, source_types, + sources, display_settings, + is_exclusive=True, + menu_name=menu_name, + source_transforms=source_transforms, + viewer_transform=viewer_transform + ) + return view + + +def _write_view(dataset_folder, view_file, view_name, view, overwrite, return_view): + # we don't write the view, but return it + if return_view: + return view + # write the view to the dataset + elif view_file is None: + mobie_metadata.add_view_to_dataset(dataset_folder, view_name, view, overwrite=overwrite) + return + + # write the view to an external view file + if os.path.exists(view_file): + with open(view_file, "r") as f: + views = json.load(f)["views"] + else: + views = {} + + if view_name in views: + msg = f"The view {view_name} is already present in {view_file}." 
+ if overwrite: + warnings.warn(msg + " It will be over-written.") + else: + raise ValueError(msg) + + views[view_name] = view + with open(view_file, "w") as f: + json.dump({"views": views}, f) + + +def create_view( + dataset_folder, view_name, + sources, display_settings, + source_transforms=None, + viewer_transform=None, + display_group_names=None, + menu_name="bookmark", + overwrite=False, + view_file=None, + return_view=False, +): + """Add or update a view in dataset.json:views. + + Views can reproduce any given viewer state. + + Arguments: + dataset_folder [str] - path to the dataset folder + view_name [str] - name of the view + sources [list[list[str]]] - nested list of sources for this view. + Each inner list contains the sources for one of the source displays. + display_settings [list[dict]] - List of display settings for the source displays. + source_transforms [list[dict]] - List of source transformations. (default: None) + viewer_transform [dict] - the viewer transformation. (default:None) + display_group_names [list[str]] - the names for the source displays (default: None) + menu_name [str] - name for the menu where this view will be saved (default: bookmark) + overwrite [bool] - whether to overwrite existing views (default: False) + view_file [str] - name of the view file where this view should be saved. 
+ By default it will be saved directly in the dataset metadata (default: None) + return_view [bool] - whether to return the created view instead of + saving it to the dataset or to an external view file (default: False) + """ + dataset_metadata = mobie_metadata.read_dataset_metadata(dataset_folder) + all_sources = dataset_metadata["sources"] + view = _create_view(sources, all_sources, display_settings, + source_transforms, viewer_transform, + display_group_names, menu_name=menu_name) + validate_with_schema(view, "view") + return _write_view(dataset_folder, view_file, view_name, view, + overwrite=overwrite, return_view=return_view) + + +def create_grid_view( + dataset_folder, view_name, sources, + table_folder=None, + display_groups=None, + display_group_settings=None, + positions=None, + menu_name="bookmark", + overwrite=False, + view_file=None, + return_view=False, +): + """ Add or update a grid view. + + Arguments: + dataset_folder [str] - path to the dataset folder + view_name [str] - name of the view + sources [list[list[str]]] - sources to be arranged in the grid + table_folder [str] - path to the table folder, relative to the dataset folder (default: None) + display_groups [dict[str, str]] - (default: None) + display_group_settings [dict[str, dict]] - (default: None) + positions [list[list[int]]] - (default: None) + menu_name [str] - name of the menu from which this view can be selected (default: bookmark) + overwrite [bool] - whether to overwrite existing view (default: False) + view_file [str] - name of the view file where this view should be saved. 
+ By default it will be saved directly in the dataset metadata (default: None) + return_view [bool] - whether to return the created view instead of + saving it to the dataset or to an external view file (default: False) + """ + view = mobie_metadata.get_grid_view( + dataset_folder, view_name, sources, menu_name=menu_name, + table_folder=table_folder, display_groups=display_groups, + display_group_settings=display_group_settings, positions=positions + ) + validate_with_schema(view, "view") + return _write_view(dataset_folder, view_file, view_name, view, + overwrite=overwrite, return_view=return_view) + + +# +# view merging / combination +# def merge_view_file(dataset_folder, view_file, overwrite=False): diff --git a/test/metadata/test_bookmark_metadata.py b/test/metadata/test_bookmark_metadata.py deleted file mode 100644 index a02efa8..0000000 --- a/test/metadata/test_bookmark_metadata.py +++ /dev/null @@ -1,149 +0,0 @@ -import multiprocessing -import os -import unittest -from shutil import rmtree - -import numpy as np -from elf.io import open_file - -from mobie import add_image, add_segmentation -from mobie.metadata import read_dataset_metadata -from mobie.metadata.utils import read_metadata - - -# TODO add tests for source and viewer transformations -class TestBookmarkMetadata(unittest.TestCase): - test_folder = './test-folder' - root = './test-folder/data' - dataset_name = 'test' - raw_name = 'test-raw' - extra_name = 'extra-im' - seg_name = 'test-seg' - extra_seg_name = 'extra-seg' - shape = (16, 32, 32) - chunks = (8, 16, 16) - - def init_dataset(self): - data_path = os.path.join(self.test_folder, 'data.h5') - data_key = 'data' - with open_file(data_path, 'a') as f: - f.create_dataset(data_key, data=np.random.rand(*self.shape)) - - seg_path = os.path.join(self.test_folder, 'seg.h5') - with open_file(seg_path, 'a') as f: - f.create_dataset(data_key, data=np.random.randint(0, 100, size=self.shape)) - - scales = [[2, 2, 2]] - max_jobs = min(4, 
multiprocessing.cpu_count()) - - tmp_folder = os.path.join(self.test_folder, 'tmp-init-raw') - add_image(data_path, data_key, self.root, self.dataset_name, self.raw_name, - resolution=(1, 1, 1), chunks=self.chunks, scale_factors=scales, - tmp_folder=tmp_folder, max_jobs=max_jobs) - - tmp_folder = os.path.join(self.test_folder, 'tmp-init-extra') - add_image(data_path, data_key, self.root, self.dataset_name, self.extra_name, - resolution=(1, 1, 1), chunks=self.chunks, scale_factors=scales, - tmp_folder=tmp_folder, max_jobs=max_jobs) - - tmp_folder = os.path.join(self.test_folder, 'tmp-init-seg') - add_segmentation(seg_path, data_key, self.root, self.dataset_name, self.seg_name, - resolution=(1, 1, 1), chunks=self.chunks, scale_factors=scales, - tmp_folder=tmp_folder, max_jobs=max_jobs) - - tmp_folder = os.path.join(self.test_folder, 'tmp-init-extra_seg') - add_segmentation(seg_path, data_key, self.root, self.dataset_name, self.extra_seg_name, - resolution=(1, 1, 1), chunks=self.chunks, scale_factors=scales, - tmp_folder=tmp_folder, max_jobs=max_jobs) - - def setUp(self): - os.makedirs(self.test_folder, exist_ok=True) - self.init_dataset() - - def tearDown(self): - try: - rmtree(self.test_folder) - except OSError: - pass - - def test_add_dataset_bookmark(self): - from mobie.metadata import add_dataset_bookmark - - dataset_folder = os.path.join(self.root, self.dataset_name) - bookmark_name = 'my-bookmark' - - sources = [[self.raw_name], [self.seg_name]] - display_settings = [ - {"color": "white", "contrastLimits": [0., 1000.]}, - {"opacity": 0.8, "lut": "viridis", "colorByColumn": "n_pixels"} - ] - - add_dataset_bookmark(dataset_folder, bookmark_name, - sources, display_settings) - dataset_metadata = read_dataset_metadata(dataset_folder) - self.assertIn(bookmark_name, dataset_metadata["views"]) - - def test_add_additional_bookmark(self): - from mobie.metadata import add_additional_bookmark - - dataset_folder = os.path.join(self.root, self.dataset_name) - 
bookmark_file_name = "more-bookmarks.json" - bookmark_name = 'my-bookmark' - - sources = [[self.raw_name], [self.seg_name]] - display_settings = [ - {"color": "white", "contrastLimits": [0., 1000.]}, - {"opacity": 0.8, "lut": "viridis", "colorByColumn": "n_pixels"} - ] - - add_additional_bookmark(dataset_folder, bookmark_file_name, bookmark_name, - sources, display_settings) - - bookmark_file = os.path.join(dataset_folder, "misc", "views", bookmark_file_name) - self.assertTrue(os.path.exists(bookmark_file)) - bookmarks = read_metadata(bookmark_file)["views"] - self.assertIn(bookmark_name, bookmarks) - - def test_add_grid_bookmark(self): - from mobie.metadata import add_grid_bookmark - dataset_folder = os.path.join(self.root, self.dataset_name) - - # test vanilla grid bookmark - bookmark_name = 'simple-grid' - sources = [[self.raw_name, self.seg_name], [self.extra_name, self.extra_seg_name]] - add_grid_bookmark(dataset_folder, bookmark_name, sources) - dataset_metadata = read_dataset_metadata(dataset_folder) - self.assertIn(bookmark_name, dataset_metadata["views"]) - - # test bookmark with positions - bookmark_name = 'grid-with-pos' - sources = [[self.raw_name, self.seg_name], [self.extra_name, self.extra_seg_name]] - positions = [[0, 0], [1, 1]] - add_grid_bookmark(dataset_folder, bookmark_name, sources, - positions=positions) - dataset_metadata = read_dataset_metadata(dataset_folder) - self.assertIn(bookmark_name, dataset_metadata["views"]) - - # test bookmark with custom settings - bookmark_name = 'custom-setting-grid' - sources = [[self.raw_name, self.seg_name], [self.extra_name, self.extra_seg_name]] - display_groups = { - self.raw_name: 'ims1', - self.extra_name: 'ims2', - self.seg_name: 'segs', - self.extra_seg_name: 'segs' - } - display_group_settings = { - 'ims1': {'color': 'white', 'opacity': 1.}, - 'ims2': {'color': 'green', 'opacity': 0.75}, - 'segs': {'lut': 'glasbey', 'opacity': 0.6} - } - add_grid_bookmark(dataset_folder, bookmark_name, sources, - 
display_groups=display_groups, - display_group_settings=display_group_settings) - dataset_metadata = read_dataset_metadata(dataset_folder) - self.assertIn(bookmark_name, dataset_metadata["views"]) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/test_view_utils.py b/test/test_view_utils.py index fc83acf..d86ccf7 100644 --- a/test/test_view_utils.py +++ b/test/test_view_utils.py @@ -8,6 +8,150 @@ import mobie import numpy as np +from elf.io import open_file +from mobie.metadata.utils import read_metadata + + +class TestViewCreation(unittest.TestCase): + test_folder = "./test-folder" + root = "./test-folder/data" + dataset_name = "test" + raw_name = "test-raw" + extra_name = "extra-im" + seg_name = "test-seg" + extra_seg_name = "extra-seg" + shape = (16, 32, 32) + chunks = (8, 16, 16) + + def init_dataset(self): + data_path = os.path.join(self.test_folder, "data.h5") + data_key = "data" + with open_file(data_path, "a") as f: + f.create_dataset(data_key, data=np.random.rand(*self.shape)) + + seg_path = os.path.join(self.test_folder, "seg.h5") + with open_file(seg_path, "a") as f: + f.create_dataset(data_key, data=np.random.randint(0, 100, size=self.shape)) + + scales = [[2, 2, 2]] + max_jobs = min(4, mp.cpu_count()) + + tmp_folder = os.path.join(self.test_folder, "tmp-init-raw") + mobie.add_image( + data_path, data_key, self.root, self.dataset_name, self.raw_name, + resolution=(1, 1, 1), chunks=self.chunks, scale_factors=scales, + tmp_folder=tmp_folder, max_jobs=max_jobs + ) + + tmp_folder = os.path.join(self.test_folder, "tmp-init-extra") + mobie.add_image( + data_path, data_key, self.root, self.dataset_name, self.extra_name, + resolution=(1, 1, 1), chunks=self.chunks, scale_factors=scales, + tmp_folder=tmp_folder, max_jobs=max_jobs + ) + + tmp_folder = os.path.join(self.test_folder, "tmp-init-seg") + mobie.add_segmentation( + seg_path, data_key, self.root, self.dataset_name, self.seg_name, + resolution=(1, 1, 1), chunks=self.chunks, 
scale_factors=scales, + tmp_folder=tmp_folder, max_jobs=max_jobs + ) + + tmp_folder = os.path.join(self.test_folder, "tmp-init-extra_seg") + mobie.add_segmentation( + seg_path, data_key, self.root, self.dataset_name, self.extra_seg_name, + resolution=(1, 1, 1), chunks=self.chunks, scale_factors=scales, + tmp_folder=tmp_folder, max_jobs=max_jobs + ) + + def setUp(self): + os.makedirs(self.test_folder, exist_ok=True) + self.init_dataset() + + def tearDown(self): + try: + rmtree(self.test_folder) + except OSError: + pass + + def test_create_view(self): + from mobie import create_view + + dataset_folder = os.path.join(self.root, self.dataset_name) + bookmark_name = "my-bookmark" + + sources = [[self.raw_name], [self.seg_name]] + display_settings = [ + {"color": "white", "contrastLimits": [0., 1000.]}, + {"opacity": 0.8, "lut": "viridis", "colorByColumn": "n_pixels"} + ] + + create_view(dataset_folder, bookmark_name, sources, display_settings) + dataset_metadata = mobie.metadata.read_dataset_metadata(dataset_folder) + self.assertIn(bookmark_name, dataset_metadata["views"]) + + def test_create_external_view(self): + from mobie import create_view + + dataset_folder = os.path.join(self.root, self.dataset_name) + bookmark_file_name = "more-bookmarks.json" + bookmark_name = "my-bookmark" + + sources = [[self.raw_name], [self.seg_name]] + display_settings = [ + {"color": "white", "contrastLimits": [0., 1000.]}, + {"opacity": 0.8, "lut": "viridis", "colorByColumn": "n_pixels"} + ] + + view_file = os.path.join(dataset_folder, "misc", "views", bookmark_file_name) + create_view( + dataset_folder, bookmark_name, sources, display_settings, view_file=view_file) + + self.assertTrue(os.path.exists(view_file)) + bookmarks = read_metadata(view_file)["views"] + self.assertIn(bookmark_name, bookmarks) + + def test_create_grid_view(self): + from mobie import create_grid_view + dataset_folder = os.path.join(self.root, self.dataset_name) + + # test vanilla grid bookmark + bookmark_name = 
"simple-grid" + sources = [[self.raw_name, self.seg_name], [self.extra_name, self.extra_seg_name]] + create_grid_view(dataset_folder, bookmark_name, sources) + dataset_metadata = mobie.metadata.read_dataset_metadata(dataset_folder) + self.assertIn(bookmark_name, dataset_metadata["views"]) + + # test bookmark with positions + bookmark_name = "grid-with-pos" + sources = [[self.raw_name, self.seg_name], [self.extra_name, self.extra_seg_name]] + positions = [[0, 0], [1, 1]] + create_grid_view(dataset_folder, bookmark_name, sources, positions=positions) + dataset_metadata = mobie.metadata.read_dataset_metadata(dataset_folder) + self.assertIn(bookmark_name, dataset_metadata["views"]) + + # test bookmark with custom settings + bookmark_name = "custom-setting-grid" + sources = [[self.raw_name, self.seg_name], [self.extra_name, self.extra_seg_name]] + display_groups = { + self.raw_name: "ims1", + self.extra_name: "ims2", + self.seg_name: "segs", + self.extra_seg_name: "segs" + } + display_group_settings = { + "ims1": {"color": "white", "opacity": 1.}, + "ims2": {"color": "green", "opacity": 0.75}, + "segs": {"lut": "glasbey", "opacity": 0.6} + } + create_grid_view( + dataset_folder, bookmark_name, sources, + display_groups=display_groups, + display_group_settings=display_group_settings + ) + dataset_metadata = mobie.metadata.read_dataset_metadata(dataset_folder) + self.assertIn(bookmark_name, dataset_metadata["views"]) + class TestViewUtils(unittest.TestCase): root = "./data"