import json
from dandi.dandiapi import DandiAPIClient
from tqdm.notebook import tqdm
# Create the DANDI API client and materialize every dandiset it lists,
# so the list can be reused (and its length shown by progress bars) below.
client = DandiAPIClient()
dandisets = [*client.get_dandisets()]
Identify NWB dandisets¶
Most dandisets hold NWB-formatted data, but DANDI also holds data in other formats.
Let's start by filtering down to only the dandisets that contain at least one NWB file.
We can do this by querying the metadata of each dandiset, which lists the data formats it contains under raw_metadata["assetsSummary"]["dataStandard"].
If no data has been uploaded to that dandiset, the "dataStandard" field is not present.
We handle this by using the .get method with an empty list as the default, so that a dandiset without this field simply yields nothing to check.
# Keep only the dandisets whose asset summary lists an NWB data standard.
nwb_dandisets = []
for dandiset in tqdm(dandisets):
    raw_metadata = dandiset.get_raw_metadata()
    # "dataStandard" is missing when nothing has been uploaded yet,
    # so fall back to an empty list (nothing to match).
    data_standards = raw_metadata["assetsSummary"].get("dataStandard", [])
    uses_nwb = any("NWB" in data_standard["name"] for data_standard in data_standards)
    if not uses_nwb:
        continue
    nwb_dandisets.append(dandiset)
print(f"There are currently {len(nwb_dandisets)} NWB datasets on DANDI!")
0%| | 0/465 [00:00<?, ?it/s]
There are currently 277 NWB datasets on DANDI!
Filtering dandisets: species¶
Let's use the nwb_dandisets
list from the previous recipe and see which of them used mice in their study.
You can find this information in raw_metadata["assetsSummary"]["species"]
.
We'll use the same .get trick as above to handle dandisets to which no data has been uploaded.
# Keep only the NWB dandisets whose asset summary lists a mouse species.
mouse_nwb_dandisets = []
for dandiset in tqdm(nwb_dandisets):
    raw_metadata = dandiset.get_raw_metadata()
    # "species" is missing when nothing has been uploaded yet,
    # so fall back to an empty list (nothing to match).
    species_entries = raw_metadata["assetsSummary"].get("species", [])
    # Match case-insensitively so that spellings such as "Mouse" or
    # "House Mouse" are not silently skipped.
    if any("mouse" in species["name"].lower() for species in species_entries):
        mouse_nwb_dandisets.append(dandiset)
print(f"There are currently {len(mouse_nwb_dandisets)} NWB datasets on DANDI that use mice!")
0%| | 0/277 [00:00<?, ?it/s]
There are currently 118 NWB datasets on DANDI that use mice!
Filtering by session: species and sex¶
Let's say you have identified a dandiset of interest, "000005", and you want to identify all of the sessions on female mice.
You can do this by querying asset-level metadata.
Assets correspond to individual NWB files, and contain metadata extracted from those files.
The metadata of each asset contains a .wasAttributedTo
attribute, which is a list of Participant
objects corresponding to the subjects for that session.
To filter the sessions, for each subject we first test that the attribute of interest (species, sex) exists (is not None - some older dandisets may not have included it) and then check the value of its name parameter.
# Collect the assets (NWB files) of dandiset 000005 that have at least one
# subject that is both a mouse and female.
dandiset = client.get_dandiset("000005")
female_mouse_nwb_sessions = []
assets = list(dandiset.get_assets())
for asset in tqdm(assets):
    asset_metadata = asset.get_metadata()
    # wasAttributedTo may be None (some older dandisets did not include it);
    # treat that as "no subjects" instead of failing on iteration.
    subjects = asset_metadata.wasAttributedTo or []
    if any(
        # Each attribute is checked for presence before its name is read.
        subject.species
        and "mouse" in subject.species.name.lower()
        and subject.sex
        and subject.sex.name == "Female"
        for subject in subjects
    ):
        female_mouse_nwb_sessions.append(asset)
print(f"Dandiset #5 has {len(female_mouse_nwb_sessions)} out of {len(assets)} files that use female mice!")
0%| | 0/148 [00:00<?, ?it/s]
Dandiset #5 has 69 out of 148 files that use female mice!
Going beyond¶
These examples show a few types of queries, but since the metadata structures are quite rich on both the dandiset and asset levels, they enable many complex queries beyond the examples here.
These metadata structures are also expanding over time as DANDI becomes more strict about what counts as essential metadata.
The .get_raw_metadata
method of both client.get_dandiset(...)
and client.get_dandiset(...).get_assets()
provides a nice view into the available fields.
Note: for any attribute, it is recommended to first check that it is not None
before checking for its value.
# Pretty-print the raw metadata of the first dandiset to show the available fields.
first_dandiset_metadata = dandisets[0].get_raw_metadata()
print(json.dumps(first_dandiset_metadata, indent=4))
{ "id": "DANDI:000003/0.230629.1955", "doi": "10.48324/dandi.000003/0.230629.1955", "url": "https://dandiarchive.org/dandiset/000003/0.230629.1955", "name": "Physiological Properties and Behavioral Correlates of Hippocampal Granule Cells and Mossy Cells", "about": [ { "name": "hippocampus", "schemaKey": "Anatomy", "identifier": "UBERON:0002421" } ], "access": [ { "status": "dandi:OpenAccess", "schemaKey": "AccessRequirements", "contactPoint": { "email": "petersen.peter@gmail.com", "schemaKey": "ContactPoint" } } ], "license": [ "spdx:CC-BY-4.0" ], "version": "0.230629.1955", "@context": "https://raw.githubusercontent.com/dandi/schema/master/releases/0.6.0/context.json", "citation": "Senzai, Yuta; Fernandez-Ruiz, Antonio; Buzs\u00e1ki, Gy\u00f6rgy (2023) Physiological Properties and Behavioral Correlates of Hippocampal Granule Cells and Mossy Cells (Version 0.230629.1955) [Data set]. DANDI archive. https://doi.org/10.48324/dandi.000003/0.230629.1955", "keywords": [ "cell types", "current source density", "laminar recordings", "oscillations", "mossy cells", "granule cells", "optogenetics" ], "protocol": [], "schemaKey": "Dandiset", "identifier": "DANDI:000003", "repository": "https://dandiarchive.org", "contributor": [ { "name": "Senzai, Yuta", "roleName": [ "dcite:Author", "dcite:ContactPerson", "dcite:DataCollector", "dcite:FormalAnalysis" ], "schemaKey": "Person", "affiliation": [], "includeInCitation": true }, { "name": "Fernandez-Ruiz, Antonio", "roleName": [ "dcite:Author", "dcite:FormalAnalysis" ], "schemaKey": "Person", "identifier": "0000-0001-8481-0796", "affiliation": [], "includeInCitation": true }, { "name": "Buzs\u00e1ki, Gy\u00f6rgy", "roleName": [ "dcite:Author" ], "schemaKey": "Person", "identifier": "0000-0002-3100-4800", "affiliation": [ { "name": "New York University Langone Medical Center", "schemaKey": "Affiliation", "identifier": "https://ror.org/005dvqh91" } ], "includeInCitation": true }, { "url": "https://www.nih.gov/", "name": "National 
Institutes of Health", "roleName": [ "dcite:Funder" ], "schemaKey": "Organization", "identifier": "https://ror.org/01cwqze88", "awardNumber": "NIHMH54671, MH107396, NS090583", "contactPoint": [], "includeInCitation": false }, { "url": "http://hnf.jp/", "name": "Nakajima Foundation", "roleName": [ "dcite:Funder" ], "schemaKey": "Organization", "identifier": "https://ror.org/000k40t10", "contactPoint": [], "includeInCitation": false }, { "url": "http://www.nsf.gov/", "name": "National Science Foundation", "roleName": [ "dcite:Funder" ], "schemaKey": "Organization", "identifier": "https://ror.org/021nxhr62", "awardNumber": "PIRE", "contactPoint": [], "includeInCitation": false }, { "url": "https://www.simonsfoundation.org", "name": "Simons Foundation", "roleName": [ "dcite:Funder" ], "schemaKey": "Organization", "identifier": "https://ror.org/01cmst727", "contactPoint": [], "includeInCitation": false } ], "dateCreated": "2020-03-15T22:56:55.655000+00:00", "description": "Data from \"Physiological Properties and Behavioral Correlates of Hippocampal Granule Cells and Mossy Cells\" Senzai, Buzsaki, Neuron 2017. 
Electrophysiology recordings of hippocampus during theta maze exploration.", "publishedBy": { "id": "urn:uuid:1d146605-5d67-4517-91b4-51cd3b9a5cf3", "name": "DANDI publish", "endDate": "2023-06-29T19:55:30.807473+00:00", "schemaKey": "PublishActivity", "startDate": "2023-06-29T19:55:30.807473+00:00", "wasAssociatedWith": [ { "id": "urn:uuid:0fca6ff4-84ca-42fe-bbed-85d3fb4291f0", "name": "DANDI API", "version": "0.1.0", "schemaKey": "Software", "identifier": "RRID:SCR_017571" } ] }, "studyTarget": [], "assetsSummary": { "species": [ { "name": "House mouse", "schemaKey": "SpeciesType", "identifier": "http://purl.obolibrary.org/obo/NCBITaxon_10090" } ], "approach": [ { "name": "electrophysiological approach", "schemaKey": "ApproachType" }, { "name": "behavioral approach", "schemaKey": "ApproachType" } ], "schemaKey": "AssetsSummary", "dataStandard": [ { "name": "Neurodata Without Borders (NWB)", "schemaKey": "StandardsType", "identifier": "RRID:SCR_015242" } ], "numberOfBytes": 2559248010229, "numberOfFiles": 101, "numberOfSubjects": 16, "variableMeasured": [ "DecompositionSeries", "LFP", "Units", "Position", "ElectricalSeries" ], "measurementTechnique": [ { "name": "signal filtering technique", "schemaKey": "MeasurementTechniqueType" }, { "name": "fourier analysis technique", "schemaKey": "MeasurementTechniqueType" }, { "name": "spike sorting technique", "schemaKey": "MeasurementTechniqueType" }, { "name": "behavioral technique", "schemaKey": "MeasurementTechniqueType" }, { "name": "multi electrode extracellular electrophysiology recording technique", "schemaKey": "MeasurementTechniqueType" } ] }, "datePublished": "2023-06-29T19:55:30.807473+00:00", "schemaVersion": "0.6.0", "ethicsApproval": [], "wasGeneratedBy": [], "relatedResource": [ { "url": "https://doi.org/10.1016/j.neuron.2016.12.011", "name": "Physiological Properties and Behavioral Correlates of Hippocampal Granule Cells and Mossy Cells", "relation": "dcite:IsDescribedBy", "schemaKey": "Resource", 
"identifier": "doi:10.1016/j.neuron.2016.12.011" } ], "manifestLocation": [ "https://dandiarchive.s3.amazonaws.com/dandisets/000003/0.230629.1955/assets.yaml" ] }