"""NeuronUnit interface to Neuroelectro.org.
Interface for creating tests using neuroelectro.org as reference data.
Example workflow:
x = NeuroElectroDataMap()
x.set_neuron(nlex_id='nifext_152')
# neurolex.org ID for 'Amygdala basolateral nucleus pyramidal neuron'.
x.set_ephysprop(id=23) # neuroelectro.org ID for 'Spike width'.
x.set_article(pmid=18667618) # Pubmed ID for Fajardo et al, 2008 (J. Neurosci.)
x.get_values() # Gets values for spike width from this paper.
width = x.val # Spike width reported in that paper.
t = neurounit.tests.SpikeWidthTest(spike_width=width)
c = sciunit.Candidate() # Instantiation of your model (or other candidate)
c.execute = code_that_runs_your_model
result = sciunit.run(t,m)
print result.score
#
# OR
#
x = NeuroElectroSummary()
x.set_neuron(nlex_id='nifext_152') # neurolex.org ID for 'Amygdala basolateral
# nucleus pyramidal neuron'.
x.set_ephysprop(id=2) # neuroelectro.org ID for 'Spike width'.
x.get_values() # Gets values for spike width from this paper.
width = x.mean # Mean Spike width reported across all matching papers.
...
"""
import json
from pprint import pprint
import shelve
import hashlib
import pickle
import requests
try: # Python 2
from urllib import urlencode
from urllib2 import urlopen, URLError
except ImportError: # Python 3
from urllib.parse import urlencode
from urllib.request import urlopen, URLError
import numpy as np
DUMP = True
API_VERSION = 1
API_SUFFIX = '/api/%d/' % API_VERSION
DEVELOPER = False
if DEVELOPER:
DOMAIN = 'http://localhost:8000'
else:
DOMAIN = 'http://neuroelectro.org'
API_URL = DOMAIN+API_SUFFIX
[docs]def is_neuroelectro_up():
"""Check if neuroelectro.org is up."""
url = "http://neuroelectro.org"
request = requests.get(url)
return request.status_code == 200
[docs]class NeuroElectroError(Exception):
"""Base class for NeuroElectro errors."""
pass
[docs]class Neuron:
"""Describes a neuron type in NeuroElectro."""
id = None
nlex_id = None
name = None
[docs]class EphysProp:
"""Describes an electrophysiolical property in NeuroElectro."""
id = None
nlex_id = None
name = None
[docs]class Article:
"""Describes a journal article in NeuroElectro."""
id = None
pmid = None
[docs]class NeuroElectroData(object):
"""Abstract class based on neuroelectro.org data using that site's API."""
def __init__(self, neuron=None, ephysprop=None,
get_values=False, cached=True):
"""Constructor.
Args:
neuron (dict): Dictionary of information about the neuron
ephys_prop (dict): Dictionary of information about the
electrophysiological property
get_values (bool): Whether to get the values from NeuroElectro
cached (bool): Whether to use a cached value if it is available
"""
self.neuron = Neuron()
if neuron:
for key, value in neuron.items():
setattr(self.neuron, key, value)
self.ephysprop = EphysProp()
if ephysprop:
for key, value in ephysprop.items():
setattr(self.ephysprop, key, value)
self.require_attrs = None
self.get_one_match = True # By default only get the first match
self.cached = cached
if get_values:
self.get_values()
url = API_URL # Base URL.
[docs] def set_names(self, neuron_name, ephysprop_name):
"""Set the names of the neurons (i.e. their types)."""
self.set_neuron(name=neuron_name)
self.set_ephysprop(name=ephysprop_name)
[docs] def set_neuron(self, **kwargs):
"""Set the biological neuron lookup attributes."""
for key, value in kwargs.items():
if key in ['id', 'nlex_id', 'name']:
setattr(self.neuron, key, value)
[docs] def set_ephysprop(self, **kwargs):
"""Set the electrophysiological property lookup attributes."""
for key, value in kwargs.items():
if key in ['id', 'nlex_id', 'name']:
setattr(self.ephysprop, key, value)
[docs] def make_url(self, params=None):
"""Create the full URL to the neuroelectro API."""
url = self.url+"?"
query = {}
# Change these for consistency in the neuroelectro.org API.
query['n'] = self.neuron.id
query['nlex'] = self.neuron.nlex_id
query['n__name'] = self.neuron.name
query['e'] = self.ephysprop.id
query['e__name'] = self.ephysprop.name
query = {key: value for key, value in query.items()
if value is not None}
if params is not None:
for key in params.keys():
if params[key] is not None:
query[key] = params[key]
url += urlencode(query)
return url
[docs] def get_json(self, params=None, quiet=False):
"""Get JSON data from neuroelectro.org.
Data is based on the currently set neuron and ephys property.
Use 'params' to constrain the data returned.
"""
url = self.make_url(params=params)
if not quiet:
print(url)
try:
url_result = urlopen(url, None, 3) # Get the page.
html = url_result.read() # Read out the HTML (actually JSON)
except URLError as e:
try:
html = e.read().decode('utf-8')
self.json_object = json.loads(html)
if 'error_message' in self.json_object:
raise NeuroElectroError(self.json_object['error_message'])
except:
if hasattr(e, 'reason'):
raise NeuroElectroError(e.reason)
raise NeuroElectroError("NeuroElectro.org appears to be down.")
else:
html = html.decode('utf-8')
self.json_object = json.loads(html)
return self.json_object
[docs] def get_values(self, params=None, quiet=False):
"""Get values from neuroelectro.org.
We will use 'params' in the future to specify metadata
(e.g. temperature) that neuroelectro.org will provide.
"""
db = shelve.open('neuroelectro-cache') if self.cached else {}
contents = (self.__class__, self.neuron, self.ephysprop, params)
if DUMP:
pickled = pickle.dumps(contents)
identifier = hashlib.sha224(pickled).hexdigest()
if not quiet:
print("Getting %s%s data values from neuroelectro.org"
% ("cached " if identifier in db else "",
self.ephysprop.name))
if identifier in db:
print("Using cached value.")
self.json_object = json.loads(db[identifier])
else:
self.get_json(params=params, quiet=quiet)
if DUMP:
db[identifier] = json.dumps(self.json_object)
if 'objects' in self.json_object:
data = self.json_object['objects']
else:
data = None
# All the summary matches in neuroelectro.org for this combination
# of neuron and ephys property.
if data:
self.api_data = data[0] if self.get_one_match else data
else:
self.api_data = None
# For now, we are just going to take the first match.
# If neuron_id and ephysprop_id where both specified,
# there should be only one anyway.
if self.cached:
db.close()
return self.api_data
[docs] def check(self):
"""See if data requested from the server were obtained successfully."""
if self.require_attrs:
for attr in self.require_attrs:
if not hasattr(self, attr):
raise AttributeError(("The attribute '%s' was "
"not found.") % attr)
[docs]class NeuroElectroDataMap(NeuroElectroData):
"""Class for getting single reported values from neuroelectro.org."""
url = API_URL+'nedm/'
article = Article()
require_attrs = ['val', 'sem']
[docs] def set_article(self, id_=None, pmid=None):
"""Set the biological neuron using a NeuroLex ID."""
self.article.id = id_
self.article.pmid = pmid
[docs] def make_url(self, params=None):
"""Create the full URL to the neuroelectro API."""
url = super(NeuroElectroDataMap, self).make_url(params=params)
query = {}
query['a'] = self.article.id
query['pmid'] = self.article.pmid
query = {key: value for key, value in query.items()
if value is not None}
url += '&'+urlencode(query)
return url
[docs] def get_values(self, params=None, quiet=False):
"""Get values from neuroelectro.org.
We will use 'params' in the future to specify metadata
(e.g. temperature) that neuroelectro.org will provide.
"""
data = super(NeuroElectroDataMap, self).get_values(params=params,
quiet=quiet)
if data:
self.neuron.name = data['ncm']['n']['name']
# Set the neuron name from the json data.
self.ephysprop.name = data['ecm']['e']['name']
# Set the ephys property name from the json data.
self.val = data['val']
self.sem = data['err']
self.n = data['n']
self.check()
return data
[docs]class NeuroElectroSummary(NeuroElectroData):
"""Class for getting summary values (across reports) from
neuroelectro.org.
"""
url = API_URL+'nes/'
require_attrs = ['mean', 'std']
[docs] def get_values(self, params=None, quiet=False):
"""Get values from neuroelectro.org.
We will use 'params' in the future to specify metadata
(e.g. temperature) that neuroelectro.org will provide.
"""
data = super(NeuroElectroSummary, self).get_values(params=params,
quiet=quiet)
if data:
self.neuron.name = data['n']['name']
# Set the neuron name from the json data.
self.ephysprop.name = data['e']['name']
# Set the ephys property name from the json data.
self.mean = data['value_mean']
self.std = data['value_sd']
self.n = data['num_articles']
self.check()
return data
[docs] def get_observation(self, params=None, show=False):
"""Get the observation from neuroelectro.org."""
values = self.get_values(params=params)
if show:
pprint(values)
observation = {'mean': self.mean, 'std': self.std}
return observation
[docs]class NeuroElectroPooledSummary(NeuroElectroDataMap):
"""Class for getting summary values from neuroelectro.org.
Values are computed by pooling each report's mean and std across reports.
"""
[docs] def get_values(self, params=None, quiet=False):
"""Get all papers reporting the neuron's property value."""
self.get_one_match = False # We want all matches
if params is None:
params = {}
params['limit'] = 999
data = super(NeuroElectroPooledSummary, self).get_values(params=params,
quiet=quiet)
if data:
# Ensure data from api matches the requested params
data = [item for item in data
if (item['ecm']['e']['name'] == self.ephysprop.name.lower()
or
item['ecm']['e']['id'] == self.ephysprop.id)
and
(item['ncm']['n']['nlex_id'] == self.neuron.nlex_id
or
item['ncm']['n']['id'] == self.neuron.id)
]
# Set the neuron name and prop from the first json data object.
self.neuron_name = data[0]['ncm']['n']['name']
self.ephysprop_name = data[0]['ecm']['e']['name']
# Pool each paper by weighing each mean by the paper's N and std
stats = self.get_pooled_stats(data, quiet)
self.mean = stats['mean']
self.std = stats['std']
self.n = stats['n']
self.items = stats['items']
# Needed by check()
self.val = stats['mean']
self.sem = stats['sem']
self.check()
else:
raise RuntimeError("No data was returned by the NeuroElectro API")
return self.items
[docs] def get_observation(self, params=None, show=False):
"""Get the observation from neuroelectro.org."""
values = self.get_values(params=params)
if show:
pprint(values)
observation = {'mean': self.mean, 'std': self.std, 'n': self.n}
return observation
[docs] def get_pooled_stats(self, data, quiet=True):
"""Get pooled statistics from the data."""
lines = []
means = []
sems = []
stds = []
ns = []
sources = []
if not quiet:
print("Raw Values")
# Collect raw values for each paper from NeuroElectro
for item in data:
err_is_sem = item['error_type'] == "sem" # SEM or std
err = (item['err_norm'] if item['err_norm'] is not None
else item['err'])
sem = err if err_is_sem else None
std = err if not err_is_sem else None
mean = (item['val_norm'] if item['val_norm'] is not None
else item['val'])
n = item['n']
source = item['source']
means.append(mean)
sems.append(sem)
stds.append(std)
ns.append(n)
sources.append(source)
if not quiet:
print({'mean': mean, 'std': std, 'sem': sem, 'n': n})
# Fill in missing values
self.fill_missing_ns(ns)
self.fill_missing_sems_stds(sems, stds, ns)
if not quiet:
print("---------------------------------------------------")
print("Filled in Values (computed or median where missing)")
for i, _ in enumerate(means):
line = {'mean': means[i], 'std': stds[i], 'sem': sems[i],
'n': ns[i], "source": sources[i]}
lines.append(line)
print(line)
# Compute the weighted grand_mean
# grand_mean = SUM( N[i]*Mean[i] ) / SUM(N[i])
# grand_std = SQRT( SUM( (N[i]-1)*std[i]^2 ) / SUM(N[i]-1) )
ns_np = np.array(ns)
means_np = np.array(means)
stds_np = np.array(stds)
n_sum = ns_np.sum()
grand_mean = np.sum(ns_np * means_np) / n_sum
grand_std = np.sqrt(np.sum((ns_np-1)*(stds_np**2))/np.sum(ns_np-1))
grand_sem = grand_std / np.sqrt(n_sum)
return {'mean': grand_mean, 'sem': grand_sem, 'std': grand_std,
'n': n_sum, 'items': lines}
[docs] def fill_missing_ns(self, ns):
"""Fill in the missing N's with median N."""
none_free_ns = np.array(ns)[ns != np.array(None)]
if none_free_ns:
n_median = int(np.median(none_free_ns))
else:
n_median = 1 # If no N's reported at all, weigh all means equally
for i, _ in enumerate(ns):
if ns[i] is None:
ns[i] = n_median
[docs] def fill_missing_sems_stds(self, sems, stds, ns):
"""Fill in computable sems/stds."""
for i, _ in enumerate(sems):
# Check if sem or std is computable
if sems[i] is None and stds[i] is not None:
sems[i] = stds[i] / np.sqrt(ns[i])
if stds[i] is None and sems[i] is not None:
stds[i] = sems[i] * np.sqrt(ns[i])
# Fill in the remaining missing using median std
none_free_stds = np.array(stds)[stds != np.array(None)]
if none_free_stds:
std_median = np.median(none_free_stds)
else: # If no stds or SEMs reported at all, raise error
# Perhaps the median std of all cells for this property could
# be used. However, NE API nes interface does not support summary
# prop values without specifying the neuron id
msg = 'No StDevs or SEMs reported for "%s" property "%s"'
msg = msg % (self.neuron_name, self.ephysprop_name)
raise NotImplementedError(msg)
for i, _ in enumerate(stds):
if stds[i] is None:
stds[i] = std_median
sems[i] = std_median / np.sqrt(ns[i])