Source code for neuronunit.neuroelectro

"""NeuronUnit interface to Neuroelectro.org.

Interface for creating tests using neuroelectro.org as reference data.

Example workflow:

x = NeuroElectroDataMap()
x.set_neuron(nlex_id='nifext_152')
# neurolex.org ID for 'Amygdala basolateral nucleus pyramidal neuron'.
x.set_ephysprop(id=23) # neuroelectro.org ID for 'Spike width'.
x.set_article(pmid=18667618) # Pubmed ID for Fajardo et al, 2008 (J. Neurosci.)
x.get_values() # Gets values for spike width from this paper.
width = x.val # Spike width reported in that paper.

t = neuronunit.tests.SpikeWidthTest(spike_width=width)
c = sciunit.Candidate() # Instantiation of your model (or other candidate)
c.execute = code_that_runs_your_model
result = sciunit.run(t, c)
print(result.score)
#
# OR
#
x = NeuroElectroSummary()
x.set_neuron(nlex_id='nifext_152') # neurolex.org ID for 'Amygdala basolateral
                                   # nucleus pyramidal neuron'.
x.set_ephysprop(id=2) # neuroelectro.org ID for 'Spike width'.
x.get_values() # Gets summary values for spike width across all matching papers.
width = x.mean # Mean Spike width reported across all matching papers.
...
"""

import json
from pprint import pprint
import shelve
import hashlib
import pickle
import requests
try:  # Python 2
    from urllib import urlencode
    from urllib2 import urlopen, URLError
except ImportError:  # Python 3
    from urllib.parse import urlencode
    from urllib.request import urlopen, URLError

import numpy as np
# Whether get_values() stores freshly fetched responses in its cache.
DUMP = True

# Flip to True to query a local development server instead of the live site.
DEVELOPER = False
DOMAIN = 'http://localhost:8000' if DEVELOPER else 'http://neuroelectro.org'

# Versioned API root: '<DOMAIN>/api/<API_VERSION>/'.
API_VERSION = 1
API_SUFFIX = '/api/{:d}/'.format(API_VERSION)
API_URL = ''.join((DOMAIN, API_SUFFIX))


def is_neuroelectro_up(timeout=10):
    """Check if neuroelectro.org is up.

    Args:
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        bool: True if the site answered with HTTP status 200. False for any
            other status, or if the request itself failed (for example a
            refused connection, a DNS failure or a timeout).
    """
    url = "http://neuroelectro.org"
    try:
        # Without a timeout an unresponsive server would block indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # An unreachable server is exactly the "down" case being probed for.
        return False
    return response.status_code == 200


class NeuroElectroError(Exception):
    """Base class for NeuroElectro errors.

    Raised when a request to the NeuroElectro API fails.
    """


class Neuron:
    """Describes a neuron type in NeuroElectro.

    All fields default to None, meaning "not specified"; unspecified fields
    are left out of the API query.
    """

    id = None  # NeuroElectro ID of the neuron type.
    nlex_id = None  # NeuroLex ID of the neuron type (e.g. 'nifext_152').
    name = None  # Name of the neuron type.


class EphysProp:
    """Describes an electrophysiological property in NeuroElectro.

    All fields default to None, meaning "not specified"; unspecified fields
    are left out of the API query.
    """

    id = None  # NeuroElectro ID of the property.
    nlex_id = None  # NeuroLex ID of the property.
    name = None  # Name of the property.


class Article:
    """Describes a journal article in NeuroElectro.

    Both fields default to None, meaning "not specified".
    """

    id = None  # NeuroElectro ID of the article.
    pmid = None  # PubMed ID of the article.


class NeuroElectroData(object):
    """Abstract class based on neuroelectro.org data using that site's API.

    An instance holds the neuron and the electrophysiological property that
    define a query, fetches the matching JSON from the API (optionally
    through an on-disk ``shelve`` cache) and stores the selected result in
    ``api_data``.

    Attributes:
        url (str): Endpoint queried by make_url(); subclasses override it.
        require_attrs (list or None): Names of the attributes that check()
            requires on the instance; subclasses override it.
    """

    url = API_URL  # Base URL.

    # Declared on the class (not assigned in __init__) so that the lists
    # defined by subclasses are not shadowed by an instance-level None,
    # which would silently turn check() into a no-op.
    require_attrs = None

    def __init__(self, neuron=None, ephysprop=None,
                 get_values=False, cached=True):
        """Constructor.

        Args:
            neuron (dict): Dictionary of information about the neuron
            ephysprop (dict): Dictionary of information about the
                              electrophysiological property
            get_values (bool): Whether to get the values from NeuroElectro
            cached (bool): Whether to use a cached value if it is available
        """
        self.neuron = Neuron()
        if neuron:
            for key, value in neuron.items():
                setattr(self.neuron, key, value)
        self.ephysprop = EphysProp()
        if ephysprop:
            for key, value in ephysprop.items():
                setattr(self.ephysprop, key, value)
        self.get_one_match = True  # By default only get the first match
        self.cached = cached
        if get_values:
            self.get_values()

    def set_names(self, neuron_name, ephysprop_name):
        """Set the neuron name and the ephys property name used for lookup."""
        self.set_neuron(name=neuron_name)
        self.set_ephysprop(name=ephysprop_name)

    def set_neuron(self, **kwargs):
        """Set the biological neuron lookup attributes.

        Only the keywords 'id', 'nlex_id' and 'name' are used; any other
        keyword is ignored.
        """
        for key, value in kwargs.items():
            if key in ['id', 'nlex_id', 'name']:
                setattr(self.neuron, key, value)

    def set_ephysprop(self, **kwargs):
        """Set the electrophysiological property lookup attributes.

        Only the keywords 'id', 'nlex_id' and 'name' are used; any other
        keyword is ignored.
        """
        for key, value in kwargs.items():
            if key in ['id', 'nlex_id', 'name']:
                setattr(self.ephysprop, key, value)

    def make_url(self, params=None):
        """Create the full URL to the neuroelectro API.

        Args:
            params (dict): Extra query parameters. Entries whose value is
                None are ignored; the others are added to (or override) the
                parameters derived from the neuron and ephys property.

        Returns:
            str: ``self.url`` followed by '?' and the encoded query string.
        """
        # Change these for consistency in the neuroelectro.org API.
        query = {
            'n': self.neuron.id,
            'nlex': self.neuron.nlex_id,
            'n__name': self.neuron.name,
            'e': self.ephysprop.id,
            'e__name': self.ephysprop.name,
        }
        # Unset lookup attributes must not appear in the query at all.
        query = {key: value for key, value in query.items()
                 if value is not None}

        if params is not None:
            for key, value in params.items():
                if value is not None:
                    query[key] = value

        return self.url + "?" + urlencode(query)

    def get_json(self, params=None, quiet=False):
        """Get JSON data from neuroelectro.org.

        Data is based on the currently set neuron and ephys property.
        Use 'params' to constrain the data returned.

        Args:
            params (dict): Extra query parameters, passed to make_url().
            quiet (bool): If False, print the URL being requested.

        Returns:
            The decoded JSON response, also stored in ``self.json_object``.

        Raises:
            NeuroElectroError: If the request fails. The message is, in
                order of preference, the 'error_message' of the JSON error
                body, the reason attached to the URL error, or a generic
                "appears to be down" message.
        """
        url = self.make_url(params=params)
        if not quiet:
            print(url)
        try:
            url_result = urlopen(url, None, 3)  # Get the page.
            html = url_result.read()  # Read out the HTML (actually JSON)
        except URLError as e:
            api_message = self._read_error_message(e)
            if api_message:
                raise NeuroElectroError(api_message)
            reason = getattr(e, 'reason', None)
            if reason is not None:
                raise NeuroElectroError(reason)
            raise NeuroElectroError("NeuroElectro.org appears to be down.")
        else:
            html = html.decode('utf-8')
            self.json_object = json.loads(html)
        return self.json_object

    def _read_error_message(self, error):
        """Return the API's 'error_message' from an error body, or None.

        Best effort only: errors without a readable JSON body (e.g. a plain
        URLError, which has no read() method) simply yield None.
        """
        try:
            body = json.loads(error.read().decode('utf-8'))
        except (AttributeError, ValueError, IOError):
            return None
        self.json_object = body
        if isinstance(body, dict):
            return body.get('error_message')
        return None

    def get_values(self, params=None, quiet=False):
        """Get values from neuroelectro.org.

        We will use 'params' in the future to specify metadata
        (e.g. temperature) that neuroelectro.org will provide.

        When ``self.cached`` is true, responses are looked up in (and, if
        DUMP is set, stored to) the 'neuroelectro-cache' shelve file, keyed
        by a hash of the class, neuron, ephys property, params and, when
        the instance has one, the article constraint.

        Args:
            params (dict): Extra query parameters, passed to get_json().
            quiet (bool): If False, print progress messages.

        Returns:
            The first entry of the response's 'objects' list when
            ``self.get_one_match`` is true, otherwise the whole list; None
            if the response has no (or an empty) 'objects' entry. The same
            value is stored in ``self.api_data``.
        """
        contents = (self.__class__, self.neuron, self.ephysprop, params)
        # Subclasses that restrict by article must not share cache entries
        # between different articles. Instances without an article keep the
        # original key layout.
        article = getattr(self, 'article', None)
        if article is not None:
            contents += (article,)
        # The key is needed for lookups whether or not DUMP is set.
        identifier = hashlib.sha224(pickle.dumps(contents)).hexdigest()

        db = shelve.open('neuroelectro-cache') if self.cached else {}
        try:
            is_cached = identifier in db
            if not quiet:
                print("Getting %s%s data values from neuroelectro.org"
                      % ("cached " if is_cached else "",
                         self.ephysprop.name))
            if is_cached:
                if not quiet:
                    print("Using cached value.")
                self.json_object = json.loads(db[identifier])
            else:
                self.get_json(params=params, quiet=quiet)
                if DUMP:
                    db[identifier] = json.dumps(self.json_object)
        finally:
            # Release the shelve file even if the request failed.
            if self.cached:
                db.close()

        if 'objects' in self.json_object:
            data = self.json_object['objects']
        else:
            data = None
        # All the summary matches in neuroelectro.org for this combination
        # of neuron and ephys property.
        # For now, we are just going to take the first match.
        # If neuron_id and ephysprop_id where both specified,
        # there should be only one anyway.
        if data:
            self.api_data = data[0] if self.get_one_match else data
        else:
            self.api_data = None
        return self.api_data

    def check(self):
        """See if data requested from the server were obtained successfully.

        Raises:
            AttributeError: If any name listed in ``require_attrs`` is not
                an attribute of this instance.
        """
        if self.require_attrs:
            for attr in self.require_attrs:
                if not hasattr(self, attr):
                    raise AttributeError(("The attribute '%s' was "
                                          "not found.") % attr)


class NeuroElectroDataMap(NeuroElectroData):
    """Class for getting single reported values from neuroelectro.org.

    Queries the 'nedm/' endpoint, optionally restricted to one article, and
    exposes the reported value as ``val``, its error as ``sem`` and the
    sample size as ``n``.
    """

    url = API_URL+'nedm/'
    # Unconstrained default shared by all instances; it is never mutated,
    # set_article() binds a separate Article to the instance instead.
    article = Article()
    require_attrs = ['val', 'sem']

    def set_article(self, id_=None, pmid=None):
        """Restrict the lookup to one article.

        Args:
            id_: NeuroElectro ID of the article, or None.
            pmid: PubMed ID of the article, or None.
        """
        # Create a per-instance Article: mutating the class-level default
        # would leak the constraint into every other instance.
        article = Article()
        article.id = id_
        article.pmid = pmid
        self.article = article

    def make_url(self, params=None):
        """Create the full URL to the neuroelectro API.

        Extends the base URL with the article constraints ('a' and 'pmid')
        that are not None.
        """
        url = super(NeuroElectroDataMap, self).make_url(params=params)
        query = {'a': self.article.id, 'pmid': self.article.pmid}
        query = {key: value for key, value in query.items()
                 if value is not None}
        if not query:
            return url  # Nothing to add; avoid a dangling '&'.
        # The base URL ends in '?' when it carries no parameters yet.
        separator = '' if url.endswith('?') else '&'
        return url + separator + urlencode(query)

    def get_values(self, params=None, quiet=False):
        """Get values from neuroelectro.org.

        We will use 'params' in the future to specify metadata
        (e.g. temperature) that neuroelectro.org will provide.

        Returns:
            The data returned by the base class. When it is a single record
            (a dict), the neuron name, ephys property name, ``val``, ``sem``
            and ``n`` are set from it and check() is run.
        """
        data = super(NeuroElectroDataMap, self).get_values(params=params,
                                                           quiet=quiet)
        # With get_one_match disabled the base class returns a list of
        # records, which cannot be parsed as a single record here.
        if data and isinstance(data, dict):
            # Set the neuron name from the json data.
            self.neuron.name = data['ncm']['n']['name']
            # Set the ephys property name from the json data.
            self.ephysprop.name = data['ecm']['e']['name']
            self.val = data['val']
            self.sem = data['err']
            self.n = data['n']
            self.check()
        return data


class NeuroElectroSummary(NeuroElectroData):
    """Class for getting summary values (across reports) from neuroelectro.org.

    Queries the 'nes/' endpoint and exposes the summary as ``mean``, ``std``
    and ``n`` (the number of articles).
    """

    url = API_URL+'nes/'
    require_attrs = ['mean', 'std']

    def get_values(self, params=None, quiet=False):
        """Get values from neuroelectro.org.

        We will use 'params' in the future to specify metadata
        (e.g. temperature) that neuroelectro.org will provide.

        Returns:
            The summary record returned by the base class, or None if
            nothing matched. When a record is returned, the neuron name,
            ephys property name, ``mean``, ``std`` and ``n`` are set from it
            and check() is run.
        """
        data = super(NeuroElectroSummary, self).get_values(params=params,
                                                           quiet=quiet)
        if data:
            # Set the neuron name from the json data.
            self.neuron.name = data['n']['name']
            # Set the ephys property name from the json data.
            self.ephysprop.name = data['e']['name']
            self.mean = data['value_mean']
            self.std = data['value_sd']
            self.n = data['num_articles']
            self.check()
        return data

    def get_observation(self, params=None, show=False):
        """Get the observation from neuroelectro.org.

        Args:
            params (dict): Extra query parameters, passed to get_values().
            show (bool): If True, pretty-print the fetched record.

        Returns:
            dict: ``{'mean': self.mean, 'std': self.std}``.
        """
        values = self.get_values(params=params)
        if show:
            pprint(values)
        observation = {'mean': self.mean, 'std': self.std}
        return observation


class NeuroElectroPooledSummary(NeuroElectroDataMap):
    """Class for getting summary values from neuroelectro.org.

    Values are computed by pooling each report's mean and std across reports.
    """

    def get_values(self, params=None, quiet=False):
        """Get all papers reporting the neuron's property value.

        Args:
            params (dict): Extra query parameters. The dict is copied, and
                'limit' is always set to 999 in the copy.
            quiet (bool): If False, print progress and the pooled inputs.

        Returns:
            list: One dict per paper with the (filled-in) mean, std, sem,
            n and source used for pooling; also stored in ``self.items``.

        Raises:
            RuntimeError: If the API returned no data, or none of it
                matched the requested neuron and ephys property.
        """
        self.get_one_match = False  # We want all matches

        # Work on a copy so the caller's dict is left untouched.
        params = dict(params) if params else {}
        params['limit'] = 999

        # Skip NeuroElectroDataMap.get_values: it parses one record, while
        # a list of records is returned when get_one_match is False.
        data = super(NeuroElectroDataMap, self).get_values(params=params,
                                                           quiet=quiet)
        if not data:
            raise RuntimeError("No data was returned by the NeuroElectro API")

        # Ensure data from api matches the requested params
        data = [item for item in data if self._matches_request(item)]
        if not data:
            raise RuntimeError("No data returned by the NeuroElectro API "
                               "matched the requested neuron and property")

        # Set the neuron name and prop from the first json data object.
        self.neuron_name = data[0]['ncm']['n']['name']
        self.ephysprop_name = data[0]['ecm']['e']['name']

        # Pool each paper by weighing each mean by the paper's N and std
        stats = self.get_pooled_stats(data, quiet)

        self.mean = stats['mean']
        self.std = stats['std']
        self.n = stats['n']
        self.items = stats['items']

        # Needed by check()
        self.val = stats['mean']
        self.sem = stats['sem']

        self.check()

        return self.items

    def _matches_request(self, item):
        """Tell whether an API record is for the requested neuron/property."""
        ephys = item['ecm']['e']
        neuron = item['ncm']['n']
        wanted_name = self.ephysprop.name
        # The property may have been specified by id only (name is None).
        ephys_ok = ((wanted_name is not None
                     and ephys['name'] == wanted_name.lower())
                    or ephys['id'] == self.ephysprop.id)
        neuron_ok = (neuron['nlex_id'] == self.neuron.nlex_id
                     or neuron['id'] == self.neuron.id)
        return ephys_ok and neuron_ok

    def get_observation(self, params=None, show=False):
        """Get the observation from neuroelectro.org.

        Args:
            params (dict): Extra query parameters, passed to get_values().
            show (bool): If True, pretty-print the per-paper items.

        Returns:
            dict: ``{'mean': self.mean, 'std': self.std, 'n': self.n}``.
        """
        values = self.get_values(params=params)

        if show:
            pprint(values)

        observation = {'mean': self.mean, 'std': self.std, 'n': self.n}

        return observation

    def get_pooled_stats(self, data, quiet=True):
        """Get pooled statistics from the data.

        Args:
            data (list): API records; each provides 'val'/'val_norm',
                'err'/'err_norm', 'error_type', 'n' and 'source'.
            quiet (bool): If False, print the raw and the filled-in values.

        Returns:
            dict: 'mean', 'sem', 'std' and 'n' of the pooled sample, plus
            'items', the per-paper values after missing ones were filled.
        """
        means = []
        sems = []
        stds = []
        ns = []
        sources = []

        if not quiet:
            print("Raw Values")

        # Collect raw values for each paper from NeuroElectro
        for item in data:

            err_is_sem = item['error_type'] == "sem"  # SEM or std
            # Prefer the normalized error/value when the API provides one.
            err = (item['err_norm'] if item['err_norm'] is not None
                   else item['err'])

            sem = err if err_is_sem else None
            std = err if not err_is_sem else None
            mean = (item['val_norm'] if item['val_norm'] is not None
                    else item['val'])
            n = item['n']
            source = item['source']

            means.append(mean)
            sems.append(sem)
            stds.append(std)
            ns.append(n)
            sources.append(source)

            if not quiet:
                print({'mean': mean, 'std': std, 'sem': sem, 'n': n})

        # Fill in missing values
        self.fill_missing_ns(ns)
        self.fill_missing_sems_stds(sems, stds, ns)

        # Built regardless of `quiet`: these are returned as 'items'.
        lines = [{'mean': mean, 'std': std, 'sem': sem,
                  'n': n, "source": source}
                 for mean, std, sem, n, source
                 in zip(means, stds, sems, ns, sources)]

        if not quiet:
            print("---------------------------------------------------")
            print("Filled in Values (computed or median where missing)")
            for line in lines:
                print(line)

        # Compute the weighted grand_mean
        # grand_mean  = SUM( N[i]*Mean[i] ) / SUM(N[i])
        # grand_std = SQRT( SUM( (N[i]-1)*std[i]^2 ) / SUM(N[i]-1) )

        ns_np = np.array(ns)
        means_np = np.array(means)
        stds_np = np.array(stds)
        n_sum = ns_np.sum()

        grand_mean = np.sum(ns_np * means_np) / n_sum
        # NOTE: SUM(N[i]-1) is 0 when every paper has N == 1, in which case
        # the pooled std (and sem) is undefined and evaluates to nan.
        grand_std = np.sqrt(np.sum((ns_np-1)*(stds_np**2))/np.sum(ns_np-1))
        grand_sem = grand_std / np.sqrt(n_sum)

        return {'mean': grand_mean, 'sem': grand_sem, 'std': grand_std,
                'n': n_sum, 'items': lines}

    def fill_missing_ns(self, ns):
        """Fill in the missing N's with median N.

        Args:
            ns (list): Sample sizes, None where unreported. Modified in
                place.
        """
        reported = [n for n in ns if n is not None]

        if reported:
            n_median = int(np.median(reported))
        else:
            n_median = 1  # If no N's reported at all, weigh all means equally

        for i, n in enumerate(ns):
            if n is None:
                ns[i] = n_median

    def fill_missing_sems_stds(self, sems, stds, ns):
        """Fill in computable sems/stds.

        Each missing sem or std is first derived from its counterpart and
        N; entries with neither get the median std of the others.

        Args:
            sems (list): SEMs, None where unknown. Modified in place.
            stds (list): Standard deviations, None where unknown. Modified
                in place.
            ns (list): Sample sizes, with no missing entries.

        Raises:
            NotImplementedError: If no paper reported a std or SEM.
        """
        for i, _ in enumerate(sems):

            # Check if sem or std is computable
            if sems[i] is None and stds[i] is not None:
                sems[i] = stds[i] / np.sqrt(ns[i])

            if stds[i] is None and sems[i] is not None:
                stds[i] = sems[i] * np.sqrt(ns[i])

        # Fill in the remaining missing using median std
        reported = [std for std in stds if std is not None]

        if not reported:  # If no stds or SEMs reported at all, raise error

            # Perhaps the median std of all cells for this property could
            # be used. However, NE API nes interface does not support summary
            # prop values without specifying the neuron id
            msg = 'No StDevs or SEMs reported for "%s" property "%s"'
            msg = msg % (getattr(self, 'neuron_name', None),
                         getattr(self, 'ephysprop_name', None))
            raise NotImplementedError(msg)

        std_median = np.median(reported)

        for i, std in enumerate(stds):
            if std is None:
                stds[i] = std_median
                sems[i] = std_median / np.sqrt(ns[i])
Source code for neuronunit.neuroelectro

NeuronUnit

Navigation

Related Topics