# Source code for abagen.mouse.mouse

# -*- coding: utf-8 -*-
"""
Functions to fetch mouse unionization (i.e., expression) data
"""

import itertools

from nibabel.volumeutils import Recoder
import numpy as np
import pandas as pd

from .io import fetch_allenref_structures, fetch_rubinov2015_structures
from .utils import _coerce_inputs, _make_api_query

# Names of the attributes that can be requested from a unionization query.
# This list is what `available_unionization_info()` exposes, and it is used in
# full when `attributes='all'` is passed to the unionization fetchers below.
_UNIONIZATION_ATTRIBUTES = [
    'expression_density',
    'expression_energy',
    'sum_expressing_pixel_intensity',
    'sum_expressing_pixels',
    'sum_pixel_intensity',
    'sum_pixels',
    'voxel_energy_cv',
    'voxel_energy_mean'
]


def _get_experiments_from_gene(id=None, acronym=None, name=None,
                               slicing_direction='sagittal', verbose=False):
    """
    Looks up the IDs of experiments that probed the requested gene(s)

    The gene(s) must be identified through one of `id`, `acronym`, or `name`.

    Parameters
    ----------
    id : int, optional
        Numerical gene ID
    acronym : str, optional
        Short-form gene acronym (case sensitive)
    name : str, optional
        Full gene name (case sensitive)
    slicing_direction : {'sagittal', 'coronal'}, optional
        Plane along which the brain tissue was sectioned. Default: 'sagittal'
    verbose : bool, optional
        Whether to print status messages. Default: False

    Returns
    -------
    experiments : list of int
        IDs of the experiments returned by the query for the given gene(s)

    Raises
    ------
    ValueError
        If `slicing_direction` is not one of the supported values
    """

    # reject unsupported planes before any query is issued
    valid_planes = ['sagittal', 'coronal']
    if slicing_direction not in valid_planes:
        raise ValueError('Slicing_direction {} is invalid. Must be in {}.'
                         .format(slicing_direction, valid_planes))

    # restrict the query to non-failed datasets of product 1 that match the
    # requested gene(s) and plane of section
    gene_filter = 'genes{}'.format(
        _coerce_inputs(id=id, acronym=acronym, name=name)
    )
    plane_filter = 'plane_of_section[name$eq{}]'.format(slicing_direction)
    query = ['[failed$eqfalse]', 'products[id$eq1]', gene_filter, plane_filter]

    datasets = _make_api_query('SectionDataSet', criteria=query,
                               attributes='data_sets.id', verbose=verbose)

    return [dataset['id'] for dataset in datasets]


def _get_unionization_from_experiment(experiment_id, structures=None,
                                      attributes=None, average=True,
                                      verbose=False):
    """
    Gets unionization data for provided experiment(s) `experiment_id`

    Parameters
    ----------
    experiment_id : int or list
        Numerical experiment ID. If multiple experiments are provided the
        requested `attributes` will be averaged across experiments (unless
        `average=False`)
    structures : list, optional
        List of structures (id, acronym, or name) for which to get unionization
        information associated with provided `experiment_id`. If not specified
        uses structures documented in [MI1]_. Specifying either the id or name
        is recommended as acronyms are not unique to structures. Default: None
    attributes : str or list, optional
        Which attributes / information to obtain for the provided structure.
        See :func:`abagen.mouse.available_unionization_info` for list of
        available attributes to request. If not specified then only
        'expression_density' will be returned. Specifying 'all' will return
        all available attributes. Default: None
    average : bool, optional
        Whether to average across experiments if `experiment_id` is provided as
        a list. Only experiments probing the same gene will be considered for
        averaging. Default: True
    verbose : bool, optional
        Whether to print status messages. Default: False

    Returns
    -------
    unionization : pandas.DataFrame
        Where columns are the requested unionization attributes and the index
        corresponds to gene ids and structure ids. If `average=False`,
        experiment ids are an additional index level (between gene and
        structure). If the query yields no unionization records an empty
        data frame with the same columns and index levels is returned
    """

    if isinstance(experiment_id, (str, int)):
        experiment_id = [experiment_id]

    if structures is None:
        # read default structure list (from Rubinov et al., 2015, PNAS)
        structures = fetch_rubinov2015_structures(entry_type='id')
    elif isinstance(structures, (str, int)):
        structures = [structures]

    # we need to coerce all provided structures to be integer ids, NOT strings
    # so fetch all available structures then recode them to ids
    if any(isinstance(f, str) for f in structures):
        structs = np.asarray(fetch_allenref_structures(verbose=False))
        structs = Recoder(structs.tolist(), fields=['acronym', 'id', 'name'])
        structures = list(set(structs.id.get(f) for f in structures))

    # determine which attributes to request; if we don't have to request all
    # of them then we can speed up the API call. The string check comes first
    # so that array-like inputs are never compared against 'all', and the
    # result is always a fresh list (required for column selection below)
    if attributes is None:
        attributes = ['expression_density']
    elif isinstance(attributes, str):
        if attributes == 'all':
            attributes = list(_UNIONIZATION_ATTRIBUTES)
        else:
            attributes = [attributes]
    else:
        attributes = list(attributes)

    includes = [
        'structure_unionizes', 'genes'
    ]
    criteria = [
        '[id$in{}]'.format(
            ','.join([str(f) for f in experiment_id])
        ),
        'products[id$eq1]',
        'structure_unionizes[structure_id$in{}]'.format(
            ','.join([str(f) for f in structures])
        ),
    ]
    req_attributes = [
        'id', 'structure_unionizes', 'structure_unionizes.structure_id'
    ] + [
        'structure_unionizes.' + attr for attr in attributes
    ]

    info = _make_api_query('SectionDataSet', includes=includes,
                           criteria=criteria, attributes=req_attributes,
                           verbose=verbose)

    # flatten to one record per (experiment, structure), tagging each record
    # with the gene and experiment it came from; records are copied rather
    # than modified in place so the API response is left untouched
    records = []
    for exp in info:
        unions = exp['structure_unionizes']
        if not unions:
            continue
        gene_id, exp_id = exp['genes'][0]['id'], exp['id']
        records.extend(
            dict(struc, gene_id=gene_id, experiment_id=exp_id)
            for struc in unions
        )

    index = ['gene_id', 'structure_id']
    if not average:
        index.insert(1, 'experiment_id')

    # with no records the data frame would have no columns at all and the
    # indexing below would fail with an uninformative KeyError
    if not records:
        empty = pd.MultiIndex.from_arrays([[] for _ in index], names=index)
        return pd.DataFrame(index=empty, columns=attributes, dtype=float)

    # construct data frame from requested unionization info
    info = pd.DataFrame(records)
    if average:
        # only average the requested columns; averaging everything would
        # also (pointlessly) average experiment ids and fail on any
        # non-numeric column
        return info.groupby(index)[attributes].mean()

    return info.set_index(index)[attributes]


def available_unionization_info():
    """
    Lists attributes for :func:`abagen.mouse.get_unionization_from_gene`

    Returns
    -------
    attributes : list of str
        Names of the unionization attributes that can be requested. A new list
        is returned on every call so that modifying it cannot alter the
        module-level list used when `attributes='all'` is requested
    """

    return list(_UNIONIZATION_ATTRIBUTES)


def get_unionization_from_gene(id=None, acronym=None, name=None,
                               slicing_direction='sagittal', structures=None,
                               attributes=None, average=True, verbose=False):
    """
    Gets unionization data for provided gene(s)

    One of `id`, `acronym`, or `name` must be provided.

    Parameters
    ----------
    id : int, optional
        Numerical gene ID
    acronym : str, optional
        Short-form gene acronym (case sensitive)
    name : str, optional
        Full gene name (case sensitive)
    slicing_direction : {'sagittal', 'coronal'}, optional
        Slicing direction of brain tissue
    structures : list, optional
        List of structures (id, acronym, or name) for which to get unionization
        information associated with the provided gene(s). If not specified
        uses structures documented in [MI1]_. Specifying either the id or name
        is recommended as acronyms are not unique to structures. Default: None
    attributes : str or list, optional
        Which attributes / information to obtain for the provided gene. See
        :func:`abagen.mouse.available_unionization_info` for list of available
        attributes to request. If not specified then only 'expression_density'
        will be returned. Specifying 'all' will return all information.
        Default: None
    average : bool, optional
        Whether to average across experiments if there are multiple experiments
        corresponding to any provided gene(s). Only experiments probing the
        same gene will be considered for averaging, and distinct structures
        will be retained. Default: True
    verbose : bool, optional
        Whether to print status messages. Default: False

    Returns
    -------
    unionization : pandas.DataFrame
        Where columns are unionization attributes and the index corresponds to
        gene ids and structure ids. If `average=False`, experiment ids will
        also be a level in the index

    Raises
    ------
    ValueError
        If `slicing_direction` is not one of the supported values

    Examples
    --------
    >>> from abagen import mouse
    >>> mouse.get_unionization_from_gene(acronym='Pdyn',
    ...                                  structures=[22, 31])  # doctest: +NORMALIZE_WHITESPACE
                          expression_density
    gene_id structure_id
    18376   22                      0.024840
            31                      0.017199
    >>> mouse.get_unionization_from_gene(acronym=['Ace', 'Cd99'],
    ...                                  structures=[22, 31])  # doctest: +NORMALIZE_WHITESPACE
                          expression_density
    gene_id structure_id
    11210   22                      0.001283
            31                      0.001427
    163028  22                      0.067537
            31                      0.056442
    """  # noqa

    # fail fast on an unsupported plane, before any query is issued
    directions = ['sagittal', 'coronal']
    if slicing_direction not in directions:
        raise ValueError('Slicing_direction {} is invalid. Must be in {}.'
                         .format(slicing_direction, directions))

    exp_ids = _get_experiments_from_gene(id=id, acronym=acronym, name=name,
                                         slicing_direction=slicing_direction,
                                         verbose=verbose)

    # `structures` is passed through untouched: the experiment-level fetcher
    # applies the default structure list and coerces acronyms / names to
    # integer ids itself, so repeating that logic here would be redundant
    return _get_unionization_from_experiment(exp_ids, structures=structures,
                                             attributes=attributes,
                                             average=average, verbose=verbose)