Source code for abagen.mouse.gene

# -*- coding: utf-8 -*-
"""
Functions to make mouse gene queries and manipulations
"""

import re
import pandas as pd
from .utils import _coerce_inputs, _make_api_query


# available attributes of gene query
_GENE_ATTRIBUTES = [
    'acronym',
    'alias_tags',
    'chromosome_id',
    'ensembl_id',
    'entrez_id',
    'genomic_reference_update_id',
    'homologene_id',
    'id',
    'legacy_ensembl_gene_id',
    'name',
    'organism_id',
    'original_name',
    'original_symbol',
    'reference_genome_id',
    'sphinx_id',
    'version_status'
]


[docs]def available_gene_info(): """ Lists available attributes for :func:`abagen.mouse.get_gene_info` """ return _GENE_ATTRIBUTES.copy()
[docs]def get_gene_info(id=None, acronym=None, name=None, attributes=None, verbose=False): """ Queries Allen API for information about given gene One of `id`, `acronym`, or `name` must be provided. Parameters ---------- id : int, optional Numerical gene ID acronym : str, optional Short-form gene acronym (case sensitive) name : str, optional Full gene name (case sensitive) attributes : str or list, optional Which attributes / information to obtain for the provided gene. See :func:`abagen.mouse.available_gene_info` for list of available attributes to request. If not specified all available attributes will be returned. Default: None verbose : bool, optional Whether to print status messages. Default: False Returns ------- info : pandas.DataFrame If `attributes` is a str, returns an int or str depending on specified attribute. If `attributes` is a list, return a dict where keys are attributes and values are str or int. Raises ------ ValueError The provided gene is invalid Examples -------- Get gene ID and name corresponding to gene acronym 'Pdyn': >>> from abagen import mouse >>> mouse.get_gene_info(acronym='Pdyn', ... attributes=['id', 'name']) # doctest: +NORMALIZE_WHITESPACE id name acronym Pdyn 18376 prodynorphin You can also supply multiple genes to the query: >>> mouse.get_gene_info(acronym=['Ace', 'Cd99'], ... attributes=['id', 'name']) # doctest: +NORMALIZE_WHITESPACE id name acronym Ace 11210 angiotensin I converting enzyme (peptidyl-dipe... Cd99 163028 CD99 antigen """ # noqa criteria = [ _coerce_inputs(id=id, acronym=acronym, name=name), 'products[id$eq1]' ] provided = re.search(r'\[(\S+)\$', criteria[0]).group(1) # determine which attributes to request; if we don't have to request all # of them then we can speed up the API call if attributes is None: attributes = _GENE_ATTRIBUTES elif isinstance(attributes, str): attributes = [attributes] attributes = [a for a in attributes if a not in provided] for attr in attributes: if attr not in _GENE_ATTRIBUTES: raise ValueError('Provided attribute "{}" is invalid; please ' 'check valid attributes with ' 'abagen.mouse.available_gene_info().' .format(attr)) info = _make_api_query('Gene', criteria=criteria, attributes=attributes + [provided], verbose=verbose) info = pd.DataFrame(info).set_index(provided)[attributes] return info.sort_index()