# Source code for drcme.tsne

"""
The :mod:`drcme.tsne` module contains wrapper functions for
applying t-SNE to the electrophysiology and morphology data.
"""
import numpy as np
import pandas as pd
from sklearn import manifold
import logging


def combined_tsne(df_1, df_2, perplexity=25, n_iter=20000):
    """Perform t-SNE on two data sets

    The rows of `df_1` and `df_2` are stacked (first `df_1`, then `df_2`)
    and embedded together, so both data sets must have the same number of
    columns.

    Parameters
    ----------
    df_1 : DataFrame
        First data set
    df_2 : DataFrame
        Second data set
    perplexity : float
        t-SNE perplexity parameter
    n_iter : int
        Maximum number of iterations for t-SNE optimization

    Returns
    -------
    DataFrame
        Contains "x" and "y" coordinates for 2D t-SNE embedding of
        combined data set, indexed by the index labels of `df_1` followed
        by those of `df_2`
    """
    all_together = np.vstack([df_1.values, df_2.values])
    all_ids = df_1.index.tolist() + df_2.index.tolist()

    # The embedding is always two-dimensional: only the first two output
    # columns are returned as "x" and "y". (Previously this referenced an
    # undefined name `n_components`, which raised NameError on every call.)
    # NOTE(review): newer scikit-learn releases rename `n_iter` to
    # `max_iter` -- confirm against the installed version.
    tsne = manifold.TSNE(
        n_components=2,
        init='pca',
        random_state=0,  # fixed seed for reproducible embeddings
        verbose=2,
        n_iter=n_iter,
        perplexity=perplexity,
    )
    Y = tsne.fit_transform(all_together)

    return pd.DataFrame({"x": Y[:, 0], "y": Y[:, 1]}, index=all_ids)
def dual_modal_tsne(ephys_df, morph_df, relative_ephys_weight=1.,
                    perplexity=25, n_iter=20000):
    """Perform t-SNE on electrophysiology and morphology data sets

    Only samples whose index label appears in both `ephys_df` and
    `morph_df` are used. Their morphology and (weighted) electrophysiology
    features are concatenated column-wise before the embedding is computed.

    Parameters
    ----------
    ephys_df : DataFrame
        Electrophysiology data set
    morph_df : DataFrame
        Morphology data set
    relative_ephys_weight : float, optional
        Relative weighting of electrophysiology data; the electrophysiology
        values are multiplied by this factor before concatenation
    perplexity : float
        t-SNE perplexity parameter
    n_iter : int
        Maximum number of iterations for t-SNE optimization

    Returns
    -------
    DataFrame
        Contains "x" and "y" coordinates for 2D t-SNE embedding for
        samples that exist in both `ephys_df` and `morph_df`, in the order
        they appear in `morph_df`

    Raises
    ------
    ValueError
        If `ephys_df` and `morph_df` have no samples in common
    """
    # Only use morphs that have ephys. Index.isin avoids rebuilding a
    # Python list of ids for every membership test.
    has_ephys = morph_df.index.isin(ephys_df.index)
    morph_df_joint = morph_df.loc[has_ephys, :]

    # Get ephys data for cells with morphologies, in morphology order so
    # that the rows of the two matrices line up for the hstack below.
    ephys_df_joint = ephys_df.loc[morph_df_joint.index, :]

    if morph_df_joint.shape[0] == 0:
        raise ValueError(
            "ephys_df and morph_df have no samples in common")

    # Lazy %-style arguments need a placeholder; without one the logging
    # module reports a formatting error when DEBUG output is enabled.
    logging.debug("ephys joint shape %s", ephys_df_joint.shape)
    logging.debug("morph joint shape %s", morph_df_joint.shape)

    elmo_data = np.hstack([morph_df_joint.values,
                           relative_ephys_weight * ephys_df_joint.values])

    # The embedding is always two-dimensional: only the first two output
    # columns are returned as "x" and "y". (Previously this referenced an
    # undefined name `n_components`, which raised NameError on every call.)
    # NOTE(review): newer scikit-learn releases rename `n_iter` to
    # `max_iter` -- confirm against the installed version.
    tsne = manifold.TSNE(
        n_components=2,
        init='pca',
        random_state=0,  # fixed seed for reproducible embeddings
        verbose=2,
        n_iter=n_iter,
        perplexity=perplexity,
    )
    Y = tsne.fit_transform(elmo_data)

    return pd.DataFrame({"x": Y[:, 0], "y": Y[:, 1]},
                        index=ephys_df_joint.index.values)