Conditioned generation of cell states

In this tutorial, we will use a pre-trained diffusion model to generate cell states conditioned on their cell type labels.

Let’s begin by importing the required packages.

[1]:
import os

import numpy as np
import matplotlib.pyplot as plt

import scanpy as sc

import torch

from scmg.model.contrastive_embedding import CellEmbedder, decode_cell_state_embedding
from scmg.model.manifold_generation import ConditionalDiffusionModel, generate_cells, generate_transition_cells
from scmg.model.cell_type_search import CellTypeSearcher

Load the trained SCMG models.

[2]:
# Device used for every model in this tutorial
device = 'cpu'


def _load_pretrained(model_dir, device):
    """Load a pickled SCMG module from ``model_dir`` and restore its best weights.

    Parameters
    ----------
    model_dir : str
        Directory containing 'model.pt' (the pickled module) and
        'best_state_dict.pth' (the state dict to restore into it).
    device : str or torch.device
        Device the tensors are mapped to and the module is moved to.

    Returns
    -------
    The loaded module, moved to ``device`` and switched to evaluation mode.
    """
    # 'model.pt' holds a whole pickled module, not just tensors. PyTorch >= 2.6
    # defaults to ``weights_only=True``, which rejects such files, so opt out
    # explicitly. Unpickling can execute arbitrary code: only load model files
    # obtained from a trusted source.
    model = torch.load(os.path.join(model_dir, 'model.pt'),
                       map_location=device, weights_only=False)
    model.load_state_dict(torch.load(os.path.join(model_dir, 'best_state_dict.pth'),
                                     map_location=device))
    model.to(device)
    return model.eval()


# Load the autoencoder model
model_path = 'models/embedder'
scmg_model = _load_pretrained(model_path, device)

# Load the diffusion model
model_d_path = 'models/diffusion_model'
model_d = _load_pretrained(model_d_path, device)

# Display the architecture of the diffusion model
model_d
[2]:
ConditionalDiffusionModel(
  (network): MLPDenoiser(
    (fc1): Linear(in_features=1280, out_features=2048, bias=True)
    (blocks): ModuleList(
      (0-7): 8 x RecurrentBlock(
        (fc1): Linear(in_features=3328, out_features=2048, bias=True)
        (act): LeakyReLU(negative_slope=0.01)
        (fc2): Linear(in_features=2048, out_features=2048, bias=True)
        (norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
      )
    )
    (fc2): Linear(in_features=3328, out_features=512, bias=True)
  )
)

Load the standard reference dataset of the global manifold

[3]:
# Read the reference AnnData of the global cell state manifold;
# it is used later to search for neighboring reference cell types
adata_ref = sc.read_h5ad('data/ref_global_cell_state_manifold.h5ad')

Let’s print out the available cell types to generate.

[4]:
# Show every cell type label that the diffusion model accepts as a generation condition
print(model_d.condition_classes)
['Activated T cells', 'Adenohypophyseal placode', 'Adipocyte cells (Cyp2e1+)', 'Adipocyte progenitor cells', 'Adipose tissue macrophages', 'Adrenocortical cells', 'Advanced Mesoderm', 'Airway club cells', 'Airway goblet cells', 'Allantois', 'Alveolar Type 1 cells', 'Alveolar Type 2 cells', 'Amacrine cells', 'Amacrine/Horizontal precursor cells', 'Amniochorionic mesoderm', 'Amniochorionic mesoderm A', 'Amniochorionic mesoderm B', 'Amniotic ectoderm', 'Angioblast', 'Anterior', 'Anterior floor plate', 'Anterior intermediate mesoderm', 'Anterior primitive streak', 'Anterior roof plate', 'Anteromedial cerebral pole', 'Apical ectodermal ridge', 'Arachnoid', 'Arachnoid barrier cells', 'Arterial endothelial cells', 'Ascending loop of Henle', 'Astro-Epen', 'Astrocytes', 'Atrial cardiomyocytes', 'Axial Mesoderm', 'Axon tract-associated microglia', 'B cell', 'B cell progenitors', 'B cells', 'B-1 B cell', 'B-1a B cell', 'B-1b B cell', 'B-2 B cell', 'Basal keratinocytes', 'Bergmann glia', 'Bergmann glial cell', 'Biliary epithelial cells', 'Bipolar precursor cells', 'Bladder urothelial cells', 'Blood progenitors', 'Border-associated macrophages', 'Border-associated macrophages (Cd74+)', 'Border-associated macrophages (Ms4a8a+)', 'Brain capillary endothelial cells', 'Brain endothelium', 'Brain pericytes', 'Branchial arch epithelium', 'Brown adipocyte cells', 'CB GABA', 'CB Glut', 'CD14-positive monocyte', 'CD14-positive, CD16-positive monocyte', 'CD141-positive myeloid dendritic cell', 'CD16-negative, CD56-bright natural killer cell, human', 'CD16-positive, CD56-dim natural killer cell, human', 'CD1c-positive myeloid dendritic cell', 'CD34-positive, CD56-positive, CD117-positive common innate lymphoid precursor, human', 'CD4-positive helper T cell', 'CD4-positive, alpha-beta T cell', 'CD4-positive, alpha-beta memory T cell', 'CD8-alpha-alpha-positive, alpha-beta intraepithelial T cell', 'CD8-positive, alpha-beta T cell', 'CD8-positive, alpha-beta cytokine secreting effector T 
cell', 'CD8-positive, alpha-beta cytotoxic T cell', 'CD8-positive, alpha-beta memory T cell', 'CD8-positive, alpha-beta memory T cell, CD45RO-positive', 'CGE GABA', 'CNU GABA', 'CNU-HYa GABA', 'CNU-HYa Glut', 'Cajal-Retzius', 'Cajal-Retzius cell', 'Cajal-Retzius cells', 'Cardiac mesoderm', 'Cardiomyocytes', 'Caudal', 'Caudal lateral epiblast', 'Caudal neuroectoderm', 'Cerebellar Purkinje cells', 'Cerebellum GABAergic', 'Cerebellum glutamatergic', 'Cerebellum-related cells', 'Cholinergic amacrine cells', 'Chondrocyte and osteoblast progenitors', 'Chondrocytes', 'Chondrocytes (Atp1a2+)', 'Chondrocytes (Otor+)', 'Choroid plexus', 'Ciliary margin cells', 'Ciliated nodal cells', 'Cochlear hair cells', 'Collecting duct intercalated cells', 'Collecting duct principal cells', 'Committed oligodendrocyte precursor', 'Committed oligodendrocyte precursors', 'Cone precursor cells', 'Conjunctival goblet cells', 'Connecting tubule', 'Connective tissue progenitors', 'Conventional dendritic cells', 'Corneal epithelial cells', 'Cortical Interneurons (Prox1+)', 'Cortical hem', 'Cortical or hippocampal glutamatergic', 'Corticofugal neurons', 'Cranial motor neurons', 'Cycling microglia', 'Cycling perivascular macrophages', 'DN1 thymic pro-T cell', 'DN3 thymocyte', 'DN4 thymocyte', 'Deep-layer neurons', 'Definitive early erythroblasts (CD36-)', 'Definitive endoderm', 'Definitive erythroblasts (CD36+)', 'Definitive erythroid cells', 'Dental epithelial cells', 'Dermatome', 'Dermomyotome', 'Di/mesencephalon excitatory neurons', 'Di/mesencephalon inhibitory neurons', 'Di/telencephalon', 'Diencephalic roof plate', 'Diencephalon', 'Diencephalon glutamatergic', 'Distal convoluted tubule', 'Dorsal diencephalon', 'Dorsal early ectoderm', 'Dorsal forebrain', 'Dorsal hindbrain', 'Dorsal hindbrain and spinal cord', 'Dorsal midbrain', 'Dorsal midbrain glutamatergic', 'Dorsal root ganglion neurons', 'Dorsal telencephalon', 'Early Trophectoderm', 'Early chondrocytes', 'Early choroid plexus', 'Early 
ectoderm', 'Early fibroblasts', 'Early macrophage', 'Embryonic visceral endoderm', 'Emergent Mesoderm', 'Endocardial cells', 'Endoderm', 'Endothelial', 'Endothelium', 'Enteric neurons', 'Ependymal cells', 'Ependymal-like', 'Epiblast', 'Epidermis', 'Epithalamus glutamatergic', 'Erythroblasts', 'Erythrocyte', 'Erythroid progenitor', 'ExE Mesoderm', 'Extraembryonic ectoderm', 'Extraembryonic endoderm', 'Extraembryonic mesoderm', 'Extraembryonic visceral endoderm', 'Eye field', 'Facial mesenchyme', 'Fibroblasts', 'First heart field', 'Floor plate', 'Floorplate and p3 domain', 'Forebrain', 'Forebrain GABAergic', 'Forebrain astrocyte', 'Forebrain glutamatergic', 'Forebrain/midbrain', 'Foregut epithelial cells', 'Foregut epithelium', 'Fourth-ventricle roof plate', 'Fusing epithelium', 'GABAergic cortical interneurons', 'GABAergic neuron', 'GABAergic neurons', 'Glomerular endothelial cells', 'Glutamatergic neurons', 'Granular keratinocytes', 'Granulocytes', 'Gut', 'Gut and lung epithelium', 'HY GABA', 'HY Glut', 'HY Gnrh1 Glut', 'HY MM Glut', 'Hematoendothelial lineage', 'Hematoendothelial progenitors', 'Hematopoietic stem cells (Cd34+)', 'Hematopoietic stem cells (Mpo+)', 'Hemogenic Endothelial Progenitors', 'Hepatocytes', 'Hindbrain', 'Hindbrain GABAergic', 'Hindbrain floor plate', 'Hindbrain glutamatergic', 'Hindbrain glycinergic', 'Hindbrain roof plate', 'Hindbrain serotoninergic', 'Hofbauer cell', 'Horizontal cells', 'Hypoblast', 'Hypothalamic floor-plate like', 'Hypothalamus', 'Hypothalamus (Sim1+)', 'Hypothalamus glutamatergic', 'IT-ET Glut', 'IgA plasma cell', 'IgG plasma cell', 'IgM plasma cell', 'Immune', 'Inhibitory interneurons', 'Inner Cell Mass', 'Inner Cell Mass-Trophectoderm Transition', 'Inner cell mass', 'Intermediate meninges 1', 'Intermediate meninges 2', 'Intermediate mesoderm', 'Intermediate neuronal progenitors', 'Intermediate progenitor cells', 'Intestinal enteroendocrine cells', 'Intestinal goblet cells', 'Kupffer cell', 'Kupffer cells', 'LSX 
GABA', 'Langerhans cell', 'Lateral nasal pit', 'Lateral plate and intermediate mesoderm', 'Lens', 'Lens epithelial cells', 'Leydig cells', 'Limb mesenchyme progenitors', 'Liver endothelium', 'Liver sinusoidal endothelial cells', 'Lung cells (Eln+)', 'Lung epithelium', 'Lung progenitor cells', 'Lymphatic vessel endothelial cells', 'M cell of gut', 'MB Dopa', 'MB GABA', 'MB Glut', 'MB-HB Sero', 'MGE GABA', 'MH-LH Glut', 'MOB-CR Glut', 'MOB-DG-IMN', 'MY GABA', 'MY Glut', 'Mast cells', 'Mast cells (P2rx7+)', 'Megakaryocytes', 'Melanocyte cells', 'Mesencephalon/MHB', 'Mesenchymal stromal cells', 'Mesenchyme', 'Mesoderm', 'Mesodermal progenitors (Tbx6+)', 'Metanephric mesenchyme', 'Microglia', 'Microvascular endothelial cells', 'Midbrain', 'Midbrain GABAergic', 'Midbrain basal plate', 'Midbrain dopaminergic', 'Midbrain floor plate', 'Midbrain glutamatergic', 'Midbrain-hindbrain GABAergic', 'Midbrain-hindbrain boundary', 'Midgut/Hindgut epithelial cells', 'Midgut/Hindgut epithelium', 'Mixed mesoderm', 'Mixed region astrocytes', 'Monocytes', 'Monocytic myeloid-derived suppressor cells', 'Motor neuron', 'Motor neurons', 'Mueller cell', 'Multiciliated ependymal cells', 'Muscle progenitor cells', 'Muscle progenitor cells (Prdm1+)', 'Myelinating Schwann cells', 'Myelinating Schwann cells (Tgfb2+)', 'Myoblasts', 'Myocytes', 'Myofibroblasts', 'Myotubes', 'NKp44-negative group 3 innate lymphoid cell, human', 'NKp44-positive group 3 innate lymphoid cell, human', 'NMPs and spinal cord progenitors', 'NP-CT-L6b Glut', 'Naive retinal progenitor cells', 'Nascent Mesoderm', 'Nascent mesoderm', 'Natural killer cells', 'Nephron progenitors', 'Neural crest', 'Neural crest (PNS glia)', 'Neural crest (PNS neurons)', 'Neural progenitor cells (Neurod1+)', 'Neural progenitor cells (Ror1+)', 'Neuromesodermal progenitors', 'Neuron progenitor cells', 'Neuronal intermediate progenitor', 'Neurons (Slc17a8+)', 'Node', 'Non-Neural Ectoderm', 'Non-cycling microglia', 'Non-cycling perivascular 
macrophages', 'Noradrenergic neurons', 'Notochord', 'OEG', 'OFF-bipolar cell', 'ON-bipolar cell', 'Olfactory bulb cells', 'Olfactory ensheathing cell', 'Olfactory ensheathing cells', 'Olfactory epithelial cells', 'Olfactory epithelium', 'Olfactory pit', 'Olfactory pit cells', 'Olfactory sensory neurons', 'Oligo', 'Oligodendrocyte', 'Oligodendrocyte precursor cell', 'Oligodendrocyte progenitor cells', 'Optic cup', 'Oral ectoderm', 'Osteoblast progenitors A', 'Osteoblast progenitors B', 'Osteoclasts', 'Otic epithelial cells', 'Otic epithelium', 'Otic sensory neurons', 'Otic vesicle', 'P GABA', 'P Glut', 'PMN myeloid-derived suppressor cells', 'PV-containing retinal ganglion cells', 'Pancreatic acinar cells', 'Pancreatic epithelium', 'Pancreatic islets', 'Parasympathetic neurons', 'Parathyroid epithelial cells', 'Paraxial mesoderm A', 'Paraxial mesoderm B', 'Paraxial mesoderm C', 'Parietal endoderm', 'Pericyte', 'Pericytes', 'Photoreceptor precursor cells', 'Pineal Glut', 'Pineal gland', 'Pituitary gland cells', 'Pituitary neuroendocrine', 'Pituitary/Pineal gland progenitors', 'Placodal area', 'Plasmacytoid dendritic cells', 'Platelet', 'Podocytes', 'Posterior floor plate', 'Posterior hindbrain glutamatergic', 'Posterior intermediate mesoderm', 'Posterior roof plate', 'Pre-epidermal keratinocytes', 'Pre-osteoblasts (Sp7+)', 'Primitive Streak', 'Primitive erythroid cells', 'Primitive streak and adjacent ectoderm', 'Primordial germ cells', 'Proximal tubule cells', 'Purkinje cell', 'Regulatory T cells', 'Renal epithelium', 'Retinal ganglion cells', 'Retinal neurons', 'Retinal pigment cells', 'Retinal primordium', 'Retinal progenitor cells', 'Rod precursor cells', 'Roof plate', 'Rostral neuroectoderm', 'Satellite glial cells', 'Schwann cell', 'Schwann cell precursor', 'Sclerotome', 'Second heart field', 'Sensory neuron', 'Skeletal muscle progenitors', 'Somatic mesoderm', 'Spinal cord', 'Spinal cord (dorsal)', 'Spinal cord (ventral)', 'Spinal cord dorsal progenitors', 
'Spinal cord excitatory neurons', 'Spinal cord glutamatergic', 'Spinal cord inhibitory neurons', 'Spinal cord motor neurons', 'Spinal cord ventral progenitors', 'Spinal cord/r7/r8', 'Splanchnic mesoderm', 'Subcommissural organ hypendymal cell', 'Subplate neurons', 'Suprachiasmatic nucleus', 'Surface ectoderm', 'Sympathetic neurons', 'T cell', 'T cells', 'T follicular helper cell', 'T-helper 1 cell', 'T-helper 17 cell', 'TH Glut', 'Telencephalon', 'Thalamic neuronal precursors', 'Thalamus glutamatergic', 'Thymic epithelial cells', 'Thyroid gland cells', 'Tooth junctional epithelium', 'Trophectoderm', 'Upper-layer neurons', 'Ureteric bud', 'Vascular', 'Vascular smooth muscle', 'Venous and capillary endothelial cells', 'Ventral hindbrain', 'Ventral midbrain', 'Ventricular cardiomyocytes', 'Visceral endoderm', 'White blood cells', 'Zona limitans intrathalamica', 'acinar cell', 'acinar cell of salivary gland', 'activated CD4-positive, alpha-beta T cell', 'activated CD8-positive, alpha-beta T cell', 'adipocyte of epicardial fat of left ventricle', 'adventitial cell', 'alpha-beta T cell', 'alternatively activated macrophage', 'alveolar macrophage', 'alveolar type 1 fibroblast cell', 'alveolar type 2 fibroblast cell', 'amacrine cell', 'aortic endothelial cell', 'astrocyte', 'basal cell', 'basal cell of epidermis', 'basal cell of prostate epithelium', 'basal epithelial cell of tracheobronchial tree', 'basophil', 'basophil mast progenitor cell', 'bipolar neuron', 'bladder urothelial cell', 'blood cell', 'blood vessel endothelial cell', 'brain pericyte', 'bronchial goblet cell', 'bronchial smooth muscle cell', 'bronchus fibroblast of lung', 'brush cell', 'brush cell of trachebronchial tree', 'capillary endothelial cell', 'cardiac endothelial cell', 'cardiac muscle cell', 'cardiac muscle myoblast', 'cell of skeletal muscle', 'cerebral cortex GABAergic interneuron', 'cerebral cortex endothelial cell', 'chondrocyte', 'ciliated cell', 'ciliated columnar cell of tracheobronchial 
tree', 'ciliated epithelial cell', 'classical monocyte', 'club cell', 'colon epithelial cell', 'common dendritic progenitor', 'common lymphoid progenitor', 'common myeloid progenitor', 'conjunctival epithelial cell', 'connective tissue cell', 'conventional dendritic cell', 'cord blood hematopoietic stem cell', 'corneal epithelial cell', 'cortical cell of adrenal gland', 'cortical thymic epithelial cell', 'craniofacial cell', 'decidual natural killer cell, human', 'dendritic cell', 'dendritic cell, human', 'dermis microvascular lymphatic vessel endothelial cell', 'double negative thymocyte', 'double-positive, alpha-beta thymocyte', 'duct epithelial cell', 'duodenum glandular cell', 'early T lineage precursor', 'early lymphoid progenitor', 'early pro-B cell', 'effector CD4-positive, alpha-beta T cell', 'effector CD8-positive, alpha-beta T cell', 'effector memory CD4-positive, alpha-beta T cell', 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated', 'elicited macrophage', 'embryonic fibroblast', 'embryonic stem cell', 'endocardial cell', 'endocrine cell', 'endodermal cell', 'endothelial cell', 'endothelial cell of artery', 'endothelial cell of coronary artery', 'endothelial cell of hepatic sinusoid', 'endothelial cell of lymphatic vessel', 'endothelial cell of placenta', 'endothelial cell of sinusoid', 'endothelial cell of uterus', 'endothelial cell of vascular tree', 'endothelial tip cell', 'enteric neuron', 'enteric smooth muscle cell', 'enterocyte', 'enterocyte of colon', 'enterocyte of epithelium of large intestine', 'enterocyte of epithelium of small intestine', 'enteroendocrine cell', 'eosinophil', 'ependymal cell', 'epicardial adipocyte', 'epidermal cell', 'epithelial cell', 'epithelial cell of alveolus of lung', 'epithelial cell of exocrine pancreas', 'epithelial cell of lacrimal sac', 'epithelial cell of large intestine', 'epithelial cell of lower respiratory tract', 'epithelial cell of lung', 'epithelial cell of nephron', 'epithelial 
cell of prostate', 'epithelial cell of proximal tubule', 'epithelial cell of thymus', 'epithelial cell of uterus', 'erythroblast', 'erythrocyte', 'erythroid lineage cell', 'erythroid progenitor cell', 'eurydendroid cell', 'eye photoreceptor cell', 'fast muscle cell', 'fat cell', 'fenestrated cell', 'fibro/adipogenic progenitor cell', 'fibroblast', 'fibroblast of breast', 'fibroblast of cardiac tissue', 'fibroblast of lung', 'fibroblast of mammary gland', 'forebrain radial glial cell', 'fraction A pre-pro B cell', 'gamma-delta T cell', 'ganglion interneuron', 'germ cell', 'germinal center B cell', 'glandular epithelial cell', 'glial cell', 'glutamatergic neuron', 'goblet cell', 'granule cell', 'granulocyte', 'granulocyte monocyte progenitor cell', 'granulocytopoietic cell', 'group 2 innate lymphoid cell', 'group 3 innate lymphoid cell', 'gut endothelial cell', 'head mesodermal cell', 'hematopoietic multipotent progenitor cell', 'hematopoietic precursor cell', 'hematopoietic stem cell', 'hepatoblast', 'hepatocyte', 'immature B cell', 'immature NK T cell', 'immature Schwann cell', 'immature T cell', 'immature innate lymphoid cell', 'immature natural killer cell', 'inflammatory cell', 'inflammatory macrophage', 'inhibitory interneuron', 'inhibitory motor neuron', 'innate lymphoid cell', 'intermediate monocyte', 'interstitial cell of Cajal', 'intestinal crypt stem cell', 'intestinal crypt stem cell of large intestine', 'intestinal crypt stem cell of small intestine', 'intestinal enteroendocrine cell', 'intestinal epithelial cell', 'intestinal tuft cell', 'intestine goblet cell', 'intrahepatic cholangiocyte', 'ionocyte', 'keratinocyte', 'keratinocyte stem cell', 'keratocyte', 'kidney capillary endothelial cell', 'kidney collecting duct epithelial cell', 'kidney collecting duct intercalated cell', 'kidney collecting duct principal cell', 'kidney connecting tubule epithelial cell', 'kidney cortex artery cell', 'kidney distal convoluted tubule epithelial cell', 'kidney 
epithelial cell', 'kidney interstitial fibroblast', 'kidney loop of Henle ascending limb epithelial cell', 'kidney loop of Henle epithelial cell', 'kidney loop of Henle thick ascending limb epithelial cell', 'kidney loop of Henle thin ascending limb epithelial cell', 'kidney loop of Henle thin descending limb epithelial cell', 'kidney proximal convoluted tubule epithelial cell', 'kidney proximal straight tubule epithelial cell', 'large intestine goblet cell', 'large pre-B-II cell', 'late pro-B cell', 'lateral mesodermal cell', 'lens fiber cell', 'leukocyte', 'limb cell', 'liver dendritic cell', 'luminal cell of prostate epithelium', 'luminal epithelial cell of mammary gland', 'lung ciliated cell', 'lung macrophage', 'lung microvascular endothelial cell', 'lung pericyte', 'lung secretory cell', 'lymphoid lineage restricted progenitor cell', 'macrophage', 'macrophage dendritic cell progenitor', 'mast cell', 'mature B cell', 'mature NK T cell', 'mature conventional dendritic cell', 'medium spiny neuron', 'medullary thymic epithelial cell', 'megakaryocyte', 'megakaryocyte-erythroid progenitor cell', 'melanocyte', 'melanocyte of skin', 'memory B cell', 'mesangial cell', 'mesenchymal cell', 'mesenchymal lymphangioblast', 'mesenchymal stem cell', 'mesenchymal stem cell of adipose tissue', 'mesodermal cell', 'mesothelial cell', 'microglial cell', 'monocyte', 'motor neuron', 'mucosal invariant T cell', 'mucus secreting cell', 'multi-ciliated epithelial cell', 'muscle cell', 'muscle precursor cell', 'myelocyte', 'myeloid cell', 'myeloid dendritic cell', 'myeloid lineage restricted progenitor cell', 'myoepithelial cell', 'myoepithelial cell of mammary gland', 'myofibroblast cell', 'myometrial cell', 'naive B cell', 'naive regulatory T cell', 'naive thymus-derived CD4-positive, alpha-beta T cell', 'naive thymus-derived CD8-positive, alpha-beta T cell', 'nasal mucosa goblet cell', 'native cell', 'natural killer cell', 'neural progenitor cell', 'neuroendocrine cell', 'neuron', 
'neuronal brush cell', 'neutrophil', 'non-classical monocyte', 'oligodendrocyte', 'oligodendrocyte precursor cell', 'osteoblast', 'osteoclast', 'pancreatic A cell', 'pancreatic D cell', 'pancreatic PP cell', 'pancreatic acinar cell', 'pancreatic ductal cell', 'pancreatic endocrine cell', 'pancreatic stellate cell', 'paneth cell of colon', 'paneth cell of epithelium of small intestine', 'parietal epithelial cell', 'peptic cell', 'pericyte', 'photoreceptor cell', 'pigmented ciliary epithelial cell', 'plasma cell', 'plasmablast', 'plasmacytoid dendritic cell', 'platelet', 'podocyte', 'podocyte (sensu Diptera)', 'pre-B-I cell', 'pre-conventional dendritic cell', 'precursor B cell', 'preosteoblast', 'primitive red blood cell', 'primordial germ cell', 'pro-B cell', 'professional antigen presenting cell', 'progenitor cell', 'progenitor cell of endocrine pancreas', 'progenitor cell of mammary luminal epithelium', 'promonocyte', 'promyelocyte', 'pulmonary artery endothelial cell', 'pulmonary interstitial fibroblast', 'pulmonary ionocyte', 'radial glial cell', 'regular atrial cardiac myocyte', 'regular ventricular cardiac myocyte', 'regulatory T cell', 'renal intercalated cell', 'renal interstitial pericyte', 'respiratory basal cell', 'respiratory epithelial cell', 'respiratory goblet cell', 'reticular cell', 'reticulocyte', 'retina horizontal cell', 'retinal bipolar neuron', 'retinal blood vessel endothelial cell', 'retinal cone cell', 'retinal ganglion cell', 'retinal pigment epithelial cell', 'retinal rod cell', 'rod bipolar cell', 'salivary gland cell', 'secretory cell', 'sensory neuron', 'serous cell of epithelium of bronchus', 'serous cell of epithelium of trachea', 'serous secreting cell', 'skeletal muscle fiber', 'skeletal muscle satellite cell', 'skeletal muscle satellite stem cell', 'skin fibroblast', 'slow muscle cell', 'small intestine goblet cell', 'small pre-B-II cell', 'smooth muscle cell', 'smooth muscle cell of trachea', 'smooth muscle myoblast', 'somite', 
'sperm', 'stellate neuron', 'stem cell', 'stem cell of epidermis', 'stromal cell', 'stromal cell of lamina propria of small intestine', 'subcutaneous fat cell', 'surface ectodermal cell', 'sympathetic neuron', 'tendon cell', 'thymocyte', 'thyroid follicular cell', 'tongue muscle cell', 'tracheal goblet cell', 'tracheobronchial goblet cell', 'tracheobronchial serous cell', 'tracheobronchial smooth muscle cell', 'transit amplifying cell', 'transit amplifying cell of colon', 'transit amplifying cell of small intestine', 'type A enteroendocrine cell', 'type B pancreatic cell', 'type D enteroendocrine cell', 'type I NK T cell', 'type I enteroendocrine cell', 'type I pneumocyte', 'type II pneumocyte', 'type L enteroendocrine cell', 'type N enteroendocrine cell', 'uterine smooth muscle cell', 'vascular associated smooth muscle cell', 'vascular leptomeningeal cell', 'vein endothelial cell', 'ventricular cardiac muscle cell', 'visceromotor neuron']

Generation of cell types

As an example, let’s generate a few cell types for gastrulation.

[5]:
# Number of cells to generate for each cell type
target_cell_type_dict = {
    'Epiblast': 300,
    'Primitive streak and adjacent ectoderm': 300,
    'Rostral neuroectoderm': 300,
    'Nascent mesoderm': 300,
    'Anterior primitive streak': 300,
}

# Expand the counts into one condition label per cell to generate
cond_classes = [
    cell_type
    for cell_type, n_cells in target_cell_type_dict.items()
    for _ in range(n_cells)
]

# Generate the cells
generated_cells = generate_cells(model_d, cond_classes)

# Decode the gene expression levels of the generated cells.
# Use the experimental condition of the dataset Qiu_Organogenesis_MM_2022:all for gene expression decoding
decoding_conditions = ['Qiu_Organogenesis_MM_2022:all'] * generated_cells.shape[0]
adata_generated = decode_cell_state_embedding(scmg_model, generated_cells, decoding_conditions)
adata_generated.obs['cell_type'] = cond_classes
adata_generated
[5]:
AnnData object with n_obs × n_vars = 1500 × 18108
    obs: 'cell_type'
    obsm: 'X_ce_latent'

We can plot the generated cells on a UMAP.

[6]:
# Build the neighbor graph on the 'X_ce_latent' embedding (30 neighbors per cell)
sc.pp.neighbors(adata_generated, use_rep='X_ce_latent', n_neighbors=30)
# Compute the UMAP and color each cell by the label it was generated with
sc.tl.umap(adata_generated)
sc.pl.umap(adata_generated, color='cell_type')
../_images/tutorials_conditioned_generation_of_cell_states_13_0.png

Generation of continuous cell state trajectories

We can iteratively generate the whole developmental trajectory of a cell type starting from the epiblast (embryonic stem cell). This process requires the user to have certain prior knowledge of the developmental process of interest.

As an example, let’s generate the trajectory from epiblast to pancreatic islet B cells. We begin by generating a population of epiblast cells.

[7]:
# Starting population for the trajectory: cells conditioned on the 'Epiblast' label
n_epiblast_cells = 200
generated_cells_query = generate_cells(model_d, ['Epiblast'] * n_epiblast_cells)

We can search the neighbor cell types of the generated epiblast cells.

[8]:
# Build the searcher on the reference manifold
cts = CellTypeSearcher(adata_ref)

# Look up the reference cell types near the generated epiblast cells
ct_match_df = cts.search_ref_cell_types(generated_cells_query, radius=2)

# Show the first 20 matches
ct_match_df.head(20)
[8]:
weight
Epiblast 0.129208
Primitive streak and adjacent ectoderm 0.097211
Primitive Streak 0.043280
Trophectoderm 0.012249
Rostral neuroectoderm 0.010646
Inner Cell Mass 0.010090
Anterior primitive streak 0.007708
embryonic stem cell 0.007708
Inner cell mass 0.006908
germ cell 0.006688
Inner Cell Mass-Trophectoderm Transition 0.006513
Definitive endoderm 0.004518
Nascent mesoderm 0.003944
Primordial germ cells 0.003875
Early Trophectoderm 0.003838
primordial germ cell 0.003836
Hypoblast 0.003603
Nascent Mesoderm 0.003459
Emergent Mesoderm 0.003308
Caudal lateral epiblast 0.002270

Because pancreatic islet cells develop from the endoderm, which originates from the anterior part of the primitive streak, let’s choose the primitive streak as our next cell type.

By interpolating the cell type embedding vectors, we can let the diffusion model generate a continuous population of cells for the cell type transition.

[9]:
# Generate cells for the 'Epiblast' -> 'Primitive Streak' transition.
# cond_classes labels each generated cell and is used below to select the
# 'Primitive Streak' cells.
# NOTE(review): 500 is presumably the number of cells to generate — confirm
# against generate_transition_cells.
generated_cells, cond_classes = generate_transition_cells(model_d, 'Epiblast', 'Primitive Streak', 500)

Now we have a population of primitive streak cells, using which we can search for their neighbor cell types. By iteratively repeating this process, we can reconstruct the entire trajectory.

[10]:
# Keep only the cells that were generated with the 'Primitive Streak' label
is_primitive_streak = np.array(cond_classes) == 'Primitive Streak'
generated_cells_query = generated_cells[is_primitive_streak]

# Look up the reference cell types near these cells and show the first 20 matches
ct_match_df = cts.search_ref_cell_types(generated_cells_query, radius=2)
ct_match_df.head(20)
[10]:
weight
Primitive Streak 0.121187
Epiblast 0.091966
Primitive streak and adjacent ectoderm 0.076221
Nascent Mesoderm 0.030924
Emergent Mesoderm 0.028855
Anterior primitive streak 0.028536
Nascent mesoderm 0.018618
germ cell 0.018388
Primordial germ cells 0.017211
embryonic stem cell 0.013809
Definitive endoderm 0.013602
Rostral neuroectoderm 0.009774
Caudal lateral epiblast 0.009184
primordial germ cell 0.008438
Node 0.008097
Trophectoderm 0.004328
Mesoderm 0.004298
Non-Neural Ectoderm 0.003607
Endoderm 0.003602
Caudal neuroectoderm 0.003296

Let’s take a shortcut here. If we already know all the intermediate cell types, we can generate the trajectory with a few lines of code.

[11]:
# Ordered cell types along the trajectory; each consecutive pair is one transition
traj_cell_types = [
    'Epiblast', 'Primitive Streak', 'Definitive endoderm', 'Gut',
    'Pancreatic epithelium', 'Pancreatic islets', 'type B pancreatic cell',
]

generated_cells = []
cond_classes = []

# Generate one transition segment for every pair of consecutive cell types
for start_cell_type, end_cell_type in zip(traj_cell_types[:-1], traj_cell_types[1:]):
    local_generated_cells, local_cond_classes = generate_transition_cells(
        model_d, start_cell_type, end_cell_type, 500)

    # Append the segment cell by cell so that cells and labels stay aligned
    n_local = len(local_generated_cells)
    generated_cells.extend(local_generated_cells[j] for j in range(n_local))
    cond_classes.extend(local_cond_classes[j] for j in range(n_local))

# Stack the collected cells and labels into arrays
generated_cells = np.array(generated_cells)
cond_classes = np.array(cond_classes)

The generated cells are in the latent space. We can use the SCMG decoder to reconstruct their gene expression profiles.

[12]:
# Decode every generated latent vector under the same experimental condition
n_generated = len(generated_cells)
decoding_conditions = ['Qiu_Organogenesis_MM_2022:all'] * n_generated
adata_generated = decode_cell_state_embedding(scmg_model, generated_cells, decoding_conditions)
adata_generated.obs['cell_type'] = cond_classes

# Visualize the generated cells by UMAP, with neighbors computed on the latent embedding
sc.pp.neighbors(adata_generated, n_neighbors=30, use_rep='X_ce_latent')
sc.tl.umap(adata_generated)
sc.pl.umap(adata_generated, color='cell_type')
../_images/tutorials_conditioned_generation_of_cell_states_26_1.png

Using the generated continuous trajectory, we can perform pseudo time analysis.

[13]:
# Root the pseudotime at the first cell labeled 'Epiblast'
adata_generated.uns["iroot"] = np.flatnonzero(adata_generated.obs['cell_type'] == 'Epiblast')[0]

# DPT is computed on a diffusion map. Compute it explicitly (default parameters)
# rather than relying on the implicit fallback inside `tl.dpt`, which emits a warning.
sc.tl.diffmap(adata_generated)
sc.tl.dpt(adata_generated)

# Relative pseudotime rank: the rank of each cell divided by the number of cells
adata_generated.obs['dpt_rele_rank'] = adata_generated.obs['dpt_pseudotime'].rank() / adata_generated.shape[0]

sc.pl.umap(adata_generated, color='dpt_pseudotime', cmap='jet')
WARNING: Trying to run `tl.dpt` without prior call of `tl.diffmap`. Falling back to `tl.diffmap` with default parameters.
../_images/tutorials_conditioned_generation_of_cell_states_28_1.png

Using pseudotime, we can trace the gene expression changes along the developmental trajectory.

[14]:
# --- Rank genes by when they are expressed along the trajectory ---
# Scale each gene so that its expression sums to 1 over all cells; the weighted
# sum below is then the expression-weighted mean pseudotime rank of the gene.
gene_totals = adata_generated.X.sum(axis=0)
X_normalized = adata_generated.X / gene_totals[None, :]
adata_generated.var['mean'] = adata_generated.X.mean(axis=0)

rank_per_cell = adata_generated.obs['dpt_rele_rank'].values
adata_generated.var['gene_mean_time'] = np.sum(X_normalized * rank_per_cell[:, None], axis=0)

# --- Reorder cells by pseudotime and genes by their mean expression time ---
cell_order = adata_generated.obs['dpt_pseudotime'].sort_values().index.values
gene_order = adata_generated.var['gene_mean_time'].sort_values().index.values
adata_ordered = adata_generated[cell_order, gene_order].copy()

# Express every gene relative to its own mean over cells
gene_means = adata_ordered.X.mean(axis=0)
adata_ordered.X = adata_ordered.X / gene_means[None, :]

# --- Only plot the top highly variable genes ---
# The flags are computed on adata_generated (after adata_ordered was copied),
# so the ordered genes are selected by name.
sc.pp.highly_variable_genes(adata_generated, n_top_genes=5000)
adata_generated.var['max'] = adata_generated.X.max(axis=0)
hv_genes = adata_generated.var_names[
    adata_generated.var['highly_variable']
]
is_hv_gene = adata_ordered.var.index.isin(hv_genes)

# Matrix to plot: one row per gene, one column per cell
X = adata_ordered[:, is_hv_gene].X.T
n_genes, n_cells = X.shape

# --- Plot the gene expression levels of the top highly variable genes along the trajectory ---
fig, ax = plt.subplots(figsize=(4, 4), dpi=100)
ax.imshow(X, vmax=5, cmap='inferno_r', aspect=n_cells / n_genes)

# Cells are evenly spaced in pseudotime order, so the x tick labels 0.0-1.0
# mark the fraction of ordered cells
tick_fractions = np.linspace(0, 1, num=11)
ax.set_xticks(np.linspace(0, n_cells, num=11),
              [f'{x:.1f}' for x in tick_fractions],
              size=8)
ax.set_yticks(np.linspace(0, n_genes, num=11),
              np.linspace(0, n_genes, num=11, dtype=int),
              size=8)

ax.set_xlabel('pseudotime')
ax.set_ylabel('genes')
plt.show()
../_images/tutorials_conditioned_generation_of_cell_states_30_0.png
[ ]: