Source code for delnx.tl._gsea

from collections.abc import Sequence
from typing import Any

import gseapy as gp
import pandas as pd

from delnx.ds._gmt import get_gene_sets

MIN_GENESET_SIZE = 5
MAX_GENESET_SIZE = 500


[docs] def gsea( genes: Sequence[str], background: Sequence[str] | None = None, gene_sets: dict[str, list[str]] | None = None, collection: str = "all", url: str | None = None, filepath: str | None = None, geneset_key: str = "geneset", genesymbol_key: str = "genesymbol", method: str = "enrichr", return_object: bool = False, min_genes: int = MIN_GENESET_SIZE, max_genes: int = MAX_GENESET_SIZE, ) -> pd.DataFrame | Any: """ Run enrichment analysis for a single gene list using Enrichr. Parameters ---------- genes : Sequence[str] List of gene symbols to analyze. background : Sequence[str], optional Background gene list to use for enrichment analysis. If None, uses all genes in the gene sets. gene_sets : dict[str, list[str]], optional Pre-loaded gene sets as a dictionary where keys are gene set names and values are lists of gene symbols. If None, will load gene sets based on the provided collection, URL, or filepath. collection : str Name of the collection to load gene sets from. Default is "all". url : str, optional URL to load the GMT file from. If None, uses the default collection. filepath : str, optional Local file path to load the GMT file from. If None, uses the default collection. geneset_key : str Column name for the gene set name in the output dictionary. Default is "geneset". genesymbol_key : str Column name for the gene symbol in the output dictionary. Default is "genesymbol". method : str Method to use for enrichment analysis. Currently only "enrichr" is supported. return_object : bool If True, returns the gseapy Enrichr object. If False, returns a pandas DataFrame with results. min_genes : int Minimum number of genes in a gene set to include in the analysis. Default is 5. max_genes : int Maximum number of genes in a gene set to include in the analysis. Default is 500. """ if method != "enrichr": raise ValueError(f"Unsupported method: {method}") if gene_sets is None: gene_sets = get_gene_sets( collection=collection, url=url, filepath=filepath, geneset_key=geneset_key, genesymbol_key=genesymbol_key, min_genes=min_genes, max_genes=max_genes, ) enr = gp.enrichr( gene_list=list(genes), background=list(background) if background is not None else None, gene_sets=gene_sets, outdir=None, no_plot=True, ) return enr if return_object else enr.res2d