# Imports and setup

import json
import os
from pathlib import Path

import numpy as np
import pandas as pd
from matplotlib.colors import Normalize

import himalayas
from himalayas import Matrix, Annotations, Analysis
from himalayas.plot import Plotter

# Report the installed HiMaLAYAS version alongside the notebook output.
print(f"HiMaLAYAS version: {himalayas.__version__}")

# Set working directory if running in a notebook environment
# (detected here by `__file__` being absent from the module globals).
# NOTE(review): `Path().resolve()` is the absolute form of the current working
# directory, so this `chdir` appears to leave the directory unchanged — confirm
# whether a different target (e.g. the notebook's own folder) was intended.
if "__file__" not in globals():
    os.chdir(Path().resolve())

# Enable inline plotting for notebooks
%matplotlib inline

HiMaLAYAS version: 0.0.11

# Load GO BP annotations and summarize coverage

DATA_DIR = Path("data")
GO_BP_PATH = DATA_DIR / "go_bp_name_to_orfs.json"

# Parse the JSON mapping; each value is iterated below as a collection of ORFs.
with GO_BP_PATH.open("r", encoding="utf-8") as fh:
    go_bp = json.load(fh)


# Per-term size after de-duplicating its ORFs, and the union of ORFs over all terms.
term_sizes = [len(set(members)) for members in go_bp.values()]
all_orfs = set().union(*go_bp.values())

print(
    f"GO BP terms loaded: {len(term_sizes):,}",
    f"Min term size: {min(term_sizes)}",
    f"Max term size: {max(term_sizes)}",
    f"Unique ORFs across all terms: {len(all_orfs):,}",
    sep="\n",
)

GO BP terms loaded: 1,095
Min term size: 5
Max term size: 243
Unique ORFs across all terms: 4,927

# Load the GI matrix and inspect basic stats

MATRIX_PATH = DATA_DIR / "gi_pcc_sampled.tsv"

# Read the tab-separated matrix; the first column supplies the row labels.
# The separator is written as the escape sequence "\t" instead of a literal
# whitespace character inside the string, so that editors or formatters that
# rewrite tabs cannot silently change the delimiter.
DF = pd.read_csv(
    MATRIX_PATH,
    sep="\t",
    index_col=0,
)

# Quick sanity checks: dimensions, whether row and column labels match exactly
# (same labels in the same order), and the overall value range.
print(f"Matrix shape: {DF.shape[0]:,} x {DF.shape[1]:,}")
print(f"Row/column labels identical: {DF.index.equals(DF.columns)}")
print(f"Value range: [{DF.min().min():.3f}, {DF.max().max():.3f}]")

DF.head()

Matrix shape: 1,053 x 1,053
Row/column labels identical: True
Value range: [-0.317, 0.845]

# Build categorical and continuous rails for label bars

# Set gene order from matrix: the row index of DF is the gene list that the
# rail-building helpers below iterate over.
genes = DF.index


# Helper functions for categorical rails
def build_binary_gene_map(
    genes,
    positive_set,
    pos_label: str,
    neg_label: str,
):
    """
    Labels each gene with pos_label if it is a member of positive_set, otherwise neg_label.

    Returns a dict keyed by gene, in the iteration order of genes.
    """
    labelled = {}
    for gene in genes:
        labelled[gene] = pos_label if gene in positive_set else neg_label
    return labelled


def load_essential_orfs(path: Path) -> set[str]:
    """
    Loads one ORF per line.

    The file is decoded explicitly as UTF-8 so the result does not depend on the
    platform's default text encoding. Surrounding whitespace is stripped from each
    line and lines that are empty after stripping are skipped.
    """
    text = path.read_text(encoding="utf-8")
    return {orf for orf in map(str.strip, text.splitlines()) if orf}


def load_uncharacterized_orfs(path: Path) -> set[str]:
    """
    Loads uncharacterized ORFs from a JSON object keyed by ORF.

    The file is opened explicitly as UTF-8 so decoding does not depend on the
    platform's default text encoding. Only the top-level keys are returned; the
    values are ignored.
    """
    with path.open("r", encoding="utf-8") as fh:
        payload = json.load(fh)
    # .keys() is kept deliberately: a top-level JSON array has no .keys() and
    # therefore still fails here instead of being accepted as a list of ORFs.
    return set(payload.keys())


def map_essential_genes(genes, essential_orfs: set[str]):
    """
    Labels each gene "essential" or "nonessential" by membership in essential_orfs.

    Returns a (gene -> label dict, label -> hex color dict) pair.
    """
    palette = {}
    palette["essential"] = "#d73027"
    palette["nonessential"] = "#ffffff"

    assignments = build_binary_gene_map(
        genes,
        essential_orfs,
        pos_label="essential",
        neg_label="nonessential",
    )
    return assignments, palette


def map_uncharacterized_genes(
    genes,
    uncharacterized_orfs: set[str],
    pos_label: str = "uncharacterized",
    neg_label: str = "characterized",
):
    """
    Labels each gene pos_label or neg_label by membership in uncharacterized_orfs.

    Returns a (gene -> label dict, label -> hex color dict) pair; the color dict is
    keyed by the two labels passed in.
    """
    # Insert pos_label first, then neg_label, matching the key order of the palette.
    palette = {}
    palette[pos_label] = "#1e90ff"
    palette[neg_label] = "#ffffff"

    assignments = build_binary_gene_map(
        genes,
        uncharacterized_orfs,
        pos_label=pos_label,
        neg_label=neg_label,
    )
    return assignments, palette


# Categorical rails: essential vs. nonessential
essential_path = DATA_DIR / "yeast_essential_orfs.txt"
essential_orfs = load_essential_orfs(essential_path)
gene_essential_map, gene_essential_colors = map_essential_genes(genes, essential_orfs)

print(f"Total genes: {len(genes)}")
print(f"Essential in matrix: {list(gene_essential_map.values()).count('essential')}")

# Categorical rails: uncharacterized vs. characterized
unchar_path = DATA_DIR / "yeast_uncharacterized_orfs.json"
uncharacterized_orfs = load_uncharacterized_orfs(unchar_path)
gene_characterization_map, gene_characterization_colors = map_uncharacterized_genes(
    genes, uncharacterized_orfs
)

print(
    f"Uncharacterized in matrix: "
    f"{list(gene_characterization_map.values()).count('uncharacterized')}"
)

# Continuous rail: per-row variance of the loaded matrix, plus its NaN-ignoring range
row_variance_map = DF.var(axis=1).astype(float).to_dict()
row_variance_values = np.array(list(row_variance_map.values()), dtype=float)
row_variance_min, row_variance_max = (
    float(reduce_fn(row_variance_values)) for reduce_fn in (np.nanmin, np.nanmax)
)

print(f"Row-variance range: [{row_variance_min:.3f}, {row_variance_max:.3f}]")

Total genes: 1053
Essential in matrix: 356
Uncharacterized in matrix: 18
Row-variance range: [0.001, 0.014]

# Run clustering/enrichment and prepare optional cluster labels

LINKAGE_METHOD = "ward"
LINKAGE_METRIC = "euclidean"
LINKAGE_THRESHOLD = 16
OPTIMAL_ORDERING = True

matrix = Matrix(DF)
annotations = Annotations(go_bp, matrix)

# Same pipeline as a single fluent chain, written one stage per statement:
# cluster -> enrich -> finalize.
analysis = Analysis(matrix, annotations)
analysis = analysis.cluster(
    linkage_method=LINKAGE_METHOD,
    linkage_metric=LINKAGE_METRIC,
    linkage_threshold=LINKAGE_THRESHOLD,
    optimal_ordering=OPTIMAL_ORDERING,
    min_cluster_size=30,
)
analysis = analysis.enrich(min_overlap=2)
analysis = analysis.finalize(col_cluster=True)

results = analysis.results

# Keep significant terms
results_sig = results.filter("qval <= 0.05")

# Optional post-hoc label table for inspection/export
cluster_labels = results_sig.cluster_labels(rank_by="p", label_mode="top_term", max_words=6)

print(
    f"All enriched rows: {len(results.df):,}",
    f"Significant rows (q<=0.05): {len(results_sig.df):,}",
    cluster_labels.head(),
    sep="\n",
)

/Users/irahorecka/Desktop/harddrive_desktop/PhD/University of Toronto/Rost Lab/GitHub/himalayas/src/himalayas/core/annotations.py:72: RuntimeWarning: Dropped 264/1095 annotations with no overlap to matrix labels
  warn(

All enriched rows: 709
Significant rows (q<=0.05): 331
   cluster                                              label          pval  \
0        1                    GPI anchor biosynthetic process  3.222730e-12   
1        2                         vesicle-mediated transport  1.237408e-26   
2        3                     mRNA splicing, via spliceosome  2.829998e-16   
3        4                            cytoplasmic translation  8.925220e-15   
4        5  mitochondrial respiratory chain complex IV ass...  1.794876e-19   

           qval         score    n  \
0  8.788137e-11  3.222730e-12  148   
1  4.386613e-24  1.237408e-26   92   
2  1.433192e-14  2.829998e-16  263   
3  3.954988e-13  8.925220e-15  358   
4  1.590709e-17  1.794876e-19   78   

                                                term  
0                    GPI anchor biosynthetic process  
1                         vesicle-mediated transport  
2                     mRNA splicing, via spliceosome  
3                            cytoplasmic translation  
4  mitochondrial respiratory chain complex IV ass...

# Results table: first rows, then the table's shape
display(results.df.head(5))
display(results.df.shape)

# Significant subset: first rows, then the table's shape
display(results_sig.df.head(5))
display(results_sig.df.shape)

# Cluster sizes and example membership (first ten labels, sorted)
display(results.clusters.cluster_sizes)
example_cluster = int(results.clusters.unique_clusters[0])
display(sorted(results.clusters.cluster_to_labels[example_cluster])[:10])

# Top terms for the example cluster (five smallest p-values among significant rows)
display(
    results_sig.df.loc[results_sig.df["cluster"] == example_cluster]
    .sort_values("pval")
    .head(5)
)

# Label -> cluster ID lookup
example_label = results.matrix.labels[0]
display(results.clusters.label_to_cluster[example_label])

# Method and key parameters
display(results.method, results.params)

(709, 8)

(331, 8)

{1: 148, 6: 52, 2: 92, 4: 358, 5: 78, 7: 62, 3: 263}

['ACK1',
 'ADH1',
 'ALG14',
 'ALG2',
 'ALG3',
 'ALG5',
 'ALG6',
 'ALG8',
 'ARC15',
 'ARC18']

1

'hypergeom'

{'linkage_threshold': 16.0}

# Configure and render the full plot

# Text color and figure background shared by the figures in this notebook.
LABEL_COLOR = "black"
BACKGROUND_COLOR = "white"

# Symmetric color limit: the 99th percentile of |value| over the finite,
# non-zero matrix entries. Used below as vmin=-vlim / vmax=vlim.
vals = matrix.values
mask = np.isfinite(vals) & (vals != 0)
vlim = float(np.percentile(np.abs(vals[mask]), 99))

# The figure is declared as one fluent chain on Plotter; it is rendered by
# plotter.show() after the chain.
plotter = (
    Plotter(results_sig)
    # Build core layout and matrix
    .set_background(color=BACKGROUND_COLOR)
    .plot_title(
        "HiMaLAYAS - Yeast Genetic Interaction Similarity Matrix",
        color=LABEL_COLOR,
        fontsize=17,
    )
    .plot_dendrogram(
        axes=[0.06, 0.16, 0.09, 0.79],
        data_pad=0.5,
        color="#888888",
        linewidth=0.75,
    )
    # Diverging colormap centered at 0 with the symmetric limits computed above.
    .plot_matrix(
        cmap="RdBu_r",
        center=0,
        vmin=-vlim,
        vmax=vlim,
        outer_lw=0,
        figsize=(14, 10),
        subplots_adjust={"left": 0.15, "right": 0.62, "bottom": 0.16, "top": 0.95},
    )
    .plot_matrix_axis_labels(
        xlabel="Gene",
        ylabel="Gene",
        fontsize=16,
        font="DejaVu Sans",
        color=LABEL_COLOR,
        xlabel_pad=6.0,
        ylabel_pad=0.007,
    )
    # The label panel's left edge (0.62) equals the "right" value passed to
    # subplots_adjust above.
    .set_label_panel(
        axes=[0.62, 0.16, 0.36, 0.79],
        gutter_color=BACKGROUND_COLOR,
        text_pad=0.02,
    )
    .plot_cluster_labels(
        rank_by="p",  # Set rank_by="q" to rank by q-values instead.
        label_mode="top_term",
        max_words=24,
        wrap_text=True,
        wrap_width=40,
        overflow="wrap",
        font="DejaVu Sans",
        fontsize=17,
        color=LABEL_COLOR,
        skip_unlabeled=False,
        placeholder_text="—",
        placeholder_color="#b22222",
        placeholder_alpha=0.6,
        label_fields=("label", "p"),
        boundary_color=LABEL_COLOR,
        boundary_lw=1,
        boundary_alpha=0.8,
        dendro_boundary_alpha=0.0,
        label_sep_xmin=None,
        label_sep_xmax=0.5,
        label_sep_color=LABEL_COLOR,
        label_sep_lw=1,
        label_sep_alpha=0.4,
    )
    # Set label rails and bar-label styles
    # Each rail is given a `name`; those names are what set_label_track_order
    # lists further down.
    .plot_label_bar(
        values=gene_essential_map,
        mode="categorical",
        colors=gene_essential_colors,
        width=0.04,
        left_pad=0.06,
        right_pad=0.0,
        name="essentiality",
        title="Essential",
    )
    .plot_label_bar(
        values=gene_characterization_map,
        mode="categorical",
        colors=gene_characterization_colors,
        width=0.04,
        left_pad=0.02,
        right_pad=0.0,
        name="characterization",
        title="Uncharacterized",
    )
    # Continuous rail: same cmap and limits as the "row_variance" colorbar
    # registered below.
    .plot_label_bar(
        values=row_variance_map,
        mode="continuous",
        cmap="Greens",
        vmin=row_variance_min,
        vmax=row_variance_max,
        width=0.04,
        left_pad=0.02,
        right_pad=0.0,
        name="row_variance",
        title="Row variance",
    )
    # Same 0-30 normalization as the "enrichment" colorbar registered below.
    .plot_cluster_bar(
        norm=Normalize(0, 30),
        width=0.04,
        left_pad=0.02,
        right_pad=0.00,
        name="sigbar",
        title="Enrichment",
    )
    .plot_bar_labels(
        font="DejaVu Sans",
        fontsize=14,
        color=LABEL_COLOR,
        pad=4,
        rotation=90,
    )
    .set_label_track_order(("essentiality", "characterization", "row_variance", "sigbar"))
    # Set legend style
    .add_colorbar(
        name="matrix",
        cmap="RdBu_r",
        norm=Normalize(-vlim, vlim),
        label="Profile similarity (PCC)",
        ticks=[-vlim, 0, vlim],
    )
    .add_colorbar(
        name="row_variance",
        cmap="Greens",
        norm=Normalize(row_variance_min, row_variance_max),
        label="Row variance",
        ticks=[row_variance_min, row_variance_max],
    )
    .add_colorbar(
        name="enrichment",
        cmap="YlOrBr",
        norm=Normalize(0, 30),
        label=r"Enrichment ($-\log_{10}p$)",
        ticks=[0, 10, 20, 30],
    )
    .plot_colorbars(
        ncols=2,
        height=0.13,
        gap=0.06,
        hpad=0.06,
        vpad=0.07,
        fontsize=14,
        font="DejaVu Sans",
        color=LABEL_COLOR,
        border_color=LABEL_COLOR,
        border_width=1.0,
        border_alpha=0.9,
        tick_decimals=3,
    )
)

plotter.show()

from himalayas.plot import plot_dendrogram_condensed

# Condensed dendrogram of the significant results; keyword arguments are grouped
# by what they style.
condensed = plot_dendrogram_condensed(
    results_sig,
    # Figure and dendrogram lines
    figsize=(4, 8),
    background_color=BACKGROUND_COLOR,
    dendrogram_color="#888888",
    dendrogram_lw=1.5,
    # Significance bar
    sigbar_cmap="YlOrBr",
    sigbar_min_logp=0.0,
    sigbar_max_logp=30.0,
    # Which label is chosen and which fields it shows
    rank_by="p",
    label_mode="top_term",
    label_fields=("label", "n", "p", "q"),
    omit_words=(),
    skip_unlabeled=False,
    # Label text styling and wrapping
    font="DejaVu Sans",
    fontsize=18,
    label_color=LABEL_COLOR,
    label_left_pad=0.06,
    max_words=24,
    wrap_text=True,
    wrap_width=34,
    overflow="ellipsis",
    # Placeholder styling
    placeholder_text="—",
    placeholder_color="#b22222",
    placeholder_alpha=0.6,
)

condensed.show()

# Define a helper to run zoomed cluster analysis


def run_zoom_analysis(
    *,
    results,
    cluster_id,
    go_bp,
    linkage_threshold,
    linkage_method="ward",
    linkage_metric="euclidean",
    background_matrix,
    optimal_ordering=True,
    min_cluster_size=6,
    min_overlap=2,
    qval_cutoff=0.05,
):
    """
    Re-clusters a single cluster of results and re-runs enrichment on it.

    The matrix of results.subset(cluster=cluster_id) is annotated with go_bp, clustered
    with the given linkage settings, and enriched with background_matrix passed as the
    enrichment background. All arguments are keyword-only.

    Returns (zoom_matrix, zoom_results, zoom_results_sig), where zoom_results_sig is
    zoom_results filtered with "qval <= {qval_cutoff}".
    """
    sub_matrix = results.subset(cluster=cluster_id).matrix
    sub_annotations = Annotations(go_bp, sub_matrix)

    # One pipeline stage per statement: cluster -> enrich -> finalize.
    pipeline = Analysis(sub_matrix, sub_annotations)
    pipeline = pipeline.cluster(
        linkage_method=linkage_method,
        linkage_metric=linkage_metric,
        linkage_threshold=linkage_threshold,
        optimal_ordering=optimal_ordering,
        min_cluster_size=min_cluster_size,
    )
    pipeline = pipeline.enrich(min_overlap=min_overlap, background=background_matrix)
    pipeline = pipeline.finalize(col_cluster=True)

    all_rows = pipeline.results
    significant_rows = all_rows.filter(f"qval <= {qval_cutoff}")
    return sub_matrix, all_rows, significant_rows

# Run zoomed analysis and render the zoomed plot

CLUSTER_ID = 4
ZOOM_LINKAGE_THRESHOLD = 8

# Re-cluster the chosen cluster, with the full matrix passed as enrichment background.
zoom_matrix, zoom_results, zoom_results_sig = run_zoom_analysis(
    results=results,
    cluster_id=CLUSTER_ID,
    go_bp=go_bp,
    background_matrix=matrix,
    linkage_method=LINKAGE_METHOD,
    linkage_metric=LINKAGE_METRIC,
    linkage_threshold=ZOOM_LINKAGE_THRESHOLD,
    optimal_ordering=OPTIMAL_ORDERING,
    min_cluster_size=6,
)

# Symmetric color limit: 99th percentile of |value| over finite, non-zero entries.
vals = zoom_matrix.values
mask = np.logical_and(np.isfinite(vals), vals != 0)
vlim = float(np.percentile(np.abs(vals[mask]), 99))

# Per-row variance within the zoomed matrix and its NaN-ignoring range.
zoom_row_variance_map = zoom_matrix.df.var(axis=1).astype(float).to_dict()
zoom_row_variance_values = np.array(list(zoom_row_variance_map.values()), dtype=float)
zoom_row_variance_min, zoom_row_variance_max = (
    float(reduce_fn(zoom_row_variance_values)) for reduce_fn in (np.nanmin, np.nanmax)
)

# Figure for the zoomed cluster, declared as one fluent chain on Plotter and
# rendered by plotter.show() after the chain. `vlim` and the
# zoom_row_variance_* values are the ones computed just above for zoom_matrix.
plotter = (
    Plotter(zoom_results_sig)
    .set_background(color=BACKGROUND_COLOR)
    .plot_title(
        f"Yeast Genetic Interaction Similarity Matrix (Cluster {CLUSTER_ID})",
        color=LABEL_COLOR,
        fontsize=15,
    )
    .plot_dendrogram(
        axes=[0.06, 0.16, 0.09, 0.79],
        data_pad=0.5,
        color="#888888",
        linewidth=0.75,
    )
    # Diverging colormap centered at 0 with symmetric limits.
    .plot_matrix(
        cmap="RdBu_r",
        center=0,
        vmin=-vlim,
        vmax=vlim,
        outer_lw=0,
        figsize=(12, 7),
        subplots_adjust={"left": 0.15, "right": 0.62, "bottom": 0.16, "top": 0.95},
    )
    .plot_matrix_axis_labels(
        xlabel="Gene",
        ylabel="Gene",
        fontsize=14,
        font="DejaVu Sans",
        color=LABEL_COLOR,
        xlabel_pad=6.0,
        ylabel_pad=0.007,
    )
    # The label panel's left edge (0.62) equals the "right" value passed to
    # subplots_adjust above.
    .set_label_panel(
        axes=[0.62, 0.16, 0.36, 0.79],
        gutter_color=BACKGROUND_COLOR,
        text_pad=0.02,
    )
    .plot_cluster_labels(
        rank_by="p",
        label_mode="top_term",
        max_words=24,
        wrap_text=True,
        wrap_width=40,
        overflow="wrap",
        font="DejaVu Sans",
        fontsize=12,
        color=LABEL_COLOR,
        label_fields=("label", "p"),
        boundary_color=LABEL_COLOR,
        boundary_lw=1,
        boundary_alpha=0.8,
        dendro_boundary_alpha=0.0,
        label_sep_xmin=None,
        label_sep_xmax=0.5,
        label_sep_color=LABEL_COLOR,
        label_sep_lw=1,
        label_sep_alpha=0.4,
    )
    # The categorical rails reuse the gene maps built for the full matrix.
    .plot_label_bar(
        values=gene_essential_map,
        mode="categorical",
        colors=gene_essential_colors,
        left_pad=0.06,
        width=0.04,
        right_pad=0.0,
        name="essentiality",
        title="Essential",
    )
    .plot_label_bar(
        values=gene_characterization_map,
        mode="categorical",
        colors=gene_characterization_colors,
        width=0.04,
        left_pad=0.02,
        right_pad=0.0,
        name="characterization",
        title="Uncharacterized",
    )
    # Continuous rail: same cmap and limits as the "row_variance" colorbar
    # registered below.
    .plot_label_bar(
        values=zoom_row_variance_map,
        mode="continuous",
        cmap="Greens",
        vmin=zoom_row_variance_min,
        vmax=zoom_row_variance_max,
        width=0.04,
        left_pad=0.02,
        right_pad=0.0,
        name="row_variance",
        title="Row variance",
    )
    # Same 0-20 normalization as the "enrichment" colorbar registered below.
    .plot_cluster_bar(
        norm=Normalize(0, 20),
        name="sigbar",
        title="Enrichment",
        width=0.04,
        left_pad=0.02,
        right_pad=0.0,
    )
    .plot_bar_labels(
        font="DejaVu Sans",
        fontsize=12,
        color=LABEL_COLOR,
        pad=4,
        rotation=90,
    )
    # Track order is given by the `name` values assigned to the bars above.
    .set_label_track_order(("essentiality", "characterization", "row_variance", "sigbar"))
    .add_colorbar(
        name="matrix",
        cmap="RdBu_r",
        norm=Normalize(-vlim, vlim),
        label="Profile similarity (PCC)",
        ticks=[-vlim, 0, vlim],
    )
    .add_colorbar(
        name="row_variance",
        cmap="Greens",
        norm=Normalize(zoom_row_variance_min, zoom_row_variance_max),
        label="Row variance",
        ticks=[zoom_row_variance_min, zoom_row_variance_max],
    )
    .add_colorbar(
        name="enrichment",
        cmap="YlOrBr",
        norm=Normalize(0, 20),
        label=r"Enrichment ($-\log_{10}p$)",
        ticks=[0, 10, 20],
    )
    .plot_colorbars(
        ncols=2,
        height=0.16,
        gap=0.08,
        hpad=0.06,
        vpad=0.09,
        fontsize=11,
        font="DejaVu Sans",
        color=LABEL_COLOR,
        border_color=LABEL_COLOR,
        border_width=1.0,
        border_alpha=0.9,
        tick_decimals=3,
    )
)

plotter.show()

/Users/irahorecka/Desktop/harddrive_desktop/PhD/University of Toronto/Rost Lab/GitHub/himalayas/src/himalayas/core/annotations.py:72: RuntimeWarning: Dropped 658/1095 annotations with no overlap to matrix labels
  warn(

# Run nested zoom analysis and render the subcluster plot

PARENT_CLUSTER_ID = 4
SUBCLUSTER_ID = 1
SUBCLUSTER_LINKAGE_THRESHOLD = 4
SUBCLUSTER_PATH = f"{PARENT_CLUSTER_ID}.{SUBCLUSTER_ID}"

# Zoom once more, this time starting from the zoomed results; the full matrix is
# still passed as the enrichment background.
subcluster_matrix, subcluster_results, subcluster_results_sig = run_zoom_analysis(
    results=zoom_results,
    cluster_id=SUBCLUSTER_ID,
    go_bp=go_bp,
    background_matrix=matrix,
    linkage_method=LINKAGE_METHOD,
    linkage_metric=LINKAGE_METRIC,
    linkage_threshold=SUBCLUSTER_LINKAGE_THRESHOLD,
    optimal_ordering=OPTIMAL_ORDERING,
    min_cluster_size=2,
)

# Symmetric color limit: 99th percentile of |value| over finite, non-zero entries.
vals = subcluster_matrix.values
mask = np.logical_and(np.isfinite(vals), vals != 0)
vlim = float(np.percentile(np.abs(vals[mask]), 99))

# Per-row variance within the subcluster matrix and its NaN-ignoring range.
subcluster_row_variance_map = subcluster_matrix.df.var(axis=1).astype(float).to_dict()
subcluster_row_variance_values = np.array(
    list(subcluster_row_variance_map.values()), dtype=float
)
subcluster_row_variance_min, subcluster_row_variance_max = (
    float(reduce_fn(subcluster_row_variance_values)) for reduce_fn in (np.nanmin, np.nanmax)
)

# Figure for the nested subcluster, declared as one fluent chain on Plotter and
# rendered by plotter.show() after the chain. Unlike the two earlier figures,
# this chain draws row tick labels and has no plot_cluster_labels /
# plot_cluster_bar step, so there is no "sigbar" track or "enrichment" colorbar.
plotter = (
    Plotter(subcluster_results_sig)
    .set_background(color=BACKGROUND_COLOR)
    .plot_title(
        f"Yeast Genetic Interaction Similarity Matrix (Cluster {SUBCLUSTER_PATH})",
        color=LABEL_COLOR,
        fontsize=14,
    )
    .plot_dendrogram(
        axes=[0.06, 0.16, 0.09, 0.79],
        data_pad=0.5,
        color="#888888",
        linewidth=0.75,
    )
    # Diverging colormap centered at 0 with symmetric limits.
    .plot_matrix(
        cmap="RdBu_r",
        center=0,
        vmin=-vlim,
        vmax=vlim,
        outer_lw=0,
        figsize=(12, 7),
        subplots_adjust={"left": 0.15, "right": 0.62, "bottom": 0.16, "top": 0.95},
    )
    # NOTE(review): ylabel_pad is 0.06 here versus 0.007 in the earlier figures —
    # presumably to clear the row tick labels added below; confirm.
    .plot_matrix_axis_labels(
        xlabel="Gene",
        ylabel="Gene",
        fontsize=14,
        font="DejaVu Sans",
        color=LABEL_COLOR,
        xlabel_pad=6.0,
        ylabel_pad=0.06,
    )
    .plot_row_ticks(
        max_labels=60,
        fontsize=11,
        position="right",
    )
    .set_label_panel(
        axes=[0.62, 0.16, 0.36, 0.79],
        gutter_color=BACKGROUND_COLOR,
        text_pad=0.02,
    )
    # NOTE(review): left_pad is 0.22 here versus 0.06 in the earlier figures —
    # presumably leaving room for the right-side row tick labels; confirm.
    .plot_label_bar(
        values=gene_essential_map,
        mode="categorical",
        colors=gene_essential_colors,
        left_pad=0.22,
        width=0.04,
        right_pad=0.0,
        name="essentiality",
        title="Essential",
    )
    .plot_label_bar(
        values=gene_characterization_map,
        mode="categorical",
        colors=gene_characterization_colors,
        width=0.04,
        left_pad=0.02,
        right_pad=0.0,
        name="characterization",
        title="Uncharacterized",
    )
    # Continuous rail: same cmap and limits as the "row_variance" colorbar
    # registered below.
    .plot_label_bar(
        values=subcluster_row_variance_map,
        mode="continuous",
        cmap="Greens",
        vmin=subcluster_row_variance_min,
        vmax=subcluster_row_variance_max,
        width=0.04,
        left_pad=0.02,
        right_pad=0.0,
        name="row_variance",
        title="Row variance",
    )
    .plot_bar_labels(
        font="DejaVu Sans",
        fontsize=12,
        color=LABEL_COLOR,
        pad=4,
        rotation=90,
    )
    # Track order is given by the `name` values assigned to the bars above.
    .set_label_track_order(("essentiality", "characterization", "row_variance"))
    .add_colorbar(
        name="matrix",
        cmap="RdBu_r",
        norm=Normalize(-vlim, vlim),
        label="Profile similarity (PCC)",
        ticks=[-vlim, 0, vlim],
    )
    .add_colorbar(
        name="row_variance",
        cmap="Greens",
        norm=Normalize(subcluster_row_variance_min, subcluster_row_variance_max),
        label="Row variance",
        ticks=[subcluster_row_variance_min, subcluster_row_variance_max],
    )
    .plot_colorbars(
        ncols=2,
        height=0.035,
        gap=0.08,
        hpad=0.06,
        vpad=0.09,
        fontsize=11,
        font="DejaVu Sans",
        color=LABEL_COLOR,
        border_color=LABEL_COLOR,
        border_width=1.0,
        border_alpha=0.9,
        tick_decimals=3,
    )
)

plotter.show()

/Users/irahorecka/Desktop/harddrive_desktop/PhD/University of Toronto/Rost Lab/GitHub/himalayas/src/himalayas/core/annotations.py:72: RuntimeWarning: Dropped 1062/1095 annotations with no overlap to matrix labels
  warn(

	GAA1	GPI18	RFA1	COP1	COG6	KRE5	GPI8	GPI16	YPT1	COG5	...	TUB3	HOG1	ABM1	VPS53	ALE2	MST27	CUE3	TAF7	MPC3	RAD27
GAA1	0.000	0.368	-0.114	0.269	0.108	0.513	0.5195	0.672	0.105	0.177	...	-0.076	0.006	0.051	0.000	-0.044	0.019	0.152	0.090	0.000	-0.111
GPI18	0.368	0.000	-0.151	0.252	0.254	0.483	0.3445	0.453	0.216	0.236	...	-0.101	0.116	-0.026	0.000	-0.089	-0.076	0.060	0.150	0.000	-0.090
RFA1	-0.114	-0.151	0.000	-0.177	-0.250	-0.220	-0.1685	-0.165	-0.232	-0.221	...	-0.042	0.076	-0.062	0.000	-0.043	-0.004	-0.041	-0.100	0.000	0.218
COP1	0.269	0.252	-0.177	0.002	0.454	0.224	0.3715	0.307	0.422	0.440	...	-0.056	0.024	0.031	0.193	-0.001	-0.015	0.083	0.000	-0.004	-0.088
COG6	0.108	0.254	-0.250	0.454	0.002	0.148	0.2290	0.187	0.523	0.788	...	-0.010	0.047	0.038	0.180	0.006	-0.046	0.088	-0.012	-0.033	-0.060

Welcome to HiMaLAYAS Quickstart

Load GO BP Annotations

Load the Matrix

Essential and Uncharacterized Gene Rails

Cluster and Enrich

Inspect Results and Clusters

Plot the Annotated Matrix

Condensed Dendrogram

Nested Zoom Workflow

Plot the Zoomed Matrix

Plot a Nested Subcluster

	cluster	term	k	K	n	N	pval	qval
0	6	DNA replication	34	43	52	1053	1.798541e-42	1.275166e-39
1	2	vesicle-mediated transport	33	48	92	1053	1.237408e-26	4.386613e-24
2	2	endoplasmic reticulum to Golgi vesicle-mediate...	24	31	92	1053	3.139344e-21	7.079147e-19
3	6	DNA repair	23	44	52	1053	3.993877e-21	7.079147e-19
4	7	cell division	26	54	62	1053	2.138848e-20	2.707850e-18

	cluster	term	k	K	n	N	pval	qval
25	1	GPI anchor biosynthetic process	18	23	148	1053	3.222730e-12	8.788137e-11
28	1	fungal-type cell wall organization	18	25	148	1053	3.570637e-11	8.729591e-10
30	1	cell wall organization	15	18	148	1053	4.866293e-11	1.112968e-09
47	1	protein N-linked glycosylation	13	17	148	1053	7.445657e-09	1.099786e-07
50	1	dolichol-linked oligosaccharide biosynthetic p...	9	9	148	1053	1.728488e-08	2.402937e-07