moon_example.Rmd

---
title: "MOON analysis - example with phosphoproteomic and transcriptomic"
author: "Anne-Claire Kroger and Aurelien Dugourd"
date: "2023-09-28"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

library(readr)
library(cosmosR)
library(decoupleR)
library(data.table)
library(pheatmap)
library(ggplot2)
library(GSEABase)
library(piano)

```

# Import the data

The example data provided here is the output of a MOFA analysis of the glioblastoma (GBM) cohort of the CPTAC data (https://paynelab.github.io/cptac/). MOFA was run with the RNA, proteomics and phosphoproteomics data.

```{r}
# Objects provided by the example .RData file and used in later chunks:
#   meta_network_filtered: the PKN pulled from OmniPath, filtered & cleaned
#   gbm_weights_rna: RNA weights output from the MOFA analysis for all factors,
#                    retrieved with the "get_weights" function
load(file = "moon_example_data.RData")

# Phospho input handed to decoupleR::run_ulm() in the next chunk
phospho <- readRDS(file = "gbm_mofaf15_weights_phospho.RDS")

```

# Compute kinase activity from MOFA output with decoupleR
```{r}
# Load the kinase-substrate network (KSN) used as prior knowledge
load(file = "KSN.RData")

# Score each source of the KSN on the phospho data with the ulm method
gbm_kinases <- decoupleR::run_ulm(
  phospho,
  KSN,
  .source = "source",
  .target = "target",
  minsize = 5
)
```

```{r}
# Reshape the long ulm output (columns 2:4 hold source, condition and score)
# into a wide source x condition score table, with the sources as row names
kinase_scores_long <- as.data.table(gbm_kinases[, 2:4])
gbm_kinases_df <- as.data.frame(
  dcast(kinase_scores_long, source ~ condition, value.var = "score")
)
rownames(gbm_kinases_df) <- gbm_kinases_df$source
gbm_kinases_df <- gbm_kinases_df[, -1]

# Keep the rows whose largest absolute score over all conditions exceeds 7
kinase_is_top <- apply(gbm_kinases_df, 1, function(scores) max(abs(scores)) > 7)
gbm_kinases_top <- gbm_kinases_df[kinase_is_top, ]

pheatmap(gbm_kinases_top, treeheight_row = 0, treeheight_col = 0)
```

# Compute TF activity from MOFA output with decoupleR
```{r}
# Load the TF-target network (CollecTRI). The saved file can be regenerated with:
#   TFN <- decoupleR::get_collectri()
#   save(TFN, file = "TFN.RData")
load(file = "TFN.RData")

# Score each TF of the network on the RNA weights with the ulm method
gbm_tf <- decoupleR::run_ulm(
  gbm_weights_rna,
  TFN,
  .source = "source",
  .target = "target",
  minsize = 5
)
```

```{r}
# Reshape the long ulm output (columns 2:4 hold source, condition and score)
# into a wide TF x condition score table, with the TFs as row names
tf_scores_long <- as.data.table(gbm_tf[, 2:4])
gbm_tf_df <- as.data.frame(
  dcast(tf_scores_long, source ~ condition, value.var = "score")
)
rownames(gbm_tf_df) <- gbm_tf_df$source
gbm_tf_df <- gbm_tf_df[, -1]

# Keep the TFs whose largest absolute score over all conditions exceeds 7
tf_is_top <- apply(gbm_tf_df, 1, function(scores) max(abs(scores)) > 7)
scores_tf_top <- gbm_tf_df[tf_is_top, ]

pheatmap(scores_tf_top, treeheight_row = 0, treeheight_col = 0)
```

Here we show what the data behind a TF activity estimation actually looks like.
```{r}
# MOFA factor used for this illustration
factor_n <- "Factor2"

# Targets of REST (columns 2 and 3 of TFN), joined with their RNA weight for
# the selected factor; merge() joins on the shared "target" column
TFN_TF <- TFN[TFN$source == "REST", c(2, 3)]
RNA_to_merge <- as.data.frame(gbm_weights_rna[, factor_n, drop = FALSE])
RNA_to_merge$target <- row.names(RNA_to_merge)
TFN_TF <- merge(TFN_TF, RNA_to_merge)
names(TFN_TF)[3] <- "RNA"

# Every other gene of the RNA matrix is appended with mor set to 0
not_a_target <- !(row.names(gbm_weights_rna) %in% TFN_TF$target)
addon <- as.data.frame(gbm_weights_rna[not_a_target, factor_n, drop = FALSE])
addon$mor <- 0
addon$target <- row.names(addon)
names(addon)[1] <- "RNA"

TFN_TF <- as.data.frame(rbind(TFN_TF, addon))

# RNA weight against mor: raw points, per-mor summary and a linear fit
ggplot(TFN_TF, aes(x = mor, y = RNA)) +
  geom_point() +
  stat_summary(fun.data = mean_cl_normal) +
  geom_smooth(method = "lm", formula = y ~ x) +
  theme_minimal()
```

# Filter & compress the network according to our input TFs
```{r}
TF_data <- gbm_tf # TF activity results
RNA_data <- gbm_weights_rna # RNA weights output from the MOFA analysis
factor_n <- "Factor1" # MOFA factor this analysis is run on

n_steps <- 3 # number of connection layers to be considered

# RNA input for later filtering: weights of the factor, named by gene
RNA_input <- setNames(RNA_data[, factor_n], rownames(RNA_data))

# TF activities of the selected factor, used as downstream input
TF_inputs_f <- TF_data[TF_data$condition == factor_n, ]

# TF results for one factor as a named vector (score named by TF)
TF_inputs <- setNames(TF_inputs_f$score, TF_inputs_f$source)
downstream_inputs <- TF_inputs

# Filter the TFs (downstream input) for TFs actually present in the PKN
downstream_inputs_filtered <- cosmosR:::filter_input_nodes_not_in_pkn(
  downstream_inputs,
  meta_network_filtered
)

# Restrict the PKN to the neighbourhood (n_steps layers) of the retained TFs
# NOTE(review): relies on an internal cosmosR helper; presumably it keeps the
# nodes found within n_steps upstream of the given TFs - confirm.
meta_network_filtered <- cosmosR:::keep_observable_neighbours(
  meta_network_filtered,
  n_steps,
  names(downstream_inputs_filtered)
)

# Compress the PKN so that nodes with the same children are united into one
# node (their names get merged with "_")
meta_network_compressed_list <- compress_same_children(
  meta_network_filtered,
  sig_input = c("wig" = ""), # placeholder named vector, no real upstream input
  metab_input = downstream_inputs_filtered
)
meta_network_compressed <- meta_network_compressed_list$compressed_network

# Clean up again to remove empty / redundant connections
meta_network_compressed <- meta_network_cleanup(meta_network_compressed)

```

# Run MOON in iterations
```{r}
# Score the compressed network with MOON, then filter the network with
# filter_incohrent_TF_target() using the new scores, the TF-target network and
# the RNA input. Repeat until the number of interactions no longer changes or
# the iteration counter reaches its limit (at most 9 passes).
meta_network_compressed_current <- meta_network_compressed

# Deliberately different start values so that the loop body runs at least once
before <- 1
after <- 0
i <- 1
# `&&` because this is a scalar condition; nrow() counts interactions whether
# the network is a data.frame or a tibble (length(x[, 1]) would not for tibbles)
while (before != after && i < 10) {
  before <- nrow(meta_network_compressed_current)
  MOON_res <- cosmosR::moon(
    downstream_input = downstream_inputs_filtered,
    meta_network = meta_network_compressed_current,
    n_layers = n_steps,
    statistic = "ulm"
  )

  meta_network_compressed_current <- filter_incohrent_TF_target(
    MOON_res,
    TFN,
    meta_network_compressed_current,
    RNA_input
  )
  after <- nrow(meta_network_compressed_current)
  i <- i + 1
}

```
```{r}
# Keep the nodes with an absolute MOON score above 4, excluding level 0
node_is_top <- abs(MOON_res$score) > 4 & MOON_res$level != 0
scores_kin_top <- MOON_res[node_is_top, ]

# Bar plot of the retained scores, ordered by decreasing score and coloured
# by network level
ggplot(
  scores_kin_top,
  aes(
    x = reorder(as.factor(source), -score),
    y = score,
    fill = as.factor(level)
  )
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  xlab("kinases/ nodes") +
  guides(fill = guide_legend(title = "level"))

```
Decompress the network score results and save them as CSV for cytoscape.
```{r}
# Build a mapping table (compressed node name -> original node name) and use it
# to expand the MOON scores back to the original, uncompressed nodes.
node_signatures <- meta_network_compressed_list$node_signatures
duplicated_parents <- meta_network_compressed_list$duplicated_signatures

# Nodes that were merged during compression: "source" is the name used in the
# compressed network, "source_original" the original node name
duplicated_parents_df <- data.frame(duplicated_parents)
duplicated_parents_df$source_original <- row.names(duplicated_parents_df)
names(duplicated_parents_df)[1] <- "source"

# Nodes that were not merged map onto themselves. A logical mask is used
# instead of x[-which(...)], which would return an EMPTY vector (dropping every
# node) whenever no node had been merged.
signature_nodes <- names(node_signatures)
was_merged <- signature_nodes %in% duplicated_parents_df$source_original
addons <- data.frame(signature_nodes[!was_merged])
names(addons)[1] <- "source"
addons$source_original <- addons$source

# Terminal nodes (targets that never act as a source) also map onto themselves
is_leaf <- !(meta_network_compressed_current$target %in%
  meta_network_compressed_current$source)
final_leaves <- meta_network_compressed_current[is_leaf, "target"]
final_leaves <- as.data.frame(cbind(final_leaves, final_leaves))
names(final_leaves) <- names(addons)

addons <- as.data.frame(rbind(addons, final_leaves))

mapping_table <- as.data.frame(rbind(duplicated_parents_df, addons))

MOON_res <- merge(MOON_res, mapping_table, by = "source")

# Save the whole result for later: put the 4th column (source_original, added
# last by the merge) first, followed by columns 2 and 3
MOON_res <- MOON_res[, c(4, 2, 3)]

# Add the RNA data to the attribute table
RNA_data <- as.data.frame(gbm_weights_rna[, factor_n, drop = FALSE])
RNA_data$source_original <- row.names(RNA_data)
RNA_data <- RNA_data[, c(2, 1)]
names(RNA_data)[2] <- "RNA_data"

# Left join: nodes without an RNA measurement are kept with NA
MOON_res <- merge(MOON_res, RNA_data, by = "source_original", all.x = TRUE)

```

```{r}
# Export the node attributes (MOON scores + RNA data) and the network as CSV.
# The third network column is exported under the name "sign".
SIF <- meta_network_filtered
names(SIF)[3] <- "sign"

# write_csv() does not create missing folders; make sure "results/" exists so
# the export does not fail on a fresh checkout
dir.create("results", showWarnings = FALSE, recursive = TRUE)

write_csv(MOON_res, file = "results/node_attributes.csv")
write_csv(SIF, file = "results/SIF_network.csv")
```

Create a reduced solution network from a given upstream node.
```{r}
# Distribution of the MOON scores
plot(density(MOON_res$score))

# Kinase activity scores of the current factor as a named vector
gbm_kinases_df_factor <- gbm_kinases_df[, factor_n]
names(gbm_kinases_df_factor) <- row.names(gbm_kinases_df)

# Select the top significant kinase activities for the current factor
# (absolute score above 2)
gbm_kinases_df_factor <- gbm_kinases_df_factor[abs(gbm_kinases_df_factor) > 2]

# Get the reduced solution network, using the selected kinases as upstream
# input and the RNA weights of the factor
solution_network <- reduce_solution_network(
  decoupleRnival_res = MOON_res,
  meta_network = meta_network_filtered,
  cutoff = 1,
  upstream_input = gbm_kinases_df_factor,
  RNA_input = RNA_input,
  n_steps = n_steps
)

SIF_reduced <- solution_network$SIF
names(SIF_reduced)[3] <- "sign"
ATT_reduced <- solution_network$ATT

# Export to CSV; the files can be imported as network and attributes in
# Cytoscape (see online tutorials on importing CSV networks in Cytoscape for
# visualisation).
# write_csv() does not create missing folders; make sure "results/" exists.
dir.create("results", showWarnings = FALSE, recursive = TRUE)
write_csv(ATT_reduced, file = "results/ATT_reduced.csv")
write_csv(SIF_reduced, file = "results/SIF_reduced.csv")

```
# Pathway control analysis
```{r}
# Universe for the pathway analysis: every distinct node of the reduced network
background_nodes <- unique(ATT_reduced[["nodes"]])
```

```{r import_gmt function, include=FALSE}
#' Import a GMT gene-set file as a long gene-to-term table
#'
#' @param gmtfile Path (or connection) handed to `GSEABase::getGmt()`.
#' @param fast If `TRUE` (default), the per-set tables are combined with
#'   `plyr::ldply()` (showing a text progress bar); otherwise they are
#'   combined with base R only.
#' @return A data frame with the columns `gene` and `term`, one row per
#'   gene / gene-set pair, restricted to complete rows.
import_gmt <- function(gmtfile, fast = TRUE) {
  genesets <- GSEABase::getGmt(con = gmtfile)
  genesets <- unlist(genesets)

  # Long (gene, term) table for one gene set; shared by both branches
  geneset_to_df <- function(geneset) {
    genes <- GSEABase::geneIds(geneset)
    set_name <- GSEABase::setName(geneset)
    as.data.frame(cbind(gene = genes, term = rep(set_name, length(genes))))
  }

  if (fast) {
    gene_to_term <- plyr::ldply(
      genesets,
      geneset_to_df,
      .progress = plyr::progress_text()
    )
    names(gene_to_term) <- c("gene", "term")
  } else {
    # Collect the pieces in a preallocated list and bind them once, instead of
    # growing a data frame with rbind() on every iteration
    pieces <- vector("list", length(genesets))
    k <- 0L
    for (geneset in genesets) {
      k <- k + 1L
      pieces[[k]] <- geneset_to_df(geneset)
    }
    gene_to_term <- do.call(rbind, pieces)
    if (is.null(gene_to_term)) {
      # No gene set at all: return an empty table with the expected columns
      gene_to_term <- data.frame(gene = character(0), term = character(0))
    }
  }

  gene_to_term[complete.cases(gene_to_term), ]
}

## Feature set
# Gene-to-pathway table read from the GMT collection
pathways_df <- data.frame(import_gmt("c2.cp.v2022.1.Hs.symbols.gmt"))

# Keep the terms containing "NABA_" or "KEGG_" and rename the columns to
# target (first column, the gene) and source (second column, the term)
term_is_naba <- grepl("NABA_", pathways_df$term)
term_is_kegg <- grepl("KEGG_", pathways_df$term)
pathways_NABA_KEGG <- data.frame(pathways_df[term_is_naba | term_is_kegg, ])
names(pathways_NABA_KEGG) <- c("target", "source")
```

```{r,  include=FALSE}
# For every strongly scored node of the reduced network, collect the nodes
# found within 2 steps of it and test them for pathway over-representation
# against the background of all nodes of the reduced network.
top_nodes <- unique(ATT_reduced[abs(ATT_reduced$score) > 2, "nodes"])
pathways_NABA_KEGG <- pathways_NABA_KEGG[pathways_NABA_KEGG$target %in% background_nodes, ]
names(SIF_reduced)[c(1:3)] <- c("source", "target", "interaction")

# The gene-set collection does not change between nodes: it is built once, on
# first use, instead of on every iteration
pathways_gsc <- NULL

pathway_control_set <- list()
i <- 1
for (node_of_interest in top_nodes) {
  neighbourhood <- cosmosR:::keep_controllable_neighbours(
    SIF_reduced,
    n_steps = 2,
    input_nodes = node_of_interest
  )
  downstream_nodes <- unique(unlist(neighbourhood[, c(1, 2)]))

  if (length(downstream_nodes) > 0) {
    # Remove the node itself. A logical mask is used instead of x[-which(...)],
    # which would EMPTY the vector when the node is not among its neighbours.
    downstream_nodes <- downstream_nodes[!(downstream_nodes %in% node_of_interest)]
    downstream_nodes <- downstream_nodes[downstream_nodes %in% background_nodes]
    if (length(downstream_nodes) > 0) {
      if (is.null(pathways_gsc)) {
        pathways_gsc <- piano::loadGSC(pathways_NABA_KEGG)
      }
      res_ORA <- as.data.frame(
        piano::runGSAhyper(
          genes = downstream_nodes,
          universe = background_nodes,
          gsc = pathways_gsc
        )$resTab
      )
      # log2 of: col3 / (col3 + col4) divided by col5 / (col5 + col6)
      # NOTE(review): presumably the in-set vs. not-in-set hit fractions of
      # the piano result table - confirm against the runGSAhyper documentation
      res_ORA$log2fold_ratio <- log2(
        (res_ORA[, 3] / (res_ORA[, 3] + res_ORA[, 4])) /
          (res_ORA[, 5] / (res_ORA[, 5] + res_ORA[, 6]))
      )
      res_ORA$node_of_interest <- node_of_interest
      res_ORA$pathway <- row.names(res_ORA)

      pathway_control_set[[i]] <- res_ORA
      i <- i + 1
    }
  }
}
# Stack the per-node result tables into one long data frame
pathway_control_set <- do.call(rbind, pathway_control_set)
```

```{r}
# Wide table of p-values: one row per pathway, one column per node of interest
pathway_control_df <- reshape2::dcast(pathway_control_set, pathway ~ node_of_interest, value.var = "p-value")
row.names(pathway_control_df) <- pathway_control_df$pathway

# drop = FALSE keeps a data frame (and its pathway row names) even when only
# a single node column is present or retained
pathway_control_df <- pathway_control_df[, -1, drop = FALSE]

# Keep the nodes with at least one pathway p-value below 0.1
node_has_hit <- apply(pathway_control_df, 2, function(x) min(x) < 0.1)
pathway_control_df <- pathway_control_df[, node_has_hit, drop = FALSE]


```

We can then visualise the result of the pathway control analysis in a heatmap

```{r, fig.height=3.3, fig.width=9.5}
# A pathway (row) or node (column) is shown only if its smallest p-value is
# below this threshold
threshold_pval <- 0.0000001

# Leave out the pathways whose name contains "CANCER"
pathway_control_df_top <- pathway_control_df[!grepl("CANCER", row.names(pathway_control_df)), ]

# Both masks are computed on the same table before subsetting; drop = FALSE
# keeps a data frame even if a single column passes the threshold
row_is_top <- apply(pathway_control_df_top, 1, function(x) min(x) < threshold_pval)
col_is_top <- apply(pathway_control_df_top, 2, function(x) min(x) < threshold_pval)
pathway_control_df_top <- pathway_control_df_top[row_is_top, col_is_top, drop = FALSE]

# Bin -log10(p) into four colour levels: 0, 2, 5 and 10
pathway_control_df_top <- -log10(pathway_control_df_top)
# pathway_control_df_top[pathway_control_df_top < 3] <- NA
pathway_control_df_top[pathway_control_df_top >= 10] <- 10
pathway_control_df_top[pathway_control_df_top >= 5 & pathway_control_df_top < 10] <- 5
pathway_control_df_top[pathway_control_df_top >= 2 & pathway_control_df_top < 5] <- 2
pathway_control_df_top[pathway_control_df_top < 2] <- 0

# Tidy the labels for display
row.names(pathway_control_df_top) <- tolower(gsub("_", " ", gsub("KEGG", "", row.names(pathway_control_df_top))))
# NOTE(review): "_[a-z$]" removes "_" followed by a lowercase letter or a
# literal "$" anywhere in the name; "_[a-z]$" (a trailing suffix) may have
# been intended - confirm before changing.
names(pathway_control_df_top) <- gsub("Metab__", "", gsub("_[a-z$]", "", names(pathway_control_df_top)))

pheatmap::pheatmap(
  pathway_control_df_top,
  angle_col = 315,
  na_col = "grey",
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  display_numbers = FALSE,
  number_color = "black",
  color = colorRampPalette(c("white", "red"))(100),
  treeheight_row = 0,
  treeheight_col = 0
)
```