Source code for BLEval.computeSpearman

import os
import yaml
import argparse
import itertools
import numpy as np
import pandas as pd
import networkx as nx
from tqdm import tqdm
import multiprocessing
from pathlib import Path
import concurrent.futures
from itertools import permutations
from collections import defaultdict
from multiprocessing import Pool, cpu_count
from networkx.convert_matrix import from_pandas_adjacency

def Spearman(evalObject, algorithmName):
    """
    Compute the median pairwise Spearman correlation of predicted ranked
    edges, i.e., the outputs of different datasets generated from the same
    reference network, for a given algorithm.

    For every dataset, each ordered pair of distinct genes found in the
    dataset's true-edge file is scored with the absolute ``EdgeWeight`` of
    its first occurrence in the algorithm's ``rankedEdges.csv`` (0 when the
    pair is not predicted). The per-dataset score vectors are then
    correlated pairwise and the distinct dataset pairs are summarised.

    Datasets whose output directory does not exist, or whose ranked-edge
    file cannot be read, are skipped.

    :param evalObject: An object of class :class:`BLEval.BLEval`.
    :type evalObject: BLEval
    :param algorithmName: Name of the algorithm for which the Spearman
        correlation is computed.
    :type algorithmName: str

    :returns:
        - median: Median of the pairwise Spearman correlation values.
        - mad: Mean absolute deviation (around the mean) of the Spearman
          correlation values. This is the quantity the removed pandas
          ``Series.mad()`` method returned; it is *not* the median
          absolute deviation.

        Both values are NaN when fewer than two datasets could be loaded.
    """
    rankDict = {}
    for dataset in tqdm(evalObject.input_settings.datasets):
        trueEdgesDF = pd.read_csv(
            str(evalObject.input_settings.datadir) + '/' +
            dataset['name'] + '/' + dataset['trueEdges'],
            sep=',', header=0, index_col=None)

        # Every ordered pair of distinct genes is a candidate edge; edges
        # that are never predicted keep a weight of 0.
        genes = np.unique(trueEdgesDF.loc[:, ['Gene1', 'Gene2']])
        possibleEdges = list(permutations(genes, r=2))
        PredEdgeDict = {'|'.join(p): 0 for p in possibleEdges}

        outDir = str(evalObject.output_settings.base_dir) + \
            str(evalObject.input_settings.datadir).split("inputs")[1] + \
            "/" + dataset["name"] + "/" + algorithmName
        rank_path = outDir + "/rankedEdges.csv"
        if not os.path.isdir(outDir):
            continue

        try:
            predEdgeDF = pd.read_csv(rank_path, sep="\t",
                                     header=0, index_col=None)
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError: pandas parser,
            # empty-data and decoding errors. Skipping is deliberate
            # best-effort behaviour.
            print("Skipping spearman computation for ", algorithmName,
                  "on path", outDir)
            continue

        # Single pass over the predictions, keeping the first weight seen
        # for each (Gene1, Gene2) pair, instead of re-scanning the whole
        # DataFrame once per candidate edge.
        firstWeight = {}
        for gene1, gene2, weight in zip(predEdgeDF['Gene1'],
                                        predEdgeDF['Gene2'],
                                        predEdgeDF['EdgeWeight']):
            firstWeight.setdefault((gene1, gene2), weight)

        for edge in possibleEdges:
            if edge in firstWeight:
                PredEdgeDict['|'.join(edge)] = np.abs(firstWeight[edge])

        rankDict[dataset["name"]] = PredEdgeDict

    # A pairwise correlation needs at least two datasets.
    if len(rankDict) < 2:
        return (float('nan'), float('nan'))

    spearmanDF = pd.DataFrame.from_dict(rankDict).corr(method='spearman')

    # Keep only the strict upper triangle so that each unordered pair of
    # datasets is counted once and the diagonal is excluded.
    upperMask = np.triu(np.ones(spearmanDF.shape, dtype=bool), k=1)
    values = pd.Series(spearmanDF.to_numpy()[upperMask], dtype=float).dropna()

    median = values.median()
    # Explicit replacement for the removed Series.mad(): mean absolute
    # deviation around the mean.
    mad = (values - values.mean()).abs().mean()
    return (median, mad)