# Source code for BLEval.computeSpearman

import os
import yaml
import argparse
import itertools
import numpy as np
import pandas as pd
import networkx as nx
from tqdm import tqdm
import multiprocessing
from pathlib import Path
import concurrent.futures
from itertools import permutations
from collections import defaultdict
from multiprocessing import Pool, cpu_count
from networkx.convert_matrix import from_pandas_adjacency

def Spearman(evalObject, algorithmName):
    """
    A function to compute median pairwise Spearman correlation
    of predicted ranked edges, i.e., the outputs of different datasets
    generated from the same reference network, for a given algorithm.

    For every dataset, each ordered pair of distinct genes found in the
    true-edges file is scored with the absolute ``EdgeWeight`` of its first
    occurrence in ``rankedEdges.csv`` (0 when the edge was not predicted).
    Datasets whose output directory or ranked-edges file is missing or
    unreadable are skipped.

    :param evalObject: An object of class :class:`BLEval.BLEval`.
    :type evalObject: BLEval

    :param algorithmName: Name of the algorithm for which the Spearman correlation is computed.
    :type algorithmName: str


    :returns:
        - median: Median of Spearman correlation values
        - mad: Mean absolute deviation (about the mean) of the Spearman
          correlation values. This is the quantity the former
          ``pandas.Series.mad`` returned; it is not the median absolute
          deviation.

        Both values are ``nan`` when fewer than two datasets could be read.
    """

    rankDict = {}
    for dataset in tqdm(evalObject.input_settings.datasets):
        trueEdgesDF = pd.read_csv(str(evalObject.input_settings.datadir) + '/' +
                                  dataset['name'] + '/' +
                                  dataset['trueEdges'], sep=',',
                                  header=0, index_col=None)
        genes = np.unique(trueEdgesDF.loc[:, ['Gene1', 'Gene2']])
        # Map each ordered gene pair to its 'Gene1|Gene2' label once, so the
        # predictions can be matched by tuple lookup rather than by splitting
        # the label again for every candidate edge.
        pairToKey = {pair: '|'.join(pair) for pair in permutations(genes, r=2)}
        PredEdgeDict = dict.fromkeys(pairToKey.values(), 0)

        outDir = str(evalObject.output_settings.base_dir) + \
                 str(evalObject.input_settings.datadir).split("inputs")[1] + \
                 "/" + dataset["name"] + "/" + algorithmName
        rank_path = outDir + "/rankedEdges.csv"
        if not os.path.isdir(outDir):
            continue
        try:
            predEdgeDF = pd.read_csv(rank_path, sep="\t", header=0, index_col=None)
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError: pandas'
            # EmptyDataError / ParserError and decoding errors.
            print("Skipping spearman computation for ", algorithmName, "on path", outDir)
            continue

        # Single pass over the predictions; only the first row seen for an
        # edge is used, matching a "first matching row" lookup.
        seen = set()
        for gene1, gene2, weight in zip(predEdgeDF['Gene1'],
                                        predEdgeDF['Gene2'],
                                        predEdgeDF['EdgeWeight']):
            key = pairToKey.get((gene1, gene2))
            if key is None or key in seen:
                continue
            seen.add(key)
            PredEdgeDict[key] = np.abs(weight)
        rankDict[dataset["name"]] = PredEdgeDict

    # At least two datasets are needed to form one pairwise correlation.
    if len(rankDict) < 2:
        return (np.nan, np.nan)

    spearmanDF = pd.DataFrame.from_dict(rankDict).corr(method='spearman')

    # Strict upper triangle: every unordered dataset pair exactly once,
    # without the diagonal of self-correlations.
    upper = np.triu_indices(spearmanDF.shape[0], k=1)
    values = pd.Series(spearmanDF.to_numpy()[upper], dtype=float).dropna()

    # Series.mad() was removed in pandas 2.0; reproduce what it computed.
    meanAbsDev = (values - values.mean()).abs().mean()

    return (values.median(), meanAbsDev)