Source code for BLEval.computeNetMotifs

import pandas as pd
import numpy as np
import seaborn as sns
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(rc={"lines.linewidth": 2}, palette  = "deep", style = "ticks")
from sklearn.metrics import precision_recall_curve, roc_curve, auc
from itertools import product, permutations, combinations, combinations_with_replacement
from tqdm import tqdm
import networkx as nx

def Motifs(datasetDict, inputSettings):
    '''
    Computes ratios of the counts of various network motifs
    for each algorithm for a given dataset. The ratios are
    computed by dividing the counts of various network motifs
    in the predicted top-k network by their respective values
    in the reference network.

    Algorithms named 'PPCOR' and 'PIDC' are skipped and do not
    appear in the result. An algorithm whose rankedEdges.csv is
    missing or contains no usable edges gets a ratio of 0 for
    every motif.

    :param datasetDict: A dictionary containing the dataset name ('name') and the file name of the reference network ('trueEdges').
    :type datasetDict: dict

    :param inputSettings: An object of class :class:`BLEval.InputSettings`; its ``datadir`` and ``algorithms`` attributes are read here.
    :type inputSettings: :class:`BLEval.InputSettings`

    :returns:
        - FBL: A pandas Series (indexed by algorithm name) of ratios of three-node feedback loop motifs
        - FFL: A pandas Series (indexed by algorithm name) of ratios of three-node feedforward loop motifs
        - MI: A pandas Series (indexed by algorithm name) of ratios of two-node mutual interaction motifs

    :raises IndexError: if ``inputSettings.datadir`` does not contain the substring ``"inputs/"``.
    '''
    # Read file for trueEdges
    trueEdgesDF = pd.read_csv(str(inputSettings.datadir) + '/' + datasetDict['name'] +
                              '/' + datasetDict['trueEdges'],
                              sep = ',', header = 0, index_col = None)

    # Every gene mentioned in the reference file. Predicted edges are
    # only considered when both end points belong to this set.
    refGenes = set(trueEdgesDF['Gene1']).union(trueEdgesDF['Gene2'])

    # Build the reference network straight from the edge rows
    # (self-loops are excluded; DiGraph ignores repeated edges).
    refGraph = nx.DiGraph()
    refGraph.add_edges_from((u, v)
                            for u, v in zip(trueEdgesDF['Gene1'], trueEdgesDF['Gene2'])
                            if u != v)

    numEdges = refGraph.number_of_edges()
    refFB, refFF, refMI = getNetProp(refGraph)

    # To avoid dividing by zero while computing the ratios
    # set the motif counts in reference network to 1 if is 0
    if refFB == 0:
        refFB = 1
    if refFF == 0:
        refFF = 1
    if refMI == 0:
        refMI = 1

    # set-up outDir that stores output directory name
    outDir = "outputs/" + str(inputSettings.datadir).split("inputs/")[1] + '/' + datasetDict['name']

    dataDict = {'FFL': {}, 'FBL': {}, 'Mutual': {}}

    for algo in tqdm(inputSettings.algorithms,
                     total = len(inputSettings.algorithms), unit = " Algorithms"):
        algoName = algo[0]

        # NOTE(review): presumably skipped because these methods do not
        # produce directed edges -- confirm with the authors.
        if algoName == 'PPCOR' or algoName == 'PIDC':
            continue

        rankedEdgesFile = outDir + '/' + algoName + '/rankedEdges.csv'

        # check if the output rankedEdges file exists
        if not Path(rankedEdgesFile).exists():
            print(rankedEdgesFile, ' does not exist. Skipping...')
            for motif in dataDict:
                dataDict[motif][algoName] = 0
            continue

        predDF = pd.read_csv(rankedEdgesFile, sep = '\t', header = 0, index_col = None)

        # Drop self-loops and duplicated rows, then renumber so that
        # positional and label based indexing agree.
        predDF = predDF.loc[predDF['Gene1'] != predDF['Gene2']]
        predDF = predDF.drop_duplicates(keep = 'first').reset_index(drop = True)

        # check if ranked edges list is empty
        if predDF.shape[0] == 0:
            # no edges are predicted, set to 0!
            for motif in dataDict:
                dataDict[motif][algoName] = 0
            continue

        predDF['EdgeWeight'] = predDF['EdgeWeight'].round(6).abs()

        # Use num True edges or the number of
        # edges in the dataframe, which ever is lower
        maxk = min(predDF.shape[0], numEdges)
        edgeWeightTopk = predDF.iloc[maxk-1].EdgeWeight

        # We want to ensure that we do not include edges without
        # any edge weight, so the threshold is the larger of the
        # non-zero minimum weight and the weight of the top-kth edge.
        nonZeroMin = np.nanmin(predDF.EdgeWeight.replace(0, np.nan).values)
        bestVal = max(nonZeroMin, edgeWeightTopk)

        newDF = predDF.loc[predDF['EdgeWeight'] >= bestVal]

        # Keep only edges whose two genes both occur in the reference file.
        inRef = newDF['Gene1'].isin(refGenes) & newDF['Gene2'].isin(refGenes)
        predGraph = nx.DiGraph()
        predGraph.add_edges_from(zip(newDF.loc[inRef, 'Gene1'],
                                     newDF.loc[inRef, 'Gene2']))

        predFB, predFF, predMI = getNetProp(predGraph)
        dataDict['FBL'][algoName] = predFB/refFB
        dataDict['FFL'][algoName] = predFF/refFF
        dataDict['Mutual'][algoName] = predMI/refMI

    dataDF = pd.DataFrame(dataDict)
    return dataDF['FBL'], dataDF['FFL'], dataDF['Mutual']
def getNetProp(inGraph):
    '''
    A helper function to compute counts of various network motifs.

    Motifs are found with direct neighbour look-ups around each edge
    instead of enumerating every simple cycle of the graph, so the
    work per edge is bounded by the out-degrees of its end points.
    Motifs are counted per set of nodes: several feedback (or
    feed-forward) loops on the same three nodes count once.

    :param inGraph: A graph object of class :class:`networkx.DiGraph`.
    :type inGraph: :obj:networkx.DiGraph

    :returns:
        - A value corresponding to the number of three-node feedback loops
        - A value corresponding to the number of three-node feedforward loops
        - A value corresponding to the number of two-node mutual interaction
    '''
    fbTriples = set()
    ffTriples = set()
    numMI = 0.0

    for u, v in inGraph.edges():
        # mutual interaction: each of the two directed edges adds one half
        if inGraph.has_edge(v, u):
            numMI += 0.5

        # a self-loop cannot be part of a three-node motif
        if u == v:
            continue

        # feedback loop: u -> v -> w -> u on three distinct nodes
        for w in inGraph[v]:
            if w != u and w != v and inGraph.has_edge(w, u):
                fbTriples.add(frozenset((u, v, w)))

        # feed-forward loop: the edge u -> v together with
        # a two-step path u -> w -> v through a third node
        for w in inGraph[u]:
            if w != u and w != v and inGraph.has_edge(w, v):
                ffTriples.add(frozenset((u, w, v)))

    return len(fbTriples), len(ffTriples), numMI