import pandas as pd
import numpy as np
import seaborn as sns
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(rc={"lines.linewidth": 2}, palette = "deep", style = "ticks")
from sklearn.metrics import precision_recall_curve, roc_curve, auc
from itertools import product, permutations, combinations, combinations_with_replacement
from tqdm import tqdm
import networkx as nx
def _edgeListToDiGraph(edgeDF, allowedGenes):
    '''
    Build a directed graph from the ``Gene1``/``Gene2`` columns of an edge table.

    Self loops are dropped, and an edge is kept only when both of its
    endpoints are members of ``allowedGenes``.

    :param edgeDF: Table with (at least) the columns ``Gene1`` and ``Gene2``.
    :type edgeDF: :class:`pandas.DataFrame`
    :param allowedGenes: Genes that may appear as edge endpoints.
    :type allowedGenes: set

    :returns:
        A :class:`networkx.DiGraph` holding the retained edges.
    '''
    graph = nx.DiGraph()
    graph.add_edges_from(
        (u, v)
        for u, v in zip(edgeDF['Gene1'], edgeDF['Gene2'])
        if u != v and u in allowedGenes and v in allowedGenes)
    return graph


def Motifs(datasetDict, inputSettings):
    '''
    Computes ratios of the counts of various network motifs
    for each algorithm for a given dataset. The ratios are
    computed by dividing the counts of various network motifs
    in the predicted top-k network, by their respective values
    in the reference network.

    The algorithms named ``PPCOR`` and ``PIDC`` are skipped and do not
    appear in the result. An algorithm whose ``rankedEdges.csv`` is
    missing, or contains no usable edges, gets a ratio of 0 for every motif.

    :param datasetDict: A dictionary containing the dataset name (key ``name``) and the file name of the reference network (key ``trueEdges``).
    :type datasetDict: dict
    :param inputSettings: An object of class :class:`BLEval.InputSettings`; its ``datadir`` and ``algorithms`` attributes are read here.
    :type inputSettings: :class:`BLEval.InputSettings`

    :returns:
        - FBL: A :class:`pandas.Series`, indexed by algorithm name, containing ratios of three-node feedback loop motifs
        - FFL: A :class:`pandas.Series`, indexed by algorithm name, containing ratios of three-node feedforward loop motifs
        - MI: A :class:`pandas.Series`, indexed by algorithm name, containing ratios of two-node mutual interaction motifs
    '''
    # Read file for trueEdges
    trueEdgesDF = pd.read_csv(str(inputSettings.datadir)+'/'+ datasetDict['name'] +
                              '/' +datasetDict['trueEdges'],
                              sep = ',',
                              header = 0, index_col = None)

    # Every gene that occurs in the reference network. Predicted edges
    # touching any other gene are ignored further below.
    refGenes = set(trueEdgesDF['Gene1']).union(trueEdgesDF['Gene2'])

    # Build the reference graph straight from the edge rows instead of
    # probing the table once for every possible ordered gene pair.
    refGraph = _edgeListToDiGraph(trueEdgesDF, refGenes)
    numEdges = refGraph.number_of_edges()

    refFB, refFF, refMI = getNetProp(refGraph)

    # To avoid dividing by zero while computing the ratios
    # set the motif counts in reference network to 1 if is 0
    if refFB == 0:
        refFB = 1
    if refFF == 0:
        refFF = 1
    if refMI == 0:
        refMI = 1

    # set-up outDir that stores output directory name
    # (mirrors the part of datadir that follows "inputs/")
    outDir = "outputs/"+str(inputSettings.datadir).split("inputs/")[1]+ '/' + datasetDict['name']

    dataDict = {}
    dataDict['FFL'] = {}
    dataDict['FBL'] = {}
    dataDict['Mutual'] = {}

    for algo in tqdm(inputSettings.algorithms,
                     total = len(inputSettings.algorithms), unit = " Algorithms"):
        algoName = algo[0]
        if algoName in ('PPCOR', 'PIDC'):
            continue

        # Ratios reported when the file is missing or no edge is predicted.
        ratioFB, ratioFF, ratioMI = 0, 0, 0

        rankedEdgesFile = outDir + '/' + algoName + '/rankedEdges.csv'

        # check if the output rankedEdges file exists
        if Path(rankedEdgesFile).exists():
            predDF = pd.read_csv(rankedEdgesFile,
                                 sep = '\t', header = 0, index_col = None)
            # Drop self loops and duplicated rows; the non-inplace calls
            # avoid mutating a slice of the frame that was just read.
            predDF = predDF.loc[predDF['Gene1'] != predDF['Gene2']]
            predDF = predDF.drop_duplicates(keep = 'first').reset_index(drop = True)

            # check if ranked edges list is empty
            # if so, the ratios simply stay at 0
            if predDF.shape[0] != 0:
                # we want to ensure that we do not include
                # edges without any edge weight
                # so check if the non-zero minimum is
                # greater than the edge weight of the top-kth
                # node, else use the non-zero minimum value.
                predDF['EdgeWeight'] = predDF['EdgeWeight'].round(6).abs()

                # Use num True edges or the number of
                # edges in the dataframe, which ever is lower
                # NOTE(review): presumably the rows are sorted by decreasing
                # weight, so row maxk-1 is the k-th best edge -- confirm.
                # NOTE(review): when the reference has no edges maxk is 0 and
                # iloc[-1] selects the last row.
                maxk = min(predDF.shape[0], numEdges)
                edgeWeightTopk = predDF['EdgeWeight'].iloc[maxk - 1]

                nonZeroMin = np.nanmin(predDF['EdgeWeight'].replace(0, np.nan).values)
                bestVal = max(nonZeroMin, edgeWeightTopk)

                newDF = predDF.loc[predDF['EdgeWeight'] >= bestVal]

                # Predicted top-k network, restricted to reference genes.
                predGraph = _edgeListToDiGraph(newDF, refGenes)

                predFB, predFF, predMI = getNetProp(predGraph)
                ratioFB = predFB/refFB
                ratioFF = predFF/refFF
                ratioMI = predMI/refMI
        else:
            print(rankedEdgesFile,
                  ' does not exist. Skipping...')

        dataDict['FBL'][algoName] = ratioFB
        dataDict['FFL'][algoName] = ratioFF
        dataDict['Mutual'][algoName] = ratioMI

    dataDF = pd.DataFrame(dataDict)
    return dataDF['FBL'], dataDF['FFL'], dataDF['Mutual']
def getNetProp(inGraph):
    '''
    A helper function to compute
    counts of various network motifs.

    Three-node motifs are counted once per unordered set of three nodes:
    a node triple that supports several feedback (or several feedforward)
    loops still contributes 1 to the respective count.

    :param inGraph: A graph object of class :class:`networkx.DiGraph`.
    :type inGraph: :obj:networkx.DiGraph

    :returns:
        - A value corresponding to the number of three-node feedback loops
        - A value corresponding to the number of three-node feedforward loops
        - A value corresponding to the number of two-node mutual interaction
    '''
    fbTriples = set()
    ffTriples = set()
    numMI = 0.0

    for u, v in inGraph.edges():
        # number of mutual interactions: each direction of a reciprocal
        # pair adds one half, so a pair counts once in total.
        # (A self loop is its own reverse edge and therefore adds 0.5.)
        if inGraph.has_edge(v, u):
            numMI += 0.5

        # Self loops cannot take part in a three-node motif.
        if u == v:
            continue

        # feedback loop u -> v -> w -> u over three distinct nodes.
        # Looking only at direct successors avoids enumerating cycles of
        # every length just to keep the three-node ones.
        for w in inGraph.successors(v):
            if w != u and w != v and inGraph.has_edge(w, u):
                fbTriples.add(frozenset((u, v, w)))

        # feedforward loop: the edge u -> v together with a two-step
        # path u -> w -> v through a third node w.
        for w in inGraph.successors(u):
            if w != u and w != v and inGraph.has_edge(w, v):
                ffTriples.add(frozenset((u, w, v)))

    numFB = len(fbTriples)
    numFF = len(ffTriples)
    return numFB, numFF, numMI