# Source code for BLRun.grnvbemRunner
import os
import pandas as pd
from BLRun.runner import Runner
class GRNVBEMRunner(Runner):
    """Concrete runner for the GRN-VBEM GRN inference algorithm.

    Produces one expression-matrix input per pseudotime trajectory, runs the
    GRNVBEM docker image once per trajectory, and merges the per-trajectory
    outputs into a single ranked edge list.
    """

    def generateInputs(self):
        '''
        Generate the inputs required by GRN-VBEM.

        Creates the input folder at self.working_dir if it does not exist
        already. For each pseudotime trajectory (one column of the pseudotime
        file) it writes an ExpressionData<idx>.csv with cells ordered by
        pseudotime along the columns and genes along the rows. Existing files
        are overwritten.
        '''
        # The docstring contract says the folder is created on demand;
        # exist_ok avoids a race with other runners sharing the directory.
        os.makedirs(self.working_dir, exist_ok=True)

        expression_data = pd.read_csv(self.input_dir / self.exprData,
                                      header=0, index_col=0)
        pt_data = pd.read_csv(self.input_dir / self.pseudoTimeData,
                              header=0, index_col=0)
        for idx, col_name in enumerate(pt_data.columns):
            # Cells belonging to this trajectory have a non-null pseudotime.
            cells = pt_data[col_name].index[pt_data[col_name].notnull()]
            sub_pt = pt_data.loc[cells, :]
            sub_expr = expression_data[cells]
            # Reorder columns (cells) by increasing pseudotime.
            ordered = sub_expr[sub_pt.sort_values([col_name]).index.astype(str)]
            # GRNVBEM expects the gene names as a leading 'GENES' column
            # rather than as the index.
            ordered.insert(loc=0, column='GENES', value=ordered.index)
            ordered.to_csv(self.working_dir / f"ExpressionData{idx}.csv",
                           sep=',', header=True, index=False)

    def run(self):
        '''
        Run the GRN-VBEM algorithm inside docker, once per pseudotime
        trajectory. Wall-clock/resource usage is captured to time<idx>.txt
        via GNU time; inference output goes to outFile<idx>.txt.
        '''
        pt_data = pd.read_csv(self.input_dir / self.pseudoTimeData,
                              header=0, index_col=0)
        for idx in range(len(pt_data.columns)):
            cmd_to_run = ' '.join([
                'docker run --rm',
                f"-v {self.working_dir}:/usr/working_dir",
                f'{self.image} /bin/sh -c "time -v -o',
                f"/usr/working_dir/time{idx}.txt",
                './GRNVBEM',
                f"/usr/working_dir/ExpressionData{idx}.csv",
                f"/usr/working_dir/outFile{idx}.txt", '"'])
            # Append (rather than truncate) the docker log after the first run.
            self._run_docker(cmd_to_run, append=(idx > 0))

    def parseOutput(self):
        '''
        Parse the per-trajectory outputs from GRNVBEM and write one ranked
        edge list. When the same Parent->Child edge appears in several
        trajectories, the maximum probability is kept. Returns early (writing
        nothing) if any expected output file is missing.
        '''
        pt_data = pd.read_csv(self.input_dir / self.pseudoTimeData,
                              header=0, index_col=0)
        frames = []
        for indx in range(len(pt_data.columns)):
            out_path = self.working_dir / f'outFile{indx}.txt'
            # Quit if output file does not exist: merging a partial set of
            # trajectories would silently bias the edge ranking.
            if not out_path.exists():
                print(str(out_path) + ' does not exist, skipping...')
                return
            frames.append(pd.read_csv(out_path, sep='\t', header=0))
        merged = pd.concat(frames)
        # Keep, for each (Parent, Child) pair, only the row(s) achieving the
        # maximum probability across trajectories.
        best = merged[merged['Probability'] ==
                      merged.groupby(['Parent', 'Child'])['Probability'].transform('max')]
        self._write_ranked_edges(
            best.sort_values('Probability', ascending=False).rename(
                columns={'Parent': 'Gene1', 'Child': 'Gene2', 'Probability': 'EdgeWeight'}
            )[['Gene1', 'Gene2', 'EdgeWeight']]
        )