Source code for BLRun.leapRunner
import os
import pandas as pd
import numpy as np
from BLRun.runner import Runner
class LEAPRunner(Runner):
    """Concrete runner for the LEAP GRN inference algorithm."""

    def generateInputs(self):
        '''
        Generate the per-trajectory expression files used by LEAP.

        Reads the expression matrix (``self.exprData``) and the pseudotime
        table (``self.pseudoTimeData``) from ``self.input_dir``. For every
        pseudotime column, the cells with a non-null pseudotime are selected,
        ordered by that pseudotime, and written to
        ``self.working_dir / "ExpressionData<idx>.csv"`` with a leading
        ``GENES`` column holding the row labels of the expression matrix.

        The files are rewritten on every call.
        '''
        ExpressionData = pd.read_csv(self.input_dir / self.exprData,
                                     header = 0, index_col = 0)
        PTData = pd.read_csv(self.input_dir / self.pseudoTimeData,
                             header = 0, index_col = 0)

        for idx, colName in enumerate(PTData.columns):
            # Select cells belonging to this pseudotime trajectory
            index = PTData[colName].index[PTData[colName].notnull()]
            subPT = PTData.loc[index, :]

            # Order cells by pseudotime. The labels are cast to str because
            # read_csv always yields string column labels for the expression
            # header, whereas an integer-looking cell-ID index column is
            # parsed as integers; selecting with the raw index would raise
            # KeyError in that case.
            orderedCells = subPT.sort_values([colName]).index.astype(str)
            newExpressionData = ExpressionData[orderedCells]
            newExpressionData.insert(loc = 0, column = 'GENES',
                                     value = newExpressionData.index)

            # Write .csv file
            exprName = f"ExpressionData{idx}.csv"
            newExpressionData.to_csv(self.working_dir / exprName,
                                     sep = ',', header = True, index = False)

    def run(self):
        '''
        Run the LEAP algorithm once per pseudotime trajectory.

        Requires the ``maxLag`` entry in ``self.params``. For trajectory
        ``idx`` the container reads ``ExpressionData<idx>.csv`` from the
        mounted working directory and writes ``outFile<idx>.txt`` and
        ``time<idx>.txt`` back into it.
        '''
        maxLag = str(self.params['maxLag'])
        PTData = pd.read_csv(self.input_dir / self.pseudoTimeData,
                             header = 0, index_col = 0)

        for idx in range(len(PTData.columns)):
            cmdToRun = ' '.join([
                'docker run --rm',
                f"-v {self.working_dir}:/usr/working_dir",
                f'{self.image} /bin/sh -c "time -v -o',
                f"/usr/working_dir/time{idx}.txt",
                'Rscript runLeap.R',
                f"/usr/working_dir/ExpressionData{idx}.csv",
                maxLag,
                f"/usr/working_dir/outFile{idx}.txt",
                '"',
            ])
            # append is False only for the first trajectory.
            # NOTE(review): presumably this makes the base-class helper start
            # a fresh log and then append to it -- confirm in Runner._run_docker.
            self._run_docker(cmdToRun, append=(idx > 0))

    def parseOutput(self):
        '''
        Parse the LEAP outputs into a single ranked edge list.

        Reads ``outFile<idx>.txt`` for every pseudotime trajectory, takes the
        absolute value of ``Score``, keeps for each ``(Gene1, Gene2)`` pair
        the highest score over all trajectories, and hands the edges, sorted
        by decreasing score and with ``Score`` renamed to ``EdgeWeight``, to
        ``self._write_ranked_edges``.

        If any expected output file is missing, a message is printed and the
        method returns without writing anything.
        '''
        workDir = self.working_dir
        PTData = pd.read_csv(self.input_dir / self.pseudoTimeData,
                             header = 0, index_col = 0)

        OutSubDF = []
        for indx in range(len(PTData.columns)):
            outFilePath = workDir / f'outFile{indx}.txt'
            # Quit if output file does not exist
            if not outFilePath.exists():
                print(str(outFilePath) + ' does not exist, skipping...')
                return
            # Read output; edges are ranked by magnitude, so drop the sign
            subDF = pd.read_csv(outFilePath, sep = '\t', header = 0)
            subDF['Score'] = np.abs(subDF['Score'])
            OutSubDF.append(subDF)

        outDF = pd.concat(OutSubDF)
        bestScore = outDF.groupby(['Gene1', 'Gene2'])['Score'].transform('max')
        # A pair that reaches the same best score in several trajectories
        # matches the filter more than once; keep a single row per pair so
        # the ranked list has no duplicate edges.
        FinalDF = outDF[outDF['Score'] == bestScore].drop_duplicates(
            subset=['Gene1', 'Gene2'])

        rankedEdges = FinalDF.sort_values('Score', ascending=False).rename(
            columns={'Score': 'EdgeWeight'})
        self._write_ranked_edges(rankedEdges[['Gene1', 'Gene2', 'EdgeWeight']])