Source code for BLRun.grnvbemRunner
import os
import pandas as pd
from pathlib import Path
import numpy as np
def generateInputs(RunnerObj):
    '''
    Function to generate desired inputs for GRNVBEM.
    It will create the input folder at RunnerObj.inputDir/GRNVBEM/ if it
    does not exist already. The input folder will contain an ExpressionData<idx>.csv
    for each pseudotime trajectory, with cells ordered according to the pseudotime
    along the columns, and genes along the rows. If the files already exist,
    this function will overwrite them.
    '''
    if not RunnerObj.inputDir.joinpath("GRNVBEM").exists():
        print("Input folder for GRNVBEM does not exist, creating input folder...")
        RunnerObj.inputDir.joinpath("GRNVBEM").mkdir(exist_ok = False)

    ExpressionData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.exprData),
                                 header = 0, index_col = 0)
    PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
                         header = 0, index_col = 0)

    colNames = PTData.columns
    for idx in range(len(colNames)):
        # Select cells belonging to each pseudotime trajectory
        colName = colNames[idx]
        index = PTData[colName].index[PTData[colName].notnull()]
        exprName = "GRNVBEM/ExpressionData" + str(idx) + ".csv"

        subPT = PTData.loc[index, :]
        subExpr = ExpressionData[index]
        # Order columns by PseudoTime
        newExpressionData = subExpr[subPT.sort_values([colName]).index.astype(str)]
        newExpressionData.insert(loc = 0, column = 'GENES',
                                 value = newExpressionData.index)
        # Write .csv file
        newExpressionData.to_csv(RunnerObj.inputDir.joinpath(exprName),
                                 sep = ',', header = True, index = False)
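
# Illustrative sketch (not part of the BLRun pipeline): generateInputs only relies on the
# inputDir, exprData, and cellData attributes of the runner object, so a minimal stand-in
# object would be enough to exercise it. The dataset path and file names below are
# hypothetical, chosen only for illustration.
#
#   from types import SimpleNamespace
#   dummyRunner = SimpleNamespace(inputDir = Path("inputs/example-dataset"),
#                                 exprData = "ExpressionData.csv",   # genes x cells matrix
#                                 cellData = "PseudoTime.csv")       # one column per trajectory
#   generateInputs(dummyRunner)
#   # Writes GRNVBEM/ExpressionData<idx>.csv under inputDir, one file per pseudotime column,
#   # with a leading 'GENES' column and cells ordered by increasing pseudotime.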

def run(RunnerObj):
    '''
    Function to run the GRN-VBEM algorithm.
    '''
    inputPath = "data" + str(RunnerObj.inputDir).split(str(Path.cwd()))[1]

    # Make output dirs if they do not exist:
    outDir = "outputs/" + str(RunnerObj.inputDir).split("inputs/")[1] + "/GRNVBEM/"
    os.makedirs(outDir, exist_ok = True)

    PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
                         header = 0, index_col = 0)

    colNames = PTData.columns
    for idx in range(len(colNames)):
        # Run GRN-VBEM inside its Docker container once per pseudotime trajectory,
        # recording runtime statistics to time<idx>.txt
        exprName = "/GRNVBEM/ExpressionData" + str(idx) + ".csv"
        outPath = 'data/' + str(outDir) + 'outFile' + str(idx) + '.txt'

        cmdToRun = ' '.join(['docker run --rm -v',
                             str(Path.cwd()) + ':/VBEM/data/ grnvbem:base /bin/sh -c \"time -v -o',
                             "data/" + str(outDir) + 'time' + str(idx) + '.txt',
                             './GRNVBEM', inputPath + exprName, outPath, '\"'])
        print(cmdToRun)
        os.system(cmdToRun)
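
# For reference, a sketch of the command this loop assembles for idx 0, assuming the dataset
# lives under <cwd>/inputs/example-dataset (the dataset name is hypothetical; the real values
# come from RunnerObj.inputDir). It is a single shell line, wrapped here for readability:
#
#   docker run --rm -v $(pwd):/VBEM/data/ grnvbem:base /bin/sh -c "time -v -o
#       data/outputs/example-dataset/GRNVBEM/time0.txt ./GRNVBEM
#       data/inputs/example-dataset/GRNVBEM/ExpressionData0.csv
#       data/outputs/example-dataset/GRNVBEM/outFile0.txt"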

def parseOutput(RunnerObj):
    '''
    Function to parse outputs from GRNVBEM.
    '''
    outDir = "outputs/" + str(RunnerObj.inputDir).split("inputs/")[1] + "/GRNVBEM/"

    PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
                         header = 0, index_col = 0)

    colNames = PTData.columns
    OutSubDF = [0] * len(colNames)
    for indx in range(len(colNames)):
        outFileName = 'outFile' + str(indx) + '.txt'
        # Quit if the output file does not exist
        if not Path(outDir + outFileName).exists():
            print(outDir + outFileName + ' does not exist, skipping...')
            return
        # Read the output for this trajectory
        OutSubDF[indx] = pd.read_csv(outDir + outFileName, sep = '\t', header = 0)

    outDF = pd.concat(OutSubDF)
    # For edges predicted in more than one trajectory, keep only the row with
    # the maximum posterior probability
    FinalDF = outDF[outDF['Probability'] == outDF.groupby(['Parent', 'Child'])['Probability'].transform('max')]

    # Write the ranked edge list, sorted by decreasing probability
    outFile = open(outDir + 'rankedEdges.csv', 'w')
    outFile.write('Gene1' + '\t' + 'Gene2' + '\t' + 'EdgeWeight' + '\n')
    for idx, row in FinalDF.sort_values(['Probability'], ascending = False).iterrows():
        outFile.write('\t'.join([row['Parent'], row['Child'], str(row['Probability'])]) + '\n')
    outFile.close()
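
# Illustrative sketch of the resulting rankedEdges.csv (tab-separated; the gene names and
# probabilities below are made up): each edge appears once, weighted by its highest posterior
# probability across trajectories and sorted in decreasing order. It can be read back with,
# e.g., pd.read_csv(outDir + 'rankedEdges.csv', sep = '\t').
#
#   Gene1   Gene2   EdgeWeight
#   g1      g2      0.95
#   g3      g1      0.87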