Source code for BLRun.jump3Runner
import os
import pandas as pd
from pathlib import Path
import numpy as np
from sklearn import preprocessing
def generateInputs(RunnerObj):
    '''
    Function to generate desired inputs for JUMP3.

    Reads the expression matrix named by RunnerObj.exprData and the
    per-cell table named by RunnerObj.cellData (both resolved against
    RunnerObj.inputDir) and writes <inputDir>/JUMP3/ExpressionData.csv.
    The written file holds the transposed expression matrix (one row per
    column of the original file) plus a 'Time' and an 'Experiment' column;
    the row index itself is not written.

    If <inputDir>/JUMP3/ExpressionData.csv already exists,
    this function will not do anything.

    :param RunnerObj: object exposing ``inputDir`` (path object providing
        ``joinpath``), ``exprData`` and ``cellData`` (file names relative
        to ``inputDir``).
    '''
    jump3Dir = RunnerObj.inputDir.joinpath("JUMP3")
    if not jump3Dir.exists():
        print("Input folder for JUMP3 does not exist, creating input folder...")
        # exist_ok=True: do not crash if the folder appears between the
        # existence check above and this call (e.g. a concurrent run).
        jump3Dir.mkdir(exist_ok = True)

    jump3Input = jump3Dir.joinpath("ExpressionData.csv")
    if jump3Input.exists():
        # Input was generated by an earlier run; leave it untouched.
        return

    ExpressionData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.exprData),
                                 header = 0, index_col = 0)
    # Transpose so that the columns of the input file become rows
    newExpressionData = ExpressionData.T.copy()
    PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
                         header = 0, index_col = 0)

    # make sure the indices are strings for both dataframes,
    # so that the column assignments below align on matching labels
    newExpressionData.index = newExpressionData.index.map(str)
    PTData.index = PTData.index.map(str)

    # Acc. to JUMP3:
    # In input argument Time, the first time point of each time series must be 0.
    # Also has to be an integer!
    # NOTE(review): the shift below uses the global minimum over all cells and
    # performs no integer cast -- confirm this satisfies the two
    # requirements quoted above when several experiments are present.
    newExpressionData['Time'] = PTData['PseudoTime']-PTData['PseudoTime'].min()

    if 'Experiment' in PTData:
        newExpressionData['Experiment'] = PTData['Experiment']
    else:
        # No experiment annotation available: put every cell in experiment 1
        newExpressionData['Experiment'] = 1

    newExpressionData.to_csv(jump3Input,
                             sep = ',', header = True, index = False)
def run(RunnerObj):
    '''
    Function to run JUMP3 through its docker image (jump3:base).

    The current working directory is mounted into the container at
    /JUMP3/data/, so every path handed to ./runJump3 is the host path
    relative to the working directory, prefixed with "data".
    The assembled command is printed and then executed with os.system.

    :param RunnerObj: object exposing ``inputDir``; its string form must
        contain the current working directory and the segment "inputs/".
    '''
    inputDirStr = str(RunnerObj.inputDir)
    workDir = str(Path.cwd())

    # Part of the input directory that follows the working directory
    # (keeps its leading separator, hence no "/" after "data").
    inputSuffix = inputDirStr.split(workDir)[1]
    inputPath = "data" + inputSuffix + "/JUMP3/ExpressionData.csv"

    # make output dirs if they do not exist:
    datasetPart = inputDirStr.split("inputs/")[1]
    outDir = "outputs/" + datasetPart + "/JUMP3/"
    os.makedirs(outDir, exist_ok = True)

    # Same output folder, expressed relative to the container mount point
    containerOutDir = "data/" + str(outDir)
    outPath = containerOutDir + 'outFile.txt'
    timePath = containerOutDir + 'time.txt'

    cmdPieces = [
        'docker run --rm -v',
        workDir + ':/JUMP3/data/ jump3:base /bin/sh -c \"time -v -o',
        timePath,
        './runJump3',
        inputPath,
        outPath,
        '\"',
    ]
    cmdToRun = ' '.join(cmdPieces)
    print(cmdToRun)
    os.system(cmdToRun)
def parseOutput(RunnerObj):
    '''
    Function to parse outputs from JUMP3.

    Reads outputs/<dataset>/JUMP3/outFile.txt (comma separated), takes the
    absolute value of every entry, and writes all (row, column) pairs in
    decreasing order of that value to outputs/<dataset>/JUMP3/rankedEdges.csv
    as tab separated lines with the header Gene1, Gene2, EdgeWeight.
    Row and column positions are translated to gene names using the index
    of <inputDir>/ExpressionData.csv.

    <dataset> is the part of str(RunnerObj.inputDir) after "inputs/", and
    the "outputs/" path is relative to the current working directory.
    If outFile.txt does not exist a message is printed and nothing is written.

    :param RunnerObj: object exposing ``inputDir`` (path object providing
        ``joinpath``).
    '''
    outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/JUMP3/"
    outFilePath = outDir + 'outFile.txt'

    # Quit if the output file does not exist
    if not Path(outFilePath).exists():
        print(outFilePath + ' does not exist, skipping...')
        return

    # Read output
    # NOTE(review): with the default header handling the first line of
    # outFile.txt is consumed as column names -- confirm the file written by
    # runJump3 really starts with a header row, otherwise the first matrix
    # row is dropped and the gene lookup below is shifted by one.
    OutDF = pd.read_csv(outFilePath, sep = ',')

    # Sort values in a matrix using code from:
    # https://stackoverflow.com/questions/21922806/sort-values-of-matrix-in-python
    OutMatrix = np.abs(OutDF.values)
    # Flattened positions ordered from largest to smallest magnitude
    idx = np.argsort(OutMatrix, axis = None)[::-1]
    rows, cols = np.unravel_index(idx, OutDF.shape)
    DFSorted = OutMatrix[rows, cols]

    # read input file for list of gene names
    ExpressionData = pd.read_csv(RunnerObj.inputDir.joinpath('ExpressionData.csv'),
                                 header = 0, index_col = 0)
    # str() so that numeric gene identifiers can be joined below
    GeneList = [str(gene) for gene in ExpressionData.index]

    # Context manager guarantees the file is closed even if a lookup fails
    with open(outDir + 'rankedEdges.csv', 'w') as outFile:
        outFile.write('Gene1'+'\t'+'Gene2'+'\t'+'EdgeWeight'+'\n')
        for row, col, val in zip(rows, cols, DFSorted):
            outFile.write('\t'.join([GeneList[row], GeneList[col], str(val)])+'\n')