# Source code for BLRun.jump3Runner
import os
import pandas as pd
import numpy as np
from BLRun.runner import Runner
class JUMP3Runner(Runner):
    """Concrete runner for the JUMP3 GRN inference algorithm.

    The three steps share files through ``self.working_dir``:
    ``generateInputs`` writes ``ExpressionData.csv`` there, ``run`` mounts
    the directory into the docker container, and ``parseOutput`` reads the
    ``outFile.txt`` produced by the container.
    """

    def generateInputs(self):
        '''
        Function to generate desired inputs for JUMP3.

        Writes ``ExpressionData.csv`` into ``self.working_dir``. If that
        file already exists, this function will not do anything.
        '''
        # Create ExpressionData.csv file in the working directory
        JUMP3_EXPRESSION_FILE = self.working_dir / "ExpressionData.csv"
        if JUMP3_EXPRESSION_FILE.exists():
            return

        ExpressionData = pd.read_csv(self.input_dir / self.exprData,
                                     header=0, index_col=0)
        # Transpose so that the original column labels become the row index
        # and the original row labels (the gene list used by parseOutput)
        # become the columns.
        newExpressionData = ExpressionData.T.copy()

        PTData = pd.read_csv(self.input_dir / self.pseudoTimeData,
                             header=0, index_col=0)

        # make sure the indices are strings for both dataframes, so that the
        # column assignments below align on matching labels
        newExpressionData.index = newExpressionData.index.map(str)
        PTData.index = PTData.index.map(str)

        # Acc. to JUMP3:
        # In input argument Time, the first time point of each time series
        # must be 0. Also has to be an integer!
        # NOTE(review): the values are only shifted so the minimum is 0; no
        # integer cast is applied here -- confirm that the pseudotime column
        # already holds integral values.
        newExpressionData['Time'] = (PTData['PseudoTime']
                                     - PTData['PseudoTime'].min())

        if 'Experiment' in PTData:
            newExpressionData['Experiment'] = PTData['Experiment']
        else:
            # No experiment column was supplied: treat every row as part of
            # experiment 1. (Deriving the experiment number x from names of
            # the form Ex_y was considered but is not done.)
            newExpressionData['Experiment'] = 1

        # The row labels are deliberately not written (index=False).
        newExpressionData.to_csv(JUMP3_EXPRESSION_FILE,
                                 sep=',', header=True, index=False)

    def run(self):
        '''
        Function to run JUMP3 algorithm.

        Builds a ``docker run`` command that mounts ``self.working_dir`` at
        ``/usr/working_dir`` inside the ``self.image`` container, runs
        ``./runJump3`` on the generated ``ExpressionData.csv`` and asks
        ``time -v`` to write its report to ``time.txt`` in the same
        directory. The command is handed to ``self._run_docker``.
        '''
        cmdToRun = ' '.join([
            'docker run --rm',
            f"-v {self.working_dir}:/usr/working_dir",
            f'{self.image} /bin/sh -c "time -v -o',
            "/usr/working_dir/time.txt",
            './runJump3',
            "/usr/working_dir/ExpressionData.csv",
            "/usr/working_dir/outFile.txt",
            '"',
        ])
        self._run_docker(cmdToRun)

    def parseOutput(self):
        '''
        Function to parse outputs from JUMP3.

        Reads the matrix in ``outFile.txt`` from ``self.working_dir``, ranks
        every (row, column) entry by absolute value in descending order and
        passes the resulting Gene1 / Gene2 / EdgeWeight table to
        ``self._write_ranked_edges``. Returns without writing anything when
        the output file is missing.
        '''
        outFile = self.working_dir / 'outFile.txt'

        # Quit if output file does not exist
        if not outFile.exists():
            print(str(outFile) + ' does not exist, skipping...')
            return

        # Read output. Rows and columns are used below as positional indices
        # into the gene list, so every line of the file must be kept as a
        # data row: header=None stops pandas from consuming the first row as
        # column names (which would shift all row indices by one).
        OutDF = pd.read_csv(outFile, sep=',', header=None)

        # Sort values in a matrix using code from:
        # https://stackoverflow.com/questions/21922806/sort-values-of-matrix-in-python
        OutMatrix = np.abs(OutDF.values)
        # Flattened argsort, reversed -> largest absolute weight first
        idx = np.argsort(OutMatrix, axis=None)[::-1]
        rows, cols = np.unravel_index(idx, OutMatrix.shape)
        DFSorted = OutMatrix[rows, cols]

        # Read the gene names from the same expression file that
        # generateInputs used, rather than a hard-coded file name.
        ExpressionData = pd.read_csv(self.input_dir / self.exprData,
                                     header=0, index_col=0)
        GeneList = list(ExpressionData.index)

        self._write_ranked_edges(pd.DataFrame({
            'Gene1': [GeneList[r] for r in rows],
            'Gene2': [GeneList[c] for c in cols],
            'EdgeWeight': DFSorted,
        }))