Source code for BLRun.scsglRunner
import os
import pandas as pd
from BLRun.runner import Runner
class SCSGLRunner(Runner):
    """Concrete runner for the scSGL GRN inference algorithm."""

    def generateInputs(self):
        '''
        Stage the input files for scSGL inside ``self.working_dir``.

        The expression data (``self.input_dir / self.exprData``) and the
        reference network (``self.ground_truth_file``) are each loaded with
        pandas and written back out as ``ExpressionData.csv`` and
        ``GroundTruthNetwork.csv``. A target file that already exists in
        the working directory is left untouched.
        '''
        # Expression matrix: copied only when not staged yet.
        expression_target = self.working_dir / "ExpressionData.csv"
        if not expression_target.exists():
            expression_frame = pd.read_csv(
                self.input_dir / self.exprData,
                header=0,
                index_col=0,
            )
            expression_frame.to_csv(expression_target, sep=',', header=True)

        # Reference network: same copy-if-missing treatment.
        network_target = self.working_dir / "GroundTruthNetwork.csv"
        if not network_target.exists():
            network_frame = pd.read_csv(
                self.ground_truth_file,
                header=0,
                index_col=0,
            )
            network_frame.to_csv(network_target, sep=',', header=True)

    def run(self):
        '''
        Assemble the docker command for scSGL and pass it to
        ``self._run_docker``.

        ``self.working_dir`` is mounted at ``/usr/working_dir`` in the
        container, and ``run_scSGL.py`` is invoked under
        ``time -v -o /usr/working_dir/time.txt``. The ``pos_density``,
        ``neg_density`` and ``assoc`` entries of ``self.params`` are
        stringified and forwarded as command-line flags.
        '''
        density_pos = str(self.params['pos_density'])
        density_neg = str(self.params['neg_density'])
        association = str(self.params['assoc'])

        # Docker-level options.
        docker_part = [
            'docker run --rm',
            f"-v {self.working_dir}:/usr/working_dir",
            '--expose=41269',
        ]
        # Image name followed by the opening of the quoted shell command.
        shell_part = [
            f'{self.image} /bin/sh -c "time -v -o',
            "/usr/working_dir/time.txt",
            'python run_scSGL.py',
        ]
        # Arguments handed to run_scSGL.py.
        script_flags = [
            '--expression_file=/usr/working_dir/ExpressionData.csv',
            '--ground_truth_net_file=/usr/working_dir/GroundTruthNetwork.csv',
            '--out_file=/usr/working_dir/outFile.txt',
            '--pos_density=' + density_pos,
            '--neg_density=' + density_neg,
            '--assoc=' + association,
        ]
        # The trailing element closes the double quote opened in shell_part.
        pieces = docker_part + shell_part + script_flags + ['"']
        self._run_docker(' '.join(pieces))

    def parseOutput(self):
        '''
        Read scSGL's ``outFile.txt`` and forward its edges, ordered by
        descending ``EdgeWeight``, to ``self._write_ranked_edges``.

        If the output file is absent from ``self.working_dir`` a message
        is printed and nothing else happens.
        '''
        result_path = self.working_dir / 'outFile.txt'

        # Nothing to parse when the output file is absent.
        if not result_path.exists():
            print(str(result_path) + ' does not exist, skipping...')
            return

        # Tab-separated table whose first row is the header.
        edges = pd.read_csv(result_path, sep='\t', header=0)
        edges = edges.sort_values(by="EdgeWeight", ascending=False)
        self._write_ranked_edges(edges[['Gene1', 'Gene2', 'EdgeWeight']])