# Source code for BLRun.scsglRunner

import os
import pandas as pd

from BLRun.runner import Runner


class SCSGLRunner(Runner):
    """Concrete runner for the scSGL GRN inference algorithm."""

    def generateInputs(self):
        """Stage the CSV inputs scSGL needs inside ``self.working_dir``.

        Two files are produced, each only if it is not already present in
        the working directory:

        * ``ExpressionData.csv`` -- copied from
          ``self.input_dir / self.exprData``.
        * ``GroundTruthNetwork.csv`` -- copied from
          ``self.ground_truth_file``.

        Both files are read with the first row as header and the first
        column as index, then rewritten comma-separated with the header.
        """
        # Expression matrix.
        expression_target = self.working_dir / "ExpressionData.csv"
        if not expression_target.exists():
            expression = pd.read_csv(
                self.input_dir / self.exprData, header=0, index_col=0
            )
            expression.to_csv(expression_target, sep=',', header=True)

        # Reference (ground-truth) network.
        ground_truth_target = self.working_dir / "GroundTruthNetwork.csv"
        if not ground_truth_target.exists():
            ground_truth = pd.read_csv(
                self.ground_truth_file, header=0, index_col=0
            )
            ground_truth.to_csv(ground_truth_target, sep=',', header=True)

    def run(self):
        """Build the docker command line for scSGL and execute it.

        Reads ``pos_density``, ``neg_density`` and ``assoc`` from
        ``self.params`` (a ``KeyError`` propagates if one is missing),
        mounts ``self.working_dir`` at ``/usr/working_dir`` in the
        container, and hands the assembled command string to
        ``self._run_docker``.
        """
        pos_density = str(self.params['pos_density'])
        neg_density = str(self.params['neg_density'])
        assoc = str(self.params['assoc'])

        # The pieces are joined with single spaces. The script invocation is
        # wrapped in `time -v -o <file>` inside the container's shell, so the
        # timing report is expected to land in time.txt in the working dir.
        command_parts = [
            'docker run --rm',
            f"-v {self.working_dir}:/usr/working_dir",
            '--expose=41269',
            f'{self.image} /bin/sh -c "time -v -o',
            "/usr/working_dir/time.txt",
            'python run_scSGL.py',
            '--expression_file=/usr/working_dir/ExpressionData.csv',
            '--ground_truth_net_file=/usr/working_dir/GroundTruthNetwork.csv',
            '--out_file=/usr/working_dir/outFile.txt',
            '--pos_density=' + pos_density,
            '--neg_density=' + neg_density,
            '--assoc=' + assoc,
            '"',  # closes the quote opened after `/bin/sh -c`
        ]
        self._run_docker(' '.join(command_parts))

    def parseOutput(self):
        """Convert scSGL's raw output into a ranked edge list.

        Looks for ``outFile.txt`` in ``self.working_dir``. If it is missing,
        a message is printed and the method returns without doing anything
        else. Otherwise the tab-separated file is loaded, rows are ordered
        by descending ``EdgeWeight``, and the ``Gene1``, ``Gene2`` and
        ``EdgeWeight`` columns are passed to ``self._write_ranked_edges``.
        """
        out_file = self.working_dir / 'outFile.txt'

        # Nothing to parse if the algorithm did not produce its output.
        if not out_file.exists():
            print(str(out_file) + ' does not exist, skipping...')
            return

        edges = pd.read_csv(out_file, sep='\t', header=0)
        ranked = edges.sort_values(by="EdgeWeight", ascending=False)
        self._write_ranked_edges(ranked[['Gene1', 'Gene2', 'EdgeWeight']])