import os
import pandas as pd
from BLRun.runner import Runner
class SINCERITIESRunner(Runner):
    """Concrete runner for the SINCERITIES GRN inference algorithm."""

    def run(self) -> None:
        """Run SINCERITIES once for every column of the pseudotime file.

        The pseudotime CSV (``self.input_dir / self.pseudoTimeData``) is read
        only to count its columns.  For column index ``idx`` a docker command
        is built that runs ``Rscript MAIN.R`` on
        ``/usr/working_dir/ExpressionData<idx>.csv`` inside ``self.image``,
        writes the result to ``/usr/working_dir/outFile<idx>.txt`` and stores
        the ``time -v`` report in ``/usr/working_dir/time<idx>.txt``.
        ``self.working_dir`` is mounted at ``/usr/working_dir``.
        """
        pt_data = pd.read_csv(self.input_dir / self.pseudoTimeData,
                              header=0, index_col=0)

        for idx in range(len(pt_data.columns)):
            # The opening quote after ``-c`` is closed by the final '"'
            # element, so the whole timed Rscript call is a single ``sh -c``
            # argument.
            cmd_to_run = ' '.join([
                'docker run --rm',
                f'-v {self.working_dir}:/usr/working_dir',
                f'{self.image} /bin/sh -c "time -v -o',
                f'/usr/working_dir/time{idx}.txt',
                'Rscript MAIN.R',
                f'/usr/working_dir/ExpressionData{idx}.csv',
                f'/usr/working_dir/outFile{idx}.txt',
                '"',
            ])
            # NOTE(review): ``append`` is False only for the first command;
            # presumably later runs append to a shared log -- confirm against
            # Runner._run_docker.
            self._run_docker(cmd_to_run, append=(idx > 0))

    def parseOutput(self) -> None:
        """Merge the per-column SINCERITIES outputs into one ranked edge list.

        Reads ``outFile<idx>.txt`` from ``self.working_dir`` for every column
        of the pseudotime file, keeps for each (SourceGENES, TargetGENES) pair
        the maximum ``Interaction`` value, sorts the edges by that value in
        descending order and hands the result to
        ``self._write_ranked_edges``.

        If any expected output file is missing, or the pseudotime file has no
        columns, a message is printed and nothing is written.
        """
        work_dir = self.working_dir
        pt_data = pd.read_csv(self.input_dir / self.pseudoTimeData,
                              header=0, index_col=0)

        frames = []
        for idx in range(len(pt_data.columns)):
            out_path = work_dir / f'outFile{idx}.txt'
            if not out_path.exists():
                # Quit if any output file does not exist.
                print(str(out_path) + ' does not exist, skipping...')
                return
            frames.append(pd.read_csv(out_path, sep=',', header=0))

        if not frames:
            # pd.concat raises ValueError on an empty list; report instead.
            print('No pseudotime columns found in '
                  + str(self.input_dir / self.pseudoTimeData)
                  + ', skipping...')
            return

        # Merge the dataframes by taking the maximum value from each one.
        out_df = pd.concat(frames)

        # Group-by-rows approach is from here:
        # https://stackoverflow.com/questions/53114609/pandas-how-to-remove-duplicate-rows-but-keep-all-rows-with-max-value
        pair_cols = ['SourceGENES', 'TargetGENES']
        best_score = out_df.groupby(pair_cols)['Interaction'].transform('max')
        res = out_df[out_df['Interaction'] == best_score]
        # Several runs can tie on the maximum for the same pair; keep only one
        # row per pair so no edge is reported more than once.
        res = res.drop_duplicates(subset=pair_cols)

        # Sort values in the dataframe.
        final_df = res.sort_values('Interaction', ascending=False)
        final_df = final_df.drop(labels='Edges', axis='columns')

        # SINCERITIES output is incorrectly ordered: its "source" column holds
        # what is written out as Gene2 and its "target" column holds Gene1.
        final_df = final_df.rename(columns={
            'SourceGENES': 'Gene2',
            'TargetGENES': 'Gene1',
            'Interaction': 'EdgeWeight',
        })
        self._write_ranked_edges(final_df[['Gene1', 'Gene2', 'EdgeWeight']])