import os
import shlex
import subprocess
from pathlib import Path

import numpy as np
import pandas as pd
def run(RunnerObj):
    '''
    Function to run SINCERITIES algorithm.

    One ``docker run`` of the ``sincerities:base`` image is launched per
    column of the cell-data CSV (``RunnerObj.cellData``). Only the column
    position ``idx`` is used: it selects ``ExpressionData<idx>.csv`` as the
    input and ``outFile<idx>.txt`` / ``time<idx>.txt`` as the outputs.

    The output directory is created if it does not exist. A non-zero exit
    status of a container is reported on stdout and the remaining runs still
    proceed; if docker itself cannot be launched the function reports it and
    returns early.

    :param RunnerObj: A BLRun object
    '''
    cwd = str(Path.cwd())

    # The current working directory is mounted into the container at
    # /SINCERITIES/data/, so every path handed to the container is prefixed
    # with "data".
    # NOTE(review): assumes RunnerObj.inputDir lies under the current working
    # directory and contains "inputs/"; otherwise split(...)[1] raises
    # IndexError.
    inputPath = "data" + str(RunnerObj.inputDir).split(cwd)[1] + "/SINCERITIES/"

    # make output dirs if they do not exist:
    outDir = "outputs/" + str(RunnerObj.inputDir).split("inputs/")[1] + "/SINCERITIES/"
    os.makedirs(outDir, exist_ok=True)

    PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
                         header=0, index_col=0)

    for idx in range(len(PTData.columns)):
        inFile = inputPath + "ExpressionData" + str(idx) + ".csv"
        outPath = "data/" + outDir + "outFile" + str(idx) + ".txt"
        timePath = "data/" + outDir + "time" + str(idx) + ".txt"

        # Command interpreted by /bin/sh inside the container; quote the
        # paths so spaces or shell metacharacters cannot split or alter it.
        innerCmd = ' '.join(['time -v -o', shlex.quote(timePath),
                             'Rscript MAIN.R',
                             shlex.quote(inFile), shlex.quote(outPath)])

        # Argument list instead of a shell string: no host-side shell parsing.
        cmdToRun = ['docker', 'run', '--rm',
                    '-v', cwd + ':/SINCERITIES/data/',
                    'sincerities:base',
                    '/bin/sh', '-c', innerCmd]
        print(' '.join(shlex.quote(arg) for arg in cmdToRun))

        try:
            status = subprocess.run(cmdToRun).returncode
        except OSError as err:
            # docker executable missing or not runnable: later iterations
            # would fail the same way, so stop here.
            print('Could not launch docker: ' + str(err))
            return

        if status != 0:
            print('SINCERITIES exited with status ' + str(status)
                  + ' for ' + inFile)
def parseOutput(RunnerObj):
    '''
    Function to parse outputs from SINCERITIES.

    Reads ``outFile<idx>.txt`` for every column of the cell-data CSV
    (``RunnerObj.cellData``), keeps for each (SourceGENES, TargetGENES) pair
    the largest ``Interaction`` value found across those files, and writes
    the edges sorted by decreasing weight to ``rankedEdges.csv``
    (tab-separated, columns Gene1, Gene2, EdgeWeight) in the SINCERITIES
    output directory.

    Nothing is written, and a message is printed, if any of the expected
    output files is missing or if the cell-data CSV has no columns.

    :param RunnerObj: An instance of the :class:`BLRun`
    '''
    outDir = "outputs/" + str(RunnerObj.inputDir).split("inputs/")[1] + "/SINCERITIES/"

    PTData = pd.read_csv(RunnerObj.inputDir.joinpath(RunnerObj.cellData),
                         header=0, index_col=0)

    OutSubDF = []
    for idx in range(len(PTData.columns)):
        # Read output
        outFile = outDir + 'outFile' + str(idx) + '.txt'
        if not Path(outFile).exists():
            # Quit if output file does not exist
            print(outFile + ' does not exist, skipping...')
            return
        OutSubDF.append(pd.read_csv(outFile, sep=',', header=0))

    if not OutSubDF:
        # pd.concat raises ValueError on an empty list.
        print('No columns found in ' + str(RunnerObj.cellData) + ', skipping...')
        return

    # Merge the dataframes by taking the maximum value from each DF.
    # ignore_index avoids duplicate index labels coming from the single files.
    outDF = pd.concat(OutSubDF, ignore_index=True)

    # Group by rows code is from here:
    # https://stackoverflow.com/questions/53114609/pandas-how-to-remove-duplicate-rows-but-keep-all-rows-with-max-value
    edgeKeys = ['SourceGENES', 'TargetGENES']
    groupMax = outDF.groupby(edgeKeys)['Interaction'].transform('max')
    res = outDF[outDF['Interaction'] == groupMax]
    # The filter above keeps every row tied at the maximum; keep one per edge
    # so that an edge is not listed several times in the ranking.
    res = res.drop_duplicates(subset=edgeKeys)

    # Sort values in the dataframe (stable sort keeps ties in a fixed order)
    finalDF = res.sort_values('Interaction', ascending=False, kind='mergesort')

    # SINCERITIES output is incorrectly ordered: its source column holds what
    # is written out as Gene2 and its target column what is written as Gene1.
    # Rename by name rather than by position.
    finalDF = finalDF.rename(columns={'SourceGENES': 'Gene2',
                                      'TargetGENES': 'Gene1',
                                      'Interaction': 'EdgeWeight'})

    # Only the three listed columns are written; others (e.g. 'Edges') are
    # left out.
    finalDF.to_csv(outDir + 'rankedEdges.csv', sep='\t',
                   columns=['Gene1', 'Gene2', 'EdgeWeight'], index=False)