import os
import pandas as pd
from pathlib import Path
import numpy as np
[docs]def run(RunnerObj):
'''
Function to run PPCOR algorithm
'''
inputPath = "data" + str(RunnerObj.inputDir).split(str(Path.cwd()))[1] + \
"/PPCOR/ExpressionData.csv"
# make output dirs if they do not exist:
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/PPCOR/"
os.makedirs(outDir, exist_ok = True)
outPath = "data/" + str(outDir) + 'outFile.txt'
cmdToRun = ' '.join(['docker run --rm -v', str(Path.cwd())+':/data/ ppcor:base /bin/sh -c \"time -v -o', "data/" + str(outDir) + 'time.txt', 'Rscript runPPCOR.R',
inputPath, outPath, '\"'])
print(cmdToRun)
os.system(cmdToRun)
[docs]def parseOutput(RunnerObj):
'''
Function to parse outputs from PPCOR.
'''
# Quit if output directory does not exist
outDir = "outputs/"+str(RunnerObj.inputDir).split("inputs/")[1]+"/PPCOR/"
if not Path(outDir+'outFile.txt').exists():
print(outDir+'outFile.txt'+'does not exist, skipping...')
return
# Read output
OutDF = pd.read_csv(outDir+'outFile.txt', sep = '\t', header = 0)
# edges with significant p-value
part1 = OutDF.loc[OutDF['pValue'] <= float(RunnerObj.params['pVal'])]
part1 = part1.assign(absCorVal = part1['corVal'].abs())
# edges without significant p-value
part2 = OutDF.loc[OutDF['pValue'] > float(RunnerObj.params['pVal'])]
outFile = open(outDir + 'rankedEdges.csv','w')
outFile.write('Gene1'+'\t'+'Gene2'+'\t'+'EdgeWeight'+'\n')
for idx, row in part1.sort_values('absCorVal', ascending = False).iterrows():
outFile.write('\t'.join([row['Gene1'],row['Gene2'],str(row['corVal'])])+'\n')
for idx, row in part2.iterrows():
outFile.write('\t'.join([row['Gene1'],row['Gene2'],str(0)])+'\n')
outFile.close()