### File path ###
### Time point to analyse. It selects the input file below and is embedded in
### the names of the output files written later in this script.
# Time <- "24h"
Time <- "48h"

### Input table. The commented line is the alternative input from the
### NormalizedFiles.TF directory.
# Inpfile <- paste("NormalizedFiles.TF/NormalizedDatabyVoom.HighCPM.nobatch", Time, "tsv", sep = ".")
Inpfile <- paste("NormalizedFiles.all/NormalizedDatabyVoom.HighCPM.nobatch", Time, "tsv", sep = ".")

### Reading the normalized gene expression file.
### check.names = FALSE keeps the header exactly as written in the file, so
### column names such as "<compound>_<time>_1/2_<replicate>" are not rewritten
### into syntactic R names. The columns are matched against such names later,
### and one read is enough (no second pass to recover the original header).
X <- read.delim(Inpfile, fileEncoding = "latin1", check.names = FALSE)

### Vehicle (medium) used for each compound
DMSO_G <- c("Axitinib", "Bucillamine", "Sunitinib", "Doxorubicin",
            "CyclosporinA", "Ibuprofen", "ValproicAcid", "Acetonylacetone",
            "Cyclopamine", "Acrylamide", "Amiodarone", "Atorvastatin",
            "Chlorpromazine", "Chlorpheniramine", "Digoxin", "VerapamilHCl",
            "AmitriptylineHCl", "Itraconazole", "LithocholicAcid",
            "ButylatedHA")

WATER_G <- c("Cisplatin", "Gentamicin", "Thioacetamide", "MethapyrileneHCl")

### Compounds grouped by toxic property
GLOMERULUS_G <- c("Axitinib", "Bucillamine", "Sunitinib", "Doxorubicin")
TUBULE_G <- c("Cisplatin", "Gentamicin", "CyclosporinA", "Ibuprofen")
NERVE_G <- c("ValproicAcid", "Acetonylacetone", "Cyclopamine", "Acrylamide")
HEPATO_G <- c("Amiodarone", "Atorvastatin", "Chlorpromazine", "Chlorpheniramine")
CARDIO_G <- c("Digoxin", "VerapamilHCl", "AmitriptylineHCl", "Itraconazole")
NGC_G <- c("LithocholicAcid", "Thioacetamide", "ButylatedHA", "MethapyrileneHCl")

### Time points, concentrations, and replicate labels
TIME_G <- c("24h", "48h")
CONC_G <- c("1/1", "1/2", "1/4", "1/8", "1/16")
REPEAT_G <- c("1", "2")

### Treated groups: one group per compound x time x concentration, named
### "<compound>_<time>_<conc>"; its samples append "_<replicate>".
treated_compounds <- c(GLOMERULUS_G, TUBULE_G, NERVE_G, HEPATO_G, CARDIO_G, NGC_G)
# expand.grid varies its first factor fastest, so concentration is listed
# first to get the order compound > time > concentration.
treated_grid <- expand.grid(conc = CONC_G, time = TIME_G,
                            compound = treated_compounds,
                            stringsAsFactors = FALSE)
treated_names <- paste(treated_grid$compound, treated_grid$time,
                       treated_grid$conc, sep = "_")
treated_samples <- lapply(treated_names, function(group_name) {
  paste(group_name, REPEAT_G, sep = "_")
})

### Vehicle control groups: one group per vehicle x time, named
### "<vehicle>_<time>", pooling the control samples
### "<compound>_<time>_<vehicle>_<replicate>" of every compound that uses
### that vehicle (replicate varies fastest within each compound).
vehicle_samples <- function(vehicle, compounds) {
  lapply(TIME_G, function(time_point) {
    paste(rep(compounds, each = length(REPEAT_G)),
          time_point,
          vehicle,
          rep(REPEAT_G, times = length(compounds)),
          sep = "_")
  })
}

### Group labels and their sample names, in matching order: treated groups
### first, then the DMSO controls, then the Water controls.
cell.vnames <- as.list(c(treated_names,
                         paste("DMSO", TIME_G, sep = "_"),
                         paste("Water", TIME_G, sep = "_")))
ALL_SAMPLES <- c(treated_samples,
                 vehicle_samples("DMSO", DMSO_G),
                 vehicle_samples("Water", WATER_G))

cell.list <- ALL_SAMPLES
cell.use <- unlist(ALL_SAMPLES)

#############################################
## DEG analysis
#############################################
### setting group ids
library(edgeR)

### Map every expression column to its experimental group.
###   group_ids[s]  : index into cell.list / cell.vnames of the group whose
###                   sample names contain column s, or 0 if none does.
###   cell_types[s] : label of that group, or "No type" if none does.
sample_names <- colnames(X)

# Flattened lookup: for every known sample name, the index of its group.
known_samples <- unlist(cell.list)
known_sample_group <- rep(seq_along(cell.list), lengths(cell.list))

sample_hit <- match(sample_names, known_samples)
is_matched <- !is.na(sample_hit)

group_ids <- rep(0, ncol(X))
cell_types <- rep("No type", ncol(X))
group_ids[is_matched] <- known_sample_group[sample_hit[is_matched]]
# Extract the labels as plain strings so cell_types stays a character vector
# (cell.vnames is a list of one-element character vectors).
cell_types[is_matched] <- vapply(cell.vnames[group_ids[is_matched]],
                                 as.character, character(1))

for (found_type in cell_types[is_matched]) {
  message(paste("### Found", found_type))
}
if (any(!is_matched)) {
  # These columns end up in factor level "0" of the design below.
  warning(sprintf("%d column(s) of the input match no known sample name: %s",
                  sum(!is_matched),
                  paste(sample_names[!is_matched], collapse = ", ")),
          call. = FALSE)
}

### One design-matrix column per group (no intercept); columns are named
### "group<id>".
group <- factor(group_ids)
design <- model.matrix(~0 + group)
write.table(design, file = paste("design", Time, "tsv", sep = "."),
            sep = "\t", quote = FALSE)

fit <- lmFit(X, design)

### Build one "treated group - vehicle control" contrast for every treated
### group present in the design. The control is the DMSO or Water group of the
### same time point, depending on which vehicle list the compound is in.
group_levels <- unique(colnames(design))
level_ids <- as.integer(sub("^group", "", group_levels))
all_group_names <- vapply(cell.vnames, as.character, character(1))

contrast_defs <- rep(NA_character_, length(group_levels))
for (i in seq_along(group_levels)) {
  level_id <- level_ids[i]
  if (is.na(level_id) || level_id < 1) {
    # Level 0 collects columns that matched no known group: nothing to compare.
    next
  }

  # Group labels are "<compound>_<time>_<conc>" (or "<vehicle>_<time>").
  name_fields <- strsplit(all_group_names[level_id], "_", fixed = TRUE)[[1]]
  compound <- name_fields[1]
  if (compound %in% DMSO_G) {
    vehicle <- "DMSO"
  } else if (compound %in% WATER_G) {
    vehicle <- "Water"
  } else {
    # Control groups themselves (and anything unknown) get no contrast.
    next
  }

  control_name <- paste(vehicle, name_fields[2], sep = "_")
  control_level <- paste0("group", match(control_name, all_group_names))
  if (!(control_level %in% group_levels)) {
    stop(sprintf("No samples of control group '%s' found for '%s'",
                 control_name, all_group_names[level_id]),
         call. = FALSE)
  }

  contrast_defs[i] <- paste(group_levels[i], control_level, sep = " - ")
}
contrast_defs <- contrast_defs[!is.na(contrast_defs)]
if (length(contrast_defs) == 0) {
  stop("No treated group found in the design; nothing to compare",
       call. = FALSE)
}

# The contrasts are passed as a character vector instead of pasting R code
# together and evaluating it. Later steps only read the group numbers out of
# the contrast labels.
contr.matrix <- makeContrasts(contrasts = contrast_defs, levels = design)
print(contr.matrix)

contrasts <- contrasts.fit(fit, contr.matrix)
efit <- eBayes(contrasts)
pdf(paste("SAplot", Time, "pdf", sep = "."))
plotSA(efit, main = "Final model: Mean-variance trend")
dev.off()

#############################################
## Heatmap drawing
#############################################

library(stringr)
### Per-contrast DEG tables plus genome-wide logFC / adjusted p-value matrices.
###   DEGs.sig[[i]] : significant genes of all contrasts at CONC_G[i], stacked.
###   DEGs.sig.all  : union of the significant gene names.
###   LFC.all       : genes x contrasts table of logFC.
###   APV.all       : genes x contrasts table of adj.P.Val.
FDRthres <- 0.01          # adjusted p-value cutoff passed to topTable
LFCthres <- log2(2.0)     # minimum absolute log2 fold change passed to topTable
DEGs.sig <- rep(list(matrix(nrow = 0, ncol = 0)), length(CONC_G))
DEGs.sig.all <- vector()
LFC.all <- matrix(nrow = 0, ncol = 0)
APV.all <- matrix(nrow = 0, ncol = 0)

# Add one single-column table to an accumulated table, aligning by row name
# (full outer join), and keep the row names on the result.
append_stat_column <- function(stat_table, stat_column) {
  merged <- as.data.frame(merge(stat_table, stat_column,
                                by = "row.names", all = TRUE))
  rownames(merged) <- unlist(merged[, 1])
  merged[-1]
}

for (coef_id in seq_len(ncol(efit))) {
  DEGs <- topTable(efit, adjust.method = "BH", p.value = FDRthres,
                   number = nrow(efit), lfc = LFCthres, coef = coef_id)

  # The contrast label holds the two group numbers; turn them back into
  # "<treated group>-<control group>".
  contrast_group_ids <- as.integer(unlist(
    str_extract_all(colnames(contrasts)[coef_id], "[0-9]+")))
  compare_name <- paste(cell.vnames[contrast_group_ids], collapse = "-")

  for (i in seq_along(CONC_G)) {
    Conc <- CONC_G[i]
    # The treated group name ends with the concentration, right before "-".
    if (grepl(paste0(Conc, "-"), compare_name, fixed = TRUE)) {
      DEGs.sig[[i]] <- rbind(DEGs.sig[[i]], DEGs)
      message(paste("### ", compare_name, "is added to heatmap DEGs.sig of", Conc, "\n"))
      DEGs.sig.all <- unique(c(DEGs.sig.all, rownames(DEGs)))
    }
  }

  # "/" cannot appear in a file name, so it is replaced by "|".
  fname <- sprintf("DEG.%s.tsv", gsub("/", "|", compare_name))
  write.table(DEGs, file = fname, sep = "\t", quote = FALSE, row.names = TRUE)
  message(paste("Writing", fname, "\n", sep = " "))

  # Unfiltered statistics for every gene of this contrast.
  DEGs.all <- topTable(efit, adjust.method = "BH", number = nrow(efit),
                       coef = coef_id)

  LFC <- DEGs.all["logFC"]
  colnames(LFC) <- compare_name
  LFC.all <- append_stat_column(LFC.all, LFC)

  APV <- DEGs.all["adj.P.Val"]
  colnames(APV) <- compare_name
  APV.all <- append_stat_column(APV.all, APV)
}
fname <- sprintf("LFC.all.%s.tsv", Time)
write.table(LFC.all, file = fname, sep = "\t", quote = FALSE, row.names = TRUE)
fname <- sprintf("APV.all.%s.tsv", Time)
write.table(APV.all, file = fname, sep = "\t", quote = FALSE, row.names = TRUE)

library(pheatmap)
### One clustered heatmap (Ward D2) per concentration, showing the row-scaled
### logFC of the significant genes collected for that concentration.
pairs.breaks <- seq(-3, 3, length.out = 100)

for (i in seq_along(CONC_G)) {

  Conc <- CONC_G[i]

  # Columns of this concentration: labels are "<treated group>-<control>",
  # and the treated group name ends with the concentration.
  # drop = FALSE keeps a table even when only one column matches.
  conc_cols <- grep(paste0(Conc, "-"), colnames(LFC.all), fixed = TRUE)
  M <- LFC.all[, conc_cols, drop = FALSE]
  colnames(M) <- gsub("-.*$", "", colnames(M))
  #colnames(M) = gsub("-W.*$", "-W", colnames(M))
  #colnames(M) = gsub("-D.*$", "-D", colnames(M))

  #M <- X[,grep(paste(Conc,"_",sep=""), colnames(X))]
  #message(paste("### The colnames of M is", colnames(M), "\n"))
  #xodd <- seq(1, ncol(M), 2)
  #xeven <- seq(2, ncol(M), 2)
  #M <- M[xodd] + M[xeven]
  #colnames(M) = gsub("_1$", "", colnames(M))

  # Keep each significant gene once, in order of first appearance. Names that
  # do not occur in M (e.g. row names made unique by rbind) are dropped here
  # instead of producing all-NA rows.
  deg_rows <- unique(match(rownames(DEGs.sig[[i]]), rownames(M)))
  deg_rows <- deg_rows[!is.na(deg_rows)]
  M <- M[deg_rows, , drop = FALSE]

  ### To make contrast among samples do scale by genes ###
  # Rows that cannot be scaled (missing values, zero variance) become NA/NaN
  # and are removed.
  if (nrow(M) >= 2 && ncol(M) >= 2) {
    M <- na.omit(t(scale(t(M))))
  }

  # Clustering needs at least two rows and two columns; skip before any
  # graphics device is opened.
  if (nrow(M) < 2 || ncol(M) < 2) {
    warning(sprintf("Skipping heatmap for concentration %s: %d gene(s) x %d sample group(s)",
                    Conc, nrow(M), ncol(M)),
            call. = FALSE)
    next
  }

  fname <- sprintf("Heatmap.wardD2.%s.%s.pdf", Time, gsub("/", "|", Conc))
  pdf(fname)

  # Row labels skip the first 16 characters of the row name.
  pheatmap(M, cluster_cols = TRUE, cluster_rows = TRUE,
           labels_row = substring(rownames(M), 17),
           labels_col = colnames(M),
           fontsize_row = 3, fontsize_col = 8, angle_col = "90",
           breaks = pairs.breaks, clustering_method = "ward.D2")

  dev.off()
}

### Write one logFC table per compound: the contrast columns whose label
### contains "<compound>_<Time>", preceded by an "Ensembl" column holding the
### row name up to (not including) the first ":".
for (d in c(GLOMERULUS_G, TUBULE_G, NERVE_G, HEPATO_G, CARDIO_G, NGC_G)) {
  name <- paste(d, Time, sep = "_")
  # drop = FALSE keeps a table even when only one column matches.
  M <- LFC.all[, grep(name, colnames(LFC.all), fixed = TRUE), drop = FALSE]

  # Column labels are "<treated group>-<control>"; keep the treated group.
  colnames(M) <- gsub("-.*$", "", colnames(M))
  # The shortened IDs go straight into the output column. Row names are not
  # written, so they are left alone (resetting them fails on duplicates).
  M <- cbind(Ensembl = gsub(":.*$", "", rownames(M)), M)
  fname <- sprintf("LFC.%s.%s.tsv", d, Time)
  write.table(M, file = fname, sep = "\t", quote = FALSE, row.names = FALSE)
}

q()
