From d41e0c87505bc00201adbd20178cabaf94375b83 Mon Sep 17 00:00:00 2001 From: philipperuiz <philippe.ruiz@inrae.fr> Date: Mon, 11 Dec 2023 14:07:03 +0100 Subject: [PATCH 1/2] plsda update to integrate multilevel analyses and add some output --- R/plsda_fun.r | 293 +++++++++++++++++++++++++++----------------------- 1 file changed, 160 insertions(+), 133 deletions(-) diff --git a/R/plsda_fun.r b/R/plsda_fun.r index f704b18..1aaeed1 100644 --- a/R/plsda_fun.r +++ b/R/plsda_fun.r @@ -6,8 +6,11 @@ #' @param output Output directory #' @param column1 Factor to test. #' @param rank Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom) -#' -#' +#' @param axis Select the axis to plot +#' @param multilevel Use Within matrix decomposition for repeated measurements +#' @param ind.names either a character vector of names for the individuals to be plotted, or FALSE for no names. If TRUE, the row names of the first (or second) data matrix is used as names (see mixOmics Details). +#' @param ellipse Logical indicating if ellipse plots should be plotted (see mixOmics Details). +#' @param progressBar Silence the progress bar. #' @return Return a list object with plots, and loadings table. #' #' @import futile.logger @@ -20,162 +23,186 @@ #' @export -plsda_fun <- function(data = data, output = "./plsda/", column1 = "", - rank = "ASV"){ - +plsda_fun <- function (data = data, output = "./plsda/", column1 = "", + rank = "ASV", axis = c(1, 2), multilevel = NULL, + ind.names = FALSE, + ellipse = FALSE, progressBar = FALSE) +{ outF <- list() - - if(!dir.exists(output)){ - dir.create(output,recursive=T) + if (!dir.exists(output)) { + dir.create(output, recursive = T) } - - - flog.info('Preparing tables...') - if(rank != 'ASV'){ - flog.info('Glom rank...') - physeqDA <- tax_glom(data, taxrank=rank) + flog.info("Preparing tables...") + if (rank != "ASV") { + flog.info("Glom rank...") + physeqDA <- tax_glom(data, taxrank = rank) otable <- otu_table(physeqDA) - row.names(otable) <- tax_table(physeqDA)[,rank] + row.names(otable) <- tax_table(physeqDA)[, rank] } else { physeqDA <- data otable <- otu_table(physeqDA) } mdata <- sample_data(physeqDA) - flog.info('Done.') - - flog.info('Removing levels with only one sample...') - fun <- paste('lvl_to_remove <- names(table(mdata$',column1,')[table(mdata$',column1,') <= 1])',sep='') - eval(parse(text=fun)) - flog.debug(paste0('Level "',lvl_to_remove, '" will be removed.')) - if(length(lvl_to_remove) > 0){ - fun <- paste('sample_to_remove <- rownames(mdata[mdata$',column1,' == lvl_to_remove])') - eval(parse(text=fun)) - flog.info(paste('Removing ',sample_to_remove,sep='')) - otable <- otable[,colnames(otable)!=sample_to_remove] - mdata <- mdata[rownames(mdata)!=sample_to_remove] - + flog.info("Done.") + flog.info("Removing levels with only one sample...") + fun <- paste("lvl_to_remove <- names(table(mdata$", column1, + ")[table(mdata$", column1, ") <= 1])", sep = "") + eval(parse(text = fun)) + flog.debug(paste0("Level \"", lvl_to_remove, "\" will be removed.")) + if (length(lvl_to_remove) > 0) { + fun <- paste("sample_to_remove <- rownames(mdata[mdata$", + column1, " == lvl_to_remove])") + eval(parse(text = fun)) + flog.info(paste("Removing ", sample_to_remove, sep = "")) + otable <- otable[, colnames(otable) != sample_to_remove] + mdata <- mdata[rownames(mdata) != sample_to_remove] } - flog.info('Done.') - - flog.info('PLSDA...') - fun <- paste('plsda.res = plsda(t(otable+1), mdata$',column1,', ncomp = 5, logratio="CLR")',sep='') - eval(parse(text=fun)) - flog.info('Done.') - - flog.info('Plotting PLSDA individuals...') - png(paste(output,'/plsda_indiv_',column1,'_',rank,'.png',sep='')) - background = background.predict(plsda.res, comp.predicted=2, dist = "max.dist") - fun <- paste('outF$plsda.plotIndiv <- plotIndiv(plsda.res, - comp= 1:2, - group = mdata$',column1,', - ind.names=FALSE, - ellipse=TRUE, - legend=TRUE, - title= "PLSDA plot of individuals", - background = background)',sep='') - eval(parse(text=fun)) + flog.info("Done.") + flog.info("PLSDA...") + if (taxa_are_rows(physeqDA)) { + otable <- t(otable + 1) + } else { + otable <- otable + 1 + } + if (!is.null(multilevel)) { + multilevel <- mdata[, multilevel] |> data.frame() + Y <- mdata[, column1] + multilevel <- data.frame(multilevel, Y) + multilevel[, 1] <- multilevel[, 1] |> factor() |> as.numeric() + otable <- withinVariation(otable, multilevel) + fun <- paste("plsda.res <- plsda(otable, as.numeric(factor(multilevel[, 2])), + ncomp = 5, logratio=\"none\")", sep = "") + } else { + fun <- paste("plsda.res <- plsda(otable, mdata$", column1, + ", ncomp = 5, logratio=\"CLR\")", sep = "") + } + eval(parse(text = fun)) + flog.info("Done.") + flog.info("Plotting PLSDA individuals...") + png(paste(output, "/plsda_indiv_", column1, "_", rank, ".png", + sep = "")) + background = background.predict(plsda.res, comp.predicted = 2, + dist = "max.dist") + if (all(axis == c(1, 2))) { + fun <- paste("outF$plsda.plotIndiv <- plotIndiv(plsda.res,\n \tcomp= 1:2,\n \tgroup = mdata$", + column1, + ",\n \tind.names=", + ind.names, + ",\n \tellipse=", + ellipse, + ",\n \tlegend=TRUE,\n \ttitle= \"PLSDA plot of individuals\",\n \tbackground = background)", + sep = "") + } else if (all(axis == c(2, 3))) { + fun <- paste("outF$plsda.plotIndiv <- plotIndiv(plsda.res,\n \tcomp= 2:3,\n \tgroup = mdata$", + column1, + ",\n \tind.names=", + ind.names, + ",\n \tellipse=", + ellipse, + ",\n \tlegend=TRUE,\n \ttitle= \"PLSDA plot of individuals\",\n \tbackground = background)", + sep = "") + } + eval(parse(text = fun)) dev.off() - flog.info('Done.') - - plot_plsda_perf <- function(){ - flog.info('Plotting PLSDA performance...') - perf.plsda <- perf(plsda.res, validation = "Mfold", folds = 5, progressBar = FALSE, auc = TRUE, nrepeat = 10) - - plotperf <- plot(perf.plsda, col = color.mixo(1:3), sd = TRUE, legend.position = "horizontal", title = "PLSDA performance plot") - # plotperf <- recordPlot() - # invisible(dev.off()) - # - # png(paste(output,'/plsda_perf_',column1,'_',rank,'.png',sep='')) - # replayPlot(plotperf) - # dev.off() - flog.info('Done.') - + flog.info("Done.") + plot_plsda_perf <- function() { + flog.info("Plotting PLSDA performance...") + perf.plsda <- perf(plsda.res, validation = "Mfold", folds = 5, + progressBar = progressBar, auc = TRUE, nrepeat = 10) + plotperf <- plot(perf.plsda, col = color.mixo(1:3), sd = TRUE, + legend.position = "horizontal", title = "PLSDA performance plot") + flog.info("Done.") return(plotperf) } - - # tryCatch(plot_perf(), error = function(e) { flog.warn("PLSDA perf function not working.")}) - # outF$splsda.plotPerf <- plot_plsda_perf() - - - - tune_splsda <- function(){ - flog.info('Tune SPLDA...') - fun <- paste('tune.splsda <- tune.splsda(t(otable+1), - mdata$',column1, - ', ncomp = 4, - validation = "Mfold", - folds = 4, - progressBar = FALSE, - dist = "max.dist", - nrepeat = 10)',sep='') - eval(parse(text=fun)) - - # plot(tune.splsda, col = color.jet(4), title = "Error rates SPLSDA") - # outF$splsda.plotError <- recordPlot() - # invisible(dev.off()) - # - # png(paste(output,'/splsda_error_',column1,'_',rank,'.png',sep='')) - # replayPlot(outF$splsda.plotError) - # dev.off() - - ncomp <- tune.splsda$choice.ncomp$ncomp + 1 - select.keepX <- tune.splsda$choice.keepX[1:ncomp-1] - r_lst <- list("ncomp" = ncomp, "selectkeepX" = select.keepX) - flog.info('Done.') - return(r_lst) + tune_splsda <- function() { + flog.info("Tune SPLDA...") + if (!is.null(multilevel)) { + fun <- paste("tune.splsda <- tune.splsda(otable, as.numeric(factor(multilevel[, 2])), + ncomp = 4,\n \tvalidation = \"Mfold\",\n \tfolds = 4,\n \tprogressBar = ", + progressBar, + ",\n \tdist = \"max.dist\",\n \tnrepeat = 10)", + sep = "") + } else { + fun <- paste("tune.splsda <- tune.splsda(otable,\n \tmdata$", + column1, + ", ncomp = 4,\n \tvalidation = \"Mfold\",\n \tfolds = 4,\n \tprogressBar = ", + progressBar, + ",\n \tdist = \"max.dist\",\n \tnrepeat = 10)", + sep = "") + } + eval(parse(text = fun)) + ncomp <- tune.splsda$choice.ncomp$ncomp + 1 + select.keepX <- tune.splsda$choice.keepX[1:ncomp - 1] + r_lst <- list(ncomp = ncomp, selectkeepX = select.keepX) + flog.info("Done.") + return(r_lst) } - - # tryCatch(tune_splsda(), error = function(e) { stop("At least one class is not represented in one fold.") }) r_list <- tune_splsda() ncomp <- r_list$ncomp select.keepX <- r_list$selectkeepX - - flog.info(paste('keepX: ',select.keepX,sep='')) - - flog.info('SPLSDA...') - fun <- paste('splsda.res <- splsda(t(otable+1), mdata$',column1,', ncomp = ncomp, keepX = select.keepX, logratio= "CLR")',sep='') - eval(parse(text=fun)) - flog.info('Done.') - flog.info('Plot Individuals...') - png(filename=paste(output,'/splsda_indiv_',column1,'_',rank,'.png',sep=''),width=480,height=480) - fun <- paste('outF$splsda.plotIndiv <- plotIndiv(splsda.res, comp= c(1:2), group = mdata$',column1,', ind.names = FALSE, ellipse = TRUE, legend = TRUE, title = "sPLS-DA on ',column1,'")',sep='') - eval(parse(text=fun)) + flog.info(paste("keepX: ", select.keepX, sep = "")) + flog.info("SPLSDA...") + if (!is.null(multilevel)) { + fun <- paste("splsda.res <- splsda(otable, as.numeric(factor(multilevel[, 2])), + ncomp = ncomp, keepX = select.keepX, logratio= \"none\")", + sep = "") + } else { + fun <- paste("splsda.res <- splsda(otable, mdata$", + column1, ", ncomp = ncomp, keepX = select.keepX, logratio= \"CLR\")", + sep = "") + } + eval(parse(text = fun)) + flog.info("Done.") + flog.info("Plot Individuals...") + png(filename = paste(output, "/splsda_indiv_", column1, "_", + rank, ".png", sep = ""), width = 480, height = 480) + if (all(axis == c(1, 2))) { + fun <- paste("outF$splsda.plotIndiv <- plotIndiv(splsda.res, comp= c(1:2), group = mdata$", + column1, + ",\n \tind.names=", + ind.names, + ",\n \tellipse=", + ellipse, + ",\n \tlegend = TRUE, title = \"sPLS-DA on ", + column1, "\")", sep = "") + } else if (all(axis == c(2, 3))) { + fun <- paste("outF$splsda.plotIndiv <- plotIndiv(splsda.res, comp= c(2:3), group = mdata$", + column1, + ",\n \tind.names=", + ind.names, + ",\n \tellipse=", + ellipse, + ",\n \tlegend = TRUE, title = \"sPLS-DA on ", + column1, "\")", sep = "") + } + eval(parse(text = fun)) dev.off() - flog.info('Done.') - - flog.info('SPLSDA performance...') + flog.info("Done.") + flog.info("SPLSDA performance...") perf.splsda <- perf(splsda.res, validation = "Mfold", folds = 5, - dist = 'max.dist', nrepeat = 10, - progressBar = FALSE) - - png(paste(output,'/splsda_perf_',column1,'_',rank,'.png',sep='')) + dist = "max.dist", nrepeat = 10, progressBar = progressBar) + png(paste(output, "/splsda_perf_", column1, "_", rank, ".png", + sep = "")) plot(perf.splsda, col = color.mixo(5)) dev.off() - - outF[["loadings"]] = list() - for (comp in 1:ncomp){ - plotLoadings(splsda.res, comp = comp, title = paste('Loadings on comp',comp,sep=''), contrib = 'max', method = 'mean') + for (comp in 1:ncomp) { + plotLoadings(splsda.res, comp = comp, title = paste("Loadings on comp", + comp, sep = ""), contrib = "max", method = "mean") outF$loadings[[glue::glue("comp{comp}")]] <- recordPlot() invisible(dev.off()) - - png(paste(output,'/splsda_loadings_',column1,'_',rank,'_comp',comp,'.png',sep=''),width=800,height=800) + png(paste(output, "/splsda_loadings_", column1, "_", + rank, "_comp", comp, ".png", sep = ""), width = 800, + height = 800) replayPlot(outF$loadings[[glue::glue("comp{comp}")]]) - dev.off() + dev.off() } - - # plotArrow(splsda.res, legend=T) - # outF$splsda.plotArrow <- recordPlot() - # invisible(dev.off()) - # - # png(paste(output,'/splsda_arrow_',column1,'_',rank,'.png',sep='')) - # replayPlot(outF$splsda.plotArrow) - # dev.off() - outF$splsda.loadings_table = splsda.res$loadings$X - write.table(splsda.res$loadings$X,paste(output,'/splsda_table_',column1,'_',rank,'.csv',sep=''),quote=FALSE,sep="\t",col.names=NA) - flog.info('Done.') - + write.table(splsda.res$loadings$X, paste(output, "/splsda_table_", + column1, "_", rank, ".csv", sep = ""), quote = FALSE, + sep = "\t", col.names = NA) + flog.info("Done.") + outF$splsda.res = splsda.res + outF$plsda.res = plsda.res return(outF) - } -- GitLab From e55b8431a88d9eae1e2fc983184002816aab7662 Mon Sep 17 00:00:00 2001 From: Etienne Rifa <etienne.rifa[at]insa-toulouse.fr> Date: Wed, 10 Jan 2024 10:54:17 +0100 Subject: [PATCH 2/2] typo + document --- R/plsda_fun.r | 4 ++-- man/plsda_fun.Rd | 22 +++++++++++++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/R/plsda_fun.r b/R/plsda_fun.r index 1aaeed1..7ec21fb 100644 --- a/R/plsda_fun.r +++ b/R/plsda_fun.r @@ -7,7 +7,7 @@ #' @param column1 Factor to test. #' @param rank Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom) #' @param axis Select the axis to plot -#' @param multilevel Use Within matrix decomposition for repeated measurements +#' @param multilevel Factor (metadata column name) used within matrix decomposition for repeated measurements #' @param ind.names either a character vector of names for the individuals to be plotted, or FALSE for no names. If TRUE, the row names of the first (or second) data matrix is used as names (see mixOmics Details). #' @param ellipse Logical indicating if ellipse plots should be plotted (see mixOmics Details). #' @param progressBar Silence the progress bar. @@ -115,7 +115,7 @@ plsda_fun <- function (data = data, output = "./plsda/", column1 = "", return(plotperf) } tune_splsda <- function() { - flog.info("Tune SPLDA...") + flog.info("Tune SPLSDA...") if (!is.null(multilevel)) { fun <- paste("tune.splsda <- tune.splsda(otable, as.numeric(factor(multilevel[, 2])), ncomp = 4,\n \tvalidation = \"Mfold\",\n \tfolds = 4,\n \tprogressBar = ", diff --git a/man/plsda_fun.Rd b/man/plsda_fun.Rd index 475a46f..83184ec 100644 --- a/man/plsda_fun.Rd +++ b/man/plsda_fun.Rd @@ -4,7 +4,17 @@ \alias{plsda_fun} \title{PLSDA from MixOmics package} \usage{ -plsda_fun(data = data, output = "./plsda/", column1 = "", rank = "ASV") +plsda_fun( + data = data, + output = "./plsda/", + column1 = "", + rank = "ASV", + axis = c(1, 2), + multilevel = NULL, + ind.names = FALSE, + ellipse = FALSE, + progressBar = FALSE +) } \arguments{ \item{data}{A phyloseq object (output from decontam or generate_phyloseq)} @@ -14,6 +24,16 @@ plsda_fun(data = data, output = "./plsda/", column1 = "", rank = "ASV") \item{column1}{Factor to test.} \item{rank}{Taxonomy rank to merge features that have same taxonomy at a certain taxonomic rank (among rank_names(data), or 'ASV' for no glom)} + +\item{axis}{Select the axis to plot} + +\item{multilevel}{Factor (metadata column name) used within matrix decomposition for repeated measurements} + +\item{ind.names}{either a character vector of names for the individuals to be plotted, or FALSE for no names. If TRUE, the row names of the first (or second) data matrix is used as names (see mixOmics Details).} + +\item{ellipse}{Logical indicating if ellipse plots should be plotted (see mixOmics Details).} + +\item{progressBar}{Silence the progress bar.} } \value{ Return a list object with plots, and loadings table. -- GitLab