This R script links together the most relevant functions for preprocessing data, running cNMTF and prioritising variants.


###############################################################
# Full executable script for cNMTF
#
# Corresponding author:
# Luis Leal, Imperial College London, email: lgl15@imperial.ac.uk
###############################################################


#-------------------------------------------------
# 1. Installation
#-------------------------------------------------


            # Clean your current workspace

            rm(list=ls()) ; gc()

            # Load libraries

            library("devtools") #For the installation of cnmtf package
            library("biomaRt") #For querying SNV functional consequences from ENSEMBL
            library("LDcorSV") #For Linkage disequilibrium assessment
            library("doParallel") #For parallel computing
            library("igraph") #For SNV-SNV network construction
            library("ade4") #For Principal components analysis of patients genotypes
            library("VennDiagram") #For plotting Venn diagrams of prioritised variatns
            library("snpStats") #For plotting QQplots in genomic control of population structure
            library("ComplexHeatmap") #For plotting heatmaps of cluster membership
            library("RDAVIDWebService") #For annotating the SNVs and genes with functional data

            # Installation of cnmtf package from our github repository

            devtools::install_github("lgl15/cnmtf")


            # Create a directory for your analysis

            dir.create("./test/")

            
            
#-------------------------------------------------
# 2. Preprocessing the data
#-------------------------------------------------


            # Find variants in high Linkage-disequilibrium
            
            find.snps.ld( file.LD = "./test/fileLD.RData", #File to save results
                          type.ld = "gene", #Find LD within the same gene region
                          tmap = tmap, #Mapping of SNPs to genes, chr and genomic position 
                          R = R) #Relationship matrix


            # Create SNV-SNV network
            
            create.network( net.type = "ppi", #Type of reference network
                            dedges = dedges, #Object with edges from reference network
                        
                            #Parameters for Linkage Disequilibrium
                            remove.highLD = TRUE,
                            ld.tao = 0.8, #Treshold of LD
                            res.ld = "./test/fileLD.RData", #Table of LD
                            
                            #Parameters to construct Wu
                            R.snps = rownames(R), #List of SNPs in R
                            work.dat = "./test/", #Working directory
                            trait.project = "test", #Trait
                            n.cores = 3, #Number of cores for parallel computing
                            tmap = tmap, #Mapping of SNPs to genes
                            plot.file = "Gu_ppi_test_venn.pdf" )



#-------------------------------------------------
# 3. Running cNMTF
#-------------------------------------------------
            

                score.cnmtf( R = R, #Relationship matrix
                             out = out, #Categorical outcome variable
                             pop = pop, #Population variable
                             log.file = "logfile_my_experiment", #Log-file to track the performance
            
                             #Variables to Save/Load data and workspaces
                             name.exp = "my_experiment", #Name of experiment to save files
                             work.dat = "./test/", #Folder to save and load workspaces
            
                             #Number of clusters
                             define.k = "user", #Method to define k1
                             k1 = 10, #Number of clusters of SNVs
                             k2 = nlevels(out), #Number of clusters of patients
            
                             #Penalisation parameters
                             wparameters = list( gamma1 = 0.5, #Weight for the SNV network, Lu
                                                 gamma2 = 1, #Weight for the outcome matrix, Vo
                                                 gamma3 = 1), #Weight for the kernel matrix, A
                             save.parameters = TRUE, #Save parameters to file
                             run.t.par = 4, #Number of repetitions for parameters fitting
                             max.try0 = 4,  #Maximum number of tries to fit the parameter
                             snps.known = NULL, #List of known SNV associated with the trait
            
                             #Variables to control performance of the algorithm
                             parallel.opt = TRUE, #Run some instances of the algorithm in parallel
                             n.cores = 3, #Number of cores to use in the parallel processing
                             init = 0, #Type of seeding/initialisation of matrices in the algorithm
                             do.U = TRUE, #Perform clustering of SNPs
                             calcObj = 20, #Check convergency every 20 iterations
                             calcObj2 = 40, #Start checking convergency after first 40 iterations
                             iters = 300, #Number of iterations
                             run.t.exp = 10, #Number of repetitions for the experiment
                             display.iters = FALSE, #Display the iterations of function cnmtf
            
                             #Randomisations
                             score.pvalues = TRUE, #Estimate p-values for the scores
                             random.parallel = TRUE, #Run each randomisation in parallel
                             randomisations = 100, #Number of randomisations
            
                             #Construction of penalisation terms
                             file.Gu = "./test/Gu_ppi_test.RData" #Workspace with SNV-SNV network
            
                        )




#-------------------------------------------------
# 4. Prioritising SNVs
#-------------------------------------------------


        # Calculate the delta score and create Manhattan plots

            analyze.cnmtf (  trait.project = "test", #Trait under analysis
                             name.exp = "my_experiment", #Experiment under analysis 
                             work.dat = "./test/", #Folder with the results of score.cnmtf
                             alpha.cnmtf = 0.005,   #Significance level of the scores
                             d.conf = NULL, #Optional. Confounder variables.
                             snps.known = NULL, #Optional. List of SNVs associated with the disease
                             snps.known2 = NULL, #Optional. A second list of SNVs.
                             tmap = tmap) #Mapping of SNPs to genes, chr and genomic position


        # Map variants to genes and add functional annotations
        # Your email account must be registered in the The Database for Annotation, Visualization and Integrated Discovery (DAVID ) [https://david.ncifcrf.gov/webservice/register.htm]

            t.res = annotate.results( trait.project = "test", #Trait under analysis
                                        name.exp = "my_experiment", #Experiment under analysis 
                                        work.dat = "./test/", #Working directory
                                        add.david.annotations = TRUE, #Use DAVID web service
                                        email.david = "email@example.com", #Email registered in DAVID.
                                        add.ensemble.conseq = FALSE, #Add SNV consequences from ENSEMBL
                                        tmap = tmap, #Mapping of SNPs to genes, chr and genomic position
                                        file.LD = "./test/fileLD.RData", #Workspace with pairwise LD
                                        ld.tao = 0.8, #Treshold of LD
                                        snps.known = NULL, #Optional. List of known associations.
                                        snps.known2 = NULL) #Optional. A second list of SNVs.

        #Extract table of prioritised variants
                                              
            (t.snvs = t.res[[1]])