Rcmd Multivariate
SummaryIncluded librariesPackage variablesDescriptionGeneral documentationMethods
Summary
  Rcmd::Multivariate - Statistical tools for multivariate analysis
Package variables
No package variables defined.
Included modules
G::Messenger
Rcmd::Handler
SubOpt
Inherit
Exporter
Synopsis
No synopsis!
Description
  This class is a part of G-language Genome Analysis Environment, 
  collecting interfaces to clustering algorithms of R language.
Methods
pcaDescriptionCode
Methods description
pcacode    nextTop
  Name: pca   -   principal component analysis for given arraies

  Descriptions:
    This method shows eigenvector matrix, factor loadin gmatrix and
    canonical correlation diagram by principal component analysis (PCA).

  Usage:
    pca(\@array1_of_values, \@array2_of_values, ...);
      or
    pca(\@array1_of_values, \@array2_of_values, ..., -label => \@label_of_factors);

  Options:
   -output       output toggle option (default: show)
                 "g" to generate graph without displaying.
   -filename     output filename of the bar plot graph (default: pca_plot.pdf)
   -label        labels or names of the factors (should be the same size as the given array)
   -category     category labels of the arrays (size should be the same as the number of arrays)

   Author:
     Kazuki Oshita (cory@g-language.org)
Kazuharu Arakawa (gaou@g-language.org)
History: 20141111-01 major overhaul upon merging to G-language GAE. 20130321-01 initial posting
Methods code
pcadescriptionprevnextTop
sub pca {
    &opt_default(output => 'show', label => [], category => [], filename => 'pca_plot.pdf');

    my @args     = opt_get(@_);
    my $output   = opt_val('output');
    my @label    = @{opt_val('label')};
    my @category = @{opt_val('category')};
    my $filename = opt_val('filename');

    my $rcmd = Rcmd->new();

    my (%data_table, @R_names);
    for my $i (0 .. $#args) {
	my $R_name= 'Array' . $i;
	push @R_names, $R_name;

	$data_table{$R_name}= $_[$i];
	$rcmd->array($R_name, @{$args[$i]});
    }

    unless($#label > -1){
	push(@label, 'Factor' . $_) for (1..(scalar(@{$args[0]})));
    }
    $rcmd->sarray('label', @label);

    $rcmd->sarray('category', @category) if($#category > -1);
    my @R_commands= (
		     'CMP <- complete.cases('.join(', ', @R_names).')',
		     ('d.table <- data.frame('.$R_names[0].'=1:'.($#{$data_table{$R_names[0]}}+1).', row.names=label)')
		    );

    for my $key (@R_names) {
	push @R_commands, $key.' <- '.$key.'[CMP]';
	push @R_commands, 'd.table$'.$key.' <- '.$key;
    }

    my $col = $#category > -1 ? 'text(scores[,1], scores[,2], col=as.numeric(unclass(factor(category))), labels=category)' : '';
    my $dot = $#category > -1 ? 'n' : 'p", pch=".';

    my @res = $rcmd->exec(
		@R_commands,
                't.table = t(d.table)',
 		'PC <- prcomp(t.table)',
                'summary(PC)$importance[,c(1:6)]',

                'cat("\n[PC1 Top 10 Factors]\n")',
		'pc1 = PC$rotation[,1]',
                'top10 = order(abs(pc1), decreasing=TRUE)[1:10]',
                'top = cbind(label[top10], round(pc1[top10], digits=3))',
                'colnames(top) = c("Factor", "Loading")',
                'rownames(top) = label[top10]',
                'print(top[,2], quote=FALSE, row.names=FALSE)',

                'cat("\n[PC2 Top 10 Factors]\n")',
		'pc2 = PC$rotation[,2]',
                'top10 = order(abs(pc2), decreasing=TRUE)[1:10]',
                'top = cbind(label[top10], round(pc2[top10], digits=3))',
                'colnames(top) = c("Factor", "Loading")',
                'rownames(top) = label[top10]',
                'print(top[,2], quote=FALSE, row.names=FALSE)',

                'scores=PC$x',
		"pdf('./graph/".$filename."')",
                'label1 <- paste("PC1 (", round(summary(PC)$importance[2,1]*100), "% )")',
                'label2 <- paste("PC2 (", round(summary(PC)$importance[2,2]*100), "% )")',
 		'plot(scores[,1], scores[,2], type="' . $dot . '", main="Scatter plot of the PCA scores", xlab=label1, ylab=label2)',
		$col
	       );

    msg_gimv('graph/'.$filename) if $output eq 'show';

    return '';
}
General documentation
AUTHORTop
  Kazuki Oshita, <cory@g-language.org<gt>