sub pca
{ &opt_default(output => 'show', label => [], category => [], filename => 'pca_plot.pdf');
my @args = opt_get(@_);
my $output = opt_val('output');
my @label = @{opt_val('label')};
my @category = @{opt_val('category')};
my $filename = opt_val('filename');
my $rcmd = Rcmd->new();
my (%data_table, @R_names);
for my $i (0 .. $#args) {
my $R_name= 'Array' . $i;
push @R_names, $R_name;
$data_table{$R_name}= $_[$i];
$rcmd->array($R_name, @{$args[$i]});
}
unless($#label > -1){
push(@label, 'Factor' . $_) for (1..(scalar(@{$args[0]})));
}
$rcmd->sarray('label', @label);
$rcmd->sarray('category', @category) if($#category > -1);
my @R_commands= (
'CMP <- complete.cases('.join(', ', @R_names).')',
('d.table <- data.frame('.$R_names[0].'=1:'.($#{$data_table{$R_names[0]}}+1).', row.names=label)')
);
for my $key (@R_names) {
push @R_commands, $key.' <- '.$key.'[CMP]';
push @R_commands, 'd.table$'.$key.' <- '.$key;
}
my $col = $#category > -1 ? 'text(scores[,1], scores[,2], col=as.numeric(unclass(factor(category))), labels=category)' : '';
my $dot = $#category > -1 ? 'n' : 'p", pch=".';
my @res = $rcmd->exec(
@R_commands,
't.table = t(d.table)',
'PC <- prcomp(t.table)',
'summary(PC)$importance[,c(1:6)]',
'cat("\n[PC1 Top 10 Factors]\n")',
'pc1 = PC$rotation[,1]',
'top10 = order(abs(pc1), decreasing=TRUE)[1:10]',
'top = cbind(label[top10], round(pc1[top10], digits=3))',
'colnames(top) = c("Factor", "Loading")',
'rownames(top) = label[top10]',
'print(top[,2], quote=FALSE, row.names=FALSE)',
'cat("\n[PC2 Top 10 Factors]\n")',
'pc2 = PC$rotation[,2]',
'top10 = order(abs(pc2), decreasing=TRUE)[1:10]',
'top = cbind(label[top10], round(pc2[top10], digits=3))',
'colnames(top) = c("Factor", "Loading")',
'rownames(top) = label[top10]',
'print(top[,2], quote=FALSE, row.names=FALSE)',
'scores=PC$x',
"pdf('./graph/".$filename."')",
'label1 <- paste("PC1 (", round(summary(PC)$importance[2,1]*100), "% )")',
'label2 <- paste("PC2 (", round(summary(PC)$importance[2,2]*100), "% )")',
'plot(scores[,1], scores[,2], type="' . $dot . '", main="Scatter plot of the PCA scores", xlab=label1, ylab=label2)',
$col
);
msg_gimv('graph/'.$filename) if $output eq 'show';
return '';} |