| Summary | Included libraries | Package variables | Description | General documentation | Methods |
Rcmd::Clustering - Interfaces to clustering algorithms of R language.
This class is a part of G-language Genome Analysis Environment,
collecting interfaces to clustering algorithms of R language.
| hclust | Description | Code |
| kmeans | Description | Code |
| sample_data_for_clustering | No description | Code |
| set_clust_data | No description | Code |
| som | Description | Code |
| hclust | code | next | Top |
Name: hclust - Hierarchical clustering analysis for given arrays
Descriptions:
Hierarchical clustering analysis methods for given arrays.
The 'ward' method is run as R's ward.D2 algorithm.
Installation of amap library for R language is required.
run R as a super user - sudo R - and type the following:
install.packages('amap')
Usage:
hclust(\@array1_of_values, \@array2_of_values, ...);
or
hclust(\@array1_of_values, \@array2_of_values, ..., -label => \@grouping_label);
Options:
-output output toggle option (default: show)
"g" to generate graph without displaying.
-filename output filename of the clustering graph (default: hclust.pdf)
-method the agglomeration method to be used (default: ward).
'ward', 'single', 'complete', 'average', 'centroid', 'median' or 'mcquitty'
-distmethod the distance measure method (default: correlation)
'euclidean', 'maximum', 'manhattan', 'canberra', 'binary', 'kendall'
'spearman', 'pearson' (not centered Pearson), 'abspearson' (Absolute Pearson),
'correlation' (Centered Pearson) or 'abscorrelation' (Absolute correlation)
this option is based on 'Dist' method in 'amap' library in R.
-label labels or names of the data series
Author:
Kazuki Oshita (cory@g-language.org) |
| kmeans() | code | prev | next | Top |
Name: kmeans() - clustering with K-means method
Description:
Clustering with K-means method with using R language.
Number of cluster centers can be given by -centers option (default: 5)
and number of iterations is given by -iter.max (default: 10).
Returned value corresponds to result$cluster of kmeans() in R.
(a vector of cluster numbers to which each point is allocated)
Usage:
@cluster = $rcmd->kmeans(\@array1, \@array2, \@array3, ..., -label=>\@label);
Arrays correspond to the columns (data series), and labels for each of
these arrays can be given by -label option.
Options:
-label labels or names of the data series.
-centers number of cluster centers (default: 5)
-iter.max number of iterations (default: 10)
-filename output filename of the graph (default: kmeans.pdf)
-output output toggle option (default: show)
"g" to generate graph without displaying.
-sampledata use sample data (default: 0)
Author:
Kazuharu Arakawa (gaou@sfc.keio.ac.jp) |
| som() | code | prev | next | Top |
Name: som() - clustering using Self-Organizing Map
Description:
Clustering with Self-Organizing Map (SOM) using R language.
Installation of GeneSOM library for R language is required.
run R as a super user - sudo R - and type the following:
install.packages('som')
Returns a two-dimensional array corresponding to the
result$visual of som() in R's GeneSOM library.
Usage:
@result = som(\@array1, \@array2, \@array3, ..., -label=>\@label);
Arrays correspond to the columns (data series), and labels for each of
these arrays can be given by -label option.
Options:
-label labels or names of the data series.
-xdim x-dimension of the map (default: 3)
-ydim y-dimension of the map (default: 3)
-filename output filename of the graph (default: som.pdf)
-output output toggle option (default: show)
"g" to generate graph without displaying.
-sampledata use sample data (default: 0)
Author:
Kazuharu Arakawa (gaou@sfc.keio.ac.jp) |
| hclust | description | prev | next | Top |
# hclust - hierarchical clustering of the given arrays with R's hclust(),
# using Dist() from the 'amap' library to build the distance matrix.
#
# Arguments: array references (one per data series) plus options:
#   -output      'show' (default) opens the graph with msg_gimv(); any
#                other value only generates the file
#   -filename    name of the PDF written under ./graph/ (default: hclust.pdf)
#   -method      agglomeration method (default: ward, run as R's ward.D2);
#                unknown values fall back to ward
#   -distmethod  distance measure passed to Dist() (default: correlation);
#                unknown values fall back to pearson
#   -label       array reference of row labels
# Returns: an empty string.
opt_default(
    output     => 'show',
    label      => [],
    filename   => 'hclust.pdf',
    method     => 'ward',
    distmethod => 'correlation',
);
my @args       = opt_get(@_);
my $output     = opt_val('output');
my @label      = @{ opt_val('label') };
my $filename   = opt_val('filename');
my $method     = opt_val('method');
my $distmethod = opt_val('distmethod');

# Map user-facing method names to the names R's hclust() understands.
# 'complate' is a historical misspelling of 'complete' that is still
# accepted so that existing callers keep working.
my %r_method_for = (
    ward     => 'ward.D2',
    single   => 'single',
    complete => 'complete',
    complate => 'complete',
    average  => 'average',
    centroid => 'centroid',
    median   => 'median',
    mcquitty => 'mcquitty',
);
my $r_method = ( defined $method && exists $r_method_for{$method} )
    ? $r_method_for{$method}
    : $r_method_for{ward};

# Validate by exact lookup rather than interpolating the option into a regex.
my %is_dist_method = map { $_ => 1 } qw(
    euclidean maximum manhattan canberra binary
    pearson abspearson correlation abscorrelation spearman kendall
);
$distmethod = 'pearson'
    unless defined $distmethod && $is_dist_method{$distmethod};

my $rcmd = Rcmd->new();

# Send every data series to R as array0, array1, ...
my @R_names;
for my $i ( 0 .. $#args ) {
    my $R_name = 'array' . $i;
    push @R_names, $R_name;
    $rcmd->array( $R_name, @{ $args[$i] } );
}

# 'label' list object (if required)
$rcmd->sarray( 'label', @label ) if @label;

# The data frame is seeded with 1..N, where N is the length of the first
# series (taken from the parsed arguments, not from raw @_).
# NOTE(review): N is measured before the CMP filter below is applied, so
# series containing incomplete cases may not fit the frame - confirm in R.
my $row_count = scalar @{ $args[0] || [] };
my $frame_cmd = 'd.table <- data.frame(' . $R_names[0] . '=1:' . $row_count;
$frame_cmd .= ', row.names=label' if @label;
$frame_cmd .= ')';

my @R_commands = (
    'CMP <- complete.cases(' . join( ', ', @R_names ) . ')',
    $frame_cmd,
);
for my $key (@R_names) {
    push @R_commands, $key . ' <- ' . $key . '[CMP]';
    push @R_commands, 'd.table$' . $key . ' <- ' . $key;
}

$rcmd->exec(
    @R_commands,
    'library("amap")',
    "pdf('./graph/" . $filename . "')",
    'hc <- hclust(Dist(d.table, method="' . $distmethod
        . '"), method="' . $r_method . '")',
    'plot(hc)',
);

msg_gimv( 'graph/' . $filename ) if $output eq 'show';
return '';
}
| kmeans | description | prev | next | Top |
# kmeans - K-means clustering through R's kmeans().
#
# Arguments: array references plus options -label, -centers (default 5),
# -iter.max (default 10), -filename (default kmeans.pdf), -output
# (default show) and -sampledata (default 0).
# Returns: whatever Rcmd's exec() returns for 'rclust.kmeans$cluster'.
my $rcmd = Rcmd->new();
$rcmd->set_mode('tmp');

opt_default(
    "iter.max" => 10,
    filename   => "kmeans.pdf",
    output     => "show",
    sampledata => 0,
    centers    => 5,
);
my @args       = opt_get(@_);
my $centers    = opt_val("centers");
my $iter       = opt_val("iter.max");
my $output     = opt_val("output");
my $filename   = opt_val("filename");
my $sampledata = opt_val("sampledata");
my $label      = opt_val("label") || '';

# Load the matrix 'rclust' in R, either generated or from the caller's arrays.
if ($sampledata) {
    $rcmd->sample_data_for_clustering();
}
else {
    $rcmd->set_clust_data( @args, -label => $label );
}

my @result = $rcmd->exec(
    'require(stats)',
    'rclust.kmeans<-kmeans(rclust,' . $centers . ',' . $iter . ')',
    'rclust.kmeans$cluster',
);

# Both "g" and "show" produce the PDF; only "show" opens it afterwards.
if ( $output =~ /g|show/ ) {
    $rcmd->exec(
        "pdf('./graph/$filename')",
        'plot(rclust,col=rclust.kmeans$cluster)',
        'points(rclust.kmeans$centers, col=1:' . $centers . ',pch=8)',
    );
    msg_gimv("graph/$filename") if $output =~ /show/;
}

$rcmd->set_mode();
return @result;
}
| sample_data_for_clustering | description | prev | next | Top |
# sample_data_for_clustering - fills the R variable 'rclust' with two
# row-bound random matrices (two columns each) drawn with rnorm().
#
# Arguments: the Rcmd object used to run the R statement.
my ($rcmd) = @_;

my $r_statement = join '',
    'rclust<-rbind(matrix(rnorm(100,sd=0.3),ncol=2),',
    'matrix(rnorm(100,mean=1,sd=0.3),ncol=2))';

$rcmd->exec($r_statement);
}
| set_clust_data | description | prev | next | Top |
# set_clust_data - builds the R variable 'rclust' from the given arrays.
#
# Arguments: the Rcmd object, array references, and an optional -label
# option holding an array reference of names.
# The first array becomes 'rclust'; each later one is sent as 'tmp' and
# appended with rbind(). When a label is given it is set as the first
# element of dimnames(rclust).
my $rcmd  = shift;
my @args  = opt_get(@_);
my $label = opt_val("label") || '';

for my $index ( 0 .. $#args ) {
    my $series = $args[$index];
    if ( $index == 0 ) {
        $rcmd->array( 'rclust', @$series );
    }
    else {
        $rcmd->array( 'tmp', @$series );
        $rcmd->exec('rclust <- rbind(rclust, tmp)');
    }
}

if ( length $label ) {
    $rcmd->sarray( 'label', @$label );
    $rcmd->exec('dimnames(rclust) <- list(label, NULL)');
}
}
| som | description | prev | next | Top |
# som - clustering with a Self-Organizing Map through R's 'som' library.
#
# Arguments: array references plus options:
#   -label       array reference of names for the data
#   -xdim/-ydim  map dimensions (default: 3 x 3)
#   -topo        map topology, 'hexa' (default) or 'rect'
#   -neigh       neighbourhood function, 'gaussian' (default) or 'bubble'
#   -filename    name of the PDF written under ./graph/ (default: som.pdf)
#   -output      "g" or "show" generate the graph; only "show" (default)
#                opens it with msg_gimv()
#   -sampledata  use generated sample data instead of the arrays (default: 0)
# Returns: a list of [x, y, qerror] array references parsed from the
# 'rclust.som$visual' table in the R log.
my $rcmd = Rcmd->new();
$rcmd->set_mode('tmp');

opt_default(
    filename   => "som.pdf",
    output     => "show",
    xdim       => 3,
    ydim       => 3,
    sampledata => 0,
    topo       => 'hexa',
    neigh      => 'gaussian',
);
my @args       = opt_get(@_);
my $xdim       = opt_val("xdim");
my $ydim       = opt_val("ydim");
my $filename   = opt_val("filename");
my $output     = opt_val("output");
my $sampledata = opt_val("sampledata");
my $label      = opt_val("label") || '';
my $topo       = opt_val('topo');
my $neigh      = opt_val('neigh');

# Only pass values the R 'som' package documents; anything else falls back
# to the default. This also keeps quotes out of the generated R statement.
my %is_topo  = map { $_ => 1 } qw(rect hexa);
my %is_neigh = map { $_ => 1 } qw(bubble gaussian);
$topo  = 'hexa'     unless defined $topo  && $is_topo{$topo};
$neigh = 'gaussian' unless defined $neigh && $is_neigh{$neigh};

# Load the matrix 'rclust' in R, either generated or from the caller's arrays.
if ($sampledata) {
    $rcmd->sample_data_for_clustering();
}
else {
    $rcmd->set_clust_data( @args, -label => $label );
}

# -topo and -neigh are now honoured instead of being hard-coded.
$rcmd->exec(
    'require(som)',
    "rclust.som<-som(rclust, $xdim, $ydim, topo='$topo', neigh='$neigh')",
    'rclust.som$visual',
);

# Read the printed table back from the R log: rows follow the first line
# mentioning 'qerror', and every row that starts with a digit is split
# into (row index, x, y, qerror).
my @result;
my $log_path = $rcmd->{log};
open( my $log_fh, '<', $log_path )
    or die "Cannot open R log '$log_path': $!";
my $in_table = 0;
while ( my $line = <$log_fh> ) {
    if ( !$in_table ) {
        if ( $line =~ /qerror/ ) {
            $in_table = 1;
            @result   = ();
        }
        next;
    }
    chomp $line;
    next unless $line =~ /^\d/;
    my ( undef, $x, $y, $qerror ) = split /\s+/, $line, 4;
    push @result, [ $x, $y, $qerror ];
}
close $log_fh;

if ( $output =~ /g/ || $output =~ /show/ ) {
    $rcmd->exec(
        "pdf('./graph/" . $filename . "')",
        'plot(rclust.som)',
    );
    msg_gimv("graph/$filename") if $output =~ /show/;
}

$rcmd->set_mode();
return @result;
}