Rcmd Clustering
SummaryIncluded librariesPackage variablesDescriptionGeneral documentationMethods
Summary
  Rcmd::Clustering - Interfaces to clustering algorithms of R language.
Package variables
No package variables defined.
Included modules
G::Messenger
SubOpt
Inherit
Exporter
Synopsis
No synopsis!
Description
    This class is a part of G-language Genome Analysis Environment, 
    collecting interfaces to clustering algorithms of R language.
Methods
hclustDescriptionCode
kmeansDescriptionCode
sample_data_for_clustering
No description
Code
set_clust_data
No description
Code
somDescriptionCode
Methods description
hclustcode    nextTop
  Name: hclust   -   Hierarchical clustering analysis for given arrays

  Descriptions:
    Hierarchical clustering analysis methods for given arrays.

    Ward method uses the ward.D2 by default.

    Installation of amap library for R language is required.
      run R as a super user - sudo R - and type the following:
        install.packages('amap')

  Usage:
    hclust(\@array1_of_values, \@array2_of_values, ...);
      or
    hclust(\@array1_of_values, \@array2_of_values, ..., -label => \@grouping_label);

 Options:
   -output       output toggle option (default: show)
                 "g" to generate graph without displaying.
   -filename     output filename of the clustering graph (default: hclust.pdf)
   -method       the agglomeration method to be used (default: ward).
                   'ward', 'single', 'complete', 'average', 'centroid', 'median' or 'mcquitty'
   -distmethod   the distance measure method (default: correlation)
                   'euclidean', 'maximum', 'manhattan', 'canberra', 'binary', 'kendall'
                   'spearman', 'pearson' (not centered Pearson), 'abspearson' (Absolute Pearson),
                   'correlation' (Centered Pearson) or 'abscorrelation' (Absolute correlation)
                 this option is based on 'Dist' method in 'amap' library in R.
   -label        labels or names of the data series

  Author:
     Kazuki Oshita (cory@g-language.org)
History: 20130321-01 complete rewrite by cory, exported by default 20070612-01 converted to Rcmd::Clustering 20030904-01 initial posting (G::Tools::RCluster)
kmeans()codeprevnextTop
  Name: kmeans()   -   clustering with K-means method

  Description:
    
    Clustering with the K-means method using the R language.
    Number of cluster centers can be given by -centers option (default: 5)
    and number of iterations is given by -iter.max (default: 10).

    Returned value corresponds to result$cluster of kmeans() in R.
    (a vector of cluster numbers to which each point is allocated)
    
  Usage:
    @cluster = $rcmd->kmeans(\@array1, \@array2, \@array3, ..., -label=>\@label);

    Arrays correspond to the columns (data series), and labels for each of
    these arrays can be given by -label option.

  Options:
   -label           labels or names of the data series.
   -centers          number of cluster centers (default: 5)
   -iter.max        number of iterations (default: 10)
   -filename        output filename of the graph (default: kmeans.pdf)
   -output          output toggle option (default: show)
                    "g" to generate graph without displaying.
   -sampledata      use sample data (default: 0)

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20070612-01 converted to Rcmd::Clustering 20030904-01 initial posting (G::Tools::RCluster)
som()codeprevnextTop
  Name: som()   -   clustering using Self-Organizing Map

  Description:
    Clustering with Self-Organizing Map (SOM) using R language.
    Installation of GeneSOM library for R language is required.
        run R as a super user - sudo R - and type the following:
        install.packages('som')
    
    Returns a two-dimensional array corresponding to the 
    result$visual of som() in R's GeneSOM library.

  Usage:
    @result = som(\@array1, \@array2, \@array3, ..., -label=>\@label);

    Arrays correspond to the columns (data series), and labels for each of
    these arrays can be given by -label option.

  Options:
   -label        labels or names of the data series.
   -xdim         x-dimension of the map (default: 3)
   -ydim         y-dimension of the map (default: 3)
   -filename     output filename of the graph (default: som.pdf)
   -output       output toggle option (default: show)
                 "g" to generate graph without displaying.
   -sampledata   use sample data (default: 0)

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20141110-01 exported by default 20070612-01 converted to Rcmd::Clustering 20030904-01 initial posting (G::Tools::RCluster)
Methods code
hclustdescriptionprevnextTop
# hclust - hierarchical clustering of the given arrays through R.
#
# Arguments:
#   one array reference per data series, plus optional named options:
#     -output      'show' (default) displays the generated graph via msg_gimv
#     -label       array reference of row labels (default: none)
#     -filename    graph file name, written under ./graph/ (default: hclust.pdf)
#     -method      agglomeration method (default: ward, sent to R as ward.D2)
#     -distmethod  distance measure for amap's Dist() (default: correlation)
#
# Returns: always the empty string.
sub hclust {
    opt_default(output => 'show', label => [], filename => 'hclust.pdf', method => 'ward', distmethod => 'correlation');

    my @args       = opt_get(@_);
    my $output     = opt_val('output');
    my @label      = @{ opt_val('label') };
    my $filename   = opt_val('filename');
    my $method     = opt_val('method');
    my $distmethod = opt_val('distmethod');

    # Nothing to cluster: bail out instead of building invalid R commands.
    unless (@args) {
        warn "hclust: no data arrays given\n";
        return '';
    }

    # 'complate' is the historical misspelling of R's 'complete' method;
    # keep accepting it so existing callers do not break.
    $method = 'complete' if $method eq 'complate';

    my @all_methods = ('ward', 'ward.D', 'ward.D2', 'single', 'complete',
                       'average', 'centroid', 'median', 'mcquitty');
    unless (grep { $_ eq $method } @all_methods) {
        $method = 'ward';
    }
    # Plain 'ward' is mapped to R's 'ward.D2' criterion.
    $method = 'ward.D2' if $method eq 'ward';

    my @all_dist_methods = ('euclidean', 'maximum', 'manhattan', 'canberra', 'binary', 'pearson',
                            'abspearson', 'correlation', 'abscorrelation', 'spearman', 'kendall');
    # NOTE(review): an unknown distance falls back to 'pearson', not to the
    # default 'correlation' -- confirm this is intended.
    unless (grep { $_ eq $distmethod } @all_dist_methods) {
        $distmethod = 'pearson';
    }

    my $rcmd = Rcmd->new();

    # Send every data series to R as array0, array1, ...
    my @R_names;
    for my $i (0 .. $#args) {
        my $R_name = 'array' . $i;
        push @R_names, $R_name;
        $rcmd->array($R_name, @{ $args[$i] });
    }

    # 'label' list object (if required)
    my $has_label = @label ? 1 : 0;
    $rcmd->sarray('label', @label) if $has_label;

    # The data frame is first created with a placeholder column whose length
    # is the size of the first data series.
    my $n_rows     = scalar @{ $args[0] };
    my $frame_init = 'd.table <- data.frame(' . $R_names[0] . '=1:' . $n_rows;
    $frame_init   .= ', row.names=label' if $has_label;
    $frame_init   .= ')';

    my @R_commands = (
        'CMP <- complete.cases(' . join(', ', @R_names) . ')',
        $frame_init,
    );

    # Keep only the complete cases of each series and store it as a column.
    # NOTE(review): d.table is sized before the CMP filter is applied; confirm
    # the column assignment still works when incomplete rows are dropped.
    for my $key (@R_names) {
        push @R_commands, $key . ' <- ' . $key . '[CMP]';
        push @R_commands, 'd.table$' . $key . ' <- ' . $key;
    }

    $rcmd->exec(
        @R_commands,
        'library("amap")',
        "pdf('./graph/" . $filename . "')",
        'hc <- hclust(Dist(d.table, method="' . $distmethod . '"), method="' . $method . '")',
        'plot(hc)',
    );

    msg_gimv('graph/' . $filename) if $output eq 'show';

    return '';
}
kmeansdescriptionprevnextTop
# kmeans - K-means clustering through R's stats::kmeans().
#
# Arguments:
#   one array reference per data series, plus optional named options:
#     -label       array reference of labels, forwarded to set_clust_data
#     -centers     number of cluster centers (default: 5)
#     -iter.max    maximum number of iterations (default: 10)
#     -filename    graph file name, written under ./graph/ (default: kmeans.pdf)
#     -output      'show' (default) plots and displays the graph;
#                  a value containing 'g' plots without displaying
#     -sampledata  true to cluster generated sample data instead of the input
#
# Returns: the list returned by $rcmd->exec for the clustering commands, whose
# last command prints rclust.kmeans$cluster.
sub kmeans {
    my $rcmd = Rcmd->new();
    $rcmd->set_mode('tmp');

    opt_default("iter.max"=>10, filename=>"kmeans.pdf", output=>"show", sampledata=>0, centers=>5);
    my @args       = opt_get(@_);
    my $centers    = opt_val("centers");
    my $iter       = opt_val("iter.max");
    my $output     = opt_val("output");
    my $filename   = opt_val("filename");
    my $sampledata = opt_val("sampledata");
    my $label      = opt_val("label") || '';

    # Load the R variable 'rclust' either from sample data or from the input.
    if ($sampledata) {
        $rcmd->sample_data_for_clustering();
    }
    else {
        $rcmd->set_clust_data(@args, -label=>$label);
    }

    my @result = $rcmd->exec(
        'require(stats)',
        "rclust.kmeans<-kmeans(rclust,$centers,$iter)",
        'rclust.kmeans$cluster'
    );

    # Both 'g' and 'show' produce the PDF; only 'show' also displays it.
    if ($output =~ /g|show/) {
        $rcmd->exec(
            "pdf('./graph/".$filename."')",
            'plot(rclust,col=rclust.kmeans$cluster)',
            "points(rclust.kmeans\$centers, col=1:$centers,pch=8)"
        );

        msg_gimv("graph/$filename") if ($output =~ /show/);
    }

    $rcmd->set_mode();
    return @result;
}
sample_data_for_clusteringdescriptionprevnextTop
# sample_data_for_clustering - fill the R variable 'rclust' with demo data.
#
# Row-binds two two-column matrices of 100 normal deviates each (sd=0.3):
# the first uses rnorm's default mean, the second uses mean=1.
#
# Invoked as a method on an Rcmd object; returns whatever exec() returns.
sub sample_data_for_clustering {
    my ($rcmd) = @_;

    my $first_cloud  = 'matrix(rnorm(100,sd=0.3),ncol=2)';
    my $second_cloud = 'matrix(rnorm(100,mean=1,sd=0.3),ncol=2)';
    my $r_statement  = 'rclust<-rbind(' . join(',', $first_cloud, $second_cloud) . ')';

    return $rcmd->exec($r_statement);
}
set_clust_datadescriptionprevnextTop
# set_clust_data - load the caller's data series into the R variable 'rclust'.
#
# Invoked as a method on an Rcmd object with one array reference per data
# series and an optional -label => \@labels option.  The first series is
# stored as 'rclust'; every later series is sent as 'tmp' and appended to
# 'rclust' with rbind().  When labels are given they are sent as 'label' and
# assigned as the first dimnames component of 'rclust'.
sub set_clust_data {
    my $rcmd = shift;

    my @series = opt_get(@_);
    my $label  = opt_val("label") || '';

    for my $idx (0 .. $#series) {
        my $values = $series[$idx];

        # The very first series seeds 'rclust'.
        if ($idx == 0) {
            $rcmd->array('rclust', @$values);
            next;
        }

        # Every following series is appended as an additional row.
        $rcmd->array('tmp', @$values);
        $rcmd->exec('rclust <- rbind(rclust, tmp)');
    }

    if (length $label){
        $rcmd->sarray('label', @$label);
        $rcmd->exec('dimnames(rclust) <- list(label, NULL)');
    }
}
somdescriptionprevnextTop
# som - Self-Organizing Map clustering through the R 'som' package.
#
# Arguments:
#   one array reference per data series, plus optional named options:
#     -label       array reference of labels, forwarded to set_clust_data
#     -xdim        x-dimension of the map (default: 3)
#     -ydim        y-dimension of the map (default: 3)
#     -topo        map topology passed to som() (default: hexa)
#     -neigh       neighbourhood function passed to som() (default: gaussian)
#     -filename    graph file name, written under ./graph/ (default: som.pdf)
#     -output      'show' (default) plots and displays the graph;
#                  a value containing 'g' plots without displaying
#     -sampledata  true to cluster generated sample data instead of the input
#
# Returns: a list of [x, y, qerror] array references parsed from the R log
# output of rclust.som$visual.  Dies if the R log cannot be opened.
sub som {
    my $rcmd = Rcmd->new();
    $rcmd->set_mode('tmp');

    opt_default(filename=>"som.pdf", output=>"show", xdim=>3, ydim=>3, sampledata=>0, topo=>'hexa', neigh=>'gaussian');
    my @args       = opt_get(@_);
    my $xdim       = opt_val("xdim");
    my $ydim       = opt_val("ydim");
    my $filename   = opt_val("filename");
    my $output     = opt_val("output");
    my $sampledata = opt_val("sampledata");
    my $label      = opt_val("label") || '';
    my $topo       = opt_val('topo');
    my $neigh      = opt_val('neigh');

    # Load the R variable 'rclust' either from sample data or from the input.
    if ($sampledata) {
        $rcmd->sample_data_for_clustering();
    }
    else {
        $rcmd->set_clust_data(@args, -label=>$label);
    }

    # Honour the -topo / -neigh options instead of hard-coding their defaults.
    $rcmd->exec(
        'require(som)',
        "rclust.som<-som(rclust, $xdim, $ydim, topo='$topo', neigh='$neigh')",
        'rclust.som$visual'
    );

    # Parse the printed rclust.som$visual table out of the R log: skip lines
    # up to and including the first one mentioning 'qerror', then read every
    # later line that starts with a digit as "<row> <x> <y> <qerror>".
    my @result;
    my $log_path = $rcmd->{log};
    open(my $log_fh, '<', $log_path) or die "Cannot open R log '$log_path': $!";

    my $seen_header = 0;
    while (my $line = <$log_fh>) {
        unless ($seen_header) {
            $seen_header = 1 if $line =~ /qerror/;
            next;
        }

        chomp $line;
        next unless $line =~ /^\d/;

        my (undef, $x, $y, $qerror) = split(/\s+/, $line, 4);
        push(@result, [$x, $y, $qerror]);
    }
    close($log_fh);

    # Both 'g' and 'show' produce the PDF; only 'show' also displays it.
    if ($output =~ /g|show/) {
        $rcmd->exec(
            "pdf('./graph/".$filename."')",
            'plot(rclust.som)'
        );

        msg_gimv("graph/$filename") if ($output =~ /show/);
    }

    $rcmd->set_mode();
    return @result;
}
General documentation
No general documentation available.