Rcmd Clustering
SummaryPackage variablesDescriptionGeneral documentationMethods
Summary
  Rcmd::Clustering - Interfaces to clustering algorithms of R language.
Package variables
No package variables defined.
Included modules
G::Messenger
SubOpt
Synopsis
No synopsis!
Description
    This class is a part of G-language Genome Analysis Environment, 
    collecting interfaces to clustering algorithms of R language.
Methods
hclust
No description
Code
kmeans
No description
Code
sample_data_for_clustering
No description
Code
set_clust_data
No description
Code
som
No description
Code
Methods description
None available.
Methods code
hclustdescriptionprevnextTop
sub hclust {
    my $rcmd = shift;
    $rcmd->set_mode('tmp');

    opt_default("dist_method"=>"euclidean", "hclust_method"=>"com", 
		filename=>"hclust.png", output=>"show", sampledata=>0);
    my @args = opt_get(@_);
    my $dist_method = opt_val("dist_method");
    my $hclust_method = opt_val("hclust_method");
    my $filename= opt_val("filename");
    my $output = opt_val("output");
    my $label = opt_val("label");
    my $sampledata = opt_val("sampledata");

    if($sampledata){
	$rcmd->sample_data_for_clustering();
    }else{
	$rcmd->set_clust_data(@args, -label=>$label);
    }

    $rcmd->exec(
		'require(mva)',
		"rclust.hclust<-hclust(dist(rclust,\" $dist_method\"),\" $hclust_method\")"
		);

    my @result = $rcmd->exec('rclust.hclust$merge');

    if($output =~ /g/ || $output =~ /show/){
	$rcmd->exec(
		    'postscript("/tmp/rclust.ps")',
		    'plot(rclust.hclust,hang=-1)'
		    );

	mkdir('graph', 0777);
	system("convert -rotate 90 /tmp/rclust.ps graph/$filename");
	msg_gimv("graph/$filename") if ($output =~ /show/);
    }

    shift @result;

    $rcmd->set_mode();
    return @result;
}
kmeansdescriptionprevnextTop
sub kmeans {
    my $rcmd = shift;
    $rcmd->set_mode('tmp');

    opt_default("iter.max"=>10, filename=>"kmeans.png", output=>"show", sampledata=>0, centers=>5);
    my @args = opt_get(@_);
    my $centers = opt_val("centers");
    my $iter = opt_val("iter.max");
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $sampledata = opt_val("sampledata");
    my $label = opt_val("label");

    if($sampledata){
	$rcmd->sample_data_for_clustering();
    }else{
	$rcmd->set_clust_data(@args, -label=>$label);
    }

    $rcmd->exec(
		'require(mva)',
		"rclust.kmeans<-kmeans(rclust,$centers,$iter)"
		);

    my @result = $rcmd->exec('rclust.kmeans$cluster');

    if($output =~ /g/ || $output =~ /show/){
	$rcmd->exec(
		    'postscript("/tmp/kmeans.ps")',
		    'plot(rclust,col=rclust.kmeans$cluster)',
		    "points(rclust.kmeans\$centers, col=1:$centers,pch=8)"
		    );

	mkdir('graph', 0777);
	system("convert -rotate 90 /tmp/kmeans.ps graph/$filename");
	msg_gimv("graph/$filename") if ($output =~ /show/);
    }
    
    $rcmd->set_mode();
    return @result;
}
sample_data_for_clusteringdescriptionprevnextTop
sub sample_data_for_clustering {
    my $rcmd = shift;

    $rcmd->exec(
		'rclust<-rbind(matrix(rnorm(100,sd=0.3),ncol=2),' .
		'matrix(rnorm(100,mean=1,sd=0.3),ncol=2))' 
		);
}
set_clust_datadescriptionprevnextTop
sub set_clust_data {
    my $rcmd = shift;

    my @args = opt_get(@_);
    my $label = opt_val("label");

    my $flag = 0;
    foreach (@args){
	if ($flag == 0){
	    $flag ++;

	    $rcmd->array('rclust', @$_);
	}else{
	    $rcmd->array('tmp', @$_);
	    $rcmd->exec('rclust <- cbind(rclust, tmp)');
	}
    }

    if (length $label){
	$rcmd->sarray('label', @$label);
	$rcmd->exec('dimnames(rclust) <- list(label, NULL)');
    }
}
somdescriptionprevnextTop
sub som {
    my $rcmd = shift;
    $rcmd->set_mode('tmp');

    opt_default(filename=>"som.png", output=>"show", xdim=>3, ydim=>3, sampledata=>0);
    my @args = opt_get(@_);
    my $xdim = opt_val("xdim");
    my $ydim = opt_val("ydim");
    my $filename= opt_val("filename");
    my $output = opt_val("output");
    my $sampledata = opt_val("sampledata");
    my $label = opt_val("label");

    if($sampledata){
	$rcmd->sample_data_for_clustering();
    }else{
	$rcmd->set_clust_data(@args, -label=>$label);
    }

    my $message = $rcmd->exec(
			      'require(GeneSOM)'
			      );

    croak(
	"\n\nPackage GeneSOM not installed.\nrun\n\tinstall.packages(\'GeneSOM\')\n" . 
	"in R as root.\n\n"
	) if ($message =~ /FALSE/);

    $rcmd->exec(
		"rclust.som<-som(rclust, $xdim, $ydim)",
		'rclust.som$visual'
		);

    my @result;
    open(FILE, $rcmd->{log}) || die($!);
    while(<FILE>){
	if(/qerror/){
	    @result = ();
	    while(<FILE>){
		chomp;
		if(/^\d/){
		    my (undef, $x, $y, $qerror) = split(/\s+/, $_, 4);
		    push(@result, [$x, $y, $qerror]);
		}
	    }
	}
    }
    close(FILE);

    if($output =~ /g/ || $output =~ /show/){
	$rcmd->exec(
		    'postscript("/tmp/som.ps")',
		    'plot(rclust.som)'
		    );

	mkdir('graph', 0777);
	system("convert -rotate 90 /tmp/som.ps graph/$filename");
	msg_gimv("graph/$filename") if ($output =~ /show/);
    }
    
    $rcmd->set_mode();
    return @result;
}
General documentation
$rcmd->som()Top
  Name: $rcmd->som()   -   clustering using Self-Organizing Map

  Description:
    Clustering with Self-Organizing Map (SOM) using R language.
    Installation of GeneSOM library for R language is required.
        run R as a super user - sudo R - and type the following:
        install.packages('GeneSOM')) 
    
    Returns a two-dimensional array correspondingn to the 
    result$visual of som() in R's GeneSOM library.

  Usage:
    $rcmd = new Rcmd();
    @result = $rcmd->som(\@array1, \@array2, \@array3, ..., -label=>\@label);
      or
    @result = $rcmd->som(-sampledata=>1);

    Arrays correspond to the columns (data series), and labels for each of
    these arrays can be given by -label option.

  Options:
   -label        labels or names of the data series.
   -xdim         x-dimension of the map (default: 3)
   -ydim         y-dimension of the map (default: 3)
   -filename     output filename of the graph (default: som.png)
   -output       output toggle option (default: show)
                 "g" to generate graph without displaying.
   -sampledata   use sample data (default: 0)

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20070612-01 converted to Rcmd::Clustering 20030904-01 initial posting (G::Tools::RCluster)
$rcmd->hclust()Top
  Name: $rcmd->hclust()   -   hierarchical clustering analysis

  Description:
    Hierarchical clustering using R language.
    Distance measure to use can be specified by -dist_method option.
    (euclidean:default, maximum, manhattan, canberra, binary)
    Clustering method can be specified by -hclust_method option. 
    (complete:default, ward, single, average, mcquitty, median, centroid)

    This method automatically runs dist() in R to calculate the dissimilarities
    of the given data and passes the result to hclust() of R.

    Returned value corresponds to result$merge of hclust() in R (n-1 by 2 matrix).
    Row i describes the merging of clusters at step i.
    
  Usage:
    $rcmd = new Rcmd();
    @merge = $rcmd->hclust(\@array1, \@array2, \@array3, ..., -label=>\@label);
      or
    @merge = $rcmd->hclust(-sampledata=>1);

    Arrays correspond to the columns (data series), and labels for each of
    these arrays can be given by -label option.

 Options:
   -label           labels or names of the data series.
   -dist_method     distance measure (euclidean:default, maximum, manhattan, 
                    canberra, binary)
   -hclust_method   clustering method (complete:default, ward, single, average, 
                    mcquitty, median, centroid) 
   -filename        output filename of the graph (default: hclust.png)
   -output          output toggle option (default: show)
                    "g" to generate graph without displaying.
   -sampledata      use sample data (default: 0)

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20070612-01 converted to Rcmd::Clustering 20030904-01 initial posting (G::Tools::RCluster)
$rcmd->kmeans()Top
  Name: $rcmd->kmeans()   -   clustering with K-means method

  Description:
    
    Clustering with K-means method with using R language.
    Number of cluster centers can be given by -centers option (default: 10)
    and number of iterations is given by -iter.max (default: 10).

    Returned value corresponds to result$cluster of kmeans() in R.
    (a vector of cluster numbers to which each point is allocated)
    
  Usage:
    $rcmd = new Rcmd();
    @cluster = $rcmd->kmeans(\@array1, \@array2, \@array3, ..., -label=>\@label);
      or
    @cluster = $rcmd->kmeans(-sampledata=>1);

    Arrays correspond to the columns (data series), and labels for each of
    these arrays can be given by -label option.

  Options:
   -label           labels or names of the data series.
   -center          number of cluster centers (default: 5)
   -iter.max        number of iterations (default: 10)
   -filename        output filename of the graph (default: hclust.png)
   -output          output toggle option (default: show)
                    "g" to generate graph without displaying.
   -sampledata      use sample data (default: 0)

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20070612-01 converted to Rcmd::Clustering 20030904-01 initial posting (G::Tools::RCluster)