Changeset 15052

Show
Ignore:
Timestamp:
07/02/08 20:15:15 (6 months ago)
Author:
ayu
Message:
  • added 'furthest neighbor method'
Location:
lang/cplusplus/misc/clustercpp
Files:
2 added
5 modified

Legend:

Unmodified
Added
Removed
  • lang/cplusplus/misc/clustercpp/include/ward.h

    r15000 r15052  
    66#include <boost/shared_ptr.hpp> 
    77#include <iostream> 
     8#include <ctime> 
     9#include <cstdlib> 
     10 
    811 
    912#include "wardtypes.h" 
    1013#include "wardupdator.h" 
     14#include "maxdistupdator.h" 
    1115 
    1216 
     
    1923    ~Ward(); 
    2024 
    21     template < class VECTORSTYPE > 
     25    template < class UPDATOR, class VECTORSTYPE > 
    2226    HISTORIES* cluster(VECTORSTYPE* vectors){ 
    23       WardUpdator updator; 
     27      std::clock_t t1,t2; 
     28      t1 = std::clock(); 
     29      //WardUpdator updator; 
     30      //MaxDistUpdator updator; 
     31      UPDATOR updator; 
    2432      History h = updator.create_matrix(vectors); 
     33      t2 = std::clock(); 
     34      double T1 = static_cast<double>(t2-t1) / CLOCKS_PER_SEC; 
     35      std::cout << "Time: building_matrix=" << T1 << std::endl; 
     36 
    2537      INDEX size = updator.size(); 
    2638 
  • lang/cplusplus/misc/clustercpp/include/wardupdator.h

    r15000 r15052  
    1717    History create_matrix(SPARSEVECTORS* vectors); 
    1818    inline INDEX size(){  return mat->size()+1; } 
     19  protected: 
     20    MATRIX* mat; 
     21 
     22    virtual double calc_distance(MATRIXROW* objp, INDEX j, INDEX p, INDEX q, INDEX sqp, INDEX sp, INDEX sq, VALUE mqp); 
     23    inline VALUE elm(INDEX i, INDEX j){ return (*((*mat)[i]))[j]; } 
    1924 
    2025  private: 
    21     MATRIX* mat; 
    2226    std::vector<INDEX> sizes; 
    2327    std::set<INDEX> kieta; 
     28    CACHES* caches; 
    2429 
    2530    void initcache(); 
    26  
    27     virtual double calc_distance(MATRIXROW* objp, INDEX j, INDEX p, INDEX q, INDEX sqp, INDEX sp, INDEX sq, VALUE mqp); 
    28  
    29     inline VALUE elm(INDEX i, INDEX j){  
    30       return (*((*mat)[i]))[j];} 
    31     CACHES* caches; 
    3231    inline void savecache(INDEX i, VALUE d, INDEX i1){ 
    3332      CacheP p = CacheP(new Cache); 
  • lang/cplusplus/misc/clustercpp/src/Makefile

    r12159 r15052  
    11CXXFLAGS=-O3 -ffast-math -funroll-loops -Wall -fno-common -march=nocona -DBUILDMAIN 
    2 ward: ward.o wardupdator.o 
    3         g++ $(CXXFLAGS) -o ward -I../include ward.o wardupdator.o 
     2 
     3ward: ward.o wardupdator.o maxdistupdator.o 
     4        g++ $(CXXFLAGS) -o ward -I../include ward.o wardupdator.o maxdistupdator.o 
    45 
    56wardupdator.o: wardupdator.cpp 
     
    910        g++ $(CXXFLAGS) -c -I../include ward.cpp 
    1011 
     12maxdistupdator.o: maxdistupdator.cpp 
     13        g++ $(CXXFLAGS) -c -I../include maxdistupdator.cpp 
    1114 
    1215 
  • lang/cplusplus/misc/clustercpp/src/ward.cpp

    r15000 r15052  
    1111#include "ward.h" 
    1212#include "wardupdator.h" 
     13#include "maxdistupdator.h" 
    1314 
    1415namespace clustercpp{ 
     
    3738    ans.push_back(std::string(line, p)); 
    3839    return ans.size(); 
     40  } 
     41 
     42  void printmat(MATRIX* mat){ 
     43    for(MATRIX::iterator i=mat->begin(); i != mat->end(); i++){ 
     44      for(MATRIXROW::iterator j=i->second->begin(); j != i->second->end(); j++){ 
     45        std::cout << "(" << i->first << "," << j->first << ")=" << j->second << ","; 
     46      } 
     47      std::cout << std::endl; 
     48    } 
     49    std::cout << "--------------------" << std::endl; 
    3950  } 
    4051 
     
    108119 
    109120  clustercpp::Ward ward; 
    110   std::clock_t t1,t2,t3; 
    111   t1 = std::clock(); 
     121  std::clock_t t2,t3; 
    112122  t2 = std::clock(); 
    113123  //clustercpp::printmat(mat); 
    114   HISTORIES* hist = ward.cluster(vectors.get()); 
     124  HISTORIES* hist = ward.cluster<clustercpp::MaxDistUpdator>(vectors.get()); 
    115125  t3 = std::clock(); 
     126  int t = 0; 
    116127  for(HISTORIES::iterator it=hist->begin(); it!=hist->end(); it++){ 
    117128    idlabel.push_back("("+idlabel[it->index1]+"+"+idlabel[it->index2]+")"); 
    118     std::cout << "dist "<< it->index1 << "+" << it->index2 << " / " << it->distance << std::endl; // ")  " << idlabel[idlabel.size()-1] << std::endl; 
     129    std::cout << t << "  dist "<< it->index1 << "+" << it->index2 << " / " << it->distance << std::endl; // ")  " << idlabel[idlabel.size()-1] << std::endl; 
     130    t++; 
    119131  } 
    120   double T1 = static_cast<double>(t2-t1) / CLOCKS_PER_SEC; 
    121132  double T2 = static_cast<double>(t3-t2) / CLOCKS_PER_SEC; 
    122   std::cout << "Time: building_matrix=" << T1 << "  clustering=" << T2 << std::endl; 
     133  std::cout << "Time: clustering=" << T2 << std::endl; 
    123134 
    124135  return 0; 
  • lang/cplusplus/misc/clustercpp/src/wardupdator.cpp

    r15000 r15052  
    66 
    77namespace clustercpp{ 
    8   void printmat(MATRIX* mat){ 
    9     for(MATRIX::iterator i=mat->begin(); i != mat->end(); i++){ 
    10       for(MATRIXROW::iterator j=i->second->begin(); j != i->second->end(); j++){ 
    11         std::cout << "(" << i->first << "," << j->first << ")=" << j->second << ","; 
    12       } 
    13       std::cout << std::endl; 
    14     } 
    15     std::cout << "--------------------" << std::endl; 
    16   } 
    17  
    188  WardUpdator::WardUpdator(): sizes(), kieta(){ 
    199    caches = new CACHES; 
     
    8171      mat->insert(MATRIX::value_type(i, p)); 
    8272      SPARSEVECTOR::iterator e2 = v.end(); 
     73      double vsum = 0.0; 
     74      for(SPARSEVECTOR::iterator it0=v.begin();it0!=v.end();it0++){ 
     75        vsum += (it0->second) * (it0->second); 
     76      } 
     77 
    8378      for(INDEX j=0; j<i; j++){ 
    8479        SPARSEVECTOR& u = (*sparsevectors)[j];   
    8580        //ベクトル間のユークリッド距離を計算 
    86         double pp = 0.0; 
     81        double pp = vsum; 
    8782        SPARSEVECTOR::iterator e = u.end(); 
    8883        for(SPARSEVECTOR::iterator it=u.begin();it!=e;it++){ 
    8984          SPARSEVECTOR::iterator it2 = v.find(it->first); 
     85          double x = it->second; 
     86          pp += x*x; 
    9087          if(it2!=e2){ 
    91             VALUE x = (it2->second)-(it->second); 
    92             pp += x*x; 
     88            pp += -2*x*(it2->second); 
    9389          } 
    9490        } 
     
    136132 
    137133      double tmpdist = calc_distance(objp, j, p, q, sqp, sp, sq, mqp); 
    138       /* 
    139       double tmpdist; 
    140       if(j < p){ //TODO ここをobjpの参照先を書き換えるのでなく、リストにして返す or ローカルに保存しっぱなしでどうか。。。 
    141         tmpdist = (*objp)[j] = ((sp+sizes[j])*elm(p,j)+(sq+sizes[j])*elm(q,j)-sizes[j]*mqp)/(sqp+sizes[j]); 
    142       }else if(p<j && j<q){ 
    143         tmpdist = (*objp)[j] = ((sp+sizes[j])*elm(j,p)+(sq+sizes[j])*elm(q,j)-sizes[j]*mqp)/(sqp+sizes[j]); 
    144         (*mat)[j]->erase(p);  //今後使わなくなる成分の消去 
    145       }else{ 
    146         tmpdist = (*objp)[j] =((sp+sizes[j])*elm(j,p)+(sq+sizes[j])*elm(j,q)-sizes[j]*mqp)/(sqp+sizes[j]);         
    147         (*mat)[j]->erase(p); //今後使わなくなる成分の消去 
    148         (*mat)[j]->erase(q); //今後使わなくなる成分の消去 
    149       }  
    150       */         
    151134       
    152135      //今の所最小距離のものを一時記憶 
     
    191174  double WardUpdator::calc_distance(MATRIXROW* objp, INDEX j, INDEX p, INDEX q, INDEX sqp, INDEX sp, INDEX sq, VALUE mqp){ 
    192175      double tmpdist; 
     176      INDEX sizes_j = sizes[j]; 
    193177      if(j < p){ //TODO ここをobjpの参照先を書き換えるのでなく、リストにして返す or ローカルに保存しっぱなしでどうか。。。 
    194         tmpdist = (*objp)[j] = ((sp+sizes[j])*elm(p,j)+(sq+sizes[j])*elm(q,j)-sizes[j]*mqp)/(sqp+sizes[j]); 
     178        return (*objp)[j] = ((sp+sizes_j)*elm(p,j)+(sq+sizes_j)*elm(q,j)-sizes_j*mqp)/(sqp+sizes_j); 
    195179      }else if(p<j && j<q){ 
    196         tmpdist = (*objp)[j] = ((sp+sizes[j])*elm(j,p)+(sq+sizes[j])*elm(q,j)-sizes[j]*mqp)/(sqp+sizes[j]); 
     180        tmpdist = ((sp+sizes_j)*elm(j,p)+(sq+sizes_j)*elm(q,j)-sizes_j*mqp)/(sqp+sizes_j); 
    197181        (*mat)[j]->erase(p);  //今後使わなくなる成分の消去 
    198182      }else{ 
    199         tmpdist = (*objp)[j] =((sp+sizes[j])*elm(j,p)+(sq+sizes[j])*elm(j,q)-sizes[j]*mqp)/(sqp+sizes[j]);         
     183        tmpdist = ((sp+sizes_j)*elm(j,p)+(sq+sizes_j)*elm(j,q)-sizes_j*mqp)/(sqp+sizes_j);         
    200184        (*mat)[j]->erase(p); //今後使わなくなる成分の消去 
    201185        (*mat)[j]->erase(q); //今後使わなくなる成分の消去 
    202186      }          
    203       return tmpdist; 
     187      return (*objp)[j] = tmpdist; 
    204188  } 
    205189}