Changeset 12076
- Timestamp:
- 05/20/08 23:43:43 (5 years ago)
- Location:
- lang/cplusplus/misc/clustercpp
- Files:
-
- 2 added
- 3 modified
-
include/ward.h (modified) (1 diff)
-
include/wardupdator.h (added)
-
src/Makefile (modified) (1 diff)
-
src/ward.cpp (modified) (6 diffs)
-
src/wardupdator.cpp (added)
Legend:
- Unmodified
- Added
- Removed
-
lang/cplusplus/misc/clustercpp/include/ward.h
r12017 r12076 36 36 class Ward{ 37 37 HISTORIES* hist; 38 MATRIX* mat;39 CACHES* caches;40 38 41 39 public: 42 40 Ward(); 43 41 ~Ward(); 44 HISTORIES* cluster(MATRIX* mat, INDEX size); 45 MATRIX* create_matrix(VECTOR* vectors, INDEX vectorNum); 46 private: 47 void update_matrix(INDEX p, INDEX q, INDEX nextnodenum, std::set<INDEX>& kieta, MATRIXROW* objp, std::vector<INDEX>& sizes); 48 void initcache(); 49 inline VALUE elm(INDEX i, INDEX j){ 50 return (*((*mat)[i]))[j];} 51 inline void savecache(INDEX i, VALUE d, INDEX i1){ 52 CacheP p = CacheP(new Cache); 53 p->distance=d; 54 p->index=i1; 55 (*caches)[i] = p; 56 } 57 inline CacheP getcache(INDEX i){return (*caches)[i];} 58 inline bool hascache(INDEX p){return caches->find(p) != caches->end();} 59 inline bool hasrow(INDEX p){return mat->find(p) != mat->end();} 60 inline bool hascol(INDEX p, INDEX q){return (*mat)[p]->find(q) != (*mat)[p]->end();} 42 HISTORIES* cluster(VECTOR* vectors, INDEX vectorNum); 61 43 }; 62 44 } -
lang/cplusplus/misc/clustercpp/src/Makefile
r12008 r12076 1 ward: ward.cpp 2 g++ -O3 -ffast-math -funroll-loops -Wall -fno-common -march=nocona -o ward -I../include ward.cpp 1 CXXFLAGS=-O3 -ffast-math -funroll-loops -Wall -fno-common -march=nocona 2 ward: ward.o wardupdator.o 3 g++ $(CXXFLAGS) -o ward -I../include ward.o wardupdator.o 4 5 wardupdator.o: wardupdator.cpp 6 g++ $(CXXFLAGS) -c -I../include wardupdator.cpp 7 8 ward.o: ward.cpp 9 g++ $(CXXFLAGS) -c -I../include ward.cpp 10 11 12 13 clean: 14 rm *.o 15 rm ward -
lang/cplusplus/misc/clustercpp/src/ward.cpp
r12018 r12076 9 9 10 10 #include "ward.h" 11 #include "wardupdator.h" 11 12 12 13 namespace clustercpp{ 13 void printmat(MATRIX* mat){ 14 for(MATRIX::iterator i=mat->begin(); i != mat->end(); i++){ 15 for(MATRIXROW::iterator j=i->second->begin(); j != i->second->end(); j++){ 16 std::cout << "(" << i->first << "," << j->first << ")=" << j->second << ","; 17 } 18 std::cout << std::endl; 19 } 20 std::cout << "--------------------" << std::endl; 21 } 22 23 Ward::Ward(){ 14 Ward::Ward() { 24 15 hist = new HISTORIES; 25 caches = new CACHES;26 mat = new MATRIX;27 16 } 28 17 29 18 Ward::~Ward(){ 30 delete mat;31 delete caches;32 19 delete hist; 33 20 } 34 35 //行列作成36 MATRIX* Ward::create_matrix(VECTOR* vectors, INDEX vectorNum){37 for(INDEX i=1; i<vectorNum; i++){38 VECTOR& v = vectors[i];39 MATRIXROWP p = MATRIXROWP(new MATRIXROW);40 mat->insert(MATRIX::value_type(i, p));41 for(INDEX j=0; j<i; j++){42 VECTOR& u = vectors[j];43 //ベクトル間のユークリッド距離を計算44 double pp = 0.0;45 INDEX k = 0;46 VECTOR::iterator e = u.end();47 for(VECTOR::iterator it=u.begin();it!=e;it++){48 VALUE x = v[k]-(*it);49 pp += x*x;50 k++;51 }52 (*p)[j] = std::sqrt(pp);53 }54 }55 return mat;56 }57 58 struct compmatrow : public std::binary_function<MATRIXROWPAIR, MATRIXROWPAIR, bool> {59 bool operator ()(const MATRIXROWPAIR &x, const MATRIXROWPAIR &y) const { return x.second < y.second; }60 };61 struct compcache : public std::binary_function<CACHEPAIR, CACHEPAIR, bool> {62 bool operator ()(const CACHEPAIR &x, const CACHEPAIR &y) const { return (x.second)->distance < (y.second)->distance; }63 };64 21 65 void Ward::initcache(){66 for(MATRIX::iterator i=mat->begin();i!=mat->end();i++){67 MATRIXROW::iterator it = std::min_element(i->second->begin(), i->second->end(), compmatrow());68 if(it != i->second->end()){69 savecache(i->first, it->second, it->first);70 }71 }72 }73 74 22 void printcache(CACHES* caches){ 75 23 std::cout << "Caches "; … … 80 28 } 81 29 82 HISTORIES* Ward::cluster(MATRIX* mat, INDEX size){ 83 initcache(); 84 //printmat(mat); 85 std::vector<INDEX> sizes(size, 1); 30 HISTORIES* Ward::cluster(VECTOR* vectors, INDEX vectorNum){ 31 32 WardUpdator updator; 33 History h = updator.create_matrix(vectors, vectorNum); 34 INDEX size = updator.size(); 35 86 36 hist->clear(); 87 std::set<INDEX> kieta;88 37 INDEX nextnodenum; 89 38 nextnodenum=size; 39 90 40 while(hist->size()<size-1){ 91 41 //最小キャッシュを選択 92 42 //printcache(caches); 93 if(caches->size()==0) break;94 CACHES::iterator i=std::min_element(caches->begin(), caches->end(), compcache());95 43 96 VALUE distance = (i->second)->distance; 97 INDEX p = (i->second)->index; 98 INDEX q = i->first; 44 INDEX p = h.index1; 45 INDEX q = h.index2; 99 46 if(p>q){ 100 47 INDEX tmp = p; … … 102 49 q=tmp; 103 50 } 104 ///std::cout << "joining " << distance << " ,q " << q << " ,p " << p << " => " << nextnodenum << std::endl; 105 106 //消えたもの集合を更新 107 kieta.insert(p); 108 kieta.insert(q); 109 //キャッシュから、今回消えるものをあらかじめ排除 110 caches->erase(p); 111 caches->erase(q); 112 //std::cout << "erased" << std::endl; 113 114 MATRIXROW* objp = new MATRIXROW; 115 if(objp==0) std::cout << "POINTER ERROR" << std::endl; 116 mat->insert(MATRIX::value_type(nextnodenum, MATRIXROWP(objp))); 117 update_matrix(p, q, nextnodenum, kieta, objp, sizes); 118 119 //std::cout << "len(cache) " << caches->size() << std::endl; 120 mat->erase(p); 121 mat->erase(q); 122 sizes.push_back(sizes[q]+sizes[p]); 123 History h; 124 h.distance = distance; 51 //結果の格納 125 52 h.index1 = q; 126 53 h.index2 = p; 127 54 hist->push_back(h); 128 //printmat(mat); 55 56 //行列の更新 57 h = updator.update_matrix(p, q, nextnodenum); 58 129 59 nextnodenum++; 130 60 } … … 132 62 } 133 63 134 void Ward::update_matrix(INDEX p, INDEX q, INDEX nextnodenum, std::set<INDEX>& kieta, MATRIXROW* objp, std::vector<INDEX>& sizes){135 VALUE mqp = elm(q,p);136 INDEX sp = sizes[p];137 INDEX sq = sizes[q];138 INDEX sqp = sp+sq;139 double mindist=-1.0;140 INDEX minj =0;141 std::set<INDEX>::iterator kietaend = kieta.end();142 for(INDEX j=0;j<nextnodenum; j++){143 144 if(kieta.find(j) != kietaend){145 continue; //すでに消えてるなら何もしない146 }147 double tmpdist;148 if(j < p){149 tmpdist = (*objp)[j] = ((sp+sizes[j])*elm(p,j)+(sq+sizes[j])*elm(q,j)-sizes[j]*mqp)/(sqp+sizes[j]);150 }else if(p<j && j<q){151 tmpdist = (*objp)[j] = ((sp+sizes[j])*elm(j,p)+(sq+sizes[j])*elm(q,j)-sizes[j]*mqp)/(sqp+sizes[j]);152 (*mat)[j]->erase(p); //今後使わなくなる成分の消去153 }else{154 tmpdist = (*objp)[j] =((sp+sizes[j])*elm(j,p)+(sq+sizes[j])*elm(j,q)-sizes[j]*mqp)/(sqp+sizes[j]);155 (*mat)[j]->erase(p); //今後使わなくなる成分の消去156 (*mat)[j]->erase(q); //今後使わなくなる成分の消去157 }158 if(tmpdist<mindist or mindist<0.0){159 mindist=tmpdist;160 minj=j;161 }162 if(hasrow(j)){ //Q. この判定必要? A. 必要。まだ合体させられてないのに、matの行自体は消えていることがあるよ。163 if((*mat)[j]->size()==0){164 caches->erase(j);165 mat->erase(j);166 //一見ここでkieta.insert(j)したくなるが、そうしてはいけない!167 //kietaにinsertするのは、真にその点が消えた時=他の点と合体させられた時のみである。168 //そうでないのにここでinsertしてしまうと、まだ消滅していないのに、未来永劫計算対象からはずされてしまうのだっ!!169 }else{170 CacheP c = getcache(j);171 if(c->index==p || c->index==q){172 MATRIXROW::iterator it = std::min_element((*mat)[j]->begin(), (*mat)[j]->end(), compmatrow());173 savecache(j, it->second, it->first);174 }175 }176 }177 }178 //新ノードからの距離をキャッシュに記録179 if(mindist>=0.0){180 savecache(nextnodenum, mindist, minj);181 }182 }183 64 } 184 65 … … 200 81 201 82 while(line!=""){ 202 //std::cout << line << std::endl;203 83 std::vector<std::string> data; 204 84 split(line, "\t", data); … … 218 98 std::clock_t t1,t2,t3; 219 99 t1 = std::clock(); 220 MATRIX* mat = ward.create_matrix(&(*vectors)[0], vecsize);221 100 t2 = std::clock(); 222 101 //clustercpp::printmat(mat); 223 HISTORIES* hist = ward.cluster( mat, vecsize);102 HISTORIES* hist = ward.cluster(&(*vectors)[0], vecsize); 224 103 t3 = std::clock(); 225 104 for(HISTORIES::iterator it=hist->begin(); it!=hist->end(); it++){ 226 105 idlabel.push_back("("+idlabel[it->index1]+"+"+idlabel[it->index2]+")"); 227 std::cout << "dist "<< it-> distance <<std::endl;//<< "" << idlabel[idlabel.size()-1] << std::endl;106 std::cout << "dist "<< it->index1 << "+" << it->index2 << " / " << it->distance << std::endl; // ") " << idlabel[idlabel.size()-1] << std::endl; 228 107 } 229 108 double T1 = static_cast<double>(t2-t1) / CLOCKS_PER_SEC;
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)