Changeset 11041 for lang/python
- Timestamp: 05/04/08 00:56:21 (7 months ago)
- Location: lang/python/yoyaku/yoyaku/engine
- Files: 2 added, 1 modified
  - HyokiyureData.py (added)
  - TopicSegmentation.py (modified) (4 diffs)
  - TopicSegmentationJP.py (added)
Legend:
- Unmodified
- Added
- Removed
-
lang/python/yoyaku/yoyaku/engine/TopicSegmentation.py
Diff r11026 → r11041 (leading indentation was lost in extraction and is restored here from the block structure):

```diff
--- lang/python/yoyaku/yoyaku/engine/TopicSegmentation.py (r11026)
+++ lang/python/yoyaku/yoyaku/engine/TopicSegmentation.py (r11041)
@@ -22,5 +22,9 @@
         for j in range(len(tfs)):
             if i < j:
-                ans[i][j] = sum([tfs[i][x]*tfs[j][x] for x in set(tfs[i].keys())&set(tfs[j].keys())]) / (tflen[i]*tflen[j])
+                w = (tflen[i]*tflen[j])
+                if w != 0:
+                    ans[i][j] = sum([tfs[i][x]*tfs[j][x] for x in set(tfs[i].keys())&set(tfs[j].keys())]) / w
+                else:
+                    ans[i][j] = 0.0
             elif i > j:
                 ans[i][j] = ans[j][i]
@@ -79,5 +83,5 @@
     return w / float(a)
 
-def split_matrix_rec(mat):
+def split_matrix_rec(mat, evaluate_coeff=1.2):
     MAX_TOPIC = 50
     size = len(mat)
@@ -86,9 +90,8 @@
     orgssep = []
     D.append(mat[0][size-1]/float(size**2))
-    c = 1.2
     def evaluate(s):
         a = sum(s) / float(len(s))
         v = sum([(x-a)**2 for x in s])/float(len(s))
-        ans = a+c*math.sqrt(v)
+        ans = a+evaluate_coeff*math.sqrt(v)
         #print "evaluate", ans
         return ans
@@ -128,5 +131,5 @@
     return orgssep[:b]
 
-def segmentation(tfs, sharpen_window=5):
+def segmentation(tfs, sharpen_window=5, evaluate_coeff=1.2):
     matrix = precluster_matrix(sharpen_matrix(tfmatrix(tfs), window=sharpen_window))
-    return split_matrix_rec(matrix)
+    return split_matrix_rec(matrix, evaluate_coeff)
```
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)