Changeset 11041 for lang/python

Show
Ignore:
Timestamp:
05/04/08 00:56:21 (7 months ago)
Author:
ayu
Message:
Location:
lang/python/yoyaku/yoyaku/engine
Files:
2 added
1 modified

Legend:

Unmodified
Added
Removed
  • lang/python/yoyaku/yoyaku/engine/TopicSegmentation.py

    r11026 r11041  
    22 22         for j in range(len(tfs)): 
    23 23             if i < j: 
    24                    ans[i][j] = sum([tfs[i][x]*tfs[j][x] for x in set(tfs[i].keys())&set(tfs[j].keys())]) / (tflen[i]*tflen[j]) 
       24                 w = (tflen[i]*tflen[j]) 
       25                 if w != 0: 
       26                     ans[i][j] = sum([tfs[i][x]*tfs[j][x] for x in set(tfs[i].keys())&set(tfs[j].keys())]) / w 
       27                 else: 
       28                     ans[i][j] = 0.0 
    25 29             elif i > j: 
    26 30                 ans[i][j] = ans[j][i] 
     
    79 83     return w / float(a) 
    80 84 
    81     def split_matrix_rec(mat): 
       85 def split_matrix_rec(mat, evaluate_coeff=1.2): 
    82 86     MAX_TOPIC = 50 
    83 87     size = len(mat) 
     
    86 90     orgssep = [] 
    87 91     D.append(mat[0][size-1]/float(size**2)) 
    88        c = 1.2 
    89 92     def evaluate(s): 
    90 93         a = sum(s) / float(len(s)) 
    91 94         v = sum([(x-a)**2 for x in s])/float(len(s)) 
    92            ans = a+c*math.sqrt(v) 
       95         ans = a+evaluate_coeff*math.sqrt(v) 
    93 96         #print "evaluate", ans 
    94 97         return ans 
     
    128 131     return orgssep[:b] 
    129 132 
    130         def segmentation(tfs, sharpen_window=5): 
        133 def segmentation(tfs, sharpen_window=5, evaluate_coeff=1.2): 
    131 134     matrix = precluster_matrix(sharpen_matrix(tfmatrix(tfs), window=sharpen_window)) 
    132             return split_matrix_rec(matrix) 
        135     return split_matrix_rec(matrix, evaluate_coeff)