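# Source: root/platform/tdiary/util/tdiarysearch/search.rb
#
# Revision 35969, 11.2 kB (checked in by wtnabe, 2 years ago)
#
# /platform/tdiary/util/tdiarysearch/search.rb: setup_patterns()内でエンコーディング指定漏れ
# (commit message, in English: the encoding argument was missing inside setup_patterns())
#
#   • Property svn:executable set to *
# Line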
1#!/usr/bin/env ruby
2#
3# tdiarysearch
4#
5# Copyright (C) 2003-2005 Minero Aoki
6#
7# This program is free software.
8# You can distribute/modify this program under the terms of
9# the GNU GPL, General Public License version 2.
10#
11# $originalId: search.rb,v 1.14 2005/07/27 07:16:07 aamine Exp $
12#
13# Project home page: http://i.loveruby.net/w/tdiarysearch.html
14#
15
16#
17# Static Configurations
18#
19
# When true, every successful query is appended to the query log.
LOGGING = false
# Base name of the query log file; query_log() prepends the tDiary data path.
LOGFILE_NAME = 'search.log'
# Mirrors Ruby's global debug flag; when set, the error page also shows the
# exception class and backtrace.
DEBUG = $DEBUG
23
24#
25# HTML Templates
26#
27
# Strips the indentation of the first line (a run of tabs or a run of spaces)
# from the start of every line of +str+. Used so that the heredoc templates
# below can be indented in the source.
def unindent(str)
  str.gsub(/^#{str[/\A(?:\t+| +)/]}/, '')
end

# Page prologue. This is an ERB template (single-quoted heredoc): it reads
# TDIARY_ENCODING and the local variable +theme+ when rendered.
HEADER = unindent <<'EOS'
  <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
  <html lang="ja">
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=<%= TDIARY_ENCODING %>">
    <meta http-equiv="Content-Language" content="ja">
    <meta name="robots" content="noindex">
    <link rel="stylesheet" href="theme/base.css" type="text/css" media="all">
    <link rel="stylesheet" href="theme/<%= theme %>/<%= theme %>.css" title="<%= theme %>" type="text/css" media="all">
    <title>tDiary Search</title>
  </head>
  <body>
EOS

# Page epilogue.
FOOTER = unindent <<'EOS'
  </body>
  </html>
EOS

# Search form. The form action is interpolated once, at load time, from this
# file's base name; the ERB part reads the locals +patterns+ and +theme+.
SEARCH_FORM = unindent <<"EOS"
  <form method="post" action="#{File.basename(__FILE__)}">
  <input type="text" name="q" size="20" value="<%= patterns.map {|re| escape(re.source) }.join(' ') %>">
  <input type="submit" value="Search">
  <%
      if theme
  %><input type="hidden" name="theme" value="on"><%
      end
  %>
  </form>
EOS

# Heading plus search form.
# NOTE(review): nothing in this file references SEARCH_PAGE; search_form_page
# renders SEARCH_FORM directly -- confirm whether that is intended.
SEARCH_PAGE = unindent <<"EOS"
  <h1>tDiary Search</h1>
  #{SEARCH_FORM}
EOS

# The result listing stops once more than this many components have matched.
TOO_MANY_HITS = 50

# Result listing. Iterates match_components(patterns) and renders one "day"
# block per hit; reads the locals +patterns+ and +theme+.
SEARCH_RESULT = unindent <<"EOS"
  <h1>tDiary Search: Search Result</h1>
  #{SEARCH_FORM}
  <%
      nhits = 0
      toomanyhits = false
      match_components(patterns) {|diary, fragment, component|
        nhits += 1
        if nhits > TOO_MANY_HITS
          toomanyhits = true
          break
        end
  %>
  <div class="day">
  <h2><a href="<%= url(diary, fragment) %>"><%= diary.y_m_d %></a></h2>
  <div class="body">
  <div class="section">
  <p><%= short_html(component) %></p>
  </div>
  </div>
  </div><%
      }
  %>
  <p><%= toomanyhits ? 'too many hits.' : nhits.to_s+' hits.' %></p>
  #{SEARCH_FORM}
EOS

# Search form followed by the escaped error +reason+.
SEARCH_ERROR = unindent <<"EOS"
  #{SEARCH_FORM}
  <%= escape(reason) %>.
EOS

# Query history listing, newest first, built from recent_queries().
HISTORY = unindent <<"EOS"
  <h1>tDiary Search: Search History</h1>
  #{SEARCH_FORM}
  <ul>
  <%
      recent_queries.sort_by {|t,q| -t.to_i }.each do |time, query|
  %><li><%= time.strftime('%Y-%m-%d %H:%M:%S') %> | <a href="#{File.basename(__FILE__)}?q=<%= escape_url(query) %>"><%= escape(query) %></a></li>
  <%
      end
  %></ul>
  #{SEARCH_FORM}
EOS
114
115#
116# Main
117#
118
# Put the directory that really contains this script at the front of the load
# path so that 'tdiary' can be required even when the CGI is reached through a
# symbolic link.
if File.symlink?(__FILE__)
  # readlink may return a path relative to the directory holding the link, so
  # resolve it against that directory rather than the current directory.
  tdiarylib = File.dirname(File.expand_path(File.readlink(__FILE__),
                                            File.dirname(__FILE__)))
else
  tdiarylib = File.dirname(__FILE__)
end
$:.unshift tdiarylib
require 'tdiary'
require 'tdiary/defaultio'
require 'erb'
128
# Encoding used for pages and patterns: 'utf-8' from tDiary 2.3.0 on,
# 'euc-jp' before. The version is compared segment by segment as integers;
# a plain string comparison would rank '2.10.0' below '2.3.0'.
TDIARY_ENCODING =
  ((TDIARY_VERSION.split('.').map {|part| part.to_i } <=> [2, 3, 0]) >= 0) ? 'utf-8' : 'euc-jp'

# Raised for queries the user has to correct (empty, too short, too many terms).
class WrongQuery < StandardError; end

# Full-width (zen-kaku) space in the encoding selected above:
# the bytes E3 80 80 in UTF-8, A1 A1 in EUC-JP.
Z_SPACE = (TDIARY_ENCODING == 'utf-8') ? "\343\200\200" : "\241\241"

# BEGIN blocks run before the rest of the file: switch stdout to binary mode
# before anything is written.
BEGIN { $stdout.binmode }
136
# CGI entry point: builds the tDiary configuration, generates the page and
# sends it, then exits with status 0.
def main
  $KCODE = TDIARY_ENCODING
  cgi = CGI.new
  @config = TDiary::Config.new(cgi)
  @config.options['apply_plugin'] = true
  # Fallback document, sent by the ensure clause if generate_page raises
  # before assigning a real page.
  html = '<html><head><title></title></head><body><p>error</p></body></html>'
  begin
    html = generate_page(cgi)
  ensure
    send_html cgi, html
  end
  exit 0
end
150
# Returns the HTML for the request described by +cgi+:
# * the history page when logging is on, a log file exists and the
#   'history' parameter is present;
# * the bare search form when there is no 'q' parameter;
# * the search result for the query otherwise;
# * the form plus a message when the query raises WrongQuery;
# * a <pre> error report for any other exception (class name and backtrace
#   are added only when DEBUG is set).
def generate_page(cgi)
  query = nil
  theme = nil   # defined up front so the error page can render HEADER
  begin
    theme = @config.theme
    if LOGGING and File.file?(query_log()) and cgi.valid?('history')
      return history_page(theme)
    end
    begin
      return search_form_page(theme) unless cgi.valid?('q')
      initialize_tdiary_plugins cgi
      query = @config.to_native([cgi.params['q']].flatten.compact.join(' '))
      patterns = setup_patterns(query)
      html = search_result_page(theme, patterns)
      save_query(query, query_log()) if LOGGING
      return html
    rescue WrongQuery => err
      # patterns is still nil when setup_patterns itself raised.
      return search_error_page(theme, (patterns || []), err.message)
    end
  rescue Exception => err
    # Deliberately broad: every failure, including ScriptError raised while
    # loading a diary style, is turned into an error page.
    html = ''
    # HEADER is an ERB template; render it instead of emitting the raw
    # "<%= ... %>" tags into the error page.
    html << ERB.new(HEADER).result(binding())
    html << "<pre>\n"
    html << 'q=' << escape(query) << "\n" if query
    html << escape(err.class.name) << "\n" if DEBUG
    html << escape(err.message) << "\n"
    html << err.backtrace.map {|i| escape(i) }.join("\n") if DEBUG
    html << "</pre>\n"
    html << FOOTER
    return html
  end
end
182
# Prints the CGI response header and, unless the request method is HEAD,
# the page body.
def send_html(cgi, html)
  # Content-Length counts bytes. String#length counts characters on Ruby
  # versions that have String#bytesize, so prefer bytesize when available.
  body_size = html.respond_to?(:bytesize) ? html.bytesize : html.length
  print cgi.header('status' => '200 OK',
                   'type' => 'text/html',
                   'charset' => TDIARY_ENCODING,
                   'Content-Length' => body_size.to_s,
                   'Cache-Control' => 'no-cache',
                   'Pragma' => 'no-cache')
  print html unless cgi.request_method == 'HEAD'
end
192
# Splits +query+ into words and turns each one into a case-insensitive
# Regexp that matches the word literally.
#
# Raises WrongQuery when a word fails check_pattern, when there are no words,
# or when there are more than 8.
def setup_patterns(query)
  patterns = split_string(query).map {|pat|
    check_pattern pat
    if RUBY_VERSION < '1.9'
      Regexp.new(Regexp.quote(pat), Regexp::IGNORECASE, TDIARY_ENCODING)
    else
      # From Ruby 1.9 the pattern's encoding comes from the source string and
      # the third argument is ignored (later rejected), so leave it out.
      Regexp.new(Regexp.quote(pat), Regexp::IGNORECASE)
    end
  }
  raise WrongQuery, 'no pattern' if patterns.empty?
  raise WrongQuery, 'too many sub patterns' if patterns.length > 8
  patterns
end
202
# Validates a single search word. Raises WrongQuery when +pat+ is nil, empty,
# shorter than 2 or longer than 128 (as measured by String#length).
def check_pattern(pat)
  raise WrongQuery, 'no pattern' unless pat
  raise WrongQuery, 'empty pattern' if pat.empty?
  raise WrongQuery, "pattern too short: #{pat}" if pat.length < 2
  raise WrongQuery, 'pattern too long' if pat.length > 128
end
209
# Splits +str+ on runs of whitespace and of the characters in Z_SPACE,
# dropping empty words (such as the one produced by leading whitespace).
# The /o flag interpolates Z_SPACE only once.
def split_string(str)
  str.split(/[\s#{Z_SPACE}]+/ou).reject {|w| w.empty? }
end
213
# Appends one line "<unix time>: <query.dump>" to +file+, holding an exclusive
# lock on the file while writing.
def save_query(query, file)
  File.open(file, 'a') {|f|
    begin
      f.flock(File::LOCK_EX)
      # String#dump yields an escaped, double-quoted literal on a single line.
      f.puts "#{Time.now.to_i}: #{query.dump}"
    ensure
      f.flock(File::LOCK_UN)
    end
  }
end
224
225#
226# eRuby Dispatchers and Helper Routines
227#
228
# Renders the empty search form page. +theme+ and the local +patterns+ are
# read by the templates through the binding.
def search_form_page(theme)
  patterns = []
  ERB.new(HEADER + SEARCH_FORM + FOOTER).result(binding())
end
233
# Renders the result page. +theme+ and +patterns+ are read by the templates
# through the binding.
def search_result_page(theme, patterns)
  ERB.new(HEADER + SEARCH_RESULT + FOOTER).result(binding())
end
237
# Renders the form together with the error +reason+. All three arguments are
# read by the templates through the binding.
def search_error_page(theme, patterns, reason)
  ERB.new(HEADER + SEARCH_ERROR + FOOTER).result(binding())
end
241
# Renders the query history page. +theme+ and the local +patterns+ are read
# by the templates through the binding.
def history_page(theme)
  patterns = []
  ERB.new(HEADER + HISTORY + FOOTER).result(binding())
end
246
# Path of the query log: the configured data path immediately followed by
# LOGFILE_NAME (no separator is inserted).
def query_log
  "#{@config.data_path}#{LOGFILE_NAME}"
end
250
# Maximum number of log entries returned by recent_queries.
N_SHOW_QUERY_MAX = 20

# Returns up to N_SHOW_QUERY_MAX [Time, query] pairs read from the end of the
# query log, newest first, or nil when the log file does not exist.
def recent_queries
  return unless File.file?(query_log())
  File.readlines(query_log()).reverse[0, N_SHOW_QUERY_MAX].map {|line|
    time, q = *line.split(/:/, 2)
    # SECURITY NOTE(review): eval turns the String#dump literal written by
    # save_query back into a string. It executes whatever the log contains,
    # so the log file must be writable only by this program.
    [Time.at(time.to_i), eval(q)]
  }
end
260
# Floating-point infinity, passed to each_visible_comment as its limit.
INF = 1 / 0.0

# Walks the diaries yielded by foreach_diary_from_latest, skipping invisible
# ones, and yields (diary, fragment, component) for every section and every
# visible comment that matches ALL of +patterns+.
# Sections are numbered from 1 and get 'p' fragments; comments get 'c'
# fragments using the number supplied by each_visible_comment.
def match_components(patterns)
  foreach_diary_from_latest do |diary|
    next unless diary.visible?
    num = 1
    diary.each_section do |sec|
      src = sec.to_src   # fetched once instead of once per pattern
      if patterns.all? {|re| re =~ src }
        yield diary, fragment('p', num), sec
      end
      num += 1
    end
    diary.each_visible_comment(INF) do |cmt, cnum|
      body = cmt.body
      if patterns.all? {|re| re =~ body }
        yield diary, fragment('c', cnum), cmt
      end
    end
  end
end
280
# Builds an anchor name: +type+ followed by +num+ zero-padded to two digits.
def fragment(type, num)
  sprintf('%s%02d', type, num)
end
284
285#
286# tDiary Implementation Dependent
287#
288
# Calls the block with every diary, newest first: data files are visited in
# the order foreach_data_file yields them, and the diaries of each file are
# passed in descending date order. Trailing slashes are removed from the
# configured data path first.
def foreach_diary_from_latest(&block)
  foreach_data_file(@config.data_path.sub(%r</+\z>, '')) do |path|
    read_diaries(path).sort_by {|diary| diary.date }.reverse_each(&block)
  end
end
294
# Yields the path of every "*.td2" file found in a subdirectory of
# +data_path+ whose name starts with a digit, in descending path order.
def foreach_data_file(data_path, &block)
  Dir.glob("#{data_path}/[0-9]*/*.td2").sort.reverse_each do |path|
    # Object#untaint is not defined on every Ruby version; call it only
    # where it exists.
    yield(path.respond_to?(:untaint) ? path.untaint : path)
  end
end
300
# Loads every diary stored in the data file +path+, attaches their comments
# and returns the diary objects (in no particular order).
# As a side effect the month of the last diary read is appended to
# Years[<year>].
def read_diaries(path)
  d = nil
  diaries = {}
  load_tdiary_textdb(path) do |header, body|
    d = diary_class(header['Format']).new(header['Date'], '', body)
    # Anything other than the literal 'false' counts as visible.
    d.show(header['Visible'] != 'false')
    diaries[d.ymd] = d
  end
  (Years[d.y] ||= []).push(d.m) if d
  load_comments diaries, path
  diaries.values
end
313
# Diary classes already loaded by diary_class, keyed by style name.
DIARY_CLASS_CACHE = {}

# Returns the diary class for +style+ (the 'Format' header of a data file).
# On first use it requires "tdiary/<style>_style.rb", looks up
# TDiary::<Style>Diary, mixes DiaryClassDelta into it and caches the result.
def diary_class(style)
  c = DIARY_CLASS_CACHE[style]
  return c if c
  require "tdiary/#{style.downcase}_style.rb"
  # const_get resolves the class by name without evaluating text taken from
  # a data file as Ruby code.
  c = TDiary.const_get("#{style.capitalize}Diary")
  c.__send__(:include, DiaryClassDelta)
  DIARY_CLASS_CACHE[style] = c
  c
end
325
# Date-formatting helpers mixed into every diary class by diary_class.
# The including class must provide #date returning an object that responds
# to strftime, year and month.
module DiaryClassDelta
  # Date as 'YYYYMMDD'.
  def ymd
    date().strftime('%Y%m%d')
  end

  # Date as 'YYYY-MM-DD'.
  def y_m_d
    date().strftime('%Y-%m-%d')
  end

  # Four-digit, zero-padded year.
  def y
    '%04d' % date().year
  end

  # Two-digit, zero-padded month.
  def m
    '%02d' % date().month
  end
end
343
# Reads the comment file that belongs to the diary data file +path+ (same
# name with the trailing '2' replaced by 'c') and adds each comment to the
# matching diary in +diaries+, a hash keyed by the 'Date' header value.
# Does nothing when the comment file does not exist; comments whose date has
# no diary are dropped.
def load_comments(diaries, path)
  cmtfile = path.sub(/2\z/, 'c')
  return unless File.file?(cmtfile)
  load_tdiary_textdb(cmtfile) do |header, body|
    c = TDiary::Comment.new(header['Name'], header['Mail'], body,
                            Time.at(header['Last-Modified'].to_i))
    # Anything other than the literal 'false' counts as visible.
    c.show = (header['Visible'] != 'false')
    d = diaries[header['Date']]
    d.add_comment c if d
  end
end
355
# Parses a tDiary text database file and yields (headers, body) for every
# record. The first line must be 'TDIARY2.00.00' or 'TDIARY2.01.00'. Each
# record is a paragraph of "Name: value" header lines, a blank line, and a
# body terminated by a line holding a single '.'.
#
# Raises RuntimeError when the version line is missing or unknown.
def load_tdiary_textdb(path)
  File.open(path) {|f|
    # gets returns nil for an empty file; report that as an unknown format.
    ver = (f.gets || '').strip
    raise "unknown format: #{ver}" unless ver == 'TDIARY2.00.00' or ver == 'TDIARY2.01.00'
    # An empty separator makes IO#each read paragraph by paragraph.
    f.each('') do |header|
      # Object#untaint is not defined on every Ruby version.
      header = header.untaint if header.respond_to?(:untaint)
      h = {}
      header.strip.each_line do |line|
        n, v = *line.split(':', 2)
        next unless v   # skip header lines without a colon
        h[n.strip] = v.strip
      end
      body = f.gets("\n.\n").chomp(".\n")
      body = body.untaint if body.respond_to?(:untaint)
      yield h, body
    end
  }
end
370
# Returns the shortened text shown for one hit.
# A section gives its subtitle and body (plugins applied, tags removed,
# joined by <br>), or just the body when it has no subtitle. A comment gives
# the escaped "name: body" text.
#
# Raises RuntimeError for any other kind of component.
def short_html(component)
  # Section classes do not have common superclass, we can't use class here.
  case component.class.name
  when /Section/
    section = component
    if section.subtitle
      sprintf('%s<br>%s',
              tdiary2text(section.subtitle_to_html),
              tdiary2text(section.body_to_html))
    else
      tdiary2text(section.body_to_html)
    end
  when /Comment/
    cmt = component
    shorten(escape((cmt.name + ': ' + cmt.body)))
  else
    raise "must not happen: #{component.class}"
  end
end
390
# Applies the tDiary plugins to +html+, removes everything that looks like a
# tag (<...>) and shortens the result.
def tdiary2text(html)
  re = if RUBY_VERSION < '1.9'
         Regexp.new('<[^>]*>', Regexp::EXTENDED, TDIARY_ENCODING)
       else
         # From Ruby 1.9 the third argument is ignored (later rejected).
         Regexp.new('<[^>]*>', Regexp::EXTENDED)
       end
  shorten(apply_tdiary_plugins(html).gsub(re, ''))
end
395
# Year => list of months, filled in by read_diaries and handed to the plugin
# object as its 'years' table.
Years = {}

# Make TDiary::Plugin#apply_plugin callable from outside the plugin object.
TDiary::Plugin.__send__(:public, :apply_plugin)

# Runs +html+ through the plugin object created by initialize_tdiary_plugins.
def apply_tdiary_plugins(html)
  @plugin.apply_plugin(html, false)
end
402
# Plugin object used by apply_tdiary_plugins; created per request by
# initialize_tdiary_plugins.
@plugin = nil

# Creates the TDiary::Plugin instance for this request. The cache path falls
# back to the data path when none is configured.
def initialize_tdiary_plugins(cgi)
  @plugin = TDiary::Plugin.new('conf' => @config,
                               'secure' => false,
                               'diaries' => {},
                               'cgi' => cgi,
                               'index' => @config.index,
                               'years' => Years,
                               'cache_path' => @config.cache_path ||
                                               @config.data_path)
end
415
416#
417# Utils
418#
419
# Characters replaced by escape() and their HTML entities.
HTML_ESCAPE_TABLE = {
  '&' => '&amp;',
  '<' => '&lt;',
  '>' => '&gt;',
  '"' => '&quot;'
}.freeze

# Returns a copy of +str+ with &, <, > and " replaced by HTML entities.
# Single quotes are left as they are.
def escape(str)
  tbl = HTML_ESCAPE_TABLE
  str.gsub(/[&"<>]/) {|ch| tbl[ch] }
end
431
# URL-encodes +u+ and then HTML-escapes the result, for use inside an
# attribute value.
def escape_url(u)
  escape(urlencode(u))
end
435
# Percent-encodes every byte of +str+ that is not an ASCII word character
# or '-', using lower-case hex digits.
def urlencode(str)
  # The match may be one byte or one whole multi-byte character depending on
  # the Ruby version, and ch[0] is not an Integer everywhere, so encode the
  # bytes of the match explicitly.
  str.gsub(/[^\w\-]/n) {|ch|
    ch.unpack('C*').map {|byte| sprintf('%%%02x', byte) }.join
  }
end
439
# Returns at most the first 120 characters of +str+ (newlines included).
def shorten(str)
  re = if RUBY_VERSION < '1.9'
         Regexp.new('\A.{0,120}', Regexp::MULTILINE, TDIARY_ENCODING)
       else
         # From Ruby 1.9 the third argument is ignored (later rejected).
         Regexp.new('\A.{0,120}', Regexp::MULTILINE)
       end
  str.slice(re)
end
444
# Link to +fragment+ inside +diary+: "<index><ymd>.html#<fragment>" when the
# html_anchor plugin is enabled, "<index>?date=<ymd>#<fragment>" otherwise.
def url(diary, fragment)
  if html_anchor_enabled?
    "#{@config.index}#{diary.ymd}.html\##{fragment}"
  else
    "#{@config.index}?date=#{diary.ymd}\##{fragment}"
  end
end
452
453def html_anchor_enabled?
454  if ( @html_anchor.nil? )
455    @html_anchor = @config.options2['sp.selected'].include?( 'html_anchor.rb' )
456  end
457
458  return @html_anchor
459end
460
461#
462# Old Ruby Compatibility
463#
464
# Ruby older than 1.8.0: replace String#slice with a version that takes a
# Regexp and an optional group number and returns nil when nothing matches.
if RUBY_VERSION < '1.8.0'
  class String
    remove_method :slice
    def slice(re, n = 0)
      m = re.match(self) or return nil
      m[n]
    end
  end
end
474
# Fallback Enumerable#all? for Ruby versions that lack it. Unlike the
# built-in it requires a block.
unless Array.method_defined?(:all?)
  module Enumerable
    def all?
      each do |i|
        return false unless yield(i)
      end
      true
    end
  end
end
485
# Fallback Enumerable#sort_by for Ruby versions that lack it: sorts
# [key, element] pairs and returns the elements.
unless Array.method_defined?(:sort_by)
  module Enumerable
    def sort_by
      map {|i| [yield(i), i] }.sort.map {|val, i| i }
    end
  end
end
493
# Fallback MatchData#captures for Ruby versions that lack it: every group
# except the whole match.
unless MatchData.method_defined?(:captures)
  class MatchData
    def captures
      to_a()[1..-1]
    end
  end
end
501
# Run the CGI; main() terminates the process with exit status 0.
main
# Note: See TracBrowser for help on using the browser.