root/lang/perl/String-Diff/trunk/lib/String/Diff.pm @ 21926

Revision 21926, 9.3 kB (checked in by yappo, 6 years ago)

r438@haruna (orig r388): ko | 2007-06-10 15:30:59 +0900

r436@haruna: ko | 2007-06-10 15:29:31 +0900
fixed to invalid handling of zeros bug. rt.cpan #26493

Line 
1package String::Diff;
2
3use strict;
4use warnings;
5use base qw(Exporter);
6our @EXPORT_OK = qw( diff_fully diff diff_merge diff_regexp );
7
8use Algorithm::Diff qw( sdiff );
9
10our $VERSION = '0.02';
11
12our %DEFAULT_MARKS = (
13    remove_open  => '[',
14    remove_close => ']',
15    append_open  => '{',
16    append_close => '}',
17    separator    => '', # for diff_merge
18);
19
20sub diff_fully {
21    my($old, $new, %opts) = @_;
22    my $old_diff = [];
23    my $new_diff = [];
24
25    if ($opts{linebreak}) {
26        my @diff = sdiff( map{ my @l = map { ( $_, "\n") } split /\n/, $_; pop @l; [ @l ]} $old, $new);
27        for my $line (@diff) {
28            if ($line->[0] eq 'c') {
29                # change
30                my($old_diff_tmp, $new_diff_tmp) = _fully($line->[1], $line->[2]);
31                push @{ $old_diff }, @{ $old_diff_tmp };
32                push @{ $new_diff }, @{ $new_diff_tmp };
33            } elsif ($line->[0] eq '-') {
34                # remove
35                push @{ $old_diff }, ['-', $line->[1]];
36            } elsif ($line->[0] eq '+') {
37                # append
38                push @{ $new_diff }, ['+', $line->[2]];
39            } else {
40                # unchage
41                push @{ $old_diff }, ['u', $line->[1]];
42                push @{ $new_diff }, ['u', $line->[2]];
43            }
44        }
45    } else {
46        ($old_diff, $new_diff) = _fully($old, $new);
47    }
48    wantarray ? ($old_diff, $new_diff) : [ $old_diff, $new_diff];
49}
50
51sub _fully {
52    my($old, $new) = @_;
53    my @old_diff = ();
54    my @new_diff = ();
55    my $old_str;
56    my $new_str;
57
58    my @diff = sdiff( map{[ split //, $_ ]} $old, $new);
59    my $last_mode = $diff[0]->[0];
60    for my $line (@diff) {
61        if ($last_mode ne $line->[0]) {
62            push @old_diff, [$last_mode, $old_str] if defined $old_str;
63            push @new_diff, [$last_mode, $new_str] if defined $new_str;
64
65            # skip concut
66            push @old_diff, ['s', ''] unless defined $old_str;
67            push @new_diff, ['s', ''] unless defined $new_str;
68
69            $old_str = $new_str = undef;
70        }
71 
72        $old_str .= $line->[1];
73        $new_str .= $line->[2];
74        $last_mode = $line->[0];
75    }
76    push @old_diff, [$last_mode, $old_str] if defined $old_str;
77    push @new_diff, [$last_mode, $new_str] if defined $new_str;
78
79    @old_diff = _fully_filter('-', @old_diff);
80    @new_diff = _fully_filter('+', @new_diff);
81
82    return (\@old_diff, \@new_diff);
83}
84
85sub _fully_filter {
86    my($c_mode, @diff) = @_;
87    my @filter = ();
88    my $last_line = ['', ''];
89
90    for my $line (@diff) {
91        $line->[0] = $c_mode if $line->[0] eq 'c';
92        if ($last_line->[0] eq $line->[0]) {
93            $last_line->[1] .= $line->[1];
94            next;
95        }
96        push @filter, $last_line if length $last_line->[1];
97        $last_line = $line;
98    }
99    push @filter, $last_line if length $last_line->[1];
100   
101    @filter;
102}
103
104sub diff {
105    my($old, $new, %opts) = @_;
106    my($old_diff, $new_diff) = diff_fully($old, $new, %opts);
107    %opts = (%DEFAULT_MARKS, %opts);
108
109    my $old_str = _str($old_diff, %opts);
110    my $new_str = _str($new_diff, %opts);
111
112    wantarray ? ($old_str, $new_str) : [ $old_str, $new_str];
113}
114
115sub _str {
116    my($diff, %opts) = @_;
117    my $str = '';
118
119    for my $parts (@{ $diff }) {
120        if ($parts->[0] eq '-') {
121            $str .= "$opts{remove_open}$parts->[1]$opts{remove_close}";
122        } elsif ($parts->[0] eq '+') {
123            $str .= "$opts{append_open}$parts->[1]$opts{append_close}";
124        } else {
125            $str .= $parts->[1];
126        }
127    }
128    $str;
129}
130
131sub diff_merge {
132    my($old, $new, %opts) = @_;
133    my($old_diff, $new_diff) = diff_fully($old, $new, %opts);
134    %opts = (%DEFAULT_MARKS, %opts);
135
136    my $old_c = 0;
137    my $new_c = 0;
138    my $str = '';
139
140    LOOP:
141    while (scalar(@{ $old_diff }) > $old_c && scalar(@{ $new_diff }) > $new_c) {
142        my $old_str = $opts{regexp} ? quotemeta $old_diff->[$old_c]->[1] : $old_diff->[$old_c]->[1];
143        my $new_str = $opts{regexp} ? quotemeta $new_diff->[$new_c]->[1] : $new_diff->[$new_c]->[1];
144
145        if ($old_diff->[$old_c]->[0] eq 'u' && $new_diff->[$new_c]->[0] eq 'u') {
146            $str .= $old_str;
147            $old_c++;
148            $new_c++;
149        } elsif ($old_diff->[$old_c]->[0] eq '-' && $new_diff->[$new_c]->[0] eq '+') {
150            $str .= "$opts{remove_open}$old_str";
151            $str .= "$opts{remove_close}$opts{separator}$opts{append_open}" unless $opts{regexp};
152            $str .= $opts{separator} if $opts{regexp};
153            $str .= "$new_str$opts{append_close}";
154            $old_c++;
155            $new_c++;
156        } elsif ($old_diff->[$old_c]->[0] eq 'u' && $new_diff->[$new_c]->[0] eq '+') {
157            $str .= "$opts{append_open}$new_str$opts{append_close}";
158            $new_c++;
159        } elsif ($old_diff->[$old_c]->[0] eq '-' && $new_diff->[$new_c]->[0] eq 'u') {
160            $str .= "$opts{remove_open}$old_str$opts{remove_close}";
161            $old_c++;
162        }
163    }
164
165    $str .= _list_gc($old_diff, $old_c, %opts);
166    $str .= _list_gc($new_diff, $new_c, %opts);
167
168    $str;
169}
170
171sub _list_gc {
172    my($diff, $c, %opts) = @_;
173    my $str = '';
174    while (scalar(@{ $diff }) > $c) {
175        my $_str = $opts{regexp} ? quotemeta $diff->[$c]->[1] : $diff->[$c]->[1];
176        if ($diff->[$c]->[0] eq '-') {
177            $str .= "$opts{remove_open}$_str$opts{remove_close}";
178        } elsif ($diff->[$c]->[0] eq '+') {
179            $str .= "$opts{append_open}$_str$opts{append_close}";
180        } else {
181            $str .= $_str;
182        }
183        $c++;
184    }
185    $str;
186}
187
188my %regexp_opts = (
189    remove_open  => '(?:',
190    remove_close => ')',
191    append_open  => '(?:',
192    append_close => ')',
193    separator    => '|',
194    regexp       => 1,
195);
196
197sub diff_regexp {
198    my($old, $new, %opts) = @_;
199    diff_merge($old, $new, %opts, %regexp_opts);
200}
201
2021;
203__END__
204
205=head1 NAME
206
207String::Diff - Simple diff to String
208
209=head1 SYNOPSIS
210
211  use String::Diff;
212  use String::Diff qw( diff_fully diff diff_merge diff_regexp );# export functions
213
214  # simple diff
215  my($old, $new) = String::Diff::diff('this is Perl', 'this is Ruby');
216  print "$old\n";# this is [Perl]
217  print "$new\n";# this is {Ruby}
218
219  my $diff = String::Diff::diff('this is Perl', 'this is Ruby');
220  print "$diff->[0]\n";# this is [Perl]
221  print "$diff->[1]\n";# this is {Ruby}
222
223  my $diff = String::Diff::diff('this is Perl', 'this is Ruby',
224      remove_open => '<del>',
225      remove_close => '</del>',
226      append_open => '<ins>',
227      append_close => '</ins>',
228  );
229  print "$diff->[0]\n";# this is <del>Perl</del>
230  print "$diff->[1]\n";# this is <ins>Ruby</ins>
231
232  # merged
233  my $diff = String::Diff::diff_merge('this is Perl', 'this is Ruby');
234  print "$diff\n";# this is [Perl]{Ruby}
235
236  my $diff = String::Diff::diff_merge('this is Perl', 'this is Ruby',
237      remove_open => '<del>',
238      remove_close => '</del>',
239      append_open => '<ins>',
240      append_close => '</ins>',
241  );
242  print "$diff\n";# this is <del>Perl</del><ins>Ruby</ins>
243
244  # change to default marks
245  %String::Diff::DEFAULT_MARKS = (
246      remove_open  => '<del>',
247      remove_close => '</del>',
248      append_open  => '<ins>',
249      append_close => '</ins>',
250      separator    => '&lt;-OLD|NEW-&gt;', # for diff_merge
251  );
252
253  # generated for regexp
254  my $diff = String::Diff::diff_regexp('this is Perl', 'this is Ruby');
255  print "$diff\n";# this\ is\ (?:Perl|Ruby)
256
257  # detailed list
258  my $diff = String::Diff::diff_fully('this is Perl', 'this is Ruby');
259  for my $line (@{ $diff->[0] }) {
260      print "$line->[0]: '$line->[1]'\n";
261  }
262  # u: 'this is '
263  # -: 'Perl'
264
265  for my $line (@{ $diff->[1] }) {
266      print "$line->[0]: '$line->[1]'\n";
267  }
268  # u: 'this is '
269  # +: 'Ruby'
270
271=head1 DESCRIPTION
272
273String::Diff is the difference of a consecutive string is made.
274after general diff is done, the difference in the line is searchable.
275
276the mark of the addition and the deletion can be freely changed.
277the color is colored to the terminal with ANSI, using the HTML display it.
278
279after the line is divided, diff is taken when 'linebreak' option is specified.
280
281  my($old_string, $new_string) = String::Diff::diff_fully('this is Perl', 'this is Ruby', linebreak => 1);
282  my($old_string, $new_string) = String::Diff::diff('this is Perl', 'this is Ruby', linebreak => 1);
283  my $string = String::Diff::diff_merge('this is Perl', 'this is Ruby', linebreak => 1);
284  my $string = String::Diff::diff_regexp('this is Perl', 'this is Ruby', linebreak => 1);
285
286In diff and diff_merge methods the mark of the difference can be changed.
287
288  my $diff = String::Diff::diff('this is Perl', 'this is Ruby',{
289      remove_open => '<del>',
290      remove_close => '</del>',
291      append_open => '<ins>',
292      append_close => '</ins>',
293  });
294
295
296=head1 METHODS
297
298=over 4
299
300=item diff_fully
301
302  the list that divides diff according to the mark is returnd.
303
304    my($old_string, $new_string) = String::Diff::diff_fully('this is Perl', 'this is Ruby');
305
306=item diff
307
308  abd the mark of the deletion and the addition is given to the string.
309
310=item diff_merge
311
312  old and new string is merged with diff.
313
314=item diff_regexp
315
316  the regular expression to which old string and new string are matched with regexp is returned.
317
318=back
319
320=head1 AUTHOR
321
322Kazuhiro Osawa E<lt>ko@yappo.ne.jpE<gt>
323
324=head1 SEE ALSO
325
326L<Algorithm::Diff>
327
328=head1 LICENSE
329
330This library is free software; you can redistribute it and/or modify
331it under the same terms as Perl itself.
332
333=cut
Note: See TracBrowser for help on using the browser.