Changeset 35409 for lang/perl/MSWord-ExtractContent
- Timestamp:
- 09/17/09 16:58:57 (4 years ago)
- Location:
- lang/perl/MSWord-ExtractContent/trunk
- Files:
-
- 3 modified
-
lib/File/Extract/MSWord.pm (modified) (1 diff)
-
lib/MSWord/ExtractContent.pm (modified) (3 diffs)
-
t/97_podspell.t (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
lang/perl/MSWord-ExtractContent/trunk/lib/File/Extract/MSWord.pm
r35406 r35409
   30    30      __END__
   31    31
   32          - =encoding utf-8
   33          -
   34          - =for stopwords
   35          - MSWord
   36          -
   37    32      =head1 NAME
   38    33
-
lang/perl/MSWord-ExtractContent/trunk/lib/MSWord/ExtractContent.pm
r35406 r35409
  300   300      = substr $self->_table_stream, $self->{_fcClx}, $self->{_lcbClx};
  301   301
  302          - $self->{_prm_encoding} = ord(substr $clx, 0, 1, q{});
        302    + while (length $clx > 0) {
        303    +     my $clxt = ord(substr $clx, 0, 1, q{});
        304    +
        305    +     last if $clxt == 2; # plcfpcd
        306    +
        307    +     if ($clxt == 1) { # grpprl => SKIP
        308    +         my $skip = _get_short(substr $clx, 0, 2, q{});
        309    +
        310    +         substr $clx, 0, $skip, q{};
        311    +     }
        312    +     else {
        313    +         croak "Unknown CLX block.";
        314    +     }
        315    + }
        316    + croak "PCDs not found" if length $clx <= 0;
        317    +
  303   318
  304   319      my $length = _get_long(substr $clx, 0, 4, q{});
  305          - if ($length != length $clx) {
  306          -     carp "Unmatched PCD length.";
  307          - }
  308          -
  309          - my $n = ( length($clx) - $LENGTH_CP ) / ( $LENGTH_CP + $LENGTH_PCD );
        320    +
        321    + my $n = ( $length - $LENGTH_CP ) / ( $LENGTH_CP + $LENGTH_PCD );
  310   322      printf {*STDERR} "number of PCDs: %d\n", $n if $DEBUG;
  311   323
    …     …
  501   513      1;
  502   514      __END__
  503          -
  504          - =encoding utf-8
  505          -
  506          - =for stopwords
  507          - functionalities
  508          - utf
  509          - msword
  510          - CHP CHaracter PAP PAragraph
  511   515
  512   516      =head1 NAME
    …     …
  609   613      =head1 LIMITATIONS
  610   614
        615    + Only support Microsoft Word binary document.
        616    + Does not support Microsoft Word XML document (.docx).
        617    +
  611   618      This module does not handle PAP (PAragraph Properties) and CHP (CHaracter
  612   619      Properties), that define paragraphs and characters style.
-
lang/perl/MSWord-ExtractContent/trunk/t/97_podspell.t
r35406 r35409
   11    11      Nobuaki
   12    12      ITO
         13    + functionalities
   13    14      utf
         15    + docx
         16    + msword
         17    + CHP
         18    + CHaracter
         19    + PAP
         20    + PAragraph
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)