This file is indexed.

/usr/share/perl5/MsOffice/Word/HTML/Writer.pm is in libmsoffice-word-html-writer-perl 1.01-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
package MsOffice::Word::HTML::Writer;

use warnings;
use strict;
use MIME::QuotedPrint qw/encode_qp/;
use MIME::Base64      qw/encode_base64/;
use MIME::Types;
use Carp;
use Params::Validate qw/validate SCALAR HASHREF/;


our $VERSION = '1.01';


# Constructor.
# Optional named parameters, checked through Params::Validate:
#   title        - document title (a default title is used when false)
#   head         - HTML inserted into the <head> of the main document
#   hf_head      - HTML inserted into the <head> of the headers/footers file
#   WordDocument - hashref later rendered as a <w:WordDocument> XML island
# Returns the newly blessed writer object.
sub new {
  my $class = shift;

  # all constructor arguments are optional
  my $spec = {
    title        => {type => SCALAR,  optional => 1},
    head         => {type => SCALAR,  optional => 1},
    hf_head      => {type => SCALAR,  optional => 1},
    WordDocument => {type => HASHREF, optional => 1},
  };
  my %args = validate(@_, $spec);

  # false values fall back to defaults
  my $title = $args{title};
  $title = "Document generated by MsOffice::Word::HTML::Writer" unless $title;
  my $head    = $args{head}    ? $args{head}    : "";
  my $hf_head = $args{hf_head} ? $args{hf_head} : "";

  # the object starts with no attachment and one empty, automatic section
  my $self = bless {
    MIME_parts   => [],
    sections     => [{}],
    title        => $title,
    head         => $head,
    hf_head      => $hf_head,
    WordDocument => $args{WordDocument},
  }, $class;

  return $self;
}


# Opens a new section; later calls to write() append to it.
# Optional named parameters: page (hashref of page properties) and the
# scalars header, footer, first_header, first_footer, new_page.
# Returns the number of sections after the addition (result of push).
sub create_section {
  my $self = shift;

  # build the validation spec : one hashref entry, then the scalar entries
  my %spec = (page => {type => HASHREF, optional => 1});
  foreach my $name (qw/header footer first_header first_footer new_page/) {
    $spec{$name} = {type => SCALAR, optional => 1};
  }
  my %section = validate(@_, \%spec);

  # a single section without content is discarded, so that the new
  # section takes its place
  my $existing = $self->{sections};
  if (@$existing == 1 && !$existing->[0]{content}) {
    $self->{sections} = [];
  }

  # register the new section
  push @{$self->{sections}}, \%section;
}


# Appends all given HTML fragments to the content of the current (last)
# section. Returns the accumulated content of that section.
sub write {
  my ($self, @fragments) = @_;

  my $html = join "", @fragments;
  $self->{sections}[-1]{content} .= $html;
}



# Generates the MIME document and saves it into $filename.
# When $filename has no extension, ".doc" is appended.
# Croaks if the filename is missing, or if the file cannot be opened,
# written or closed. Returns true on success.
sub save_as {
  my ($self, $filename) = @_;

  croak "save_as: missing filename"
    unless defined $filename && length $filename;

  # default extension is ".doc"
  $filename .= ".doc" unless $filename =~ /\.\w{1,5}$/;

  # generate the content first, so that a failure in generation does not
  # leave an empty, truncated file behind
  my $content = $self->content;

  # open the file (the :crlf layer writes "\n" as CR LF)
  open my $fh, ">:crlf", $filename
    or croak "could not open >$filename: $!";

  # write content and close; buffered write errors only surface at close
  # time, hence both results are checked
  print {$fh} $content
    or croak "could not write to $filename: $!";
  close $fh
    or croak "could not close $filename: $!";

  return 1;
}


# Reads an attachment and registers it under "files/$name".
# The arguments after $name are handed over to open(), so the source may
# be whatever open() accepts with that number of arguments.
# Croaks when open() fails. Returns the number of registered attachments
# (result of push).
sub attach {
  my ($self, $name, $open1, $open2, @other) = @_;

  # open() is called with exactly as many arguments as were supplied,
  # because its behaviour depends on the argument count
  my $in;
  if (!@other && !$open2) {
    open $in, $open1
      or croak "open $open1 : $!";
  }
  elsif (!@other) {
    open $in, $open1, $open2
      or croak "open $open1, $open2 : $!";
  }
  else {
    open $in, $open1, $open2, @other
      or croak "open $open1, $open2, @other : $!";
  }

  # names with a textual extension are read in text mode, all others
  # in binary mode
  my $is_text = $name =~ /\.(html?|css|te?xt|rtf)$/i;
  binmode($in) if !$is_text;

  # slurp the whole content
  my $data = do { local $/; <$in> };

  # store the pair (filename, content)
  push @{$self->{MIME_parts}}, ["files/$name", $data];
}


# Returns the HTML markup for a page break. $break is inserted as the
# value of 'page-break-before'; a false value means 'always'.
sub page_break {
  my ($self, $break) = @_;

  my $kind = $break ? $break : 'always';
  return "<br clear='all' style='page-break-before:" . $kind . "'>\n";
}


# Returns the HTML markup for $n_tabs tabulations; a false or missing
# $n_tabs means one tab.
sub tab {
  my ($self, $n_tabs) = @_;

  my $count = $n_tabs ? $n_tabs : 1;
  return "<span style='mso-tab-count:" . $count . "'></span>";
}

# Returns the HTML markup for a MsWord field.
#   $fieldname - name of the field, inserted verbatim
#   $args      - optional arguments/flags of the field
#   $content   - optional initial displayed content
# The characters & < > within $args and $content are replaced by HTML
# entities. A defined value such as "0" is kept as is; only undef is
# replaced by the empty string.
sub field {
  my ($self, $fieldname, $args, $content) = @_;

  for ($args, $content) {
    $_ = "" unless defined $_;   # only undef becomes empty ("0" is kept)
    s/&/&amp;/g;                 # replace HTML entities ('&' must be first)
    s/</&lt;/g;
    s/>/&gt;/g;
  }

  # when args : long form of field encoding
  if ($args ne "") {
    my $space = qq{<span style='mso-spacerun:yes'> </span>};
    return qq{<span style='mso-element:field-begin'></span>}
         . $space . $fieldname . $space . $args
         . qq{<span style='mso-element:field-separator'></span>}
         . $content
         . qq{<span style='mso-element:field-end'></span>};
  }

  # otherwise : short form of field encoding
  return qq{<span style='mso-field-code:"$fieldname"'>$content</span>};
}

# Shortcut for a QUOTE field displaying $text. The field argument is $text
# with inner double quotes backslash-escaped, wrapped in double quotes, all
# double quotes being written as &quot; before calling field().
sub quote {
  my ($self, $text) = @_;

  # every inner double quote ends up as a backslash followed by &quot;
  (my $inner = $text) =~ s/"/\\&quot;/g;

  # the surrounding double quotes are written as entities too
  my $args = "&quot;" . $inner . "&quot;";

  return $self->field(QUOTE => $args, $text);
}



# Returns the whole document as a single MIME multipart string : the main
# HTML document first, then the attachments, then the file list.
# Parts whose MIME type starts with "text" or ends with "xml" are encoded
# as quoted-printable; every other part is encoded in base64.
sub content {
  my ($self) = @_;

  # separator for parts in MIME document
  my $boundary = '__NEXT_PART__';

  # MIME multipart header
  my $mime = qq{MIME-Version: 1.0\n}
           . qq{Content-Type: multipart/related; boundary="$boundary"\n\n}
           . qq{MIME document generated by MsOffice::Word::HTML::Writer\n\n};

  # one single type database for all parts (building it is costly)
  my $mime_types = MIME::Types->new;

  # generate each part (main document must be first)
  my @parts = $self->_MIME_parts;
  my $filelist = $self->_filelist(@parts);
  for my $pair ($self->_main, @parts, $filelist) {
    my ($filename, $content) = @$pair;

    # unknown extensions are sent as generic binary data, instead of
    # producing an empty Content-Type header
    my $mime_type = $mime_types->mimeTypeOf($filename);
    $mime_type = 'application/octet-stream' unless defined $mime_type;

    my ($encoding, $encoded);
    if ($mime_type =~ /^text|xml$/) {  # starts with "text" OR ends with "xml"
      $encoding = 'quoted-printable';
      $content  =~ s/\r\n/\n/g;
      $encoded  = encode_qp($content, ''); # '': no "soft line breaks"
    }
    else {
      $encoding = 'base64';
      $encoded  = encode_base64($content);
    }

    $mime .= qq{--$boundary\n}
          .  qq{Content-Location: file:///C:/foo/$filename\n}
          .  qq{Content-Transfer-Encoding: $encoding\n}
          .  qq{Content-Type: $mime_type\n\n}
          .  $encoded
          . "\n";
  }

  # close last MIME part
  $mime .= "--$boundary--\n";

  return $mime;
}


#======================================================================
# PRIVATE METHODS
#======================================================================

# Builds the main HTML document : one <div class="SectionN"> per section,
# with a section break before every section but the first, wrapped into
# the <html> element together with the result of _head().
# Returns an arrayref [filename, html].
sub _main {
  my ($self) = @_;

  # body : concatenate content from all sections
  my $body = "";
  my $i = 1;
  foreach my $section (@{$self->{sections}}) {

    # section break
    if ($i > 1) {
      # type of break : a true value that is not a word (like 1) means
      # 'always'; the test looks for a leading letter, because \w would
      # also accept digits and let "1" through as if it were a keyword
      my $break = $section->{new_page};
      $break = 'always' if $break && $break !~ /^[a-z]/i;
      $break ||= 'auto';                             # if false
      # otherwise, type of break will just be the word given in {new_page}

      # insert into body
      my $style = qq{page-break-before:$break;mso-break-type:section-break};
      $body .= qq{<br clear=all style='$style'>\n};
    }

    # section content (a section may have received no content at all)
    my $content = defined $section->{content} ? $section->{content} : "";
    $body .= qq{<div class="Section$i">\n$content\n</div>\n};

    $i += 1;
  }

  # assemble head and body into a full document
  my $html
    = qq{<html xmlns:v="urn:schemas-microsoft-com:vml"\n}
    . qq{      xmlns:o="urn:schemas-microsoft-com:office:office"\n}
    . qq{      xmlns:w="urn:schemas-microsoft-com:office:word"\n}
    . qq{      xmlns:m="http://schemas.microsoft.com/office/2004/12/omml"\n}
    . qq{      xmlns="http://www.w3.org/TR/REC-html40">\n}
    . $self->_head
    . qq{<body>\n$body</body>\n}
    . qq{</html>\n};
  return ["main.htm", $html];
}


# Builds the <head> element of the main document : link to the file list,
# title, WordDocument XML island, section styles, and finally the HTML
# supplied by the user in {head}.
sub _head {
  my ($self) = @_;

  # fixed part : link to filelist, then title
  my $html = qq{<head>\n};
  $html .= qq{<link rel=File-List href="files/filelist.xml">\n};
  $html .= qq{<title>$self->{title}</title>\n};

  # view format
  $html .= $self->_xml_WordDocument;

  # page styles for sections
  $html .= qq{<style>\n};
  $html .= $self->_section_styles;
  $html .= qq{</style>\n};

  # user-supplied declarations come last
  $html .= $self->{head};
  $html .= qq{</head>\n};

  return $html;
}



# Returns the <xml><w:WordDocument> island built from the {WordDocument}
# hashref, or the empty string when no such option was given.
sub _xml_WordDocument {
  my ($self) = @_;

  my $options = $self->{WordDocument};
  return "" unless $options;

  my $island = "<xml><w:WordDocument>\n";
  $island   .= _w_xml($options);
  $island   .= "</w:WordDocument></xml>\n";
  return $island;
}


# Plain function (not a method) : converts a hashref into a sequence of
# XML elements in the "w:" namespace. A true value becomes the content of
# the element (recursively if it is itself a hashref); a false value
# produces an empty element. Keys are visited in sorted order, so that the
# same input always yields the same output.
sub _w_xml {
  my $node = shift;
  my $xml = "";
  foreach my $k (sort keys %$node) {
    my $v = $node->{$k};
    $xml .= $v ? (               # element with content
                   "<w:$k>"
                  . (ref $v ? _w_xml($v) : $v)
                  . "</w:$k>\n" )
               : "<w:$k />\n";     # element without content
  }
  return $xml;
}


# Returns the CSS rules for all sections : for the n-th section, an
# "@page SectionN" rule with its page properties, followed by a
# "div.SectionN" rule attaching the div to that page definition.
sub _section_styles {
  my ($self) = @_;

  my @rules;
  my $n = 0;
  for my $section (@{$self->{sections}}) {
    $n += 1;
    my @props;

    # plain CSS page properties
    for my $name (qw/size margin/) {
      my $value = $section->{page}{$name};
      push @props, "  ${name}:${value};\n" if $value;
    }

    # margins for header and footer : mso- prefix, underscores as dashes
    for my $name (qw/header_margin footer_margin/) {
      my $value = $section->{page}{$name};
      next unless $value;
      (my $css_name = $name) =~ s/_/-/g;
      push @props, "  mso-${css_name}:${value};\n";
    }

    # headers and footers refer to divs in the attached header_footer.htm
    my $has_first_page = 0;
    for my $hf (qw/header footer first_header first_footer/) {
      next unless $section->{$hf};
      $has_first_page = 1 if $hf =~ /^first/;
      (my $css_name = $hf) =~ s/_/-/;
      push @props,
        qq{  mso-${css_name}:url("files/header_footer.htm") ${hf}${n};\n};
    }
    push @props, "  mso-title-page:yes;\n" if $has_first_page;

    # both rules for this section
    push @rules, '@page Section' . $n . " {\n" . join("", @props) . "}\n";
    push @rules, 'div.Section' . $n . ' {page:Section' . $n . "}\n";
  }

  return join "", @rules;
}


# Returns the list of attached parts, each an arrayref [filename, content] :
# the computed headers/footers file first (only if it has content), then
# the attachments supplied by the user.
sub _MIME_parts {
  my ($self) = @_;

  # copy of the user attachments
  my @user_parts = @{$self->{MIME_parts}};

  # computed file with headers and footers, empty when there are none
  my $hf_content = $self->_header_footer;

  my @parts = $hf_content
            ? (["files/header_footer.htm", $hf_content], @user_parts)
            : @user_parts;

  return @parts;
}


# Returns the HTML document holding one div per header/footer defined in
# the sections, or the empty string when no section defines any.
# The div ids are the header/footer kind followed by the section number.
sub _header_footer {
  my ($self) = @_;

  # collect the divs, section after section
  my @divs;
  my $n = 0;
  for my $section (@{$self->{sections}}) {
    $n += 1;
    for my $hf (qw/header footer first_header first_footer/) {
      my $html = $section->{$hf};
      next unless $html;

      # mso-element is 'header' or 'footer', also for first-page variants
      (my $element = $hf) =~ s/^first_//;
      push @divs, qq{<div style='mso-element:${element}' id='${hf}${n}'>\n}
                . $html . "\n"
                . qq{</div>\n};
    }
  }

  # nothing to attach
  return "" unless @divs;

  # full document, linked back to the main file
  my $document = qq{<html>\n}
               . qq{<head>\n}
               . qq{<link id=Main-File rel=Main-File href="../main.htm">\n};
  $document .= $self->{hf_head};
  $document .= qq{</head>\n};
  $document .= qq{<body>\n} . join("", @divs) . qq{</body>\n};
  $document .= qq{</html>\n};

  return $document;
}



# Builds the XML file list referring to the main file, to each of the
# given parts (arrayrefs whose first item is the filename) and to the
# file list itself. Returns an arrayref [filename, xml].
sub _filelist {
  my ($self, @parts) = @_;

  # one entry per attached file
  my @entries = map { my $href = $_->[0]; qq{ <o:File HRef="$href"/>\n} }
                    @parts;

  my $xml = join "",
    qq{<xml xmlns:o="urn:schemas-microsoft-com:office:office">\n},
    qq{ <o:MainFile HRef="../main.htm"/>\n},
    @entries,
    qq{ <o:File HRef="filelist.xml"/>\n},   # the filelist is itself attached
    qq{</xml>\n};

  return ["files/filelist.xml", $xml];
}



1;

__END__

=head1 NAME

MsOffice::Word::HTML::Writer - Writing documents for MsWord in HTML format

=head1 SYNOPSIS

  use MsOffice::Word::HTML::Writer;
  my $doc = MsOffice::Word::HTML::Writer->new(
    title        => "My new doc",
    WordDocument => {View => 'Print'},
  );
  
  $doc->write("<p>hello, world</p>", 
              $doc->page_break, 
              "<p>hello from another page</p>");
  
  $doc->create_section(
    page => {size   => "21.0cm 29.7cm",
             margin => "1.2cm 2.4cm 2.3cm 2.4cm"},
    header => sprintf("Section 2, page %s of %s", 
                                  $doc->field('PAGE'), 
                                  $doc->field('NUMPAGES')),
    footer => sprintf("printed at %s", 
                                  $doc->field('PRINTDATE')),
    new_page => 1, # or 'left', or 'right'
  );
  $doc->write("this is the second section, look at header/footer");
  
  $doc->attach("my_image.gif", $path_to_my_image);
  $doc->write("<img src='files/my_image.gif'>");
  
  $doc->save_as("/path/to/some/file");

=head1 DESCRIPTION

=head2 Goal

The present module is one way to programmatically generate documents
targeted for Microsoft Word (MsWord). It doesn't need
MsWord to be installed, and doesn't even require a Win32 machine
(which is why it is not in the C<Win32> namespace).

=head2 MsWord and HTML

MsWord can read documents encoded in old native binary format, in Rich
Text Format (RTF), in XML (either ODF or OOXML), or -- maybe this is
less known -- in HTML, with some special markup for pagination and
other MsWord-specific features. Such HTML documents are often in
several parts, because attachments like images or headers/footers need
to be in separate files; however, since it is more convenient to carry
all data in a single file, MsWord also supports the "MHTML" format (or
"MHT" for short), i.e. an encapsulation of a whole HTML tree into a
single file encoded in MIME multipart format. This format can be
generated interactively from MsWord by calling the "SaveAs" menu and
choosing the F<.mht> extension.

Documents saved with a F<.mht> extension will not directly 
reopen in MsWord : when clicking on such documents, Windows
chooses Internet Explorer as the default display program.
However, these documents can be simply renamed with a
F<.doc> extension, and will then open directly in MsWord.
By the way, the same can be done with XML or RTF documents.
That is to say, MsWord is able to recognize the internal
format of a file, without any dependency on the filename.

=head2 Features of the module

C<MsOffice::Word::HTML::Writer> helps you to programmatically generate
MsWord documents in MHT format. The advantage of this technique is
that one can rely on standard HTML mechanisms for layout control, such
as styles, tables, divs, etc. Of course this markup can be produced
using your favorite HTML templating module; the added value
of C<MsOffice::Word::HTML::Writer> is to help building the 
MIME multipart file, and provide some abstractions for 
representing MsWord-specific features (headers, footers, fields, etc.).

=head2 Advantages of MHT format

The MHT format is probably the most convenient
way for programmatic document generation, because

=over

=item *

unlike Excel, MsWord native binary format (used in versions up to 2003)
is unpublished and therefore cannot be generated without the MsWord executable.

=item *

remote control of the MsWord program through an OLE connection,
as in L<Win32::Word::Writer|Win32::Word::Writer>, requires a
local installation of Microsoft Office, and is not well
suited for server-side generation because the MsWord program might hang
or might open dialog boxes that require user input.

=item *

generation of documents in RTF is possible, but 
authoring the models requires deep knowledge of the RTF structure
--- see L<RTF::Writer>.

=item *

authoring models in XML also requires
deep knowledge of the XML structure.

Instead of working directly at the XML level, one could use the
L<OpenOffice::OODoc> distribution on CPAN, which provides programmatic
access to the "ODF" XML format used by OpenOffice. MsWord is able to
read and produce such ODF files, but this is not fully satisfactory
because in that mode many MsWord features are disabled or restricted.

The XML format used by MsWord is called "OOXML"; to
my knowledge, there is no CPAN module providing an API to 
this format.


=back

By contrast, C<MsOffice::Word::HTML::Writer> allows you to 
produce documents even with little knowledge of MsWord.
Besides, since the content is in HTML, it can be assembled
with any HTML tool, and therefore also requires little knowledge
of Perl.

One word of warning, however : opening MHT documents in MsWord is
a bit slower than native binary or RTF documents, because MsWord needs to
parse the HTML, compute the layout and convert it into its internal
representation.  Therefore MHT format is not recommended for very
large documents.

=head2 Usage

C<MsOffice::Word::HTML::Writer> is used in production
at Geneva courts of law, for generating thousands of documents
per day, from hundreds of models, with an architecture of 
reusable document parts implemented by Template Toolkit mechanisms
(macros, blocks and views).


=head1 METHODS

B<General convention> : method names that start
with a I<verb> may change the internal state of the 
writer object (for example L</write>, L</create_section>);
method names that are I<nouns> return data without modifying
the internal state (for example L</field>, L</content>, L</page_break>).



=head2 new

    my $doc = MsOffice::Word::HTML::Writer->new(%params);

Creates a new writer object. Optional parameters are :

=over

=item title

document title

=item head

any HTML declarations you may want to include in the
C<head> part of the generated document (for example
inline CSS styles or links to attached stylesheets).

=item hf_head

any HTML declarations you may want to include in the
C<head> part of the I<headers and footers> HTML document
(MsWord requires headers and footers to be 
specified as C<div>s in a separate HTML document).

=item WordDocument

a hashref of options to include as an XML island in the 
HTML C<head>, corresponding to various options in the 
MsWord "Tools/Options" panel. These will be included
in a XML element named C<< <w:WordDocument> >>, and
all children elements will be automatically prefixed
by C<w:>. The hashref may contain nested hashrefs, such as

  WordDocument => { View => 'Print',
                    Compatibility => {DoNotExpandShiftReturn => "",
                                      BreakWrappedTables     => ""} }

Names and values of options
must be found from the Microsoft documentation, or from
reverse engineering of HTML files generated by MsWord.

=back

Parameters may also be passed as a hashref instead of a hash.


=head2 write

  $doc->write("<p>hello, world</p>");

Adds some HTML into the document body.


=head2 attach

  $doc->attach($localname, $filename);
  $doc->attach($localname, "<", \$content);
  $doc->attach($localname, "<&", $filehandle);

Adds an attachment into the document; the attachment will be encoded
as a MIME part and will be accessible under C<files/$localname>.

The remaining arguments to C<attach> specify the source of the attachment;
they are directly passed to L<perlfunc/open> and therefore have the same
API flexibility : you can specify a filename, a reference to a memory
variable, a reference to another filehandle, etc.



=head2 create_section

  $doc->create_section(
    page => {size   => "21.0cm 29.7cm",
             margin => "1.2cm 2.4cm 2.3cm 2.4cm"},
    header => sprintf("Section 2, page %s of %s", 
                                  $doc->field('PAGE'), 
                                  $doc->field('NUMPAGES')),
    footer => sprintf("printed at %s", 
                                  $doc->field('PRINTDATE')),
    new_page => 1, # or 'left', or 'right'
  );

Opens a new section within the document
(or, if this is called before any L</write>, 
sets up pagination parameters for the first section).
Subsequent calls to the L</write> method will add content to
that section, until the next L</create_section> call.

Pagination parameters are all optional and may be given
either as a hash or as a hashref; accepted parameters are :

=over

=item page

Hashref of CSS page styles, such as :

=over

=item size

Paper size (for example C<21cm 29.7cm>)

=item margin

Margins (top right bottom left).

=item header_margin

Margin for header

=item footer_margin

Margin for footer

=back


=item header

Header content (in HTML)

=item first_header

Header content for the first page of that section.

=item footer

Footer content (in HTML).

=item first_footer

Footer content for the first page.

=item new_page

If true, a page break will be inserted before the new section.
If the argument is the word C<'left'> or C<'right'>, one or two
page breaks will be inserted so that the next page is formatted
as a left (right) page.

=back



=head2 save_as

  $doc->save_as("/path/to/some/file");

Generates the MIME document and saves it at the given location.
If no extension is present, file extension F<.doc> will be added
by default to the filename.


=head2 content

Returns the whole MIME-encoded document as a single string; this is
used internally by the L</save_as> method.  Direct call is useful if
you don't want to save the document into a file, but want to do
something else like embedding it in a message or a ZIP file, or
returning it as an HTTP response.


=head2 page_break

  my $html = $doc->page_break;
  my $html = $doc->page_break('left');
  my $html = $doc->page_break('right');

Returns HTML markup for encoding a page break.
If an argument C<'left'> or C<'right'> is given, one or two
page breaks will be inserted so that the next page is formatted
as a left (right) page.

=head2 tab

  my $html = $doc->tab($n_tabs);

Returns HTML markup for encoding one or several tabs. If C<$n_tabs> is
omitted, it defaults to 1.


=head2 field

  my $html = $doc->field($fieldname, $args, $content);

Returns HTML markup for a MsWord field.

Optional C<$args> is a string with arguments or flags for
the field. See MsWord help documentation for the list of
field names and their associated arguments or flags.

Optional C<$content> is the initial displayed content for the
field (because unfortunately MsWord does not immediately compute
the field content when opening the document; users will have
to explicitly request to update all fields, by selecting the whole
document and then hitting the F9 key).

Here are some examples :

  my $header = sprintf "%s of %s", $doc->field('PAGE'), 
                                   $doc->field('NUMPAGES');
  my $footer = sprintf "created at %s, printed at %s", 
                 $doc->field(CREATEDATE => '\\@ "d MM yyyy"'),
                 $doc->field(PRINTDATE  => '\\@ "dddd d MMMM yyyy" \\* Upper');
  my $quoted = $doc->field('QUOTE', '"hello, world"', 'hello, world');

=head2 quote

  my $html = $doc->quote($text);

Shortcut to produce a QUOTE field (see last field example just above).


=head1 AUTHORING MHT DOCUMENTS

=head2 HTML for MsWord

MsWord does not support the full HTML and CSS standard,
so authoring MHT documents requires some trial and error.
Basic divs, spans, paragraphs and tables
are reasonably supported, together with their common CSS
properties; but fancier features  like floats, absolute 
positioning, etc. may yield some surprises.

To specify widths and heights, you will get better results
by using CSS properties rather than attributes of the 
HTML table model.

In case of difficulties for implementing specific features, 
try to see what MsWord does with that feature when saving
a document in HTML format (plain HTM, not MHT!). 
The generated HTML is quite verbose, but after eliminating
unnecessary tags one can sometimes figure out which are 
the key tags (they start with C<o:>  or C<w:>) or the
key attributes (they start with C<mso->) which correspond
to the desired functionality.

=head2 Collaboration with the Template Toolkit

The L<Template Toolkit|Template> (TT for short) 
is a very helpful tool for generating the HTML.
Below are some hints about collaboration between
the two modules.

=head3 Client code calls both TT and Word::HTML::Writer

The first mode is to use the Template Toolkit for
generating various document parts, and then assemble
them into C<MsOffice::Word::HTML::Writer>.

  use Template;
  my $tmpl_app = Template->new(%options);
  $tmpl_app->process("doctmpl/html_head.tt", \%data, \my $html_head);
  $tmpl_app->process("doctmpl/body.tt",      \%data, \my $body);
  $tmpl_app->process("doctmpl/header.tt",    \%data, \my $header);
  $tmpl_app->process("doctmpl/footer.tt",    \%data, \my $footer);
  
  use MsOffice::Word::HTML::Writer;
  my $doc = MsOffice::Word::HTML::Writer->new(
    title  => $data{title},
    head   => $html_head,
  );
  $doc->create_section(
    header => $header,
    footer => $footer,
  );
  $doc->write($body);
  $doc->save_as("/path/to/some/file");

This architecture is straightforward, but various document parts 
are split into several templates, which might be inconvenient
when maintaining a large body of document templates.

=head3 HTML parts as blocks in a single template

Document parts might also be encoded as blocks within one
single template : 

  [% BLOCK html_head %]
  <style>...CSS...</style>
  [% END; # BLOCK html_head %]
  
  [% BLOCK body %]
    Hello, world
  [% END; # BLOCK body %]
  
  etc.

Then the client code calls each block in turn to gather
the various parts :

  use Template::Context;
  my $tmpl_ctxt = Template::Context->new(%options);
  my $tmpl      = $tmpl_ctxt->template("doctmpl/all_blocks.tt");
  my $html_head = $tmpl_ctxt->process($tmpl->blocks->{html_head}, \%data);
  my $body      = $tmpl_ctxt->process($tmpl->blocks->{body},      \%data);
  my $header    = $tmpl_ctxt->process($tmpl->blocks->{header},    \%data);
  my $footer    = $tmpl_ctxt->process($tmpl->blocks->{footer},    \%data);
  
  # assemble into MsOffice::Word::HTML::Writer, same as before


=head3 Template toolkit calls MsOffice::Word::HTML::Writer

Now let's look at a different architecture: the client code
calls the Template toolkit, which in turn calls
C<MsOffice::Word::HTML::Writer>. 

The most common way to call modules from TT is to use
a I<TT plugin>; but since there is currently 
no TT plugin for C<MsOffice::Word::HTML::Writer>,
we will just tell TT that templates can load regular
Perl modules, by turning on the C<LOAD_PERL> option.

The client code looks like any other TT application; but the output of
the L<process|Template/process> method is a fully-fledged MHT
document, instead of plain HTML.

  use Template;
  my $tmpl_app = Template->new(LOAD_PERL => 1, %other_options);
  $tmpl_app->process("doc_template.tt", \%data, \my $msword_doc);

Within C<doc_template.tt>, we have

  [% # main entry point
  
     # gather various parts
     SET html_head = PROCESS html_head;
     SET header    = PROCESS header;
     SET footer    = PROCESS footer;
     SET body      = PROCESS body;
  
     # create Word::HTML::Writer object
     USE msword = MsOffice.Word.HTML.Writer(head=html_head);
  
     # setup section format
     CALL msword.create_section(
        page => {size          => "21.0cm 29.7cm",
                 margin        => "1cm 2.5cm 1cm 2.5cm",
                 header_margin => "1cm",
                 footer_margin => "0cm",},
        header => header,
        footer => footer
      );
  
      # write the body
     CALL msword.write(body);
  
     # return the MIME-encoded MsWord document
     msword.content();  %]
  
  [% BLOCK html_head %]
  ...

=head3 Inheritance through TT views

The above architecture can be refined one step further,
by using L<TT views|Template::Manual::Views> to 
encapsulate documents. Views have an inheritance mechanism,
so it becomes possible to define families of document
templates, that inherit properties or methods from common
ancestors. Let us start with F<generic_letter.tt2>, 
a generic letter template :

  [% VIEW generic_letter
        title="Generic letter template";
  
       BLOCK main;
         USE msword = MsOffice.Word.HTML.Writer(
            title => view.title,
            head  => view.html_head(),
         );
         view.write_body();
         msword.content();
       END; # BLOCK main
    
       BLOCK write_body;
         CALL msword.create_section(
            page   => {size          => "21.0cm 29.7cm",
                       margin        => "1cm 2.5cm 1cm 2.5cm"},
            header => view.header(),
            footer => view.footer()
         );
         CALL msword.write(view.body());
       END; # BLOCK write_body
    
       BLOCK body;
         view.letter_head();
         view.letter_body();
       END; # BLOCK body
    
       BLOCK letter_body; %]
        Generic letter body; please override BLOCK letter_body in subviews
    [% END; # BLOCK letter_body;
  
       # ... other blocks for header, footer, letter_head, etc.
  
     END; # VIEW generic_letter
  
  [% # call main() method if this template was loaded directly
     generic_letter.main() UNLESS component.caller %]

This is quite similar to an object-oriented class : assignments
within the view are like object attributes (i.e. the C<title>
variable), and blocks within the view are like methods.

After the end of the view, we call the C<main> method, but 
only if that view was called directly from client code.
If the view is inherited, as displayed below, then the 
call to C<main> will be from the subview.

Now we can define a specific letter template that inherits
from the generic letter and overrides the C<letter_body> block :

  [% PROCESS generic_letter.tt2; # loads the parent view 
  
     VIEW advertisement;
  
       BLOCK letter_body; %]
  
         <p>Dear [% receiver.name %],</p>
         <p>You have won a wonderful [% article %].
            Just call us at [% sender.phone %].</p>
         <p>Best regards,</p>
         [% view.signature(name => sender.name ) %]
  
  [%   END; # BLOCK letter_body
     END; # VIEW advertisement
  
     advertisement.main() UNLESS component.caller %]


=head1 TO DO

Many features could be added; for example:

  - odd/even pages
  - link same header/footers across several sections
  - multiple columns
  - watermarks (I tried hard to reverse engineer MsWord behaviour, 
    but it still doesn't work ... couldn't figure out all details 
    of VML markup)

Contributions welcome!


=head1 AUTHOR

Laurent Dami, C<< <laurent DOT dami AT etat DOT geneve DOT ch> >>

=head1 BUGS

Please report any bugs or feature requests to
C<bug-msoffice-word-html-writer at rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=MsOffice-Word-HTML-Writer>.
I will be notified, and then you'll automatically be notified of progress on
your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc MsOffice::Word::HTML::Writer

You can also look for information at:

=over 4

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/MsOffice-Word-HTML-Writer>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/MsOffice-Word-HTML-Writer>

=item * RT: CPAN's request tracker

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=MsOffice-Word-HTML-Writer>

=item * Search CPAN

L<http://search.cpan.org/dist/MsOffice-Word-HTML-Writer>

=back

=head1 SEE ALSO

L<Win32::Word::Writer>, L<RTF::Writer>, L<Spreadsheet::WriteExcel>,
L<OpenOffice::OODoc>.


=head1 COPYRIGHT & LICENSE

Copyright 2009 Laurent Dami, all rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.