#!/usr/bin/perl
#
# HTML2ConTeXt -- rough, line-by-line conversion of an HTML file into
# ConTeXt markup.
#
# Usage:   perl <this script> FILE
#          FILE may be given without its .htm/.html extension.
# Output:  FILE with the .htm/.html extension replaced by .tex (or with
#          .tex appended when there is no such extension), so the input
#          file is never overwritten.
#
# Every input line is rewritten independently by an ordered list of
# substitutions; the order of the rules matters (see _convert_line).
#
# NOTE(review): the literal tag and entity names inside the patterns
# (e.g. <BODY, &szlig;, &quot;) were restored from the rule comments and
# the replacement texts; confirm them against a pristine copy of the
# script before relying on the exact output.

use strict;
use warnings;

# Settings ---------------------------------------------------------------
# "y": map HTML tables to \bTABLE ... \eTABLE; anything else: drop table tags.
my $Table = "n";
# "win": emit sharp s as a raw byte; anything else: emit \ss{}.
my $Encod = "win";

# Return the name of the file to read: the name as given when it is a text
# file, otherwise the first of NAME.htm / NAME.html that is one.  When none
# qualifies the given name is returned so that open() reports the failure.
sub _locate_input {
    my ($name) = @_;
    return $name if -T $name;
    for my $ext ('.htm', '.html') {
        return $name . $ext if -T $name . $ext;
    }
    return $name;
}

# Derive the output name.  Both .htm and .html (any case) are replaced by
# .tex; any other name gets .tex appended.  The result therefore always
# differs from the input name.
sub _tex_name_for {
    my ($html_name) = @_;
    my $tex_name = $html_name;
    $tex_name .= '.tex' unless $tex_name =~ s/\.html?\z/.tex/i;
    return $tex_name;
}

# Convert one line of HTML and return the ConTeXt text.
#   $line       the input line, including its line end
#   $table_mode optional, "y" converts tables (default "n")
#   $encoding   optional, "win" keeps sharp s as a byte (default "win")
# The substitutions use "," as delimiter and \xNN escapes instead of
# non-ASCII literals so that the script parses the same way no matter how
# the source file itself is encoded.
sub _convert_line {
    my ($line, $table_mode, $encoding) = @_;
    $table_mode = 'n'   unless defined $table_mode;
    $encoding   = 'win' unless defined $encoding;

    for ($line) {    # alias $_ to $line for the substitutions below
        # --- single entities and chars --------------------------------
        s,&(.)uml;,\\\"$1,g;
        # \xB4 (acute accent) and \xB0 (degree sign) are the single-byte
        # values -- assumes the cp1252/Latin-1 coding implied by "win";
        # TODO confirm.
        s,&(.)acute;,\\\xB4$1,g;
        s,&(.)grave;,\\`$1,g;
        s,&(.)circ;,\\^$1,g;
        s,&(.)ring;,\\\xB0$1,g;
        if ($encoding eq 'win') {
            s,&szlig;,\xDF,g;    # sharp s as a single byte
        }
        else {
            s,&szlig;,\\ss{},g;
        }
        s,&(\#150|endash);,--,g;    # endash
        s, - , -- ,g;               # endash
        s,&nbsp;,~,g;               # non breaking space

        # --- quotations -----------------------------------------------
        s,&quot;([^<>]*)&quot;,\\quotation{$1},g;
        s,&(r|l)aquo;([^<>]*)&(l|r)aquo;,\\quotation{$2},g;
        s,&\#132;([^<>]*)&quot;,\\quotation{$1},g;
        s,&\#132;([^<>]*)$,\\quotation{$1,g;    # uncompleted line
        # The lookahead (?![^<>]*>) skips quotes that sit inside a tag;
        # without it the closing quote of every attribute value becomes
        # "}" and the HREF/NAME/SRC rules further down can never match.
        s,\s(&quot;|\")(?![^<>]*>), \\quotation{,g;    # begin quote
        s,&quot;\s,} ,g;                               # end quote
        s,&quot;,\",g;                                 # quote
        s,([^\\=\s])\"(?![^<>]*>),$1},g;               # end quote

        s,%,|~|\\%{},g;    # percent
        s,&sup(.);,^$1,g;
        s,&frac(.)(.);,\\frac{$1}{$2},g;
        # NOTE(review): &#133; (ellipsis in cp1252) is replaced by a
        # plain space -- confirm this is intended.
        s,&\#133;, ,g;

        # --- TeX words and marks --------------------------------------
        # CONTEXT has to be handled before TEX -> TeX, which would turn
        # it into "CONTeXT" and hide it from the ConTeXt rule.
        s,CONTEXT,ConTeXt,g;
        s,TEX,TeX,g;
        s,pdfTeX,\\pdfTeX{},gi;
        # NOTE(review): ppchTeX is mapped to \pdfTeX{}; this looks like
        # a copy-and-paste slip, confirm the intended macro.
        s,ppchTeX,\\pdfTeX{},gi;
        s,ConTeXt,\\ConTeXt{},g;
        s,(\s)TeX,$1\\TeX{},g;

        # --- environments ---------------------------------------------
        s,<BODY[^<>]*>,\\starttext,gi;
        s,</BODY>,\\stoptext,gi;
        s,(<BLOCKQUOTE>|<QUOTE>),\\startquotation,gi;
        s,(</BLOCKQUOTE>|</QUOTE>),\\stopquotation,gi;
        s,</?DIV[^<>]*>,,gi;           # delete all divs
        s,</?FONT[^<>]*(>|$),,gi;      # delete all font tags

        # --- headers --------------------------------------------------
        s,<H1>,\\chapter{,gi;
        s,<H2>,\\section{,gi;
        s,<H3>,\\subsection{,gi;
        s,<H4>,\\subsubsection{,gi;
        s,</H[1-6]>,},gi;

        # --- links:  \goto{text}[URL(Link)] ----------------------------
        # Non-greedy, so several links on one line are converted
        # one by one instead of being swallowed by a single match.
        s,<A\b[^<>]*?\sHREF=\"([^\"<>]*)\"[^<>]*>(.*?)</A>,\\goto{$2}[URL($1)],gi;
        s,<A\b[^<>]*?\sNAME=\"([^\"<>]*)\"[^<>]*>(.*?)</A>,\\reference[$1]{$2},gi;

        # --- tables ---------------------------------------------------
        if ($table_mode eq 'y') {
            s,<TABLE([^<>]*)>,\\bTABLE \%$1 ,gi;
            s,</TABLE>,\\eTABLE,gi;
            s,</T[DH]>,\\eTD ,gi;
            s,<T[DH]([^<>]*)>,\\bTD ,gi;
            s,</TR>,\\eTR ,gi;
            s,<TR([^<>]*)>,\\bTR ,gi;
        }
        else {
            s,</?T(?:ABLE|R|D|H)[^<>]*>,,gi;    # delete all table tags
        }

        # --- images ---------------------------------------------------
        # complete tag with a quoted SRC: keep only the file name
        s,<IMG\b[^<>]*?\sSRC=\"([^\"<>]*)\"[^<>]*>,\\externalfigure[$1],gi;
        # any other complete IMG tag: pass the attributes through
        s,<IMG([^<>]*)>,\\externalfigure[$1],gi;
        # tag continues on the next line: keep the rest as a TeX comment
        s,<IMG(.*)SRC=\"([^<>]*)\"([^<>]*)$,\\externalfigure[$2]\t\% $1 $3,gi;

        # --- lists ----------------------------------------------------
        s,<UL>,\\startitemize\[1\],gi;
        s,<OL>,\\startitemize\[n\],gi;
        s,<DL>,\\startitemize\[1\],gi;    # ?
        s,</[UOD]L>,\\stopitemize,gi;
        s,<LI>,\\item ,gi;
        s,<DT>,\\item ,gi;
        s,</?(?:LI|DT|DD)[^<>]*>,,gi;     # drop the remaining item tags

        # --- paragraphs, rules, inline styles ---------------------------
        s,<P>,\n\n,gi;
        s,<BR[^<>]*>,\n,gi;
        s,<HR[^<>]*>,\\blank ,gi;
        s,<(PRE|TT|CODE)>,\\type{,gi;
        s,<(STRONG|B)>,{\\bf ,gi;
        s,<(EM|I|U)>,{\\em ,gi;

        # --- everything else --------------------------------------------
        # a tag alone on its line becomes a \start.../\stop... pair
        s,^</([^\s<>]*)>$,\\stop$1,gi;
        s,^<([^\s<>]+)([^<>]*)>$,\\start$1\[$2\],gi;
        s,</[^<>]*>,},gi;                       # all other closing tags become }
        s,<([^\s<>]+)\s([^<>]*)>,\\$1\[$2\]\{,gi;  # all other opening tags become {
        s,<([^\s<>]+)>,\\$1\{,gi;               # all other opening tags become {

        # Decode the markup-significant entities only now, after all tag
        # rules have run, so that escaped text such as "&lt;b&gt;" is not
        # mistaken for a tag.  &amp; goes last and yields \& because a
        # bare & is an alignment tab in TeX.
        s,&lt;,<,g;
        s,&gt;,>,g;
        s,&amp;,\\&,g;

        # Remove leading whitespace.  NOTE(review): \s also matches the
        # line end, so whitespace-only (blank) lines vanish from the
        # output -- confirm this is wanted.
        s,^\s*,,g;
    }
    return $line;
}

# Entry point: print the banner, open the input and output files, convert
# line by line and print one progress dot per line.  Dies with a message
# on a missing argument or on any open/write/close failure.
sub main {
    my @args = @_;

    print "\nThis is HTML2ConTeXt. Version 2002-05-15\n";
    print "I'll try to convert your HTML file for ConTeXt.\n";
    print "copyleft Henning Hraban Ramm, http://www.fiee.net/texnique/\n\n";

    my $given = $args[0];
    die "You must name a file to convert!\n"
        unless defined $given && length $given;

    my $HTMLDatei = _locate_input($given);
    print "$given not found, using $HTMLDatei instead.\n"
        if $HTMLDatei ne $given;
    $HTMLDatei =~ tr{\\}{/};    # DOS style path separators

    open my $quelle, '<', $HTMLDatei
        or die "Can't open $HTMLDatei! $!";

    my $TeXDatei = _tex_name_for($HTMLDatei);
    print $TeXDatei . "\n";
    open my $ziel, '>', $TeXDatei
        or die "Can't make $TeXDatei! $!";

    while (my $zeile = <$quelle>) {
        print {$ziel} _convert_line($zeile, $Table, $Encod)
            or die "Can't write to $TeXDatei! $!";
        print ".";
    }
    print "\n";

    # buffered write errors only surface when the handle is closed
    close $ziel or die "Can't finish writing $TeXDatei! $!";
    close $quelle;
    return 0;
}

# Run only when executed as a script, so the helpers can be loaded and
# tested without touching any file.
exit main(@ARGV) unless caller;

1;