#!/usr/local/bin/perl ################################################################################ # Copyright 1998-1999 by Jens Lippmann (lippmann@rbg.informatik.th-darmstadt.de) # # Permission to use, copy, modify, and distribute this software for any # purpose and without fee is hereby granted, provided that the above # copyright notice appears in all copies. This software is provided "as is" # and without any express or implied warranties. # # # cweb2html, a converter for CWEB LaTeX to LaTeX2HTML parseable LaTeX. # # This program converts CWEB produced TeX and substitutes several # plain TeX macros with LaTeX macros. # The output may be translated with LaTeX2HTML. # Use with LaTeX2HTML to translate HyTeX specific commands. # Corresponding to HTMLMAC.STY, HTMLMAC.TEX, HTMLMAC.PERL. # # $Source: /home/latex2ht/cvs/latex2html/user/cweb2html/cweb2html,v $ # $RCSfile: cweb2html,v $ # $Revision: 1.2 $ # $Date: 1999/04/09 19:25:32 $ # $Author: JCL $ # $State: Exp $ # # Handles: # - TeX macros \.{ \? \\{ \&{ \, \) \| $<$ $>$ \1...\9 # - TeX macros \X...\X # - TeX macros in code section \T{ \vb{ # - math symbols in code section # # BUGS: - does not prevent \vb{} string substitution in \.{} environment # - does not avoid command substitution in \verb/../ strings. # ################################################################################ # History # # $Log: cweb2html,v $ # Revision 1.2 1999/04/09 19:25:32 JCL # changed my e-Mail address # # Revision 1.1 1998/02/24 02:29:49 latex2html # for 98.1 # # Revision 1.1 1998/02/24 02:15:28 latex2html # ready for 98.1 # # Revision 1.16 1997/06/25 01:45:32 lippmann # debugged # # Revision 1.15 1997/06/05 16:12:19 lippmann # styled argument handling, new synopsis (-b changed to -bc !) 
# introduced -bak/+bak switch, -pretty changed to -fold/+fold # improved help information (usage) # new backup mechanism (stores files in backup directory instead tagging them) # new folding mechanism (replaced awk script with improved Perl logic) # # Revision 1.14 1997/06/02 16:45:08 lippmann # renamed PROCESSCWEBTEXOFF to CWEB2HTMLOFF. # setting CWEB2HTMLOFF to 1 causes the script to behave like a dummy # # Revision 1.13 1997/06/02 16:07:18 lippmann # renamed from ProcessCwebTex to cweb2html # # Revision 1.12 1997/06/01 11:57:41 lippmann # removed (Cweb,Hyweb) mode selection, produces different include file # names instead # # Revision 1.11 1997/06/01 08:19:08 lippmann # adapted to new name space of htmlmac.sty: # # HTmode, HTpretty, HTnoderef -> HTCweb... # HTweb -> HTCweb # \ciao -> \HTCwebdocumentend # 1 -> HTCweboneright # 2 -> HTCweboneleft # 3 -> HTCweboptbreak # 4 -> HTCwebthisleft # 5 -> HTCwebbigoptbreak # 6 -> HTCwebbreak # 7 -> HTCwebbigbreak # 8 -> HTCwebclearleft # 9 -> HTCwebempty # idit -> HTCwebidit # idma -> HTCwebidma # idbo -> HTCwebidbo # idtt -> HTCwebidtt # discr -> HTCwebdiscr # mathque -> HTCwebmathque # smllspc -> HTCwebsmllspc # # Revision 1.10 1997/05/31 01:49:53 lippmann # o \HTwebNodeRef has a new synopsis that allows to handle a refinement list # (multi-part refinements cited in the list of refinements) # o tested with multi-part refinements in Cweb and Hyweb mode, works in # conjunction with new htmlmac.sty, htmlmac.perl # o backward compatibility on by default, use +b to switch off (faster then) # # Revision 1.9 1997/05/25 21:16:04 lippmann # o backward compatibility is on by defaults, switch of with '+b' # o handles now \Xnum(, num)*:...\X, occurring in cweave generated list of # refinements with multi-part refinements # o handling of multi-part refinements still is immature # # Revision 1.8 1997/02/27 13:49:57 lippmann # handling of comments/verbatim much improved # # Revision 1.7 1997/02/26 03:24:03 lippmann # reverted flaw in % 
# line concatenation
#
# Revision 1.6  1997/02/09 15:34:49  lippmann
# fixed bug with \HTwebpar
#
# Revision 1.5  1997/02/09 15:26:20  lippmann
# allow our special hycweave to end code sections also with \cio
#
# Revision 1.4  1997/01/26 19:19:07  lippmann
# introduced -b (backwards compatibility) switch and fixed minor bugs
#
# Revision 1.3  1996/12/11 12:35:48  lippmann
# changed to get to work with Schrod's LaTeX CWEB
#  o handles now \X<num>: ... \X if <num> is non-zero
#  o sets an outer mode macro to cweb or hyweb depending on cwebmac.tex
#    or hywebmac.tex inclusion
#  o handles preliminary \end{document}
#  o \B -> \HTwebB
#  o \par -> \HTwebpar in code section (program state), was: \cio
#
# Revision 1.2  1996/09/24 03:26:53  lippmann
# env variable to switch off processing of the script should be used this way:
# setenv PROCESSCWEBTEXOFF 1
#
# Revision 1.1.1.1  1996/07/08 15:46:29  liefke
# initial version for Darmstadt
#
# Revision 1.1.1.1  1996/07/08 15:36:06  liefke
# initial version for Darmstadt
#
# Revision 1.6  1996/06/20 02:22:09  lippmann
# - the parsing of \X..\X has a totally new face.
#   + is welded into HTweb macros
#   + newlines between \X and \X are handled much better
#
# - Synopsis changed:
#   + input file list now possible
#   + .tex is no more appended!
#
#
################################################################################

# A non-empty, non-zero CWEB2HTMLOFF in the environment turns the script
# into a no-op that exits successfully.
# The original note at this place read "set this if awk cannot be found".
# NOTE(review): no awk invocation is visible in this file -- confirm that the
# note is obsolete.
exit(0) if $ENV{'CWEB2HTMLOFF'};

# Global settings; the command line switches below overwrite the defaults.
$AUTHOR   = "lippmann";
$DEBUG    = 0;          # debug level (-debug increments, +debug zeroes)
$FOLD     = 0;          # run pass2 (line folding) after pass1
$KEEPBAK  = 0;          # move the input file into the backup directory
$BAKDIR   = "bak";      # backup directory, created below $DESTDIR
$BACKCOMP = 1;          # rewrite the \1 ... \9 macros in pass1
$DESTDIR  = ".";        # where output files are written
$SRCDIR   = ".";        # directory part of the file being processed
$DS       = '/';        # directory separator
$FILE     = undef;      # file name (without directory) being processed

# Take program name, revision, date etc. from the RCS identification string.
$RCSID = '$Id: cweb2html,v 1.2 1999/04/09 19:25:32 JCL Exp $';
$RCSID =~ /[^ ]* ([^ ]*),v ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*)/;
$PRGFILE   = $1;
$RCSFILE   = "$1,v";
$RCSREV    = $2;
$RCSDATE   = $3;
$RCSTIME   = $4;
$RCSAUTHOR = $5;
$RCSSTATE  = $6;

# Parse the leading switches.  Both '-' and '+' introduce a switch; the loop
# used to look at '-' only, which made every '+' switch unreachable (it was
# taken for a file name).  A lone '--' ends switch parsing, so file names
# starting with '-' or '+' can follow it.
while (@ARGV && $ARGV[0] =~ /^[-+]/ && $ARGV[0] ne '--') {
    $_ = shift;
    if (/^-dest_dir$/) {
        mis_usage("option -dest_dir requires a directory argument")
            unless @ARGV;
        $DESTDIR = shift;
    }
    elsif (/^-fold$/)    { $FOLD = 1; }
    elsif (/^\+fold$/)   { $FOLD = 0; }
    elsif (/^-debug$/)   { $DEBUG++; }
    elsif (/^\+debug$/)  { $DEBUG = 0; }
    elsif (/^-bc$/)      { $BACKCOMP = 1; }
    elsif (/^\+bc$/)     { $BACKCOMP = 0; }
    elsif (/^-bak$/)     { $KEEPBAK = 1; }
    elsif (/^\+bak$/)    { $KEEPBAK = 0; }
    # -h, -help and --help; the former /^-h|help/ matched any switch that
    # merely contained "help" or started with "-h".
    elsif (/^--?h(elp)?$/) { help_usage(); }
    else                 { usage("unknown option $_"); }
}
# Drop the switch terminator itself, it is not an input file.
shift if @ARGV && $ARGV[0] eq '--';

mis_usage("no input file given") unless @ARGV;

main();


# Processes every file named on the command line.
# For each element of @ARGV the directory part is split off into $SRCDIR
# ("." if there is none) and the bare name is left in $FILE; process_file
# picks both up from these globals.
sub main {
    foreach $FILE (@ARGV) {
        # get leading path of file name and store in $SRCDIR
        if ($FILE =~ s/^(.*)$DS//) {
            $SRCDIR = $1;
        }
        else {
            $SRCDIR = ".";
        }
        print "$PRGFILE v$RCSREV -- Doing $FILE\n";
        process_file();
    }
}


# Points STDOUT at the file $target (truncating it); dies if that fails.
sub _redirect_stdout {
    my ($target) = @_;
    open(STDOUT, '>', $target)
        || die "$PRGFILE: error: Could not open $target for writing: $!\n";
}


# Closes the file STDOUT currently writes to ($target is used for the error
# message only) and re-attaches STDOUT to the saved handle $saved.
# Buffered write errors show up at close time, so the close is checked.
sub _restore_stdout {
    my ($saved, $target) = @_;
    close(STDOUT)
        || die "$PRGFILE: error: Could not write $target: $!\n";
    open(STDOUT, '>&', $saved)
        || die "$PRGFILE: error: Could not restore STDOUT: $!\n";
}


# Converts $SRCDIR/$FILE into $DESTDIR/$FILE.
# With $KEEPBAK set, the input is first moved to $DESTDIR/$BAKDIR/$FILE and
# read from there.  pass1 always runs; pass2 runs in addition when $FOLD is
# set.  Both passes print to STDOUT, which is redirected to the output file
# for their duration and restored afterwards.
# Dies if the input is missing or any file operation fails.
sub process_file {
    my $source = "$SRCDIR/$FILE";
    my $target = "$DESTDIR/$FILE";

    die "$PRGFILE: error: Could not find $SRCDIR/$FILE\n" unless -f $source;

    if ($KEEPBAK) {
        my $bakdir = "$DESTDIR/$BAKDIR";
        mkdir($bakdir, 0755) unless -d $bakdir;
        die "$PRGFILE: error: Could not create backup directory <$DESTDIR/$BAKDIR>\n"
            unless -d $bakdir;
        rename($source, "$bakdir/$FILE")
            || die "$PRGFILE: error: Could not create backup file <$DESTDIR/$BAKDIR/$FILE>\n";
        $source = "$bakdir/$FILE";
    }

    # remember the real STDOUT
    open(my $saved_stdout, '>&', \*STDOUT)
        || die "$PRGFILE: error: Could not duplicate STDOUT: $!\n";

    # file must have been read before we open output (dest dir may be src dir)
    slurp_file($source);
    _redirect_stdout($target);
    pass1();

    if ($FOLD) {
        # close associated file and restore STDOUT, then fold what pass1 wrote
        _restore_stdout($saved_stdout, $target);
        slurp_file($target);
        _redirect_stdout($target);
        pass2();
    }

    _restore_stdout($saved_stdout, $target);
    close($saved_stdout);
}


# Reads the entire input file into a single string.
# The string is stored in the global $_ (pass1 and pass2 take their input
# from there) and is returned as well; an empty file yields ''.
# Dies if the file cannot be opened.
sub slurp_file {
    my ($filename) = @_;

    open(my $input, '<', $filename)
        || die "$PRGFILE: error: Could not open $filename\n";
    {
        local $/;       # undefined record separator: one read gets it all
        $_ = <$input>;
    }
    close($input);

    $_ = '' unless defined $_;
    return $_;
}


# First pass: takes the file contents from $_, rewrites the plain TeX macros
# produced by CWEB line by line and prints the result to STDOUT.
# Comment lines and everything between \begin{verbatim}/\begin{LVerbatim}
# and the matching \end are printed unchanged.
# Reads the global $BACKCOMP; clobbers $_.
sub pass1 {
    my $verbatim     = 0;   # nesting level of verbatim environments
    my $code_section = 0;   # true between \B and the end-of-code marker

    $_ = '' unless defined $_;

    # Change input files to hook in our HTWEB style.
    # The closing \n is for LaTeX2HTML bugs (percent after \input...)
    s/\\input\Whywebmac\W/\\input{htcwebhy}\n/;
    s/\\input\Wcwebmac\W/\\input{htcweb}\n/;

    my $contents = $_;
    # The splitter below recognises newline-terminated lines only; terminate
    # a trailing partial line so that it is not silently dropped.
    $contents .= "\n" if length($contents) && $contents !~ /\n\z/;

    while ($contents =~ s/\A(.*\n)//) {
        $_ = $1;
        if (/^\s*%/) {
            # comment
            print;
        }
        elsif (/\\begin\s*\{(LVerbatim|verbatim)\*?\}/) {
            # fails if inside \verb !
            $verbatim++;
            print;
        }
        elsif (/\\end\s*\{(LVerbatim|verbatim)\*?\}/) {
            # fails if inside \verb !
            $verbatim--;
            print;
        }
        elsif ($verbatim) {
            print;
        }
        else {
            # Concat lines truncated with % (percent sign).
            # The % might be inserted by pass2 to make the lines readable.
            # One reason yet to do this is compliance with LaTeX2HTML
            # versions which do not recognize \foo in \foo% at end of line.
            # Another reason is to get bubble names parsed truncated this way.
            # Also, concat lines truncated with \ (occurs within \.{}).
            # This will not be reversed, so awkward line lengths can occur
            # with verbatim strings of unusual length.
            # This approach produces wrong results on:
            # - % at line end and space, tab at next line:
            #   leaves space, tab in the text (should be removed, too)
            # - % at line end followed by an empty line:
            #   results in one newline between the tokens (must be two, ie., \par)
            #
            while (s/([^\\]|^)((\\\\)*)%+[ \t]*\n/$1$2/ ||
                   s/([^\\]|^)((\\\\)*)\\\n/$1$2\\ /) {
                # peek and fetch next line if available
                $contents =~ s/(.*\n?)//;
                $_ .= $1;
            }

            if (s/([^\\]|^)\\B\\/$1\\HTCwebB\\/) { $code_section = 1; }

            # try first to convert \.{}, \\{}, \&{}, \?, \, and \)
            # take care that \\, \& etc. in a \.{} or \vb{} macro are unaffected
            # therefore handle contents of \.{} and \vb{} macros first
            # Do double substitution to catch \vb{}\vb{} or \.{}\.{}
            # (problem with [^\\|^])

            # handle verbatim strings in \vb
            s/([^\\]|^)\\vb\{(([^\\\}]|\\.)*)\}/$1."\\vb {".&subst_verbatim($2)."}"/ge;
            s/([^\\]|^)\\vb\{(([^\\\}]|\\.)*)\}/$1."\\vb {".&subst_verbatim($2)."}"/ge;

            # replace \.{ if not \\.{
            s/([^\\]|^)\\\.\{(([^\\\}]|\\.)*)\}/$1."\\HTCwebidtt{".&subst_verbatim($2)."}"/ge;
            s/([^\\]|^)\\\.\{(([^\\\}]|\\.)*)\}/$1."\\HTCwebidtt{".&subst_verbatim($2)."}"/ge;

            # process the total (maybe new) line with the other macros
            s/\\\?/\\HTCwebmathque/g;
            s/\\\\\{/\\HTCwebidit\{/g;
            s/\\\&\{/\\HTCwebidbo\{/g;
            s/\\\,/\\HTCwebsmllspc/g;
            s/\\\)/\\HTCwebdiscr/g;
            s/\\\|(\\_|[a-zA-Z])/\\HTCwebidma\{$1\}/g;
            # $<$ and $>$ become the bare characters
            s/\$<\$/</g;
            s/\$>\$/>/g;

            # The \<digit> commands must get parsed order-sensitive.
            # This is not done by LaTeX2HTML when they occur
            # within others.
            s/\\hbox\{\\(\d+)\}/\\$1/g;
            if ($BACKCOMP) {
                s/\\1/\\HTCweboneright/g;
                s/\\2/\\HTCweboneleft/g;
                s/\\3([0-9])/\\HTCweboptbreak{$1}/g;
                s/\\4/\\HTCwebthisleft/g;
                s/\\5/\\HTCwebbigoptbreak/g;
                s/\\6/\\HTCwebbreak/g;
                s/\\7/\\HTCwebbigbreak/g;
                s/\\8/\\HTCwebclearleft/g;
                s/\\9/\\HTCwebempty/g;
            }

            # a kludge to spoof LaTeX2HTML
            s/^\\end\{document\}$/\\HTCwebdocumentend/;

            if ($code_section) {
                # replace unmasked dollars with alternate math sign
                # we don't care of \\$ (\\ at EOL) here
                # Do it twice to catch $$ (display math, maybe in comments)
                s/([^\\]|^)\$/$1\~/g;
                s/([^\\]|^)\$/$1\~/g;

                # we need a better 'end of code section' marker than
                # provided by CWEB.
                # This delimits \B both in htmlmac.sty and htmlmac.perl.
                s/\\(par|cio)$/\\HTCwebpar/;
            }

            # handle escape command for constants
            s/([^\\]|^)\\T\{([^\}]*)\}/$1."\\T\{".&subst_const($2)."\}"/ge;

            # replace $\X...\X$ resp. \X...\X with \HTCwebNodeRef{..}{..}{..}
            # this task must be done *after* dollar signs are handled

            # revert $ conversion for \PB{~\X((not \X)*)\X~} (must occur in 1 line)
            s/\\PB\{\~\\X(([^\\]|(\\[^X]))*)\\X\~\}/\\PB\{\$\\X$1\\X\$\}/g;
            # and rip out \PB. This is necessary to get catcodes working, i.e.
            # \X...\X must not be subjected as argument to any macro.
            # This is a kludge to get it working with LaTeX CWEB, where \PB is
            # defined for 'restricted program mode', but cwebmac.tex defines it
            # to \relax nevertheless.
            s/\\PB(\{[\~\$]?\\X)/$1/g unless $code_section;

            # handle \X...\X null refs of HYWEB resp. normal refs of CWEB
            # handle case that a newline may appear between \X...\X
            while (/(\$)?\\X([\d, ]+):(\\underline\{\\HTCwebidbo\{)?/) {
                my ($before, $after, $math, $labellist, $pretty)
                    = ($`, $', $1, $2, $3);
                # There must be an '\\X' following.  $math and $pretty are
                # turned into the regexp fragments that have to surround it.
                $math   = $math   ? '\$'   : '';
                $pretty = $pretty ? '\}\}' : '';
                # we need this tail only for external references, i.e. when
                # the label list is false (the single label 0)
                my $cmdtail = $labellist ? '' : '~\\HTCwebX';
                if (!($after =~ s/$pretty\\X$math/\}$cmdtail/)) {
                    # go beyond end of line to substitute!
                    $contents =~ s/$pretty\\X$math/\}$cmdtail/;
                }
                if ($labellist) {
                    # In Cweb, $labellist may be a comma separated list of
                    # numbers.  (multiply occurring refinements are handled
                    # this way in the list of refinements).
                    # Make comma separated list with elements
                    # {section_<num>}{<num>}.
                    $labellist =~ s/(\d+)/\{section_$1\}\{$1\}/g;
                    $_ = join('', $before, "\\HTCwebNodeRef\{", $labellist,
                              "\}\{", $after);
                }
                else {
                    $_ = join('', $before,
                              "\\HTCwebX~\\HTCwebNodeRefExt\{", $after);
                }
            }

            # then, care of \X...\X normal refs for HYWEB
            while (/(\$?)\\X((\\noderef\{[^\}]*\}\{[^\}]*\}(, )?)+):\\underline\{\\HTCwebidbo\{/) {
                my ($before, $after, $math, $labellist) = ($`, $', $1, $2);
                # There must be an '\\X' following; quote the optional
                # dollar for use in the regexp
                $math =~ s/\$/\\\$/;
                if (!($after =~ s/\}\}\\X$math/\}/)) {
                    # go beyond end of line to substitute!
                    $contents =~ s/\}\}\\X$math/\}/;
                }
                $labellist =~ s/\\noderef//g;
                $_ = join('', $before, "\\HTCwebNodeRef\{", $labellist,
                          "\}\{", $after);
            }

            if (/\\HTCwebpar$/) { $code_section = 0; }
            print;
        }
    }
}


# Rewrites the escapes inside a \T{...} constant: \^ becomes {\hex} and
# \~ becomes {\oct}.  Returns the rewritten string.
sub subst_const {
    my ($text) = @_;
    $text =~ s/\\\^/{\\hex}/g;
    $text =~ s/\\\~/{\\oct}/g;
    return $text;
}


# Rewrites the backslash escapes inside a \vb{...} or \.{...} string into
# braced macros ({\BS}, {\LB}, {\RB}, {\TL}, {\SP}, {\UL}, {\AM}, {\CF}).
# The double backslash goes first so that it cannot be mistaken for the
# start of one of the other escapes.  Returns the rewritten string.
sub subst_verbatim {
    my ($text) = @_;
    $text =~ s/\\\\/\{\\BS\}/g;
    $text =~ s/\\\{/\{\\LB\}/g;
    $text =~ s/\\\}/\{\\RB\}/g;
    $text =~ s/\\~/\{\\TL\}/g;
    $text =~ s/\\ /\{\\SP\}/g;
    $text =~ s/\\_/\{\\UL\}/g;
    $text =~ s/\\&/\{\\AM\}/g;
    $text =~ s/\\\^/\{\\CF\}/g;
    return $text;
}


# Parse file in $_ and truncate long lines with percent signs to improve
# readability of output.  The result is printed to STDOUT.
# Comment lines, verbatim environments and lines shorter than 80 characters
# (newline included) are printed unchanged.
# Precondition: Percents at end of non-comment lines cannot occur, because such
# lines are concatenated during pass one.
#
sub pass2 {
    my $verbatim = 0;       # nesting level of verbatim environments
    my $contents = defined($_) ? $_ : '';

    # see pass1: do not lose a last line that lacks its newline
    $contents .= "\n" if length($contents) && $contents !~ /\n\z/;

    while ($contents =~ s/\A(.*\n)//) {
        $_ = $1;
        if (/^\s*%/) {
            # comment
            print;
        }
        elsif (/\\begin\s*\{(LVerbatim|verbatim)\*?\}/) {
            # fails if inside \verb !
            $verbatim++;
            print;
        }
        elsif (/\\end\s*\{(LVerbatim|verbatim)\*?\}/) {
            # fails if inside \verb !
            $verbatim--;
            print;
        }
        elsif ($verbatim) {
            print;
        }
        elsif (length >= 80) {
            if (/((^|[^\\])(\\\\)*)\\verb/) {
                # long line containing \verb, try to break before or forget it
                if (length($`) || length($1)) {
                    print $`, $1, "%\n\\verb", $';
                }
                else {
                    print;
                }
            }
            elsif (/((^|[^\\])(\\\\)*)(%+)/) {
                # long line without \verb, but with a semi-line comment.
                # take the comment to a single line and shorten remainder.
                # this saves false breakpoint detection in comments.
                # The comment is cut off $_ here; it used to stay in $_ as
                # well and was therefore printed twice, the first time
                # without its percent sign.
                my $comment = $4 . $';
                $_ = $` . $1 . "%\n";
                cutline();
                print $_, $comment;
            }
            else {
                # long line without \verb and without comment, banzai!
                cutline();
                print;
            }
        }
        else {
            print;
        }
    }
}


# Cut line longer than 79 chars (included the trailing newline) by
# finding a breakpoint and inserting a %.
# Strategy: find all breakpoints occurring < 80 chars, choose last one
# before 70. char if exists, else next one if exists, else do no break.
# A breakpoint is any unmasked opening/closing brace not followed
# by white space (is precious here!) or % (comment lines are already
# handled) and not occurring at line end.
# Preconditions:
# a) line must not be or end with a comment (=> false break)
# b) catcoded {, } (=> false break)
# Modifies $_.
#
# Works on the line in $_.  Every piece that is cut off is printed to the
# currently selected output handle, followed by "%\n"; the not yet printed
# rest of the line is left in $_ for the caller to print.
sub cutline {
    while (length >= 80) {
        # $done / $idx:         text up to and including the breakpoint found
        #                       last, and its length
        # $lastdone / $lastidx: the same for the breakpoint found before it
        # $pre, $pat, $aft:     text between the two breakpoints, the brace
        #                       itself and everything behind it
        my ($pre, $pat, $aft, $idx, $lastidx, $done, $lastdone);

        # Walk from breakpoint to breakpoint until one lies at or beyond
        # column 70 or no further one exists.  $_ shrinks to the text behind
        # the breakpoint found last.
        while (/((^|[^\\])(\\\\)*)(\{|\})([^\s%])/) {
            ($pre, $pat, $aft) = ($` . $1, $4, $5 . $');
            $done .= $pre . $pat;
            $idx = length($done);
            $_ = $aft;
            last if $idx >= 70;
            $lastdone = $done;
            $lastidx  = $idx;
        }

        # found no breakpoint at all, out
        return if !$idx;

        if ($idx >= 80 && $lastidx) {
            # breakpoint above 79. char and breakpoint below 70., take latter
            # and put the text between the two back in front of the rest
            print $lastdone, "%\n";
            $_ = join('', $pre, $pat, $aft);
        }
        else {
            # either the breakpoint is a perfect one (70. to 79. char), or
            # it is the last one and lies below the 70. char, or it is the
            # only one and lies above the 79. char: take it
            print $done, "%\n";
        }
    }
}


# Prints a mock "help system" banner followed by the usage text.
# Does not return, because usage does not.
sub help_usage {
    print "Booting system, please stand by...\n\n";
    print "HLP% $PRGFILE internal help system Version 7.6plus \"rightsized\"\n";
    print "HLP% User requested       : .\n";
    print "HLP% Available ressources : .\n";
    print "HLP% Activating           : .\n";
    print "HLP% Have a nice day!\n";
    usage(@_);
}


# Prints a complaint about a wrong invocation followed by the usage text.
# The arguments are handed on to usage, which reports them as the error
# message.  Does not return.
sub mis_usage {
    print "AAAAAAAAAAAAAAAAAAAAAAArgh!!!!\n";
    print "You didn't invoke me the right way. Watch the error message below!\n\n";
    usage(@_);
}


# Prints the usage text and terminates the program.
# Called with any arguments (even an empty string) it dies with
# "$PRGFILE: <arguments>"; called without arguments it exits with status 1.
sub usage {
    print (<<"_USAGE_");
RCS version: $RCSFILE Rev $RCSREV by $AUTHOR (last change $RCSDATE by $RCSAUTHOR)

Usage: $PRGFILE [+-bc] [+-bak] [+-debug] [-dest_dir <dir>] [+-fold] [-h[elp]]
       <file> [more_files]

where <dir> specifies output directory and <file> may have a full path name
specification.
The arguments denoted with [ ] backets are optional.
Giving an option with - sets it on, and + turns it definitely off.

For each given <file>, this perl script processes the Cweb-like TeX contents
and stores the result in <dir>/<file>.
The resulting file works together with htcweb.sty and can be fed into
LaTeX2HTML to produce HTML output.

Options:
-bc +bc         Set/Unset backwards compatiblity to LaTeX2HTML releases 95.3
                and 96.1x.
                Set by default.
-bak +bak       Keep/Discard original input files in backup directory.
                If they should be kept, they are stored in a backup direcory
                in <dir>.
                Else if they should be discarded, original files will be
                overwritten unless <dir> differs from the residing directory
                of the original file.
                Discards them by default.
-debug +debug   Sets/Zeroes debug level (reserved).
                Debug level is 0 by default.
-dest_dir <dir> Specifies directory where to store the output files.
-fold +fold     If on, lines of the output file longer than 80 chars are
                broken with %.
                This may cause insertion of a space in HTML output with older
                LaTeX2HTML versions.
                Off by default.
-h -help        Prints this.
_USAGE_
    die("\n$PRGFILE: @_\n") if @_;
    exit(1);
}