forked from LeenkxTeam/LNXSDK
		
	
		
			
	
	
		
			315 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			315 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| 
								 | 
							
								#! /usr/bin/perl -w
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Script to turn PCRE2 man pages into HTML
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Subroutine to handle font changes and other escapes.
								#
								# Argument: one line (or fragment) of man page text
								# Returns:  a copy of the text in which
								#             "<" and ">" have become HTML entities,
								#             \fI...\fR and \fB...\fR (or ...\fP) have become <i> and <b>,
								#             "\e" has become a single backslash, and
								#             "(c)" directly after "Copyright " has become &copy;.
								#
								# Ampersands are deliberately left alone here, exactly as before.

								sub do_line {
								my($s) = $_[0];

								# Deal with < and > before any tags are inserted, so that the <i> and <b>
								# generated below are not themselves escaped.

								$s =~ s/</&lt;/g;
								$s =~ s/>/&gt;/g;

								# Font changes: italic and bold runs, closed by either \fR or \fP

								$s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
								$s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;

								# The troff escape for a backslash

								$s =~ s"\\e"\\"g;

								# Use an entity rather than a literal character so that the output does
								# not depend on the encoding of this script.

								$s =~ s/(?<=Copyright )\(c\)/&copy;/g;
								$s;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Subroutine to ensure not in a paragraph. If a paragraph is open, a
								# closing </PRE> is written first when a literal section is also open, and
								# then the closing </P>. The $inpara, $inpre and $wrotetext flags are
								# cleared whether or not anything was written. Output goes to TEMP.

								sub end_para {
								if ($inpara)
								  {
								  print TEMP "</PRE>\n" if ($inpre);
								  print TEMP "</P>\n";
								  }
								$inpara = $inpre = 0;
								$wrotetext = 0;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Subroutine to start a new paragraph. Whatever paragraph is currently
								# open is closed first by end_para(); then <P> is written to TEMP and
								# $inpara is set.

								sub new_para {
								end_para();
								print TEMP "<P>\n";
								$inpara = 1;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Main program

								$innf = 0;          # Set between .nf and .fi
								$inpara = 0;        # Set while a <P> is open
								$inpre = 0;         # Set while a <pre> literal section is open
								$wrotetext = 0;     # Set once text has been written in this paragraph
								$toc = 0;           # Set by the -toc option
								$ref = 1;           # Number used for the next TOCn/SECn anchor pair

								# Consume leading options. Only -toc has any effect; any other argument
								# that starts with a hyphen is skipped without comment.

								while (@ARGV && $ARGV[0] =~ /^-/)
								  {
								  $toc = 1 if $ARGV[0] eq "-toc";
								  shift(@ARGV);
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Initial output to STDOUT. The first remaining argument is used as the
								# page name in the title and heading. The text of the here-document and its
								# terminator must stay at the left margin.

								print <<End ;
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
End

								# With -toc, the table of contents is a list on STDOUT

								print "<ul>\n" if ($toc);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								open(TEMP, ">", "/tmp/$$") || die "Can't open /tmp/$$ for output: $!\n";
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								LINE: while (<STDIN>)
								  {
								  # Handle lines beginning with a dot

								  if (/^\./)
								    {
								    # Some of the PCRE2 man pages used to contain instances of .br. However,
								    # they should have all been removed because they cause trouble in some
								    # (other) automated systems that translate man pages to HTML. Complain if
								    # we find .br or .in (another macro that is deprecated).

								    if (/^\.br/ || /^\.in/)
								      {
								      print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
								      print STDERR "*** $_\n";
								      die "*** Processing abandoned\n";
								      }

								    # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.

								    elsif (/^\.nf/)
								      {
								      $innf = 1;
								      }

								    elsif (/^\.fi/)
								      {
								      $innf = 0;
								      }

								    # Handling .sp is subtle. If it is inside a literal section, do nothing if
								    # the next line is a non literal text line; similarly, if not inside a
								    # literal section, do nothing if a literal follows, unless we are inside
								    # a .nf/.fi section or about to enter one. The point being that the <pre>
								    # and </pre> that delimit literal sections will do the spacing. Always skip
								    # if no previous output.

								    elsif (/^\.sp/)
								      {
								      if ($wrotetext)
								        {
								        $_ = <STDIN>;

								        # At end of input, carry on with an empty line. It is handled below
								        # exactly as the undefined value used to be, but without warnings.

								        $_ = "" unless defined($_);

								        if ($inpre)
								          {
								          print TEMP "\n" if (/^[\s.]/);
								          }
								        else
								          {
								          print TEMP "<br>\n<br>\n" if ($innf || /^\.nf/ || !/^[\s.]/);
								          }
								        redo LINE;    # Now process the lookahead line we just read
								        }
								      }

								    elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
								      {
								      new_para();
								      }

								    elsif (/^\.SH\s*("?)(.*)\1/)
								      {
								      # Ignore the NAME section: discard the line that follows the heading

								      if ($2 =~ /^NAME\b/)
								        {
								        <STDIN>;
								        next LINE;
								        }

								      end_para();
								      my($title) = do_line($2);

								      # The title is passed as a printf argument, not put into the format,
								      # so that a percent sign in a title is output as it stands.

								      if ($toc)
								        {
								        printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
								          $ref, $ref, $title);
								        printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
								          $ref, $title);
								        $ref++;
								        }
								      else
								        {
								        print TEMP "<br><b>\n$title\n</b><br>\n";
								        }
								      }

								    elsif (/^\.SS\s*("?)(.*)\1/)
								      {
								      end_para();
								      my($title) = do_line($2);
								      print TEMP "<br><b>\n$title\n</b><br>\n";
								      }

								    # .B and .I set their argument in bold or italic, with any double
								    # quotes round parts of the argument removed.

								    elsif (/^\.B\s*(.*)/)
								      {
								      my($text) = $1;
								      new_para() if (!$inpara);
								      $_ = do_line($text);
								      s/"(.*?)"/$1/g;
								      print TEMP "<b>$_</b>\n";
								      $wrotetext = 1;
								      }

								    elsif (/^\.I\s*(.*)/)
								      {
								      my($text) = $1;
								      new_para() if (!$inpara);
								      $_ = do_line($text);
								      s/"(.*?)"/$1/g;
								      print TEMP "<i>$_</i>\n";
								      $wrotetext = 1;
								      }

								    # A comment that starts "HREF" takes the next line as a name that
								    # is turned into a hyperlink, using the text given, which might be
								    # in a special font. If it ends in () or (digits) or punctuation, they
								    # aren't part of the link. If no name can be extracted, the text is
								    # output without a link and a message is written to STDERR.

								    elsif (/^\.\\"\s*HREF/)
								      {
								      $_ = <STDIN>;
								      $_ = "" unless defined($_);
								      chomp;
								      $_ = do_line($_);
								      s/\s+$//;
								      if (/^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/)
								        {
								        print TEMP "<a href=\"$1.html\">$_</a>\n";
								        }
								      else
								        {
								        print STDERR "*** Cannot derive a link target from \"$_\"\n";
								        print TEMP "$_\n";
								        }
								      }

								    # A comment that starts "HTML" inserts literal HTML

								    elsif (/^\.\\"\s*HTML\s*(.*)/)
								      {
								      print TEMP $1;
								      }

								    # A comment that starts < inserts that HTML at the end of the
								    # *next* input line - so as not to get a newline between them.

								    elsif (/^\.\\"\s*(<.*>)/)
								      {
								      my($markup) = $1;
								      $_ = <STDIN>;
								      $_ = "" unless defined($_);
								      chomp;
								      $_ = do_line($_);
								      s/\s+$//;
								      print TEMP "$_$markup\n";
								      }

								    # A comment that starts JOIN joins the next two lines together, with one
								    # space between them. Then that line is processed. This is used in some
								    # displays where two lines are needed for the "man" version. JOINSH works
								    # the same, except that it assumes this is a shell command, so removes
								    # continuation backslashes.

								    elsif (/^\.\\"\s*JOIN(SH)?/)
								      {
								      my($shell) = defined($1);
								      my($one,$two);
								      $one = <STDIN>;
								      $two = <STDIN>;
								      $one = "" unless defined($one);
								      $two = "" unless defined($two);
								      $one =~ s/\s*\\e\s*$// if ($shell);
								      chomp($one);
								      $two =~ s/^\s+//;
								      $_ = "$one $two";
								      redo LINE;       # Process the joined lines
								      }

								    # .EX/.EE are used in the pcre2demo page to bracket the entire program,
								    # which is unmodified except for turning backslash into "\e". The
								    # ampersand must be escaped before < and >, because the entities that
								    # replace those contain ampersands themselves.

								    elsif (/^\.EX\s*$/)
								      {
								      print TEMP "<PRE>\n";
								      while (<STDIN>)
								        {
								        last if /^\.EE\s*$/;
								        s/\\e/\\/g;
								        s/&/&amp;/g;
								        s/</&lt;/g;
								        s/>/&gt;/g;
								        print TEMP;
								        }
								      print TEMP "</PRE>\n";
								      }

								    # Ignore anything not recognized

								    next LINE;
								    }

								  # Line does not begin with a dot. Replace blank lines with new paragraphs

								  if (/^\s*$/)
								    {
								    end_para() if ($wrotetext);
								    next LINE;
								    }

								  # Convert fonts changes and output an ordinary line. Ensure that indented
								  # lines are marked as literal.

								  $_ = do_line($_);
								  new_para() if (!$inpara);

								  if (/^\s/)
								    {
								    if (!$inpre)
								      {
								      print TEMP "<pre>\n";
								      $inpre = 1;
								      }
								    }
								  elsif ($inpre)
								    {
								    print TEMP "</pre>\n";
								    $inpre = 0;
								    }

								  # Add <br> to the end of a non-literal line if we are within .nf/.fi

								  $_ .= "<br>\n" if (!$inpre && $innf);

								  print TEMP;
								  $wrotetext = 1;
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# The TOC, if present, will have been written - terminate it

								print "</ul>\n" if ($toc);

								# Copy the remainder to the standard output. The close is checked because
								# a failed write to the temporary file may only be reported at that point.

								close(TEMP) || die "Can't finish writing /tmp/$$ ($!)\n";
								open(TEMP, "<", "/tmp/$$") || die "Can't open /tmp/$$ for input ($!)\n";

								print while (<TEMP>);

								# The text of the here-document and its terminator must stay at the left
								# margin.

								print <<End ;
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
End

								close(TEMP);
								unlink("/tmp/$$") || print STDERR "Can't remove /tmp/$$ ($!)\n";
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# End
							 |