Extracting documentation from C code.

Mon May 20 03:02:48 AEST 1991

I had originally replied only to the original poster but there seems to be
enough interest that I thought I'd reply here. I have written a ksh script
that uses nawk to implement just such a beast (I'm working on a replacement
written in perl). You can embed your documentation in ANY type of
source file you wish (C, C++, Pascal, etc).

Here is the script and a sample C source file for those of you that 
are interested. Let me know of any comments you might have.

______________________ "And miles to go before I sleep." ______________________
 Brad Appleton           brad at ssd.csd.harris.com       Harris Computer Systems
                             uunet!hcx1!brad           Fort Lauderdale, FL USA
~~~~~~~~~~~~~~~~~~~~ Disclaimer: I said it, not my company! ~~~~~~~~~~~~~~~~~~~

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  xdoc.ksh strsplit.c
# Wrapped by brad at hcx1 on Thu May 16 14:23:24 1991
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'xdoc.ksh' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'xdoc.ksh'\"
else
echo shar: Extracting \"'xdoc.ksh'\" \(6282 characters\)
sed "s/^X//" >'xdoc.ksh' <<'END_OF_FILE'
X#!/bin/ksh
X#
X# xdoc - eXtract DOCumentation from structured C-Comments
X#
X# Created  by Brad Appleton
X
XNAME="`basename $0`"
X
XSYNOPSIS="\
X$NAME  [-n] [-f function] [-i string] [-p pattern] [-mname] [-s section]
X             [-t type=name] [-I subsection(s)] [-X subsection(s)]
X             [file ...]\
X"
X
XDESCRIPTION="\
X$NAME will extract the documentation associated with the named section from
Xthe given files. If no section is given, then FILE is assumed.\
X"
X
X## Help text for every command-line option; printed by print_usage.
XOPTIONS="\
X-n              Don't print section title
X-f function     Extract text for the given function
X-i string       Indent text using the given string (default=3-spaces)
X-p pattern      Specify the pattern to trim off the beginning of each line
X                (default=\"[ \\t]\")
X-m name         Use the {t,n}roff -mname macros to format the output
X-s section      Extract text for the named section
X-t type=name    Extract text for the named type
X-I subsections  Specify which subsection(s) of the named section are to be
X                included in the output. If multiple subsections are desired
X                then the list must be placed in a single command-line argument.
X                By default, all subsections are printed.
X-X subsections  Specify which subsection(s) of the named section are to be
X                excluded from the output. If multiple subsections are desired
X                then the list must be placed in a single command-line argument.
X                By default, no subsections are excluded.
X"
X
Xset +o nounset
X
Xfunction print_usage {
X  print -u2 "\nUsage: ${SYNOPSIS}"
X  if [ "$OPTIONS" ] ; then
X    print -u2 "\n${OPTIONS}"
X  fi
X  if [ "$DESCRIPTION" ] ; then
X    print -u2 "\n${DESCRIPTION}"
X  fi
X  print
X  exit ${1-:2}
X}
X
Xalias warn_user="print -u2 '** '"
X
X## Report a problem on stderr, prefixed with the program name.
X## All arguments are joined into a single message line.
Xfunction error_msg {
X  print -u2 "$NAME: $*"
X}
X
X## Report a problem on stderr exactly as error_msg does, then terminate
X## the script with exit status 2.
Xfunction fatal_msg {
X  error_msg "$@"
X  exit 2
X}
X
XSTARTPAT='^[ 	]'
XKWD='FILE'; isFUNC=0; isSECTION=0; isTYPE=0; NOHEADINGS=0;
XINCLUDES='.*'; EXCLUDES='';
X
X## trim all leading and trailing whitespace, and compress whitespace
X## The arguments are joined into one string and the result is printed on
X## stdout. Each bracket expression below holds one space and one tab.
Xfunction tidylist {
X   print "$*" | sed -e 's/^[ 	]*//' -e 's/[ 	]*$//' -e 's/[ 	][ 	]*/ /g'
X}
X
X## parse options
Xwhile getopts ':f:m:s:t:i:p:I:X:' OPT
X  do case "$OPT" in
X    n) NOTITLE='TRUE';;
X	f) KWD='FUNCTION'; IDENT="$OPTARG" ; isFUNC=1;;
X    s) KWD='SECTION'; IDENT="$OPTARG"; isSECTION=1 ;;
X    t) KWD="$(print ${OPTARG%=*} | tr '[a-z]' '[A-Z]')"; IDENT="${OPTARG#*=}";
X       isTYPE=1 ;;
X    i) INDENT="${OPTARG}" ;;
X    p) STARTPAT="^${OPTARG#\^}" ;;
X    m) MACROS="-m${OPTARG}" ;;
X    I) INCLUDES="${OPTARG}" ;;
X    X) EXCLUDES="${OPTARG}" ;;
X	:) error_msg "$OPTARG requires a value"; print_usage ;;
X	\?) error_msg "unknown option $OPTARG"; print_usage ;;
X  esac
Xdone
Xshift OPTIND-1
X
X## With no file arguments awk reads standard input. If standard input is
X## a terminal there is nothing to process, so show the usage and exit 2.
Xif [ $# -eq 0  -a  -t 0 ] ; then
X  print_usage 2
Xfi
X
X## -f, -s and -t each select a different kind of block: flag any pair
X## that was given together and refuse to continue
X[ $isFUNC -ne 0 ]  &&  [ $isSECTION -ne 0 ]  &&  badopts='TRUE'
X[ $isFUNC -ne 0 ]  &&  [ $isTYPE -ne 0 ]     &&  badopts='TRUE'
X[ $isTYPE -ne 0 ]  &&  [ $isSECTION -ne 0 ]  &&  badopts='TRUE'
X[ "$badopts" ]  &&  fatal_msg "only one of -f, -s, and -t may be used"
X
XPATTERN="\^${KWD}:${IDENT:+[ 	]*${IDENT}[^A-Z0-9]*}"
X
X## trim and compress all spaces and tabs in subsection lists
XINCLUDES="$( tidylist $INCLUDES )"
XEXCLUDES="$( tidylist $EXCLUDES )"
X
Xif [ "$NOTITLE" ] ; then
X  ENTITLED='1'
Xelse
X  ENTITLED='0'
Xfi
X
X## The awk program is one single-quoted shell word. Shell settings are
X## spliced into it by closing the quote, expanding the variable and
X## reopening the quote, so nothing inside the program text itself may
X## contain a single-quote character.
Xawk '
X  ## Initial state and the option values handed over from the shell.
X  ##   processing -- non-zero while inside the requested block
X  ##   ignore     -- non-zero while inside a subsection that is not wanted
X  ##   entitled   -- non-zero once the title has been printed (or with -n)
X  BEGIN {
X    processing=0; ignore=0; entitled='$ENTITLED'; indent="'"${INDENT:-   }"'";
X    keyword="'"$KWD"'"; section='$isSECTION'; macros="'"${MACROS:-}"'";
X    startpat="'"$STARTPAT"'";
X    ## the -I and -X lists are split on blanks, one pattern per element
X    nincl = split( "'"$INCLUDES"'", includes, " " );
X    nexcl = split( "'"$EXCLUDES"'", excludes, " " );
X  }
X
X  ## Decide whether the subsection titled <subsection> is to be printed.
X  ## Returns 1 if it is wanted and 0 otherwise. Exclusions are checked
X  ## first, so they win over inclusions; a title that matches neither
X  ## list is not wanted. List entries are used as regular expressions.
X  ## The extra parameter i is how awk declares a local variable: it keeps
X  ## the loop index from becoming a global.
X  function is_needed( subsection,    i ) {
X    ## first see if it is excluded
X    for ( i = 1 ; i <= nexcl ; i++ )
X      if ( match(subsection, excludes[i]) )  return  0;  ## not-needed
X
X    ## now see if it is included
X    for ( i = 1 ; i <= nincl ; i++ )
X      if ( match(subsection, includes[i]) )  return  1;  ## needed
X
X    return  0;  ## not-needed
X  }
X
X  ## Return <textline> with the comment markers removed. The comment
X  ## syntax is chosen from the suffix of <filename>: names ending in
X  ## .C .H .c .h .l or .y get C and C++ treatment, anything else is
X  ## treated as a shell script. The extra parameter text is a local.
X  function uncomment( filename, textline,    text ) {
X    text = textline;
X    ## a regex literal is used so that the dot stays escaped; written as
X    ## a string, the backslash is lost and the dot matches any character
X    if ( filename ~ /\.[CHchly]$/ ) {  ## C and C++ comments
X      gsub( /\/\//, "", text );
X      gsub( /\/\*/, "", text );
X      gsub( /\*\//, "", text );
X      ## leading run of stars, as used to continue a block comment
X      sub( /^[ \t]*\*\**/, "", text );
X    }
X    else {  ## assume sh, csh, or ksh comments
X      sub( /^[ \t]*##*/, "", text );
X    }
X
X    return  text;
X  }
X
X  ## Print a heading in the form required by the selected macro package
X  ## (plain text when none was selected). <level> is only used by the
X  ## -me and -mm forms. When <name> is non-empty a second line giving
X  ## the name and its <purpose> follows the heading.
X  function print_heading( level, heading, name, purpose ) {
X    if ( macros == "-mm" ) {
X      printf( ".H %d \"%s\"\n", level, heading );
X    }
X    else if ( macros == "-me" ) {
X      printf( ".sh %d \"%s\"\n", level, heading );
X    }
X    else if ( macros == "-man" ) {
X      printf( ".SH \"%s\"\n", heading );
X    }
X    else {
X      printf( "%s:\n", heading );
X    }
X
X    ## without a name the heading line is all there is to print
X    if ( name == "" )  return;
X
X    if ( macros == "" )
X      printf( "%s%s -- %s\n", indent, name, purpose );
X    else
X      printf( "%s \\- %s\n", name, purpose );
X  }
X
X  ## Begin a new paragraph: emit the paragraph request of the selected
X  ## macro package, or print <str> as-is when no known package is in use.
X  function start_paragraph( str ) {
X    if ( macros == "-mm" ) {
X      printf( ".P\n" );
X    }
X    else if ( macros == "-me" ) {
X      printf( ".pp\n" );
X    }
X    else if ( macros == "-man" ) {
X      printf( ".PP\n" );
X    }
X    else {
X      printf( "%s", str );
X    }
X  }
X
X  /\^\^/ {
X    processing=0; ignore=0; next;
X  }
X
X  ## A line matching the pattern built by the shell (caret, keyword, colon
X  ## and, when one was requested, the identifier) starts a block.
X  /'"$PATTERN"'/ {
X    ++processing; purpose=$0; name="'"$IDENT"'";
X    ## with no identifier requested the block is named after the input file
X    if ( name == "" )  name=FILENAME;
X    ## strip everything up to and including the name and the punctuation
X    ## after it; what remains of the line is the one-line purpose
X    re = sprintf( "^.*:[ \t]*%s[-:=# \t]*", name );
X    ## (re is built from a non-empty format, so this test always succeeds)
X    if ( re != "" )  sub( re, "", purpose );
X    if ( section ) {
X      ## -s: the first match prints the section name as the heading, each
X      ## later match only starts a new paragraph
X      if ( !entitled ) {
X        ++entitled;
X        print_heading( 1, name, "", "" );
X        start_paragraph( "" );
X      }
X      else
X        start_paragraph( "\n" );
X    }
X    else { 
X      ## otherwise the heading is the keyword followed by a line giving
X      ## the name and purpose; it is printed once only
X      if ( !entitled ) {
X        ++entitled;
X        print_heading( 1, keyword, name, purpose );
X      }
X    }
X    next;
X  }
X
X  ## A caret followed by an upper-case title and a colon introduces a
X  ## subsection. Outside a block such lines are skipped.
X  /\^[A-Z][-_A-Z0-9]*:/ {
X    if ( !processing )  next;
X    ## reduce the line to the bare title: drop everything through the
X    ## caret and from the colon on, then turn - and _ into spaces
X    title=$0;
X    sub( "^.*\\^", "", title );
X    sub( ":.*$", "", title );
X    gsub( "[-_]", " ", title );
X    if ( is_needed(title) ) {
X      ignore=0;
X      print_heading( 2, title, "", "" );
X      start_paragraph( "" );
X    }
X    else {
X      ## text lines are suppressed until a wanted subsection is reached
X      ++ignore;
X    }
X    next;
X  }
X
X  ## Every other line is body text. It is printed only inside a block and
X  ## outside an ignored subsection, after the comment markers and the
X  ## leading pattern given by startpat have been removed.
X  {
X    if ( !processing )  next;
X    if ( ignore )  next;
X    line = uncomment(FILENAME, $0);
X    if ( startpat != "" )  sub( startpat, "", line );
X    ## plain-text output is indented; macro output is left alone
X    if ( macros == "" ) {
X       printf( "%s", indent );
X    }
X    print line;
X  }
X' "$@"
X
END_OF_FILE
if test 6282 -ne `wc -c <'xdoc.ksh'`; then
    echo shar: \"'xdoc.ksh'\" unpacked with wrong size!
fi
chmod +x 'xdoc.ksh'
# end of 'xdoc.ksh'
fi
if test -f 'strsplit.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'strsplit.c'\"
else
echo shar: Extracting \"'strsplit.c'\" \(6901 characters\)
sed "s/^X//" >'strsplit.c' <<'END_OF_FILE'
X/**************************************************************************
X** ^FILE: strsplit.c - split and join strings
X**
X** ^DESCRIPTION:
X**    This file implements the following functions:
X**
X**       strsplit() -- split a string up into a vector of tokens
X**       strjoin() -- join a vector of tokens into a single string
X**
X** ^HISTORY:
X**    01/02/91 	Brad Appleton 	<brad at ssd.csd.harris.com> 	Created
X***^^**********************************************************************/
X
X#include <ctype.h>
X#include <stdio.h>
X#include <stdlib.h>
X#include <string.h>
X
X/* a null character pointer; parenthesised so it is safe in any expression */
X#define  CHARNULL  ((char *)NULL)
X/* default token delimiters, used when the caller supplies none */
Xstatic char WhiteSpace[] = " \t\n\r\v\f";
X
X
X/***************************************************************************
X** ^FUNCTION: strsplit - split a string into tokens
X**
X** ^SYNOPSIS:
X*/
X#ifndef __ANSI_C__
X   int  strsplit( vec, token_str, separators )
X/*
X** ^PARAMETERS:
X*/
X   char **vec[];
X/*    -- pointer to the string vector to be allocated
X*/
X   char token_str[];
X/*    -- the string to be split up
X*/
X   char separators[];
X/*    -- the delimiters that separate tokens
X*/
X#endif  /* !__ANSI_C__ */
X
X/* ^DESCRIPTION:
X**    Strsplit will split token_str up into  a vector of tokens that are
X**    separated by one or more characters from <separators>. The number
X**    of tokens found is returned and storage is allocated for the given
X**    vector (which may later be deallocated using free()).
X**
X**    If <separators> is NULL or empty, then the set of whitespace characters
X**    is used as the token delimiters.
X**
X** ^REQUIREMENTS:
X**    vec must be non-NULL (it must be a valid address).
X**    token_str must be a writable, NUL-terminated string. It may be empty
X**    or consist of separators only, in which case no tokens are found.
X**    If token_str is NULL, 0 is returned and *vec is left untouched.
X**
X** ^SIDE-EFFECTS:
X**    All leading and trailing characters from <separators> are removed
X**    from token_str. Furthermore, all remaining sequences in token_str
X**    of characters from <separators> are replaced with a single NUL-byte.
X**
X**    Token_str holds the actual storage for all the strings in the newly
X**    created vector.
X**
X**    If the vector cannot be allocated, a message is written to stderr
X**    and the program exits.
X**
X** ^RETURN-VALUE:
X**    The number of tokens parsed. The vector holds that many pointers
X**    followed by a terminating NULL pointer.
X**
X** ^ALGORITHM:
X**    - count the number of tokens present while at the same time removing
X**      all leading and trailing delimiters, and replacing all other sequences
X**      of delimiters with the NUL character.
X**    - allocate a vector large enough to point to all the token strings.
X**    - for i in 0 .. (numtokens - 1) do
X**         - vector[i] = token_str
X**         - advance token_str to point at the next character past the
X**           rightmost NUL-byte (which should be the start of the next token).
X**      end-for
X**    - return the number of tokens parsed.
X***^^**********************************************************************/
X#ifdef __ANSI_C__
X   int strsplit( char **vec[], char token_str[], const char separators[] )
X#endif
X{
X   register   char c, *pread, *pwrite;
X   int   i, count = 0;
X
X   if ( !token_str )    return   0;
X      /* if delim-string is NULL or empty, whitespace is used */
X   if ( !separators  ||  !*separators )   separators = WhiteSpace;
X
X      /*
X      ** trim leading separators. strchr() also finds the terminating NUL
X      ** of <separators>, so the end of the input must be tested for
X      ** explicitly or the scan would run off the end of token_str.
X      */
X   pread = token_str;
X   while ( *pread  &&  strchr(separators, *pread) )   ++pread;
X   token_str = pwrite = pread;
X
X      /*
X      ** make first pass through string, counting # of tokens and
X      ** separating all tokens by a single '\0'
X      */
X   while ( (c = *pread++) != '\0' ) {
X      if ( !strchr(separators, c) )   {
X         *pwrite++ = c;
X      }
X      else {
X         *pwrite++ = '\0';   /* null terminate this token */
X         ++count;                /* update token count */
X            /* skip the rest of this run of separators (see note above) */
X         while ( *pread  &&  strchr(separators, *pread) )   ++pread;
X      }
X   }/*while*/
X
X      /*
X      ** a last token not followed by a separator has not been counted or
X      ** terminated yet. Nothing was written if pwrite has not moved, and
X      ** then there is no previous character to look at.
X      */
X   if ( pwrite > token_str  &&  *(pwrite - 1) )  {
X      ++count;         /* dont forget last token */
X      *pwrite = '\0';   /* null-terminate */
X   }
X
X      /* allocate space for the caller's vector (remember NULL at the end) */
X   (*vec) = (char **)malloc( (1 + count) * sizeof( char * ) );
X   if ( !*vec ) {
X      fprintf( stderr, "out of memory in strsplit() - aborting\n" );
X      exit( -1 );
X   }
X
X      /* now go thru token-string again assigning pointers from vector */
X   pread = token_str;
X   for ( i = 0 ; i < count ; i++ ) {
X      (*vec)[i] = pread;   /* assign pointer */
X      pread += strlen( pread ) + 1;
X   }/* end-for */
X
X      /* set up the trailing pointer to NULL at the end */
X   (*vec)[ count ] = CHARNULL;
X   return   count;
X}
X
X
X/* CONST is "const" for ANSI compilers and nothing otherwise; a definition
X** supplied from elsewhere takes precedence.
X*/
X#ifndef CONST
X# ifdef __ANSI_C__
X#  define CONST const
X# else
X#  define CONST
X# endif
X#endif
X
X/***************************************************************************
X** ^FUNCTION: strjoin - join a vector of tokens together
X**
X** ^SYNOPSIS:
X*/
X#ifndef __ANSI_C__
X   char  *strjoin( argv, separator )
X/*
X** ^PARAMETERS:
X*/
X   char *argv[];
X/*    -- pointer to the string vector to join together
X*/
X   char separator[];
X/*    -- the string to use to separate tokens (if NULL, " " is used)
X*/
X#endif  /* !__ANSI_C__ */
X
X/* ^DESCRIPTION:
X**    Strjoin will make a single string out of the given vector by copying
X**    all the tokens from the given vector (in order) to a newly allocated
X**    string. Tokens will be separated by a single occurrence of <separator>.
X**    Empty tokens are skipped and do not produce a separator.
X**
X**    If <separator> is NULL then a single space is used as the separator.
X**    If <separator> is empty, then no separator is used and the tokens are
X**    simply concatenated together.
X**
X** ^REQUIREMENTS:
X**    If argv is non-NULL it must be terminated by a pointer to NULL
X**    (argv[last+1] == NULL).
X**
X** ^SIDE-EFFECTS:
X**    Storage is allocated. If it cannot be allocated, a message is written
X**    to stderr and the program exits.
X**
X** ^RETURN-VALUE:
X**    The address of the newly-joined result (which should be deallocated
X**    using free()). The result is an empty string if the vector holds no
X**    non-empty tokens. Returns NULL if argv is NULL.
X**
X** ^ALGORITHM:
X**    - count the number of characters to place in the joined-result.
X**    - allocate a string large-enough to copy the joined-result into.
X**    - copy each string into the string (with <separator> between tokens).
X**    - return the result.
X***^^**********************************************************************/
X#ifdef __ANSI_C__
X   char *strjoin( const char *argv[], const char separator[] )
X#endif
X{
X   size_t  sz = 0, seplen;
X   int     ntokens = 0;
X   register char *p;
X   register CONST char *a;
X   CONST char **av;
X   char *result;
X
X      /* if argv is NULL, nothing to do */
X   if ( !argv )  return  CHARNULL;
X   if ( !separator )  separator = " ";
X   seplen = strlen( separator );
X
X      /*
X      ** figure out how much space we need: every non-empty token, plus
X      ** one separator in front of each such token except the first
X      */
X   for ( av = argv ; *av ; av++ ) {
X      if ( !**av )  continue;
X      if ( ntokens++ )  sz += seplen;
X      sz += strlen( *av );
X   }
X
X      /* allocate space (one extra byte for the terminating NUL) */
X   result = (char *)malloc( (sz + 1) * sizeof(char) );
X   if ( !result ) {
X      fprintf( stderr, "out of memory in strjoin() - aborting\n" );
X      exit( -1 );
X   }
X
X      /*
X      ** join the strings together, following exactly the same rule that
X      ** was used for counting so that the buffer is filled precisely.
X      ** Each copy loop leaves p on the NUL it just wrote, so the next
X      ** copy overwrites it and the result is always terminated.
X      */
X   *result = '\0';
X   ntokens = 0;
X   for ( av = argv, p = result ; (a = *av) ; av++ ) {
X      if ( !*a )  continue;
X      if ( ntokens++ ) {
X         CONST char *s = separator;
X         while ( (*p = *s++) ) ++p;  /* copy separator */
X      }/*end-if*/
X      while ( (*p = *a++) ) ++p;  /* copy token */
X   }/*end-for*/
X
X   return  result;
X}
END_OF_FILE
if test 6901 -ne `wc -c <'strsplit.c'`; then
    echo shar: \"'strsplit.c'\" unpacked with wrong size!
fi
# end of 'strsplit.c'
fi
echo shar: End of shell archive.
exit 0
______________________ "And miles to go before I sleep." ______________________
 Brad Appleton           brad at ssd.csd.harris.com       Harris Computer Systems
                             uunet!hcx1!brad           Fort Lauderdale, FL USA
~~~~~~~~~~~~~~~~~~~~ Disclaimer: I said it, not my company! ~~~~~~~~~~~~~~~~~~~