wanderinghorse.net website

Artifact [1631ae7e3d]
Login

Artifact [1631ae7e3d]

Artifact 1631ae7e3de1904efcb788178c6d17a18ad507859bb273ece1582968af7f50b5:


########################################################################
# tmplish is an API for working with plain-text documents which may
# contain embedded blocks of TCL code. The primary motivation was
# originally for generating dynamic content for CGI applications.
#
# Author: Stephan Beal <https://wanderinghorse.net/home/stephan/>
#
# 2024-11-11:
#
# The author disclaims copyright to this source code.  In place of a
# legal notice, here is a blessing:
#
#  * May you do good and not evil.
#  * May you find forgiveness for yourself and forgive others.
#  * May you share freely, never taking more than you give.
########################################################################
package provide tmplish 0.0.1

namespace eval tmplish {
  ######################################################################
  # Internal detail for compile(). $list = list of all chars in the
  # input.  $chType is "?" or "%". $nStart = the index of the opening
  # $chType char.  $nLen = the length of $list (which we pass in,
  # rather than use llength, simply because we already have it handy).
  # Returns the index of the '>' character of the closer.
  #
  # Special cases:
  #
  # - If $chType is "#" then the first instance of (?>, %>, ]>, #>)
  #   will close the block. The intent is to be able to easily comment
  #   out a whole block or add documentation which does not need to be
  #   eval'd.
  #
  # - For <? or <# blocks, if it hits the end of the input without
  #   finding a closing ...>, it returns -1 but does not error out. In
  #   that case, the rest of the doc is to be considered part of the
  #   code block.
  #
  proc find-closer {list chType chCloser nStart nLen} {
    set i [expr {$nStart + 1}]
    set chPrev [lindex $list $i]
    for {} {$i < $nLen} {incr i} {
      set ch [lindex $list $i]
      if {$ch eq ">" &&
          (($chCloser eq $chPrev)
           || ("#" eq $chType && $chPrev in {? % \# \]}))} {
        break;
      }
      set chPrev $ch
    }
    if {$i >= $nLen} {
      if {$chType in {? #}} {
        set i -1
      } else {
        #return -code error "Did not find matching ${chType}> starting at index $nStart."
        error "Did not find matching ${chCloser}> in tmpl starting at index $nStart."
      }
    }
    return $i
  }

  ######################################################################
  # Usage: compile ?-file? body outputCall
  #
  # If -file is provided, treat $body as a filename and read content
  # from there, otherwise treat it as a tmplish template.
  #
  # Takes a "tmplish"-format template and compiles it to TCL code,
  # returning that code. Processing the template requires passing it
  # to [uplevel 0] or [eval].
  #
  # $outputCall must either be the name of a function or the string
  # {apply aLambda} which refers to a lambda which takes a single
  # argument and is responsible for rendering the processed template,
  # piece by piece, by emitting that argument however is appropriate
  # for the app (e.g. appending it to a list or sending it to puts
  # -nonewline). The callback is called one time for each chunk of
  # input and is responsible for rendering it faithfully (e.g. adding
  # no extra whitespace). It need not render it, per se - it can
  # append it to a buffer or some such.
  #
  # $body is a "plain text document" with optional markup:
  #
  # - <% ... %> gets replaced by a call to the callback, passing it
  #   [expr {...}]. i.e. the result of the expression becomes part of
  #   the output.
  #
  # - <[...]> is effecitively shorthand for <%[...]%>, but also
  #   results in simpler "compiled" output than that form does.
  #
  # - <? ... ?> gets replaced by a call to uplevel 0 {...}. If the
  #   block wants to produce any output, it must generate it itself,
  #   calling the same callback which is passed to this function.  If
  #   an opening <? is found but no ?> then the remainder of the
  #   document is considered to be part of the block. This is,
  #   incidentally, how PHP's code block tags work and we do it for
  #   the same reason: it sidesteps uncomfortable questions about how
  #   to handle a newline after a closing ?> at the end of a document,
  #   noting that a single extraneous whitespace may change the
  #   semantics of the output. e.g. in the case of PHP or this code's
  #   predecessor, emitting out-of-band whitespace can lead to output
  #   being emited before HTTP headers, effectively corrupting an HTTP
  #   response.
  #
  # - <# ...> is "commented out" and elided entirely from the
  #   template.  The comment runs until the first instance of (%>, ?>,
  #   ]>, #>) or until the end of the document (as for unclosed <?...?>
  #   blocks).
  #
  # The text surrounding such markup is passed on _almost_ untouched:
  # any characters which must be escaped to pass through one level of
  # interpolation are backslash-escaped: '$', '[', and '\'. When the
  # template is evaluated, those extra backslashes will disappear.
  #
  # Any initial whitespace content in $body is ignored for the simple
  # reason that practice with previous implementations of this routine
  # (in other languages) suggests that it's useful to do so. If the
  # first non-whitespace content is one or more <? ?> blocks, they are
  # treated as whitespace for this purpose because historically such
  # blocks have frequently been used to set up various state for the
  # doc being processed, without producing any output.  Conversely,
  # <%...%> and <[...]> blocks, because they inherently produce
  # output, are _not_ counted as whitespace.
  #
  # There is currently no way to escape or configure the above markup
  # delimiters, so the content must not contain them except as markup
  # for this function.
  #
  # For example:
  #
  #   set body {...the template...}
  #   set output {{arg} {puts -nonewline $arg}}
  #   set compiled [tmplish compile $body {apply $output}]
  ##   or:
  ##   proc myout {arg} {puts -nonewline $arg}
  ##   set compiled [tmplish compile $body myout]
  #   uplevel 0 $compiled
  #
  # Sidebars:
  #
  # - This could be done _much_ more efficiently in C but this impl is
  #   intended to run in both canonical TCL and JimTCL.
  #
  # - If TCL had heredocs, this would be simpler (no escaping of
  #   arbitrarily large strings).
  #
  # Potential TODOs:
  #
  # - Add an optional -config {...} flag which sets the opening and
  #   closing tags, e.g. {exprOpen => "<(" exprClose => ")>"}, and
  #   optionally set the callback in the config object. Or just take
  #   all args in a single config object, JavaScript-style.
  #
  proc compile {args} {
    set usage "Usage: ?-file? body outputCall"
    set nArgs [llength $args]
    if {"-file" eq [lindex $args 0]} {
      if {3 != $nArgs} { error $usage }
      set fp [open [lindex $args 1] r]
      set body [read $fp]
      close $fp
      set outputCall [lindex $args 2]
    } elseif {2 == $nArgs} {
      lassign $args body outputCall
    } else {
      error $usage
    }
    set chars [split $body ""]
    set nLen [llength $chars]
    set seenContent 0; # gets set to 1 when first non-whitespace is seen.
    set obuf [list]; # output result buffer
    set outBegin "$outputCall \""; # start of each chunk of output
    append obuf $outBegin
    set qIsOpen 1; # 1 when the output call quoted string is opened, waiting closing
    set gotOpen 0; # 1 when we have seen a markup opener
    array set chClosers {? ? % % [ ] \# \#}; # map of opener to closer
    for {set i 0} {$i < $nLen} {incr i} {
      set ch [lindex $chars $i]
      #puts stderr "ch=$ch"
      if {!$seenContent && $ch in {" " "\n" "\t"}} {
        continue
      } elseif {$gotOpen && $ch in {% ? # [}} {
        set gotOpen 0
        if {$qIsOpen} {
          append obuf "\"\n";
          set qIsOpen 0
        }
        set iEnd [find-closer $chars $ch $chClosers($ch) $i $nLen]
        if {$iEnd < 0} {
          # Can only happen for <? blocks which have no closer: slurp
          # until the end of input.
          set iEnd [expr {$nLen + 1}]; # will be accounted for below
        }
        incr i
        #puts stderr "i=$i iEnd=$iEnd str=[join [lrange $chars $i [expr {$iEnd - 2}]]]"
        if {"%" eq $ch} {
          set seenContent 1
          set outStart "$outputCall \[expr \{"
          set outEnd "\}\]\n"
        } elseif {"?" eq $ch} {
          set outStart "uplevel 0 \{"
          set outEnd "\}\n"
        } elseif {"\[" eq $ch} {
          set seenContent 1
          set outStart "$outputCall \["
          set outEnd "]\n"
        } else {
          # it's a "#", which we simply skip over
          set outStart ""
        }
        if {"" ne $outStart} {
          append obuf $outStart \
            [join [lrange $chars $i [expr {$iEnd - 2}]] ""] \
            $outEnd
        }
        set i $iEnd
        #puts "CLOSER=[lindex $chars $i]"
        continue
      } elseif {!$qIsOpen} {
        append obuf $outBegin
        set qIsOpen 1
      }
      switch -exact -- $ch {
        "\"" - "\[" - "\$" - "\\" {
          if {$gotOpen} { append obuf "<" }
          append obuf "\\" $ch
          # Not strictly needed but may arguably improve readability...
          #"\n" { append obuf "\\n" }
          #"\t" { append obuf "\\t" }
          #"\v" { append obuf "\\v" }
          #"\f" { append obuf "\\f" }
        }
        "<" {
          if {$gotOpen} {
            append obuf "<<"
          } else {
            set gotOpen 1
            continue
          }
        }
        default {
          if {$gotOpen} { append obuf "<" }
          append obuf $ch
        }
      }
      set seenContent 1
      set gotOpen 0
    }
    if {$qIsOpen} {
      append obuf "\"\n"
    }
    #puts stderr "obuf=$obuf"
    #return [join $obuf ""]
    return $obuf
  }

  ######################################################################
  # Usage: ?-file? body outputCall ?upLevel=1?
  #
  # Passes (?-file? $body $outputCall) to [compile] then calls [uplevel
  # $upLevel ...]  on the result of that call.
  proc run {args} {
    set usage "?-file? body outputCall ?upLevel=1?"
    set ulvl 1
    set nArgs [llength $args]
    if {"-file" eq [lindex $args 0]} {
      if {$nArgs == 4} {
        set ulvl [lindex $args 3]
        set args [lrange $args 0 2]
      } elseif {$nArgs != 3} {
        error $usage
      }
    } elseif {$nArgs == 3} {
      set ulvl [lindex $args 2]
      set args [lrange $args 0 1]
    } elseif {$nArgs != 2} {
      error $usage
    }
    uplevel $ulvl [compile {*}$args]
  }

  namespace export compile run
  namespace ensemble create
};  # namespace tmplish

if {"--tmplish-test" in $argv} {
  apply {{} {
    set outLambda {{arg} {puts -nonewline $arg}}
    proc outFunc {arg} {puts -nonewline $arg}
    proc noop {args} {}
    set body {
      <? noop "ignored" ?>
      <? noop "also also ignored" ?>
      clock=<[clock seconds]>
      x=<% $x %>!
      x+5=<%$x + 5%>!!
      This {is} [a] \x $tmpl. x+3=<%$x + 3%>!
      And this is a code block: <?
      outFunc "this is from a code block"?>
    }
    set t [tmplish compile $body outFunc]
    #set t [tmplish compile $body {apply $outLambda}]
    set x 11
    puts "<COMPILED>$t</COMPILED>"
    puts "<EVALED>"; uplevel 0 $t; puts "</EVALED>"

    puts "\n****************************************"
    puts "And once again via a file:\n"
    # Note that [file tempfile] works very differently
    # in JimTCL and canonical TCL.
    set tmpfile [info script].tmp
    set fp [open $tmpfile w]
    puts -nonewline $fp $body
    close $fp
    if {[catch {
      tmplish run -file $tmpfile {apply $outLambda}
    } msg]} {
      file delete $tmpfile
      error $msg
    }
    puts ""
    file delete $tmpfile
  }}
}