; An example of an interesting (S)XML transformation
; The example has been suggested by Dr. David Durand, a member of
; Brown University's Scholarly Technology Group (STG).
; Originally, the example intended to move punctuation inside a tag,
; that is, convert
; Click here! ==> Click here!
; Even in this simple formulation, XSLT solution is painful. We should
; note a few desired complications. For one thing, we definitely don't
; want to pull in the punctuation in the following context:
;some scheme comment
; Furthermore, we may need to avoid moving punctuation into the content
; of certain elements, for example:
For more details, see the paper Krishnamurthi2001.
; The content of the 'cite' element is typically a bibliographic key. It
; is not appropriate to add any punctuation to it.
; Lastly, we do want to pull in the punctuation recursively, as in
This is awesome!
; We would like to see the explanation mark inside the innermost
; appropriate element ( in our case).
; The desired transformation is done by the pre-post-order iterator
; with the help of two stylesheets. One of them specifies which
; elements can accept punctuations and which should not. The second
; stylesheet is generic. Both stylesheets instruct pre-post-order to do
; *preorder*, that is, call-by-name evaluations. The reason is that
; the transformation is a mix of breadth-first and depth-first
; The following is a Bigloo-specific module declaration.
; Other Scheme systems have something similar.
; (module pull-punct-sxml
; (include "myenv-bigloo.scm")
; (include "srfi-13-local.scm") ; or import from SRFI-13 if available
; (include "char-encoding.scm")
; (include "util.scm")
; (include "look-for-str.scm")
; (include "input-parse.scm")
; (include "SSAX-code.scm")
; (include "SXML-tree-trans.scm")
; (include "SXML-to-HTML.scm")
; $Id: pull-punct-sxml.scm,v 1.7 2004/08/06 23:03:20 oleg Exp $
; A non-linear pattern matcher
(define (pattern-var? x)
(and (symbol? x) (char=? #\_ (string-ref (symbol->string x) 0))))
; A dumb match of two ordered trees, one of which may contain variables
; Match two trees. tree1 may contain variables
; (as decided by the pattern-var? predicate above)
; variables match the corresponding branch in tree2.
; Env (bindings) contains associations of variables to values.
; tree1 may contain several occurrences of the same variable.
; All these occurrences must match the same value.
; A variable match is entered into the binding. A variable _ is an
; exception: its match is never entered into the binding.
; The function returns the resulting binding or #f if the match fails.
(define (match-tree tree1 tree2 env)
((pair? tree1) ; Recursively match pairs
(env-new (match-tree (car tree1) (car tree2) env)))
(match-tree (cdr tree1) (cdr tree2) env-new)))
(and (null? tree2) env))
((eq? '_ tree1) env) ; _ matches everything
((assq tree1 env) =>
(lambda (prev-binding) ; variable occurred before
(and (equal? (cdr prev-binding) tree2) env)))
(cons (cons tree1 tree2) env)) ; new variable, enter fresh binding
(and (equal? tree1 tree2) env))))
(lambda (tree1 tree2 expected)
(assert (equal? expected (match-tree tree1 tree2 '()))))))
(test '_x '(seq 1 (seq-empty)) '((_x seq 1 (seq-empty))))
(test '(seq-empty) '(seq 1 (seq-empty)) #f)
(test '(seq 1 (seq-empty)) '(seq 1 (seq-empty)) '())
(test '(seq _x (seq-empty)) '(seq 1 (seq-empty)) '((_x . 1)))
(test '(seq _x _y) '(seq 1 (seq-empty)) '((_y seq-empty) (_x . 1)))
(test '(seq _x _y _z) '(seq 1 (seq-empty)) #f)
(test '(seq _x (seq _y _z)) '(seq 1 (seq 2 (seq-empty)))
'((_z seq-empty) (_y . 2) (_x . 1)))
(test '(seq _x (seq _x _z)) '(seq 1 (seq 2 (seq-empty))) #f)
(test '(seq _x (seq _x _z)) '(seq 1 (seq 1 (seq-empty)))
'((_z seq-empty) (_x . 1)))
; end of the non-linear pattern matcher
; A sample document
; Punctuation is defined as a member of the following charset
(define punctuation '(#\. #\, #\? #\! #\; #\:))
; The first step of the transformation is parsing the source
; document into an abstract syntax tree (SXML) form. Using a SSAX XML
; parser [SSAX], this step can be accomplished as follows:
(lambda (port) (ssax:xml->sxml port '()))))
">>>The following is the sample document to transform, in its SXML form" nl)
; The identity function for use in a SXSLT stylesheet
(define sxslt-id (lambda x x))
; The algorithm consists of two distinct phases, which are pushed from the
; root to the leaves. The first phase is a classification, performed
; by a stylesheet classify-ss below. Given an SXML node, the
; stylesheet transforms it into the following node:
; If the source SXML node is a string:
; if the string starts with a 'punctuation', remove the punctuation and
; return a node:
; (*move-me* punctuation-char-string orig-string-with-punctuation-removed)
; Otherwise, return the string unchanged.
; If the source node is an attribute list, a 'cite' node, or a
; node with an empty content, we return the node as it is.
; Any other source SXML node is transformed into
; (*can-accept* original-node)
; That is, nodes that can accept punctuation or can yield punctuation to
; move are wrapped into identifying SXML nodes. Nodes that can't accept
; the punctuation are left unwrapped.
; Note, that any XML name can be a valid Scheme identifier. However, not
; every Scheme identifier can be a name of an XML element. This property
; lets us introduce administrative names such as *move-me* and
; *can-accept* without the fear of name clashes.
. ,(lambda (trigger str)
((equal? str "") str)
((memv (string-ref str 0) punctuation)
(string (string-ref str 0))
(substring str 1 (string-length str))))
; the following nodes never accept the punctuation
(@ *preorder* . ,sxslt-id)
(cite *preorder* . ,sxslt-id)
. ,(lambda (tag . elems)
((null? elems) (cons tag elems)) ; no children, won't accept
((match-tree '((@ . _)) elems '()) ; no children but attributes
(cons tag elems)) ; ... won't accept
(cons tag elems)))))
; The following macro does sort of a 'destructuring-bind'
((match-bind (var ...) body ...)
(let ((var (cdr
(assq (string->symbol (string-append "_" (symbol->string 'var))) env)))
; The transformation phase looks at the classified children of the node.
; When we see a (*can-accept* original-node) immediately followed by a
; (*move-me* punctuation-char-string string) node, we move the
; punctuation inside the original-node. We then apply the algorithm
; recursively to the children.
; This phase of the algorithm is generic.
(*text* . ,(lambda (trigger str) str))
(@ *preorder* . ,sxslt-id) ; Don't mess with attributes, leave them
(*move-me* ; remove the wrapper
. ,(lambda (trigger str1 str2)
(string-append str1 str2)))
(*can-accept* ; remove the wrapper and transform
*preorder* ; the node (recursively)
. ,(lambda (trigger node)
(pre-post-order node transform-ss)))
. ,(lambda (tag . elems)
(cout nl "on entry: " (cons tag elems) nl)
(pre-post-order elems classify-ss)))
;(define (handle-one elem)
; (cout "handle-one:" elem nl)
; (pre-post-order elem transform-ss))
(cout nl "classified" nl)
(let loop ((classified classified))
((null? classified) classified)
;((null? (cdr classified))
; (map handle-one classified))
((match-tree '((*can-accept* _node)
(*move-me* _punctuation _str)
(match-bind (node punctuation str rest)
(cout nl "pull in: " (list node punctuation str rest) nl)
(cons (append node (list punctuation))
(cons str (loop rest)))))
(cons (car classified)
(loop (cdr classified))))
(cout nl ">>>The transformed SXML tree is as follows" nl)
(cout nl ">>>Here is the result of pretty printing of the transformed SXML
tree into HTML" nl)
some text awesome!
This is a citation
. Move in