Paste number 9991: FAQ conversion program - v4

Paste number 9991: FAQ conversion program - v4
Pasted by: mmmdonuts
When:12 years, 2 months ago
Share:Tweet this! | http://paste.lisp.org/+7PJ
Channel:#scheme
Paste contents:
Raw Source | XML | Display As
; imports for PLT - all portable libraries
(require (lib "ssax.ss" "ssax")
         (lib "sxml-match.ss" "sxml-match")
         (lib "1.ss" "srfi")
         (lib "13.ss" "srfi"))

(define base-path "~/dev/schemefaq/")
(define site-url "http://community.schemewiki.org/")

(define source-files
  '(general.xml implementation.xml language.xml macros.xml misc.xml programming.xml standards.xml))

(define other-files
  '(build.xml faq.xml))

(define xref-targets
  '((general     lisp uses emacs)
    (language    dottedapp dyntop)
    (macros      hygienic portablemacro macroellipses nestedmacro multidefine macrodata)
    (misc        sexp sicp)
    (programming cont)
    (standards   standards R6RS reference srfi implementations libraries ffi java specialimpl debuggers)))

; super-inefficient due to xref-targets list format; doesn't matter
(define (xref-id->url id)
  (call/cc 
   (lambda (return)
     (for-each
      (lambda (targets)
        (if (memq id (cdr targets))
            (return (format "?scheme-faq-~a#~a" (car targets) id))))
      xref-targets))))

(define (convert-file filename)
  (printf "Processing ~a~n" filename)
  (apply
   string-append
   (sxml->faq
    (cadr
     (call-with-input-file (string-append base-path (symbol->string filename))
       (lambda (port) (ssax:xml->sxml port '())))))))

; compress-spaces : replaces sequences of multiple whitespace characters with a single space or newline
; easier to do this with a native regex procedure, but this is portable
(define (compress-spaces s)
  (list->string
   (reverse
    (string-fold
     (lambda (c t)
       (let ((c-prev (if (null? t) #\Z (car t)))) 
         (if (char-whitespace? c)
             (if (char-whitespace? c-prev)
                 (if (and (char=? c #\newline)
                          (not (char=? c-prev #\newline)))
                     (cons c (cdr t))
                     t)
                 (cons c t))
             (cons c t))))
     '()
     s))))

; compress-whitespace : string ->string 
; Replaces sequences of multiple whitespace characters with a single space,
; and eliminates any leading or trailing whitespace.
(define (compress-whitespace s)
  (list->string
   (reverse
    (string-fold
     (lambda (c t) 
       (if (char-whitespace? c)
           (if (or (null? t)
                   (char-whitespace? (car t)))
               t
               (cons #\space t))
           (cons c t)))
     '()
     s))))

; convert-markup : match markup found in para bodies and convert to wiliki format
; a para body is a list of strings and sxml.
(define (convert-markup body)  
  (apply 
   string-append
   (map
    (lambda (frag)
      (sxml-match frag
        ; match cases ordered alphabetically
        [(application ,text)
         (format "{{{~a}}} " (compress-whitespace text))]
        [(b ,text)
         (format "'''~a''' " text)]
        [(blockquote (para ,text))
         (format "~n<<<~n~a~n>>>~n" text)]
        [(br)
         "%~"]
        [(citation ,text)
         (format "[~a] " (compress-spaces text))]
        [(citetitle ,text)
         (format "'''~a''' " text)]
        [(command ,text)
         (format "{{{~a}}} " (compress-spaces text))]
        [(emphasis ,text)
         (format "''~a'' " text)]
        [(function ,text)
         (format "{{{~a}}} " (compress-spaces text))]
        [(informaltable (tgroup (thead (row (entry ,head1) (entry ,head2)))
                                (tbody (row (entry ,body1 ...)
                                            (entry ,body2 ...))
                                       ...)))
         (string-append (format "|| '''~a''' || '''~a''' ||\n" head1 head2)
                        (format "|| ~a || ~a ||\n"
                                (convert-markup body1)
                                (convert-markup body2))
                        ...)]
        [(itemizedlist (listitem (para ,text ...)) ...)
         (string-append (format "- ~a\n" (convert-body text))
                        ...)]
        [(itemizedlist
          (listitem
           (formalpara
            (title ,title ...)
            (para ,body ...)))
          ...)
         (string-append
          (format ":~a:~n~a" (convert-header title) (convert-para body))
          ...)]        
        [(itemizedlist
          (listitem
           (formalpara
            (title ,title ...)
            (para ,body ...)
            ...))
          ...)
         (string-append
          (format ":~a:~n~a\n" (convert-header title)
                  (string-append (string-append (convert-para body) "\n\n")
                                 ...))
          ...)]
        [(literal ,text)
         (format "{{{~a}}} " text)]
        [(programlisting ,code ...)
         (format "~n{{{~n~a~n}}}~n" (convert-markup code))]
        [(quote ,text ...)
         (format "\"~a\" " (convert-markup text))]
        [(remark ,text)
         (format "'''~a''' " (compress-whitespace text))]
        [(replaceable ,text)
         (format "{{{~a}}} " text)]
        [(row (entry (application ,name)) (entry (ulink (@ (url ,url)))))
         (format "- [~a ~a]~n" url name)]
        [(simplelist (member ,text ...) ...)
         (string-append
          (format "- ~a~n" (convert-para text))
          ...)]
        [(superscript ,text)
         (format "^~a " text)]
        [(ulink (@ (url ,url)) ,body ...)
         (let ((body (convert-body body)))
           (if body
               (format "[~a ~a] " url body)
               (format "[~a] " url)))]
        [(xref (@ (linkend ,id)))
         (format "[~a~a here]" site-url (xref-id->url (string->symbol id)))]
        [,other
          (if (string? other)
              (string-trim (compress-spaces other))
              (begin
                (printf "Markup not handled: ~a~n" other)
                (format "~a" other)))]))
    body)))

(define (convert-body frags)
  (if (null? frags)
      #f
      (convert-markup frags)))

(define (convert-para body)
  (string-append
   (convert-markup body)
   "\n\n"))

(define (convert-header body)
  (compress-whitespace
   (convert-markup body)))

; extract-para-bodies : extract bodies from paras
; Converts a list of paras to a list of body lists
; each body list is a list of text or sxml values
(define (extract-para-bodies paras)
  (map
   (lambda (sxml)
     (sxml-match sxml
       [(para ,body ...) body]
       [(remark ,body ...) (cons "WTF is this doing here? " body)]
       [,else (error "expected PARA - found: " else)]))
   paras))

; convert-using : convert the specified paras using proc
; Extracts bodies from the specified paras, converts each body with proc,
; and turns the resulting list of strings into a single string
(define (convert-using proc paras)
  (apply string-append (map proc (extract-para-bodies paras))))

(define (convert-qanda sxml)
  (sxml-match sxml
    [(qandaentry
      (question (@ (id (,id #f))) ,question ...)
      (answer ,answer ...))
     (format "~a** ~a~n~a~n"
             (if id (format "[[$$label ~a]]~n" id) "")
             (convert-using convert-header question)
             (convert-using convert-para answer))]
    ; match question without answer
    [(qandaentry
      (question (@ (id (,id #f))) ,question ...))
     (format "~a** ~a~n"
             (if id (format "[[$$label ~a]]~n" id) "")
             (convert-using convert-header question))]))

(define (sxml->faq sxml)
  (sxml-match sxml
    [(qandadiv
      (title ,title)
      ,entry
      ...)
     (list
      (format "* ~a~n~n" title)
      (convert-qanda entry)
      ...)]
    [,else (error "expected QANDADIV - found" else)]))

(define faq-sources (map convert-file source-files))
(display (list-ref faq-sources 0)) ; display one for checking purposes

This paste has no annotations.

Colorize as:
Show Line Numbers

Lisppaste pastes can be made by anyone at any time. Imagine a fearsomely comprehensive disclaimer of liability. Now fear, comprehensively.