// Regex.java
package org.klojang.templates;
import java.util.regex.Pattern;
import static java.util.regex.Pattern.compile;
/**
 * Catalog of the tokens and regular expressions with which the template parser
 * dissects a template. The class is, in effect, internal. It is nevertheless public,
 * together with its constants, because it is the one place from which the syntactical
 * constructs available in a Klojang template can be derived, which makes the API
 * documentation more self-contained. Toolmakers may find it useful as well, for
 * example when building a syntax highlighting plugin.
 */
public final class Regex {

  // Flags for constructs whose content may run across several lines; DOTALL makes "."
  // match line terminators too.
  private static final int MULTI_LINE_FLAGS = Pattern.DOTALL | Pattern.MULTILINE;

  // Start and end of an HTML comment, each with its single optional space.
  private static final String CMT_OPEN = "<!-- ?";
  private static final String CMT_CLOSE = " ?-->";

  /**
   * Regular expression for the name of a {@linkplain VarGroup variable group}. A
   * variable group can be assigned inline, in the template itself, like so:
   * {@code ~%vargroup:varname%} (for instance {@code ~%html:firstName%}). The name
   * starts with a letter, which may be followed by any number of letters, digits,
   * underscores and hyphens. The names "begin" and "end" are not allowed for variable
   * groups.
   */
  public static final String REGEX_VAR_GROUP = "([a-zA-Z][a-zA-Z0-9_\\-]*)";

  /**
   * Regular expression for the name of a nested template and for a single segment of
   * a variable path. As such names may have to match keys of a {@code Map}, hardly any
   * restrictions apply: a name is at least one character long and contains none of the
   * characters {@code ~%:.\n\r\0}. Names that must line up with something else, such
   * as bean properties, are obviously restricted by that target; in that case they
   * need to be valid Java identifiers.
   */
  public static final String REGEX_NAME = "([^~%:.\\n\\r\u0000]+)";

  /**
   * <p>Regular expression for a path string. A variable name is a path through an
   * object graph, as in {@code ~%company.address.city%}. That variable maps to the
   * {@code city} property of the {@code Address} object inside the {@code Company}
   * object inside the object used to populate the template. Every segment of the path
   * must match {@link #REGEX_NAME}. More commonly you would work with nested and
   * doubly-nested templates and use a simple name (e.g. {@code ~%city%}) at the
   * appropriate nesting level.
   *
   * <p><b>This regular expression is not to be confused with
   * {@link #REGEX_INCLUDE_PATH}</b>. That one applies to included templates, where
   * the path points to a file system or classpath resource.
   *
   * @see org.klojang.path.Path
   */
  public static final String REGEX_PATH
      = "(" + REGEX_NAME + "(\\." + REGEX_NAME + ")*)";

  /**
   * Regular expression for a template variable. A variable is written as
   * {@code ~%[vargroup:]varname%}, with {@code vargroup} matching
   * {@link #REGEX_VAR_GROUP} and {@code varname} matching {@link #REGEX_PATH}.
   */
  public static final String REGEX_VARIABLE
      = "~%(" + REGEX_VAR_GROUP + ":)?" + REGEX_PATH + "%";

  /**
   * <p>Regular expression for a template variable wrapped in an HTML comment, for
   * example {@code <!-- ~%firstName% -->}. It is rendered exactly like
   * {@code ~%firstName%}. The benefit of the HTML comment is that the raw, unprocessed
   * template still looks decent in a browser, as no "odd" tilde-percent
   * sequences clutter the page. The effect improves further when a placeholder value
   * is added: {@code <!-- ~%firstName% -->John<!--%-->}. This is again rendered just
   * like {@code ~%firstName%}. A browser showing the raw template displays
   * "John", since that text sits outside any HTML comment. Once
   * <i>Klojang Templates</i> renders the template, "John" is gone and only
   * the value of {@code firstName} is left.
   *
   * <p>The complete construct ({@code <!-- ~%firstName% -->John<!--%-->})
   * <b>must</b> fit on one line. For a placeholder value that stretches over several
   * lines, use the following syntax instead:
   *
   * <blockquote><pre>{@code
   * <tr>
   *   <td>
   *     <!-- ~%firstName% -->
   *     <!--%-->
   *     This entire piece of text, and
   *     the placeholder tags on either
   *     side of it, will be gone when
   *     the template is rendered
   *     <!--%-->
   *   </td>
   * </tr>
   * }</pre></blockquote>
   *
   * <p>Unlike with the single-line syntax, such a value is not registered <i>as the
   * placeholder for</i> the variable in front of it. It is merely text that shows up
   * in the raw template and is absent from the rendered one.
   *
   * <p>The space on either side of the variable (as in
   * {@code <!-- ~%firstName% -->}) may be left out
   * ({@code <!--~%firstName%-->}). More than one space, or any other character, is
   * not permitted.
   *
   * @see VarGroup#DEF
   * @see #REGEX_PLACEHOLDER
   */
  public static final String REGEX_CMT_VARIABLE
      = CMT_OPEN + REGEX_VARIABLE + CMT_CLOSE + "((.*?)<!--%-->)?";

  /**
   * <p>
   * Regular expression for the begin tag of an inline template. Each of these is a
   * valid begin tag:
   * </p>
   * <ul>
   *   <li>{@code ~%%begin:foo%}
   *   <li>{@code <!-- ~%%begin:foo%}
   *   <li>{@code <!-- ~%%begin:foo% -->}
   * </ul>
   * <p>
   * On top of that, the parser requires begin and end tags to be symmetrical:
   * </p>
   * <ul>
   *   <li><span style="background-color:#eaeaea">{@code <!-- ~%%begin:foo% -->}</span> <b>must</b> be closed by <span style="background-color:#eaeaea">{@code <!-- ~%%end:foo% -->}</span>
   *   <li><span style="background-color:#eaeaea">{@code <!-- ~%%begin:foo%}</span> <b>must</b> be closed by <span style="background-color:#eaeaea">{@code ~%%end:foo% -->}</span>
   *   <li><span style="background-color:#eaeaea">{@code ~%%begin:foo%}</span> <b>must</b> be closed by <span style="background-color:#eaeaea">{@code ~%%end:foo%}</span>
   * </ul>
   * <p>
   * The space after {@code <!--} and the space before {@code -->} are both optional.
   * More than one space, or any other character, is not permitted.
   * </p>
   */
  public static final String REGEX_INLINE_TEMPLATE_BEGIN
      = "(" + CMT_OPEN + ")?~%%begin:" + REGEX_NAME + "%(" + CMT_CLOSE + ")?";

  /**
   * <p>
   * Regular expression for the end tag of an inline template. Each of these is a valid
   * end tag:
   * </p>
   * <ul>
   *   <li>{@code ~%%end:foo%}
   *   <li>{@code ~%%end:foo% -->}
   *   <li>{@code <!-- ~%%end:foo% -->}
   * </ul>
   * <p>
   * On top of that, the parser requires begin and end tags to be symmetrical:
   * </p>
   * <ul>
   *   <li><span style="background-color:#eaeaea">{@code <!-- ~%%begin:foo% -->}</span> <b>must</b> be closed by <span style="background-color:#eaeaea">{@code <!-- ~%%end:foo% -->}</span>
   *   <li><span style="background-color:#eaeaea">{@code <!-- ~%%begin:foo%}</span> <b>must</b> be closed by <span style="background-color:#eaeaea">{@code ~%%end:foo% -->}</span>
   *   <li><span style="background-color:#eaeaea">{@code ~%%begin:foo%}</span> <b>must</b> be closed by <span style="background-color:#eaeaea">{@code ~%%end:foo%}</span>
   * </ul>
   * <p>
   * The space after {@code <!--} and the space before {@code -->} are both optional.
   * More than one space, or any other character, is not permitted.
   * </p>
   */
  public static final String REGEX_INLINE_TEMPLATE_END
      = "(" + CMT_OPEN + ")?~%%end:" + REGEX_NAME + "%(" + CMT_CLOSE + ")?";

  /**
   * Regular expression for the path inside an include tag. A template is included in
   * another template by writing {@code ~%%include:/path/to/template.html%%} or
   * {@code ~%%include:template-name:/path/to/template.html%%}. The path consists of
   * one or more valid URL characters, that is: letters, digits and
   * {@code _-~:;/?#!$&%,@+.=[]()}.
   */
  public static final String REGEX_INCLUDE_PATH
      = "([a-zA-Z0-9_~:;/?#!$&%,@+.=\\-\\[\\]()]+?)";

  /**
   * Regular expression for an included template. The general form is
   * {@code ~%%include:[template-name:]path%%}. When the name is left out, the
   * template is named after the base name of the last path element; for
   * {@code ~%%include:/path/to/foo.html%%} that is "foo".
   */
  public static final String REGEX_INCLUDED_TEMPLATE
      = "~%%include:(" + REGEX_NAME + ":)?" + REGEX_INCLUDE_PATH + "%%";

  /**
   * Regular expression for an included template wrapped in an HTML comment, for
   * example {@code <!-- ~%%include:/path/to/foo.html%% -->}.
   */
  public static final String REGEX_CMT_INCLUDED_TEMPLATE
      = CMT_OPEN + REGEX_INCLUDED_TEMPLATE + CMT_CLOSE;

  // Used only for syntax error detection:
  static final String DITCH_BLOCK_TOKEN = "<!--%%-->";
  static final String PLACEHOLDER_TOKEN = "<!--%-->";

  /**
   * Regular expression for a ditch block: two {@code <!--%%-->} tokens plus whatever
   * text lies between them. Ditch blocks are to <i>Klojang Templates</i> what comments
   * are to HTML or Java. They let you "comment out" nested templates, template
   * variables, static HTML, and so on. A ditch block cannot itself sit inside any
   * syntactical construct of <i>Klojang Templates</i>, nested templates included.
   */
  public static final String REGEX_DITCH_BLOCK
      = DITCH_BLOCK_TOKEN + "(.*?)" + DITCH_BLOCK_TOKEN;

  /**
   * Regular expression for a placeholder: two {@code <!--%-->} tokens plus whatever
   * text lies between them. When <i>Klojang Templates</i> renders a template, the
   * tokens and the text they enclose are removed. Because {@code <!--%-->} is a
   * self-closed HTML comment, a browser rendering the raw, unprocessed template does
   * show the enclosed text. Unlike {@link #REGEX_DITCH_BLOCK ditch blocks},
   * placeholders are allowed inside a nested template.
   */
  public static final String REGEX_PLACEHOLDER
      = PLACEHOLDER_TOKEN + "(.*?)" + PLACEHOLDER_TOKEN;

  // Compiled once up front. Only ditch blocks and placeholders get the multi-line
  // flags; every other construct is compiled without flags.
  static final Pattern VARIABLE = compile(REGEX_VARIABLE);
  static final Pattern CMT_VARIABLE = compile(REGEX_CMT_VARIABLE);
  static final Pattern INLINE_TEMPLATE_BEGIN = compile(REGEX_INLINE_TEMPLATE_BEGIN);
  static final Pattern INLINE_TEMPLATE_END = compile(REGEX_INLINE_TEMPLATE_END);
  static final Pattern INCLUDED_TEMPLATE = compile(REGEX_INCLUDED_TEMPLATE);
  static final Pattern CMT_INCLUDED_TEMPLATE = compile(REGEX_CMT_INCLUDED_TEMPLATE);
  static final Pattern DITCH_BLOCK = compile(REGEX_DITCH_BLOCK, MULTI_LINE_FLAGS);
  static final Pattern PLACEHOLDER = compile(REGEX_PLACEHOLDER, MULTI_LINE_FLAGS);

  /**
   * Writes each compiled regular expression to standard out, one per line, preceded
   * by a dot-padded label.
   */
  public static void printAll() {
    print("VARIABLE ................: ", VARIABLE);
    print("CMT_VARIABLE ............: ", CMT_VARIABLE);
    print("INLINE_TEMPLATE_BEGIN ...: ", INLINE_TEMPLATE_BEGIN);
    print("INLINE_TEMPLATE_END .....: ", INLINE_TEMPLATE_END);
    print("INCLUDED_TEMPLATE .......: ", INCLUDED_TEMPLATE);
    print("CMT_INCLUDED_TEMPLATE ...: ", CMT_INCLUDED_TEMPLATE);
    print("DITCH_BLOCK .............: ", DITCH_BLOCK);
    print("PLACEHOLDER .............: ", PLACEHOLDER);
  }

  // Prints one line: the label immediately followed by the pattern's string form.
  private static void print(String label, Pattern pattern) {
    System.out.println(label + pattern);
  }

  // Not instantiable: this class only holds constants and a static utility method.
  private Regex() {
    throw new UnsupportedOperationException();
  }
}