diff --git a/app/TextFormatter.inc b/app/TextFormatter.inc index dd89cf20..1bb14234 100644 --- a/app/TextFormatter.inc +++ b/app/TextFormatter.inc @@ -57,14 +57,18 @@ */ public function t($string) { - // Remove chars - $string = htmlspecialchars($string, ENT_NOQUOTES); + // Use Textile + \hhu\z\lib\Textile::load(); + $textileParser = new \Netcarver\Textile\Parser(); + $string = $textileParser->textileThis($string); // Important text + // TODO Deprecated $string = str_replace('[strong]', '', $string); $string = str_replace('[/strong]', '', $string); // Create tables + // TODO Deprecated $string = preg_replace('/(\[table\])\s+/u', '$1', $string); $string = preg_replace('/\s*(\[tr\])\s*/u', '$1', $string); $string = preg_replace('%\s+(\[/table\])%u', '$1', $string); @@ -78,10 +82,6 @@ $string = str_replace('[td]', '', $string); $string = str_replace('[/td]', '', $string); - // Create links - $string = preg_replace('!(^|\s)"([^"]+)":(https?://[^\s]+)(\s|$)!i', '$1$2$4', $string); - $string = preg_replace('!(^|\s)(https?://[^\s]+)(\s|$)!i', '$1$2$3', $string); - // Handle Seminarymedia $seminarymedia = array(); preg_match_all('/\[seminarymedia:(\d+)\]/iu', $string, $matches); //, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); @@ -111,7 +111,8 @@ // Return processed string - return nl2br($string); + //return nl2br($string); + return $string; } diff --git a/app/lib/Netcarver/Textile/DataBag.php b/app/lib/Netcarver/Textile/DataBag.php new file mode 100644 index 00000000..0a1d9401 --- /dev/null +++ b/app/lib/Netcarver/Textile/DataBag.php @@ -0,0 +1,99 @@ + + * use Netcarver\Textile\DataBag; + * $plant = new DataBag(array('key' => 'value')); + * $plant->flower('rose')->color('red'); + * + */ + +class DataBag +{ + /** + * The data array stored in the bag. + * + * @var array + */ + + protected $data; + + /** + * Constructor. 
+ * + * @param array|null $data The initial data array stored in the bag + */ + + public function __construct(array $data = null) + { + $this->data = (array) $data; + } + + /** + * Adds a value to the bag. + * + * Empty values are rejected, unless the + * second argument is set TRUE. + * + * + * use Netcarver\Textile\DataBag; + * $plant = new DataBag(array('key' => 'value')); + * $plant->flower('rose')->color('red')->emptyValue(false, true); + * + * + * @param string $name The name + * @param array $params Arguments + * @return DataBag + */ + + public function __call($name, array $params) + { + if (!empty($params[1]) || !empty($params[0])) { + $this->data[$name] = $params[0]; + } + + return $this; + } +} diff --git a/app/lib/Netcarver/Textile/Parser.php b/app/lib/Netcarver/Textile/Parser.php new file mode 100644 index 00000000..4c150ec3 --- /dev/null +++ b/app/lib/Netcarver/Textile/Parser.php @@ -0,0 +1,4042 @@ + + * All rights reserved. + * + * Thanks to Carlo Zottmann for refactoring + * Textile's procedural code into a class framework + * + * Additions and fixes Copyright (c) 2006 Alex Shiels https://twitter.com/tellyworth + * Additions and fixes Copyright (c) 2010 Stef Dawson http://stefdawson.com/ + * Additions and fixes Copyright (c) 2010-13 Netcarver https://github.com/netcarver + * Additions and fixes Copyright (c) 2011 Jeff Soo http://ipsedixit.net/ + * Additions and fixes Copyright (c) 2012 Robert Wetzlmayr http://wetzlmayr.com/ + * Additions and fixes Copyright (c) 2012-13 Jukka Svahn http://rahforum.biz/ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name Textile nor the names of its contributors may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* +Textile usage examples. + +Block modifier syntax: + + Header: h(1-6). + Paragraphs beginning with 'hn. ' (where n is 1-6) are wrapped in header tags. + Example: h1. Header... ->

<h1>Header...</h1>

+ + Paragraph: p. (also applied by default) + Example: p. Text ->

<p>Text</p>

+ + Blockquote: bq. + Example: bq. Block quotation... ->
<blockquote>Block quotation...</blockquote>
+ + Blockquote with citation: bq.:http://citation.url + Example: bq.:http://textism.com/ Text... + ->
<blockquote cite="http://textism.com/">Text...</blockquote>
+ + Footnote: fn(1-100). + Example: fn1. Footnote... ->

<p id="fn1">Footnote...</p>

+ + Numeric list: #, ## + Consecutive paragraphs beginning with # are wrapped in ordered list tags. + Example:
<ol><li>ordered list</li></ol>
+ + Bulleted list: *, ** + Consecutive paragraphs beginning with * are wrapped in unordered list tags. + Example: <ul><li>unordered list</li></ul> + + Definition list: + Terms ;, ;; + Definitions :, :: + Consecutive paragraphs beginning with ; or : are wrapped in definition list tags. + Example:
<dl><dt>term</dt><dd>definition</dd></dl>
+ + Redcloth-style Definition list: + - Term1 := Definition1 + - Term2 := Extended + definition =: + +Phrase modifier syntax: + + _emphasis_ -> <em>emphasis</em> + __italic__ -> <i>italic</i> + *strong* -> <strong>strong</strong> + **bold** -> <b>bold</b> + ??citation?? -> <cite>citation</cite> + -deleted text- -> <del>deleted</del> + +inserted text+ -> <ins>inserted</ins> + ^superscript^ -> <sup>superscript</sup> + ~subscript~ -> <sub>subscript</sub> + @code@ -> <code>computer code</code> + %(bob)span% -> <span class="bob">span</span> + + ==notextile== -> leave text alone (do not format) + + "linktext":url -> <a href="url">linktext</a> +"linktext(title)":url -> <a href="url" title="title">linktext</a> + "$":url -> <a href="url">url</a> + "$(title)":url -> <a href="url" title="title">url</a> + + !imageurl! -> <img src="imageurl" /> + !imageurl(alt text)! -> <img src="imageurl" alt="alt text" /> + !imageurl!:linkurl -> <a href="linkurl"><img src="imageurl" /></a> + +ABC(Always Be Closing) -> <acronym title="Always Be Closing">ABC</acronym> + +Linked Notes: + + Allows the generation of an automated list of notes with links. + + Linked notes are composed of three parts, a set of named _definitions_, a set of + _references_ to those definitions and one or more _placeholders_ indicating where + the consolidated list of notes is to be placed in your document. + + Definitions: + + Each note definition must occur in its own paragraph and should look like this... + + note#mynotelabel. Your definition text here. + + You are free to use whatever label you wish after the # as long as it is made up + of letters, numbers, colon(:) or dash(-). + + References: + + Each note reference is marked in your text like this[#mynotelabel] and + it will be replaced with a superscript reference that links into the list of + note definitions. + + List placeholder(s): + + The note list can go anywhere in your document. You have to indicate where + like this: + + notelist. + + notelist can take attributes (class#id) like this: notelist(class#id). + + By default, the note list will show each definition in the order that they + are referenced in the text by the _references_. It will show each definition with + a full list of backlinks to each reference. If you do not want this, you can choose + to override the backlinks like this... + + notelist(class#id)!.
Produces a list with no backlinks. + notelist(class#id)^. Produces a list with only the first backlink. + + Should you wish to have a specific definition display backlinks differently to this + then you can override the backlink method by appending a link override to the + _definition_ you wish to customise. + + note#label. Uses the citelist's setting for backlinks. + note#label!. Causes that definition to have no backlinks. + note#label^. Causes that definition to have one backlink (to the first ref.) + note#label*. Causes that definition to have all backlinks. + + Any unreferenced notes will be left out of the list unless you explicitly state + you want them by adding a '+'. Like this... + + notelist(class#id)!+. Giving a list of all notes without any backlinks. + + You can mix and match the list backlink control and unreferenced links controls + but the backlink control (if any) must go first. Like so: notelist^+. , not + like this: notelist+^. + + Example... + Scientists say[#lavader] the moon is small. + + note#other. An unreferenced note. + + note#lavader(myliclass). "Proof":http://example.com of a small moon. + + notelist(myclass#myid)+. + + Would output (the actual IDs used would be randomised)... + +

<p>Scientists say<sup><a href="#def_id_1" id="ref_id_1a">1</a></sup> the moon is small.</p>

+ +
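<ol class="myclass" id="myid"> +
<li class="myliclass"><a href="#ref_id_1a"><sup>a</sup></a> + <span id="def_id_1"> </span><a href="http://example.com">Proof</a> of a small moon.</li> +
<li>An unreferenced note.</li> +
</ol>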
+ + The 'a b c' backlink characters can be altered too. + For example if you wanted the notes to have numeric backlinks starting from 1: + + notelist:1. + +Table syntax: + + Simple tables: + + |a|simple|table|row| + |And|Another|table|row| + |With an||empty|cell| + + |=. My table caption goes here + |_. A|_. table|_. header|_.row| + |A|simple|table|row| + + Note: Table captions *must* be the first line of the table else treated as a center-aligned cell. + + Tables with attributes: + + table{border:1px solid black}. My table summary here + {background:#ddd;color:red}. |{}| | | | + + To specify thead / tfoot / tbody groups, add one of these on its own line + above the row(s) you wish to wrap (you may specify attributes before the dot): + + |^. # thead + |-. # tbody + |~. # tfoot + + Column groups: + + |:\3. 100| + + Becomes: + <colgroup span="3" width="100"></colgroup> + + You can omit either or both of the \N or width values. You may also + add cells after the colgroup definition to specify col elements with + span, width, or standard Textile attributes: + + |:. 50|(firstcol). |\2. 250||300| + + Becomes: + <colgroup width="50"> + <col class="firstcol" /> + <col span="2" width="250" /> + <col /> + <col width="300" /> + </colgroup> + + (Note that, per the HTML specification, you should not add span + to the colgroup if specifying col elements.) + +Applying Attributes: + + Most anywhere Textile code is used, attributes such as arbitrary css style, + css classes, and ids can be applied. The syntax is fairly consistent. + + The following characters quickly alter the alignment of block elements: + + < -> left align ex. p<. left-aligned para + > -> right align h3>. right-aligned header 3 + = -> centred h4=. centred header 4 + <> -> justified p<>. justified paragraph + + These will change vertical alignment in table cells: + + ^ -> top ex. |^. top-aligned table cell| + - -> middle |-. middle aligned| + ~ -> bottom |~. bottom aligned cell| + + Plain (parentheses) inserted between block syntax and the closing dot-space + indicate classes and ids: + + p(hector). paragraph ->

<p class="hector">paragraph</p>

+ + p(#fluid). paragraph ->

<p id="fluid">paragraph</p>

+ + (classes and ids can be combined) + p(hector#fluid). paragraph ->

<p class="hector" id="fluid">paragraph</p>

+ + Curly {brackets} insert arbitrary css style + + p{line-height:18px}. paragraph ->

<p style="line-height:18px">paragraph</p>

+ + h3{color:red}. header 3 ->

<h3 style="color:red">header 3</h3>

+ + Square [brackets] insert language attributes + + p[no]. paragraph ->

<p lang="no">paragraph</p>

+ + %[fr]phrase% -> <span lang="fr">phrase</span> + + Usually Textile block element syntax requires a dot and space before the block + begins, but since lists don't, they can be styled just using braces + + #{color:blue} one ->
<ol style="color:blue"> + # big
<li>one</li> + # list
<li>big</li> +
<li>list</li> +
</ol>
+ + Using the span tag to style a phrase + + It goes like this, %{color:red}the fourth the fifth% + -> It goes like this, the fourth the fifth + +Ordered list start and continuation: + + You can control the start attribute of an ordered list like so; + + #5 Item 5 + # Item 6 + + You can resume numbering list items after some intervening anonymous block like so... + + #_ Item 7 + # Item 8 +*/ + +/** + * Textile parser. + * + * The Parser class takes Textile input and + * converts it to well formatted HTML. This is + * the library's main class, hosting the parsing + * functionality and exposing a simple + * public interface for you to use. + * + * The most basic use case would involve initialising + * a instance of the class and calling the textileThis + * method, parsing the given Textile input in unrestricted + * mode. + * + * + * $parser = new \Netcarver\Textile\Parser(); + * echo $parser->textileThis('h1. Hello World!'); + * + * + * @see Parser::__construct() + * @see Parser::textileThis() + * @see Parser::textileRestricted() + */ + +class Parser +{ + /** + * Version number. + * + * @var string + */ + + protected $ver = '3.5.5'; + + /** + * Regular expression snippets. + * + * @var array + */ + + protected $regex_snippets; + + /** + * Pattern for horizontal align. + * + * @var string + */ + + protected $hlgn = "(?:\<(?!>)|<>|>|<|(?|\<\>|\=|[()]+(?! ))"; + + /** + * Pattern for vertical align. + * + * @var string + */ + + protected $vlgn = "[\-^~]"; + + /** + * Pattern for HTML classes and IDs. + * + * Does not allow classes/ids/languages/styles to span across + * newlines if used in a dotall regular expression. + * + * @var string + */ + + protected $clas = "(?:\([^)\n]+\))"; + + /** + * Pattern for language attribute. + * + * @var string + */ + + protected $lnge = "(?:\[[^]\n]+\])"; + + /** + * Pattern for style attribute. + * + * @var string + */ + + protected $styl = "(?:\{[^}\n]+\})"; + + /** + * Regular expression pattern for column spans in tables. 
+ * + * @var string + */ + + protected $cspn = "(?:\\\\[0-9]+)"; + + /** + * Regular expression for row spans in tables. + * + * @var string + */ + + protected $rspn = "(?:\/[0-9]+)"; + + /** + * Regular expression for horizontal or vertical alignment. + * + * @var string + */ + + protected $a; + + /** + * Regular expression for column or row spans in tables. + * + * @var string + */ + + protected $s; + + /** + * Pattern that matches a class, style, language and horizontal alignment attributes. + * + * @var string + */ + + protected $c; + + /** + * Pattern that matches class, style and language attributes. + * + * Will allows all 16 possible permutations of class, style and language attributes. + * , c, cl, cs, cls, csl, l, lc, ls, lcs, lsc, s, sc, sl, scl or slc + * + * @var string + */ + protected $cls; + + /** + * Whitelisted block tags. + * + * @var array + */ + + protected $blocktag_whitelist = array(); + + /** + * Pattern for punctation. + * + * @var string + */ + + protected $pnct = '[\!"#\$%&\'()\*\+,\-\./:;<=>\?@\[\\\]\^_`{\|}\~]'; + + /** + * Pattern for URL. + * + * @var string + */ + + protected $urlch; + + /** + * Matched marker symbols. + * + * @var string + */ + + protected $syms = '¤§µ¶†‡•∗∴◊♠♣♥♦'; + + /** + * HTML rel attribute used for links. + * + * @var string + */ + + protected $rel; + + /** + * Array of footnotes + * + * @var array + */ + protected $fn; + + /** + * Shelved content. + * + * Stores fragments of the source text that have been parsed + * and require no more processing. + * + * @var array + */ + + protected $shelf = array(); + + /** + * Restricted mode. + * + * @var bool + */ + + protected $restricted = false; + + /** + * Disallow images. + * + * @var bool + */ + + protected $noimage = false; + + /** + * Lite mode. + * + * @var bool + */ + + protected $lite = false; + + /** + * Accepted link protocols. + * + * @var array + */ + + protected $url_schemes = array(); + + /** + * Restricted link protocols. 
+ * + * @var array + */ + + protected $restricted_url_schemes = array( + 'http', + 'https', + 'ftp', + 'mailto', + ); + + /** + * Unrestricted link protocols. + * + * @var array + */ + + protected $unrestricted_url_schemes = array( + 'http', + 'https', + 'ftp', + 'mailto', + 'file', + 'tel', + 'callto', + 'sftp', + ); + + /** + * Span tags. + * + * @var array + */ + + protected $span_tags = array( + '*' => 'strong', + '**' => 'b', + '??' => 'cite', + '_' => 'em', + '__' => 'i', + '-' => 'del', + '%' => 'span', + '+' => 'ins', + '~' => 'sub', + '^' => 'sup', + ); + + /** + * Patterns for finding glyphs. + * + * An array of regex patterns used to find text features + * such as apostrophes, fractions and em-dashes. Each + * entry in this array must have a corresponding entry in + * the $glyph_replace array. + * + * @var null|array + * @see Parser::$glyph_replace + */ + + protected $glyph_search = null; + + /** + * Glyph replacements. + * + * An array of replacements used to insert typographic glyphs + * into the text. Each entry must have a corresponding entry in + * the $glyph_search array and may refer to values captured in + * the corresponding search regex. + * + * @var null|array + * @see Parser::$glyph_search + */ + + protected $glyph_replace = null; + + /** + * Indicates whether glyph substitution is required. + * + * Dirty flag, set by setSymbol(), indicating the parser needs to + * rebuild the glyph substitutions before the next parse. + * + * @var bool + * @see Parser::setSymbol() + */ + + protected $rebuild_glyphs = true; + + /** + * Relative image path. + * + * @var string + */ + + protected $relativeImagePrefix = ''; + + /** + * Maximum nesting level for inline elements. + * + * @var int + */ + + protected $max_span_depth = 5; + + /** + * Server document root. + * + * @var string + */ + + protected $doc_root; + + /** + * Target document type. + * + * @var string + */ + + protected $doctype; + + /** + * Substitution symbols. 
+ * + * Basic symbols used in textile glyph replacements. To override these, call + * setSymbol method before calling textileThis or textileRestricted. + * + * @var array + * @see Parser::setSymbol() + */ + + protected $symbols = array( + 'quote_single_open' => '‘', + 'quote_single_close' => '’', + 'quote_double_open' => '“', + 'quote_double_close' => '”', + 'apostrophe' => '’', + 'prime' => '′', + 'prime_double' => '″', + 'ellipsis' => '…', + 'emdash' => '—', + 'endash' => '–', + 'dimension' => '×', + 'trademark' => '™', + 'registered' => '®', + 'copyright' => '©', + 'half' => '½', + 'quarter' => '¼', + 'threequarters' => '¾', + 'degrees' => '°', + 'plusminus' => '±', + 'fn_ref_pattern' => '{marker}', + 'fn_foot_pattern' => '{marker}', + 'nl_ref_pattern' => '{marker}', + ); + + /** + * Dimensionless images flag. + * + * @var bool + */ + + protected $dimensionless_images = false; + + /** + * Directory separator. + * + * @var string + */ + + protected $ds = '/'; + + /** + * Whether mbstring extension is installed. + * + * @var bool + */ + + protected $mb; + + /** + * Multi-byte conversion map. + * + * @var array + */ + + protected $cmap = array(0x0080, 0xffff, 0, 0xffff); + + /** + * Stores note index. + * + * @var int + */ + + protected $note_index = 1; + + /** + * Stores unreferenced notes. + * + * @var array + */ + + protected $unreferencedNotes = array(); + + /** + * Stores note lists. + * + * @var array + */ + + protected $notelist_cache = array(); + + /** + * Stores notes. + * + * @var array + */ + + protected $notes = array(); + + /** + * Stores URL references. + * + * @var array + */ + + protected $urlrefs = array(); + + /** + * Stores span depth. + * + * @var int + */ + + protected $span_depth = 0; + + /** + * Unique ID used for reference tokens. + * + * @var string + */ + + protected $uid; + + /** + * Token reference index. + * + * @var int + */ + + protected $refIndex = 1; + + /** + * Stores references values. 
+ * + * @var array + */ + + protected $refCache = array(); + + /** + * Matched open and closed quotes. + * + * @var array + */ + + protected $quotes = array( + '"' => '"', + "'" => "'", + '(' => ')', + '{' => '}', + '[' => ']', + '«' => '»', + '»' => '«', + '‹' => '›', + '›' => '‹', + '„' => '“', + '‚' => '‘', + '‘' => '’', + '”' => '“', + ); + + /** + * Regular expression that matches starting quotes. + * + * @var string + */ + + protected $quote_starts; + + /** + * Ordered list starts. + * + * @var array + */ + + protected $olstarts = array(); + + /** + * Link prefix. + * + * @var string + */ + + protected $linkPrefix; + + /** + * Link index. + * + * @var int + */ + + protected $linkIndex = 1; + + /** + * Constructor. + * + * The constructor allows setting options that affect the + * class instance as a whole, such as the output doctype. + * To instruct the parser to return HTML5 markup instead of + * XHTML, set $doctype argument to 'html5'. + * + * + * $parser = new \Netcarver\Textile\Parser('html5'); + * echo $parser->textileThis('HTML(HyperText Markup Language)"); + * + * + * @param string $doctype The output document type, either 'xhtml' or 'html5' + * @throws \InvalidArgumentException + * @api + */ + + public function __construct($doctype = 'xhtml') + { + $doctypes = array( + 'xhtml', + 'html5', + ); + + if (!in_array($doctype, $doctypes, true)) { + throw new \InvalidArgumentException('Invalid doctype given.'); + } else { + $this->doctype = $doctype; + } + + $uid = uniqid(rand()); + $this->uid = 'textileRef:'.$uid.':'; + $this->linkPrefix = $uid.'-'; + $this->a = "(?:$this->hlgn|$this->vlgn)*"; + $this->s = "(?:$this->cspn|$this->rspn)*"; + $this->c = "(?:$this->clas|$this->styl|$this->lnge|$this->hlgn)*"; + + $this->cls = '(?:'. + "$this->clas(?:". + "$this->lnge(?:$this->styl)?|$this->styl(?:$this->lnge)?". + ')?|'. + "$this->lnge(?:". + "$this->clas(?:$this->styl)?|$this->styl(?:$this->clas)?". + ')?|'. + "$this->styl(?:". 
+ "$this->clas(?:$this->lnge)?|$this->lnge(?:$this->clas)?". + ')?'. + ')?'; + + if ($this->isUnicodePcreSupported()) { + $this->regex_snippets = array( + 'acr' => '\p{Lu}\p{Nd}', + 'abr' => '\p{Lu}', + 'nab' => '\p{Ll}', + 'wrd' => '(?:\p{L}|\p{M}|\p{N}|\p{Pc})', + 'mod' => 'u', // Make sure to mark the unicode patterns as such, Some servers seem to need this. + 'cur' => '\p{Sc}', + 'digit' => '\p{N}', + 'space' => '(?:\p{Zs}|\h|\v)', + 'char' => '(?:[^\p{Zs}\h\v])', + ); + } else { + $this->regex_snippets = array( + 'acr' => 'A-Z0-9', + 'abr' => 'A-Z', + 'nab' => 'a-z', + 'wrd' => '\w', + 'mod' => '', + 'cur' => '', + 'digit' => '\d', + 'space' => '(?:\s|\h|\v)', + 'char' => '\S', + ); + } + extract($this->regex_snippets); + $this->urlch = '['.$wrd.'"$\-_.+!*\'(),";\/?:@=&%#{}|\\^~\[\]`]'; + $this->quote_starts = implode('|', array_map('preg_quote', array_keys($this->quotes))); + + if (defined('DIRECTORY_SEPARATOR')) { + $this->ds = constant('DIRECTORY_SEPARATOR'); + } + + if (php_sapi_name() === 'cli') { + $this->doc_root = getcwd(); + } elseif (!empty($_SERVER['DOCUMENT_ROOT'])) { + $this->doc_root = $_SERVER['DOCUMENT_ROOT']; + } elseif (!empty($_SERVER['PATH_TRANSLATED'])) { + $this->doc_root = $_SERVER['PATH_TRANSLATED']; + } + + $this->doc_root = rtrim($this->doc_root, $this->ds).$this->ds; + } + + /** + * Defines a substitution symbol. + * + * Call this you need to redefine a substitution symbol to + * be used when parsing a Textile document. + * + * @param string $name Name of the symbol to assign a new value to. + * @param string $value New value for the symbol. + * @return Parser + * @api + */ + + public function setSymbol($name, $value) + { + $this->symbols[$name] = $value; + $this->rebuild_glyphs = true; + return $this; + } + + /** + * Gets a symbol definitions. + * + * This method can be used to get a symbol definition, or an + * array containing the full symbol table. 
+ * + * @param string|null $name The name of the symbol, or NULL if requesting the symbol table + * @return array|string The symbol table or the requested symbol + * @throws \InvalidArgumentException + * @api + */ + + public function getSymbol($name = null) + { + if ($name !== null) { + if (isset($this->symbols[$name])) { + return $this->symbols[$name]; + } + + throw new \InvalidArgumentException('The specified name does not match any symbols.'); + } + + return $this->symbols; + } + + /** + * Sets base image directory path. + * + * This is used when Textile is supplied with a relative image path. + * Allows client systems to have PHP-Textile convert relative image paths to + * absolute or prefixed paths. This method is used to set that base path, + * usually a absolute HTTP address pointing to a directory. + * + * + * $parser = new \Netcarver\Textile\Parser(); + * $parser->setRelativeImagePrefix('http://static.example.com/'); + * + * + * @param string $prefix The string to prefix all relative image paths with + * @return Parser + * @api + */ + + public function setRelativeImagePrefix($prefix = '') + { + $this->relativeImagePrefix = $prefix; + return $this; + } + + /** + * Toggles image dimension attributes. + * + * If $dimensionless is set to TRUE, image width and height attributes + * will not be included in rendered image tags. Normally, Textile will add + * dimensions height images that specify a relative path, as long + * as the image file can be accessed. + * + * + * $parser = new \Netcarver\Textile\Parser(); + * echo $parser->setDimensionlessImages(false)->textileThis('Hello World!'); + * + * + * @param bool $dimensionless TRUE to disable image dimensions, FALSE to enable + * @return Parser + * @api + */ + + public function setDimensionlessImages($dimensionless = true) + { + $this->dimensionless_images = (bool) $dimensionless; + return $this; + } + + /** + * Whether images will get dimensions or not. 
+ * + * This method will return the state of + * the state of the $dimensionless_images property. + * + * + * $parser = new \Netcarver\Textile\Parser(); + * if ($parser->getDimensionlessImages() === true) + * { + * echo 'Images do not get dimensions.'; + * } + * + * + * @return bool TRUE if images will not get dimensions, FALSE otherwise + * @api + */ + + public function getDimensionlessImages() + { + return (bool) $this->dimensionless_images; + } + + /** + * Gets Textile version number. + * + * + * $parser = new \Netcarver\Textile\Parser(); + * echo $parser->getVersion(); + * + * + * @return string Version + * @api + */ + + public function getVersion() + { + return $this->ver; + } + + /** + * Encodes the given text. + * + * + * $parser = new \Netcarver\Textile\Parser(); + * $parser->textileEncode('Some content to encode.'); + * + * + * @param string $text The text to be encoded + * @return string The encoded text + * @api + */ + + public function textileEncode($text) + { + $text = preg_replace("/&(?![#a-z0-9]+;)/i", "x%x%", $text); + $text = str_replace("x%x%", "&", $text); + return $text; + } + + /** + * Parses the given Textile input in un-restricted mode. + * + * This method should be used to parse any trusted Textile + * input, such as articles created by well-known + * authorised users. + * + * This method allows users to mix raw HTML and Textile. + * If you want to parse untrusted input, see the + * textileRestricted method instead. Using this less + * restrictive method on untrusted input, like comments + * and forum posts, will lead to XSS issues, as users + * will be able to use any HTML code, JavaScript links + * and Textile attributes in their input. + * + * + * $parser = new \Netcarver\Textile\Parser(); + * echo $parser->textileThis('h1. 
Hello World!'); + * + * + * @param string $text The Textile input to parse + * @param bool $lite Switch to lite mode + * @param bool $encode Encode input and return + * @param bool $noimage Disables images + * @param bool $strict This argument is ignored + * @param string $rel Relationship attribute applied to generated links + * @return string Parsed $text + * @see Parser::textileRestricted() + * @api + */ + + public function textileThis($text, $lite = false, $encode = false, $noimage = false, $strict = false, $rel = '') + { + $this->prepare($lite, $noimage, $rel); + $this->url_schemes = $this->unrestricted_url_schemes; + + if ($encode) { + trigger_error( + 'Use of the $encode argument is discouraged. Use Parser::textileEncode() instead.', + E_USER_DEPRECATED + ); + return $this->textileEncode($text); + } + + return $this->textileCommon($text, $lite); + } + + /** + * Parses the given Textile input in restricted mode. + * + * This method should be used for any untrusted user input, + * including comments or forum posts. + * + * This method escapes any raw HTML input, ignores unsafe + * attributes, links only whitelisted URL schemes + * and by default also prevents the use of images and + * extra Textile formatting, accepting only paragraphs + * and blockquotes as valid block tags. + * + * + * $parser = new \Netcarver\Textile\Parser(); + * echo $parser->textileRestricted('h1. 
Hello World!'); + * + * + * @param string $text The Textile input to parse + * @param bool $lite Controls lite mode, allowing extra formatting + * @param bool $noimage Allow images + * @param string $rel Relationship attribute applied to generated links + * @return string Parsed $text + * @see Parser::textileThis() + * @api + */ + + public function textileRestricted($text, $lite = true, $noimage = true, $rel = 'nofollow') + { + $this->prepare($lite, $noimage, $rel); + $this->url_schemes = $this->restricted_url_schemes; + $this->restricted = true; + + // Escape any raw html + $text = $this->encodeHTML($text, 0); + + return $this->textileCommon($text, $lite); + } + + /** + * Parses Textile syntax. + * + * This method performs common parse actions. + * + * @param string $text The input to parses + * @param bool $lite Controls lite mode + * @return string Parsed input + */ + + protected function textileCommon($text, $lite) + { + $text = $this->cleanWhiteSpace($text); + $text = $this->cleanUniqueTokens($text); + + if ($lite) { + $this->blocktag_whitelist = array('bq', 'p'); + $text = $this->blocks($text."\n\n"); + } else { + $this->blocktag_whitelist = array( + 'bq', + 'p', + 'bc', + 'notextile', + 'pre', + 'h[1-6]', + 'fn'.$this->regex_snippets['digit'].'+', + '###', + ); + $text = $this->blocks($text); + $text = $this->placeNoteLists($text); + } + + $text = $this->retrieve($text); + $text = $this->replaceGlyphs($text); + $text = $this->retrieveTags($text); + $text = $this->retrieveURLs($text); + + $text = str_replace("
", "
\n", $text); + + return $text; + } + + /** + * Prepares the glyph patterns from the symbol table. + * + * @see Parser::setSymbol() + * @see Parser::getSymbol() + */ + + protected function prepGlyphs() + { + if ($this->rebuild_glyphs === false) { + return; + } + + extract($this->symbols, EXTR_PREFIX_ALL, 'txt'); + extract($this->regex_snippets); + $pnc = '[[:punct:]]'; + + if ($cur) { + $cur = '(?:['.$cur.']'.$space.'*)?'; + } + + $this->glyph_search = array(); + $this->glyph_replace = array(); + + // Dimension sign + $this->glyph_search[] = '/([0-9]+[\])]?[\'"]? ?)[xX]( ?[\[(]?)(?=[+-]?'.$cur.'[0-9]*\.?[0-9]+)/'.$mod; + $this->glyph_replace[] = '$1'.$txt_dimension.'$2'; + + // Apostrophe + $this->glyph_search[] = '/('.$wrd.'|\))\'('.$wrd.')/'.$mod; + $this->glyph_replace[] = '$1'.$txt_apostrophe.'$2'; + + // Back in '88/the '90s but not in his '90s', '1', '1.' '10m' or '5.png' + $this->glyph_search[] = '/('.$space.')\'(\d+'.$wrd.'?)\b(?![.]?['.$wrd.']*?\')/'.$mod; + $this->glyph_replace[] = '$1'.$txt_apostrophe.'$2'; + + // Single open following open bracket + $this->glyph_search[] = "/([([{])'(?=\S)/".$mod; + $this->glyph_replace[] = '$1'.$txt_quote_single_open; + + // Single closing + $this->glyph_search[] = '/(\S)\'(?='.$space.'|'.$pnc.'|<|$)/'.$mod; + $this->glyph_replace[] = '$1'.$txt_quote_single_close; + + // Default single opening + $this->glyph_search[] = "/'/"; + $this->glyph_replace[] = $txt_quote_single_open; + + // Double open following an open bracket. 
Allows things like Hello ["(Mum) & dad"] + $this->glyph_search[] = '/([([{])"(?=\S)/'.$mod; + $this->glyph_replace[] = '$1'.$txt_quote_double_open; + + // Double closing + $this->glyph_search[] = '/(\S)"(?='.$space.'|'.$pnc.'|<|$)/'.$mod; + $this->glyph_replace[] = '$1'.$txt_quote_double_close; + + // Default double opening + $this->glyph_search[] = '/"/'; + $this->glyph_replace[] = $txt_quote_double_open; + + // 3+ uppercase acronym + $this->glyph_search[] = '/\b(['.$abr.']['.$acr.']{2,})\b(?:[(]([^)]*)[)])/'.$mod; + + if ($this->doctype === 'html5') { + $this->glyph_replace[] = '$1'; + } else { + $this->glyph_replace[] = '$1'; + } + + // 3+ uppercase + $this->glyph_search[] = '/('.$space.'|^|[>(;-])(['.$abr.']{3,})'. + '(['.$nab.']*)(?='.$space.'|'.$pnc.'|<|$)(?=[^">]*?(<|$))/'.$mod; + $this->glyph_replace[] = '$1'.$this->uid.':glyph:$2$3'; + + // Ellipsis + $this->glyph_search[] = '/([^.]?)\.{3}/'; + $this->glyph_replace[] = '$1'.$txt_ellipsis; + + // em dash + $this->glyph_search[] = '/--/'; + $this->glyph_replace[] = $txt_emdash; + + // en dash + $this->glyph_search[] = '/ - /'; + $this->glyph_replace[] = ' '.$txt_endash.' 
'; + + // Trademark + $this->glyph_search[] = '/(\b ?|'.$space.'|^)[([]TM[])]/i'.$mod; + $this->glyph_replace[] = '$1'.$txt_trademark; + + // Registered + $this->glyph_search[] = '/(\b ?|'.$space.'|^)[([]R[])]/i'.$mod; + $this->glyph_replace[] = '$1'.$txt_registered; + + // Copyright + $this->glyph_search[] = '/(\b ?|'.$space.'|^)[([]C[])]/i'.$mod; + $this->glyph_replace[] = '$1'.$txt_copyright; + + // 1/4 + $this->glyph_search[] = '/[([]1\/4[])]/'; + $this->glyph_replace[] = $txt_quarter; + + // 1/2 + $this->glyph_search[] = '/[([]1\/2[])]/'; + $this->glyph_replace[] = $txt_half; + + // 3/4 + $this->glyph_search[] = '/[([]3\/4[])]/'; + $this->glyph_replace[] = $txt_threequarters; + + // Degrees -- that's a small 'oh' + $this->glyph_search[] = '/[([]o[])]/'; + $this->glyph_replace[] = $txt_degrees; + + // Plus minus + $this->glyph_search[] = '/[([]\+\/-[])]/'; + $this->glyph_replace[] = $txt_plusminus; + + // No need to rebuild next run unless a symbol is redefined + $this->rebuild_glyphs = false; + } + + /** + * Returns the maximum allowed link index. + * + * Parser::prepare() compares the current link index against this + * value; once the index reaches it, the link prefix is extended + * and the index restarts at 1. + * + * @return int The upper bound for the link index (1000000) + * @since 3.5.5 + */ + + protected function getMaxLinkIndex() + { + return 1000000; + } + + /** + * Prepares the parser for parsing. + * + * This method prepares the transient internal state of + * Textile parser in preparation for parsing a new document. 
+ * + * @param bool $lite Controls lite mode + * @param bool $noimage Disallow images + * @param string $rel A relationship attribute applied to links + */ + + protected function prepare($lite, $noimage, $rel) + { + if ($this->linkIndex >= $this->getMaxLinkIndex()) { + $this->linkPrefix .= '-'; + $this->linkIndex = 1; + } + + $this->unreferencedNotes = array(); + $this->notelist_cache = array(); + $this->notes = array(); + $this->urlrefs = array(); + $this->shelf = array(); + $this->fn = array(); + $this->span_depth = 0; + $this->refIndex = 1; + $this->refCache = array(); + $this->note_index = 1; + $this->rel = $rel; + $this->lite = $lite; + $this->noimage = $noimage; + $this->prepGlyphs(); + } + + /** + * Cleans a HTML attribute value. + * + * The value is URL-decoded repeatedly until a pass no longer changes + * its length, for at most three passes. If all three passes are used, + * the input is assumed to be tainted and an empty string is returned. + * Otherwise the decoded value is kept. + * + * This method also strips any ", ' and = characters + * from the given value. This method does not guarantee + * valid HTML or full sanitization. + * + * @param string $in The input string + * @return string Cleaned string, or an empty string if the input was discarded + */ + + protected function cleanAttribs($in) + { + $tmp = $in; + $before = -1; + $after = 0; + $max = 3; + $i = 0; + + while (($after != $before) && ($i < $max)) { + $before = strlen($tmp); + $tmp = rawurldecode($tmp); + $after = strlen($tmp); + $i++; + } + + if ($i === $max) { + // If we hit the max allowed decodes, assume the input is tainted and consume it. + $out = ''; + } else { + $out = str_replace(array('"', "'", '='), '', $tmp); + } + + return $out; + } + + /** + * Constructs a HTML tag from an object. + * + * This is a helper method that creates a new + * instance of \Netcarver\Textile\Tag. 
+ * + * @param string $name The HTML element name + * @param array $atts HTML attributes applied to the tag + * @param bool $selfclosing Determines if the tag should be selfclosing + * @return Tag + */ + + protected function newTag($name, $atts, $selfclosing = true) + { + return new Tag($name, $atts, $selfclosing); + } + + /** + * Parses Textile attributes. + * + * Delegates to Parser::parseAttribsToArray() and renders the resulting + * array with Parser::formatAttributeString(). + * + * @param string $in The Textile attribute string to be parsed + * @param string $element Focus the routine to interpret the attributes as applying to a specific HTML tag + * @param bool $include_id If FALSE, IDs are not included in the attribute list + * @param string $autoclass Additional class names applied to the output + * @return string HTML attribute list + * @see Parser::parseAttribsToArray() + */ + + protected function parseAttribs($in, $element = '', $include_id = true, $autoclass = '') + { + $o = $this->parseAttribsToArray($in, $element, $include_id, $autoclass); + + return $this->formatAttributeString($o); + } + + /** + * Converts an array of named attribute => value mappings to a string. + * + * Each pair is rendered as a leading space followed by name="value". + * Values are inserted as given; no escaping is applied here. + * + * @param array $attribute_array Attribute name => value mappings + * @return string The rendered attributes, or an empty string for an empty array + */ + + protected function formatAttributeString(array $attribute_array) + { + $out = ''; + + if (count($attribute_array)) { + foreach ($attribute_array as $k => $v) { + $out .= " $k=\"$v\""; + } + } + + return $out; + } + + /** + * Parses Textile attributes into an array. 
+ * + * Recognises colspan/rowspan markers (td only), vertical alignment + * (td and tr), a {style} block, a [lang] block and a (class#id) block. + * Each recognised block is removed from the working copy of the input + * before the next one is looked for. + * + * @param string $in The Textile attribute string to be parsed + * @param string $element Focus the routine to interpret the attributes as applying to a specific HTML tag + * @param bool $include_id If FALSE, IDs are not included in the attribute list + * @param string $autoclass Additional class names applied to the output + * @return array HTML attributes as key => value mappings + * @see Parser::parseAttribs() + */ + + protected function parseAttribsToArray($in, $element = '', $include_id = true, $autoclass = '') + { + // Must start as an array: style rules are collected with $style[] below, + // and PHP 7.1+ raises a fatal error for the [] operator on a string. + $style = array(); + $class = ''; + $lang = ''; + $colspan = ''; + $rowspan = ''; + $span = ''; + $width = ''; + $id = ''; + + $matched = $in; + if ($element == 'td') { + if (preg_match("/\\\\([0-9]+)/", $matched, $csp)) { + $colspan = $csp[1]; + } + + if (preg_match("/\/([0-9]+)/", $matched, $rsp)) { + $rowspan = $rsp[1]; + } + } + + if ($element == 'td' or $element == 'tr') { + if (preg_match("/($this->vlgn)/", $matched, $vert)) { + $style[] = "vertical-align:" . $this->vAlign($vert[1]); + } + } + + if (preg_match("/\{([^}]*)\}/", $matched, $sty)) { + if ($sty[1] = $this->cleanAttribs($sty[1])) { + $style[] = rtrim($sty[1], ';'); + } + $matched = str_replace($sty[0], '', $matched); + } + + if (preg_match("/\[([^]]+)\]/U", $matched, $lng)) { + // Consume entire lang block -- valid or invalid. + $matched = str_replace($lng[0], '', $matched); + if (preg_match("/\[([a-zA-Z]{2}(?:[\-\_][a-zA-Z]{2})?)\]/U", $lng[0], $lng)) { + $lang = $lng[1]; + } + } + + if (preg_match("/\(([^()]+)\)/U", $matched, $cls)) { + + $class_regex = "/^([-a-zA-Z 0-9_\.]*)$/"; + + // Consume entire class block -- valid or invalid. + $matched = str_replace($cls[0], '', $matched); + + // Only allow a restricted subset of the CSS standard characters for classes/ids. + // No encoding markers allowed. 
+ if (preg_match("/\(([-a-zA-Z 0-9_\.\:\#]+)\)/U", $cls[0], $cls)) { + $hashpos = strpos($cls[1], '#'); + // If a textile class block attribute was found with a '#' in it + // split it into the css class and css id... + if (false !== $hashpos) { + if (preg_match("/#([-a-zA-Z0-9_\.\:]*)$/", substr($cls[1], $hashpos), $ids)) { + $id = $ids[1]; + } + + if (preg_match($class_regex, substr($cls[1], 0, $hashpos), $ids)) { + $class = $ids[1]; + } + } else { + if (preg_match($class_regex, $cls[1], $ids)) { + $class = $ids[1]; + } + } + } + } + + if (preg_match("/([(]+)/", $matched, $pl)) { + $style[] = "padding-left:" . strlen($pl[1]) . "em"; + $matched = str_replace($pl[0], '', $matched); + } + + if (preg_match("/([)]+)/", $matched, $pr)) { + $style[] = "padding-right:" . strlen($pr[1]) . "em"; + $matched = str_replace($pr[0], '', $matched); + } + + if (preg_match("/($this->hlgn)/", $matched, $horiz)) { + $style[] = "text-align:" . $this->hAlign($horiz[1]); + } + + if ($element == 'col') { + if (preg_match("/(?:\\\\([0-9]+))?{$this->regex_snippets['space']}*([0-9]+)?/", $matched, $csp)) { + $span = isset($csp[1]) ? $csp[1] : ''; + $width = isset($csp[2]) ? $csp[2] : ''; + } + } + + if ($this->restricted) { + $o = array(); + $class = trim($autoclass); + if ($class) { + $o['class'] = $this->cleanAttribs($class); + } + + if ($lang) { + $o['lang'] = $this->cleanAttribs($lang); + } + + ksort($o); + return $o; + } else { + $class = trim($class . ' ' . 
$autoclass); + } + + $o = array(); + if ($class) { + $o['class'] = $this->cleanAttribs($class); + } + + if ($colspan) { + $o['colspan'] = $this->cleanAttribs($colspan); + } + + if ($id && $include_id) { + $o['id'] = $this->cleanAttribs($id); + } + + if ($lang) { + $o['lang'] = $this->cleanAttribs($lang); + } + + if ($rowspan) { + $o['rowspan'] = $this->cleanAttribs($rowspan); + } + + if ($span) { + $o['span'] = $this->cleanAttribs($span); + } + + if ($style) { + $so = ''; + $tmps = array(); + foreach ($style as $s) { + $parts = explode(';', $s); + foreach ($parts as $p) { + if ($p = trim(trim($p), ":")) { + $tmps[] = $p; + } + } + } + + sort($tmps); + foreach ($tmps as $p) { + if (!empty($p)) { + $so .= $p.';'; + } + } + $style = trim(str_replace(array("\n", ';;'), array('', ';'), $so)); + + $o['style'] = $style; + } + + if ($width) { + $o['width'] = $this->cleanAttribs($width); + } + + ksort($o); + return $o; + } + + /** + * Checks whether the text is not enclosed by a block tag. + * + * @param string $text The input string + * @return bool TRUE if the text is not enclosed + */ + + protected function hasRawText($text) + { + $r = preg_replace( + '@<(p|hr|br|img|blockquote|div|form|table|ul|ol|dl|pre|h[1-6])[^>]*?'.chr(62).'.*]*?>@si', + '', + trim($text) + ); + $r = trim(preg_replace('@<(br|hr|img)[^>]*?/?>@i', '', trim($r))); + return '' != $r; + } + + /** + * Parses textile table structures into HTML. + * + * @param string $text The textile input + * @return string The parsed text + */ + + protected function tables($text) + { + $text = $text . "\n\n"; + return preg_replace_callback( + "/^(?:table(?P_?{$this->s}{$this->a}{$this->cls})\.". + "(?P.*)?\n)?^(?P{$this->a}{$this->cls}\.? ?\|.*\|){$this->regex_snippets['space']}*\n\n/smU", + array(&$this, "fTable"), + $text + ); + } + + /** + * Constructs a HTML table from a textile table structure. + * + * This method is used by Parser::tables() to process + * found table structures. 
+ * + * @param array $matches Regex match from Parser::tables(); the 'tatts', 'summary' and 'rows' entries are read + * @return string HTML table + * @see Parser::tables() + */ + + protected function fTable($matches) + { + $tatts = $this->parseAttribs($matches['tatts'], 'table'); + $space = $this->regex_snippets['space']; + + $sum = trim($matches['summary']) + ? ' summary="'.htmlspecialchars(trim($matches['summary']), ENT_QUOTES, 'UTF-8').'"' + : ''; + $cap = ''; + $colgrp = ''; + $last_rgrp = ''; + $c_row = 1; + // Start with an empty row list: a table holding only a caption or a + // colgroup skips every iteration below via continue, and the rows + // are joined unconditionally after the loop. + $rows = array(); + + foreach (preg_split("/\|{$space}*?$/m", $matches['rows'], -1, PREG_SPLIT_NO_EMPTY) as $row) { + + $row = ltrim($row); + + // Caption -- can only occur on row 1, otherwise treat '|=. foo |...' + // as a normal center-aligned cell. + if (($c_row <= 1) && preg_match( + "/^\|\=(?P$this->s$this->a$this->cls)\. (?P[^\n]*)(?P.*)/s", + ltrim($row), + $cmtch + )) { + $capts = $this->parseAttribs($cmtch['capts']); + $cap = "\t".trim($cmtch['cap'])."\n"; + $row = ltrim($cmtch['row']); + if (empty($row)) { + continue; + } + } + + $c_row += 1; + + // Colgroup + if (preg_match("/^\|:(?P$this->s$this->a$this->cls\. .*)/m", ltrim($row), $gmtch)) { + // Is this colgroup def missing a closing pipe? If so, there + // will be a newline in the middle of $row somewhere. + $nl = strpos($row, "\n"); + $idx = 0; + + foreach (explode('|', str_replace('.', '', $gmtch['cols'])) as $col) { + $gatts = $this->parseAttribs(trim($col), 'col'); + $colgrp .= "\t" : $gatts." />")."\n"; + $idx++; + } + + $colgrp .= "\t\n"; + + if ($nl === false) { + continue; + } else { + // Recover from our missing pipe and process the rest of the line. 
+ $row = ltrim(substr($row, $nl)); + } + } + + // Row group + $rgrpatts = $rgrp = ''; + + if (preg_match( + "/(:?^\|(?P$this->vlgn)(?P$this->s$this->a$this->cls)\.{$space}*$\n)?^(?P.*)/sm", + ltrim($row), + $grpmatch + )) { + if (isset($grpmatch['part'])) { + if ($grpmatch['part'] === '^') { + $rgrp = 'head'; + } elseif ($grpmatch['part'] === '~') { + $rgrp = 'foot'; + } elseif ($grpmatch['part'] === '-') { + $rgrp = 'body'; + } + } + + if (isset($grpmatch['part'])) { + $rgrpatts = $this->parseAttribs($grpmatch['rgrpatts']); + } + + if (isset($grpmatch['row'])) { + $row = $grpmatch['row']; + } + } + + if (preg_match("/^(?P$this->a$this->cls\. )(?P.*)/m", ltrim($row), $rmtch)) { + $ratts = $this->parseAttribs($rmtch['ratts'], 'tr'); + $row = $rmtch['row']; + } else { + $ratts = ''; + } + + $cells = array(); + $cellctr = 0; + + foreach (explode("|", $row) as $cell) { + $ctyp = "d"; + + if (preg_match("/^_(?=[{$this->regex_snippets['space']}[:punct:]])/", $cell)) { + $ctyp = "h"; + } + + if (preg_match("/^(?P_?$this->s$this->a$this->cls\. )(?P.*)/s", $cell, $cmtch)) { + $catts = $this->parseAttribs($cmtch['catts'], 'td'); + $cell = $cmtch['cell']; + } else { + $catts = ''; + } + + if (!$this->lite) { + $a = array(); + + if (preg_match('/(?'.$this->regex_snippets['space'].'*)(?P.*)/s', $cell, $a)) { + $cell = $this->redclothLists($a['cell']); + $cell = $this->textileLists($cell); + $cell = $a['space'] . $cell; + } + } + + if ($cellctr > 0) { + // Ignore first 'cell': it precedes the opening pipe + $cells[] = $this->doTagBr("t$ctyp", "\t\t\t$cell"); + } + + $cellctr++; + } + + $grp = ''; + + if ($rgrp && $last_rgrp) { + $grp .= "\t\n"; + } + + if ($rgrp) { + $grp .= "\t\n"; + } + + $last_rgrp = ($rgrp) ? $rgrp : $last_rgrp; + $rows[] = $grp."\t\t\n" . join("\n", $cells) . ($cells ? "\n" : "") . "\t\t"; + unset($cells, $catts); + } + + $rows = join("\n", $rows) . 
"\n"; + $close = ''; + + if ($last_rgrp) { + $close = "\t\n"; + } + + return "\n".$cap.$colgrp.$rows.$close."\n\n"; + } + + /** + * Parses RedCloth-style definition lists into HTML. + * + * @param string $text The textile input + * @return string The parsed text + */ + + protected function redclothLists($text) + { + return preg_replace_callback( + "/^([-]+$this->cls[ .].*:=.*)$(?![^-])/smU", + array(&$this, "fRedclothList"), + $text + ); + } + + /** + * Constructs a HTML definition list from a RedCloth-style definition structure. + * + * This method is used by Parser::redclothLists() to process + * found definition list structures. + * + * @param array $m + * @return string HTML definition list + * @see Parser::redclothLists() + */ + + protected function fRedclothList($m) + { + $in = $m[0]; + $out = array(); + $text = preg_split('/\n(?=[-])/m', $in); + foreach ($text as $nr => $line) { + $m = array(); + if (preg_match("/^[-]+(?P$this->cls)\.? (?P.*)$/s", $line, $m)) { + $content = trim($m['content']); + $atts = $this->parseAttribs($m['atts']); + + if (!preg_match( + "/^(.*?){$this->regex_snippets['space']}*:=(.*?)". + "{$this->regex_snippets['space']}*(=:|:=)?". + "{$this->regex_snippets['space']}*$/s", + $content, + $xm + )) { + $xm = array( $content, $content, '' ); + } + + list(, $term, $def,) = $xm; + $term = trim($term); + $def = trim($def, ' '); + + if (empty($out)) { + if (''==$def) { + $out[] = ""; + } else { + $out[] = '
'; + } + } + + if ('' != $term) { + $pos = strpos($def, "\n"); + $def = str_replace("\n", "
", trim($def)); + if (0 === $pos) { + $def = '

' . $def . '

'; + } + $term = str_replace("\n", "
", $term); + + $term = $this->graf($term); + $def = $this->graf($def); + + $out[] = "\t$term"; + + if ($def) { + $out[] = "\t
$def
"; + } + } + } + } + $out[] = '
'; + return implode("\n", $out); + } + + /** + * Parses Textile list structures into HTML. + * + * Searches for ordered, un-ordered and definition lists in the + * textile input and generates HTML lists for them. + * + * @param string $text The input + * @return string The parsed text + */ + + protected function textileLists($text) + { + return preg_replace_callback( + "/^((?:[*;:]+|[*;:#]*#(?:_|\d+)?)$this->cls[ .].*)$(?![^#*;:])/smU", + array(&$this, "fTextileList"), + $text + ); + } + + /** + * Constructs a HTML list from a Textile list structure. + * + * This method is used by Parser::textileLists() to process + * found list structures. + * + * @param array $m + * @return string HTML list + * @see Parser::textileLists() + */ + + protected function fTextileList($m) + { + $text = preg_split('/\n(?=[*#;:])/m', $m[0]); + $pt = ''; + foreach ($text as $nr => $line) { + $nextline = isset($text[$nr+1]) ? $text[$nr+1] : false; + if (preg_match("/^(?P[#*;:]+)(?P_|\d+)?(?P$this->cls)[ .](?P.*)$/s", $line, $m)) { + $tl = $m['tl']; + $st = $m['st']; + $atts = $m['atts']; + $content = trim($m['content']); + $nl = ''; + $ltype = $this->liType($tl); + $litem = (strpos($tl, ';') !== false) ? 'dt' : ((strpos($tl, ':') !== false) ? 'dd' : 'li'); + $showitem = (strlen($content) > 0); + + if ('o' === $ltype) { + // Handle list continuation/start attribute on ordered lists. + if (!isset($this->olstarts[$tl])) { + $this->olstarts[$tl] = 1; + } + + if (strlen($tl) > strlen($pt)) { + // First line of this level of ol -- has a start attribute? + if ('' == $st) { + // No => reset count to 1. + $this->olstarts[$tl] = 1; + } elseif ('_' !== $st) { + // Yes, and numeric => reset to given. + // TRICKY: the '_' continuation marker just means + // output the count so don't need to do anything + // here. + $this->olstarts[$tl] = (int) $st; + } + } + + if ((strlen($tl) > strlen($pt)) && '' !== $st) { + // Output the start attribute if needed. + $st = ' start="' . $this->olstarts[$tl] . 
'"'; + } + + if ($showitem) { + // TRICKY: Only increment the count for list items; + // not when a list definition line is encountered. + $this->olstarts[$tl] += 1; + } + } + + if (preg_match("/^(?P[#*;:]+)(_|[\d]+)?($this->cls)[ .].*/", $nextline, $nm)) { + $nl = $nm['nextlistitem']; + } + + if ((strpos($pt, ';') !== false) && (strpos($tl, ':') !== false)) { + // We're already in a
so flag not to start another + $lists[$tl] = 2; + } + + $tabs = str_repeat("\t", strlen($tl)-1); + $atts = $this->parseAttribs($atts); + if (!isset($lists[$tl])) { + $lists[$tl] = 1; + $line = "$tabs<" . $ltype . "l$atts$st>" . (($showitem) ? "\n$tabs\t<$litem>" . $content : ''); + } else { + $line = ($showitem) ? "$tabs\t<$litem$atts>" . $content : ''; + } + + if ((strlen($nl) <= strlen($tl))) { + $line .= (($showitem) ? "" : ''); + } + + foreach (array_reverse($lists) as $k => $v) { + if (strlen($k) > strlen($nl)) { + $line .= ($v==2) ? '' : "\n$tabsliType($k) . "l>"; + if ((strlen($k) > 1) && ($v != 2)) { + $line .= ""; + } + unset($lists[$k]); + } + } + $pt = $tl; // Remember the current Textile tag + } + + $out[] = $line; + } + + $out = implode("\n", $out); + return $this->doTagBr($litem, $out); + } + + /** + * Determines the list type from the Textile input symbol. + * + * @param string $in Textile input containing the possible list marker + * @return string Either 'd', 'o', 'u' + */ + + protected function liType($in) + { + $m = array(); + $type = 'd'; + if (preg_match('/^(?P[#*]+)/', $in, $m)) { + $type = ('#' === substr($m['type'], -1)) ? 'o' : 'u'; + } + return $type; + } + + /** + * Adds br tags within the specified container tag. + * + * @param string $tag The tag + * @param string $in The input + * @return string + */ + + protected function doTagBr($tag, $in) + { + return preg_replace_callback( + '@<(?P'.preg_quote($tag).')(?P[^>]*?)>(?P.*)(?P)@s', + array(&$this, 'fBr'), + $in + ); + } + + /** + * Adds br tags to paragraphs and headings. + * + * @param string $in The input + * @return string + */ + + protected function doPBr($in) + { + return preg_replace_callback( + '@<(?Pp|h[1-6])(?P[^>]*?)>(?P.*)(?P)@s', + array(&$this, 'fPBr'), + $in + ); + } + + /** + * Less restrictive version of fBr method. + * + * Used only in paragraphs and headings where the next row may + * start with a smiley or perhaps something like '#8 bolt...' + * or '*** stars...'. 
+ * + * @param string $m The input + * @return string + */ + + protected function fPBr($m) + { + // Replaces
\n instances that are not followed by white-space, + // or at end, with single LF. + $content = preg_replace( + "~{$this->regex_snippets['space']}*\n(?![{$this->regex_snippets['space']}|])~i", + "\n", + $m['content'] + ); + // Replaces those LFs that aren't followed by white-space, or at end, with
. + $content = preg_replace("/\n(?![\s|])/", '
', $content); + return '<'.$m['tag'].$m['atts'].'>'.$content.$m['closetag']; + } + + /** + * Formats line breaks. + * + * @param string $m The input + * @return string + */ + + protected function fBr($m) + { + $content = preg_replace("@(.+)(?|
|||)\n(?![#*;:\s|])@", '$1
', $m['content']); + return '<'.$m['tag'].$m['atts'].'>'.$content.$m['closetag']; + } + + /** + * Splits the given input into blocks. + * + * Blocks are separated by runs of two or more line breaks, and + * the blocks are processed one by one. + * + * @param string $text Textile source text + * @return string Input text with blocks processed + */ + + protected function blocks($text) + { + $regex = '/^(?P'.join('|', $this->blocktag_whitelist).')'. + '(?P'.$this->a.$this->cls.')\.(?P\.?)(?::(?P\S+))? (?P.*)$/Ss'. + $this->regex_snippets['mod']; + + // A limit of -1 means "no limit"; passing null to this int parameter is deprecated since PHP 8.1. + $textblocks = preg_split('/(\n{2,})/', $text, -1, PREG_SPLIT_DELIM_CAPTURE); + + $eatWhitespace = false; + $whitespace = ''; + $ext = ''; + $out = array(); + + foreach ($textblocks as $block) { + + // Line is just whitespace, keep it for the next block. + if (trim($block) === '') { + if ($eatWhitespace === false) { + $whitespace .= $block; + } + continue; + } + + if (!$ext) { + $tag = 'p'; + $atts = ''; + $cite = ''; + $graf = ''; + $eat = false; + } + + $eatWhitespace = false; + $anonymous_block = !preg_match($regex, $block, $m); + + if (!$anonymous_block) { + // Last block was extended, so close it + if ($ext) { + $out[count($out)-1] .= $c1; + } + + // Extract the new block's parts + extract($m); + list($o1, $o2, $content, $c2, $c1, $eat) = $this->fBlock($m); + + // Leave off c1 if this block is extended, we'll close it at the start of the next block + $block = $o1.$o2.$content.$c2; + if (!$ext) { + $block .= $c1; + } + } else { + if ($ext || strpos($block, ' ') !== 0) { + list($o1, $o2, $content, $c2, $c1, $eat) = $this->fBlock(array( + 0, + $tag, + $atts, + $ext, + $cite, + $block, + )); + + // Skip $o1/$c1 because this is part of a continuing extended block + if ($tag == 'p' && !$this->hasRawText($content)) { + $block = $content; + } else { + $block = $o2.$content.$c2; + } + } else { + $block = $this->graf($block); + } + } + + $block = $this->doPBr($block); + $block = $whitespace. str_replace('
', '
', $block); + + if ($ext && $anonymous_block) { + $out[count($out)-1] .= $block; + } elseif (!$eat) { + $out[] = $block; + } + + if ($eat) { + $eatWhitespace = true; + } else { + $whitespace = ''; + } + + } + + if ($ext) { + $out[count($out)-1] .= $c1; + } + + return join('', $out); + } + + /** + * Formats the given block. + * + * Adds block tags and formats the text content inside + * the block. + * + * @param string $m The block content to format + * @return array + */ + + protected function fBlock($m) + { + list(, $tag, $att, $ext, $cite, $content) = $m; + $atts = $this->parseAttribs($att); + $space = $this->regex_snippets['space']; + + $o1 = ''; + $o2 = ''; + $c2 = ''; + $c1 = ''; + $eat = false; + + if ($tag === 'p') { + // Is this an anonymous block with a note definition? + $notedef = preg_replace_callback( + "/ + ^note\# # start of note def marker + (?P