You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

254 lines
8.5 KiB

  1. <?php
  2. /**
  3. * Hoa
  4. *
  5. *
  6. * @license
  7. *
  8. * New BSD License
  9. *
  10. * Copyright © 2007-2016, Hoa community. All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or without
  13. * modification, are permitted provided that the following conditions are met:
  14. * * Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. * * Redistributions in binary form must reproduce the above copyright
  17. * notice, this list of conditions and the following disclaimer in the
  18. * documentation and/or other materials provided with the distribution.
  19. * * Neither the name of the Hoa nor the names of its contributors may be
  20. * used to endorse or promote products derived from this software without
  21. * specific prior written permission.
  22. *
  23. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  24. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
  27. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  28. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  29. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  31. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  32. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. */
  35. namespace Hoa\Compiler\Llk;
  36. use Hoa\Compiler;
  37. use Hoa\Consistency;
  38. use Hoa\Stream;
  /**
   * Class \Hoa\Compiler\Llk.
   *
   * Provide a generic LL(k) compiler compiler using the PP language.
   * Support: skip (%skip), token (%token), token namespace (ns1:token name value
   * -> ns2), rule (rule:), disjunction (|), capturing (operators ( and )),
   * quantifiers (?, +, * and {n,m}), node (#node) with options (#node:options),
   * skipped token (::token::), kept token (<token>), token unification (token[i])
   * and rule unification (rule()[j]).
   *
   * @copyright  Copyright © 2007-2016 Hoa community
   * @license    New BSD License
   */
  class Llk
  {
      /**
       * Load a parser from a stream that contains the grammar.
       *
       * Example:
       *     %skip   space     \s
       *
       *     %token  word      [a-zA-Z]+
       *     %token  number    [0-9]+(\.[0-9]+)?
       *     %token  open_par  \(
       *     %token  close_par \)
       *     %token  equal     =
       *     %token  plus      \+
       *     %token  minus     \-
       *     %token  divide    \/
       *     %token  times     \*
       *
       *     #equation:
       *         formula() ::equal:: <number>
       *
       *     formula:
       *         factor()
       *         (
       *             ::plus::  formula() #addition
       *           | ::minus:: formula() #subtraction
       *         )?
       *
       *     factor:
       *         operand()
       *         (
       *             ::times::  factor() #product
       *           | ::divide:: factor() #division
       *         )?
       *
       *     operand:
       *           <word>
       *         | ::minus::? <number> #number
       *         | ::open_par:: formula() ::close_par::
       *
       * Use tabs or spaces, it does not matter.
       * Instructions follow the form: %<instruction>. Only %skip and %token are
       * supported.
       * Rules follow the form: <rule name>:<new line>[<space><rule><new line>]*.
       * Contexts are useful to set specific skips and tokens. We give a full
       * example with context + unification (for fun) to parse <a>b</a>:
       *     %skip   space         \s
       *     %token  lt             <  ->  in_tag
       *     %token  inner          [^<]*
       *
       *     %skip   in_tag:space   \s
       *     %token  in_tag:slash   /
       *     %token  in_tag:tagname [^>]+
       *     %token  in_tag:gt      >  ->  default
       *
       *     #foo:
       *         ::lt:: <tagname[0]> ::gt::
       *         <inner>
       *         ::lt:: ::slash:: ::tagname[0]:: ::gt::
       *
       * The whole stream is read with readAll(). When nothing is read, an
       * exception is thrown; if the stream is pointable, the message also says
       * whether its pointer was already past the beginning.
       *
       * @param   \Hoa\Stream\IStream\In  $stream    Stream that contains the
       *                                             grammar.
       * @return  \Hoa\Compiler\Llk\Parser
       * @throws  \Hoa\Compiler\Exception
       */
      public static function load(Stream\IStream\In $stream)
      {
          $pp = $stream->readAll();

          if (empty($pp)) {
              $message = 'The grammar is empty';

              if ($stream instanceof Stream\IStream\Pointable) {
                  if (0 < $stream->tell()) {
                      // The pointer is not at the beginning: the content may
                      // simply have been consumed by an earlier read.
                      $message .=
                          ': the stream ' . $stream->getStreamName() .
                          ' is pointable and not rewinded, maybe it ' .
                          'could be the reason';
                  } else {
                      $message .=
                          ': nothing to read on the stream ' .
                          $stream->getStreamName();
                  }
              }

              throw new Compiler\Exception($message . '.', 0);
          }

          // $tokens and $rawRules are filled by reference.
          static::parsePP($pp, $tokens, $rawRules, $stream->getStreamName());

          $ruleAnalyzer = new Rule\Analyzer($tokens);
          $rules        = $ruleAnalyzer->analyzeRules($rawRules);

          return new Parser($tokens, $rules);
      }

      /**
       * Parse PP.
       *
       * The grammar is read line by line:
       *   * empty lines and lines starting with `//` are ignored;
       *   * lines starting with `%` are instructions (%skip or %token), any
       *     other instruction raises an exception;
       *   * any other line is a rule name (its last character, expected to be
       *     the colon, is dropped); the following lines starting with a space
       *     or a tab form the rule body, each one being trimmed and appended
       *     with a leading space.
       *
       * Tokens are stored as `$tokens[namespace][name] = regex`, the namespace
       * being `default` when none is given. A token declared with `-> ns` is
       * stored under the key `name:ns`. All the skips of a namespace are merged
       * into the single `skip` key as an alternation, the latest one first.
       *
       * @param   string  $pp            PP.
       * @param   array   $tokens        Extracted tokens.
       * @param   array   $rules         Extracted raw rules.
       * @param   string  $streamName    The name of the stream that contains
       *                                 the grammar.
       * @return  void
       * @throws  \Hoa\Compiler\Exception
       */
      public static function parsePP($pp, &$tokens, &$rules, $streamName)
      {
          $lines  = explode("\n", $pp);
          $tokens = ['default' => []];
          $rules  = [];

          for ($i = 0, $m = count($lines); $i < $m; ++$i) {
              $line = rtrim($lines[$i]);

              if ('' === $line || '//' === substr($line, 0, 2)) {
                  continue;
              }

              if ('%' === $line[0]) {
                  // preg_match() returns 1 on match, 0 on no match and false
                  // on failure (e.g. invalid UTF-8 with the `u` modifier).
                  // Comparing against 1 keeps a failure from being handled as
                  // a match with an empty $matches: such a line ends up in the
                  // "unrecognized instructions" branch instead.
                  if (1 === preg_match('#^%skip\s+(?:([^:]+):)?([^\s]+)\s+(.*)$#u', $line, $matches)) {
                      $namespace = empty($matches[1]) ? 'default' : $matches[1];

                      if (!isset($tokens[$namespace])) {
                          $tokens[$namespace] = [];
                      }

                      if (!isset($tokens[$namespace]['skip'])) {
                          $tokens[$namespace]['skip'] = $matches[3];
                      } else {
                          // Several skips in the same namespace: merge them
                          // into one alternation.
                          $tokens[$namespace]['skip'] =
                              '(?:' . $matches[3] . ')|' .
                              $tokens[$namespace]['skip'];
                      }
                  } elseif (1 === preg_match('#^%token\s+(?:([^:]+):)?([^\s]+)\s+(.*?)(?:\s+->\s+(.*))?$#u', $line, $matches)) {
                      $namespace = empty($matches[1]) ? 'default' : $matches[1];
                      $tokenName = $matches[2];

                      // `-> ns`: the namespace to switch to is encoded in the
                      // token key.
                      if (!empty($matches[4])) {
                          $tokenName .= ':' . $matches[4];
                      }

                      if (!isset($tokens[$namespace])) {
                          $tokens[$namespace] = [];
                      }

                      $tokens[$namespace][$tokenName] = $matches[3];
                  } else {
                      throw new Compiler\Exception(
                          'Unrecognized instructions:' . "\n" .
                          ' %s' . "\n" . 'in file %s at line %d.',
                          1,
                          [
                              $line,
                              $streamName,
                              $i + 1
                          ]
                      );
                  }

                  continue;
              }

              $ruleName = substr($line, 0, -1);
              $rule     = null;
              ++$i;

              // Collect the rule body: every following line that starts with
              // a space or a tab. Comment lines in between are skipped.
              while ($i < $m && isset($lines[$i][0])) {
                  if ('//' === substr($lines[$i], 0, 2)) {
                      ++$i;

                      continue;
                  }

                  $first = $lines[$i][0];

                  if (' ' !== $first && "\t" !== $first) {
                      break;
                  }

                  $rule .= ' ' . trim($lines[$i++]);
              }

              // The line that stopped the collection is not empty: step back
              // so that the enclosing loop processes it.
              if (isset($lines[$i][0])) {
                  --$i;
              }

              $rules[$ruleName] = $rule;
          }
      }
  }
  222. /**
  223. * Flex entity: passes this class's fully-qualified name to Consistency::flexEntity() when the file is loaded. NOTE(review): presumably this registers a shorter alias for the class; confirm against Hoa\Consistency.
  224. */
  225. Consistency::flexEntity('Hoa\Compiler\Llk\Llk');